xref: /openbmc/linux/kernel/trace/trace.c (revision d003d772)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring-buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default; you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs,
117  * or set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
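/*
 * Editor's note (illustrative sketch, not part of trace.c): the comment above
 * describes an array of unions whose first element is a header (count and
 * owning module) and whose last element is a tail that chains to the next
 * saved array. The standalone userspace program below demonstrates the same
 * head/tail sentinel layout with hypothetical demo_* names; it is only an
 * analogy for trace_eval_map_item, not kernel code.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>

union demo_item;

struct demo_map  { const char *name; int value; };
struct demo_head { unsigned long length; };		/* first element */
struct demo_tail { union demo_item *next; };		/* last element */

union demo_item {
	struct demo_map		map;
	struct demo_head	head;
	struct demo_tail	tail;
};

/* Walk one saved array, then follow tail.next to the next array (if any). */
static void demo_walk(union demo_item *item)
{
	while (item) {
		unsigned long i, len = item[0].head.length;

		for (i = 1; i <= len; i++)
			printf("%s = %d\n", item[i].map.name, item[i].map.value);

		item = item[len + 1].tail.next;
	}
}

int main(void)
{
	union demo_item chunk[3] = {
		{ .head = { .length = 1 } },
		{ .map  = { .name = "EXAMPLE_ENUM", .value = 42 } },
		{ .tail = { .next = NULL } },
	};

	demo_walk(chunk);
	return 0;
}
#endif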
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188 		return 1;
189 	}
190 
191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
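/*
 * Editor's note (illustrative sketch, not part of trace.c): ns2usecs() above
 * rounds to the nearest microsecond by adding half the divisor (500 ns)
 * before dividing by 1000. do_div() is a kernel-only helper, so the
 * standalone userspace check below uses plain 64-bit division to show the
 * same rounding behaviour.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>
#include <inttypes.h>

static uint64_t demo_ns2usecs(uint64_t nsec)
{
	return (nsec + 500) / 1000;	/* round to nearest, like ns2usecs() */
}

int main(void)
{
	/* 1499 ns rounds down to 1 us, 1500 ns rounds up to 2 us. */
	printf("%" PRIu64 " %" PRIu64 "\n",
	       demo_ns2usecs(1499), demo_ns2usecs(1500));
	return 0;
}
#endif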
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid is already +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
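/*
 * Editor's note (illustrative sketch, not part of trace.c): the pid-list
 * iterators above hand seq_file a cookie of "pid + 1" so that pid 0 can be
 * told apart from the NULL end-of-iteration marker. The standalone userspace
 * program below mimics that with a tiny bitmap and a hypothetical demo_next()
 * helper; find_next_bit() is kernel-only, so the scan is open-coded.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>

#define DEMO_PID_MAX 64
static unsigned char demo_pids[DEMO_PID_MAX];	/* 1 == pid is in the filter */

/*
 * Return (pid + 1) of the next set entry at or after @v, where @v is already
 * +1 of the previously shown pid, or NULL when the scan is done.
 */
static void *demo_next(void *v)
{
	unsigned long pid = (unsigned long)v;

	for (; pid < DEMO_PID_MAX; pid++)
		if (demo_pids[pid])
			return (void *)(pid + 1);

	return NULL;
}

int main(void)
{
	void *v;

	demo_pids[0] = demo_pids[7] = 1;

	/* Start the scan at pid 0, i.e. a cookie of 0. */
	for (v = demo_next((void *)0UL); v; v = demo_next(v))
		printf("%lu\n", (unsigned long)v - 1);	/* prints 0 and 7 */

	return 0;
}
#endif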
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always recreate a new array. The write is an all-or-nothing
494 	 * operation: a new array is always created when the user adds
495 	 * new pids. If the operation fails, the current list is
496 	 * not modified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
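/*
 * Editor's note (illustrative sketch, not part of trace.c): trace_pid_write()
 * above never edits the live pid list in place; it builds a complete new list
 * (copying the old bits first) and only publishes it once every parse step
 * has succeeded, so a failed write leaves the existing filter untouched. The
 * standalone sketch below shows that copy-then-publish pattern on a plain
 * array, using hypothetical demo_* names.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdlib.h>
#include <errno.h>

struct demo_list { size_t n; int vals[16]; };

/*
 * Replace *cur with a copy that also holds @extra, or fail without touching
 * the current list.
 */
static int demo_update(struct demo_list **cur, int extra)
{
	struct demo_list *old = *cur;
	struct demo_list *new = calloc(1, sizeof(*new));

	if (!new)
		return -ENOMEM;
	if (old)
		*new = *old;			/* start from the current contents */

	if (new->n >= 16 || extra < 0) {	/* any validation failure ...  */
		free(new);			/* ... discards only the copy  */
		return -EINVAL;
	}
	new->vals[new->n++] = extra;

	*cur = new;				/* publish the new list */
	free(old);
	return 0;
}

int main(void)
{
	struct demo_list *list = NULL;

	demo_update(&list, 42);		/* succeeds: list now holds 42    */
	demo_update(&list, -1);		/* fails: list is left as it was  */
	free(list);
	return 0;
}
#endif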
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384.
613  * If a dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. In any case, this is
615  * configurable at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page holding the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page holding the consumed events may become a page for
642  *      splice_read, and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only from read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
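/*
 * Editor's note (illustrative sketch, not part of trace.c): the block above
 * implements a two-level scheme: a whole-buffer reader takes a global
 * rw-semaphore for write, while a per-cpu reader takes it for read and then
 * a per-cpu mutex. The standalone pthreads sketch below shows the same idea
 * with a hypothetical NR_DEMO_CPUS; it is an analogy, not the kernel
 * implementation.
 */
#if 0	/* illustrative only; build separately as userspace code (-lpthread) */
#include <pthread.h>

#define NR_DEMO_CPUS 4

static pthread_rwlock_t demo_all_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t demo_cpu_lock[NR_DEMO_CPUS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* cpu < 0 means "all cpus", mirroring RING_BUFFER_ALL_CPUS. */
static void demo_access_lock(int cpu)
{
	if (cpu < 0) {
		/* exclude every other accessor, per-cpu or not */
		pthread_rwlock_wrlock(&demo_all_lock);
	} else {
		/* block "all cpus" accessors ... */
		pthread_rwlock_rdlock(&demo_all_lock);
		/* ... then block other accessors of this cpu */
		pthread_mutex_lock(&demo_cpu_lock[cpu]);
	}
}

static void demo_access_unlock(int cpu)
{
	if (cpu < 0) {
		pthread_rwlock_unlock(&demo_all_lock);
	} else {
		pthread_mutex_unlock(&demo_cpu_lock[cpu]);
		pthread_rwlock_unlock(&demo_all_lock);
	}
}
#endif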
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races where it gets disabled while we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id(), cond_data);
924 	local_irq_restore(flags);
925 }
926 
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929 	tracing_snapshot_instance_cond(tr, NULL);
930 }
931 
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot with either
940  * a tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948 	struct trace_array *tr = &global_trace;
949 
950 	tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953 
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:		The tracing instance to snapshot
957  * @cond_data:	The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969 	tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972 
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:		The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989 	void *cond_data = NULL;
990 
991 	arch_spin_lock(&tr->max_lock);
992 
993 	if (tr->cond_snapshot)
994 		cond_data = tr->cond_snapshot->cond_data;
995 
996 	arch_spin_unlock(&tr->max_lock);
997 
998 	return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001 
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003 					struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005 
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008 	int ret;
1009 
1010 	if (!tr->allocated_snapshot) {
1011 
1012 		/* allocate spare buffer */
1013 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015 		if (ret < 0)
1016 			return ret;
1017 
1018 		tr->allocated_snapshot = true;
1019 	}
1020 
1021 	return 0;
1022 }
1023 
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026 	/*
1027 	 * We don't free the ring buffer; instead, we resize it because
1028 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1029 	 * we want to preserve it.
1030 	 */
1031 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032 	set_buffer_entries(&tr->max_buffer, 1);
1033 	tracing_reset_online_cpus(&tr->max_buffer);
1034 	tr->allocated_snapshot = false;
1035 }
1036 
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049 	struct trace_array *tr = &global_trace;
1050 	int ret;
1051 
1052 	ret = tracing_alloc_snapshot_instance(tr);
1053 	WARN_ON(ret < 0);
1054 
1055 	return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058 
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot();
1075 	if (ret < 0)
1076 		return;
1077 
1078 	tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081 
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:		The tracing instance
1085  * @cond_data:	User data to associate with the snapshot
1086  * @update:	Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096 				 cond_update_fn_t update)
1097 {
1098 	struct cond_snapshot *cond_snapshot;
1099 	int ret = 0;
1100 
1101 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102 	if (!cond_snapshot)
1103 		return -ENOMEM;
1104 
1105 	cond_snapshot->cond_data = cond_data;
1106 	cond_snapshot->update = update;
1107 
1108 	mutex_lock(&trace_types_lock);
1109 
1110 	ret = tracing_alloc_snapshot_instance(tr);
1111 	if (ret)
1112 		goto fail_unlock;
1113 
1114 	if (tr->current_trace->use_max_tr) {
1115 		ret = -EBUSY;
1116 		goto fail_unlock;
1117 	}
1118 
1119 	/*
1120 	 * The cond_snapshot can only change to NULL without the
1121 	 * trace_types_lock. We don't care if we race with it going
1122 	 * to NULL, but we want to make sure that it's not set to
1123 	 * something other than NULL when we get here, which we can
1124 	 * do safely with only holding the trace_types_lock and not
1125 	 * having to take the max_lock.
1126 	 */
1127 	if (tr->cond_snapshot) {
1128 		ret = -EBUSY;
1129 		goto fail_unlock;
1130 	}
1131 
1132 	arch_spin_lock(&tr->max_lock);
1133 	tr->cond_snapshot = cond_snapshot;
1134 	arch_spin_unlock(&tr->max_lock);
1135 
1136 	mutex_unlock(&trace_types_lock);
1137 
1138 	return ret;
1139 
1140  fail_unlock:
1141 	mutex_unlock(&trace_types_lock);
1142 	kfree(cond_snapshot);
1143 	return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1146 
1147 /**
1148  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149  * @tr:		The tracing instance
1150  *
1151  * Check whether the conditional snapshot for the given instance is
1152  * enabled; if so, free the cond_snapshot associated with it,
1153  * otherwise return -EINVAL.
1154  *
1155  * Returns 0 if successful, error otherwise.
1156  */
1157 int tracing_snapshot_cond_disable(struct trace_array *tr)
1158 {
1159 	int ret = 0;
1160 
1161 	arch_spin_lock(&tr->max_lock);
1162 
1163 	if (!tr->cond_snapshot)
1164 		ret = -EINVAL;
1165 	else {
1166 		kfree(tr->cond_snapshot);
1167 		tr->cond_snapshot = NULL;
1168 	}
1169 
1170 	arch_spin_unlock(&tr->max_lock);
1171 
1172 	return ret;
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175 #else
1176 void tracing_snapshot(void)
1177 {
1178 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182 {
1183 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186 int tracing_alloc_snapshot(void)
1187 {
1188 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189 	return -ENODEV;
1190 }
1191 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192 void tracing_snapshot_alloc(void)
1193 {
1194 	/* Give warning */
1195 	tracing_snapshot();
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	return NULL;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204 {
1205 	return -ENODEV;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208 int tracing_snapshot_cond_disable(struct trace_array *tr)
1209 {
1210 	return false;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213 #endif /* CONFIG_TRACER_SNAPSHOT */
1214 
1215 void tracer_tracing_off(struct trace_array *tr)
1216 {
1217 	if (tr->trace_buffer.buffer)
1218 		ring_buffer_record_off(tr->trace_buffer.buffer);
1219 	/*
1220 	 * This flag is looked at when buffers haven't been allocated
1221 	 * yet, or by some tracers (like irqsoff), that just want to
1222 	 * know if the ring buffer has been disabled, but it can handle
1223 	 * races where it gets disabled while we still do a record.
1224 	 * As the check is in the fast path of the tracers, it is more
1225 	 * important to be fast than accurate.
1226 	 */
1227 	tr->buffer_disabled = 1;
1228 	/* Make the flag seen by readers */
1229 	smp_wmb();
1230 }
1231 
1232 /**
1233  * tracing_off - turn off tracing buffers
1234  *
1235  * This function stops the tracing buffers from recording data.
1236  * It does not disable any overhead the tracers themselves may
1237  * be causing. This function simply causes all recording to
1238  * the ring buffers to fail.
1239  */
1240 void tracing_off(void)
1241 {
1242 	tracer_tracing_off(&global_trace);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_off);
1245 
1246 void disable_trace_on_warning(void)
1247 {
1248 	if (__disable_trace_on_warning)
1249 		tracing_off();
1250 }
1251 
1252 /**
1253  * tracer_tracing_is_on - show real state of ring buffer enabled
1254  * @tr : the trace array to know if ring buffer is enabled
1255  *
1256  * Shows real state of the ring buffer if it is enabled or not.
1257  */
1258 bool tracer_tracing_is_on(struct trace_array *tr)
1259 {
1260 	if (tr->trace_buffer.buffer)
1261 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262 	return !tr->buffer_disabled;
1263 }
1264 
1265 /**
1266  * tracing_is_on - show state of ring buffers enabled
1267  */
1268 int tracing_is_on(void)
1269 {
1270 	return tracer_tracing_is_on(&global_trace);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_is_on);
1273 
1274 static int __init set_buf_size(char *str)
1275 {
1276 	unsigned long buf_size;
1277 
1278 	if (!str)
1279 		return 0;
1280 	buf_size = memparse(str, &str);
1281 	/* nr_entries can not be zero */
1282 	if (buf_size == 0)
1283 		return 0;
1284 	trace_buf_size = buf_size;
1285 	return 1;
1286 }
1287 __setup("trace_buf_size=", set_buf_size);
1288 
1289 static int __init set_tracing_thresh(char *str)
1290 {
1291 	unsigned long threshold;
1292 	int ret;
1293 
1294 	if (!str)
1295 		return 0;
1296 	ret = kstrtoul(str, 0, &threshold);
1297 	if (ret < 0)
1298 		return 0;
1299 	tracing_thresh = threshold * 1000;
1300 	return 1;
1301 }
1302 __setup("tracing_thresh=", set_tracing_thresh);
1303 
1304 unsigned long nsecs_to_usecs(unsigned long nsecs)
1305 {
1306 	return nsecs / 1000;
1307 }
1308 
1309 /*
1310  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313  * of strings in the order that the evals (enum) were defined.
1314  */
1315 #undef C
1316 #define C(a, b) b
1317 
1318 /* These must match the bit positions in trace_iterator_flags */
1319 static const char *trace_options[] = {
1320 	TRACE_FLAGS
1321 	NULL
1322 };
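/*
 * Editor's note (illustrative sketch, not part of trace.c): the
 * "#define C(a, b) b" trick above is the classic X-macro pattern: the flag
 * list is written once as C(enum, string) pairs and expanded twice with
 * different definitions of C() to build both an enum and a string table in
 * matching order. The standalone sketch below shows the pattern with a
 * hypothetical DEMO_FLAGS list.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>

#define DEMO_FLAGS		\
	C(FOO,	"foo"),		\
	C(BAR,	"bar"),		\
	C(BAZ,	"baz"),

/* First expansion: build the enum from the first column. */
#undef C
#define C(a, b) DEMO_##a
enum demo_flag_bits { DEMO_FLAGS DEMO_NR_FLAGS };

/* Second expansion: build the string table from the second column. */
#undef C
#define C(a, b) b
static const char *demo_flag_names[] = { DEMO_FLAGS NULL };

int main(void)
{
	printf("%s\n", demo_flag_names[DEMO_BAR]);	/* prints "bar" */
	return 0;
}
#endif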
1323 
1324 static struct {
1325 	u64 (*func)(void);
1326 	const char *name;
1327 	int in_ns;		/* is this clock in nanoseconds? */
1328 } trace_clocks[] = {
1329 	{ trace_clock_local,		"local",	1 },
1330 	{ trace_clock_global,		"global",	1 },
1331 	{ trace_clock_counter,		"counter",	0 },
1332 	{ trace_clock_jiffies,		"uptime",	0 },
1333 	{ trace_clock,			"perf",		1 },
1334 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1335 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1336 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1337 	ARCH_TRACE_CLOCKS
1338 };
1339 
1340 bool trace_clock_in_ns(struct trace_array *tr)
1341 {
1342 	if (trace_clocks[tr->clock_id].in_ns)
1343 		return true;
1344 
1345 	return false;
1346 }
1347 
1348 /*
1349  * trace_parser_get_init - gets the buffer for trace parser
1350  */
1351 int trace_parser_get_init(struct trace_parser *parser, int size)
1352 {
1353 	memset(parser, 0, sizeof(*parser));
1354 
1355 	parser->buffer = kmalloc(size, GFP_KERNEL);
1356 	if (!parser->buffer)
1357 		return 1;
1358 
1359 	parser->size = size;
1360 	return 0;
1361 }
1362 
1363 /*
1364  * trace_parser_put - frees the buffer for trace parser
1365  */
1366 void trace_parser_put(struct trace_parser *parser)
1367 {
1368 	kfree(parser->buffer);
1369 	parser->buffer = NULL;
1370 }
1371 
1372 /*
1373  * trace_get_user - reads the user input string separated by space
1374  * (matched by isspace(ch))
1375  *
1376  * For each string found the 'struct trace_parser' is updated,
1377  * and the function returns.
1378  *
1379  * Returns number of bytes read.
1380  *
1381  * See kernel/trace/trace.h for 'struct trace_parser' details.
1382  */
1383 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384 	size_t cnt, loff_t *ppos)
1385 {
1386 	char ch;
1387 	size_t read = 0;
1388 	ssize_t ret;
1389 
1390 	if (!*ppos)
1391 		trace_parser_clear(parser);
1392 
1393 	ret = get_user(ch, ubuf++);
1394 	if (ret)
1395 		goto out;
1396 
1397 	read++;
1398 	cnt--;
1399 
1400 	/*
1401 	 * If the parser is not finished with the last write,
1402 	 * continue reading the user input without skipping spaces.
1403 	 */
1404 	if (!parser->cont) {
1405 		/* skip white space */
1406 		while (cnt && isspace(ch)) {
1407 			ret = get_user(ch, ubuf++);
1408 			if (ret)
1409 				goto out;
1410 			read++;
1411 			cnt--;
1412 		}
1413 
1414 		parser->idx = 0;
1415 
1416 		/* only spaces were written */
1417 		if (isspace(ch) || !ch) {
1418 			*ppos += read;
1419 			ret = read;
1420 			goto out;
1421 		}
1422 	}
1423 
1424 	/* read the non-space input */
1425 	while (cnt && !isspace(ch) && ch) {
1426 		if (parser->idx < parser->size - 1)
1427 			parser->buffer[parser->idx++] = ch;
1428 		else {
1429 			ret = -EINVAL;
1430 			goto out;
1431 		}
1432 		ret = get_user(ch, ubuf++);
1433 		if (ret)
1434 			goto out;
1435 		read++;
1436 		cnt--;
1437 	}
1438 
1439 	/* We either got finished input or we have to wait for another call. */
1440 	if (isspace(ch) || !ch) {
1441 		parser->buffer[parser->idx] = 0;
1442 		parser->cont = false;
1443 	} else if (parser->idx < parser->size - 1) {
1444 		parser->cont = true;
1445 		parser->buffer[parser->idx++] = ch;
1446 		/* Make sure the parsed string always terminates with '\0'. */
1447 		parser->buffer[parser->idx] = 0;
1448 	} else {
1449 		ret = -EINVAL;
1450 		goto out;
1451 	}
1452 
1453 	*ppos += read;
1454 	ret = read;
1455 
1456 out:
1457 	return ret;
1458 }
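/*
 * Editor's note (illustrative sketch, not part of trace.c): trace_get_user()
 * above pulls one whitespace-separated token per call and uses parser->idx
 * and parser->cont to remember that a token was cut off at the end of a
 * write, so the next call continues it instead of starting over. The
 * standalone sketch below carries the partial token across calls in a
 * hypothetical struct demo_parser; it leaves out the get_user() handling
 * that the real code needs.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>
#include <ctype.h>

struct demo_parser {
	char buffer[64];
	size_t idx;	/* length of a partially read token carried over */
};

/* Feed one chunk of input; print every token completed within it. */
static void demo_feed(struct demo_parser *p, const char *chunk)
{
	for (; *chunk; chunk++) {
		if (isspace((unsigned char)*chunk)) {
			if (p->idx) {			/* token finished */
				p->buffer[p->idx] = '\0';
				printf("token: %s\n", p->buffer);
				p->idx = 0;
			}
		} else if (p->idx < sizeof(p->buffer) - 1) {
			p->buffer[p->idx++] = *chunk;	/* may continue later */
		}
	}
}

int main(void)
{
	struct demo_parser p = { .idx = 0 };

	demo_feed(&p, "12 34");	/* "34" is not finished yet ...       */
	demo_feed(&p, "5 ");	/* ... and is completed here as "345" */
	return 0;
}
#endif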
1459 
1460 /* TODO add a seq_buf_to_buffer() */
1461 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462 {
1463 	int len;
1464 
1465 	if (trace_seq_used(s) <= s->seq.readpos)
1466 		return -EBUSY;
1467 
1468 	len = trace_seq_used(s) - s->seq.readpos;
1469 	if (cnt > len)
1470 		cnt = len;
1471 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472 
1473 	s->seq.readpos += cnt;
1474 	return cnt;
1475 }
1476 
1477 unsigned long __read_mostly	tracing_thresh;
1478 
1479 #ifdef CONFIG_TRACER_MAX_TRACE
1480 /*
1481  * Copy the new maximum trace into the separate maximum-trace
1482  * structure. (this way the maximum trace is permanently saved,
1483  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484  */
1485 static void
1486 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487 {
1488 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1489 	struct trace_buffer *max_buf = &tr->max_buffer;
1490 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492 
1493 	max_buf->cpu = cpu;
1494 	max_buf->time_start = data->preempt_timestamp;
1495 
1496 	max_data->saved_latency = tr->max_latency;
1497 	max_data->critical_start = data->critical_start;
1498 	max_data->critical_end = data->critical_end;
1499 
1500 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501 	max_data->pid = tsk->pid;
1502 	/*
1503 	 * If tsk == current, then use current_uid(), as that does not use
1504 	 * RCU. The irq tracer can be called out of RCU scope.
1505 	 */
1506 	if (tsk == current)
1507 		max_data->uid = current_uid();
1508 	else
1509 		max_data->uid = task_uid(tsk);
1510 
1511 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512 	max_data->policy = tsk->policy;
1513 	max_data->rt_priority = tsk->rt_priority;
1514 
1515 	/* record this tasks comm */
1516 	tracing_record_cmdline(tsk);
1517 }
1518 
1519 /**
1520  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521  * @tr: tracer
1522  * @tsk: the task with the latency
1523  * @cpu: The cpu that initiated the trace.
1524  * @cond_data: User data associated with a conditional snapshot
1525  *
1526  * Flip the buffers between the @tr and the max_tr and record information
1527  * about which task was the cause of this latency.
1528  */
1529 void
1530 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531 	      void *cond_data)
1532 {
1533 	if (tr->stop_count)
1534 		return;
1535 
1536 	WARN_ON_ONCE(!irqs_disabled());
1537 
1538 	if (!tr->allocated_snapshot) {
1539 		/* Only the nop tracer should hit this when disabling */
1540 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541 		return;
1542 	}
1543 
1544 	arch_spin_lock(&tr->max_lock);
1545 
1546 	/* Inherit the recordable setting from trace_buffer */
1547 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548 		ring_buffer_record_on(tr->max_buffer.buffer);
1549 	else
1550 		ring_buffer_record_off(tr->max_buffer.buffer);
1551 
1552 #ifdef CONFIG_TRACER_SNAPSHOT
1553 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554 		goto out_unlock;
1555 #endif
1556 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557 
1558 	__update_max_tr(tr, tsk, cpu);
1559 
1560  out_unlock:
1561 	arch_spin_unlock(&tr->max_lock);
1562 }
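/*
 * Editor's note (illustrative sketch, not part of trace.c): update_max_tr()
 * above preserves the worst-case trace by swapping the live and max buffer
 * pointers with swap() instead of copying their contents. The standalone
 * sketch below shows the same pointer-swap idea for keeping a "worst so far"
 * sample buffer, using hypothetical demo_* names.
 */
#if 0	/* illustrative only; build separately as a userspace program */
#include <stdio.h>

#define DEMO_BUF_LEN 4

static int demo_a[DEMO_BUF_LEN];
static int demo_b[DEMO_BUF_LEN];
static int *demo_live_buf = demo_a;	/* currently being filled    */
static int *demo_max_buf  = demo_b;	/* worst case preserved here */
static int demo_max_latency;

/* If @latency is a new worst case, keep the live buffer by swapping. */
static void demo_update_max(int latency)
{
	int *tmp;

	if (latency <= demo_max_latency)
		return;

	demo_max_latency = latency;
	tmp = demo_live_buf;		/* O(1): exchange pointers, */
	demo_live_buf = demo_max_buf;	/* nothing is copied        */
	demo_max_buf = tmp;
}

int main(void)
{
	demo_live_buf[0] = 42;		/* pretend this trace had latency 10 */
	demo_update_max(10);
	printf("max latency %d, first entry %d\n",
	       demo_max_latency, demo_max_buf[0]);	/* -> 10, 42 */
	return 0;
}
#endif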
1563 
1564 /**
1565  * update_max_tr_single - only copy one trace over, and reset the rest
1566  * @tr: tracer
1567  * @tsk: task with the latency
1568  * @cpu: the cpu of the buffer to copy.
1569  *
1570  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571  */
1572 void
1573 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574 {
1575 	int ret;
1576 
1577 	if (tr->stop_count)
1578 		return;
1579 
1580 	WARN_ON_ONCE(!irqs_disabled());
1581 	if (!tr->allocated_snapshot) {
1582 		/* Only the nop tracer should hit this when disabling */
1583 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584 		return;
1585 	}
1586 
1587 	arch_spin_lock(&tr->max_lock);
1588 
1589 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590 
1591 	if (ret == -EBUSY) {
1592 		/*
1593 		 * We failed to swap the buffer due to a commit taking
1594 		 * place on this CPU. We fail to record, but we reset
1595 		 * the max trace buffer (no one writes directly to it)
1596 		 * and flag that it failed.
1597 		 */
1598 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599 			"Failed to swap buffers due to commit in progress\n");
1600 	}
1601 
1602 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603 
1604 	__update_max_tr(tr, tsk, cpu);
1605 	arch_spin_unlock(&tr->max_lock);
1606 }
1607 #endif /* CONFIG_TRACER_MAX_TRACE */
1608 
1609 static int wait_on_pipe(struct trace_iterator *iter, int full)
1610 {
1611 	/* Iterators are static, they should be filled or empty */
1612 	if (trace_buffer_iter(iter, iter->cpu_file))
1613 		return 0;
1614 
1615 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616 				full);
1617 }
1618 
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static bool selftests_can_run;
1621 
1622 struct trace_selftests {
1623 	struct list_head		list;
1624 	struct tracer			*type;
1625 };
1626 
1627 static LIST_HEAD(postponed_selftests);
1628 
1629 static int save_selftest(struct tracer *type)
1630 {
1631 	struct trace_selftests *selftest;
1632 
1633 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634 	if (!selftest)
1635 		return -ENOMEM;
1636 
1637 	selftest->type = type;
1638 	list_add(&selftest->list, &postponed_selftests);
1639 	return 0;
1640 }
1641 
1642 static int run_tracer_selftest(struct tracer *type)
1643 {
1644 	struct trace_array *tr = &global_trace;
1645 	struct tracer *saved_tracer = tr->current_trace;
1646 	int ret;
1647 
1648 	if (!type->selftest || tracing_selftest_disabled)
1649 		return 0;
1650 
1651 	/*
1652 	 * If a tracer registers early in boot up (before scheduling is
1653 	 * initialized and such), then do not run its selftests yet.
1654 	 * Instead, run it a little later in the boot process.
1655 	 */
1656 	if (!selftests_can_run)
1657 		return save_selftest(type);
1658 
1659 	/*
1660 	 * Run a selftest on this tracer.
1661 	 * Here we reset the trace buffer, and set the current
1662 	 * tracer to be this tracer. The tracer can then run some
1663 	 * internal tracing to verify that everything is in order.
1664 	 * If we fail, we do not register this tracer.
1665 	 */
1666 	tracing_reset_online_cpus(&tr->trace_buffer);
1667 
1668 	tr->current_trace = type;
1669 
1670 #ifdef CONFIG_TRACER_MAX_TRACE
1671 	if (type->use_max_tr) {
1672 		/* If we expanded the buffers, make sure the max is expanded too */
1673 		if (ring_buffer_expanded)
1674 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675 					   RING_BUFFER_ALL_CPUS);
1676 		tr->allocated_snapshot = true;
1677 	}
1678 #endif
1679 
1680 	/* the test is responsible for initializing and enabling */
1681 	pr_info("Testing tracer %s: ", type->name);
1682 	ret = type->selftest(type, tr);
1683 	/* the test is responsible for resetting too */
1684 	tr->current_trace = saved_tracer;
1685 	if (ret) {
1686 		printk(KERN_CONT "FAILED!\n");
1687 		/* Add the warning after printing 'FAILED' */
1688 		WARN_ON(1);
1689 		return -1;
1690 	}
1691 	/* Only reset on passing, to avoid touching corrupted buffers */
1692 	tracing_reset_online_cpus(&tr->trace_buffer);
1693 
1694 #ifdef CONFIG_TRACER_MAX_TRACE
1695 	if (type->use_max_tr) {
1696 		tr->allocated_snapshot = false;
1697 
1698 		/* Shrink the max buffer again */
1699 		if (ring_buffer_expanded)
1700 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1701 					   RING_BUFFER_ALL_CPUS);
1702 	}
1703 #endif
1704 
1705 	printk(KERN_CONT "PASSED\n");
1706 	return 0;
1707 }
1708 
1709 static __init int init_trace_selftests(void)
1710 {
1711 	struct trace_selftests *p, *n;
1712 	struct tracer *t, **last;
1713 	int ret;
1714 
1715 	selftests_can_run = true;
1716 
1717 	mutex_lock(&trace_types_lock);
1718 
1719 	if (list_empty(&postponed_selftests))
1720 		goto out;
1721 
1722 	pr_info("Running postponed tracer tests:\n");
1723 
1724 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725 		ret = run_tracer_selftest(p->type);
1726 		/* If the test fails, then warn and remove from available_tracers */
1727 		if (ret < 0) {
1728 			WARN(1, "tracer: %s failed selftest, disabling\n",
1729 			     p->type->name);
1730 			last = &trace_types;
1731 			for (t = trace_types; t; t = t->next) {
1732 				if (t == p->type) {
1733 					*last = t->next;
1734 					break;
1735 				}
1736 				last = &t->next;
1737 			}
1738 		}
1739 		list_del(&p->list);
1740 		kfree(p);
1741 	}
1742 
1743  out:
1744 	mutex_unlock(&trace_types_lock);
1745 
1746 	return 0;
1747 }
1748 core_initcall(init_trace_selftests);
1749 #else
1750 static inline int run_tracer_selftest(struct tracer *type)
1751 {
1752 	return 0;
1753 }
1754 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1755 
1756 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757 
1758 static void __init apply_trace_boot_options(void);
1759 
1760 /**
1761  * register_tracer - register a tracer with the ftrace system.
1762  * @type: the plugin for the tracer
1763  *
1764  * Register a new plugin tracer.
1765  */
1766 int __init register_tracer(struct tracer *type)
1767 {
1768 	struct tracer *t;
1769 	int ret = 0;
1770 
1771 	if (!type->name) {
1772 		pr_info("Tracer must have a name\n");
1773 		return -1;
1774 	}
1775 
1776 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778 		return -1;
1779 	}
1780 
1781 	mutex_lock(&trace_types_lock);
1782 
1783 	tracing_selftest_running = true;
1784 
1785 	for (t = trace_types; t; t = t->next) {
1786 		if (strcmp(type->name, t->name) == 0) {
1787 			/* already found */
1788 			pr_info("Tracer %s already registered\n",
1789 				type->name);
1790 			ret = -1;
1791 			goto out;
1792 		}
1793 	}
1794 
1795 	if (!type->set_flag)
1796 		type->set_flag = &dummy_set_flag;
1797 	if (!type->flags) {
1798 		/* allocate a dummy tracer_flags */
1799 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800 		if (!type->flags) {
1801 			ret = -ENOMEM;
1802 			goto out;
1803 		}
1804 		type->flags->val = 0;
1805 		type->flags->opts = dummy_tracer_opt;
1806 	} else
1807 		if (!type->flags->opts)
1808 			type->flags->opts = dummy_tracer_opt;
1809 
1810 	/* store the tracer for __set_tracer_option */
1811 	type->flags->trace = type;
1812 
1813 	ret = run_tracer_selftest(type);
1814 	if (ret < 0)
1815 		goto out;
1816 
1817 	type->next = trace_types;
1818 	trace_types = type;
1819 	add_tracer_options(&global_trace, type);
1820 
1821  out:
1822 	tracing_selftest_running = false;
1823 	mutex_unlock(&trace_types_lock);
1824 
1825 	if (ret || !default_bootup_tracer)
1826 		goto out_unlock;
1827 
1828 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829 		goto out_unlock;
1830 
1831 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832 	/* Do we want this tracer to start on bootup? */
1833 	tracing_set_tracer(&global_trace, type->name);
1834 	default_bootup_tracer = NULL;
1835 
1836 	apply_trace_boot_options();
1837 
1838 	/* disable other selftests, since this will break them. */
1839 	tracing_selftest_disabled = true;
1840 #ifdef CONFIG_FTRACE_STARTUP_TEST
1841 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842 	       type->name);
1843 #endif
1844 
1845  out_unlock:
1846 	return ret;
1847 }
1848 
1849 void tracing_reset(struct trace_buffer *buf, int cpu)
1850 {
1851 	struct ring_buffer *buffer = buf->buffer;
1852 
1853 	if (!buffer)
1854 		return;
1855 
1856 	ring_buffer_record_disable(buffer);
1857 
1858 	/* Make sure all commits have finished */
1859 	synchronize_rcu();
1860 	ring_buffer_reset_cpu(buffer, cpu);
1861 
1862 	ring_buffer_record_enable(buffer);
1863 }
1864 
1865 void tracing_reset_online_cpus(struct trace_buffer *buf)
1866 {
1867 	struct ring_buffer *buffer = buf->buffer;
1868 	int cpu;
1869 
1870 	if (!buffer)
1871 		return;
1872 
1873 	ring_buffer_record_disable(buffer);
1874 
1875 	/* Make sure all commits have finished */
1876 	synchronize_rcu();
1877 
1878 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879 
1880 	for_each_online_cpu(cpu)
1881 		ring_buffer_reset_cpu(buffer, cpu);
1882 
1883 	ring_buffer_record_enable(buffer);
1884 }
1885 
1886 /* Must have trace_types_lock held */
1887 void tracing_reset_all_online_cpus(void)
1888 {
1889 	struct trace_array *tr;
1890 
1891 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892 		if (!tr->clear_trace)
1893 			continue;
1894 		tr->clear_trace = false;
1895 		tracing_reset_online_cpus(&tr->trace_buffer);
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897 		tracing_reset_online_cpus(&tr->max_buffer);
1898 #endif
1899 	}
1900 }
1901 
1902 static int *tgid_map;
1903 
1904 #define SAVED_CMDLINES_DEFAULT 128
1905 #define NO_CMDLINE_MAP UINT_MAX
1906 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907 struct saved_cmdlines_buffer {
1908 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909 	unsigned *map_cmdline_to_pid;
1910 	unsigned cmdline_num;
1911 	int cmdline_idx;
1912 	char *saved_cmdlines;
1913 };
1914 static struct saved_cmdlines_buffer *savedcmd;
1915 
1916 /* temporarily disable recording */
1917 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918 
1919 static inline char *get_saved_cmdlines(int idx)
1920 {
1921 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922 }
1923 
1924 static inline void set_cmdline(int idx, const char *cmdline)
1925 {
1926 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927 }
1928 
1929 static int allocate_cmdlines_buffer(unsigned int val,
1930 				    struct saved_cmdlines_buffer *s)
1931 {
1932 	s->map_cmdline_to_pid = kmalloc_array(val,
1933 					      sizeof(*s->map_cmdline_to_pid),
1934 					      GFP_KERNEL);
1935 	if (!s->map_cmdline_to_pid)
1936 		return -ENOMEM;
1937 
1938 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939 	if (!s->saved_cmdlines) {
1940 		kfree(s->map_cmdline_to_pid);
1941 		return -ENOMEM;
1942 	}
1943 
1944 	s->cmdline_idx = 0;
1945 	s->cmdline_num = val;
1946 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947 	       sizeof(s->map_pid_to_cmdline));
1948 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949 	       val * sizeof(*s->map_cmdline_to_pid));
1950 
1951 	return 0;
1952 }
1953 
1954 static int trace_create_savedcmd(void)
1955 {
1956 	int ret;
1957 
1958 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959 	if (!savedcmd)
1960 		return -ENOMEM;
1961 
1962 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963 	if (ret < 0) {
1964 		kfree(savedcmd);
1965 		savedcmd = NULL;
1966 		return -ENOMEM;
1967 	}
1968 
1969 	return 0;
1970 }
1971 
1972 int is_tracing_stopped(void)
1973 {
1974 	return global_trace.stop_count;
1975 }
1976 
1977 /**
1978  * tracing_start - quick start of the tracer
1979  *
1980  * If tracing is enabled but was stopped by tracing_stop,
1981  * this will start the tracer back up.
1982  */
1983 void tracing_start(void)
1984 {
1985 	struct ring_buffer *buffer;
1986 	unsigned long flags;
1987 
1988 	if (tracing_disabled)
1989 		return;
1990 
1991 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992 	if (--global_trace.stop_count) {
1993 		if (global_trace.stop_count < 0) {
1994 			/* Someone screwed up their debugging */
1995 			WARN_ON_ONCE(1);
1996 			global_trace.stop_count = 0;
1997 		}
1998 		goto out;
1999 	}
2000 
2001 	/* Prevent the buffers from switching */
2002 	arch_spin_lock(&global_trace.max_lock);
2003 
2004 	buffer = global_trace.trace_buffer.buffer;
2005 	if (buffer)
2006 		ring_buffer_record_enable(buffer);
2007 
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009 	buffer = global_trace.max_buffer.buffer;
2010 	if (buffer)
2011 		ring_buffer_record_enable(buffer);
2012 #endif
2013 
2014 	arch_spin_unlock(&global_trace.max_lock);
2015 
2016  out:
2017 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018 }
2019 
2020 static void tracing_start_tr(struct trace_array *tr)
2021 {
2022 	struct ring_buffer *buffer;
2023 	unsigned long flags;
2024 
2025 	if (tracing_disabled)
2026 		return;
2027 
2028 	/* If global, we need to also start the max tracer */
2029 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030 		return tracing_start();
2031 
2032 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2033 
2034 	if (--tr->stop_count) {
2035 		if (tr->stop_count < 0) {
2036 			/* Someone screwed up their debugging */
2037 			WARN_ON_ONCE(1);
2038 			tr->stop_count = 0;
2039 		}
2040 		goto out;
2041 	}
2042 
2043 	buffer = tr->trace_buffer.buffer;
2044 	if (buffer)
2045 		ring_buffer_record_enable(buffer);
2046 
2047  out:
2048 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049 }
2050 
2051 /**
2052  * tracing_stop - quick stop of the tracer
2053  *
2054  * Lightweight way to stop tracing. Use in conjunction with
2055  * tracing_start.
2056  */
2057 void tracing_stop(void)
2058 {
2059 	struct ring_buffer *buffer;
2060 	unsigned long flags;
2061 
2062 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063 	if (global_trace.stop_count++)
2064 		goto out;
2065 
2066 	/* Prevent the buffers from switching */
2067 	arch_spin_lock(&global_trace.max_lock);
2068 
2069 	buffer = global_trace.trace_buffer.buffer;
2070 	if (buffer)
2071 		ring_buffer_record_disable(buffer);
2072 
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074 	buffer = global_trace.max_buffer.buffer;
2075 	if (buffer)
2076 		ring_buffer_record_disable(buffer);
2077 #endif
2078 
2079 	arch_spin_unlock(&global_trace.max_lock);
2080 
2081  out:
2082 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
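
/*
 * Example usage (illustrative only): tracing_stop()/tracing_start() nest
 * via stop_count, so a debugging helper can bracket a section it does not
 * want traced; "inspect_state" below is a hypothetical function.
 *
 *	tracing_stop();
 *	inspect_state();
 *	tracing_start();
 */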
2084 
2085 static void tracing_stop_tr(struct trace_array *tr)
2086 {
2087 	struct ring_buffer *buffer;
2088 	unsigned long flags;
2089 
2090 	/* If global, we need to also stop the max tracer */
2091 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092 		return tracing_stop();
2093 
2094 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2095 	if (tr->stop_count++)
2096 		goto out;
2097 
2098 	buffer = tr->trace_buffer.buffer;
2099 	if (buffer)
2100 		ring_buffer_record_disable(buffer);
2101 
2102  out:
2103 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104 }
2105 
2106 static int trace_save_cmdline(struct task_struct *tsk)
2107 {
2108 	unsigned pid, idx;
2109 
2110 	/* treat recording of idle task as a success */
2111 	if (!tsk->pid)
2112 		return 1;
2113 
2114 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115 		return 0;
2116 
2117 	/*
2118 	 * It's not the end of the world if we don't get
2119 	 * the lock, but we also don't want to spin
2120 	 * nor do we want to disable interrupts,
2121 	 * so if we miss here, then better luck next time.
2122 	 */
2123 	if (!arch_spin_trylock(&trace_cmdline_lock))
2124 		return 0;
2125 
2126 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127 	if (idx == NO_CMDLINE_MAP) {
2128 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129 
2130 		/*
2131 		 * Check whether the cmdline buffer at idx has a pid
2132 		 * mapped. We are going to overwrite that entry so we
2133 		 * need to clear the map_pid_to_cmdline. Otherwise we
2134 		 * would read the new comm for the old pid.
2135 		 */
2136 		pid = savedcmd->map_cmdline_to_pid[idx];
2137 		if (pid != NO_CMDLINE_MAP)
2138 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139 
2140 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142 
2143 		savedcmd->cmdline_idx = idx;
2144 	}
2145 
2146 	set_cmdline(idx, tsk->comm);
2147 
2148 	arch_spin_unlock(&trace_cmdline_lock);
2149 
2150 	return 1;
2151 }
2152 
2153 static void __trace_find_cmdline(int pid, char comm[])
2154 {
2155 	unsigned map;
2156 
2157 	if (!pid) {
2158 		strcpy(comm, "<idle>");
2159 		return;
2160 	}
2161 
2162 	if (WARN_ON_ONCE(pid < 0)) {
2163 		strcpy(comm, "<XXX>");
2164 		return;
2165 	}
2166 
2167 	if (pid > PID_MAX_DEFAULT) {
2168 		strcpy(comm, "<...>");
2169 		return;
2170 	}
2171 
2172 	map = savedcmd->map_pid_to_cmdline[pid];
2173 	if (map != NO_CMDLINE_MAP)
2174 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175 	else
2176 		strcpy(comm, "<...>");
2177 }
2178 
2179 void trace_find_cmdline(int pid, char comm[])
2180 {
2181 	preempt_disable();
2182 	arch_spin_lock(&trace_cmdline_lock);
2183 
2184 	__trace_find_cmdline(pid, comm);
2185 
2186 	arch_spin_unlock(&trace_cmdline_lock);
2187 	preempt_enable();
2188 }
2189 
2190 int trace_find_tgid(int pid)
2191 {
2192 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193 		return 0;
2194 
2195 	return tgid_map[pid];
2196 }
2197 
2198 static int trace_save_tgid(struct task_struct *tsk)
2199 {
2200 	/* treat recording of idle task as a success */
2201 	if (!tsk->pid)
2202 		return 1;
2203 
2204 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205 		return 0;
2206 
2207 	tgid_map[tsk->pid] = tsk->tgid;
2208 	return 1;
2209 }
2210 
2211 static bool tracing_record_taskinfo_skip(int flags)
2212 {
2213 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214 		return true;
2215 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216 		return true;
2217 	if (!__this_cpu_read(trace_taskinfo_save))
2218 		return true;
2219 	return false;
2220 }
2221 
2222 /**
2223  * tracing_record_taskinfo - record the task info of a task
2224  *
2225  * @task:  task to record
2226  * @flags: TRACE_RECORD_CMDLINE for recording comm
2227  *         TRACE_RECORD_TGID for recording tgid
2228  */
2229 void tracing_record_taskinfo(struct task_struct *task, int flags)
2230 {
2231 	bool done;
2232 
2233 	if (tracing_record_taskinfo_skip(flags))
2234 		return;
2235 
2236 	/*
2237 	 * Record as much task information as possible. If some fail, continue
2238 	 * to try to record the others.
2239 	 */
2240 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242 
2243 	/* If recording any information failed, retry again soon. */
2244 	if (!done)
2245 		return;
2246 
2247 	__this_cpu_write(trace_taskinfo_save, false);
2248 }
2249 
2250 /**
2251  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252  *
2253  * @prev:  previous task during sched_switch
2254  * @next:  next task during sched_switch
2255  * @flags: TRACE_RECORD_CMDLINE for recording comm
2256  *         TRACE_RECORD_TGID for recording tgid
2257  */
2258 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259 					  struct task_struct *next, int flags)
2260 {
2261 	bool done;
2262 
2263 	if (tracing_record_taskinfo_skip(flags))
2264 		return;
2265 
2266 	/*
2267 	 * Record as much task information as possible. If some fail, continue
2268 	 * to try to record the others.
2269 	 */
2270 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274 
2275 	/* If recording any information failed, retry again soon. */
2276 	if (!done)
2277 		return;
2278 
2279 	__this_cpu_write(trace_taskinfo_save, false);
2280 }
2281 
2282 /* Helpers to record a specific task information */
2283 void tracing_record_cmdline(struct task_struct *task)
2284 {
2285 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286 }
2287 
2288 void tracing_record_tgid(struct task_struct *task)
2289 {
2290 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291 }
2292 
2293 /*
2294  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296  * simplifies those functions and keeps them in sync.
2297  */
2298 enum print_line_t trace_handle_return(struct trace_seq *s)
2299 {
2300 	return trace_seq_has_overflowed(s) ?
2301 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302 }
2303 EXPORT_SYMBOL_GPL(trace_handle_return);
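
/*
 * Example (sketch under assumptions, not taken from this file): a
 * trace_event ->trace() callback typically ends with trace_handle_return()
 * so that an overflowed trace_seq is reported as a partial line.  The
 * event name below is hypothetical.
 *
 *	static enum print_line_t
 *	example_event_trace(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */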
2304 
2305 void
2306 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307 			     int pc)
2308 {
2309 	struct task_struct *tsk = current;
2310 
2311 	entry->preempt_count		= pc & 0xff;
2312 	entry->pid			= (tsk) ? tsk->pid : 0;
2313 	entry->flags =
2314 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316 #else
2317 		TRACE_FLAG_IRQS_NOSUPPORT |
2318 #endif
2319 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2320 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324 }
2325 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2326 
2327 struct ring_buffer_event *
2328 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329 			  int type,
2330 			  unsigned long len,
2331 			  unsigned long flags, int pc)
2332 {
2333 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334 }
2335 
2336 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338 static int trace_buffered_event_ref;
2339 
2340 /**
2341  * trace_buffered_event_enable - enable buffering events
2342  *
2343  * When events are being filtered, it is quicker to use a temporary
2344  * buffer to write the event data into if there's a likely chance
2345  * that it will not be committed. The discard of the ring buffer
2346  * is not as fast as committing, and is much slower than copying
2347  * a commit.
2348  *
2349  * When an event is to be filtered, allocate per cpu buffers to
2350  * write the event data into, and if the event is filtered and discarded
2351  * it is simply dropped, otherwise, the entire data is to be committed
2352  * in one shot.
2353  */
2354 void trace_buffered_event_enable(void)
2355 {
2356 	struct ring_buffer_event *event;
2357 	struct page *page;
2358 	int cpu;
2359 
2360 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361 
2362 	if (trace_buffered_event_ref++)
2363 		return;
2364 
2365 	for_each_tracing_cpu(cpu) {
2366 		page = alloc_pages_node(cpu_to_node(cpu),
2367 					GFP_KERNEL | __GFP_NORETRY, 0);
2368 		if (!page)
2369 			goto failed;
2370 
2371 		event = page_address(page);
2372 		memset(event, 0, sizeof(*event));
2373 
2374 		per_cpu(trace_buffered_event, cpu) = event;
2375 
2376 		preempt_disable();
2377 		if (cpu == smp_processor_id() &&
2378 		    this_cpu_read(trace_buffered_event) !=
2379 		    per_cpu(trace_buffered_event, cpu))
2380 			WARN_ON_ONCE(1);
2381 		preempt_enable();
2382 	}
2383 
2384 	return;
2385  failed:
2386 	trace_buffered_event_disable();
2387 }
2388 
2389 static void enable_trace_buffered_event(void *data)
2390 {
2391 	/* Probably not needed, but do it anyway */
2392 	smp_rmb();
2393 	this_cpu_dec(trace_buffered_event_cnt);
2394 }
2395 
2396 static void disable_trace_buffered_event(void *data)
2397 {
2398 	this_cpu_inc(trace_buffered_event_cnt);
2399 }
2400 
2401 /**
2402  * trace_buffered_event_disable - disable buffering events
2403  *
2404  * When a filter is removed, it is faster to not use the buffered
2405  * events, and to commit directly into the ring buffer. Free up
2406  * the temp buffers when there are no more users. This requires
2407  * special synchronization with current events.
2408  */
2409 void trace_buffered_event_disable(void)
2410 {
2411 	int cpu;
2412 
2413 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414 
2415 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416 		return;
2417 
2418 	if (--trace_buffered_event_ref)
2419 		return;
2420 
2421 	preempt_disable();
2422 	/* For each CPU, set the buffer as used. */
2423 	smp_call_function_many(tracing_buffer_mask,
2424 			       disable_trace_buffered_event, NULL, 1);
2425 	preempt_enable();
2426 
2427 	/* Wait for all current users to finish */
2428 	synchronize_rcu();
2429 
2430 	for_each_tracing_cpu(cpu) {
2431 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432 		per_cpu(trace_buffered_event, cpu) = NULL;
2433 	}
2434 	/*
2435 	 * Make sure trace_buffered_event is NULL before clearing
2436 	 * trace_buffered_event_cnt.
2437 	 */
2438 	smp_wmb();
2439 
2440 	preempt_disable();
2441 	/* Do the work on each cpu */
2442 	smp_call_function_many(tracing_buffer_mask,
2443 			       enable_trace_buffered_event, NULL, 1);
2444 	preempt_enable();
2445 }
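
/*
 * Example usage (illustrative only): the enable/disable calls are
 * reference counted and must be made under event_mutex, paired around
 * the lifetime of whatever needs the buffering (an event filter, per the
 * comments above), roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// when the filter is attached
 *	...
 *	trace_buffered_event_disable();		// when the filter is removed
 *	mutex_unlock(&event_mutex);
 */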
2446 
2447 static struct ring_buffer *temp_buffer;
2448 
2449 struct ring_buffer_event *
2450 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451 			  struct trace_event_file *trace_file,
2452 			  int type, unsigned long len,
2453 			  unsigned long flags, int pc)
2454 {
2455 	struct ring_buffer_event *entry;
2456 	int val;
2457 
2458 	*current_rb = trace_file->tr->trace_buffer.buffer;
2459 
2460 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462 	    (entry = this_cpu_read(trace_buffered_event))) {
2463 		/* Try to use the per cpu buffer first */
2464 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2465 		if (val == 1) {
2466 			trace_event_setup(entry, type, flags, pc);
2467 			entry->array[0] = len;
2468 			return entry;
2469 		}
2470 		this_cpu_dec(trace_buffered_event_cnt);
2471 	}
2472 
2473 	entry = __trace_buffer_lock_reserve(*current_rb,
2474 					    type, len, flags, pc);
2475 	/*
2476 	 * If tracing is off, but we have triggers enabled
2477 	 * we still need to look at the event data. Use the temp_buffer
2478 	 * to store the trace event for the trigger to use. It's recursive
2479 	 * safe and will not be recorded anywhere.
2480 	 */
2481 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482 		*current_rb = temp_buffer;
2483 		entry = __trace_buffer_lock_reserve(*current_rb,
2484 						    type, len, flags, pc);
2485 	}
2486 	return entry;
2487 }
2488 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2489 
2490 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491 static DEFINE_MUTEX(tracepoint_printk_mutex);
2492 
2493 static void output_printk(struct trace_event_buffer *fbuffer)
2494 {
2495 	struct trace_event_call *event_call;
2496 	struct trace_event *event;
2497 	unsigned long flags;
2498 	struct trace_iterator *iter = tracepoint_print_iter;
2499 
2500 	/* We should never get here if iter is NULL */
2501 	if (WARN_ON_ONCE(!iter))
2502 		return;
2503 
2504 	event_call = fbuffer->trace_file->event_call;
2505 	if (!event_call || !event_call->event.funcs ||
2506 	    !event_call->event.funcs->trace)
2507 		return;
2508 
2509 	event = &fbuffer->trace_file->event_call->event;
2510 
2511 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512 	trace_seq_init(&iter->seq);
2513 	iter->ent = fbuffer->entry;
2514 	event_call->event.funcs->trace(iter, 0, event);
2515 	trace_seq_putc(&iter->seq, 0);
2516 	printk("%s", iter->seq.buffer);
2517 
2518 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519 }
2520 
2521 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522 			     void __user *buffer, size_t *lenp,
2523 			     loff_t *ppos)
2524 {
2525 	int save_tracepoint_printk;
2526 	int ret;
2527 
2528 	mutex_lock(&tracepoint_printk_mutex);
2529 	save_tracepoint_printk = tracepoint_printk;
2530 
2531 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532 
2533 	/*
2534 	 * This will force an early return, as tracepoint_printk
2535 	 * is always zero when tracepoint_print_iter is not allocated.
2536 	 */
2537 	if (!tracepoint_print_iter)
2538 		tracepoint_printk = 0;
2539 
2540 	if (save_tracepoint_printk == tracepoint_printk)
2541 		goto out;
2542 
2543 	if (tracepoint_printk)
2544 		static_key_enable(&tracepoint_printk_key.key);
2545 	else
2546 		static_key_disable(&tracepoint_printk_key.key);
2547 
2548  out:
2549 	mutex_unlock(&tracepoint_printk_mutex);
2550 
2551 	return ret;
2552 }
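
/*
 * Example (illustrative only): this handler is normally reached from user
 * space via the "kernel.tracepoint_printk" sysctl, e.g.
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * which goes through proc_dointvec() above and flips the static key so
 * that trace_event_buffer_commit() below starts mirroring events to printk.
 */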
2553 
2554 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555 {
2556 	if (static_key_false(&tracepoint_printk_key.key))
2557 		output_printk(fbuffer);
2558 
2559 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560 				    fbuffer->event, fbuffer->entry,
2561 				    fbuffer->flags, fbuffer->pc);
2562 }
2563 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564 
2565 /*
2566  * Skip 3:
2567  *
2568  *   trace_buffer_unlock_commit_regs()
2569  *   trace_event_buffer_commit()
2570  *   trace_event_raw_event_xxx()
2571  */
2572 # define STACK_SKIP 3
2573 
2574 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575 				     struct ring_buffer *buffer,
2576 				     struct ring_buffer_event *event,
2577 				     unsigned long flags, int pc,
2578 				     struct pt_regs *regs)
2579 {
2580 	__buffer_unlock_commit(buffer, event);
2581 
2582 	/*
2583 	 * If regs is not set, then skip the necessary functions.
2584 	 * Note, we can still get here via blktrace, wakeup tracer
2585 	 * and mmiotrace, but that's ok if they lose a function or
2586 	 * two. They are not that meaningful.
2587 	 */
2588 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589 	ftrace_trace_userstack(buffer, flags, pc);
2590 }
2591 
2592 /*
2593  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2594  */
2595 void
2596 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597 				   struct ring_buffer_event *event)
2598 {
2599 	__buffer_unlock_commit(buffer, event);
2600 }
2601 
2602 static void
2603 trace_process_export(struct trace_export *export,
2604 	       struct ring_buffer_event *event)
2605 {
2606 	struct trace_entry *entry;
2607 	unsigned int size = 0;
2608 
2609 	entry = ring_buffer_event_data(event);
2610 	size = ring_buffer_event_length(event);
2611 	export->write(export, entry, size);
2612 }
2613 
2614 static DEFINE_MUTEX(ftrace_export_lock);
2615 
2616 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617 
2618 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619 
2620 static inline void ftrace_exports_enable(void)
2621 {
2622 	static_branch_enable(&ftrace_exports_enabled);
2623 }
2624 
2625 static inline void ftrace_exports_disable(void)
2626 {
2627 	static_branch_disable(&ftrace_exports_enabled);
2628 }
2629 
2630 static void ftrace_exports(struct ring_buffer_event *event)
2631 {
2632 	struct trace_export *export;
2633 
2634 	preempt_disable_notrace();
2635 
2636 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637 	while (export) {
2638 		trace_process_export(export, event);
2639 		export = rcu_dereference_raw_notrace(export->next);
2640 	}
2641 
2642 	preempt_enable_notrace();
2643 }
2644 
2645 static inline void
2646 add_trace_export(struct trace_export **list, struct trace_export *export)
2647 {
2648 	rcu_assign_pointer(export->next, *list);
2649 	/*
2650 	 * We are entering export into the list but another
2651 	 * CPU might be walking that list. We need to make sure
2652 	 * the export->next pointer is valid before another CPU sees
2653 	 * the export pointer added to the list.
2654 	 */
2655 	rcu_assign_pointer(*list, export);
2656 }
2657 
2658 static inline int
2659 rm_trace_export(struct trace_export **list, struct trace_export *export)
2660 {
2661 	struct trace_export **p;
2662 
2663 	for (p = list; *p != NULL; p = &(*p)->next)
2664 		if (*p == export)
2665 			break;
2666 
2667 	if (*p != export)
2668 		return -1;
2669 
2670 	rcu_assign_pointer(*p, (*p)->next);
2671 
2672 	return 0;
2673 }
2674 
2675 static inline void
2676 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677 {
2678 	if (*list == NULL)
2679 		ftrace_exports_enable();
2680 
2681 	add_trace_export(list, export);
2682 }
2683 
2684 static inline int
2685 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 	int ret;
2688 
2689 	ret = rm_trace_export(list, export);
2690 	if (*list == NULL)
2691 		ftrace_exports_disable();
2692 
2693 	return ret;
2694 }
2695 
2696 int register_ftrace_export(struct trace_export *export)
2697 {
2698 	if (WARN_ON_ONCE(!export->write))
2699 		return -1;
2700 
2701 	mutex_lock(&ftrace_export_lock);
2702 
2703 	add_ftrace_export(&ftrace_exports_list, export);
2704 
2705 	mutex_unlock(&ftrace_export_lock);
2706 
2707 	return 0;
2708 }
2709 EXPORT_SYMBOL_GPL(register_ftrace_export);
2710 
2711 int unregister_ftrace_export(struct trace_export *export)
2712 {
2713 	int ret;
2714 
2715 	mutex_lock(&ftrace_export_lock);
2716 
2717 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2718 
2719 	mutex_unlock(&ftrace_export_lock);
2720 
2721 	return ret;
2722 }
2723 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
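
/*
 * Example (sketch under assumptions, not part of this file): a subsystem
 * that wants a copy of every function trace event supplies a ->write()
 * callback matching the call made in trace_process_export() above and
 * registers it.  The names below are hypothetical.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry, e.g. to an STM device
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */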
2724 
2725 void
2726 trace_function(struct trace_array *tr,
2727 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728 	       int pc)
2729 {
2730 	struct trace_event_call *call = &event_function;
2731 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732 	struct ring_buffer_event *event;
2733 	struct ftrace_entry *entry;
2734 
2735 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736 					    flags, pc);
2737 	if (!event)
2738 		return;
2739 	entry	= ring_buffer_event_data(event);
2740 	entry->ip			= ip;
2741 	entry->parent_ip		= parent_ip;
2742 
2743 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2744 		if (static_branch_unlikely(&ftrace_exports_enabled))
2745 			ftrace_exports(event);
2746 		__buffer_unlock_commit(buffer, event);
2747 	}
2748 }
2749 
2750 #ifdef CONFIG_STACKTRACE
2751 
2752 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753 struct ftrace_stack {
2754 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2755 };
2756 
2757 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759 
2760 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761 				 unsigned long flags,
2762 				 int skip, int pc, struct pt_regs *regs)
2763 {
2764 	struct trace_event_call *call = &event_kernel_stack;
2765 	struct ring_buffer_event *event;
2766 	struct stack_entry *entry;
2767 	struct stack_trace trace;
2768 	int use_stack;
2769 	int size = FTRACE_STACK_ENTRIES;
2770 
2771 	trace.nr_entries	= 0;
2772 	trace.skip		= skip;
2773 
2774 	/*
2775 	 * Add one, for this function and the call to save_stack_trace().
2776 	 * If regs is set, then these functions will not be in the way.
2777 	 */
2778 #ifndef CONFIG_UNWINDER_ORC
2779 	if (!regs)
2780 		trace.skip++;
2781 #endif
2782 
2783 	/*
2784 	 * Since events can happen in NMIs there's no safe way to
2785 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2786 	 * or NMI comes in, it will just have to use the default
2787 	 * FTRACE_STACK_SIZE.
2788 	 */
2789 	preempt_disable_notrace();
2790 
2791 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792 	/*
2793 	 * We don't need any atomic variables, just a barrier.
2794 	 * If an interrupt comes in, we don't care, because it would
2795 	 * have exited and put the counter back to what we want.
2796 	 * We just need a barrier to keep gcc from moving things
2797 	 * around.
2798 	 */
2799 	barrier();
2800 	if (use_stack == 1) {
2801 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2802 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2803 
2804 		if (regs)
2805 			save_stack_trace_regs(regs, &trace);
2806 		else
2807 			save_stack_trace(&trace);
2808 
2809 		if (trace.nr_entries > size)
2810 			size = trace.nr_entries;
2811 	} else
2812 		/* From now on, use_stack is a boolean */
2813 		use_stack = 0;
2814 
2815 	size *= sizeof(unsigned long);
2816 
2817 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818 					    sizeof(*entry) + size, flags, pc);
2819 	if (!event)
2820 		goto out;
2821 	entry = ring_buffer_event_data(event);
2822 
2823 	memset(&entry->caller, 0, size);
2824 
2825 	if (use_stack)
2826 		memcpy(&entry->caller, trace.entries,
2827 		       trace.nr_entries * sizeof(unsigned long));
2828 	else {
2829 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2830 		trace.entries		= entry->caller;
2831 		if (regs)
2832 			save_stack_trace_regs(regs, &trace);
2833 		else
2834 			save_stack_trace(&trace);
2835 	}
2836 
2837 	entry->size = trace.nr_entries;
2838 
2839 	if (!call_filter_check_discard(call, entry, buffer, event))
2840 		__buffer_unlock_commit(buffer, event);
2841 
2842  out:
2843 	/* Again, don't let gcc optimize things here */
2844 	barrier();
2845 	__this_cpu_dec(ftrace_stack_reserve);
2846 	preempt_enable_notrace();
2847 
2848 }
2849 
2850 static inline void ftrace_trace_stack(struct trace_array *tr,
2851 				      struct ring_buffer *buffer,
2852 				      unsigned long flags,
2853 				      int skip, int pc, struct pt_regs *regs)
2854 {
2855 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856 		return;
2857 
2858 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859 }
2860 
2861 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862 		   int pc)
2863 {
2864 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865 
2866 	if (rcu_is_watching()) {
2867 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868 		return;
2869 	}
2870 
2871 	/*
2872 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873 	 * but if the above rcu_is_watching() failed, then the NMI
2874 	 * triggered someplace critical, and rcu_irq_enter() should
2875 	 * not be called from NMI.
2876 	 */
2877 	if (unlikely(in_nmi()))
2878 		return;
2879 
2880 	rcu_irq_enter_irqson();
2881 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882 	rcu_irq_exit_irqson();
2883 }
2884 
2885 /**
2886  * trace_dump_stack - record a stack back trace in the trace buffer
2887  * @skip: Number of functions to skip (helper handlers)
2888  */
2889 void trace_dump_stack(int skip)
2890 {
2891 	unsigned long flags;
2892 
2893 	if (tracing_disabled || tracing_selftest_running)
2894 		return;
2895 
2896 	local_save_flags(flags);
2897 
2898 #ifndef CONFIG_UNWINDER_ORC
2899 	/* Skip 1 to skip this function. */
2900 	skip++;
2901 #endif
2902 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903 			     flags, skip, preempt_count(), NULL);
2904 }
2905 EXPORT_SYMBOL_GPL(trace_dump_stack);
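
/*
 * Example usage (illustrative only): a driver chasing how it reached a
 * function can drop
 *
 *	trace_dump_stack(0);
 *
 * into the suspect path and read the back trace from the trace buffer,
 * rather than spamming the console with dump_stack().
 */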
2906 
2907 static DEFINE_PER_CPU(int, user_stack_count);
2908 
2909 void
2910 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911 {
2912 	struct trace_event_call *call = &event_user_stack;
2913 	struct ring_buffer_event *event;
2914 	struct userstack_entry *entry;
2915 	struct stack_trace trace;
2916 
2917 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918 		return;
2919 
2920 	/*
2921 	 * NMIs can not handle page faults, even with fixups.
2922 	 * Saving the user stack can (and often does) fault.
2923 	 */
2924 	if (unlikely(in_nmi()))
2925 		return;
2926 
2927 	/*
2928 	 * prevent recursion, since the user stack tracing may
2929 	 * trigger other kernel events.
2930 	 */
2931 	preempt_disable();
2932 	if (__this_cpu_read(user_stack_count))
2933 		goto out;
2934 
2935 	__this_cpu_inc(user_stack_count);
2936 
2937 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938 					    sizeof(*entry), flags, pc);
2939 	if (!event)
2940 		goto out_drop_count;
2941 	entry	= ring_buffer_event_data(event);
2942 
2943 	entry->tgid		= current->tgid;
2944 	memset(&entry->caller, 0, sizeof(entry->caller));
2945 
2946 	trace.nr_entries	= 0;
2947 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2948 	trace.skip		= 0;
2949 	trace.entries		= entry->caller;
2950 
2951 	save_stack_trace_user(&trace);
2952 	if (!call_filter_check_discard(call, entry, buffer, event))
2953 		__buffer_unlock_commit(buffer, event);
2954 
2955  out_drop_count:
2956 	__this_cpu_dec(user_stack_count);
2957  out:
2958 	preempt_enable();
2959 }
2960 
2961 #ifdef UNUSED
2962 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963 {
2964 	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2965 }
2966 #endif /* UNUSED */
2967 
2968 #endif /* CONFIG_STACKTRACE */
2969 
2970 /* created for use with alloc_percpu */
2971 struct trace_buffer_struct {
2972 	int nesting;
2973 	char buffer[4][TRACE_BUF_SIZE];
2974 };
2975 
2976 static struct trace_buffer_struct *trace_percpu_buffer;
2977 
2978 /*
2979  * This allows for lockless recording.  If we're nested too deeply, then
2980  * this returns NULL.
2981  */
2982 static char *get_trace_buf(void)
2983 {
2984 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985 
2986 	if (!buffer || buffer->nesting >= 4)
2987 		return NULL;
2988 
2989 	buffer->nesting++;
2990 
2991 	/* Interrupts must see nesting incremented before we use the buffer */
2992 	barrier();
2993 	return &buffer->buffer[buffer->nesting][0];
2994 }
2995 
2996 static void put_trace_buf(void)
2997 {
2998 	/* Don't let the decrement of nesting leak before this */
2999 	barrier();
3000 	this_cpu_dec(trace_percpu_buffer->nesting);
3001 }
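
/*
 * Example usage (sketch only): the per-cpu buffer is used in a strict
 * get/put pattern, as trace_vbprintk() and __trace_array_vprintk() below
 * do:
 *
 *	char *buf = get_trace_buf();
 *
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 */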
3002 
3003 static int alloc_percpu_trace_buffer(void)
3004 {
3005 	struct trace_buffer_struct *buffers;
3006 
3007 	buffers = alloc_percpu(struct trace_buffer_struct);
3008 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009 		return -ENOMEM;
3010 
3011 	trace_percpu_buffer = buffers;
3012 	return 0;
3013 }
3014 
3015 static int buffers_allocated;
3016 
3017 void trace_printk_init_buffers(void)
3018 {
3019 	if (buffers_allocated)
3020 		return;
3021 
3022 	if (alloc_percpu_trace_buffer())
3023 		return;
3024 
3025 	/* trace_printk() is for debug use only. Don't use it in production. */
3026 
3027 	pr_warn("\n");
3028 	pr_warn("**********************************************************\n");
3029 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3030 	pr_warn("**                                                      **\n");
3031 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3032 	pr_warn("**                                                      **\n");
3033 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3034 	pr_warn("** unsafe for production use.                           **\n");
3035 	pr_warn("**                                                      **\n");
3036 	pr_warn("** If you see this message and you are not debugging    **\n");
3037 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3038 	pr_warn("**                                                      **\n");
3039 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3040 	pr_warn("**********************************************************\n");
3041 
3042 	/* Expand the buffers to set size */
3043 	tracing_update_buffers();
3044 
3045 	buffers_allocated = 1;
3046 
3047 	/*
3048 	 * trace_printk_init_buffers() can be called by modules.
3049 	 * If that happens, then we need to start cmdline recording
3050 	 * directly here. If the global_trace.trace_buffer.buffer is already
3051 	 * allocated here, then this was called by module code.
3052 	 */
3053 	if (global_trace.trace_buffer.buffer)
3054 		tracing_start_cmdline_record();
3055 }
3056 
3057 void trace_printk_start_comm(void)
3058 {
3059 	/* Start tracing comms if trace printk is set */
3060 	if (!buffers_allocated)
3061 		return;
3062 	tracing_start_cmdline_record();
3063 }
3064 
3065 static void trace_printk_start_stop_comm(int enabled)
3066 {
3067 	if (!buffers_allocated)
3068 		return;
3069 
3070 	if (enabled)
3071 		tracing_start_cmdline_record();
3072 	else
3073 		tracing_stop_cmdline_record();
3074 }
3075 
3076 /**
3077  * trace_vbprintk - write binary msg to tracing buffer
3078  * @ip: the address (instruction pointer) to record with the entry
 * @fmt: the binary printf format string
 * @args: the va_list holding the arguments for @fmt
3079  */
3080 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082 	struct trace_event_call *call = &event_bprint;
3083 	struct ring_buffer_event *event;
3084 	struct ring_buffer *buffer;
3085 	struct trace_array *tr = &global_trace;
3086 	struct bprint_entry *entry;
3087 	unsigned long flags;
3088 	char *tbuffer;
3089 	int len = 0, size, pc;
3090 
3091 	if (unlikely(tracing_selftest_running || tracing_disabled))
3092 		return 0;
3093 
3094 	/* Don't pollute graph traces with trace_vprintk internals */
3095 	pause_graph_tracing();
3096 
3097 	pc = preempt_count();
3098 	preempt_disable_notrace();
3099 
3100 	tbuffer = get_trace_buf();
3101 	if (!tbuffer) {
3102 		len = 0;
3103 		goto out_nobuffer;
3104 	}
3105 
3106 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3107 
3108 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3109 		goto out;
3110 
3111 	local_save_flags(flags);
3112 	size = sizeof(*entry) + sizeof(u32) * len;
3113 	buffer = tr->trace_buffer.buffer;
3114 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3115 					    flags, pc);
3116 	if (!event)
3117 		goto out;
3118 	entry = ring_buffer_event_data(event);
3119 	entry->ip			= ip;
3120 	entry->fmt			= fmt;
3121 
3122 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3123 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3124 		__buffer_unlock_commit(buffer, event);
3125 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3126 	}
3127 
3128 out:
3129 	put_trace_buf();
3130 
3131 out_nobuffer:
3132 	preempt_enable_notrace();
3133 	unpause_graph_tracing();
3134 
3135 	return len;
3136 }
3137 EXPORT_SYMBOL_GPL(trace_vbprintk);
3138 
3139 __printf(3, 0)
3140 static int
3141 __trace_array_vprintk(struct ring_buffer *buffer,
3142 		      unsigned long ip, const char *fmt, va_list args)
3143 {
3144 	struct trace_event_call *call = &event_print;
3145 	struct ring_buffer_event *event;
3146 	int len = 0, size, pc;
3147 	struct print_entry *entry;
3148 	unsigned long flags;
3149 	char *tbuffer;
3150 
3151 	if (tracing_disabled || tracing_selftest_running)
3152 		return 0;
3153 
3154 	/* Don't pollute graph traces with trace_vprintk internals */
3155 	pause_graph_tracing();
3156 
3157 	pc = preempt_count();
3158 	preempt_disable_notrace();
3159 
3160 
3161 	tbuffer = get_trace_buf();
3162 	if (!tbuffer) {
3163 		len = 0;
3164 		goto out_nobuffer;
3165 	}
3166 
3167 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3168 
3169 	local_save_flags(flags);
3170 	size = sizeof(*entry) + len + 1;
3171 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3172 					    flags, pc);
3173 	if (!event)
3174 		goto out;
3175 	entry = ring_buffer_event_data(event);
3176 	entry->ip = ip;
3177 
3178 	memcpy(&entry->buf, tbuffer, len + 1);
3179 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3180 		__buffer_unlock_commit(buffer, event);
3181 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3182 	}
3183 
3184 out:
3185 	put_trace_buf();
3186 
3187 out_nobuffer:
3188 	preempt_enable_notrace();
3189 	unpause_graph_tracing();
3190 
3191 	return len;
3192 }
3193 
3194 __printf(3, 0)
3195 int trace_array_vprintk(struct trace_array *tr,
3196 			unsigned long ip, const char *fmt, va_list args)
3197 {
3198 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3199 }
3200 
3201 __printf(3, 0)
3202 int trace_array_printk(struct trace_array *tr,
3203 		       unsigned long ip, const char *fmt, ...)
3204 {
3205 	int ret;
3206 	va_list ap;
3207 
3208 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3209 		return 0;
3210 
3211 	va_start(ap, fmt);
3212 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3213 	va_end(ap);
3214 	return ret;
3215 }
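
/*
 * Example usage (illustrative only; "delta" is a hypothetical value): code
 * that holds the struct trace_array of an instance can write directly to
 * that instance's buffer:
 *
 *	trace_array_printk(tr, _THIS_IP_, "reset took %lu us\n", delta);
 */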
3216 
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219 			   unsigned long ip, const char *fmt, ...)
3220 {
3221 	int ret;
3222 	va_list ap;
3223 
3224 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 		return 0;
3226 
3227 	va_start(ap, fmt);
3228 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 	va_end(ap);
3230 	return ret;
3231 }
3232 
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239 
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243 
3244 	iter->idx++;
3245 	if (buf_iter)
3246 		ring_buffer_read(buf_iter, NULL);
3247 }
3248 
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 		unsigned long *lost_events)
3252 {
3253 	struct ring_buffer_event *event;
3254 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255 
3256 	if (buf_iter)
3257 		event = ring_buffer_iter_peek(buf_iter, ts);
3258 	else
3259 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 					 lost_events);
3261 
3262 	if (event) {
3263 		iter->ent_size = ring_buffer_event_length(event);
3264 		return ring_buffer_event_data(event);
3265 	}
3266 	iter->ent_size = 0;
3267 	return NULL;
3268 }
3269 
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 		  unsigned long *missing_events, u64 *ent_ts)
3273 {
3274 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 	struct trace_entry *ent, *next = NULL;
3276 	unsigned long lost_events = 0, next_lost = 0;
3277 	int cpu_file = iter->cpu_file;
3278 	u64 next_ts = 0, ts;
3279 	int next_cpu = -1;
3280 	int next_size = 0;
3281 	int cpu;
3282 
3283 	/*
3284 	 * If we are in a per_cpu trace file, don't bother iterating over
3285 	 * all cpus; just peek at that one cpu directly.
3286 	 */
3287 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 			return NULL;
3290 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 		if (ent_cpu)
3292 			*ent_cpu = cpu_file;
3293 
3294 		return ent;
3295 	}
3296 
3297 	for_each_tracing_cpu(cpu) {
3298 
3299 		if (ring_buffer_empty_cpu(buffer, cpu))
3300 			continue;
3301 
3302 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303 
3304 		/*
3305 		 * Pick the entry with the smallest timestamp:
3306 		 */
3307 		if (ent && (!next || ts < next_ts)) {
3308 			next = ent;
3309 			next_cpu = cpu;
3310 			next_ts = ts;
3311 			next_lost = lost_events;
3312 			next_size = iter->ent_size;
3313 		}
3314 	}
3315 
3316 	iter->ent_size = next_size;
3317 
3318 	if (ent_cpu)
3319 		*ent_cpu = next_cpu;
3320 
3321 	if (ent_ts)
3322 		*ent_ts = next_ts;
3323 
3324 	if (missing_events)
3325 		*missing_events = next_lost;
3326 
3327 	return next;
3328 }
3329 
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 					  int *ent_cpu, u64 *ent_ts)
3333 {
3334 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336 
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340 	iter->ent = __find_next_entry(iter, &iter->cpu,
3341 				      &iter->lost_events, &iter->ts);
3342 
3343 	if (iter->ent)
3344 		trace_iterator_increment(iter);
3345 
3346 	return iter->ent ? iter : NULL;
3347 }
3348 
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 			    &iter->lost_events);
3353 }
3354 
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357 	struct trace_iterator *iter = m->private;
3358 	int i = (int)*pos;
3359 	void *ent;
3360 
3361 	WARN_ON_ONCE(iter->leftover);
3362 
3363 	(*pos)++;
3364 
3365 	/* can't go backwards */
3366 	if (iter->idx > i)
3367 		return NULL;
3368 
3369 	if (iter->idx < 0)
3370 		ent = trace_find_next_entry_inc(iter);
3371 	else
3372 		ent = iter;
3373 
3374 	while (ent && iter->idx < i)
3375 		ent = trace_find_next_entry_inc(iter);
3376 
3377 	iter->pos = *pos;
3378 
3379 	return ent;
3380 }
3381 
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384 	struct ring_buffer_event *event;
3385 	struct ring_buffer_iter *buf_iter;
3386 	unsigned long entries = 0;
3387 	u64 ts;
3388 
3389 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390 
3391 	buf_iter = trace_buffer_iter(iter, cpu);
3392 	if (!buf_iter)
3393 		return;
3394 
3395 	ring_buffer_iter_reset(buf_iter);
3396 
3397 	/*
3398 	 * With the max latency tracers, it is possible that a reset
3399 	 * never took place on a cpu. This is evident by the timestamp
3400 	 * being before the start of the buffer.
3401 	 */
3402 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 		if (ts >= iter->trace_buffer->time_start)
3404 			break;
3405 		entries++;
3406 		ring_buffer_read(buf_iter, NULL);
3407 	}
3408 
3409 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411 
3412 /*
3413  * The current tracer is copied to avoid taking a global lock
3414  * all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418 	struct trace_iterator *iter = m->private;
3419 	struct trace_array *tr = iter->tr;
3420 	int cpu_file = iter->cpu_file;
3421 	void *p = NULL;
3422 	loff_t l = 0;
3423 	int cpu;
3424 
3425 	/*
3426 	 * copy the tracer to avoid using a global lock all around.
3427 	 * iter->trace is a copy of current_trace, the pointer to the
3428 	 * name may be used instead of a strcmp(), as iter->trace->name
3429 	 * will point to the same string as current_trace->name.
3430 	 */
3431 	mutex_lock(&trace_types_lock);
3432 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 		*iter->trace = *tr->current_trace;
3434 	mutex_unlock(&trace_types_lock);
3435 
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 	if (iter->snapshot && iter->trace->use_max_tr)
3438 		return ERR_PTR(-EBUSY);
3439 #endif
3440 
3441 	if (!iter->snapshot)
3442 		atomic_inc(&trace_record_taskinfo_disabled);
3443 
3444 	if (*pos != iter->pos) {
3445 		iter->ent = NULL;
3446 		iter->cpu = 0;
3447 		iter->idx = -1;
3448 
3449 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 			for_each_tracing_cpu(cpu)
3451 				tracing_iter_reset(iter, cpu);
3452 		} else
3453 			tracing_iter_reset(iter, cpu_file);
3454 
3455 		iter->leftover = 0;
3456 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 			;
3458 
3459 	} else {
3460 		/*
3461 		 * If we overflowed the seq_file before, then we want
3462 		 * to just reuse the trace_seq buffer again.
3463 		 */
3464 		if (iter->leftover)
3465 			p = iter;
3466 		else {
3467 			l = *pos - 1;
3468 			p = s_next(m, p, &l);
3469 		}
3470 	}
3471 
3472 	trace_event_read_lock();
3473 	trace_access_lock(cpu_file);
3474 	return p;
3475 }
3476 
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479 	struct trace_iterator *iter = m->private;
3480 
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482 	if (iter->snapshot && iter->trace->use_max_tr)
3483 		return;
3484 #endif
3485 
3486 	if (!iter->snapshot)
3487 		atomic_dec(&trace_record_taskinfo_disabled);
3488 
3489 	trace_access_unlock(iter->cpu_file);
3490 	trace_event_read_unlock();
3491 }
3492 
3493 static void
3494 get_total_entries(struct trace_buffer *buf,
3495 		  unsigned long *total, unsigned long *entries)
3496 {
3497 	unsigned long count;
3498 	int cpu;
3499 
3500 	*total = 0;
3501 	*entries = 0;
3502 
3503 	for_each_tracing_cpu(cpu) {
3504 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3505 		/*
3506 		 * If this buffer has skipped entries, then we hold all
3507 		 * entries for the trace and we need to ignore the
3508 		 * ones before the time stamp.
3509 		 */
3510 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3511 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3512 			/* total is the same as the entries */
3513 			*total += count;
3514 		} else
3515 			*total += count +
3516 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3517 		*entries += count;
3518 	}
3519 }
3520 
3521 static void print_lat_help_header(struct seq_file *m)
3522 {
3523 	seq_puts(m, "#                  _------=> CPU#            \n"
3524 		    "#                 / _-----=> irqs-off        \n"
3525 		    "#                | / _----=> need-resched    \n"
3526 		    "#                || / _---=> hardirq/softirq \n"
3527 		    "#                ||| / _--=> preempt-depth   \n"
3528 		    "#                |||| /     delay            \n"
3529 		    "#  cmd     pid   ||||| time  |   caller      \n"
3530 		    "#     \\   /      |||||  \\    |   /         \n");
3531 }
3532 
3533 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3534 {
3535 	unsigned long total;
3536 	unsigned long entries;
3537 
3538 	get_total_entries(buf, &total, &entries);
3539 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3540 		   entries, total, num_online_cpus());
3541 	seq_puts(m, "#\n");
3542 }
3543 
3544 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3545 				   unsigned int flags)
3546 {
3547 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3548 
3549 	print_event_info(buf, m);
3550 
3551 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3552 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3553 }
3554 
3555 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3556 				       unsigned int flags)
3557 {
3558 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3559 	const char tgid_space[] = "          ";
3560 	const char space[] = "  ";
3561 
3562 	print_event_info(buf, m);
3563 
3564 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3565 		   tgid ? tgid_space : space);
3566 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3567 		   tgid ? tgid_space : space);
3568 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3569 		   tgid ? tgid_space : space);
3570 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3571 		   tgid ? tgid_space : space);
3572 	seq_printf(m, "#                          %s||| /     delay\n",
3573 		   tgid ? tgid_space : space);
3574 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3575 		   tgid ? "   TGID   " : space);
3576 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3577 		   tgid ? "     |    " : space);
3578 }
3579 
3580 void
3581 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3582 {
3583 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3584 	struct trace_buffer *buf = iter->trace_buffer;
3585 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3586 	struct tracer *type = iter->trace;
3587 	unsigned long entries;
3588 	unsigned long total;
3589 	const char *name = "preemption";
3590 
3591 	name = type->name;
3592 
3593 	get_total_entries(buf, &total, &entries);
3594 
3595 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3596 		   name, UTS_RELEASE);
3597 	seq_puts(m, "# -----------------------------------"
3598 		 "---------------------------------\n");
3599 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3600 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3601 		   nsecs_to_usecs(data->saved_latency),
3602 		   entries,
3603 		   total,
3604 		   buf->cpu,
3605 #if defined(CONFIG_PREEMPT_NONE)
3606 		   "server",
3607 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3608 		   "desktop",
3609 #elif defined(CONFIG_PREEMPT)
3610 		   "preempt",
3611 #else
3612 		   "unknown",
3613 #endif
3614 		   /* These are reserved for later use */
3615 		   0, 0, 0, 0);
3616 #ifdef CONFIG_SMP
3617 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3618 #else
3619 	seq_puts(m, ")\n");
3620 #endif
3621 	seq_puts(m, "#    -----------------\n");
3622 	seq_printf(m, "#    | task: %.16s-%d "
3623 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3624 		   data->comm, data->pid,
3625 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3626 		   data->policy, data->rt_priority);
3627 	seq_puts(m, "#    -----------------\n");
3628 
3629 	if (data->critical_start) {
3630 		seq_puts(m, "#  => started at: ");
3631 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3632 		trace_print_seq(m, &iter->seq);
3633 		seq_puts(m, "\n#  => ended at:   ");
3634 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3635 		trace_print_seq(m, &iter->seq);
3636 		seq_puts(m, "\n#\n");
3637 	}
3638 
3639 	seq_puts(m, "#\n");
3640 }
3641 
3642 static void test_cpu_buff_start(struct trace_iterator *iter)
3643 {
3644 	struct trace_seq *s = &iter->seq;
3645 	struct trace_array *tr = iter->tr;
3646 
3647 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3648 		return;
3649 
3650 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3651 		return;
3652 
3653 	if (cpumask_available(iter->started) &&
3654 	    cpumask_test_cpu(iter->cpu, iter->started))
3655 		return;
3656 
3657 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3658 		return;
3659 
3660 	if (cpumask_available(iter->started))
3661 		cpumask_set_cpu(iter->cpu, iter->started);
3662 
3663 	/* Don't print started cpu buffer for the first entry of the trace */
3664 	if (iter->idx > 1)
3665 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3666 				iter->cpu);
3667 }
3668 
3669 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3670 {
3671 	struct trace_array *tr = iter->tr;
3672 	struct trace_seq *s = &iter->seq;
3673 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3674 	struct trace_entry *entry;
3675 	struct trace_event *event;
3676 
3677 	entry = iter->ent;
3678 
3679 	test_cpu_buff_start(iter);
3680 
3681 	event = ftrace_find_event(entry->type);
3682 
3683 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3684 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3685 			trace_print_lat_context(iter);
3686 		else
3687 			trace_print_context(iter);
3688 	}
3689 
3690 	if (trace_seq_has_overflowed(s))
3691 		return TRACE_TYPE_PARTIAL_LINE;
3692 
3693 	if (event)
3694 		return event->funcs->trace(iter, sym_flags, event);
3695 
3696 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3697 
3698 	return trace_handle_return(s);
3699 }
3700 
3701 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3702 {
3703 	struct trace_array *tr = iter->tr;
3704 	struct trace_seq *s = &iter->seq;
3705 	struct trace_entry *entry;
3706 	struct trace_event *event;
3707 
3708 	entry = iter->ent;
3709 
3710 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3711 		trace_seq_printf(s, "%d %d %llu ",
3712 				 entry->pid, iter->cpu, iter->ts);
3713 
3714 	if (trace_seq_has_overflowed(s))
3715 		return TRACE_TYPE_PARTIAL_LINE;
3716 
3717 	event = ftrace_find_event(entry->type);
3718 	if (event)
3719 		return event->funcs->raw(iter, 0, event);
3720 
3721 	trace_seq_printf(s, "%d ?\n", entry->type);
3722 
3723 	return trace_handle_return(s);
3724 }
3725 
3726 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3727 {
3728 	struct trace_array *tr = iter->tr;
3729 	struct trace_seq *s = &iter->seq;
3730 	unsigned char newline = '\n';
3731 	struct trace_entry *entry;
3732 	struct trace_event *event;
3733 
3734 	entry = iter->ent;
3735 
3736 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3737 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3738 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3739 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3740 		if (trace_seq_has_overflowed(s))
3741 			return TRACE_TYPE_PARTIAL_LINE;
3742 	}
3743 
3744 	event = ftrace_find_event(entry->type);
3745 	if (event) {
3746 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3747 		if (ret != TRACE_TYPE_HANDLED)
3748 			return ret;
3749 	}
3750 
3751 	SEQ_PUT_FIELD(s, newline);
3752 
3753 	return trace_handle_return(s);
3754 }
3755 
3756 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3757 {
3758 	struct trace_array *tr = iter->tr;
3759 	struct trace_seq *s = &iter->seq;
3760 	struct trace_entry *entry;
3761 	struct trace_event *event;
3762 
3763 	entry = iter->ent;
3764 
3765 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3766 		SEQ_PUT_FIELD(s, entry->pid);
3767 		SEQ_PUT_FIELD(s, iter->cpu);
3768 		SEQ_PUT_FIELD(s, iter->ts);
3769 		if (trace_seq_has_overflowed(s))
3770 			return TRACE_TYPE_PARTIAL_LINE;
3771 	}
3772 
3773 	event = ftrace_find_event(entry->type);
3774 	return event ? event->funcs->binary(iter, 0, event) :
3775 		TRACE_TYPE_HANDLED;
3776 }
3777 
3778 int trace_empty(struct trace_iterator *iter)
3779 {
3780 	struct ring_buffer_iter *buf_iter;
3781 	int cpu;
3782 
3783 	/* If we are looking at one CPU buffer, only check that one */
3784 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3785 		cpu = iter->cpu_file;
3786 		buf_iter = trace_buffer_iter(iter, cpu);
3787 		if (buf_iter) {
3788 			if (!ring_buffer_iter_empty(buf_iter))
3789 				return 0;
3790 		} else {
3791 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3792 				return 0;
3793 		}
3794 		return 1;
3795 	}
3796 
3797 	for_each_tracing_cpu(cpu) {
3798 		buf_iter = trace_buffer_iter(iter, cpu);
3799 		if (buf_iter) {
3800 			if (!ring_buffer_iter_empty(buf_iter))
3801 				return 0;
3802 		} else {
3803 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3804 				return 0;
3805 		}
3806 	}
3807 
3808 	return 1;
3809 }
3810 
3811 /*  Called with trace_event_read_lock() held. */
3812 enum print_line_t print_trace_line(struct trace_iterator *iter)
3813 {
3814 	struct trace_array *tr = iter->tr;
3815 	unsigned long trace_flags = tr->trace_flags;
3816 	enum print_line_t ret;
3817 
3818 	if (iter->lost_events) {
3819 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3820 				 iter->cpu, iter->lost_events);
3821 		if (trace_seq_has_overflowed(&iter->seq))
3822 			return TRACE_TYPE_PARTIAL_LINE;
3823 	}
3824 
3825 	if (iter->trace && iter->trace->print_line) {
3826 		ret = iter->trace->print_line(iter);
3827 		if (ret != TRACE_TYPE_UNHANDLED)
3828 			return ret;
3829 	}
3830 
3831 	if (iter->ent->type == TRACE_BPUTS &&
3832 			trace_flags & TRACE_ITER_PRINTK &&
3833 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834 		return trace_print_bputs_msg_only(iter);
3835 
3836 	if (iter->ent->type == TRACE_BPRINT &&
3837 			trace_flags & TRACE_ITER_PRINTK &&
3838 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3839 		return trace_print_bprintk_msg_only(iter);
3840 
3841 	if (iter->ent->type == TRACE_PRINT &&
3842 			trace_flags & TRACE_ITER_PRINTK &&
3843 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3844 		return trace_print_printk_msg_only(iter);
3845 
3846 	if (trace_flags & TRACE_ITER_BIN)
3847 		return print_bin_fmt(iter);
3848 
3849 	if (trace_flags & TRACE_ITER_HEX)
3850 		return print_hex_fmt(iter);
3851 
3852 	if (trace_flags & TRACE_ITER_RAW)
3853 		return print_raw_fmt(iter);
3854 
3855 	return print_trace_fmt(iter);
3856 }
3857 
3858 void trace_latency_header(struct seq_file *m)
3859 {
3860 	struct trace_iterator *iter = m->private;
3861 	struct trace_array *tr = iter->tr;
3862 
3863 	/* print nothing if the buffers are empty */
3864 	if (trace_empty(iter))
3865 		return;
3866 
3867 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3868 		print_trace_header(m, iter);
3869 
3870 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3871 		print_lat_help_header(m);
3872 }
3873 
3874 void trace_default_header(struct seq_file *m)
3875 {
3876 	struct trace_iterator *iter = m->private;
3877 	struct trace_array *tr = iter->tr;
3878 	unsigned long trace_flags = tr->trace_flags;
3879 
3880 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3881 		return;
3882 
3883 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3884 		/* print nothing if the buffers are empty */
3885 		if (trace_empty(iter))
3886 			return;
3887 		print_trace_header(m, iter);
3888 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3889 			print_lat_help_header(m);
3890 	} else {
3891 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3892 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3893 				print_func_help_header_irq(iter->trace_buffer,
3894 							   m, trace_flags);
3895 			else
3896 				print_func_help_header(iter->trace_buffer, m,
3897 						       trace_flags);
3898 		}
3899 	}
3900 }
3901 
3902 static void test_ftrace_alive(struct seq_file *m)
3903 {
3904 	if (!ftrace_is_dead())
3905 		return;
3906 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3907 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3908 }
3909 
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 static void show_snapshot_main_help(struct seq_file *m)
3912 {
3913 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3914 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3915 		    "#                      Takes a snapshot of the main buffer.\n"
3916 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3917 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3918 		    "#                       is not a '0' or '1')\n");
3919 }
3920 
3921 static void show_snapshot_percpu_help(struct seq_file *m)
3922 {
3923 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3924 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3925 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3926 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3927 #else
3928 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3929 		    "#                     Must use main snapshot file to allocate.\n");
3930 #endif
3931 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3932 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3933 		    "#                       is not a '0' or '1')\n");
3934 }
3935 
3936 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3937 {
3938 	if (iter->tr->allocated_snapshot)
3939 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3940 	else
3941 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3942 
3943 	seq_puts(m, "# Snapshot commands:\n");
3944 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3945 		show_snapshot_main_help(m);
3946 	else
3947 		show_snapshot_percpu_help(m);
3948 }
3949 #else
3950 /* Should never be called */
3951 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3952 #endif
3953 
3954 static int s_show(struct seq_file *m, void *v)
3955 {
3956 	struct trace_iterator *iter = v;
3957 	int ret;
3958 
3959 	if (iter->ent == NULL) {
3960 		if (iter->tr) {
3961 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3962 			seq_puts(m, "#\n");
3963 			test_ftrace_alive(m);
3964 		}
3965 		if (iter->snapshot && trace_empty(iter))
3966 			print_snapshot_help(m, iter);
3967 		else if (iter->trace && iter->trace->print_header)
3968 			iter->trace->print_header(m);
3969 		else
3970 			trace_default_header(m);
3971 
3972 	} else if (iter->leftover) {
3973 		/*
3974 		 * If we filled the seq_file buffer earlier, we
3975 		 * want to just show it now.
3976 		 */
3977 		ret = trace_print_seq(m, &iter->seq);
3978 
3979 		/* ret should this time be zero, but you never know */
3980 		iter->leftover = ret;
3981 
3982 	} else {
3983 		print_trace_line(iter);
3984 		ret = trace_print_seq(m, &iter->seq);
3985 		/*
3986 		 * If we overflow the seq_file buffer, then it will
3987 		 * ask us for this data again at start up.
3988 		 * Use that instead.
3989 		 *  ret is 0 if seq_file write succeeded.
3990 		 *        -1 otherwise.
3991 		 */
3992 		iter->leftover = ret;
3993 	}
3994 
3995 	return 0;
3996 }
3997 
3998 /*
3999  * Should be used after trace_array_get(); trace_types_lock ensures
4000  * that i_cdev was already initialized (non-NULL i_cdev encodes cpu + 1).
4001  */
4002 static inline int tracing_get_cpu(struct inode *inode)
4003 {
4004 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4005 		return (long)inode->i_cdev - 1;
4006 	return RING_BUFFER_ALL_CPUS;
4007 }
4008 
4009 static const struct seq_operations tracer_seq_ops = {
4010 	.start		= s_start,
4011 	.next		= s_next,
4012 	.stop		= s_stop,
4013 	.show		= s_show,
4014 };
4015 
4016 static struct trace_iterator *
4017 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4018 {
4019 	struct trace_array *tr = inode->i_private;
4020 	struct trace_iterator *iter;
4021 	int cpu;
4022 
4023 	if (tracing_disabled)
4024 		return ERR_PTR(-ENODEV);
4025 
4026 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4027 	if (!iter)
4028 		return ERR_PTR(-ENOMEM);
4029 
4030 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4031 				    GFP_KERNEL);
4032 	if (!iter->buffer_iter)
4033 		goto release;
4034 
4035 	/*
4036 	 * We make a copy of the current tracer to avoid concurrent
4037 	 * changes on it while we are reading.
4038 	 */
4039 	mutex_lock(&trace_types_lock);
4040 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4041 	if (!iter->trace)
4042 		goto fail;
4043 
4044 	*iter->trace = *tr->current_trace;
4045 
4046 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4047 		goto fail;
4048 
4049 	iter->tr = tr;
4050 
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052 	/* Currently only the top directory has a snapshot */
4053 	if (tr->current_trace->print_max || snapshot)
4054 		iter->trace_buffer = &tr->max_buffer;
4055 	else
4056 #endif
4057 		iter->trace_buffer = &tr->trace_buffer;
4058 	iter->snapshot = snapshot;
4059 	iter->pos = -1;
4060 	iter->cpu_file = tracing_get_cpu(inode);
4061 	mutex_init(&iter->mutex);
4062 
4063 	/* Notify the tracer early, before we stop tracing. */
4064 	if (iter->trace && iter->trace->open)
4065 		iter->trace->open(iter);
4066 
4067 	/* Annotate start of buffers if we had overruns */
4068 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4069 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4070 
4071 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4072 	if (trace_clocks[tr->clock_id].in_ns)
4073 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4074 
4075 	/* stop the trace while dumping if we are not opening "snapshot" */
4076 	if (!iter->snapshot)
4077 		tracing_stop_tr(tr);
4078 
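	/*
	 * Set up the ring buffer iterators in two phases: prepare an
	 * iterator for each CPU of interest first, do a single
	 * synchronization pass, and only then start each iterator and
	 * reset the trace iterator's position for that CPU.
	 */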
4079 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4080 		for_each_tracing_cpu(cpu) {
4081 			iter->buffer_iter[cpu] =
4082 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4083 							 cpu, GFP_KERNEL);
4084 		}
4085 		ring_buffer_read_prepare_sync();
4086 		for_each_tracing_cpu(cpu) {
4087 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4088 			tracing_iter_reset(iter, cpu);
4089 		}
4090 	} else {
4091 		cpu = iter->cpu_file;
4092 		iter->buffer_iter[cpu] =
4093 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4094 						 cpu, GFP_KERNEL);
4095 		ring_buffer_read_prepare_sync();
4096 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4097 		tracing_iter_reset(iter, cpu);
4098 	}
4099 
4100 	mutex_unlock(&trace_types_lock);
4101 
4102 	return iter;
4103 
4104  fail:
4105 	mutex_unlock(&trace_types_lock);
4106 	kfree(iter->trace);
4107 	kfree(iter->buffer_iter);
4108 release:
4109 	seq_release_private(inode, file);
4110 	return ERR_PTR(-ENOMEM);
4111 }
4112 
4113 int tracing_open_generic(struct inode *inode, struct file *filp)
4114 {
4115 	if (tracing_disabled)
4116 		return -ENODEV;
4117 
4118 	filp->private_data = inode->i_private;
4119 	return 0;
4120 }
4121 
4122 bool tracing_is_disabled(void)
4123 {
4124 	return (tracing_disabled) ? true : false;
4125 }
4126 
4127 /*
4128  * Open and update trace_array ref count.
4129  * Must have the current trace_array passed to it.
4130  */
4131 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4132 {
4133 	struct trace_array *tr = inode->i_private;
4134 
4135 	if (tracing_disabled)
4136 		return -ENODEV;
4137 
4138 	if (trace_array_get(tr) < 0)
4139 		return -ENODEV;
4140 
4141 	filp->private_data = inode->i_private;
4142 
4143 	return 0;
4144 }
4145 
4146 static int tracing_release(struct inode *inode, struct file *file)
4147 {
4148 	struct trace_array *tr = inode->i_private;
4149 	struct seq_file *m = file->private_data;
4150 	struct trace_iterator *iter;
4151 	int cpu;
4152 
4153 	if (!(file->f_mode & FMODE_READ)) {
4154 		trace_array_put(tr);
4155 		return 0;
4156 	}
4157 
4158 	/* Writes do not use seq_file */
4159 	iter = m->private;
4160 	mutex_lock(&trace_types_lock);
4161 
4162 	for_each_tracing_cpu(cpu) {
4163 		if (iter->buffer_iter[cpu])
4164 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4165 	}
4166 
4167 	if (iter->trace && iter->trace->close)
4168 		iter->trace->close(iter);
4169 
4170 	if (!iter->snapshot)
4171 		/* reenable tracing if it was previously enabled */
4172 		tracing_start_tr(tr);
4173 
4174 	__trace_array_put(tr);
4175 
4176 	mutex_unlock(&trace_types_lock);
4177 
4178 	mutex_destroy(&iter->mutex);
4179 	free_cpumask_var(iter->started);
4180 	kfree(iter->trace);
4181 	kfree(iter->buffer_iter);
4182 	seq_release_private(inode, file);
4183 
4184 	return 0;
4185 }
4186 
4187 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4188 {
4189 	struct trace_array *tr = inode->i_private;
4190 
4191 	trace_array_put(tr);
4192 	return 0;
4193 }
4194 
4195 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4196 {
4197 	struct trace_array *tr = inode->i_private;
4198 
4199 	trace_array_put(tr);
4200 
4201 	return single_release(inode, file);
4202 }
4203 
4204 static int tracing_open(struct inode *inode, struct file *file)
4205 {
4206 	struct trace_array *tr = inode->i_private;
4207 	struct trace_iterator *iter;
4208 	int ret = 0;
4209 
4210 	if (trace_array_get(tr) < 0)
4211 		return -ENODEV;
4212 
4213 	/* If this file was open for write, then erase contents */
4214 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4215 		int cpu = tracing_get_cpu(inode);
4216 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4217 
4218 #ifdef CONFIG_TRACER_MAX_TRACE
4219 		if (tr->current_trace->print_max)
4220 			trace_buf = &tr->max_buffer;
4221 #endif
4222 
4223 		if (cpu == RING_BUFFER_ALL_CPUS)
4224 			tracing_reset_online_cpus(trace_buf);
4225 		else
4226 			tracing_reset(trace_buf, cpu);
4227 	}
4228 
4229 	if (file->f_mode & FMODE_READ) {
4230 		iter = __tracing_open(inode, file, false);
4231 		if (IS_ERR(iter))
4232 			ret = PTR_ERR(iter);
4233 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4234 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4235 	}
4236 
4237 	if (ret < 0)
4238 		trace_array_put(tr);
4239 
4240 	return ret;
4241 }
4242 
4243 /*
4244  * Some tracers are not suitable for instance buffers.
4245  * A tracer is always available for the global array (toplevel)
4246  * or if it explicitly states that it is.
4247  */
4248 static bool
4249 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4250 {
4251 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4252 }
4253 
4254 /* Find the next tracer that this trace array may use */
4255 static struct tracer *
4256 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4257 {
4258 	while (t && !trace_ok_for_array(t, tr))
4259 		t = t->next;
4260 
4261 	return t;
4262 }
4263 
4264 static void *
4265 t_next(struct seq_file *m, void *v, loff_t *pos)
4266 {
4267 	struct trace_array *tr = m->private;
4268 	struct tracer *t = v;
4269 
4270 	(*pos)++;
4271 
4272 	if (t)
4273 		t = get_tracer_for_array(tr, t->next);
4274 
4275 	return t;
4276 }
4277 
4278 static void *t_start(struct seq_file *m, loff_t *pos)
4279 {
4280 	struct trace_array *tr = m->private;
4281 	struct tracer *t;
4282 	loff_t l = 0;
4283 
4284 	mutex_lock(&trace_types_lock);
4285 
4286 	t = get_tracer_for_array(tr, trace_types);
4287 	for (; t && l < *pos; t = t_next(m, t, &l))
4288 		;
4289 
4290 	return t;
4291 }
4292 
4293 static void t_stop(struct seq_file *m, void *p)
4294 {
4295 	mutex_unlock(&trace_types_lock);
4296 }
4297 
4298 static int t_show(struct seq_file *m, void *v)
4299 {
4300 	struct tracer *t = v;
4301 
4302 	if (!t)
4303 		return 0;
4304 
4305 	seq_puts(m, t->name);
4306 	if (t->next)
4307 		seq_putc(m, ' ');
4308 	else
4309 		seq_putc(m, '\n');
4310 
4311 	return 0;
4312 }
4313 
4314 static const struct seq_operations show_traces_seq_ops = {
4315 	.start		= t_start,
4316 	.next		= t_next,
4317 	.stop		= t_stop,
4318 	.show		= t_show,
4319 };
4320 
4321 static int show_traces_open(struct inode *inode, struct file *file)
4322 {
4323 	struct trace_array *tr = inode->i_private;
4324 	struct seq_file *m;
4325 	int ret;
4326 
4327 	if (tracing_disabled)
4328 		return -ENODEV;
4329 
4330 	ret = seq_open(file, &show_traces_seq_ops);
4331 	if (ret)
4332 		return ret;
4333 
4334 	m = file->private_data;
4335 	m->private = tr;
4336 
4337 	return 0;
4338 }
4339 
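/*
 * Writes to the "trace" file carry no data of their own; opening the
 * file with O_TRUNC (see tracing_open()) is what clears the buffer, so
 * the write handler just consumes and discards the input.
 */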
4340 static ssize_t
4341 tracing_write_stub(struct file *filp, const char __user *ubuf,
4342 		   size_t count, loff_t *ppos)
4343 {
4344 	return count;
4345 }
4346 
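/*
 * Seeking is only meaningful when the file was opened for reading and
 * is therefore backed by a seq_file; writers simply have their file
 * position pinned at zero.
 */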
4347 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4348 {
4349 	int ret;
4350 
4351 	if (file->f_mode & FMODE_READ)
4352 		ret = seq_lseek(file, offset, whence);
4353 	else
4354 		file->f_pos = ret = 0;
4355 
4356 	return ret;
4357 }
4358 
4359 static const struct file_operations tracing_fops = {
4360 	.open		= tracing_open,
4361 	.read		= seq_read,
4362 	.write		= tracing_write_stub,
4363 	.llseek		= tracing_lseek,
4364 	.release	= tracing_release,
4365 };
4366 
4367 static const struct file_operations show_traces_fops = {
4368 	.open		= show_traces_open,
4369 	.read		= seq_read,
4370 	.release	= seq_release,
4371 	.llseek		= seq_lseek,
4372 };
4373 
4374 static ssize_t
4375 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4376 		     size_t count, loff_t *ppos)
4377 {
4378 	struct trace_array *tr = file_inode(filp)->i_private;
4379 	char *mask_str;
4380 	int len;
4381 
4382 	len = snprintf(NULL, 0, "%*pb\n",
4383 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4384 	mask_str = kmalloc(len, GFP_KERNEL);
4385 	if (!mask_str)
4386 		return -ENOMEM;
4387 
4388 	len = snprintf(mask_str, len, "%*pb\n",
4389 		       cpumask_pr_args(tr->tracing_cpumask));
4390 	if (len >= count) {
4391 		count = -EINVAL;
4392 		goto out_err;
4393 	}
4394 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4395 
4396 out_err:
4397 	kfree(mask_str);
4398 
4399 	return count;
4400 }
4401 
4402 static ssize_t
4403 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4404 		      size_t count, loff_t *ppos)
4405 {
4406 	struct trace_array *tr = file_inode(filp)->i_private;
4407 	cpumask_var_t tracing_cpumask_new;
4408 	int err, cpu;
4409 
4410 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4411 		return -ENOMEM;
4412 
4413 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4414 	if (err)
4415 		goto err_unlock;
4416 
4417 	local_irq_disable();
4418 	arch_spin_lock(&tr->max_lock);
4419 	for_each_tracing_cpu(cpu) {
4420 		/*
4421 		 * Increase/decrease the disabled counter if we are
4422 		 * about to flip a bit in the cpumask:
4423 		 */
4424 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4425 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4426 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4427 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4428 		}
4429 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4430 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4431 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4432 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4433 		}
4434 	}
4435 	arch_spin_unlock(&tr->max_lock);
4436 	local_irq_enable();
4437 
4438 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4439 	free_cpumask_var(tracing_cpumask_new);
4440 
4441 	return count;
4442 
4443 err_unlock:
4444 	free_cpumask_var(tracing_cpumask_new);
4445 
4446 	return err;
4447 }
4448 
4449 static const struct file_operations tracing_cpumask_fops = {
4450 	.open		= tracing_open_generic_tr,
4451 	.read		= tracing_cpumask_read,
4452 	.write		= tracing_cpumask_write,
4453 	.release	= tracing_release_generic_tr,
4454 	.llseek		= generic_file_llseek,
4455 };
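
/*
 * Illustrative userspace sketch (not part of this file): restrict
 * tracing to CPUs 0 and 1 by writing a hex mask to tracing_cpumask.
 * The tracefs mount point below is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/tracing_cpumask", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "3\n", 2);	// bits 0-1 set => trace CPUs 0 and 1
 *		close(fd);
 *	}
 */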
4456 
4457 static int tracing_trace_options_show(struct seq_file *m, void *v)
4458 {
4459 	struct tracer_opt *trace_opts;
4460 	struct trace_array *tr = m->private;
4461 	u32 tracer_flags;
4462 	int i;
4463 
4464 	mutex_lock(&trace_types_lock);
4465 	tracer_flags = tr->current_trace->flags->val;
4466 	trace_opts = tr->current_trace->flags->opts;
4467 
4468 	for (i = 0; trace_options[i]; i++) {
4469 		if (tr->trace_flags & (1 << i))
4470 			seq_printf(m, "%s\n", trace_options[i]);
4471 		else
4472 			seq_printf(m, "no%s\n", trace_options[i]);
4473 	}
4474 
4475 	for (i = 0; trace_opts[i].name; i++) {
4476 		if (tracer_flags & trace_opts[i].bit)
4477 			seq_printf(m, "%s\n", trace_opts[i].name);
4478 		else
4479 			seq_printf(m, "no%s\n", trace_opts[i].name);
4480 	}
4481 	mutex_unlock(&trace_types_lock);
4482 
4483 	return 0;
4484 }
4485 
4486 static int __set_tracer_option(struct trace_array *tr,
4487 			       struct tracer_flags *tracer_flags,
4488 			       struct tracer_opt *opts, int neg)
4489 {
4490 	struct tracer *trace = tracer_flags->trace;
4491 	int ret;
4492 
4493 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4494 	if (ret)
4495 		return ret;
4496 
4497 	if (neg)
4498 		tracer_flags->val &= ~opts->bit;
4499 	else
4500 		tracer_flags->val |= opts->bit;
4501 	return 0;
4502 }
4503 
4504 /* Try to assign a tracer specific option */
4505 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4506 {
4507 	struct tracer *trace = tr->current_trace;
4508 	struct tracer_flags *tracer_flags = trace->flags;
4509 	struct tracer_opt *opts = NULL;
4510 	int i;
4511 
4512 	for (i = 0; tracer_flags->opts[i].name; i++) {
4513 		opts = &tracer_flags->opts[i];
4514 
4515 		if (strcmp(cmp, opts->name) == 0)
4516 			return __set_tracer_option(tr, trace->flags, opts, neg);
4517 	}
4518 
4519 	return -EINVAL;
4520 }
4521 
4522 /* Some tracers require overwrite to stay enabled */
4523 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4524 {
4525 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4526 		return -1;
4527 
4528 	return 0;
4529 }
4530 
4531 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4532 {
4533 	/* do nothing if flag is already set */
4534 	if (!!(tr->trace_flags & mask) == !!enabled)
4535 		return 0;
4536 
4537 	/* Give the tracer a chance to approve the change */
4538 	if (tr->current_trace->flag_changed)
4539 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4540 			return -EINVAL;
4541 
4542 	if (enabled)
4543 		tr->trace_flags |= mask;
4544 	else
4545 		tr->trace_flags &= ~mask;
4546 
4547 	if (mask == TRACE_ITER_RECORD_CMD)
4548 		trace_event_enable_cmd_record(enabled);
4549 
4550 	if (mask == TRACE_ITER_RECORD_TGID) {
4551 		if (!tgid_map)
4552 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4553 					   sizeof(*tgid_map),
4554 					   GFP_KERNEL);
4555 		if (!tgid_map) {
4556 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4557 			return -ENOMEM;
4558 		}
4559 
4560 		trace_event_enable_tgid_record(enabled);
4561 	}
4562 
4563 	if (mask == TRACE_ITER_EVENT_FORK)
4564 		trace_event_follow_fork(tr, enabled);
4565 
4566 	if (mask == TRACE_ITER_FUNC_FORK)
4567 		ftrace_pid_follow_fork(tr, enabled);
4568 
4569 	if (mask == TRACE_ITER_OVERWRITE) {
4570 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4571 #ifdef CONFIG_TRACER_MAX_TRACE
4572 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4573 #endif
4574 	}
4575 
4576 	if (mask == TRACE_ITER_PRINTK) {
4577 		trace_printk_start_stop_comm(enabled);
4578 		trace_printk_control(enabled);
4579 	}
4580 
4581 	return 0;
4582 }
4583 
4584 static int trace_set_options(struct trace_array *tr, char *option)
4585 {
4586 	char *cmp;
4587 	int neg = 0;
4588 	int ret;
4589 	size_t orig_len = strlen(option);
4590 	int len;
4591 
4592 	cmp = strstrip(option);
4593 
4594 	len = str_has_prefix(cmp, "no");
4595 	if (len)
4596 		neg = 1;
4597 
4598 	cmp += len;
4599 
4600 	mutex_lock(&trace_types_lock);
4601 
4602 	ret = match_string(trace_options, -1, cmp);
4603 	/* If the name did not match a core option, test the tracer-specific options */
4604 	if (ret < 0)
4605 		ret = set_tracer_option(tr, cmp, neg);
4606 	else
4607 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4608 
4609 	mutex_unlock(&trace_types_lock);
4610 
4611 	/*
4612 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4613 	 * turn it back into a space.
4614 	 */
4615 	if (orig_len > strlen(option))
4616 		option[strlen(option)] = ' ';
4617 
4618 	return ret;
4619 }
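
/*
 * Illustrative userspace sketch (not part of this file): toggling a
 * core option by name through the trace_options file.  The tracefs
 * mount point below is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/trace_options", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "nooverwrite", 11);	// clear TRACE_ITER_OVERWRITE
 *		close(fd);
 *	}
 */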
4620 
4621 static void __init apply_trace_boot_options(void)
4622 {
4623 	char *buf = trace_boot_options_buf;
4624 	char *option;
4625 
4626 	while (true) {
4627 		option = strsep(&buf, ",");
4628 
4629 		if (!option)
4630 			break;
4631 
4632 		if (*option)
4633 			trace_set_options(&global_trace, option);
4634 
4635 		/* Put back the comma to allow this to be called again */
4636 		if (buf)
4637 			*(buf - 1) = ',';
4638 	}
4639 }
4640 
4641 static ssize_t
4642 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4643 			size_t cnt, loff_t *ppos)
4644 {
4645 	struct seq_file *m = filp->private_data;
4646 	struct trace_array *tr = m->private;
4647 	char buf[64];
4648 	int ret;
4649 
4650 	if (cnt >= sizeof(buf))
4651 		return -EINVAL;
4652 
4653 	if (copy_from_user(buf, ubuf, cnt))
4654 		return -EFAULT;
4655 
4656 	buf[cnt] = 0;
4657 
4658 	ret = trace_set_options(tr, buf);
4659 	if (ret < 0)
4660 		return ret;
4661 
4662 	*ppos += cnt;
4663 
4664 	return cnt;
4665 }
4666 
4667 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4668 {
4669 	struct trace_array *tr = inode->i_private;
4670 	int ret;
4671 
4672 	if (tracing_disabled)
4673 		return -ENODEV;
4674 
4675 	if (trace_array_get(tr) < 0)
4676 		return -ENODEV;
4677 
4678 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4679 	if (ret < 0)
4680 		trace_array_put(tr);
4681 
4682 	return ret;
4683 }
4684 
4685 static const struct file_operations tracing_iter_fops = {
4686 	.open		= tracing_trace_options_open,
4687 	.read		= seq_read,
4688 	.llseek		= seq_lseek,
4689 	.release	= tracing_single_release_tr,
4690 	.write		= tracing_trace_options_write,
4691 };
4692 
4693 static const char readme_msg[] =
4694 	"tracing mini-HOWTO:\n\n"
4695 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4696 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4697 	" Important files:\n"
4698 	"  trace\t\t\t- The static contents of the buffer\n"
4699 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4700 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4701 	"  current_tracer\t- function and latency tracers\n"
4702 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4703 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4704 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4705 	"  trace_clock\t\t- change the clock used to order events\n"
4706 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4707 	"      global:   Synced across CPUs but slows tracing down.\n"
4708 	"     counter:   Not a clock, but just an increment\n"
4709 	"      uptime:   Jiffy counter from time of boot\n"
4710 	"        perf:   Same clock that perf events use\n"
4711 #ifdef CONFIG_X86_64
4712 	"     x86-tsc:   TSC cycle counter\n"
4713 #endif
4714 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4715 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4716 	"    absolute:   Absolute (standalone) timestamp\n"
4717 	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4718 	"\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4719 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4720 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4721 	"\t\t\t  Remove sub-buffer with rmdir\n"
4722 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4723 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4724 	"\t\t\t  option name\n"
4725 	"  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs are saved\n"
4726 #ifdef CONFIG_DYNAMIC_FTRACE
4727 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4728 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4729 	"\t\t\t  functions\n"
4730 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4731 	"\t     modules: Can select a group via module\n"
4732 	"\t      Format: :mod:<module-name>\n"
4733 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4734 	"\t    triggers: a command to perform when function is hit\n"
4735 	"\t      Format: <function>:<trigger>[:count]\n"
4736 	"\t     trigger: traceon, traceoff\n"
4737 	"\t\t      enable_event:<system>:<event>\n"
4738 	"\t\t      disable_event:<system>:<event>\n"
4739 #ifdef CONFIG_STACKTRACE
4740 	"\t\t      stacktrace\n"
4741 #endif
4742 #ifdef CONFIG_TRACER_SNAPSHOT
4743 	"\t\t      snapshot\n"
4744 #endif
4745 	"\t\t      dump\n"
4746 	"\t\t      cpudump\n"
4747 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4748 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4749 	"\t     The first one will disable tracing every time do_fault is hit\n"
4750 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4751 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4752 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4753 	"\t       the counter will not decrement. It only decrements when the\n"
4754 	"\t       trigger did work\n"
4755 	"\t     To remove a trigger without a count:\n"
4756 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4757 	"\t     To remove a trigger with a count:\n"
4758 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4759 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4760 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4761 	"\t    modules: Can select a group via module command :mod:\n"
4762 	"\t    Does not accept triggers\n"
4763 #endif /* CONFIG_DYNAMIC_FTRACE */
4764 #ifdef CONFIG_FUNCTION_TRACER
4765 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4766 	"\t\t    (function)\n"
4767 #endif
4768 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4769 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4770 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4771 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4772 #endif
4773 #ifdef CONFIG_TRACER_SNAPSHOT
4774 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4775 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4776 	"\t\t\t  information\n"
4777 #endif
4778 #ifdef CONFIG_STACK_TRACER
4779 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4780 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4781 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4782 	"\t\t\t  new trace)\n"
4783 #ifdef CONFIG_DYNAMIC_FTRACE
4784 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4785 	"\t\t\t  traces\n"
4786 #endif
4787 #endif /* CONFIG_STACK_TRACER */
4788 #ifdef CONFIG_DYNAMIC_EVENTS
4789 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4790 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4791 #endif
4792 #ifdef CONFIG_KPROBE_EVENTS
4793 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4794 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4795 #endif
4796 #ifdef CONFIG_UPROBE_EVENTS
4797 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4798 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4799 #endif
4800 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4801 	"\t  accepts: event-definitions (one definition per line)\n"
4802 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4803 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4804 #ifdef CONFIG_HIST_TRIGGERS
4805 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4806 #endif
4807 	"\t           -:[<group>/]<event>\n"
4808 #ifdef CONFIG_KPROBE_EVENTS
4809 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4810 	"\tplace (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4811 #endif
4812 #ifdef CONFIG_UPROBE_EVENTS
4813 	"\t   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4814 #endif
4815 	"\t     args: <name>=fetcharg[:type]\n"
4816 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4817 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4818 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4819 #else
4820 	"\t           $stack<index>, $stack, $retval, $comm\n"
4821 #endif
4822 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4823 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4824 	"\t           <type>\\[<array-size>\\]\n"
4825 #ifdef CONFIG_HIST_TRIGGERS
4826 	"\t    field: <stype> <name>;\n"
4827 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4828 	"\t           [unsigned] char/int/long\n"
4829 #endif
4830 #endif
4831 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4832 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4833 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4834 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4835 	"\t\t\t  events\n"
4836 	"      filter\t\t- If set, only events passing filter are traced\n"
4837 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4838 	"\t\t\t  <event>:\n"
4839 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4840 	"      filter\t\t- If set, only events passing filter are traced\n"
4841 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4842 	"\t    Format: <trigger>[:count][if <filter>]\n"
4843 	"\t   trigger: traceon, traceoff\n"
4844 	"\t            enable_event:<system>:<event>\n"
4845 	"\t            disable_event:<system>:<event>\n"
4846 #ifdef CONFIG_HIST_TRIGGERS
4847 	"\t            enable_hist:<system>:<event>\n"
4848 	"\t            disable_hist:<system>:<event>\n"
4849 #endif
4850 #ifdef CONFIG_STACKTRACE
4851 	"\t\t    stacktrace\n"
4852 #endif
4853 #ifdef CONFIG_TRACER_SNAPSHOT
4854 	"\t\t    snapshot\n"
4855 #endif
4856 #ifdef CONFIG_HIST_TRIGGERS
4857 	"\t\t    hist (see below)\n"
4858 #endif
4859 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4860 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4861 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4862 	"\t                  events/block/block_unplug/trigger\n"
4863 	"\t   The first disables tracing every time block_unplug is hit.\n"
4864 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4865 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4866 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4867 	"\t   Like function triggers, the counter is only decremented if it\n"
4868 	"\t    enabled or disabled tracing.\n"
4869 	"\t   To remove a trigger without a count:\n"
4870 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4871 	"\t   To remove a trigger with a count:\n"
4872 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4873 	"\t   Filters can be ignored when removing a trigger.\n"
4874 #ifdef CONFIG_HIST_TRIGGERS
4875 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4876 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4877 	"\t            [:values=<field1[,field2,...]>]\n"
4878 	"\t            [:sort=<field1[,field2,...]>]\n"
4879 	"\t            [:size=#entries]\n"
4880 	"\t            [:pause][:continue][:clear]\n"
4881 	"\t            [:name=histname1]\n"
4882 	"\t            [:<handler>.<action>]\n"
4883 	"\t            [if <filter>]\n\n"
4884 	"\t    When a matching event is hit, an entry is added to a hash\n"
4885 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4886 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4887 	"\t    correspond to fields in the event's format description.  Keys\n"
4888 	"\t    can be any field, or the special string 'stacktrace'.\n"
4889 	"\t    Compound keys consisting of up to two fields can be specified\n"
4890 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4891 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4892 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4893 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4894 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4895 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4896 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4897 	"\t    its histogram data will be shared with other triggers of the\n"
4898 	"\t    same name, and trigger hits will update this common data.\n\n"
4899 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4900 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4901 	"\t    triggers attached to an event, there will be a table for each\n"
4902 	"\t    trigger in the output.  The table displayed for a named\n"
4903 	"\t    trigger will be the same as any other instance having the\n"
4904 	"\t    same name.  The default format used to display a given field\n"
4905 	"\t    can be modified by appending any of the following modifiers\n"
4906 	"\t    to the field name, as applicable:\n\n"
4907 	"\t            .hex        display a number as a hex value\n"
4908 	"\t            .sym        display an address as a symbol\n"
4909 	"\t            .sym-offset display an address as a symbol and offset\n"
4910 	"\t            .execname   display a common_pid as a program name\n"
4911 	"\t            .syscall    display a syscall id as a syscall name\n"
4912 	"\t            .log2       display log2 value rather than raw number\n"
4913 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4914 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4915 	"\t    trigger or to start a hist trigger but not log any events\n"
4916 	"\t    until told to do so.  'continue' can be used to start or\n"
4917 	"\t    restart a paused hist trigger.\n\n"
4918 	"\t    The 'clear' parameter will clear the contents of a running\n"
4919 	"\t    hist trigger and leave its current paused/active state\n"
4920 	"\t    unchanged.\n\n"
4921 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4922 	"\t    have one event conditionally start and stop another event's\n"
4923 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4924 	"\t    the enable_event and disable_event triggers.\n\n"
4925 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4926 	"\t    histogram entry is added or updated.  They take the form:\n\n"
4927 	"\t        <handler>.<action>\n\n"
4928 	"\t    The available handlers are:\n\n"
4929 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4930 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4931 	"\t        onchange(var)            - invoke action if var changes\n\n"
4932 	"\t    The available actions are:\n\n"
4933 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4934 	"\t        save(field,...)                      - save current event fields\n"
4935 #ifdef CONFIG_TRACER_SNAPSHOT
4936 	"\t        snapshot()                           - snapshot the trace buffer\n"
4937 #endif
4938 #endif
4939 ;
4940 
4941 static ssize_t
4942 tracing_readme_read(struct file *filp, char __user *ubuf,
4943 		       size_t cnt, loff_t *ppos)
4944 {
4945 	return simple_read_from_buffer(ubuf, cnt, ppos,
4946 					readme_msg, strlen(readme_msg));
4947 }
4948 
4949 static const struct file_operations tracing_readme_fops = {
4950 	.open		= tracing_open_generic,
4951 	.read		= tracing_readme_read,
4952 	.llseek		= generic_file_llseek,
4953 };
4954 
4955 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4956 {
4957 	int *ptr = v;
4958 
4959 	if (*pos || m->count)
4960 		ptr++;
4961 
4962 	(*pos)++;
4963 
4964 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4965 		if (trace_find_tgid(*ptr))
4966 			return ptr;
4967 	}
4968 
4969 	return NULL;
4970 }
4971 
4972 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	void *v;
4975 	loff_t l = 0;
4976 
4977 	if (!tgid_map)
4978 		return NULL;
4979 
4980 	v = &tgid_map[0];
4981 	while (l <= *pos) {
4982 		v = saved_tgids_next(m, v, &l);
4983 		if (!v)
4984 			return NULL;
4985 	}
4986 
4987 	return v;
4988 }
4989 
4990 static void saved_tgids_stop(struct seq_file *m, void *v)
4991 {
4992 }
4993 
4994 static int saved_tgids_show(struct seq_file *m, void *v)
4995 {
4996 	int pid = (int *)v - tgid_map;
4997 
4998 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4999 	return 0;
5000 }
5001 
5002 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5003 	.start		= saved_tgids_start,
5004 	.stop		= saved_tgids_stop,
5005 	.next		= saved_tgids_next,
5006 	.show		= saved_tgids_show,
5007 };
5008 
5009 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5010 {
5011 	if (tracing_disabled)
5012 		return -ENODEV;
5013 
5014 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5015 }
5016 
5017 
5018 static const struct file_operations tracing_saved_tgids_fops = {
5019 	.open		= tracing_saved_tgids_open,
5020 	.read		= seq_read,
5021 	.llseek		= seq_lseek,
5022 	.release	= seq_release,
5023 };
5024 
5025 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5026 {
5027 	unsigned int *ptr = v;
5028 
5029 	if (*pos || m->count)
5030 		ptr++;
5031 
5032 	(*pos)++;
5033 
5034 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5035 	     ptr++) {
5036 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5037 			continue;
5038 
5039 		return ptr;
5040 	}
5041 
5042 	return NULL;
5043 }
5044 
5045 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5046 {
5047 	void *v;
5048 	loff_t l = 0;
5049 
5050 	preempt_disable();
5051 	arch_spin_lock(&trace_cmdline_lock);
5052 
5053 	v = &savedcmd->map_cmdline_to_pid[0];
5054 	while (l <= *pos) {
5055 		v = saved_cmdlines_next(m, v, &l);
5056 		if (!v)
5057 			return NULL;
5058 	}
5059 
5060 	return v;
5061 }
5062 
5063 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5064 {
5065 	arch_spin_unlock(&trace_cmdline_lock);
5066 	preempt_enable();
5067 }
5068 
5069 static int saved_cmdlines_show(struct seq_file *m, void *v)
5070 {
5071 	char buf[TASK_COMM_LEN];
5072 	unsigned int *pid = v;
5073 
5074 	__trace_find_cmdline(*pid, buf);
5075 	seq_printf(m, "%d %s\n", *pid, buf);
5076 	return 0;
5077 }
5078 
5079 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5080 	.start		= saved_cmdlines_start,
5081 	.next		= saved_cmdlines_next,
5082 	.stop		= saved_cmdlines_stop,
5083 	.show		= saved_cmdlines_show,
5084 };
5085 
5086 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5087 {
5088 	if (tracing_disabled)
5089 		return -ENODEV;
5090 
5091 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5092 }
5093 
5094 static const struct file_operations tracing_saved_cmdlines_fops = {
5095 	.open		= tracing_saved_cmdlines_open,
5096 	.read		= seq_read,
5097 	.llseek		= seq_lseek,
5098 	.release	= seq_release,
5099 };
5100 
5101 static ssize_t
5102 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5103 				 size_t cnt, loff_t *ppos)
5104 {
5105 	char buf[64];
5106 	int r;
5107 
5108 	arch_spin_lock(&trace_cmdline_lock);
5109 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5110 	arch_spin_unlock(&trace_cmdline_lock);
5111 
5112 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5113 }
5114 
5115 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5116 {
5117 	kfree(s->saved_cmdlines);
5118 	kfree(s->map_cmdline_to_pid);
5119 	kfree(s);
5120 }
5121 
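/*
 * Swap in a freshly sized saved-cmdlines buffer: allocate the new one
 * first, exchange the pointer under trace_cmdline_lock, and free the
 * old buffer only after the lock has been dropped.
 */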
5122 static int tracing_resize_saved_cmdlines(unsigned int val)
5123 {
5124 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5125 
5126 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5127 	if (!s)
5128 		return -ENOMEM;
5129 
5130 	if (allocate_cmdlines_buffer(val, s) < 0) {
5131 		kfree(s);
5132 		return -ENOMEM;
5133 	}
5134 
5135 	arch_spin_lock(&trace_cmdline_lock);
5136 	savedcmd_temp = savedcmd;
5137 	savedcmd = s;
5138 	arch_spin_unlock(&trace_cmdline_lock);
5139 	free_saved_cmdlines_buffer(savedcmd_temp);
5140 
5141 	return 0;
5142 }
5143 
5144 static ssize_t
5145 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5146 				  size_t cnt, loff_t *ppos)
5147 {
5148 	unsigned long val;
5149 	int ret;
5150 
5151 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5152 	if (ret)
5153 		return ret;
5154 
5155 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5156 	if (!val || val > PID_MAX_DEFAULT)
5157 		return -EINVAL;
5158 
5159 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5160 	if (ret < 0)
5161 		return ret;
5162 
5163 	*ppos += cnt;
5164 
5165 	return cnt;
5166 }
5167 
5168 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5169 	.open		= tracing_open_generic,
5170 	.read		= tracing_saved_cmdlines_size_read,
5171 	.write		= tracing_saved_cmdlines_size_write,
5172 };
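
/*
 * Illustrative userspace sketch (not part of this file): growing the
 * saved comm/pid cache via saved_cmdlines_size.  The tracefs mount
 * point below is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1024", 4);	// keep up to 1024 comm/pid pairs
 *		close(fd);
 *	}
 */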
5173 
5174 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5175 static union trace_eval_map_item *
5176 update_eval_map(union trace_eval_map_item *ptr)
5177 {
5178 	if (!ptr->map.eval_string) {
5179 		if (ptr->tail.next) {
5180 			ptr = ptr->tail.next;
5181 			/* Set ptr to the next real item (skip head) */
5182 			ptr++;
5183 		} else
5184 			return NULL;
5185 	}
5186 	return ptr;
5187 }
5188 
5189 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5190 {
5191 	union trace_eval_map_item *ptr = v;
5192 
5193 	/*
5194 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5195 	 * This really should never happen.
5196 	 */
5197 	ptr = update_eval_map(ptr);
5198 	if (WARN_ON_ONCE(!ptr))
5199 		return NULL;
5200 
5201 	ptr++;
5202 
5203 	(*pos)++;
5204 
5205 	ptr = update_eval_map(ptr);
5206 
5207 	return ptr;
5208 }
5209 
5210 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5211 {
5212 	union trace_eval_map_item *v;
5213 	loff_t l = 0;
5214 
5215 	mutex_lock(&trace_eval_mutex);
5216 
5217 	v = trace_eval_maps;
5218 	if (v)
5219 		v++;
5220 
5221 	while (v && l < *pos) {
5222 		v = eval_map_next(m, v, &l);
5223 	}
5224 
5225 	return v;
5226 }
5227 
5228 static void eval_map_stop(struct seq_file *m, void *v)
5229 {
5230 	mutex_unlock(&trace_eval_mutex);
5231 }
5232 
5233 static int eval_map_show(struct seq_file *m, void *v)
5234 {
5235 	union trace_eval_map_item *ptr = v;
5236 
5237 	seq_printf(m, "%s %ld (%s)\n",
5238 		   ptr->map.eval_string, ptr->map.eval_value,
5239 		   ptr->map.system);
5240 
5241 	return 0;
5242 }
5243 
5244 static const struct seq_operations tracing_eval_map_seq_ops = {
5245 	.start		= eval_map_start,
5246 	.next		= eval_map_next,
5247 	.stop		= eval_map_stop,
5248 	.show		= eval_map_show,
5249 };
5250 
5251 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5252 {
5253 	if (tracing_disabled)
5254 		return -ENODEV;
5255 
5256 	return seq_open(filp, &tracing_eval_map_seq_ops);
5257 }
5258 
5259 static const struct file_operations tracing_eval_map_fops = {
5260 	.open		= tracing_eval_map_open,
5261 	.read		= seq_read,
5262 	.llseek		= seq_lseek,
5263 	.release	= seq_release,
5264 };
5265 
5266 static inline union trace_eval_map_item *
5267 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5268 {
5269 	/* Return tail of array given the head */
5270 	return ptr + ptr->head.length + 1;
5271 }
5272 
5273 static void
5274 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5275 			   int len)
5276 {
5277 	struct trace_eval_map **stop;
5278 	struct trace_eval_map **map;
5279 	union trace_eval_map_item *map_array;
5280 	union trace_eval_map_item *ptr;
5281 
5282 	stop = start + len;
5283 
5284 	/*
5285 	 * The trace_eval_maps contains the map plus a head and tail item,
5286 	 * where the head holds the module and length of array, and the
5287 	 * tail holds a pointer to the next list.
5288 	 */
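	/*
	 * Resulting layout of map_array (illustrative):
	 *
	 *   [head: mod, length][map 0][map 1] ... [map len-1][zeroed tail]
	 *
	 * The zeroed item at the end acts as the tail; its tail.next is
	 * filled in later if another module chains its own array on.
	 */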
5289 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5290 	if (!map_array) {
5291 		pr_warn("Unable to allocate trace eval mapping\n");
5292 		return;
5293 	}
5294 
5295 	mutex_lock(&trace_eval_mutex);
5296 
5297 	if (!trace_eval_maps)
5298 		trace_eval_maps = map_array;
5299 	else {
5300 		ptr = trace_eval_maps;
5301 		for (;;) {
5302 			ptr = trace_eval_jmp_to_tail(ptr);
5303 			if (!ptr->tail.next)
5304 				break;
5305 			ptr = ptr->tail.next;
5306 
5307 		}
5308 		ptr->tail.next = map_array;
5309 	}
5310 	map_array->head.mod = mod;
5311 	map_array->head.length = len;
5312 	map_array++;
5313 
5314 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5315 		map_array->map = **map;
5316 		map_array++;
5317 	}
5318 	memset(map_array, 0, sizeof(*map_array));
5319 
5320 	mutex_unlock(&trace_eval_mutex);
5321 }
5322 
5323 static void trace_create_eval_file(struct dentry *d_tracer)
5324 {
5325 	trace_create_file("eval_map", 0444, d_tracer,
5326 			  NULL, &tracing_eval_map_fops);
5327 }
5328 
5329 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5330 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5331 static inline void trace_insert_eval_map_file(struct module *mod,
5332 			      struct trace_eval_map **start, int len) { }
5333 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5334 
5335 static void trace_insert_eval_map(struct module *mod,
5336 				  struct trace_eval_map **start, int len)
5337 {
5338 	struct trace_eval_map **map;
5339 
5340 	if (len <= 0)
5341 		return;
5342 
5343 	map = start;
5344 
5345 	trace_event_eval_update(map, len);
5346 
5347 	trace_insert_eval_map_file(mod, start, len);
5348 }
5349 
5350 static ssize_t
5351 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5352 		       size_t cnt, loff_t *ppos)
5353 {
5354 	struct trace_array *tr = filp->private_data;
5355 	char buf[MAX_TRACER_SIZE+2];
5356 	int r;
5357 
5358 	mutex_lock(&trace_types_lock);
5359 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5360 	mutex_unlock(&trace_types_lock);
5361 
5362 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5363 }
5364 
5365 int tracer_init(struct tracer *t, struct trace_array *tr)
5366 {
5367 	tracing_reset_online_cpus(&tr->trace_buffer);
5368 	return t->init(tr);
5369 }
5370 
5371 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5372 {
5373 	int cpu;
5374 
5375 	for_each_tracing_cpu(cpu)
5376 		per_cpu_ptr(buf->data, cpu)->entries = val;
5377 }
5378 
5379 #ifdef CONFIG_TRACER_MAX_TRACE
5380 /* resize @trace_buf to the size of @size_buf's entries */
5381 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5382 					struct trace_buffer *size_buf, int cpu_id)
5383 {
5384 	int cpu, ret = 0;
5385 
5386 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5387 		for_each_tracing_cpu(cpu) {
5388 			ret = ring_buffer_resize(trace_buf->buffer,
5389 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5390 			if (ret < 0)
5391 				break;
5392 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5393 				per_cpu_ptr(size_buf->data, cpu)->entries;
5394 		}
5395 	} else {
5396 		ret = ring_buffer_resize(trace_buf->buffer,
5397 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5398 		if (ret == 0)
5399 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5400 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5401 	}
5402 
5403 	return ret;
5404 }
5405 #endif /* CONFIG_TRACER_MAX_TRACE */
5406 
5407 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5408 					unsigned long size, int cpu)
5409 {
5410 	int ret;
5411 
5412 	/*
5413 	 * If kernel or user changes the size of the ring buffer
5414 	 * we use the size that was given, and we can forget about
5415 	 * expanding it later.
5416 	 */
5417 	ring_buffer_expanded = true;
5418 
5419 	/* May be called before buffers are initialized */
5420 	if (!tr->trace_buffer.buffer)
5421 		return 0;
5422 
5423 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5424 	if (ret < 0)
5425 		return ret;
5426 
5427 #ifdef CONFIG_TRACER_MAX_TRACE
5428 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5429 	    !tr->current_trace->use_max_tr)
5430 		goto out;
5431 
5432 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5433 	if (ret < 0) {
5434 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5435 						     &tr->trace_buffer, cpu);
5436 		if (r < 0) {
5437 			/*
5438 			 * AARGH! We are left with different
5439 			 * size max buffer!!!!
5440 			 * The max buffer is our "snapshot" buffer.
5441 			 * When a tracer needs a snapshot (one of the
5442 			 * latency tracers), it swaps the max buffer
5443 			 * with the saved snap shot. We succeeded to
5444 		 * with the saved snapshot. We succeeded to
5445 			 * update the size of the max buffer. But when we tried
5446 			 * to reset the main buffer to the original size, we
5447 			 * failed there too. This is very unlikely to
5448 			 * happen, but if it does, warn and kill all
5449 			 * tracing.
5450 			 */
5451 			WARN_ON(1);
5452 			tracing_disabled = 1;
5453 		}
5454 		return ret;
5455 	}
5456 
5457 	if (cpu == RING_BUFFER_ALL_CPUS)
5458 		set_buffer_entries(&tr->max_buffer, size);
5459 	else
5460 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5461 
5462  out:
5463 #endif /* CONFIG_TRACER_MAX_TRACE */
5464 
5465 	if (cpu == RING_BUFFER_ALL_CPUS)
5466 		set_buffer_entries(&tr->trace_buffer, size);
5467 	else
5468 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5469 
5470 	return ret;
5471 }
5472 
5473 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5474 					  unsigned long size, int cpu_id)
5475 {
5476 	int ret = size;
5477 
5478 	mutex_lock(&trace_types_lock);
5479 
5480 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5481 		/* make sure this cpu is enabled in the mask */
5482 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5483 			ret = -EINVAL;
5484 			goto out;
5485 		}
5486 	}
5487 
5488 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5489 	if (ret < 0)
5490 		ret = -ENOMEM;
5491 
5492 out:
5493 	mutex_unlock(&trace_types_lock);
5494 
5495 	return ret;
5496 }
5497 
5498 
5499 /**
5500  * tracing_update_buffers - used by tracing facility to expand ring buffers
5501  *
5502  * To save memory when tracing is never used on a system that has it
5503  * configured in, the ring buffers start out at a minimum size.  Once
5504  * a user starts to use the tracing facility, they need to grow to
5505  * their default size.
5506  *
5507  * This function is to be called when a tracer is about to be used.
5508  */
5509 int tracing_update_buffers(void)
5510 {
5511 	int ret = 0;
5512 
5513 	mutex_lock(&trace_types_lock);
5514 	if (!ring_buffer_expanded)
5515 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5516 						RING_BUFFER_ALL_CPUS);
5517 	mutex_unlock(&trace_types_lock);
5518 
5519 	return ret;
5520 }
5521 
5522 struct trace_option_dentry;
5523 
5524 static void
5525 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5526 
5527 /*
5528  * Used to clear out the tracer before deletion of an instance.
5529  * Must have trace_types_lock held.
5530  */
5531 static void tracing_set_nop(struct trace_array *tr)
5532 {
5533 	if (tr->current_trace == &nop_trace)
5534 		return;
5535 
5536 	tr->current_trace->enabled--;
5537 
5538 	if (tr->current_trace->reset)
5539 		tr->current_trace->reset(tr);
5540 
5541 	tr->current_trace = &nop_trace;
5542 }
5543 
5544 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5545 {
5546 	/* Only enable if the directory has been created already. */
5547 	if (!tr->dir)
5548 		return;
5549 
5550 	create_trace_option_files(tr, t);
5551 }
5552 
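/*
 * Switch @tr to the tracer named in @buf: expand the ring buffer if it
 * is still at its boot-time minimum, validate that the tracer exists
 * and is allowed for this trace array, tear down the current tracer
 * (dropping the snapshot buffer if the new tracer does not use it),
 * then initialize and enable the new one.
 */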
5553 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5554 {
5555 	struct tracer *t;
5556 #ifdef CONFIG_TRACER_MAX_TRACE
5557 	bool had_max_tr;
5558 #endif
5559 	int ret = 0;
5560 
5561 	mutex_lock(&trace_types_lock);
5562 
5563 	if (!ring_buffer_expanded) {
5564 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5565 						RING_BUFFER_ALL_CPUS);
5566 		if (ret < 0)
5567 			goto out;
5568 		ret = 0;
5569 	}
5570 
5571 	for (t = trace_types; t; t = t->next) {
5572 		if (strcmp(t->name, buf) == 0)
5573 			break;
5574 	}
5575 	if (!t) {
5576 		ret = -EINVAL;
5577 		goto out;
5578 	}
5579 	if (t == tr->current_trace)
5580 		goto out;
5581 
5582 #ifdef CONFIG_TRACER_SNAPSHOT
5583 	if (t->use_max_tr) {
5584 		arch_spin_lock(&tr->max_lock);
5585 		if (tr->cond_snapshot)
5586 			ret = -EBUSY;
5587 		arch_spin_unlock(&tr->max_lock);
5588 		if (ret)
5589 			goto out;
5590 	}
5591 #endif
5592 	/* Some tracers won't work on kernel command line */
5593 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5594 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5595 			t->name);
5596 		goto out;
5597 	}
5598 
5599 	/* Some tracers are only allowed for the top level buffer */
5600 	if (!trace_ok_for_array(t, tr)) {
5601 		ret = -EINVAL;
5602 		goto out;
5603 	}
5604 
5605 	/* If trace pipe files are being read, we can't change the tracer */
5606 	if (tr->current_trace->ref) {
5607 		ret = -EBUSY;
5608 		goto out;
5609 	}
5610 
5611 	trace_branch_disable();
5612 
5613 	tr->current_trace->enabled--;
5614 
5615 	if (tr->current_trace->reset)
5616 		tr->current_trace->reset(tr);
5617 
5618 	/* Current trace needs to be nop_trace before synchronize_rcu */
5619 	tr->current_trace = &nop_trace;
5620 
5621 #ifdef CONFIG_TRACER_MAX_TRACE
5622 	had_max_tr = tr->allocated_snapshot;
5623 
5624 	if (had_max_tr && !t->use_max_tr) {
5625 		/*
5626 		 * We need to make sure that update_max_tr() sees that
5627 		 * current_trace changed to nop_trace to keep it from
5628 		 * swapping the buffers after we resize it.
5629 		 * update_max_tr() is called with interrupts disabled,
5630 		 * so a synchronize_rcu() is sufficient.
5631 		 */
5632 		synchronize_rcu();
5633 		free_snapshot(tr);
5634 	}
5635 #endif
5636 
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638 	if (t->use_max_tr && !had_max_tr) {
5639 		ret = tracing_alloc_snapshot_instance(tr);
5640 		if (ret < 0)
5641 			goto out;
5642 	}
5643 #endif
5644 
5645 	if (t->init) {
5646 		ret = tracer_init(t, tr);
5647 		if (ret)
5648 			goto out;
5649 	}
5650 
5651 	tr->current_trace = t;
5652 	tr->current_trace->enabled++;
5653 	trace_branch_enable(tr);
5654  out:
5655 	mutex_unlock(&trace_types_lock);
5656 
5657 	return ret;
5658 }
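
/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing:
 * the tracer is switched by writing its name to the "current_tracer"
 * file, which reaches tracing_set_tracer() via tracing_set_trace_write():
 *
 *   echo function > /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 */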
5659 
5660 static ssize_t
5661 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5662 			size_t cnt, loff_t *ppos)
5663 {
5664 	struct trace_array *tr = filp->private_data;
5665 	char buf[MAX_TRACER_SIZE+1];
5666 	int i;
5667 	size_t ret;
5668 	int err;
5669 
5670 	ret = cnt;
5671 
5672 	if (cnt > MAX_TRACER_SIZE)
5673 		cnt = MAX_TRACER_SIZE;
5674 
5675 	if (copy_from_user(buf, ubuf, cnt))
5676 		return -EFAULT;
5677 
5678 	buf[cnt] = 0;
5679 
5680 	/* strip trailing whitespace. */
5681 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5682 		buf[i] = 0;
5683 
5684 	err = tracing_set_tracer(tr, buf);
5685 	if (err)
5686 		return err;
5687 
5688 	*ppos += ret;
5689 
5690 	return ret;
5691 }
5692 
5693 static ssize_t
5694 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5695 		   size_t cnt, loff_t *ppos)
5696 {
5697 	char buf[64];
5698 	int r;
5699 
5700 	r = snprintf(buf, sizeof(buf), "%ld\n",
5701 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5702 	if (r > sizeof(buf))
5703 		r = sizeof(buf);
5704 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5705 }
5706 
5707 static ssize_t
5708 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5709 		    size_t cnt, loff_t *ppos)
5710 {
5711 	unsigned long val;
5712 	int ret;
5713 
5714 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5715 	if (ret)
5716 		return ret;
5717 
5718 	*ptr = val * 1000;
5719 
5720 	return cnt;
5721 }
5722 
5723 static ssize_t
5724 tracing_thresh_read(struct file *filp, char __user *ubuf,
5725 		    size_t cnt, loff_t *ppos)
5726 {
5727 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5728 }
5729 
5730 static ssize_t
5731 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5732 		     size_t cnt, loff_t *ppos)
5733 {
5734 	struct trace_array *tr = filp->private_data;
5735 	int ret;
5736 
5737 	mutex_lock(&trace_types_lock);
5738 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5739 	if (ret < 0)
5740 		goto out;
5741 
5742 	if (tr->current_trace->update_thresh) {
5743 		ret = tr->current_trace->update_thresh(tr);
5744 		if (ret < 0)
5745 			goto out;
5746 	}
5747 
5748 	ret = cnt;
5749 out:
5750 	mutex_unlock(&trace_types_lock);
5751 
5752 	return ret;
5753 }
5754 
5755 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5756 
5757 static ssize_t
5758 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5759 		     size_t cnt, loff_t *ppos)
5760 {
5761 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5762 }
5763 
5764 static ssize_t
5765 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5766 		      size_t cnt, loff_t *ppos)
5767 {
5768 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5769 }
5770 
5771 #endif
5772 
5773 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5774 {
5775 	struct trace_array *tr = inode->i_private;
5776 	struct trace_iterator *iter;
5777 	int ret = 0;
5778 
5779 	if (tracing_disabled)
5780 		return -ENODEV;
5781 
5782 	if (trace_array_get(tr) < 0)
5783 		return -ENODEV;
5784 
5785 	mutex_lock(&trace_types_lock);
5786 
5787 	/* create a buffer to store the information to pass to userspace */
5788 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5789 	if (!iter) {
5790 		ret = -ENOMEM;
5791 		__trace_array_put(tr);
5792 		goto out;
5793 	}
5794 
5795 	trace_seq_init(&iter->seq);
5796 	iter->trace = tr->current_trace;
5797 
5798 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5799 		ret = -ENOMEM;
5800 		goto fail;
5801 	}
5802 
5803 	/* trace pipe does not show start of buffer */
5804 	cpumask_setall(iter->started);
5805 
5806 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5807 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5808 
5809 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5810 	if (trace_clocks[tr->clock_id].in_ns)
5811 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5812 
5813 	iter->tr = tr;
5814 	iter->trace_buffer = &tr->trace_buffer;
5815 	iter->cpu_file = tracing_get_cpu(inode);
5816 	mutex_init(&iter->mutex);
5817 	filp->private_data = iter;
5818 
5819 	if (iter->trace->pipe_open)
5820 		iter->trace->pipe_open(iter);
5821 
5822 	nonseekable_open(inode, filp);
5823 
5824 	tr->current_trace->ref++;
5825 out:
5826 	mutex_unlock(&trace_types_lock);
5827 	return ret;
5828 
5829 fail:
5830 	kfree(iter);
5831 	__trace_array_put(tr);
5832 	mutex_unlock(&trace_types_lock);
5833 	return ret;
5834 }
5835 
5836 static int tracing_release_pipe(struct inode *inode, struct file *file)
5837 {
5838 	struct trace_iterator *iter = file->private_data;
5839 	struct trace_array *tr = inode->i_private;
5840 
5841 	mutex_lock(&trace_types_lock);
5842 
5843 	tr->current_trace->ref--;
5844 
5845 	if (iter->trace->pipe_close)
5846 		iter->trace->pipe_close(iter);
5847 
5848 	mutex_unlock(&trace_types_lock);
5849 
5850 	free_cpumask_var(iter->started);
5851 	mutex_destroy(&iter->mutex);
5852 	kfree(iter);
5853 
5854 	trace_array_put(tr);
5855 
5856 	return 0;
5857 }
5858 
5859 static __poll_t
5860 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5861 {
5862 	struct trace_array *tr = iter->tr;
5863 
5864 	/* Iterators are static, they should be filled or empty */
5865 	if (trace_buffer_iter(iter, iter->cpu_file))
5866 		return EPOLLIN | EPOLLRDNORM;
5867 
5868 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5869 		/*
5870 		 * Always select as readable when in blocking mode
5871 		 */
5872 		return EPOLLIN | EPOLLRDNORM;
5873 	else
5874 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5875 					     filp, poll_table);
5876 }
5877 
5878 static __poll_t
5879 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5880 {
5881 	struct trace_iterator *iter = filp->private_data;
5882 
5883 	return trace_poll(iter, filp, poll_table);
5884 }
5885 
5886 /* Must be called with iter->mutex held. */
5887 static int tracing_wait_pipe(struct file *filp)
5888 {
5889 	struct trace_iterator *iter = filp->private_data;
5890 	int ret;
5891 
5892 	while (trace_empty(iter)) {
5893 
5894 		if ((filp->f_flags & O_NONBLOCK)) {
5895 			return -EAGAIN;
5896 		}
5897 
5898 		/*
5899 		 * We block until we read something and tracing is disabled.
5900 		 * We still block if tracing is disabled, but we have never
5901 		 * read anything. This allows a user to cat this file, and
5902 		 * then enable tracing. But after we have read something,
5903 		 * we give an EOF when tracing is again disabled.
5904 		 *
5905 		 * iter->pos will be 0 if we haven't read anything.
5906 		 */
5907 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5908 			break;
5909 
5910 		mutex_unlock(&iter->mutex);
5911 
5912 		ret = wait_on_pipe(iter, 0);
5913 
5914 		mutex_lock(&iter->mutex);
5915 
5916 		if (ret)
5917 			return ret;
5918 	}
5919 
5920 	return 1;
5921 }
5922 
5923 /*
5924  * Consumer reader.
5925  */
5926 static ssize_t
5927 tracing_read_pipe(struct file *filp, char __user *ubuf,
5928 		  size_t cnt, loff_t *ppos)
5929 {
5930 	struct trace_iterator *iter = filp->private_data;
5931 	ssize_t sret;
5932 
5933 	/*
5934 	 * Avoid more than one consumer on a single file descriptor.
5935 	 * This is just a matter of trace coherency; the ring buffer itself
5936 	 * is protected.
5937 	 */
5938 	mutex_lock(&iter->mutex);
5939 
5940 	/* return any leftover data */
5941 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5942 	if (sret != -EBUSY)
5943 		goto out;
5944 
5945 	trace_seq_init(&iter->seq);
5946 
5947 	if (iter->trace->read) {
5948 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5949 		if (sret)
5950 			goto out;
5951 	}
5952 
5953 waitagain:
5954 	sret = tracing_wait_pipe(filp);
5955 	if (sret <= 0)
5956 		goto out;
5957 
5958 	/* stop when tracing is finished */
5959 	if (trace_empty(iter)) {
5960 		sret = 0;
5961 		goto out;
5962 	}
5963 
5964 	if (cnt >= PAGE_SIZE)
5965 		cnt = PAGE_SIZE - 1;
5966 
5967 	/* reset all but tr, trace, and overruns */
5968 	memset(&iter->seq, 0,
5969 	       sizeof(struct trace_iterator) -
5970 	       offsetof(struct trace_iterator, seq));
5971 	cpumask_clear(iter->started);
5972 	iter->pos = -1;
5973 
5974 	trace_event_read_lock();
5975 	trace_access_lock(iter->cpu_file);
5976 	while (trace_find_next_entry_inc(iter) != NULL) {
5977 		enum print_line_t ret;
5978 		int save_len = iter->seq.seq.len;
5979 
5980 		ret = print_trace_line(iter);
5981 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5982 			/* don't print partial lines */
5983 			iter->seq.seq.len = save_len;
5984 			break;
5985 		}
5986 		if (ret != TRACE_TYPE_NO_CONSUME)
5987 			trace_consume(iter);
5988 
5989 		if (trace_seq_used(&iter->seq) >= cnt)
5990 			break;
5991 
5992 		/*
5993 		 * Setting the full flag means we reached the trace_seq buffer
5994 		 * size and should have left via the partial-line condition above.
5995 		 * One of the trace_seq_* functions is not being used properly.
5996 		 */
5997 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5998 			  iter->ent->type);
5999 	}
6000 	trace_access_unlock(iter->cpu_file);
6001 	trace_event_read_unlock();
6002 
6003 	/* Now copy what we have to the user */
6004 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6005 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6006 		trace_seq_init(&iter->seq);
6007 
6008 	/*
6009 	 * If there was nothing to send to the user, in spite of consuming trace
6010 	 * entries, go back and wait for more entries.
6011 	 */
6012 	if (sret == -EBUSY)
6013 		goto waitagain;
6014 
6015 out:
6016 	mutex_unlock(&iter->mutex);
6017 
6018 	return sret;
6019 }
6020 
6021 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6022 				     unsigned int idx)
6023 {
6024 	__free_page(spd->pages[idx]);
6025 }
6026 
6027 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6028 	.confirm		= generic_pipe_buf_confirm,
6029 	.release		= generic_pipe_buf_release,
6030 	.steal			= generic_pipe_buf_steal,
6031 	.get			= generic_pipe_buf_get,
6032 };
6033 
6034 static size_t
6035 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6036 {
6037 	size_t count;
6038 	int save_len;
6039 	int ret;
6040 
6041 	/* Seq buffer is page-sized, exactly what we need. */
6042 	for (;;) {
6043 		save_len = iter->seq.seq.len;
6044 		ret = print_trace_line(iter);
6045 
6046 		if (trace_seq_has_overflowed(&iter->seq)) {
6047 			iter->seq.seq.len = save_len;
6048 			break;
6049 		}
6050 
6051 		/*
6052 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6053 		 * only be returned if the iter->seq overflowed. But check it
6054 		 * anyway to be safe.
6055 		 */
6056 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6057 			iter->seq.seq.len = save_len;
6058 			break;
6059 		}
6060 
6061 		count = trace_seq_used(&iter->seq) - save_len;
6062 		if (rem < count) {
6063 			rem = 0;
6064 			iter->seq.seq.len = save_len;
6065 			break;
6066 		}
6067 
6068 		if (ret != TRACE_TYPE_NO_CONSUME)
6069 			trace_consume(iter);
6070 		rem -= count;
6071 		if (!trace_find_next_entry_inc(iter))	{
6072 			rem = 0;
6073 			iter->ent = NULL;
6074 			break;
6075 		}
6076 	}
6077 
6078 	return rem;
6079 }
6080 
6081 static ssize_t tracing_splice_read_pipe(struct file *filp,
6082 					loff_t *ppos,
6083 					struct pipe_inode_info *pipe,
6084 					size_t len,
6085 					unsigned int flags)
6086 {
6087 	struct page *pages_def[PIPE_DEF_BUFFERS];
6088 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6089 	struct trace_iterator *iter = filp->private_data;
6090 	struct splice_pipe_desc spd = {
6091 		.pages		= pages_def,
6092 		.partial	= partial_def,
6093 		.nr_pages	= 0, /* This gets updated below. */
6094 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6095 		.ops		= &tracing_pipe_buf_ops,
6096 		.spd_release	= tracing_spd_release_pipe,
6097 	};
6098 	ssize_t ret;
6099 	size_t rem;
6100 	unsigned int i;
6101 
6102 	if (splice_grow_spd(pipe, &spd))
6103 		return -ENOMEM;
6104 
6105 	mutex_lock(&iter->mutex);
6106 
6107 	if (iter->trace->splice_read) {
6108 		ret = iter->trace->splice_read(iter, filp,
6109 					       ppos, pipe, len, flags);
6110 		if (ret)
6111 			goto out_err;
6112 	}
6113 
6114 	ret = tracing_wait_pipe(filp);
6115 	if (ret <= 0)
6116 		goto out_err;
6117 
6118 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6119 		ret = -EFAULT;
6120 		goto out_err;
6121 	}
6122 
6123 	trace_event_read_lock();
6124 	trace_access_lock(iter->cpu_file);
6125 
6126 	/* Fill as many pages as possible. */
6127 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6128 		spd.pages[i] = alloc_page(GFP_KERNEL);
6129 		if (!spd.pages[i])
6130 			break;
6131 
6132 		rem = tracing_fill_pipe_page(rem, iter);
6133 
6134 		/* Copy the data into the page, so we can start over. */
6135 		ret = trace_seq_to_buffer(&iter->seq,
6136 					  page_address(spd.pages[i]),
6137 					  trace_seq_used(&iter->seq));
6138 		if (ret < 0) {
6139 			__free_page(spd.pages[i]);
6140 			break;
6141 		}
6142 		spd.partial[i].offset = 0;
6143 		spd.partial[i].len = trace_seq_used(&iter->seq);
6144 
6145 		trace_seq_init(&iter->seq);
6146 	}
6147 
6148 	trace_access_unlock(iter->cpu_file);
6149 	trace_event_read_unlock();
6150 	mutex_unlock(&iter->mutex);
6151 
6152 	spd.nr_pages = i;
6153 
6154 	if (i)
6155 		ret = splice_to_pipe(pipe, &spd);
6156 	else
6157 		ret = 0;
6158 out:
6159 	splice_shrink_spd(&spd);
6160 	return ret;
6161 
6162 out_err:
6163 	mutex_unlock(&iter->mutex);
6164 	goto out;
6165 }
6166 
6167 static ssize_t
6168 tracing_entries_read(struct file *filp, char __user *ubuf,
6169 		     size_t cnt, loff_t *ppos)
6170 {
6171 	struct inode *inode = file_inode(filp);
6172 	struct trace_array *tr = inode->i_private;
6173 	int cpu = tracing_get_cpu(inode);
6174 	char buf[64];
6175 	int r = 0;
6176 	ssize_t ret;
6177 
6178 	mutex_lock(&trace_types_lock);
6179 
6180 	if (cpu == RING_BUFFER_ALL_CPUS) {
6181 		int cpu, buf_size_same;
6182 		unsigned long size;
6183 
6184 		size = 0;
6185 		buf_size_same = 1;
6186 		/* check if all cpu sizes are same */
6187 		/* check if all cpu sizes are the same */
6188 		for_each_tracing_cpu(cpu) {
6189 			/* fill in the size from the first enabled cpu */
6190 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6191 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6192 				buf_size_same = 0;
6193 				break;
6194 			}
6195 		}
6196 
6197 		if (buf_size_same) {
6198 			if (!ring_buffer_expanded)
6199 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6200 					    size >> 10,
6201 					    trace_buf_size >> 10);
6202 			else
6203 				r = sprintf(buf, "%lu\n", size >> 10);
6204 		} else
6205 			r = sprintf(buf, "X\n");
6206 	} else
6207 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6208 
6209 	mutex_unlock(&trace_types_lock);
6210 
6211 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6212 	return ret;
6213 }
6214 
6215 static ssize_t
6216 tracing_entries_write(struct file *filp, const char __user *ubuf,
6217 		      size_t cnt, loff_t *ppos)
6218 {
6219 	struct inode *inode = file_inode(filp);
6220 	struct trace_array *tr = inode->i_private;
6221 	unsigned long val;
6222 	int ret;
6223 
6224 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6225 	if (ret)
6226 		return ret;
6227 
6228 	/* must have at least 1 entry */
6229 	if (!val)
6230 		return -EINVAL;
6231 
6232 	/* value is in KB */
6233 	val <<= 10;
6234 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6235 	if (ret < 0)
6236 		return ret;
6237 
6238 	*ppos += cnt;
6239 
6240 	return cnt;
6241 }
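
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing: the value
 * written is interpreted in KB (see the "val <<= 10" above), so
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes each per-cpu buffer to 4 MB, while writing to
 * per_cpu/cpu0/buffer_size_kb resizes only CPU 0's buffer.
 */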
6242 
6243 static ssize_t
6244 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6245 				size_t cnt, loff_t *ppos)
6246 {
6247 	struct trace_array *tr = filp->private_data;
6248 	char buf[64];
6249 	int r, cpu;
6250 	unsigned long size = 0, expanded_size = 0;
6251 
6252 	mutex_lock(&trace_types_lock);
6253 	for_each_tracing_cpu(cpu) {
6254 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6255 		if (!ring_buffer_expanded)
6256 			expanded_size += trace_buf_size >> 10;
6257 	}
6258 	if (ring_buffer_expanded)
6259 		r = sprintf(buf, "%lu\n", size);
6260 	else
6261 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6262 	mutex_unlock(&trace_types_lock);
6263 
6264 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6265 }
6266 
6267 static ssize_t
6268 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6269 			  size_t cnt, loff_t *ppos)
6270 {
6271 	/*
6272 	 * There is no need to read what the user has written; this function
6273 	 * exists just to make sure that there is no error when "echo" is used.
6274 	 */
6275 
6276 	*ppos += cnt;
6277 
6278 	return cnt;
6279 }
6280 
6281 static int
6282 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6283 {
6284 	struct trace_array *tr = inode->i_private;
6285 
6286 	/* disable tracing? */
6287 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6288 		tracer_tracing_off(tr);
6289 	/* resize the ring buffer to 0 */
6290 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6291 
6292 	trace_array_put(tr);
6293 
6294 	return 0;
6295 }
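
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing: closing the
 * "free_buffer" file after a write shrinks the ring buffer to zero (and
 * turns tracing off first when the TRACE_ITER_STOP_ON_FREE flag is set):
 *
 *   echo > /sys/kernel/tracing/free_buffer
 */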
6296 
6297 static ssize_t
6298 tracing_mark_write(struct file *filp, const char __user *ubuf,
6299 					size_t cnt, loff_t *fpos)
6300 {
6301 	struct trace_array *tr = filp->private_data;
6302 	struct ring_buffer_event *event;
6303 	enum event_trigger_type tt = ETT_NONE;
6304 	struct ring_buffer *buffer;
6305 	struct print_entry *entry;
6306 	unsigned long irq_flags;
6307 	const char faulted[] = "<faulted>";
6308 	ssize_t written;
6309 	int size;
6310 	int len;
6311 
6312 /* Used in tracing_mark_raw_write() as well */
6313 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6314 
6315 	if (tracing_disabled)
6316 		return -EINVAL;
6317 
6318 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6319 		return -EINVAL;
6320 
6321 	if (cnt > TRACE_BUF_SIZE)
6322 		cnt = TRACE_BUF_SIZE;
6323 
6324 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6325 
6326 	local_save_flags(irq_flags);
6327 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6328 
6329 	/* If less than "<faulted>", then make sure we can still add that */
6330 	if (cnt < FAULTED_SIZE)
6331 		size += FAULTED_SIZE - cnt;
6332 
6333 	buffer = tr->trace_buffer.buffer;
6334 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6335 					    irq_flags, preempt_count());
6336 	if (unlikely(!event))
6337 		/* Ring buffer disabled, return as if not open for write */
6338 		return -EBADF;
6339 
6340 	entry = ring_buffer_event_data(event);
6341 	entry->ip = _THIS_IP_;
6342 
6343 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6344 	if (len) {
6345 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6346 		cnt = FAULTED_SIZE;
6347 		written = -EFAULT;
6348 	} else
6349 		written = cnt;
6350 	len = cnt;
6351 
6352 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6353 		/* do not add \n before testing triggers, but add \0 */
6354 		entry->buf[cnt] = '\0';
6355 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6356 	}
6357 
6358 	if (entry->buf[cnt - 1] != '\n') {
6359 		entry->buf[cnt] = '\n';
6360 		entry->buf[cnt + 1] = '\0';
6361 	} else
6362 		entry->buf[cnt] = '\0';
6363 
6364 	__buffer_unlock_commit(buffer, event);
6365 
6366 	if (tt)
6367 		event_triggers_post_call(tr->trace_marker_file, tt);
6368 
6369 	if (written > 0)
6370 		*fpos += written;
6371 
6372 	return written;
6373 }
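
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing: userspace
 * can annotate the trace by writing text to "trace_marker", which lands
 * in tracing_mark_write() above and is recorded as a TRACE_PRINT event:
 *
 *   echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 */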
6374 
6375 /* Limit it for now to 3K (including tag) */
6376 #define RAW_DATA_MAX_SIZE (1024*3)
6377 
6378 static ssize_t
6379 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6380 					size_t cnt, loff_t *fpos)
6381 {
6382 	struct trace_array *tr = filp->private_data;
6383 	struct ring_buffer_event *event;
6384 	struct ring_buffer *buffer;
6385 	struct raw_data_entry *entry;
6386 	const char faulted[] = "<faulted>";
6387 	unsigned long irq_flags;
6388 	ssize_t written;
6389 	int size;
6390 	int len;
6391 
6392 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6393 
6394 	if (tracing_disabled)
6395 		return -EINVAL;
6396 
6397 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6398 		return -EINVAL;
6399 
6400 	/* The marker must at least have a tag id */
6401 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6402 		return -EINVAL;
6403 
6404 	if (cnt > TRACE_BUF_SIZE)
6405 		cnt = TRACE_BUF_SIZE;
6406 
6407 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6408 
6409 	local_save_flags(irq_flags);
6410 	size = sizeof(*entry) + cnt;
6411 	if (cnt < FAULT_SIZE_ID)
6412 		size += FAULT_SIZE_ID - cnt;
6413 
6414 	buffer = tr->trace_buffer.buffer;
6415 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6416 					    irq_flags, preempt_count());
6417 	if (!event)
6418 		/* Ring buffer disabled, return as if not open for write */
6419 		return -EBADF;
6420 
6421 	entry = ring_buffer_event_data(event);
6422 
6423 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6424 	if (len) {
6425 		entry->id = -1;
6426 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6427 		written = -EFAULT;
6428 	} else
6429 		written = cnt;
6430 
6431 	__buffer_unlock_commit(buffer, event);
6432 
6433 	if (written > 0)
6434 		*fpos += written;
6435 
6436 	return written;
6437 }
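
/*
 * Unlike trace_marker above, the "trace_marker_raw" file expects binary
 * data: the first sizeof(int) bytes are an application-defined tag id and
 * the rest is opaque payload, recorded as a TRACE_RAW_DATA event. A
 * minimal, hypothetical writer sketch (not part of this file):
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   struct { int id; char data[8]; } msg = { .id = 42, .data = "payload" };
 *   write(fd, &msg, sizeof(msg));
 */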
6438 
6439 static int tracing_clock_show(struct seq_file *m, void *v)
6440 {
6441 	struct trace_array *tr = m->private;
6442 	int i;
6443 
6444 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6445 		seq_printf(m,
6446 			"%s%s%s%s", i ? " " : "",
6447 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6448 			i == tr->clock_id ? "]" : "");
6449 	seq_putc(m, '\n');
6450 
6451 	return 0;
6452 }
6453 
6454 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6455 {
6456 	int i;
6457 
6458 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6459 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6460 			break;
6461 	}
6462 	if (i == ARRAY_SIZE(trace_clocks))
6463 		return -EINVAL;
6464 
6465 	mutex_lock(&trace_types_lock);
6466 
6467 	tr->clock_id = i;
6468 
6469 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6470 
6471 	/*
6472 	 * New clock may not be consistent with the previous clock.
6473 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6474 	 */
6475 	tracing_reset_online_cpus(&tr->trace_buffer);
6476 
6477 #ifdef CONFIG_TRACER_MAX_TRACE
6478 	if (tr->max_buffer.buffer)
6479 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6480 	tracing_reset_online_cpus(&tr->max_buffer);
6481 #endif
6482 
6483 	mutex_unlock(&trace_types_lock);
6484 
6485 	return 0;
6486 }
6487 
6488 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6489 				   size_t cnt, loff_t *fpos)
6490 {
6491 	struct seq_file *m = filp->private_data;
6492 	struct trace_array *tr = m->private;
6493 	char buf[64];
6494 	const char *clockstr;
6495 	int ret;
6496 
6497 	if (cnt >= sizeof(buf))
6498 		return -EINVAL;
6499 
6500 	if (copy_from_user(buf, ubuf, cnt))
6501 		return -EFAULT;
6502 
6503 	buf[cnt] = 0;
6504 
6505 	clockstr = strstrip(buf);
6506 
6507 	ret = tracing_set_clock(tr, clockstr);
6508 	if (ret)
6509 		return ret;
6510 
6511 	*fpos += cnt;
6512 
6513 	return cnt;
6514 }
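
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing: the trace
 * clock is selected by writing one of the names listed by "trace_clock"
 * (e.g. "local", "global", "mono"); as noted in tracing_set_clock(), this
 * resets the buffers so that timestamps stay comparable:
 *
 *   echo mono > /sys/kernel/tracing/trace_clock
 */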
6515 
6516 static int tracing_clock_open(struct inode *inode, struct file *file)
6517 {
6518 	struct trace_array *tr = inode->i_private;
6519 	int ret;
6520 
6521 	if (tracing_disabled)
6522 		return -ENODEV;
6523 
6524 	if (trace_array_get(tr))
6525 		return -ENODEV;
6526 
6527 	ret = single_open(file, tracing_clock_show, inode->i_private);
6528 	if (ret < 0)
6529 		trace_array_put(tr);
6530 
6531 	return ret;
6532 }
6533 
6534 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6535 {
6536 	struct trace_array *tr = m->private;
6537 
6538 	mutex_lock(&trace_types_lock);
6539 
6540 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6541 		seq_puts(m, "delta [absolute]\n");
6542 	else
6543 		seq_puts(m, "[delta] absolute\n");
6544 
6545 	mutex_unlock(&trace_types_lock);
6546 
6547 	return 0;
6548 }
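
/*
 * Example output of the "timestamp_mode" file produced by the function
 * above: "[delta] absolute" when delta timestamps are in use, or
 * "delta [absolute]" when absolute timestamps are enabled (the brackets
 * mark the current mode).
 */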
6549 
6550 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6551 {
6552 	struct trace_array *tr = inode->i_private;
6553 	int ret;
6554 
6555 	if (tracing_disabled)
6556 		return -ENODEV;
6557 
6558 	if (trace_array_get(tr))
6559 		return -ENODEV;
6560 
6561 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6562 	if (ret < 0)
6563 		trace_array_put(tr);
6564 
6565 	return ret;
6566 }
6567 
6568 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6569 {
6570 	int ret = 0;
6571 
6572 	mutex_lock(&trace_types_lock);
6573 
6574 	if (abs && tr->time_stamp_abs_ref++)
6575 		goto out;
6576 
6577 	if (!abs) {
6578 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6579 			ret = -EINVAL;
6580 			goto out;
6581 		}
6582 
6583 		if (--tr->time_stamp_abs_ref)
6584 			goto out;
6585 	}
6586 
6587 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6588 
6589 #ifdef CONFIG_TRACER_MAX_TRACE
6590 	if (tr->max_buffer.buffer)
6591 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6592 #endif
6593  out:
6594 	mutex_unlock(&trace_types_lock);
6595 
6596 	return ret;
6597 }
6598 
6599 struct ftrace_buffer_info {
6600 	struct trace_iterator	iter;
6601 	void			*spare;
6602 	unsigned int		spare_cpu;
6603 	unsigned int		read;
6604 };
6605 
6606 #ifdef CONFIG_TRACER_SNAPSHOT
6607 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6608 {
6609 	struct trace_array *tr = inode->i_private;
6610 	struct trace_iterator *iter;
6611 	struct seq_file *m;
6612 	int ret = 0;
6613 
6614 	if (trace_array_get(tr) < 0)
6615 		return -ENODEV;
6616 
6617 	if (file->f_mode & FMODE_READ) {
6618 		iter = __tracing_open(inode, file, true);
6619 		if (IS_ERR(iter))
6620 			ret = PTR_ERR(iter);
6621 	} else {
6622 		/* Writes still need the seq_file to hold the private data */
6623 		ret = -ENOMEM;
6624 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6625 		if (!m)
6626 			goto out;
6627 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6628 		if (!iter) {
6629 			kfree(m);
6630 			goto out;
6631 		}
6632 		ret = 0;
6633 
6634 		iter->tr = tr;
6635 		iter->trace_buffer = &tr->max_buffer;
6636 		iter->cpu_file = tracing_get_cpu(inode);
6637 		m->private = iter;
6638 		file->private_data = m;
6639 	}
6640 out:
6641 	if (ret < 0)
6642 		trace_array_put(tr);
6643 
6644 	return ret;
6645 }
6646 
6647 static ssize_t
6648 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6649 		       loff_t *ppos)
6650 {
6651 	struct seq_file *m = filp->private_data;
6652 	struct trace_iterator *iter = m->private;
6653 	struct trace_array *tr = iter->tr;
6654 	unsigned long val;
6655 	int ret;
6656 
6657 	ret = tracing_update_buffers();
6658 	if (ret < 0)
6659 		return ret;
6660 
6661 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6662 	if (ret)
6663 		return ret;
6664 
6665 	mutex_lock(&trace_types_lock);
6666 
6667 	if (tr->current_trace->use_max_tr) {
6668 		ret = -EBUSY;
6669 		goto out;
6670 	}
6671 
6672 	arch_spin_lock(&tr->max_lock);
6673 	if (tr->cond_snapshot)
6674 		ret = -EBUSY;
6675 	arch_spin_unlock(&tr->max_lock);
6676 	if (ret)
6677 		goto out;
6678 
6679 	switch (val) {
6680 	case 0:
6681 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6682 			ret = -EINVAL;
6683 			break;
6684 		}
6685 		if (tr->allocated_snapshot)
6686 			free_snapshot(tr);
6687 		break;
6688 	case 1:
6689 /* Only allow per-cpu swap if the ring buffer supports it */
6690 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6691 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6692 			ret = -EINVAL;
6693 			break;
6694 		}
6695 #endif
6696 		if (!tr->allocated_snapshot) {
6697 			ret = tracing_alloc_snapshot_instance(tr);
6698 			if (ret < 0)
6699 				break;
6700 		}
6701 		local_irq_disable();
6702 		/* Now, we're going to swap */
6703 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6704 			update_max_tr(tr, current, smp_processor_id(), NULL);
6705 		else
6706 			update_max_tr_single(tr, current, iter->cpu_file);
6707 		local_irq_enable();
6708 		break;
6709 	default:
6710 		if (tr->allocated_snapshot) {
6711 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6712 				tracing_reset_online_cpus(&tr->max_buffer);
6713 			else
6714 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6715 		}
6716 		break;
6717 	}
6718 
6719 	if (ret >= 0) {
6720 		*ppos += cnt;
6721 		ret = cnt;
6722 	}
6723 out:
6724 	mutex_unlock(&trace_types_lock);
6725 	return ret;
6726 }
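
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing, matching
 * the switch above: writing "1" to "snapshot" allocates (if needed) and
 * swaps in the snapshot buffer, "0" frees it, and any other value clears
 * an allocated snapshot buffer:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # take a snapshot
 *   cat /sys/kernel/tracing/snapshot        # read it back
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */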
6727 
6728 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6729 {
6730 	struct seq_file *m = file->private_data;
6731 	int ret;
6732 
6733 	ret = tracing_release(inode, file);
6734 
6735 	if (file->f_mode & FMODE_READ)
6736 		return ret;
6737 
6738 	/* If write only, the seq_file is just a stub */
6739 	if (m)
6740 		kfree(m->private);
6741 	kfree(m);
6742 
6743 	return 0;
6744 }
6745 
6746 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6747 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6748 				    size_t count, loff_t *ppos);
6749 static int tracing_buffers_release(struct inode *inode, struct file *file);
6750 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6751 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6752 
6753 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6754 {
6755 	struct ftrace_buffer_info *info;
6756 	int ret;
6757 
6758 	ret = tracing_buffers_open(inode, filp);
6759 	if (ret < 0)
6760 		return ret;
6761 
6762 	info = filp->private_data;
6763 
6764 	if (info->iter.trace->use_max_tr) {
6765 		tracing_buffers_release(inode, filp);
6766 		return -EBUSY;
6767 	}
6768 
6769 	info->iter.snapshot = true;
6770 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6771 
6772 	return ret;
6773 }
6774 
6775 #endif /* CONFIG_TRACER_SNAPSHOT */
6776 
6777 
6778 static const struct file_operations tracing_thresh_fops = {
6779 	.open		= tracing_open_generic,
6780 	.read		= tracing_thresh_read,
6781 	.write		= tracing_thresh_write,
6782 	.llseek		= generic_file_llseek,
6783 };
6784 
6785 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6786 static const struct file_operations tracing_max_lat_fops = {
6787 	.open		= tracing_open_generic,
6788 	.read		= tracing_max_lat_read,
6789 	.write		= tracing_max_lat_write,
6790 	.llseek		= generic_file_llseek,
6791 };
6792 #endif
6793 
6794 static const struct file_operations set_tracer_fops = {
6795 	.open		= tracing_open_generic,
6796 	.read		= tracing_set_trace_read,
6797 	.write		= tracing_set_trace_write,
6798 	.llseek		= generic_file_llseek,
6799 };
6800 
6801 static const struct file_operations tracing_pipe_fops = {
6802 	.open		= tracing_open_pipe,
6803 	.poll		= tracing_poll_pipe,
6804 	.read		= tracing_read_pipe,
6805 	.splice_read	= tracing_splice_read_pipe,
6806 	.release	= tracing_release_pipe,
6807 	.llseek		= no_llseek,
6808 };
6809 
6810 static const struct file_operations tracing_entries_fops = {
6811 	.open		= tracing_open_generic_tr,
6812 	.read		= tracing_entries_read,
6813 	.write		= tracing_entries_write,
6814 	.llseek		= generic_file_llseek,
6815 	.release	= tracing_release_generic_tr,
6816 };
6817 
6818 static const struct file_operations tracing_total_entries_fops = {
6819 	.open		= tracing_open_generic_tr,
6820 	.read		= tracing_total_entries_read,
6821 	.llseek		= generic_file_llseek,
6822 	.release	= tracing_release_generic_tr,
6823 };
6824 
6825 static const struct file_operations tracing_free_buffer_fops = {
6826 	.open		= tracing_open_generic_tr,
6827 	.write		= tracing_free_buffer_write,
6828 	.release	= tracing_free_buffer_release,
6829 };
6830 
6831 static const struct file_operations tracing_mark_fops = {
6832 	.open		= tracing_open_generic_tr,
6833 	.write		= tracing_mark_write,
6834 	.llseek		= generic_file_llseek,
6835 	.release	= tracing_release_generic_tr,
6836 };
6837 
6838 static const struct file_operations tracing_mark_raw_fops = {
6839 	.open		= tracing_open_generic_tr,
6840 	.write		= tracing_mark_raw_write,
6841 	.llseek		= generic_file_llseek,
6842 	.release	= tracing_release_generic_tr,
6843 };
6844 
6845 static const struct file_operations trace_clock_fops = {
6846 	.open		= tracing_clock_open,
6847 	.read		= seq_read,
6848 	.llseek		= seq_lseek,
6849 	.release	= tracing_single_release_tr,
6850 	.write		= tracing_clock_write,
6851 };
6852 
6853 static const struct file_operations trace_time_stamp_mode_fops = {
6854 	.open		= tracing_time_stamp_mode_open,
6855 	.read		= seq_read,
6856 	.llseek		= seq_lseek,
6857 	.release	= tracing_single_release_tr,
6858 };
6859 
6860 #ifdef CONFIG_TRACER_SNAPSHOT
6861 static const struct file_operations snapshot_fops = {
6862 	.open		= tracing_snapshot_open,
6863 	.read		= seq_read,
6864 	.write		= tracing_snapshot_write,
6865 	.llseek		= tracing_lseek,
6866 	.release	= tracing_snapshot_release,
6867 };
6868 
6869 static const struct file_operations snapshot_raw_fops = {
6870 	.open		= snapshot_raw_open,
6871 	.read		= tracing_buffers_read,
6872 	.release	= tracing_buffers_release,
6873 	.splice_read	= tracing_buffers_splice_read,
6874 	.llseek		= no_llseek,
6875 };
6876 
6877 #endif /* CONFIG_TRACER_SNAPSHOT */
6878 
6879 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6880 {
6881 	struct trace_array *tr = inode->i_private;
6882 	struct ftrace_buffer_info *info;
6883 	int ret;
6884 
6885 	if (tracing_disabled)
6886 		return -ENODEV;
6887 
6888 	if (trace_array_get(tr) < 0)
6889 		return -ENODEV;
6890 
6891 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6892 	if (!info) {
6893 		trace_array_put(tr);
6894 		return -ENOMEM;
6895 	}
6896 
6897 	mutex_lock(&trace_types_lock);
6898 
6899 	info->iter.tr		= tr;
6900 	info->iter.cpu_file	= tracing_get_cpu(inode);
6901 	info->iter.trace	= tr->current_trace;
6902 	info->iter.trace_buffer = &tr->trace_buffer;
6903 	info->spare		= NULL;
6904 	/* Force reading ring buffer for first read */
6905 	info->read		= (unsigned int)-1;
6906 
6907 	filp->private_data = info;
6908 
6909 	tr->current_trace->ref++;
6910 
6911 	mutex_unlock(&trace_types_lock);
6912 
6913 	ret = nonseekable_open(inode, filp);
6914 	if (ret < 0)
6915 		trace_array_put(tr);
6916 
6917 	return ret;
6918 }
6919 
6920 static __poll_t
6921 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6922 {
6923 	struct ftrace_buffer_info *info = filp->private_data;
6924 	struct trace_iterator *iter = &info->iter;
6925 
6926 	return trace_poll(iter, filp, poll_table);
6927 }
6928 
6929 static ssize_t
6930 tracing_buffers_read(struct file *filp, char __user *ubuf,
6931 		     size_t count, loff_t *ppos)
6932 {
6933 	struct ftrace_buffer_info *info = filp->private_data;
6934 	struct trace_iterator *iter = &info->iter;
6935 	ssize_t ret = 0;
6936 	ssize_t size;
6937 
6938 	if (!count)
6939 		return 0;
6940 
6941 #ifdef CONFIG_TRACER_MAX_TRACE
6942 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6943 		return -EBUSY;
6944 #endif
6945 
6946 	if (!info->spare) {
6947 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6948 							  iter->cpu_file);
6949 		if (IS_ERR(info->spare)) {
6950 			ret = PTR_ERR(info->spare);
6951 			info->spare = NULL;
6952 		} else {
6953 			info->spare_cpu = iter->cpu_file;
6954 		}
6955 	}
6956 	if (!info->spare)
6957 		return ret;
6958 
6959 	/* Do we have previous read data to read? */
6960 	if (info->read < PAGE_SIZE)
6961 		goto read;
6962 
6963  again:
6964 	trace_access_lock(iter->cpu_file);
6965 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6966 				    &info->spare,
6967 				    count,
6968 				    iter->cpu_file, 0);
6969 	trace_access_unlock(iter->cpu_file);
6970 
6971 	if (ret < 0) {
6972 		if (trace_empty(iter)) {
6973 			if ((filp->f_flags & O_NONBLOCK))
6974 				return -EAGAIN;
6975 
6976 			ret = wait_on_pipe(iter, 0);
6977 			if (ret)
6978 				return ret;
6979 
6980 			goto again;
6981 		}
6982 		return 0;
6983 	}
6984 
6985 	info->read = 0;
6986  read:
6987 	size = PAGE_SIZE - info->read;
6988 	if (size > count)
6989 		size = count;
6990 
6991 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6992 	if (ret == size)
6993 		return -EFAULT;
6994 
6995 	size -= ret;
6996 
6997 	*ppos += size;
6998 	info->read += size;
6999 
7000 	return size;
7001 }
7002 
7003 static int tracing_buffers_release(struct inode *inode, struct file *file)
7004 {
7005 	struct ftrace_buffer_info *info = file->private_data;
7006 	struct trace_iterator *iter = &info->iter;
7007 
7008 	mutex_lock(&trace_types_lock);
7009 
7010 	iter->tr->current_trace->ref--;
7011 
7012 	__trace_array_put(iter->tr);
7013 
7014 	if (info->spare)
7015 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7016 					   info->spare_cpu, info->spare);
7017 	kfree(info);
7018 
7019 	mutex_unlock(&trace_types_lock);
7020 
7021 	return 0;
7022 }
7023 
7024 struct buffer_ref {
7025 	struct ring_buffer	*buffer;
7026 	void			*page;
7027 	int			cpu;
7028 	int			ref;
7029 };
7030 
7031 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7032 				    struct pipe_buffer *buf)
7033 {
7034 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7035 
7036 	if (--ref->ref)
7037 		return;
7038 
7039 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7040 	kfree(ref);
7041 	buf->private = 0;
7042 }
7043 
7044 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7045 				struct pipe_buffer *buf)
7046 {
7047 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7048 
7049 	if (ref->ref > INT_MAX/2)
7050 		return false;
7051 
7052 	ref->ref++;
7053 	return true;
7054 }
7055 
7056 /* Pipe buffer operations for a buffer. */
7057 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7058 	.confirm		= generic_pipe_buf_confirm,
7059 	.release		= buffer_pipe_buf_release,
7060 	.steal			= generic_pipe_buf_steal,
7061 	.get			= buffer_pipe_buf_get,
7062 };
7063 
7064 /*
7065  * Callback from splice_to_pipe(), used to release some pages
7066  * at the end of the spd in case we errored out while filling the pipe.
7067  */
7068 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7069 {
7070 	struct buffer_ref *ref =
7071 		(struct buffer_ref *)spd->partial[i].private;
7072 
7073 	if (--ref->ref)
7074 		return;
7075 
7076 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7077 	kfree(ref);
7078 	spd->partial[i].private = 0;
7079 }
7080 
7081 static ssize_t
7082 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7083 			    struct pipe_inode_info *pipe, size_t len,
7084 			    unsigned int flags)
7085 {
7086 	struct ftrace_buffer_info *info = file->private_data;
7087 	struct trace_iterator *iter = &info->iter;
7088 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7089 	struct page *pages_def[PIPE_DEF_BUFFERS];
7090 	struct splice_pipe_desc spd = {
7091 		.pages		= pages_def,
7092 		.partial	= partial_def,
7093 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7094 		.ops		= &buffer_pipe_buf_ops,
7095 		.spd_release	= buffer_spd_release,
7096 	};
7097 	struct buffer_ref *ref;
7098 	int entries, i;
7099 	ssize_t ret = 0;
7100 
7101 #ifdef CONFIG_TRACER_MAX_TRACE
7102 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7103 		return -EBUSY;
7104 #endif
7105 
7106 	if (*ppos & (PAGE_SIZE - 1))
7107 		return -EINVAL;
7108 
7109 	if (len & (PAGE_SIZE - 1)) {
7110 		if (len < PAGE_SIZE)
7111 			return -EINVAL;
7112 		len &= PAGE_MASK;
7113 	}
7114 
7115 	if (splice_grow_spd(pipe, &spd))
7116 		return -ENOMEM;
7117 
7118  again:
7119 	trace_access_lock(iter->cpu_file);
7120 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7121 
7122 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7123 		struct page *page;
7124 		int r;
7125 
7126 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7127 		if (!ref) {
7128 			ret = -ENOMEM;
7129 			break;
7130 		}
7131 
7132 		ref->ref = 1;
7133 		ref->buffer = iter->trace_buffer->buffer;
7134 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7135 		if (IS_ERR(ref->page)) {
7136 			ret = PTR_ERR(ref->page);
7137 			ref->page = NULL;
7138 			kfree(ref);
7139 			break;
7140 		}
7141 		ref->cpu = iter->cpu_file;
7142 
7143 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7144 					  len, iter->cpu_file, 1);
7145 		if (r < 0) {
7146 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7147 						   ref->page);
7148 			kfree(ref);
7149 			break;
7150 		}
7151 
7152 		page = virt_to_page(ref->page);
7153 
7154 		spd.pages[i] = page;
7155 		spd.partial[i].len = PAGE_SIZE;
7156 		spd.partial[i].offset = 0;
7157 		spd.partial[i].private = (unsigned long)ref;
7158 		spd.nr_pages++;
7159 		*ppos += PAGE_SIZE;
7160 
7161 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7162 	}
7163 
7164 	trace_access_unlock(iter->cpu_file);
7165 	spd.nr_pages = i;
7166 
7167 	/* did we read anything? */
7168 	if (!spd.nr_pages) {
7169 		if (ret)
7170 			goto out;
7171 
7172 		ret = -EAGAIN;
7173 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7174 			goto out;
7175 
7176 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7177 		if (ret)
7178 			goto out;
7179 
7180 		goto again;
7181 	}
7182 
7183 	ret = splice_to_pipe(pipe, &spd);
7184 out:
7185 	splice_shrink_spd(&spd);
7186 
7187 	return ret;
7188 }
7189 
7190 static const struct file_operations tracing_buffers_fops = {
7191 	.open		= tracing_buffers_open,
7192 	.read		= tracing_buffers_read,
7193 	.poll		= tracing_buffers_poll,
7194 	.release	= tracing_buffers_release,
7195 	.splice_read	= tracing_buffers_splice_read,
7196 	.llseek		= no_llseek,
7197 };
7198 
7199 static ssize_t
7200 tracing_stats_read(struct file *filp, char __user *ubuf,
7201 		   size_t count, loff_t *ppos)
7202 {
7203 	struct inode *inode = file_inode(filp);
7204 	struct trace_array *tr = inode->i_private;
7205 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7206 	int cpu = tracing_get_cpu(inode);
7207 	struct trace_seq *s;
7208 	unsigned long cnt;
7209 	unsigned long long t;
7210 	unsigned long usec_rem;
7211 
7212 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7213 	if (!s)
7214 		return -ENOMEM;
7215 
7216 	trace_seq_init(s);
7217 
7218 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7219 	trace_seq_printf(s, "entries: %ld\n", cnt);
7220 
7221 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7222 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7223 
7224 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7225 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7226 
7227 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7228 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7229 
7230 	if (trace_clocks[tr->clock_id].in_ns) {
7231 		/* local or global for trace_clock */
7232 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7233 		usec_rem = do_div(t, USEC_PER_SEC);
7234 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7235 								t, usec_rem);
7236 
7237 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7238 		usec_rem = do_div(t, USEC_PER_SEC);
7239 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7240 	} else {
7241 		/* counter or tsc mode for trace_clock */
7242 		trace_seq_printf(s, "oldest event ts: %llu\n",
7243 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7244 
7245 		trace_seq_printf(s, "now ts: %llu\n",
7246 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7247 	}
7248 
7249 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7250 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7251 
7252 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7253 	trace_seq_printf(s, "read events: %ld\n", cnt);
7254 
7255 	count = simple_read_from_buffer(ubuf, count, ppos,
7256 					s->buffer, trace_seq_used(s));
7257 
7258 	kfree(s);
7259 
7260 	return count;
7261 }
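
/*
 * The per_cpu/cpuN/stats file produced above reports, per CPU: entries,
 * overrun, commit overrun, bytes, the oldest event and current timestamps
 * (in seconds.microseconds when the clock counts in nanoseconds), dropped
 * events, and read events.
 */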
7262 
7263 static const struct file_operations tracing_stats_fops = {
7264 	.open		= tracing_open_generic_tr,
7265 	.read		= tracing_stats_read,
7266 	.llseek		= generic_file_llseek,
7267 	.release	= tracing_release_generic_tr,
7268 };
7269 
7270 #ifdef CONFIG_DYNAMIC_FTRACE
7271 
7272 static ssize_t
7273 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7274 		  size_t cnt, loff_t *ppos)
7275 {
7276 	unsigned long *p = filp->private_data;
7277 	char buf[64]; /* Not too big for a shallow stack */
7278 	int r;
7279 
7280 	r = scnprintf(buf, 63, "%ld", *p);
7281 	buf[r++] = '\n';
7282 
7283 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7284 }
7285 
7286 static const struct file_operations tracing_dyn_info_fops = {
7287 	.open		= tracing_open_generic,
7288 	.read		= tracing_read_dyn_info,
7289 	.llseek		= generic_file_llseek,
7290 };
7291 #endif /* CONFIG_DYNAMIC_FTRACE */
7292 
7293 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7294 static void
7295 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7296 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7297 		void *data)
7298 {
7299 	tracing_snapshot_instance(tr);
7300 }
7301 
7302 static void
7303 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7304 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7305 		      void *data)
7306 {
7307 	struct ftrace_func_mapper *mapper = data;
7308 	long *count = NULL;
7309 
7310 	if (mapper)
7311 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7312 
7313 	if (count) {
7314 
7315 		if (*count <= 0)
7316 			return;
7317 
7318 		(*count)--;
7319 	}
7320 
7321 	tracing_snapshot_instance(tr);
7322 }
7323 
7324 static int
7325 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7326 		      struct ftrace_probe_ops *ops, void *data)
7327 {
7328 	struct ftrace_func_mapper *mapper = data;
7329 	long *count = NULL;
7330 
7331 	seq_printf(m, "%ps:", (void *)ip);
7332 
7333 	seq_puts(m, "snapshot");
7334 
7335 	if (mapper)
7336 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7337 
7338 	if (count)
7339 		seq_printf(m, ":count=%ld\n", *count);
7340 	else
7341 		seq_puts(m, ":unlimited\n");
7342 
7343 	return 0;
7344 }
7345 
7346 static int
7347 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7348 		     unsigned long ip, void *init_data, void **data)
7349 {
7350 	struct ftrace_func_mapper *mapper = *data;
7351 
7352 	if (!mapper) {
7353 		mapper = allocate_ftrace_func_mapper();
7354 		if (!mapper)
7355 			return -ENOMEM;
7356 		*data = mapper;
7357 	}
7358 
7359 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7360 }
7361 
7362 static void
7363 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7364 		     unsigned long ip, void *data)
7365 {
7366 	struct ftrace_func_mapper *mapper = data;
7367 
7368 	if (!ip) {
7369 		if (!mapper)
7370 			return;
7371 		free_ftrace_func_mapper(mapper, NULL);
7372 		return;
7373 	}
7374 
7375 	ftrace_func_mapper_remove_ip(mapper, ip);
7376 }
7377 
7378 static struct ftrace_probe_ops snapshot_probe_ops = {
7379 	.func			= ftrace_snapshot,
7380 	.print			= ftrace_snapshot_print,
7381 };
7382 
7383 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7384 	.func			= ftrace_count_snapshot,
7385 	.print			= ftrace_snapshot_print,
7386 	.init			= ftrace_snapshot_init,
7387 	.free			= ftrace_snapshot_free,
7388 };
7389 
7390 static int
7391 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7392 			       char *glob, char *cmd, char *param, int enable)
7393 {
7394 	struct ftrace_probe_ops *ops;
7395 	void *count = (void *)-1;
7396 	char *number;
7397 	int ret;
7398 
7399 	if (!tr)
7400 		return -ENODEV;
7401 
7402 	/* hash funcs only work with set_ftrace_filter */
7403 	if (!enable)
7404 		return -EINVAL;
7405 
7406 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7407 
7408 	if (glob[0] == '!')
7409 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7410 
7411 	if (!param)
7412 		goto out_reg;
7413 
7414 	number = strsep(&param, ":");
7415 
7416 	if (!strlen(number))
7417 		goto out_reg;
7418 
7419 	/*
7420 	 * We use the callback data field (which is a pointer)
7421 	 * as our counter.
7422 	 */
7423 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7424 	if (ret)
7425 		return ret;
7426 
7427  out_reg:
7428 	ret = tracing_alloc_snapshot_instance(tr);
7429 	if (ret < 0)
7430 		goto out;
7431 
7432 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7433 
7434  out:
7435 	return ret < 0 ? ret : 0;
7436 }
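
/*
 * Example, assuming tracefs is mounted at /sys/kernel/tracing: this
 * callback implements the "snapshot" command of set_ftrace_filter, with an
 * optional trigger count and a leading '!' to unregister, e.g.:
 *
 *   echo 'kfree:snapshot'   > /sys/kernel/tracing/set_ftrace_filter
 *   echo 'kfree:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *   echo '!kfree:snapshot'  > /sys/kernel/tracing/set_ftrace_filter
 */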
7437 
7438 static struct ftrace_func_command ftrace_snapshot_cmd = {
7439 	.name			= "snapshot",
7440 	.func			= ftrace_trace_snapshot_callback,
7441 };
7442 
7443 static __init int register_snapshot_cmd(void)
7444 {
7445 	return register_ftrace_command(&ftrace_snapshot_cmd);
7446 }
7447 #else
7448 static inline __init int register_snapshot_cmd(void) { return 0; }
7449 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7450 
7451 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7452 {
7453 	if (WARN_ON(!tr->dir))
7454 		return ERR_PTR(-ENODEV);
7455 
7456 	/* Top directory uses NULL as the parent */
7457 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7458 		return NULL;
7459 
7460 	/* All sub buffers have a descriptor */
7461 	return tr->dir;
7462 }
7463 
7464 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7465 {
7466 	struct dentry *d_tracer;
7467 
7468 	if (tr->percpu_dir)
7469 		return tr->percpu_dir;
7470 
7471 	d_tracer = tracing_get_dentry(tr);
7472 	if (IS_ERR(d_tracer))
7473 		return NULL;
7474 
7475 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7476 
7477 	WARN_ONCE(!tr->percpu_dir,
7478 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7479 
7480 	return tr->percpu_dir;
7481 }
7482 
7483 static struct dentry *
7484 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7485 		      void *data, long cpu, const struct file_operations *fops)
7486 {
7487 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7488 
7489 	if (ret) /* See tracing_get_cpu() */
7490 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7491 	return ret;
7492 }
7493 
7494 static void
7495 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7496 {
7497 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7498 	struct dentry *d_cpu;
7499 	char cpu_dir[30]; /* 30 characters should be more than enough */
7500 
7501 	if (!d_percpu)
7502 		return;
7503 
7504 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7505 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7506 	if (!d_cpu) {
7507 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7508 		return;
7509 	}
7510 
7511 	/* per cpu trace_pipe */
7512 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7513 				tr, cpu, &tracing_pipe_fops);
7514 
7515 	/* per cpu trace */
7516 	trace_create_cpu_file("trace", 0644, d_cpu,
7517 				tr, cpu, &tracing_fops);
7518 
7519 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7520 				tr, cpu, &tracing_buffers_fops);
7521 
7522 	trace_create_cpu_file("stats", 0444, d_cpu,
7523 				tr, cpu, &tracing_stats_fops);
7524 
7525 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7526 				tr, cpu, &tracing_entries_fops);
7527 
7528 #ifdef CONFIG_TRACER_SNAPSHOT
7529 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7530 				tr, cpu, &snapshot_fops);
7531 
7532 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7533 				tr, cpu, &snapshot_raw_fops);
7534 #endif
7535 }
7536 
7537 #ifdef CONFIG_FTRACE_SELFTEST
7538 /* Let selftest have access to static functions in this file */
7539 #include "trace_selftest.c"
7540 #endif
7541 
7542 static ssize_t
7543 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7544 			loff_t *ppos)
7545 {
7546 	struct trace_option_dentry *topt = filp->private_data;
7547 	char *buf;
7548 
7549 	if (topt->flags->val & topt->opt->bit)
7550 		buf = "1\n";
7551 	else
7552 		buf = "0\n";
7553 
7554 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7555 }
7556 
7557 static ssize_t
7558 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7559 			 loff_t *ppos)
7560 {
7561 	struct trace_option_dentry *topt = filp->private_data;
7562 	unsigned long val;
7563 	int ret;
7564 
7565 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7566 	if (ret)
7567 		return ret;
7568 
7569 	if (val != 0 && val != 1)
7570 		return -EINVAL;
7571 
7572 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7573 		mutex_lock(&trace_types_lock);
7574 		ret = __set_tracer_option(topt->tr, topt->flags,
7575 					  topt->opt, !val);
7576 		mutex_unlock(&trace_types_lock);
7577 		if (ret)
7578 			return ret;
7579 	}
7580 
7581 	*ppos += cnt;
7582 
7583 	return cnt;
7584 }
7585 
7586 
7587 static const struct file_operations trace_options_fops = {
7588 	.open = tracing_open_generic,
7589 	.read = trace_options_read,
7590 	.write = trace_options_write,
7591 	.llseek	= generic_file_llseek,
7592 };
7593 
7594 /*
7595  * In order to pass in both the trace_array descriptor as well as the index
7596  * to the flag that the trace option file represents, the trace_array
7597  * has a character array of trace_flags_index[], which holds the index
7598  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7599  * The address of this character array is passed to the flag option file
7600  * read/write callbacks.
7601  *
7602  * In order to extract both the index and the trace_array descriptor,
7603  * get_tr_index() uses the following algorithm.
7604  *
7605  *   idx = *ptr;
7606  *
7607  * As the pointer itself holds the address of its index entry, dereferencing
7608  * it yields the index value (remember index[1] == 1).
7609  *
7610  * Then, to get the trace_array descriptor, we subtract that index from
7611  * the ptr, which takes us to the start of the index array itself.
7612  *
7613  *   ptr - idx == &index[0]
7614  *
7615  * Then a simple container_of() from that pointer gets us to the
7616  * trace_array descriptor.
7617  */
7618 static void get_tr_index(void *data, struct trace_array **ptr,
7619 			 unsigned int *pindex)
7620 {
7621 	*pindex = *(unsigned char *)data;
7622 
7623 	*ptr = container_of(data - *pindex, struct trace_array,
7624 			    trace_flags_index);
7625 }
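
/*
 * Worked example of the scheme described above: if data points at
 * tr->trace_flags_index[3], then *data == 3, data - 3 == &tr->trace_flags_index[0],
 * and container_of() on that address recovers the enclosing trace_array.
 */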
7626 
7627 static ssize_t
7628 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7629 			loff_t *ppos)
7630 {
7631 	void *tr_index = filp->private_data;
7632 	struct trace_array *tr;
7633 	unsigned int index;
7634 	char *buf;
7635 
7636 	get_tr_index(tr_index, &tr, &index);
7637 
7638 	if (tr->trace_flags & (1 << index))
7639 		buf = "1\n";
7640 	else
7641 		buf = "0\n";
7642 
7643 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7644 }
7645 
7646 static ssize_t
7647 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7648 			 loff_t *ppos)
7649 {
7650 	void *tr_index = filp->private_data;
7651 	struct trace_array *tr;
7652 	unsigned int index;
7653 	unsigned long val;
7654 	int ret;
7655 
7656 	get_tr_index(tr_index, &tr, &index);
7657 
7658 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7659 	if (ret)
7660 		return ret;
7661 
7662 	if (val != 0 && val != 1)
7663 		return -EINVAL;
7664 
7665 	mutex_lock(&trace_types_lock);
7666 	ret = set_tracer_flag(tr, 1 << index, val);
7667 	mutex_unlock(&trace_types_lock);
7668 
7669 	if (ret < 0)
7670 		return ret;
7671 
7672 	*ppos += cnt;
7673 
7674 	return cnt;
7675 }
7676 
7677 static const struct file_operations trace_options_core_fops = {
7678 	.open = tracing_open_generic,
7679 	.read = trace_options_core_read,
7680 	.write = trace_options_core_write,
7681 	.llseek = generic_file_llseek,
7682 };
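
/*
 * The files backed by these fops are the core option files directly under
 * the "options" directory, one per bit in trace_flags. For example
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo 0 > /sys/kernel/tracing/options/print-parent
 *   # cat /sys/kernel/tracing/options/print-parent
 *   0
 */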
7683 
7684 struct dentry *trace_create_file(const char *name,
7685 				 umode_t mode,
7686 				 struct dentry *parent,
7687 				 void *data,
7688 				 const struct file_operations *fops)
7689 {
7690 	struct dentry *ret;
7691 
7692 	ret = tracefs_create_file(name, mode, parent, data, fops);
7693 	if (!ret)
7694 		pr_warn("Could not create tracefs '%s' entry\n", name);
7695 
7696 	return ret;
7697 }
7698 
7699 
7700 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7701 {
7702 	struct dentry *d_tracer;
7703 
7704 	if (tr->options)
7705 		return tr->options;
7706 
7707 	d_tracer = tracing_get_dentry(tr);
7708 	if (IS_ERR(d_tracer))
7709 		return NULL;
7710 
7711 	tr->options = tracefs_create_dir("options", d_tracer);
7712 	if (!tr->options) {
7713 		pr_warn("Could not create tracefs directory 'options'\n");
7714 		return NULL;
7715 	}
7716 
7717 	return tr->options;
7718 }
7719 
7720 static void
7721 create_trace_option_file(struct trace_array *tr,
7722 			 struct trace_option_dentry *topt,
7723 			 struct tracer_flags *flags,
7724 			 struct tracer_opt *opt)
7725 {
7726 	struct dentry *t_options;
7727 
7728 	t_options = trace_options_init_dentry(tr);
7729 	if (!t_options)
7730 		return;
7731 
7732 	topt->flags = flags;
7733 	topt->opt = opt;
7734 	topt->tr = tr;
7735 
7736 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7737 				    &trace_options_fops);
7738 
7739 }
7740 
7741 static void
7742 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7743 {
7744 	struct trace_option_dentry *topts;
7745 	struct trace_options *tr_topts;
7746 	struct tracer_flags *flags;
7747 	struct tracer_opt *opts;
7748 	int cnt;
7749 	int i;
7750 
7751 	if (!tracer)
7752 		return;
7753 
7754 	flags = tracer->flags;
7755 
7756 	if (!flags || !flags->opts)
7757 		return;
7758 
7759 	/*
7760 	 * If this is an instance, only create flags for tracers
7761 	 * the instance may have.
7762 	 */
7763 	if (!trace_ok_for_array(tracer, tr))
7764 		return;
7765 
7766 	for (i = 0; i < tr->nr_topts; i++) {
7767 		/* Make sure there are no duplicate flags. */
7768 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7769 			return;
7770 	}
7771 
7772 	opts = flags->opts;
7773 
7774 	for (cnt = 0; opts[cnt].name; cnt++)
7775 		;
7776 
7777 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7778 	if (!topts)
7779 		return;
7780 
7781 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7782 			    GFP_KERNEL);
7783 	if (!tr_topts) {
7784 		kfree(topts);
7785 		return;
7786 	}
7787 
7788 	tr->topts = tr_topts;
7789 	tr->topts[tr->nr_topts].tracer = tracer;
7790 	tr->topts[tr->nr_topts].topts = topts;
7791 	tr->nr_topts++;
7792 
7793 	for (cnt = 0; opts[cnt].name; cnt++) {
7794 		create_trace_option_file(tr, &topts[cnt], flags,
7795 					 &opts[cnt]);
7796 		WARN_ONCE(topts[cnt].entry == NULL,
7797 			  "Failed to create trace option: %s",
7798 			  opts[cnt].name);
7799 	}
7800 }
7801 
7802 static struct dentry *
7803 create_trace_option_core_file(struct trace_array *tr,
7804 			      const char *option, long index)
7805 {
7806 	struct dentry *t_options;
7807 
7808 	t_options = trace_options_init_dentry(tr);
7809 	if (!t_options)
7810 		return NULL;
7811 
7812 	return trace_create_file(option, 0644, t_options,
7813 				 (void *)&tr->trace_flags_index[index],
7814 				 &trace_options_core_fops);
7815 }
7816 
7817 static void create_trace_options_dir(struct trace_array *tr)
7818 {
7819 	struct dentry *t_options;
7820 	bool top_level = tr == &global_trace;
7821 	int i;
7822 
7823 	t_options = trace_options_init_dentry(tr);
7824 	if (!t_options)
7825 		return;
7826 
7827 	for (i = 0; trace_options[i]; i++) {
7828 		if (top_level ||
7829 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7830 			create_trace_option_core_file(tr, trace_options[i], i);
7831 	}
7832 }
7833 
7834 static ssize_t
7835 rb_simple_read(struct file *filp, char __user *ubuf,
7836 	       size_t cnt, loff_t *ppos)
7837 {
7838 	struct trace_array *tr = filp->private_data;
7839 	char buf[64];
7840 	int r;
7841 
7842 	r = tracer_tracing_is_on(tr);
7843 	r = sprintf(buf, "%d\n", r);
7844 
7845 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7846 }
7847 
7848 static ssize_t
7849 rb_simple_write(struct file *filp, const char __user *ubuf,
7850 		size_t cnt, loff_t *ppos)
7851 {
7852 	struct trace_array *tr = filp->private_data;
7853 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7854 	unsigned long val;
7855 	int ret;
7856 
7857 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7858 	if (ret)
7859 		return ret;
7860 
7861 	if (buffer) {
7862 		mutex_lock(&trace_types_lock);
7863 		if (!!val == tracer_tracing_is_on(tr)) {
7864 			val = 0; /* do nothing */
7865 		} else if (val) {
7866 			tracer_tracing_on(tr);
7867 			if (tr->current_trace->start)
7868 				tr->current_trace->start(tr);
7869 		} else {
7870 			tracer_tracing_off(tr);
7871 			if (tr->current_trace->stop)
7872 				tr->current_trace->stop(tr);
7873 		}
7874 		mutex_unlock(&trace_types_lock);
7875 	}
7876 
7877 	(*ppos)++;
7878 
7879 	return cnt;
7880 }
7881 
7882 static const struct file_operations rb_simple_fops = {
7883 	.open		= tracing_open_generic_tr,
7884 	.read		= rb_simple_read,
7885 	.write		= rb_simple_write,
7886 	.release	= tracing_release_generic_tr,
7887 	.llseek		= default_llseek,
7888 };
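
/*
 * These fops back the per-instance "tracing_on" file. A rough sketch of
 * its behavior (path assumes tracefs at /sys/kernel/tracing):
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on	# stop recording to the ring buffer
 *   # echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 *
 * Writing the value the buffer already has is a no-op; the tracer's
 * start()/stop() callbacks only run on an actual transition.
 */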
7889 
7890 static ssize_t
7891 buffer_percent_read(struct file *filp, char __user *ubuf,
7892 		    size_t cnt, loff_t *ppos)
7893 {
7894 	struct trace_array *tr = filp->private_data;
7895 	char buf[64];
7896 	int r;
7897 
7898 	r = tr->buffer_percent;
7899 	r = sprintf(buf, "%d\n", r);
7900 
7901 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7902 }
7903 
7904 static ssize_t
7905 buffer_percent_write(struct file *filp, const char __user *ubuf,
7906 		     size_t cnt, loff_t *ppos)
7907 {
7908 	struct trace_array *tr = filp->private_data;
7909 	unsigned long val;
7910 	int ret;
7911 
7912 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7913 	if (ret)
7914 		return ret;
7915 
7916 	if (val > 100)
7917 		return -EINVAL;
7918 
7919 	if (!val)
7920 		val = 1;
7921 
7922 	tr->buffer_percent = val;
7923 
7924 	(*ppos)++;
7925 
7926 	return cnt;
7927 }
7928 
7929 static const struct file_operations buffer_percent_fops = {
7930 	.open		= tracing_open_generic_tr,
7931 	.read		= buffer_percent_read,
7932 	.write		= buffer_percent_write,
7933 	.release	= tracing_release_generic_tr,
7934 	.llseek		= default_llseek,
7935 };
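
/*
 * "buffer_percent" is, roughly, the watermark for how full the ring buffer
 * should be before blocked readers are woken. Example (path assumes tracefs
 * at /sys/kernel/tracing):
 *
 *   # echo 50 > /sys/kernel/tracing/buffer_percent
 *
 * Per the write handler above, values above 100 are rejected with -EINVAL
 * and a write of 0 is stored as 1.
 */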
7936 
7937 struct dentry *trace_instance_dir;
7938 
7939 static void
7940 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7941 
7942 static int
7943 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7944 {
7945 	enum ring_buffer_flags rb_flags;
7946 
7947 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7948 
7949 	buf->tr = tr;
7950 
7951 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7952 	if (!buf->buffer)
7953 		return -ENOMEM;
7954 
7955 	buf->data = alloc_percpu(struct trace_array_cpu);
7956 	if (!buf->data) {
7957 		ring_buffer_free(buf->buffer);
7958 		buf->buffer = NULL;
7959 		return -ENOMEM;
7960 	}
7961 
7962 	/* Allocate the first page for all buffers */
7963 	set_buffer_entries(&tr->trace_buffer,
7964 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7965 
7966 	return 0;
7967 }
7968 
7969 static int allocate_trace_buffers(struct trace_array *tr, int size)
7970 {
7971 	int ret;
7972 
7973 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7974 	if (ret)
7975 		return ret;
7976 
7977 #ifdef CONFIG_TRACER_MAX_TRACE
7978 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7979 				    allocate_snapshot ? size : 1);
7980 	if (WARN_ON(ret)) {
7981 		ring_buffer_free(tr->trace_buffer.buffer);
7982 		tr->trace_buffer.buffer = NULL;
7983 		free_percpu(tr->trace_buffer.data);
7984 		tr->trace_buffer.data = NULL;
7985 		return -ENOMEM;
7986 	}
7987 	tr->allocated_snapshot = allocate_snapshot;
7988 
7989 	/*
7990 	 * Only the top level trace array gets its snapshot allocated
7991 	 * from the kernel command line.
7992 	 */
7993 	allocate_snapshot = false;
7994 #endif
7995 	return 0;
7996 }
7997 
7998 static void free_trace_buffer(struct trace_buffer *buf)
7999 {
8000 	if (buf->buffer) {
8001 		ring_buffer_free(buf->buffer);
8002 		buf->buffer = NULL;
8003 		free_percpu(buf->data);
8004 		buf->data = NULL;
8005 	}
8006 }
8007 
8008 static void free_trace_buffers(struct trace_array *tr)
8009 {
8010 	if (!tr)
8011 		return;
8012 
8013 	free_trace_buffer(&tr->trace_buffer);
8014 
8015 #ifdef CONFIG_TRACER_MAX_TRACE
8016 	free_trace_buffer(&tr->max_buffer);
8017 #endif
8018 }
8019 
8020 static void init_trace_flags_index(struct trace_array *tr)
8021 {
8022 	int i;
8023 
8024 	/* Used by the trace options files */
8025 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8026 		tr->trace_flags_index[i] = i;
8027 }
8028 
8029 static void __update_tracer_options(struct trace_array *tr)
8030 {
8031 	struct tracer *t;
8032 
8033 	for (t = trace_types; t; t = t->next)
8034 		add_tracer_options(tr, t);
8035 }
8036 
8037 static void update_tracer_options(struct trace_array *tr)
8038 {
8039 	mutex_lock(&trace_types_lock);
8040 	__update_tracer_options(tr);
8041 	mutex_unlock(&trace_types_lock);
8042 }
8043 
8044 static int instance_mkdir(const char *name)
8045 {
8046 	struct trace_array *tr;
8047 	int ret;
8048 
8049 	mutex_lock(&event_mutex);
8050 	mutex_lock(&trace_types_lock);
8051 
8052 	ret = -EEXIST;
8053 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8054 		if (tr->name && strcmp(tr->name, name) == 0)
8055 			goto out_unlock;
8056 	}
8057 
8058 	ret = -ENOMEM;
8059 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8060 	if (!tr)
8061 		goto out_unlock;
8062 
8063 	tr->name = kstrdup(name, GFP_KERNEL);
8064 	if (!tr->name)
8065 		goto out_free_tr;
8066 
8067 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8068 		goto out_free_tr;
8069 
8070 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8071 
8072 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8073 
8074 	raw_spin_lock_init(&tr->start_lock);
8075 
8076 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8077 
8078 	tr->current_trace = &nop_trace;
8079 
8080 	INIT_LIST_HEAD(&tr->systems);
8081 	INIT_LIST_HEAD(&tr->events);
8082 	INIT_LIST_HEAD(&tr->hist_vars);
8083 
8084 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8085 		goto out_free_tr;
8086 
8087 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8088 	if (!tr->dir)
8089 		goto out_free_tr;
8090 
8091 	ret = event_trace_add_tracer(tr->dir, tr);
8092 	if (ret) {
8093 		tracefs_remove_recursive(tr->dir);
8094 		goto out_free_tr;
8095 	}
8096 
8097 	ftrace_init_trace_array(tr);
8098 
8099 	init_tracer_tracefs(tr, tr->dir);
8100 	init_trace_flags_index(tr);
8101 	__update_tracer_options(tr);
8102 
8103 	list_add(&tr->list, &ftrace_trace_arrays);
8104 
8105 	mutex_unlock(&trace_types_lock);
8106 	mutex_unlock(&event_mutex);
8107 
8108 	return 0;
8109 
8110  out_free_tr:
8111 	free_trace_buffers(tr);
8112 	free_cpumask_var(tr->tracing_cpumask);
8113 	kfree(tr->name);
8114 	kfree(tr);
8115 
8116  out_unlock:
8117 	mutex_unlock(&trace_types_lock);
8118 	mutex_unlock(&event_mutex);
8119 
8120 	return ret;
8121 
8122 }
8123 
8124 static int instance_rmdir(const char *name)
8125 {
8126 	struct trace_array *tr;
8127 	int found = 0;
8128 	int ret;
8129 	int i;
8130 
8131 	mutex_lock(&event_mutex);
8132 	mutex_lock(&trace_types_lock);
8133 
8134 	ret = -ENODEV;
8135 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8136 		if (tr->name && strcmp(tr->name, name) == 0) {
8137 			found = 1;
8138 			break;
8139 		}
8140 	}
8141 	if (!found)
8142 		goto out_unlock;
8143 
8144 	ret = -EBUSY;
8145 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8146 		goto out_unlock;
8147 
8148 	list_del(&tr->list);
8149 
8150 	/* Disable all the flags that were enabled coming in */
8151 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8152 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8153 			set_tracer_flag(tr, 1 << i, 0);
8154 	}
8155 
8156 	tracing_set_nop(tr);
8157 	clear_ftrace_function_probes(tr);
8158 	event_trace_del_tracer(tr);
8159 	ftrace_clear_pids(tr);
8160 	ftrace_destroy_function_files(tr);
8161 	tracefs_remove_recursive(tr->dir);
8162 	free_trace_buffers(tr);
8163 
8164 	for (i = 0; i < tr->nr_topts; i++) {
8165 		kfree(tr->topts[i].topts);
8166 	}
8167 	kfree(tr->topts);
8168 
8169 	free_cpumask_var(tr->tracing_cpumask);
8170 	kfree(tr->name);
8171 	kfree(tr);
8172 
8173 	ret = 0;
8174 
8175  out_unlock:
8176 	mutex_unlock(&trace_types_lock);
8177 	mutex_unlock(&event_mutex);
8178 
8179 	return ret;
8180 }
8181 
8182 static __init void create_trace_instances(struct dentry *d_tracer)
8183 {
8184 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8185 							 instance_mkdir,
8186 							 instance_rmdir);
8187 	if (WARN_ON(!trace_instance_dir))
8188 		return;
8189 }
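
/*
 * The "instances" directory lets user space create and remove extra trace
 * arrays with plain mkdir/rmdir (paths assume tracefs at /sys/kernel/tracing):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # echo 1 > /sys/kernel/tracing/instances/foo/tracing_on
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * instance_rmdir() returns -EBUSY while the instance is still referenced
 * (for example by an open trace file), so the rmdir may fail until all
 * readers go away.
 */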
8190 
8191 static void
8192 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8193 {
8194 	struct trace_event_file *file;
8195 	int cpu;
8196 
8197 	trace_create_file("available_tracers", 0444, d_tracer,
8198 			tr, &show_traces_fops);
8199 
8200 	trace_create_file("current_tracer", 0644, d_tracer,
8201 			tr, &set_tracer_fops);
8202 
8203 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8204 			  tr, &tracing_cpumask_fops);
8205 
8206 	trace_create_file("trace_options", 0644, d_tracer,
8207 			  tr, &tracing_iter_fops);
8208 
8209 	trace_create_file("trace", 0644, d_tracer,
8210 			  tr, &tracing_fops);
8211 
8212 	trace_create_file("trace_pipe", 0444, d_tracer,
8213 			  tr, &tracing_pipe_fops);
8214 
8215 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8216 			  tr, &tracing_entries_fops);
8217 
8218 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8219 			  tr, &tracing_total_entries_fops);
8220 
8221 	trace_create_file("free_buffer", 0200, d_tracer,
8222 			  tr, &tracing_free_buffer_fops);
8223 
8224 	trace_create_file("trace_marker", 0220, d_tracer,
8225 			  tr, &tracing_mark_fops);
8226 
8227 	file = __find_event_file(tr, "ftrace", "print");
8228 	if (file && file->dir)
8229 		trace_create_file("trigger", 0644, file->dir, file,
8230 				  &event_trigger_fops);
8231 	tr->trace_marker_file = file;
8232 
8233 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8234 			  tr, &tracing_mark_raw_fops);
8235 
8236 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8237 			  &trace_clock_fops);
8238 
8239 	trace_create_file("tracing_on", 0644, d_tracer,
8240 			  tr, &rb_simple_fops);
8241 
8242 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8243 			  &trace_time_stamp_mode_fops);
8244 
8245 	tr->buffer_percent = 50;
8246 
8247 	trace_create_file("buffer_percent", 0444, d_tracer,
8248 			tr, &buffer_percent_fops);
8249 
8250 	create_trace_options_dir(tr);
8251 
8252 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8253 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8254 			&tr->max_latency, &tracing_max_lat_fops);
8255 #endif
8256 
8257 	if (ftrace_create_function_files(tr, d_tracer))
8258 		WARN(1, "Could not allocate function filter files");
8259 
8260 #ifdef CONFIG_TRACER_SNAPSHOT
8261 	trace_create_file("snapshot", 0644, d_tracer,
8262 			  tr, &snapshot_fops);
8263 #endif
8264 
8265 	for_each_tracing_cpu(cpu)
8266 		tracing_init_tracefs_percpu(tr, cpu);
8267 
8268 	ftrace_init_tracefs(tr, d_tracer);
8269 }
8270 
8271 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8272 {
8273 	struct vfsmount *mnt;
8274 	struct file_system_type *type;
8275 
8276 	/*
8277 	 * To maintain backward compatibility for tools that mount
8278 	 * debugfs to get to the tracing facility, tracefs is automatically
8279 	 * mounted to the debugfs/tracing directory.
8280 	 */
8281 	type = get_fs_type("tracefs");
8282 	if (!type)
8283 		return NULL;
8284 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8285 	put_filesystem(type);
8286 	if (IS_ERR(mnt))
8287 		return NULL;
8288 	mntget(mnt);
8289 
8290 	return mnt;
8291 }
8292 
8293 /**
8294  * tracing_init_dentry - initialize top level trace array
8295  *
8296  * This is called when creating files or directories in the tracing
8297  * directory. It is called via fs_initcall() by any of the boot up code
8298  * and expects to return the dentry of the top level tracing directory.
8299  */
8300 struct dentry *tracing_init_dentry(void)
8301 {
8302 	struct trace_array *tr = &global_trace;
8303 
8304 	/* The top level trace array uses  NULL as parent */
8305 	/* The top level trace array uses NULL as parent */
8306 		return NULL;
8307 
8308 	if (WARN_ON(!tracefs_initialized()) ||
8309 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8310 		 WARN_ON(!debugfs_initialized())))
8311 		return ERR_PTR(-ENODEV);
8312 
8313 	/*
8314 	 * As there may still be users that expect the tracing
8315 	 * files to exist in debugfs/tracing, we must automount
8316 	 * the tracefs file system there, so older tools still
8317 	 * work with the newer kernel.
8318 	 */
8319 	tr->dir = debugfs_create_automount("tracing", NULL,
8320 					   trace_automount, NULL);
8321 	if (!tr->dir) {
8322 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8323 		return ERR_PTR(-ENOMEM);
8324 	}
8325 
8326 	return NULL;
8327 }
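
/*
 * With the automount above in place, both of the following should reach
 * the same files (assuming debugfs at /sys/kernel/debug and tracefs at
 * /sys/kernel/tracing):
 *
 *   # ls /sys/kernel/tracing/
 *   # ls /sys/kernel/debug/tracing/	# triggers the tracefs automount
 */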
8328 
8329 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8330 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8331 
8332 static void __init trace_eval_init(void)
8333 {
8334 	int len;
8335 
8336 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8337 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8338 }
8339 
8340 #ifdef CONFIG_MODULES
8341 static void trace_module_add_evals(struct module *mod)
8342 {
8343 	if (!mod->num_trace_evals)
8344 		return;
8345 
8346 	/*
8347 	 * Modules with bad taint do not have events created; do not
8348 	 * bother with their eval maps (enums) either.
8349 	 */
8350 	if (trace_module_has_bad_taint(mod))
8351 		return;
8352 
8353 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8354 }
8355 
8356 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8357 static void trace_module_remove_evals(struct module *mod)
8358 {
8359 	union trace_eval_map_item *map;
8360 	union trace_eval_map_item **last = &trace_eval_maps;
8361 
8362 	if (!mod->num_trace_evals)
8363 		return;
8364 
8365 	mutex_lock(&trace_eval_mutex);
8366 
8367 	map = trace_eval_maps;
8368 
8369 	while (map) {
8370 		if (map->head.mod == mod)
8371 			break;
8372 		map = trace_eval_jmp_to_tail(map);
8373 		last = &map->tail.next;
8374 		map = map->tail.next;
8375 	}
8376 	if (!map)
8377 		goto out;
8378 
8379 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8380 	kfree(map);
8381  out:
8382 	mutex_unlock(&trace_eval_mutex);
8383 }
8384 #else
8385 static inline void trace_module_remove_evals(struct module *mod) { }
8386 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8387 
8388 static int trace_module_notify(struct notifier_block *self,
8389 			       unsigned long val, void *data)
8390 {
8391 	struct module *mod = data;
8392 
8393 	switch (val) {
8394 	case MODULE_STATE_COMING:
8395 		trace_module_add_evals(mod);
8396 		break;
8397 	case MODULE_STATE_GOING:
8398 		trace_module_remove_evals(mod);
8399 		break;
8400 	}
8401 
8402 	return 0;
8403 }
8404 
8405 static struct notifier_block trace_module_nb = {
8406 	.notifier_call = trace_module_notify,
8407 	.priority = 0,
8408 };
8409 #endif /* CONFIG_MODULES */
8410 
8411 static __init int tracer_init_tracefs(void)
8412 {
8413 	struct dentry *d_tracer;
8414 
8415 	trace_access_lock_init();
8416 
8417 	d_tracer = tracing_init_dentry();
8418 	if (IS_ERR(d_tracer))
8419 		return 0;
8420 
8421 	event_trace_init();
8422 
8423 	init_tracer_tracefs(&global_trace, d_tracer);
8424 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8425 
8426 	trace_create_file("tracing_thresh", 0644, d_tracer,
8427 			&global_trace, &tracing_thresh_fops);
8428 
8429 	trace_create_file("README", 0444, d_tracer,
8430 			NULL, &tracing_readme_fops);
8431 
8432 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8433 			NULL, &tracing_saved_cmdlines_fops);
8434 
8435 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8436 			  NULL, &tracing_saved_cmdlines_size_fops);
8437 
8438 	trace_create_file("saved_tgids", 0444, d_tracer,
8439 			NULL, &tracing_saved_tgids_fops);
8440 
8441 	trace_eval_init();
8442 
8443 	trace_create_eval_file(d_tracer);
8444 
8445 #ifdef CONFIG_MODULES
8446 	register_module_notifier(&trace_module_nb);
8447 #endif
8448 
8449 #ifdef CONFIG_DYNAMIC_FTRACE
8450 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8451 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8452 #endif
8453 
8454 	create_trace_instances(d_tracer);
8455 
8456 	update_tracer_options(&global_trace);
8457 
8458 	return 0;
8459 }
8460 
8461 static int trace_panic_handler(struct notifier_block *this,
8462 			       unsigned long event, void *unused)
8463 {
8464 	if (ftrace_dump_on_oops)
8465 		ftrace_dump(ftrace_dump_on_oops);
8466 	return NOTIFY_OK;
8467 }
8468 
8469 static struct notifier_block trace_panic_notifier = {
8470 	.notifier_call  = trace_panic_handler,
8471 	.next           = NULL,
8472 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8473 };
8474 
8475 static int trace_die_handler(struct notifier_block *self,
8476 			     unsigned long val,
8477 			     void *data)
8478 {
8479 	switch (val) {
8480 	case DIE_OOPS:
8481 		if (ftrace_dump_on_oops)
8482 			ftrace_dump(ftrace_dump_on_oops);
8483 		break;
8484 	default:
8485 		break;
8486 	}
8487 	return NOTIFY_OK;
8488 }
8489 
8490 static struct notifier_block trace_die_notifier = {
8491 	.notifier_call = trace_die_handler,
8492 	.priority = 200
8493 };
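
/*
 * Both notifiers honor ftrace_dump_on_oops, which is typically enabled
 * either on the kernel command line ("ftrace_dump_on_oops" or
 * "ftrace_dump_on_oops=orig_cpu") or at run time via
 * /proc/sys/kernel/ftrace_dump_on_oops.
 */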
8494 
8495 /*
8496  * printk is set to max of 1024, we really don't need it that big.
8497  * Nothing should be printing 1000 characters anyway.
8498  */
8499 #define TRACE_MAX_PRINT		1000
8500 
8501 /*
8502  * Define here KERN_TRACE so that we have one place to modify
8503  * it if we decide to change what log level the ftrace dump
8504  * should be at.
8505  */
8506 #define KERN_TRACE		KERN_EMERG
8507 
8508 void
8509 trace_printk_seq(struct trace_seq *s)
8510 {
8511 	/* Probably should print a warning here. */
8512 	if (s->seq.len >= TRACE_MAX_PRINT)
8513 		s->seq.len = TRACE_MAX_PRINT;
8514 
8515 	/*
8516 	 * More paranoid code. Although the buffer size is set to
8517 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8518 	 * an extra layer of protection.
8519 	 */
8520 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8521 		s->seq.len = s->seq.size - 1;
8522 
8523 	/* Should be nul-terminated, but we are paranoid. */
8524 	s->buffer[s->seq.len] = 0;
8525 
8526 	printk(KERN_TRACE "%s", s->buffer);
8527 
8528 	trace_seq_init(s);
8529 }
8530 
8531 void trace_init_global_iter(struct trace_iterator *iter)
8532 {
8533 	iter->tr = &global_trace;
8534 	iter->trace = iter->tr->current_trace;
8535 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8536 	iter->trace_buffer = &global_trace.trace_buffer;
8537 
8538 	if (iter->trace && iter->trace->open)
8539 		iter->trace->open(iter);
8540 
8541 	/* Annotate start of buffers if we had overruns */
8542 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8543 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8544 
8545 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8546 	if (trace_clocks[iter->tr->clock_id].in_ns)
8547 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8548 }
8549 
8550 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8551 {
8552 	/* use static because iter can be a bit big for the stack */
8553 	static struct trace_iterator iter;
8554 	static atomic_t dump_running;
8555 	struct trace_array *tr = &global_trace;
8556 	unsigned int old_userobj;
8557 	unsigned long flags;
8558 	int cnt = 0, cpu;
8559 
8560 	/* Only allow one dump user at a time. */
8561 	if (atomic_inc_return(&dump_running) != 1) {
8562 		atomic_dec(&dump_running);
8563 		return;
8564 	}
8565 
8566 	/*
8567 	 * Always turn off tracing when we dump.
8568 	 * We don't need to show trace output of what happens
8569 	 * between multiple crashes.
8570 	 *
8571 	 * If the user does a sysrq-z, then they can re-enable
8572 	 * tracing with echo 1 > tracing_on.
8573 	 */
8574 	tracing_off();
8575 
8576 	local_irq_save(flags);
8577 	printk_nmi_direct_enter();
8578 
8579 	/* Simulate the iterator */
8580 	trace_init_global_iter(&iter);
8581 
8582 	for_each_tracing_cpu(cpu) {
8583 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8584 	}
8585 
8586 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8587 
8588 	/* don't look at user memory in panic mode */
8589 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8590 
8591 	switch (oops_dump_mode) {
8592 	case DUMP_ALL:
8593 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8594 		break;
8595 	case DUMP_ORIG:
8596 		iter.cpu_file = raw_smp_processor_id();
8597 		break;
8598 	case DUMP_NONE:
8599 		goto out_enable;
8600 	default:
8601 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8602 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8603 	}
8604 
8605 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8606 
8607 	/* Did function tracer already get disabled? */
8608 	if (ftrace_is_dead()) {
8609 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8610 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8611 	}
8612 
8613 	/*
8614 	 * We need to stop all tracing on all CPUs to read
8615 	 * the next buffer. This is a bit expensive, but is
8616 	 * not done often. We read everything we can,
8617 	 * and then release the locks again.
8618 	 */
8619 
8620 	while (!trace_empty(&iter)) {
8621 
8622 		if (!cnt)
8623 			printk(KERN_TRACE "---------------------------------\n");
8624 
8625 		cnt++;
8626 
8627 		/* reset all but tr, trace, and overruns */
8628 		memset(&iter.seq, 0,
8629 		       sizeof(struct trace_iterator) -
8630 		       offsetof(struct trace_iterator, seq));
8631 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8632 		iter.pos = -1;
8633 
8634 		if (trace_find_next_entry_inc(&iter) != NULL) {
8635 			int ret;
8636 
8637 			ret = print_trace_line(&iter);
8638 			if (ret != TRACE_TYPE_NO_CONSUME)
8639 				trace_consume(&iter);
8640 		}
8641 		touch_nmi_watchdog();
8642 
8643 		trace_printk_seq(&iter.seq);
8644 	}
8645 
8646 	if (!cnt)
8647 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8648 	else
8649 		printk(KERN_TRACE "---------------------------------\n");
8650 
8651  out_enable:
8652 	tr->trace_flags |= old_userobj;
8653 
8654 	for_each_tracing_cpu(cpu) {
8655 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8656 	}
8657 	atomic_dec(&dump_running);
8658 	printk_nmi_direct_exit();
8659 	local_irq_restore(flags);
8660 }
8661 EXPORT_SYMBOL_GPL(ftrace_dump);
8662 
8663 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8664 {
8665 	char **argv;
8666 	int argc, ret;
8667 
8668 	argc = 0;
8669 	ret = 0;
8670 	argv = argv_split(GFP_KERNEL, buf, &argc);
8671 	if (!argv)
8672 		return -ENOMEM;
8673 
8674 	if (argc)
8675 		ret = createfn(argc, argv);
8676 
8677 	argv_free(argv);
8678 
8679 	return ret;
8680 }
8681 
8682 #define WRITE_BUFSIZE  4096
8683 
8684 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8685 				size_t count, loff_t *ppos,
8686 				int (*createfn)(int, char **))
8687 {
8688 	char *kbuf, *buf, *tmp;
8689 	int ret = 0;
8690 	size_t done = 0;
8691 	size_t size;
8692 
8693 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8694 	if (!kbuf)
8695 		return -ENOMEM;
8696 
8697 	while (done < count) {
8698 		size = count - done;
8699 
8700 		if (size >= WRITE_BUFSIZE)
8701 			size = WRITE_BUFSIZE - 1;
8702 
8703 		if (copy_from_user(kbuf, buffer + done, size)) {
8704 			ret = -EFAULT;
8705 			goto out;
8706 		}
8707 		kbuf[size] = '\0';
8708 		buf = kbuf;
8709 		do {
8710 			tmp = strchr(buf, '\n');
8711 			if (tmp) {
8712 				*tmp = '\0';
8713 				size = tmp - buf + 1;
8714 			} else {
8715 				size = strlen(buf);
8716 				if (done + size < count) {
8717 					if (buf != kbuf)
8718 						break;
8719 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8720 					pr_warn("Line length is too long: Should be less than %d\n",
8721 						WRITE_BUFSIZE - 2);
8722 					ret = -EINVAL;
8723 					goto out;
8724 				}
8725 			}
8726 			done += size;
8727 
8728 			/* Remove comments */
8729 			tmp = strchr(buf, '#');
8730 
8731 			if (tmp)
8732 				*tmp = '\0';
8733 
8734 			ret = trace_run_command(buf, createfn);
8735 			if (ret)
8736 				goto out;
8737 			buf += size;
8738 
8739 		} while (done < count);
8740 	}
8741 	ret = done;
8742 
8743 out:
8744 	kfree(kbuf);
8745 
8746 	return ret;
8747 }
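
/*
 * A sketch of the input format accepted above: commands are separated by
 * newlines, anything after '#' on a line is dropped, and a single command
 * must fit in WRITE_BUFSIZE - 2 bytes. A (hypothetical) createfn would be
 * handed argc/argv for each of:
 *
 *   cmd_one arg1 arg2
 *   cmd_two arg1	# trailing comment is stripped
 */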
8748 
8749 __init static int tracer_alloc_buffers(void)
8750 {
8751 	int ring_buf_size;
8752 	int ret = -ENOMEM;
8753 
8754 	/*
8755 	 * Make sure we don't accidentally add more trace options
8756 	 * than we have bits for.
8757 	 */
8758 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8759 
8760 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8761 		goto out;
8762 
8763 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8764 		goto out_free_buffer_mask;
8765 
8766 	/* Only allocate trace_printk buffers if a trace_printk exists */
8767 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8768 		/* Must be called before global_trace.buffer is allocated */
8769 		trace_printk_init_buffers();
8770 
8771 	/* To save memory, keep the ring buffer size to its minimum */
8772 	if (ring_buffer_expanded)
8773 		ring_buf_size = trace_buf_size;
8774 	else
8775 		ring_buf_size = 1;
8776 
8777 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8778 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8779 
8780 	raw_spin_lock_init(&global_trace.start_lock);
8781 
8782 	/*
8783 	 * The prepare callback allocates some memory for the ring buffer. We
8784 	 * don't free the buffer if the CPU goes down. If we were to free
8785 	 * the buffer, then the user would lose any trace that was in the
8786 	 * buffer. The memory will be removed once the "instance" is removed.
8787 	 */
8788 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8789 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8790 				      NULL);
8791 	if (ret < 0)
8792 		goto out_free_cpumask;
8793 	/* Used for event triggers */
8794 	ret = -ENOMEM;
8795 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8796 	if (!temp_buffer)
8797 		goto out_rm_hp_state;
8798 
8799 	if (trace_create_savedcmd() < 0)
8800 		goto out_free_temp_buffer;
8801 
8802 	/* TODO: make the number of buffers hot-pluggable with CPUs */
8803 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8804 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8805 		WARN_ON(1);
8806 		goto out_free_savedcmd;
8807 	}
8808 
8809 	if (global_trace.buffer_disabled)
8810 		tracing_off();
8811 
8812 	if (trace_boot_clock) {
8813 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8814 		if (ret < 0)
8815 			pr_warn("Trace clock %s not defined, going back to default\n",
8816 				trace_boot_clock);
8817 	}
8818 
8819 	/*
8820 	 * register_tracer() might reference current_trace, so it
8821 	 * needs to be set before we register anything. This is
8822 	 * just a bootstrap of current_trace anyway.
8823 	 */
8824 	global_trace.current_trace = &nop_trace;
8825 
8826 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8827 
8828 	ftrace_init_global_array_ops(&global_trace);
8829 
8830 	init_trace_flags_index(&global_trace);
8831 
8832 	register_tracer(&nop_trace);
8833 
8834 	/* Function tracing may start here (via kernel command line) */
8835 	init_function_trace();
8836 
8837 	/* All seems OK, enable tracing */
8838 	tracing_disabled = 0;
8839 
8840 	atomic_notifier_chain_register(&panic_notifier_list,
8841 				       &trace_panic_notifier);
8842 
8843 	register_die_notifier(&trace_die_notifier);
8844 
8845 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8846 
8847 	INIT_LIST_HEAD(&global_trace.systems);
8848 	INIT_LIST_HEAD(&global_trace.events);
8849 	INIT_LIST_HEAD(&global_trace.hist_vars);
8850 	list_add(&global_trace.list, &ftrace_trace_arrays);
8851 
8852 	apply_trace_boot_options();
8853 
8854 	register_snapshot_cmd();
8855 
8856 	return 0;
8857 
8858 out_free_savedcmd:
8859 	free_saved_cmdlines_buffer(savedcmd);
8860 out_free_temp_buffer:
8861 	ring_buffer_free(temp_buffer);
8862 out_rm_hp_state:
8863 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8864 out_free_cpumask:
8865 	free_cpumask_var(global_trace.tracing_cpumask);
8866 out_free_buffer_mask:
8867 	free_cpumask_var(tracing_buffer_mask);
8868 out:
8869 	return ret;
8870 }
8871 
8872 void __init early_trace_init(void)
8873 {
8874 	if (tracepoint_printk) {
8875 		tracepoint_print_iter =
8876 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8877 		if (WARN_ON(!tracepoint_print_iter))
8878 			tracepoint_printk = 0;
8879 		else
8880 			static_key_enable(&tracepoint_printk_key.key);
8881 	}
8882 	tracer_alloc_buffers();
8883 }
8884 
8885 void __init trace_init(void)
8886 {
8887 	trace_event_init();
8888 }
8889 
8890 __init static int clear_boot_tracer(void)
8891 {
8892 	/*
8893 	 * The default bootup tracer name is stored in an init section
8894 	 * buffer. This function is called from a late initcall. If we did not
8895 	 * find the boot tracer, then clear it out, to prevent
8896 	 * later registration from accessing the buffer that is
8897 	 * about to be freed.
8898 	 */
8899 	if (!default_bootup_tracer)
8900 		return 0;
8901 
8902 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8903 	       default_bootup_tracer);
8904 	default_bootup_tracer = NULL;
8905 
8906 	return 0;
8907 }
8908 
8909 fs_initcall(tracer_init_tracefs);
8910 late_initcall_sync(clear_boot_tracer);
8911 
8912 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8913 __init static int tracing_set_default_clock(void)
8914 {
8915 	/* sched_clock_stable() is determined in late_initcall */
8916 	if (!trace_boot_clock && !sched_clock_stable()) {
8917 		printk(KERN_WARNING
8918 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8919 		       "If you want to keep using the local clock, then add:\n"
8920 		       "  \"trace_clock=local\"\n"
8921 		       "on the kernel command line\n");
8922 		tracing_set_clock(&global_trace, "global");
8923 	}
8924 
8925 	return 0;
8926 }
8927 late_initcall_sync(tracing_set_default_clock);
8928 #endif
8929