xref: /openbmc/linux/kernel/trace/trace.c (revision 534420c6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 and is set back to zero when the
97  * initialization of the tracer succeeds. That is the only place
98  * that clears it.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
108  * is set, then ftrace_dump() is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * from "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188 		return 1;
189 	}
190 
191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
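
/*
 * Note: the "+ 500" above rounds to the nearest microsecond, e.g.
 * ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */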
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork, and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid is already +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
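
/*
 * Illustrative sketch of how trace_pid_start()/trace_pid_next()/
 * trace_pid_show() are typically wired into a seq_file (the wrapper
 * names below are placeholders; real callers also fetch the pid_list
 * under RCU and provide their own ->stop):
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,	/* calls trace_pid_start() */
 *		.next	= example_pid_next,	/* calls trace_pid_next()  */
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */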
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always create a new array. The write is an all-or-nothing
494 	 * operation: a new array is built each time the user adds pids,
495 	 * and if the operation fails, the current list is left
496 	 * unmodified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list) {
500 		trace_parser_put(&parser);
501 		return -ENOMEM;
502 	}
503 
504 	pid_list->pid_max = READ_ONCE(pid_max);
505 
506 	/* Only truncating will shrink pid_max */
507 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508 		pid_list->pid_max = filtered_pids->pid_max;
509 
510 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511 	if (!pid_list->pids) {
512 		trace_parser_put(&parser);
513 		kfree(pid_list);
514 		return -ENOMEM;
515 	}
516 
517 	if (filtered_pids) {
518 		/* copy the current bits to the new max */
519 		for_each_set_bit(pid, filtered_pids->pids,
520 				 filtered_pids->pid_max) {
521 			set_bit(pid, pid_list->pids);
522 			nr_pids++;
523 		}
524 	}
525 
526 	while (cnt > 0) {
527 
528 		pos = 0;
529 
530 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
531 		if (ret < 0 || !trace_parser_loaded(&parser))
532 			break;
533 
534 		read += ret;
535 		ubuf += ret;
536 		cnt -= ret;
537 
538 		ret = -EINVAL;
539 		if (kstrtoul(parser.buffer, 0, &val))
540 			break;
541 		if (val >= pid_list->pid_max)
542 			break;
543 
544 		pid = (pid_t)val;
545 
546 		set_bit(pid, pid_list->pids);
547 		nr_pids++;
548 
549 		trace_parser_clear(&parser);
550 		ret = 0;
551 	}
552 	trace_parser_put(&parser);
553 
554 	if (ret < 0) {
555 		trace_free_pid_list(pid_list);
556 		return ret;
557 	}
558 
559 	if (!nr_pids) {
560 		/* Cleared the list of pids */
561 		trace_free_pid_list(pid_list);
562 		read = ret;
563 		pid_list = NULL;
564 	}
565 
566 	*new_pid_list = pid_list;
567 
568 	return read;
569 }
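
/*
 * Note on the semantics above: the write is all-or-nothing.  A new
 * pid list is always built from scratch (seeded with @filtered_pids
 * when it exists), so a failure leaves the current list untouched,
 * and if the resulting list ends up empty the filter is cleared
 * (*new_pid_list is set to NULL).
 */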
570 
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573 	u64 ts;
574 
575 	/* Early boot up does not have a buffer yet */
576 	if (!buf->buffer)
577 		return trace_clock_local();
578 
579 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
580 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581 
582 	return ts;
583 }
584 
585 u64 ftrace_now(int cpu)
586 {
587 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589 
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled" so it can be used in fast paths, such
595  * as by the irqsoff tracer. But it may be inaccurate due to races. If
596  * you need to know the accurate state, use tracing_is_on(), which is a
597  * little slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601 	/*
602 	 * For quick access (irqsoff uses this in fast path), just
603 	 * return the mirror variable of the state of the ring buffer.
604 	 * It's a little racy, but we don't really care.
605 	 */
606 	smp_rmb();
607 	return !global_trace.buffer_disabled;
608 }
609 
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low value of 16384.
616  * If a dump happens on an oops, it is much appreciated not to
617  * have to wait for all that output. In any case, this is
618  * configurable at both boot time and run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
621 
622 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623 
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer		*trace_types __read_mostly;
626 
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631 
632 /*
633  * Serialize access to the ring buffer.
634  *
635  * The ring buffer serializes readers, but that is only low level protection.
636  * The validity of the events (which are returned by ring_buffer_peek() etc.)
637  * is not protected by the ring buffer.
638  *
639  * The content of events may become garbage if we allow other processes to
640  * consume these events concurrently:
641  *   A) the page of the consumed events may become a normal page
642  *      (not a reader page) in the ring buffer, and this page will be
643  *      rewritten by the event producer.
644  *   B) the page of the consumed events may become a page for splice_read,
645  *      and this page will be returned to the system.
646  *
647  * These primitives allow multiple processes to access different per-CPU
648  * ring buffers concurrently.
649  *
650  * These primitives don't distinguish read-only and read-consume access.
651  * Multiple read-only accesses are also serialized.
652  */
653 
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657 
658 static inline void trace_access_lock(int cpu)
659 {
660 	if (cpu == RING_BUFFER_ALL_CPUS) {
661 		/* gain it for accessing the whole ring buffer. */
662 		down_write(&all_cpu_access_lock);
663 	} else {
664 		/* gain it for accessing a cpu ring buffer. */
665 
666 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667 		down_read(&all_cpu_access_lock);
668 
669 		/* Secondly block other access to this @cpu ring buffer. */
670 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
671 	}
672 }
673 
674 static inline void trace_access_unlock(int cpu)
675 {
676 	if (cpu == RING_BUFFER_ALL_CPUS) {
677 		up_write(&all_cpu_access_lock);
678 	} else {
679 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680 		up_read(&all_cpu_access_lock);
681 	}
682 }
683 
684 static inline void trace_access_lock_init(void)
685 {
686 	int cpu;
687 
688 	for_each_possible_cpu(cpu)
689 		mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691 
692 #else
693 
694 static DEFINE_MUTEX(access_lock);
695 
696 static inline void trace_access_lock(int cpu)
697 {
698 	(void)cpu;
699 	mutex_lock(&access_lock);
700 }
701 
702 static inline void trace_access_unlock(int cpu)
703 {
704 	(void)cpu;
705 	mutex_unlock(&access_lock);
706 }
707 
708 static inline void trace_access_lock_init(void)
709 {
710 }
711 
712 #endif
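
/*
 * Typical usage of the helpers above (illustrative):
 *
 *	trace_access_lock(cpu);
 *	... read or consume events from the chosen buffer(s) ...
 *	trace_access_unlock(cpu);
 *
 * where @cpu is either a single CPU or RING_BUFFER_ALL_CPUS.
 */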
713 
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716 				 unsigned long flags,
717 				 int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719 				      struct ring_buffer *buffer,
720 				      unsigned long flags,
721 				      int skip, int pc, struct pt_regs *regs);
722 
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725 					unsigned long flags,
726 					int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730 				      struct ring_buffer *buffer,
731 				      unsigned long flags,
732 				      int skip, int pc, struct pt_regs *regs)
733 {
734 }
735 
736 #endif
737 
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740 		  int type, unsigned long flags, int pc)
741 {
742 	struct trace_entry *ent = ring_buffer_event_data(event);
743 
744 	tracing_generic_entry_update(ent, flags, pc);
745 	ent->type = type;
746 }
747 
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750 			  int type,
751 			  unsigned long len,
752 			  unsigned long flags, int pc)
753 {
754 	struct ring_buffer_event *event;
755 
756 	event = ring_buffer_lock_reserve(buffer, len);
757 	if (event != NULL)
758 		trace_event_setup(event, type, flags, pc);
759 
760 	return event;
761 }
762 
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765 	if (tr->trace_buffer.buffer)
766 		ring_buffer_record_on(tr->trace_buffer.buffer);
767 	/*
768 	 * This flag is looked at when buffers haven't been allocated
769 	 * yet, or by some tracers (like irqsoff) that just want to
770 	 * know if the ring buffer has been disabled, but can handle
771 	 * races where it gets disabled while we still do a record.
772 	 * As the check is in the fast path of the tracers, it is more
773 	 * important to be fast than accurate.
774 	 */
775 	tr->buffer_disabled = 0;
776 	/* Make the flag seen by readers */
777 	smp_wmb();
778 }
779 
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788 	tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791 
792 
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796 	__this_cpu_write(trace_taskinfo_save, true);
797 
798 	/* If this is the temp buffer, we need to commit fully */
799 	if (this_cpu_read(trace_buffered_event) == event) {
800 		/* Length is in event->array[0] */
801 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
802 		/* Release the temp buffer */
803 		this_cpu_dec(trace_buffered_event_cnt);
804 	} else
805 		ring_buffer_unlock_commit(buffer, event);
806 }
807 
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:	   The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816 	struct ring_buffer_event *event;
817 	struct ring_buffer *buffer;
818 	struct print_entry *entry;
819 	unsigned long irq_flags;
820 	int alloc;
821 	int pc;
822 
823 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824 		return 0;
825 
826 	pc = preempt_count();
827 
828 	if (unlikely(tracing_selftest_running || tracing_disabled))
829 		return 0;
830 
831 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
832 
833 	local_save_flags(irq_flags);
834 	buffer = global_trace.trace_buffer.buffer;
835 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836 					    irq_flags, pc);
837 	if (!event)
838 		return 0;
839 
840 	entry = ring_buffer_event_data(event);
841 	entry->ip = ip;
842 
843 	memcpy(&entry->buf, str, size);
844 
845 	/* Add a newline if necessary */
846 	if (entry->buf[size - 1] != '\n') {
847 		entry->buf[size] = '\n';
848 		entry->buf[size + 1] = '\0';
849 	} else
850 		entry->buf[size] = '\0';
851 
852 	__buffer_unlock_commit(buffer, event);
853 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854 
855 	return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858 
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:	   The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866 	struct ring_buffer_event *event;
867 	struct ring_buffer *buffer;
868 	struct bputs_entry *entry;
869 	unsigned long irq_flags;
870 	int size = sizeof(struct bputs_entry);
871 	int pc;
872 
873 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874 		return 0;
875 
876 	pc = preempt_count();
877 
878 	if (unlikely(tracing_selftest_running || tracing_disabled))
879 		return 0;
880 
881 	local_save_flags(irq_flags);
882 	buffer = global_trace.trace_buffer.buffer;
883 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884 					    irq_flags, pc);
885 	if (!event)
886 		return 0;
887 
888 	entry = ring_buffer_event_data(event);
889 	entry->ip			= ip;
890 	entry->str			= str;
891 
892 	__buffer_unlock_commit(buffer, event);
893 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894 
895 	return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898 
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
901 {
902 	struct tracer *tracer = tr->current_trace;
903 	unsigned long flags;
904 
905 	if (in_nmi()) {
906 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907 		internal_trace_puts("*** snapshot is being ignored        ***\n");
908 		return;
909 	}
910 
911 	if (!tr->allocated_snapshot) {
912 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913 		internal_trace_puts("*** stopping trace here!   ***\n");
914 		tracing_off();
915 		return;
916 	}
917 
918 	/* Note, snapshot can not be used when the tracer uses it */
919 	if (tracer->use_max_tr) {
920 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922 		return;
923 	}
924 
925 	local_irq_save(flags);
926 	update_max_tr(tr, current, smp_processor_id(), cond_data);
927 	local_irq_restore(flags);
928 }
929 
930 void tracing_snapshot_instance(struct trace_array *tr)
931 {
932 	tracing_snapshot_instance_cond(tr, NULL);
933 }
934 
935 /**
936  * tracing_snapshot - take a snapshot of the current buffer.
937  *
938  * This causes a swap between the snapshot buffer and the current live
939  * tracing buffer. You can use this to take snapshots of the live
940  * trace when some condition is triggered, but continue to trace.
941  *
942  * Note, make sure to allocate the snapshot with either
943  * a tracing_snapshot_alloc(), or by doing it manually
944  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
945  *
946  * If the snapshot buffer is not allocated, it will stop tracing.
947  * Basically making a permanent snapshot.
948  */
949 void tracing_snapshot(void)
950 {
951 	struct trace_array *tr = &global_trace;
952 
953 	tracing_snapshot_instance(tr);
954 }
955 EXPORT_SYMBOL_GPL(tracing_snapshot);
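
/*
 * Minimal illustrative in-kernel usage (assumes CONFIG_TRACER_SNAPSHOT
 * and that the allocation runs in a context that may sleep):
 *
 *	tracing_alloc_snapshot();	// make sure the spare buffer exists
 *	...
 *	if (interesting_condition)	// placeholder condition
 *		tracing_snapshot();	// swap the live buffer into the snapshot
 *
 * The result can then be read from the tracefs "snapshot" file.
 */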
956 
957 /**
958  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
959  * @tr:		The tracing instance to snapshot
960  * @cond_data:	The data to be tested conditionally, and possibly saved
961  *
962  * This is the same as tracing_snapshot() except that the snapshot is
963  * conditional - the snapshot will only happen if the
964  * cond_snapshot.update() implementation receiving the cond_data
965  * returns true, which means that the trace array's cond_snapshot
966  * update() operation used the cond_data to determine whether the
967  * snapshot should be taken, and if it was, presumably saved it along
968  * with the snapshot.
969  */
970 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
971 {
972 	tracing_snapshot_instance_cond(tr, cond_data);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
975 
976 /**
977  * tracing_snapshot_cond_data - get the user data associated with a snapshot
978  * @tr:		The tracing instance
979  *
980  * When the user enables a conditional snapshot using
981  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
982  * with the snapshot.  This accessor is used to retrieve it.
983  *
984  * Should not be called from cond_snapshot.update(), since it takes
985  * the tr->max_lock lock, which the code calling
986  * cond_snapshot.update() has already done.
987  *
988  * Returns the cond_data associated with the trace array's snapshot.
989  */
990 void *tracing_cond_snapshot_data(struct trace_array *tr)
991 {
992 	void *cond_data = NULL;
993 
994 	arch_spin_lock(&tr->max_lock);
995 
996 	if (tr->cond_snapshot)
997 		cond_data = tr->cond_snapshot->cond_data;
998 
999 	arch_spin_unlock(&tr->max_lock);
1000 
1001 	return cond_data;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1004 
1005 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1006 					struct trace_buffer *size_buf, int cpu_id);
1007 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1008 
1009 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1010 {
1011 	int ret;
1012 
1013 	if (!tr->allocated_snapshot) {
1014 
1015 		/* allocate spare buffer */
1016 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1017 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1018 		if (ret < 0)
1019 			return ret;
1020 
1021 		tr->allocated_snapshot = true;
1022 	}
1023 
1024 	return 0;
1025 }
1026 
1027 static void free_snapshot(struct trace_array *tr)
1028 {
1029 	/*
1030 	 * We don't free the ring buffer; instead, we resize it because
1031 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1032 	 * we want to preserve it.
1033 	 */
1034 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1035 	set_buffer_entries(&tr->max_buffer, 1);
1036 	tracing_reset_online_cpus(&tr->max_buffer);
1037 	tr->allocated_snapshot = false;
1038 }
1039 
1040 /**
1041  * tracing_alloc_snapshot - allocate snapshot buffer.
1042  *
1043  * This only allocates the snapshot buffer if it isn't already
1044  * allocated - it doesn't also take a snapshot.
1045  *
1046  * This is meant to be used in cases where the snapshot buffer needs
1047  * to be set up for events that can't sleep but need to be able to
1048  * trigger a snapshot.
1049  */
1050 int tracing_alloc_snapshot(void)
1051 {
1052 	struct trace_array *tr = &global_trace;
1053 	int ret;
1054 
1055 	ret = tracing_alloc_snapshot_instance(tr);
1056 	WARN_ON(ret < 0);
1057 
1058 	return ret;
1059 }
1060 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1061 
1062 /**
1063  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1064  *
1065  * This is similar to tracing_snapshot(), but it will allocate the
1066  * snapshot buffer if it isn't already allocated. Use this only
1067  * where it is safe to sleep, as the allocation may sleep.
1068  *
1069  * This causes a swap between the snapshot buffer and the current live
1070  * tracing buffer. You can use this to take snapshots of the live
1071  * trace when some condition is triggered, but continue to trace.
1072  */
1073 void tracing_snapshot_alloc(void)
1074 {
1075 	int ret;
1076 
1077 	ret = tracing_alloc_snapshot();
1078 	if (ret < 0)
1079 		return;
1080 
1081 	tracing_snapshot();
1082 }
1083 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1084 
1085 /**
1086  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1087  * @tr:		The tracing instance
1088  * @cond_data:	User data to associate with the snapshot
1089  * @update:	Implementation of the cond_snapshot update function
1090  *
1091  * Check whether the conditional snapshot for the given instance has
1092  * already been enabled, or if the current tracer is already using a
1093  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1094  * save the cond_data and update function inside.
1095  *
1096  * Returns 0 if successful, error otherwise.
1097  */
1098 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1099 				 cond_update_fn_t update)
1100 {
1101 	struct cond_snapshot *cond_snapshot;
1102 	int ret = 0;
1103 
1104 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1105 	if (!cond_snapshot)
1106 		return -ENOMEM;
1107 
1108 	cond_snapshot->cond_data = cond_data;
1109 	cond_snapshot->update = update;
1110 
1111 	mutex_lock(&trace_types_lock);
1112 
1113 	ret = tracing_alloc_snapshot_instance(tr);
1114 	if (ret)
1115 		goto fail_unlock;
1116 
1117 	if (tr->current_trace->use_max_tr) {
1118 		ret = -EBUSY;
1119 		goto fail_unlock;
1120 	}
1121 
1122 	/*
1123 	 * The cond_snapshot can only change to NULL without the
1124 	 * trace_types_lock. We don't care if we race with it going
1125 	 * to NULL, but we want to make sure that it's not set to
1126 	 * something other than NULL when we get here, which we can
1127 	 * do safely with only holding the trace_types_lock and not
1128 	 * having to take the max_lock.
1129 	 */
1130 	if (tr->cond_snapshot) {
1131 		ret = -EBUSY;
1132 		goto fail_unlock;
1133 	}
1134 
1135 	arch_spin_lock(&tr->max_lock);
1136 	tr->cond_snapshot = cond_snapshot;
1137 	arch_spin_unlock(&tr->max_lock);
1138 
1139 	mutex_unlock(&trace_types_lock);
1140 
1141 	return ret;
1142 
1143  fail_unlock:
1144 	mutex_unlock(&trace_types_lock);
1145 	kfree(cond_snapshot);
1146 	return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1149 
1150 /**
1151  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1152  * @tr:		The tracing instance
1153  *
1154  * Check whether the conditional snapshot for the given instance is
1155  * enabled; if so, free the cond_snapshot associated with it,
1156  * otherwise return -EINVAL.
1157  *
1158  * Returns 0 if successful, error otherwise.
1159  */
1160 int tracing_snapshot_cond_disable(struct trace_array *tr)
1161 {
1162 	int ret = 0;
1163 
1164 	arch_spin_lock(&tr->max_lock);
1165 
1166 	if (!tr->cond_snapshot)
1167 		ret = -EINVAL;
1168 	else {
1169 		kfree(tr->cond_snapshot);
1170 		tr->cond_snapshot = NULL;
1171 	}
1172 
1173 	arch_spin_unlock(&tr->max_lock);
1174 
1175 	return ret;
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1178 #else
1179 void tracing_snapshot(void)
1180 {
1181 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot);
1184 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1185 {
1186 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1187 }
1188 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1189 int tracing_alloc_snapshot(void)
1190 {
1191 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1192 	return -ENODEV;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1195 void tracing_snapshot_alloc(void)
1196 {
1197 	/* Give warning */
1198 	tracing_snapshot();
1199 }
1200 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1201 void *tracing_cond_snapshot_data(struct trace_array *tr)
1202 {
1203 	return NULL;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1207 {
1208 	return -ENODEV;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	return false;
1214 }
1215 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1216 #endif /* CONFIG_TRACER_SNAPSHOT */
1217 
1218 void tracer_tracing_off(struct trace_array *tr)
1219 {
1220 	if (tr->trace_buffer.buffer)
1221 		ring_buffer_record_off(tr->trace_buffer.buffer);
1222 	/*
1223 	 * This flag is looked at when buffers haven't been allocated
1224 	 * yet, or by some tracers (like irqsoff) that just want to
1225 	 * know if the ring buffer has been disabled, but can handle
1226 	 * races where it gets disabled while we still do a record.
1227 	 * As the check is in the fast path of the tracers, it is more
1228 	 * important to be fast than accurate.
1229 	 */
1230 	tr->buffer_disabled = 1;
1231 	/* Make the flag seen by readers */
1232 	smp_wmb();
1233 }
1234 
1235 /**
1236  * tracing_off - turn off tracing buffers
1237  *
1238  * This function stops the tracing buffers from recording data.
1239  * It does not disable any overhead the tracers themselves may
1240  * be causing. This function simply causes all recording to
1241  * the ring buffers to fail.
1242  */
1243 void tracing_off(void)
1244 {
1245 	tracer_tracing_off(&global_trace);
1246 }
1247 EXPORT_SYMBOL_GPL(tracing_off);
1248 
1249 void disable_trace_on_warning(void)
1250 {
1251 	if (__disable_trace_on_warning)
1252 		tracing_off();
1253 }
1254 
1255 /**
1256  * tracer_tracing_is_on - show the real state of the ring buffer
1257  * @tr: the trace array to check
1258  *
1259  * Shows the real state of the ring buffer: whether it is enabled or not.
1260  */
1261 bool tracer_tracing_is_on(struct trace_array *tr)
1262 {
1263 	if (tr->trace_buffer.buffer)
1264 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1265 	return !tr->buffer_disabled;
1266 }
1267 
1268 /**
1269  * tracing_is_on - show state of ring buffers enabled
1270  */
1271 int tracing_is_on(void)
1272 {
1273 	return tracer_tracing_is_on(&global_trace);
1274 }
1275 EXPORT_SYMBOL_GPL(tracing_is_on);
1276 
1277 static int __init set_buf_size(char *str)
1278 {
1279 	unsigned long buf_size;
1280 
1281 	if (!str)
1282 		return 0;
1283 	buf_size = memparse(str, &str);
1284 	/* nr_entries can not be zero */
1285 	if (buf_size == 0)
1286 		return 0;
1287 	trace_buf_size = buf_size;
1288 	return 1;
1289 }
1290 __setup("trace_buf_size=", set_buf_size);
1291 
1292 static int __init set_tracing_thresh(char *str)
1293 {
1294 	unsigned long threshold;
1295 	int ret;
1296 
1297 	if (!str)
1298 		return 0;
1299 	ret = kstrtoul(str, 0, &threshold);
1300 	if (ret < 0)
1301 		return 0;
1302 	tracing_thresh = threshold * 1000;
1303 	return 1;
1304 }
1305 __setup("tracing_thresh=", set_tracing_thresh);
1306 
1307 unsigned long nsecs_to_usecs(unsigned long nsecs)
1308 {
1309 	return nsecs / 1000;
1310 }
1311 
1312 /*
1313  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1314  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1315  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1316  * of strings in the order that the evals (enum) were defined.
1317  */
1318 #undef C
1319 #define C(a, b) b
1320 
1321 /* These must match the bit positions in trace_iterator_flags */
1322 static const char *trace_options[] = {
1323 	TRACE_FLAGS
1324 	NULL
1325 };
1326 
1327 static struct {
1328 	u64 (*func)(void);
1329 	const char *name;
1330 	int in_ns;		/* is this clock in nanoseconds? */
1331 } trace_clocks[] = {
1332 	{ trace_clock_local,		"local",	1 },
1333 	{ trace_clock_global,		"global",	1 },
1334 	{ trace_clock_counter,		"counter",	0 },
1335 	{ trace_clock_jiffies,		"uptime",	0 },
1336 	{ trace_clock,			"perf",		1 },
1337 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1338 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1339 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1340 	ARCH_TRACE_CLOCKS
1341 };
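
/*
 * The names above (plus any ARCH_TRACE_CLOCKS entries) are the values
 * user space can select through the tracefs "trace_clock" file, e.g.
 * "echo global > trace_clock" (illustrative example).
 */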
1342 
1343 bool trace_clock_in_ns(struct trace_array *tr)
1344 {
1345 	if (trace_clocks[tr->clock_id].in_ns)
1346 		return true;
1347 
1348 	return false;
1349 }
1350 
1351 /*
1352  * trace_parser_get_init - gets the buffer for trace parser
1353  */
1354 int trace_parser_get_init(struct trace_parser *parser, int size)
1355 {
1356 	memset(parser, 0, sizeof(*parser));
1357 
1358 	parser->buffer = kmalloc(size, GFP_KERNEL);
1359 	if (!parser->buffer)
1360 		return 1;
1361 
1362 	parser->size = size;
1363 	return 0;
1364 }
1365 
1366 /*
1367  * trace_parser_put - frees the buffer for trace parser
1368  */
1369 void trace_parser_put(struct trace_parser *parser)
1370 {
1371 	kfree(parser->buffer);
1372 	parser->buffer = NULL;
1373 }
1374 
1375 /*
1376  * trace_get_user - reads the user input string separated by space
1377  * (matched by isspace(ch))
1378  *
1379  * For each string found the 'struct trace_parser' is updated,
1380  * and the function returns.
1381  *
1382  * Returns number of bytes read.
1383  *
1384  * See kernel/trace/trace.h for 'struct trace_parser' details.
1385  */
1386 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1387 	size_t cnt, loff_t *ppos)
1388 {
1389 	char ch;
1390 	size_t read = 0;
1391 	ssize_t ret;
1392 
1393 	if (!*ppos)
1394 		trace_parser_clear(parser);
1395 
1396 	ret = get_user(ch, ubuf++);
1397 	if (ret)
1398 		goto out;
1399 
1400 	read++;
1401 	cnt--;
1402 
1403 	/*
1404 	 * If the parser did not finish with the last write,
1405 	 * continue reading the user input without skipping spaces.
1406 	 */
1407 	if (!parser->cont) {
1408 		/* skip white space */
1409 		while (cnt && isspace(ch)) {
1410 			ret = get_user(ch, ubuf++);
1411 			if (ret)
1412 				goto out;
1413 			read++;
1414 			cnt--;
1415 		}
1416 
1417 		parser->idx = 0;
1418 
1419 		/* only spaces were written */
1420 		if (isspace(ch) || !ch) {
1421 			*ppos += read;
1422 			ret = read;
1423 			goto out;
1424 		}
1425 	}
1426 
1427 	/* read the non-space input */
1428 	while (cnt && !isspace(ch) && ch) {
1429 		if (parser->idx < parser->size - 1)
1430 			parser->buffer[parser->idx++] = ch;
1431 		else {
1432 			ret = -EINVAL;
1433 			goto out;
1434 		}
1435 		ret = get_user(ch, ubuf++);
1436 		if (ret)
1437 			goto out;
1438 		read++;
1439 		cnt--;
1440 	}
1441 
1442 	/* We either got finished input or we have to wait for another call. */
1443 	if (isspace(ch) || !ch) {
1444 		parser->buffer[parser->idx] = 0;
1445 		parser->cont = false;
1446 	} else if (parser->idx < parser->size - 1) {
1447 		parser->cont = true;
1448 		parser->buffer[parser->idx++] = ch;
1449 		/* Make sure the parsed string always terminates with '\0'. */
1450 		parser->buffer[parser->idx] = 0;
1451 	} else {
1452 		ret = -EINVAL;
1453 		goto out;
1454 	}
1455 
1456 	*ppos += read;
1457 	ret = read;
1458 
1459 out:
1460 	return ret;
1461 }
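
/*
 * Summary of the parsing above: leading whitespace is skipped (unless a
 * previous call left parser->cont set), a single token is copied into
 * parser->buffer, and the number of bytes consumed from @ubuf is
 * returned.  A token cut short by @cnt sets parser->cont so the next
 * write can continue it.
 */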
1462 
1463 /* TODO add a seq_buf_to_buffer() */
1464 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1465 {
1466 	int len;
1467 
1468 	if (trace_seq_used(s) <= s->seq.readpos)
1469 		return -EBUSY;
1470 
1471 	len = trace_seq_used(s) - s->seq.readpos;
1472 	if (cnt > len)
1473 		cnt = len;
1474 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1475 
1476 	s->seq.readpos += cnt;
1477 	return cnt;
1478 }
1479 
1480 unsigned long __read_mostly	tracing_thresh;
1481 
1482 #ifdef CONFIG_TRACER_MAX_TRACE
1483 /*
1484  * Copy the new maximum trace into the separate maximum-trace
1485  * structure. (this way the maximum trace is permanently saved,
1486  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1487  */
1488 static void
1489 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1490 {
1491 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1492 	struct trace_buffer *max_buf = &tr->max_buffer;
1493 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1494 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1495 
1496 	max_buf->cpu = cpu;
1497 	max_buf->time_start = data->preempt_timestamp;
1498 
1499 	max_data->saved_latency = tr->max_latency;
1500 	max_data->critical_start = data->critical_start;
1501 	max_data->critical_end = data->critical_end;
1502 
1503 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1504 	max_data->pid = tsk->pid;
1505 	/*
1506 	 * If tsk == current, then use current_uid(), as that does not use
1507 	 * RCU. The irq tracer can be called out of RCU scope.
1508 	 */
1509 	if (tsk == current)
1510 		max_data->uid = current_uid();
1511 	else
1512 		max_data->uid = task_uid(tsk);
1513 
1514 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1515 	max_data->policy = tsk->policy;
1516 	max_data->rt_priority = tsk->rt_priority;
1517 
1518 	/* record this task's comm */
1519 	tracing_record_cmdline(tsk);
1520 }
1521 
1522 /**
1523  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1524  * @tr: tracer
1525  * @tsk: the task with the latency
1526  * @cpu: The cpu that initiated the trace.
1527  * @cond_data: User data associated with a conditional snapshot
1528  *
1529  * Flip the buffers between the @tr and the max_tr and record information
1530  * about which task was the cause of this latency.
1531  */
1532 void
1533 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1534 	      void *cond_data)
1535 {
1536 	if (tr->stop_count)
1537 		return;
1538 
1539 	WARN_ON_ONCE(!irqs_disabled());
1540 
1541 	if (!tr->allocated_snapshot) {
1542 		/* Only the nop tracer should hit this when disabling */
1543 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1544 		return;
1545 	}
1546 
1547 	arch_spin_lock(&tr->max_lock);
1548 
1549 	/* Inherit the recordable setting from trace_buffer */
1550 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1551 		ring_buffer_record_on(tr->max_buffer.buffer);
1552 	else
1553 		ring_buffer_record_off(tr->max_buffer.buffer);
1554 
1555 #ifdef CONFIG_TRACER_SNAPSHOT
1556 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1557 		goto out_unlock;
1558 #endif
1559 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1560 
1561 	__update_max_tr(tr, tsk, cpu);
1562 
1563  out_unlock:
1564 	arch_spin_unlock(&tr->max_lock);
1565 }
1566 
1567 /**
1568  * update_max_tr_single - only copy one trace over, and reset the rest
1569  * @tr: tracer
1570  * @tsk: task with the latency
1571  * @cpu: the cpu of the buffer to copy.
1572  *
1573  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1574  */
1575 void
1576 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1577 {
1578 	int ret;
1579 
1580 	if (tr->stop_count)
1581 		return;
1582 
1583 	WARN_ON_ONCE(!irqs_disabled());
1584 	if (!tr->allocated_snapshot) {
1585 		/* Only the nop tracer should hit this when disabling */
1586 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1587 		return;
1588 	}
1589 
1590 	arch_spin_lock(&tr->max_lock);
1591 
1592 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1593 
1594 	if (ret == -EBUSY) {
1595 		/*
1596 		 * We failed to swap the buffer due to a commit taking
1597 		 * place on this CPU. We fail to record, but we reset
1598 		 * the max trace buffer (no one writes directly to it)
1599 		 * and flag that it failed.
1600 		 */
1601 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1602 			"Failed to swap buffers due to commit in progress\n");
1603 	}
1604 
1605 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1606 
1607 	__update_max_tr(tr, tsk, cpu);
1608 	arch_spin_unlock(&tr->max_lock);
1609 }
1610 #endif /* CONFIG_TRACER_MAX_TRACE */
1611 
1612 static int wait_on_pipe(struct trace_iterator *iter, int full)
1613 {
1614 	/* Iterators are static, they should be filled or empty */
1615 	if (trace_buffer_iter(iter, iter->cpu_file))
1616 		return 0;
1617 
1618 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1619 				full);
1620 }
1621 
1622 #ifdef CONFIG_FTRACE_STARTUP_TEST
1623 static bool selftests_can_run;
1624 
1625 struct trace_selftests {
1626 	struct list_head		list;
1627 	struct tracer			*type;
1628 };
1629 
1630 static LIST_HEAD(postponed_selftests);
1631 
1632 static int save_selftest(struct tracer *type)
1633 {
1634 	struct trace_selftests *selftest;
1635 
1636 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1637 	if (!selftest)
1638 		return -ENOMEM;
1639 
1640 	selftest->type = type;
1641 	list_add(&selftest->list, &postponed_selftests);
1642 	return 0;
1643 }
1644 
1645 static int run_tracer_selftest(struct tracer *type)
1646 {
1647 	struct trace_array *tr = &global_trace;
1648 	struct tracer *saved_tracer = tr->current_trace;
1649 	int ret;
1650 
1651 	if (!type->selftest || tracing_selftest_disabled)
1652 		return 0;
1653 
1654 	/*
1655 	 * If a tracer registers early in boot up (before scheduling is
1656 	 * initialized and such), then do not run its selftests yet.
1657 	 * Instead, run it a little later in the boot process.
1658 	 */
1659 	if (!selftests_can_run)
1660 		return save_selftest(type);
1661 
1662 	/*
1663 	 * Run a selftest on this tracer.
1664 	 * Here we reset the trace buffer, and set the current
1665 	 * tracer to be this tracer. The tracer can then run some
1666 	 * internal tracing to verify that everything is in order.
1667 	 * If we fail, we do not register this tracer.
1668 	 */
1669 	tracing_reset_online_cpus(&tr->trace_buffer);
1670 
1671 	tr->current_trace = type;
1672 
1673 #ifdef CONFIG_TRACER_MAX_TRACE
1674 	if (type->use_max_tr) {
1675 		/* If we expanded the buffers, make sure the max is expanded too */
1676 		if (ring_buffer_expanded)
1677 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1678 					   RING_BUFFER_ALL_CPUS);
1679 		tr->allocated_snapshot = true;
1680 	}
1681 #endif
1682 
1683 	/* the test is responsible for initializing and enabling */
1684 	pr_info("Testing tracer %s: ", type->name);
1685 	ret = type->selftest(type, tr);
1686 	/* the test is responsible for resetting too */
1687 	tr->current_trace = saved_tracer;
1688 	if (ret) {
1689 		printk(KERN_CONT "FAILED!\n");
1690 		/* Add the warning after printing 'FAILED' */
1691 		WARN_ON(1);
1692 		return -1;
1693 	}
1694 	/* Only reset on passing, to avoid touching corrupted buffers */
1695 	tracing_reset_online_cpus(&tr->trace_buffer);
1696 
1697 #ifdef CONFIG_TRACER_MAX_TRACE
1698 	if (type->use_max_tr) {
1699 		tr->allocated_snapshot = false;
1700 
1701 		/* Shrink the max buffer again */
1702 		if (ring_buffer_expanded)
1703 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1704 					   RING_BUFFER_ALL_CPUS);
1705 	}
1706 #endif
1707 
1708 	printk(KERN_CONT "PASSED\n");
1709 	return 0;
1710 }
1711 
1712 static __init int init_trace_selftests(void)
1713 {
1714 	struct trace_selftests *p, *n;
1715 	struct tracer *t, **last;
1716 	int ret;
1717 
1718 	selftests_can_run = true;
1719 
1720 	mutex_lock(&trace_types_lock);
1721 
1722 	if (list_empty(&postponed_selftests))
1723 		goto out;
1724 
1725 	pr_info("Running postponed tracer tests:\n");
1726 
1727 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1728 		ret = run_tracer_selftest(p->type);
1729 		/* If the test fails, then warn and remove from available_tracers */
1730 		if (ret < 0) {
1731 			WARN(1, "tracer: %s failed selftest, disabling\n",
1732 			     p->type->name);
1733 			last = &trace_types;
1734 			for (t = trace_types; t; t = t->next) {
1735 				if (t == p->type) {
1736 					*last = t->next;
1737 					break;
1738 				}
1739 				last = &t->next;
1740 			}
1741 		}
1742 		list_del(&p->list);
1743 		kfree(p);
1744 	}
1745 
1746  out:
1747 	mutex_unlock(&trace_types_lock);
1748 
1749 	return 0;
1750 }
1751 core_initcall(init_trace_selftests);
1752 #else
1753 static inline int run_tracer_selftest(struct tracer *type)
1754 {
1755 	return 0;
1756 }
1757 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1758 
1759 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1760 
1761 static void __init apply_trace_boot_options(void);
1762 
1763 /**
1764  * register_tracer - register a tracer with the ftrace system.
1765  * @type: the plugin for the tracer
1766  *
1767  * Register a new plugin tracer.
1768  */
1769 int __init register_tracer(struct tracer *type)
1770 {
1771 	struct tracer *t;
1772 	int ret = 0;
1773 
1774 	if (!type->name) {
1775 		pr_info("Tracer must have a name\n");
1776 		return -1;
1777 	}
1778 
1779 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1780 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1781 		return -1;
1782 	}
1783 
1784 	mutex_lock(&trace_types_lock);
1785 
1786 	tracing_selftest_running = true;
1787 
1788 	for (t = trace_types; t; t = t->next) {
1789 		if (strcmp(type->name, t->name) == 0) {
1790 			/* already found */
1791 			pr_info("Tracer %s already registered\n",
1792 				type->name);
1793 			ret = -1;
1794 			goto out;
1795 		}
1796 	}
1797 
1798 	if (!type->set_flag)
1799 		type->set_flag = &dummy_set_flag;
1800 	if (!type->flags) {
1801 		/* allocate a dummy tracer_flags */
1802 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1803 		if (!type->flags) {
1804 			ret = -ENOMEM;
1805 			goto out;
1806 		}
1807 		type->flags->val = 0;
1808 		type->flags->opts = dummy_tracer_opt;
1809 	} else
1810 		if (!type->flags->opts)
1811 			type->flags->opts = dummy_tracer_opt;
1812 
1813 	/* store the tracer for __set_tracer_option */
1814 	type->flags->trace = type;
1815 
1816 	ret = run_tracer_selftest(type);
1817 	if (ret < 0)
1818 		goto out;
1819 
1820 	type->next = trace_types;
1821 	trace_types = type;
1822 	add_tracer_options(&global_trace, type);
1823 
1824  out:
1825 	tracing_selftest_running = false;
1826 	mutex_unlock(&trace_types_lock);
1827 
1828 	if (ret || !default_bootup_tracer)
1829 		goto out_unlock;
1830 
1831 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1832 		goto out_unlock;
1833 
1834 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1835 	/* Do we want this tracer to start on bootup? */
1836 	tracing_set_tracer(&global_trace, type->name);
1837 	default_bootup_tracer = NULL;
1838 
1839 	apply_trace_boot_options();
1840 
1841 	/* disable other selftests, since this tracer will break them. */
1842 	tracing_selftest_disabled = true;
1843 #ifdef CONFIG_FTRACE_STARTUP_TEST
1844 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1845 	       type->name);
1846 #endif
1847 
1848  out_unlock:
1849 	return ret;
1850 }
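
/*
 * Minimal registration sketch (illustrative only; the names below are
 * placeholders and a real tracer usually also provides output hooks
 * and a selftest):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */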
1851 
1852 void tracing_reset(struct trace_buffer *buf, int cpu)
1853 {
1854 	struct ring_buffer *buffer = buf->buffer;
1855 
1856 	if (!buffer)
1857 		return;
1858 
1859 	ring_buffer_record_disable(buffer);
1860 
1861 	/* Make sure all commits have finished */
1862 	synchronize_rcu();
1863 	ring_buffer_reset_cpu(buffer, cpu);
1864 
1865 	ring_buffer_record_enable(buffer);
1866 }
1867 
1868 void tracing_reset_online_cpus(struct trace_buffer *buf)
1869 {
1870 	struct ring_buffer *buffer = buf->buffer;
1871 	int cpu;
1872 
1873 	if (!buffer)
1874 		return;
1875 
1876 	ring_buffer_record_disable(buffer);
1877 
1878 	/* Make sure all commits have finished */
1879 	synchronize_rcu();
1880 
1881 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1882 
1883 	for_each_online_cpu(cpu)
1884 		ring_buffer_reset_cpu(buffer, cpu);
1885 
1886 	ring_buffer_record_enable(buffer);
1887 }
1888 
1889 /* Must have trace_types_lock held */
1890 void tracing_reset_all_online_cpus(void)
1891 {
1892 	struct trace_array *tr;
1893 
1894 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1895 		if (!tr->clear_trace)
1896 			continue;
1897 		tr->clear_trace = false;
1898 		tracing_reset_online_cpus(&tr->trace_buffer);
1899 #ifdef CONFIG_TRACER_MAX_TRACE
1900 		tracing_reset_online_cpus(&tr->max_buffer);
1901 #endif
1902 	}
1903 }
1904 
1905 static int *tgid_map;
1906 
1907 #define SAVED_CMDLINES_DEFAULT 128
1908 #define NO_CMDLINE_MAP UINT_MAX
1909 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1910 struct saved_cmdlines_buffer {
1911 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1912 	unsigned *map_cmdline_to_pid;
1913 	unsigned cmdline_num;
1914 	int cmdline_idx;
1915 	char *saved_cmdlines;
1916 };
1917 static struct saved_cmdlines_buffer *savedcmd;
1918 
1919 /* temporary disable recording */
1920 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1921 
1922 static inline char *get_saved_cmdlines(int idx)
1923 {
1924 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1925 }
1926 
1927 static inline void set_cmdline(int idx, const char *cmdline)
1928 {
1929 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1930 }
1931 
1932 static int allocate_cmdlines_buffer(unsigned int val,
1933 				    struct saved_cmdlines_buffer *s)
1934 {
1935 	s->map_cmdline_to_pid = kmalloc_array(val,
1936 					      sizeof(*s->map_cmdline_to_pid),
1937 					      GFP_KERNEL);
1938 	if (!s->map_cmdline_to_pid)
1939 		return -ENOMEM;
1940 
1941 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1942 	if (!s->saved_cmdlines) {
1943 		kfree(s->map_cmdline_to_pid);
1944 		return -ENOMEM;
1945 	}
1946 
1947 	s->cmdline_idx = 0;
1948 	s->cmdline_num = val;
1949 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1950 	       sizeof(s->map_pid_to_cmdline));
1951 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1952 	       val * sizeof(*s->map_cmdline_to_pid));
1953 
1954 	return 0;
1955 }
1956 
1957 static int trace_create_savedcmd(void)
1958 {
1959 	int ret;
1960 
1961 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1962 	if (!savedcmd)
1963 		return -ENOMEM;
1964 
1965 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1966 	if (ret < 0) {
1967 		kfree(savedcmd);
1968 		savedcmd = NULL;
1969 		return -ENOMEM;
1970 	}
1971 
1972 	return 0;
1973 }
1974 
1975 int is_tracing_stopped(void)
1976 {
1977 	return global_trace.stop_count;
1978 }
1979 
1980 /**
1981  * tracing_start - quick start of the tracer
1982  *
1983  * If tracing is enabled but was stopped by tracing_stop,
1984  * this will start the tracer back up.
1985  */
1986 void tracing_start(void)
1987 {
1988 	struct ring_buffer *buffer;
1989 	unsigned long flags;
1990 
1991 	if (tracing_disabled)
1992 		return;
1993 
1994 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1995 	if (--global_trace.stop_count) {
1996 		if (global_trace.stop_count < 0) {
1997 			/* Someone screwed up their debugging */
1998 			WARN_ON_ONCE(1);
1999 			global_trace.stop_count = 0;
2000 		}
2001 		goto out;
2002 	}
2003 
2004 	/* Prevent the buffers from switching */
2005 	arch_spin_lock(&global_trace.max_lock);
2006 
2007 	buffer = global_trace.trace_buffer.buffer;
2008 	if (buffer)
2009 		ring_buffer_record_enable(buffer);
2010 
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012 	buffer = global_trace.max_buffer.buffer;
2013 	if (buffer)
2014 		ring_buffer_record_enable(buffer);
2015 #endif
2016 
2017 	arch_spin_unlock(&global_trace.max_lock);
2018 
2019  out:
2020 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2021 }
2022 
2023 static void tracing_start_tr(struct trace_array *tr)
2024 {
2025 	struct ring_buffer *buffer;
2026 	unsigned long flags;
2027 
2028 	if (tracing_disabled)
2029 		return;
2030 
2031 	/* If global, we need to also start the max tracer */
2032 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2033 		return tracing_start();
2034 
2035 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2036 
2037 	if (--tr->stop_count) {
2038 		if (tr->stop_count < 0) {
2039 			/* Someone screwed up their debugging */
2040 			WARN_ON_ONCE(1);
2041 			tr->stop_count = 0;
2042 		}
2043 		goto out;
2044 	}
2045 
2046 	buffer = tr->trace_buffer.buffer;
2047 	if (buffer)
2048 		ring_buffer_record_enable(buffer);
2049 
2050  out:
2051 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2052 }
2053 
2054 /**
2055  * tracing_stop - quick stop of the tracer
2056  *
2057  * Lightweight way to stop tracing. Use in conjunction with
2058  * tracing_start.
2059  */
2060 void tracing_stop(void)
2061 {
2062 	struct ring_buffer *buffer;
2063 	unsigned long flags;
2064 
2065 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2066 	if (global_trace.stop_count++)
2067 		goto out;
2068 
2069 	/* Prevent the buffers from switching */
2070 	arch_spin_lock(&global_trace.max_lock);
2071 
2072 	buffer = global_trace.trace_buffer.buffer;
2073 	if (buffer)
2074 		ring_buffer_record_disable(buffer);
2075 
2076 #ifdef CONFIG_TRACER_MAX_TRACE
2077 	buffer = global_trace.max_buffer.buffer;
2078 	if (buffer)
2079 		ring_buffer_record_disable(buffer);
2080 #endif
2081 
2082 	arch_spin_unlock(&global_trace.max_lock);
2083 
2084  out:
2085 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2086 }
2087 
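/*
 * Example (illustrative sketch only, not compiled): tracing_stop() and
 * tracing_start() are reference counted, so nested stop/start pairs
 * balance out. A debug helper bracketing a noisy section might look like
 * this; run_quietly() is a hypothetical helper, not a kernel API.
 */
#if 0	/* example only */
static void run_quietly(void (*fn)(void))
{
	tracing_stop();		/* pause recording into the ring buffer */
	fn();			/* work we do not want traced */
	tracing_start();	/* resumes once the stop count drops to zero */
}
#endif
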
2088 static void tracing_stop_tr(struct trace_array *tr)
2089 {
2090 	struct ring_buffer *buffer;
2091 	unsigned long flags;
2092 
2093 	/* If global, we need to also stop the max tracer */
2094 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2095 		return tracing_stop();
2096 
2097 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2098 	if (tr->stop_count++)
2099 		goto out;
2100 
2101 	buffer = tr->trace_buffer.buffer;
2102 	if (buffer)
2103 		ring_buffer_record_disable(buffer);
2104 
2105  out:
2106 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2107 }
2108 
2109 static int trace_save_cmdline(struct task_struct *tsk)
2110 {
2111 	unsigned pid, idx;
2112 
2113 	/* treat recording of idle task as a success */
2114 	if (!tsk->pid)
2115 		return 1;
2116 
2117 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2118 		return 0;
2119 
2120 	/*
2121 	 * It's not the end of the world if we don't get
2122 	 * the lock, but we also don't want to spin
2123 	 * nor do we want to disable interrupts,
2124 	 * so if we miss here, then better luck next time.
2125 	 */
2126 	if (!arch_spin_trylock(&trace_cmdline_lock))
2127 		return 0;
2128 
2129 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2130 	if (idx == NO_CMDLINE_MAP) {
2131 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2132 
2133 		/*
2134 		 * Check whether the cmdline buffer at idx has a pid
2135 		 * mapped. We are going to overwrite that entry so we
2136 		 * need to clear the map_pid_to_cmdline. Otherwise we
2137 		 * would read the new comm for the old pid.
2138 		 */
2139 		pid = savedcmd->map_cmdline_to_pid[idx];
2140 		if (pid != NO_CMDLINE_MAP)
2141 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2142 
2143 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2144 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2145 
2146 		savedcmd->cmdline_idx = idx;
2147 	}
2148 
2149 	set_cmdline(idx, tsk->comm);
2150 
2151 	arch_spin_unlock(&trace_cmdline_lock);
2152 
2153 	return 1;
2154 }
2155 
2156 static void __trace_find_cmdline(int pid, char comm[])
2157 {
2158 	unsigned map;
2159 
2160 	if (!pid) {
2161 		strcpy(comm, "<idle>");
2162 		return;
2163 	}
2164 
2165 	if (WARN_ON_ONCE(pid < 0)) {
2166 		strcpy(comm, "<XXX>");
2167 		return;
2168 	}
2169 
2170 	if (pid > PID_MAX_DEFAULT) {
2171 		strcpy(comm, "<...>");
2172 		return;
2173 	}
2174 
2175 	map = savedcmd->map_pid_to_cmdline[pid];
2176 	if (map != NO_CMDLINE_MAP)
2177 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2178 	else
2179 		strcpy(comm, "<...>");
2180 }
2181 
2182 void trace_find_cmdline(int pid, char comm[])
2183 {
2184 	preempt_disable();
2185 	arch_spin_lock(&trace_cmdline_lock);
2186 
2187 	__trace_find_cmdline(pid, comm);
2188 
2189 	arch_spin_unlock(&trace_cmdline_lock);
2190 	preempt_enable();
2191 }
2192 
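/*
 * Example (illustrative sketch only, not compiled): a consumer that
 * recorded a pid in an event can resolve it back to a comm with
 * trace_find_cmdline(). The buffer must be TASK_COMM_LEN bytes, and
 * "<...>" is returned when the mapping was evicted or never recorded.
 * show_comm_of() is a hypothetical helper.
 */
#if 0	/* example only */
static void show_comm_of(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif
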
2193 int trace_find_tgid(int pid)
2194 {
2195 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2196 		return 0;
2197 
2198 	return tgid_map[pid];
2199 }
2200 
2201 static int trace_save_tgid(struct task_struct *tsk)
2202 {
2203 	/* treat recording of idle task as a success */
2204 	if (!tsk->pid)
2205 		return 1;
2206 
2207 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2208 		return 0;
2209 
2210 	tgid_map[tsk->pid] = tsk->tgid;
2211 	return 1;
2212 }
2213 
2214 static bool tracing_record_taskinfo_skip(int flags)
2215 {
2216 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2217 		return true;
2218 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2219 		return true;
2220 	if (!__this_cpu_read(trace_taskinfo_save))
2221 		return true;
2222 	return false;
2223 }
2224 
2225 /**
2226  * tracing_record_taskinfo - record the task info of a task
2227  *
2228  * @task  - task to record
2229  * @flags - TRACE_RECORD_CMDLINE for recording comm
2230  *        - TRACE_RECORD_TGID for recording tgid
2231  */
2232 void tracing_record_taskinfo(struct task_struct *task, int flags)
2233 {
2234 	bool done;
2235 
2236 	if (tracing_record_taskinfo_skip(flags))
2237 		return;
2238 
2239 	/*
2240 	 * Record as much task information as possible. If some fail, continue
2241 	 * to try to record the others.
2242 	 */
2243 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2244 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2245 
2246 	/* If recording any information failed, retry again soon. */
2247 	if (!done)
2248 		return;
2249 
2250 	__this_cpu_write(trace_taskinfo_save, false);
2251 }
2252 
2253 /**
2254  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2255  *
2256  * @prev - previous task during sched_switch
2257  * @next - next task during sched_switch
2258  * @flags - TRACE_RECORD_CMDLINE for recording comm
2259  *          TRACE_RECORD_TGID for recording tgid
2260  */
2261 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2262 					  struct task_struct *next, int flags)
2263 {
2264 	bool done;
2265 
2266 	if (tracing_record_taskinfo_skip(flags))
2267 		return;
2268 
2269 	/*
2270 	 * Record as much task information as possible. If some fail, continue
2271 	 * to try to record the others.
2272 	 */
2273 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2274 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2275 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2276 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2277 
2278 	/* If recording any information failed, retry again soon. */
2279 	if (!done)
2280 		return;
2281 
2282 	__this_cpu_write(trace_taskinfo_save, false);
2283 }
2284 
2285 /* Helpers to record a specific task information */
2286 void tracing_record_cmdline(struct task_struct *task)
2287 {
2288 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2289 }
2290 
2291 void tracing_record_tgid(struct task_struct *task)
2292 {
2293 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2294 }
2295 
2296 /*
2297  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2298  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2299  * simplifies those functions and keeps them in sync.
2300  */
2301 enum print_line_t trace_handle_return(struct trace_seq *s)
2302 {
2303 	return trace_seq_has_overflowed(s) ?
2304 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_handle_return);
2307 
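/*
 * Example (illustrative sketch only, not compiled): how a print handler
 * uses trace_handle_return() after writing into the trace_seq, mirroring
 * the print_*_fmt() helpers later in this file. my_event_trace() is a
 * hypothetical handler.
 */
#if 0	/* example only */
static enum print_line_t my_event_trace(struct trace_iterator *iter,
					int flags, struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "my event on cpu %d\n", iter->cpu);

	/* collapses the overflow check into a single return value */
	return trace_handle_return(&iter->seq);
}
#endif
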
2308 void
2309 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2310 			     int pc)
2311 {
2312 	struct task_struct *tsk = current;
2313 
2314 	entry->preempt_count		= pc & 0xff;
2315 	entry->pid			= (tsk) ? tsk->pid : 0;
2316 	entry->flags =
2317 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2318 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2319 #else
2320 		TRACE_FLAG_IRQS_NOSUPPORT |
2321 #endif
2322 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2323 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2324 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2325 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2326 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2327 }
2328 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2329 
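/*
 * Example (illustrative sketch only, not compiled): callers typically
 * capture the irq flags and preempt count at the tracepoint and hand them
 * to tracing_generic_entry_update() when filling a reserved entry, as the
 * trace_vbprintk()/trace_function() paths below do. fill_entry_header()
 * is a hypothetical helper.
 */
#if 0	/* example only */
static void fill_entry_header(struct trace_entry *ent)
{
	unsigned long irq_flags;

	local_save_flags(irq_flags);
	tracing_generic_entry_update(ent, irq_flags, preempt_count());
}
#endif
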
2330 struct ring_buffer_event *
2331 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2332 			  int type,
2333 			  unsigned long len,
2334 			  unsigned long flags, int pc)
2335 {
2336 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2337 }
2338 
2339 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2340 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2341 static int trace_buffered_event_ref;
2342 
2343 /**
2344  * trace_buffered_event_enable - enable buffering events
2345  *
2346  * When events are being filtered, it is quicker to use a temporary
2347  * buffer to write the event data into if there's a likely chance
2348  * that it will not be committed. Discarding an event from the ring
2349  * buffer is not as fast as committing it, and is much slower than
2350  * copying the data and committing it in one shot.
2351  *
2352  * When an event is to be filtered, allocate per cpu buffers to
2353  * write the event data into, and if the event is filtered and discarded
2354  * it is simply dropped, otherwise, the entire data is to be committed
2355  * in one shot.
2356  */
2357 void trace_buffered_event_enable(void)
2358 {
2359 	struct ring_buffer_event *event;
2360 	struct page *page;
2361 	int cpu;
2362 
2363 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2364 
2365 	if (trace_buffered_event_ref++)
2366 		return;
2367 
2368 	for_each_tracing_cpu(cpu) {
2369 		page = alloc_pages_node(cpu_to_node(cpu),
2370 					GFP_KERNEL | __GFP_NORETRY, 0);
2371 		if (!page)
2372 			goto failed;
2373 
2374 		event = page_address(page);
2375 		memset(event, 0, sizeof(*event));
2376 
2377 		per_cpu(trace_buffered_event, cpu) = event;
2378 
2379 		preempt_disable();
2380 		if (cpu == smp_processor_id() &&
2381 		    this_cpu_read(trace_buffered_event) !=
2382 		    per_cpu(trace_buffered_event, cpu))
2383 			WARN_ON_ONCE(1);
2384 		preempt_enable();
2385 	}
2386 
2387 	return;
2388  failed:
2389 	trace_buffered_event_disable();
2390 }
2391 
2392 static void enable_trace_buffered_event(void *data)
2393 {
2394 	/* Probably not needed, but do it anyway */
2395 	smp_rmb();
2396 	this_cpu_dec(trace_buffered_event_cnt);
2397 }
2398 
2399 static void disable_trace_buffered_event(void *data)
2400 {
2401 	this_cpu_inc(trace_buffered_event_cnt);
2402 }
2403 
2404 /**
2405  * trace_buffered_event_disable - disable buffering events
2406  *
2407  * When a filter is removed, it is faster to not use the buffered
2408  * events, and to commit directly into the ring buffer. Free up
2409  * the temp buffers when there are no more users. This requires
2410  * special synchronization with current events.
2411  */
2412 void trace_buffered_event_disable(void)
2413 {
2414 	int cpu;
2415 
2416 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2417 
2418 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2419 		return;
2420 
2421 	if (--trace_buffered_event_ref)
2422 		return;
2423 
2424 	preempt_disable();
2425 	/* For each CPU, set the buffer as used. */
2426 	smp_call_function_many(tracing_buffer_mask,
2427 			       disable_trace_buffered_event, NULL, 1);
2428 	preempt_enable();
2429 
2430 	/* Wait for all current users to finish */
2431 	synchronize_rcu();
2432 
2433 	for_each_tracing_cpu(cpu) {
2434 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2435 		per_cpu(trace_buffered_event, cpu) = NULL;
2436 	}
2437 	/*
2438 	 * Make sure trace_buffered_event is NULL before clearing
2439 	 * trace_buffered_event_cnt.
2440 	 */
2441 	smp_wmb();
2442 
2443 	preempt_disable();
2444 	/* Do the work on each cpu */
2445 	smp_call_function_many(tracing_buffer_mask,
2446 			       enable_trace_buffered_event, NULL, 1);
2447 	preempt_enable();
2448 }
2449 
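/*
 * Example (illustrative sketch only, not compiled): the enable/disable
 * pair is reference counted and both expect event_mutex to be held, so a
 * filter setup/teardown path would bracket its work as below. The
 * install_filter_buffered()/remove_filter_buffered() helpers are
 * hypothetical.
 */
#if 0	/* example only */
static void install_filter_buffered(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* allocate the per-cpu pages */
	/* ... attach the filter ... */
	mutex_unlock(&event_mutex);
}

static void remove_filter_buffered(void)
{
	mutex_lock(&event_mutex);
	/* ... detach the filter ... */
	trace_buffered_event_disable();	/* frees the pages at the last user */
	mutex_unlock(&event_mutex);
}
#endif
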
2450 static struct ring_buffer *temp_buffer;
2451 
2452 struct ring_buffer_event *
2453 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2454 			  struct trace_event_file *trace_file,
2455 			  int type, unsigned long len,
2456 			  unsigned long flags, int pc)
2457 {
2458 	struct ring_buffer_event *entry;
2459 	int val;
2460 
2461 	*current_rb = trace_file->tr->trace_buffer.buffer;
2462 
2463 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2464 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2465 	    (entry = this_cpu_read(trace_buffered_event))) {
2466 		/* Try to use the per cpu buffer first */
2467 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2468 		if (val == 1) {
2469 			trace_event_setup(entry, type, flags, pc);
2470 			entry->array[0] = len;
2471 			return entry;
2472 		}
2473 		this_cpu_dec(trace_buffered_event_cnt);
2474 	}
2475 
2476 	entry = __trace_buffer_lock_reserve(*current_rb,
2477 					    type, len, flags, pc);
2478 	/*
2479 	 * If tracing is off, but we have triggers enabled
2480 	 * we still need to look at the event data. Use the temp_buffer
2481 	 * to store the trace event for the trigger to use. It's recursion
2482 	 * safe and will not be recorded anywhere.
2483 	 */
2484 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2485 		*current_rb = temp_buffer;
2486 		entry = __trace_buffer_lock_reserve(*current_rb,
2487 						    type, len, flags, pc);
2488 	}
2489 	return entry;
2490 }
2491 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2492 
2493 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2494 static DEFINE_MUTEX(tracepoint_printk_mutex);
2495 
2496 static void output_printk(struct trace_event_buffer *fbuffer)
2497 {
2498 	struct trace_event_call *event_call;
2499 	struct trace_event *event;
2500 	unsigned long flags;
2501 	struct trace_iterator *iter = tracepoint_print_iter;
2502 
2503 	/* We should never get here with a NULL iter */
2504 	if (WARN_ON_ONCE(!iter))
2505 		return;
2506 
2507 	event_call = fbuffer->trace_file->event_call;
2508 	if (!event_call || !event_call->event.funcs ||
2509 	    !event_call->event.funcs->trace)
2510 		return;
2511 
2512 	event = &fbuffer->trace_file->event_call->event;
2513 
2514 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2515 	trace_seq_init(&iter->seq);
2516 	iter->ent = fbuffer->entry;
2517 	event_call->event.funcs->trace(iter, 0, event);
2518 	trace_seq_putc(&iter->seq, 0);
2519 	printk("%s", iter->seq.buffer);
2520 
2521 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2522 }
2523 
2524 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2525 			     void __user *buffer, size_t *lenp,
2526 			     loff_t *ppos)
2527 {
2528 	int save_tracepoint_printk;
2529 	int ret;
2530 
2531 	mutex_lock(&tracepoint_printk_mutex);
2532 	save_tracepoint_printk = tracepoint_printk;
2533 
2534 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2535 
2536 	/*
2537 	 * This will force exiting early, as tracepoint_printk
2538 	 * is always zero when tracepoint_print_iter is not allocated
2539 	 */
2540 	if (!tracepoint_print_iter)
2541 		tracepoint_printk = 0;
2542 
2543 	if (save_tracepoint_printk == tracepoint_printk)
2544 		goto out;
2545 
2546 	if (tracepoint_printk)
2547 		static_key_enable(&tracepoint_printk_key.key);
2548 	else
2549 		static_key_disable(&tracepoint_printk_key.key);
2550 
2551  out:
2552 	mutex_unlock(&tracepoint_printk_mutex);
2553 
2554 	return ret;
2555 }
2556 
2557 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2558 {
2559 	if (static_key_false(&tracepoint_printk_key.key))
2560 		output_printk(fbuffer);
2561 
2562 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2563 				    fbuffer->event, fbuffer->entry,
2564 				    fbuffer->flags, fbuffer->pc);
2565 }
2566 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2567 
2568 /*
2569  * Skip 3:
2570  *
2571  *   trace_buffer_unlock_commit_regs()
2572  *   trace_event_buffer_commit()
2573  *   trace_event_raw_event_xxx()
2574  */
2575 # define STACK_SKIP 3
2576 
2577 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2578 				     struct ring_buffer *buffer,
2579 				     struct ring_buffer_event *event,
2580 				     unsigned long flags, int pc,
2581 				     struct pt_regs *regs)
2582 {
2583 	__buffer_unlock_commit(buffer, event);
2584 
2585 	/*
2586 	 * If regs is not set, then skip the necessary functions.
2587 	 * Note, we can still get here via blktrace, wakeup tracer
2588 	 * and mmiotrace, but that's ok if they lose a function or
2589 	 * two. They are not that meaningful.
2590 	 */
2591 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2592 	ftrace_trace_userstack(buffer, flags, pc);
2593 }
2594 
2595 /*
2596  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2597  */
2598 void
2599 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2600 				   struct ring_buffer_event *event)
2601 {
2602 	__buffer_unlock_commit(buffer, event);
2603 }
2604 
2605 static void
2606 trace_process_export(struct trace_export *export,
2607 	       struct ring_buffer_event *event)
2608 {
2609 	struct trace_entry *entry;
2610 	unsigned int size = 0;
2611 
2612 	entry = ring_buffer_event_data(event);
2613 	size = ring_buffer_event_length(event);
2614 	export->write(export, entry, size);
2615 }
2616 
2617 static DEFINE_MUTEX(ftrace_export_lock);
2618 
2619 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2620 
2621 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2622 
2623 static inline void ftrace_exports_enable(void)
2624 {
2625 	static_branch_enable(&ftrace_exports_enabled);
2626 }
2627 
2628 static inline void ftrace_exports_disable(void)
2629 {
2630 	static_branch_disable(&ftrace_exports_enabled);
2631 }
2632 
2633 static void ftrace_exports(struct ring_buffer_event *event)
2634 {
2635 	struct trace_export *export;
2636 
2637 	preempt_disable_notrace();
2638 
2639 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2640 	while (export) {
2641 		trace_process_export(export, event);
2642 		export = rcu_dereference_raw_notrace(export->next);
2643 	}
2644 
2645 	preempt_enable_notrace();
2646 }
2647 
2648 static inline void
2649 add_trace_export(struct trace_export **list, struct trace_export *export)
2650 {
2651 	rcu_assign_pointer(export->next, *list);
2652 	/*
2653 	 * We are entering export into the list but another
2654 	 * CPU might be walking that list. We need to make sure
2655 	 * the export->next pointer is valid before another CPU sees
2656 	 * the export pointer included into the list.
2657 	 */
2658 	rcu_assign_pointer(*list, export);
2659 }
2660 
2661 static inline int
2662 rm_trace_export(struct trace_export **list, struct trace_export *export)
2663 {
2664 	struct trace_export **p;
2665 
2666 	for (p = list; *p != NULL; p = &(*p)->next)
2667 		if (*p == export)
2668 			break;
2669 
2670 	if (*p != export)
2671 		return -1;
2672 
2673 	rcu_assign_pointer(*p, (*p)->next);
2674 
2675 	return 0;
2676 }
2677 
2678 static inline void
2679 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2680 {
2681 	if (*list == NULL)
2682 		ftrace_exports_enable();
2683 
2684 	add_trace_export(list, export);
2685 }
2686 
2687 static inline int
2688 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2689 {
2690 	int ret;
2691 
2692 	ret = rm_trace_export(list, export);
2693 	if (*list == NULL)
2694 		ftrace_exports_disable();
2695 
2696 	return ret;
2697 }
2698 
2699 int register_ftrace_export(struct trace_export *export)
2700 {
2701 	if (WARN_ON_ONCE(!export->write))
2702 		return -1;
2703 
2704 	mutex_lock(&ftrace_export_lock);
2705 
2706 	add_ftrace_export(&ftrace_exports_list, export);
2707 
2708 	mutex_unlock(&ftrace_export_lock);
2709 
2710 	return 0;
2711 }
2712 EXPORT_SYMBOL_GPL(register_ftrace_export);
2713 
2714 int unregister_ftrace_export(struct trace_export *export)
2715 {
2716 	int ret;
2717 
2718 	mutex_lock(&ftrace_export_lock);
2719 
2720 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2721 
2722 	mutex_unlock(&ftrace_export_lock);
2723 
2724 	return ret;
2725 }
2726 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2727 
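/*
 * Example (illustrative sketch only, not compiled): a consumer of the
 * export hook supplies a ->write() callback and registers it; every
 * exported function trace event is then handed to that callback. The
 * my_export/my_export_write names are hypothetical, and the ->write()
 * signature is taken from the trace_process_export() call above.
 */
#if 0	/* example only */
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* e.g. push the raw entry to a device or firmware log */
}

static struct trace_export my_export = {
	.write	= my_export_write,
};

static int my_export_init(void)	/* e.g. called from an initcall */
{
	return register_ftrace_export(&my_export);
}
#endif
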
2728 void
2729 trace_function(struct trace_array *tr,
2730 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2731 	       int pc)
2732 {
2733 	struct trace_event_call *call = &event_function;
2734 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2735 	struct ring_buffer_event *event;
2736 	struct ftrace_entry *entry;
2737 
2738 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2739 					    flags, pc);
2740 	if (!event)
2741 		return;
2742 	entry	= ring_buffer_event_data(event);
2743 	entry->ip			= ip;
2744 	entry->parent_ip		= parent_ip;
2745 
2746 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2747 		if (static_branch_unlikely(&ftrace_exports_enabled))
2748 			ftrace_exports(event);
2749 		__buffer_unlock_commit(buffer, event);
2750 	}
2751 }
2752 
2753 #ifdef CONFIG_STACKTRACE
2754 
2755 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2756 struct ftrace_stack {
2757 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2758 };
2759 
2760 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2761 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2762 
2763 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2764 				 unsigned long flags,
2765 				 int skip, int pc, struct pt_regs *regs)
2766 {
2767 	struct trace_event_call *call = &event_kernel_stack;
2768 	struct ring_buffer_event *event;
2769 	struct stack_entry *entry;
2770 	struct stack_trace trace;
2771 	int use_stack;
2772 	int size = FTRACE_STACK_ENTRIES;
2773 
2774 	trace.nr_entries	= 0;
2775 	trace.skip		= skip;
2776 
2777 	/*
2778 	 * Add one, for this function and the call to save_stack_trace()
2779 	 * If regs is set, then these functions will not be in the way.
2780 	 */
2781 #ifndef CONFIG_UNWINDER_ORC
2782 	if (!regs)
2783 		trace.skip++;
2784 #endif
2785 
2786 	/*
2787 	 * Since events can happen in NMIs there's no safe way to
2788 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2789 	 * or NMI comes in, it will just have to use the default
2790 	 * FTRACE_STACK_ENTRIES.
2791 	 */
2792 	preempt_disable_notrace();
2793 
2794 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2795 	/*
2796 	 * We don't need any atomic variables, just a barrier.
2797 	 * If an interrupt comes in, we don't care, because it would
2798 	 * have exited and put the counter back to what we want.
2799 	 * We just need a barrier to keep gcc from moving things
2800 	 * around.
2801 	 */
2802 	barrier();
2803 	if (use_stack == 1) {
2804 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2805 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2806 
2807 		if (regs)
2808 			save_stack_trace_regs(regs, &trace);
2809 		else
2810 			save_stack_trace(&trace);
2811 
2812 		if (trace.nr_entries > size)
2813 			size = trace.nr_entries;
2814 	} else
2815 		/* From now on, use_stack is a boolean */
2816 		use_stack = 0;
2817 
2818 	size *= sizeof(unsigned long);
2819 
2820 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2821 					    sizeof(*entry) + size, flags, pc);
2822 	if (!event)
2823 		goto out;
2824 	entry = ring_buffer_event_data(event);
2825 
2826 	memset(&entry->caller, 0, size);
2827 
2828 	if (use_stack)
2829 		memcpy(&entry->caller, trace.entries,
2830 		       trace.nr_entries * sizeof(unsigned long));
2831 	else {
2832 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2833 		trace.entries		= entry->caller;
2834 		if (regs)
2835 			save_stack_trace_regs(regs, &trace);
2836 		else
2837 			save_stack_trace(&trace);
2838 	}
2839 
2840 	entry->size = trace.nr_entries;
2841 
2842 	if (!call_filter_check_discard(call, entry, buffer, event))
2843 		__buffer_unlock_commit(buffer, event);
2844 
2845  out:
2846 	/* Again, don't let gcc optimize things here */
2847 	barrier();
2848 	__this_cpu_dec(ftrace_stack_reserve);
2849 	preempt_enable_notrace();
2850 
2851 }
2852 
2853 static inline void ftrace_trace_stack(struct trace_array *tr,
2854 				      struct ring_buffer *buffer,
2855 				      unsigned long flags,
2856 				      int skip, int pc, struct pt_regs *regs)
2857 {
2858 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2859 		return;
2860 
2861 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2862 }
2863 
2864 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2865 		   int pc)
2866 {
2867 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2868 
2869 	if (rcu_is_watching()) {
2870 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2871 		return;
2872 	}
2873 
2874 	/*
2875 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2876 	 * but if the above rcu_is_watching() failed, then the NMI
2877 	 * triggered someplace critical, and rcu_irq_enter() should
2878 	 * not be called from NMI.
2879 	 */
2880 	if (unlikely(in_nmi()))
2881 		return;
2882 
2883 	rcu_irq_enter_irqson();
2884 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2885 	rcu_irq_exit_irqson();
2886 }
2887 
2888 /**
2889  * trace_dump_stack - record a stack back trace in the trace buffer
2890  * @skip: Number of functions to skip (helper handlers)
2891  */
2892 void trace_dump_stack(int skip)
2893 {
2894 	unsigned long flags;
2895 
2896 	if (tracing_disabled || tracing_selftest_running)
2897 		return;
2898 
2899 	local_save_flags(flags);
2900 
2901 #ifndef CONFIG_UNWINDER_ORC
2902 	/* Skip 1 to skip this function. */
2903 	skip++;
2904 #endif
2905 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2906 			     flags, skip, preempt_count(), NULL);
2907 }
2908 EXPORT_SYMBOL_GPL(trace_dump_stack);
2909 
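/*
 * Example (illustrative sketch only, not compiled): unlike dump_stack(),
 * trace_dump_stack() records the backtrace into the ring buffer rather
 * than the console, which keeps it cheap enough for hot paths being
 * debugged. debug_unexpected_state() is a hypothetical helper.
 */
#if 0	/* example only */
static void debug_unexpected_state(void)
{
	trace_printk("unexpected state, capturing backtrace\n");
	trace_dump_stack(0);	/* 0: do not skip any extra callers */
}
#endif
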
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911 
2912 void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915 	struct trace_event_call *call = &event_user_stack;
2916 	struct ring_buffer_event *event;
2917 	struct userstack_entry *entry;
2918 	struct stack_trace trace;
2919 
2920 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2921 		return;
2922 
2923 	/*
2924 	 * NMIs can not handle page faults, even with fixups.
2925 	 * Saving the user stack can (and often does) fault.
2926 	 */
2927 	if (unlikely(in_nmi()))
2928 		return;
2929 
2930 	/*
2931 	 * prevent recursion, since the user stack tracing may
2932 	 * trigger other kernel events.
2933 	 */
2934 	preempt_disable();
2935 	if (__this_cpu_read(user_stack_count))
2936 		goto out;
2937 
2938 	__this_cpu_inc(user_stack_count);
2939 
2940 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2941 					    sizeof(*entry), flags, pc);
2942 	if (!event)
2943 		goto out_drop_count;
2944 	entry	= ring_buffer_event_data(event);
2945 
2946 	entry->tgid		= current->tgid;
2947 	memset(&entry->caller, 0, sizeof(entry->caller));
2948 
2949 	trace.nr_entries	= 0;
2950 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2951 	trace.skip		= 0;
2952 	trace.entries		= entry->caller;
2953 
2954 	save_stack_trace_user(&trace);
2955 	if (!call_filter_check_discard(call, entry, buffer, event))
2956 		__buffer_unlock_commit(buffer, event);
2957 
2958  out_drop_count:
2959 	__this_cpu_dec(user_stack_count);
2960  out:
2961 	preempt_enable();
2962 }
2963 
2964 #ifdef UNUSED
2965 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2966 {
2967 	ftrace_trace_userstack(tr, flags, preempt_count());
2968 }
2969 #endif /* UNUSED */
2970 
2971 #endif /* CONFIG_STACKTRACE */
2972 
2973 /* created for use with alloc_percpu */
2974 struct trace_buffer_struct {
2975 	int nesting;
2976 	char buffer[4][TRACE_BUF_SIZE];
2977 };
2978 
2979 static struct trace_buffer_struct *trace_percpu_buffer;
2980 
2981 /*
2982  * This allows for lockless recording.  If we're nested too deeply, then
2983  * this returns NULL.
2984  */
2985 static char *get_trace_buf(void)
2986 {
2987 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2988 
2989 	if (!buffer || buffer->nesting >= 4)
2990 		return NULL;
2991 
2992 	buffer->nesting++;
2993 
2994 	/* Interrupts must see nesting incremented before we use the buffer */
2995 	barrier();
2996 	return &buffer->buffer[buffer->nesting][0];
2997 }
2998 
2999 static void put_trace_buf(void)
3000 {
3001 	/* Don't let the decrement of nesting leak before this */
3002 	barrier();
3003 	this_cpu_dec(trace_percpu_buffer->nesting);
3004 }
3005 
3006 static int alloc_percpu_trace_buffer(void)
3007 {
3008 	struct trace_buffer_struct *buffers;
3009 
3010 	buffers = alloc_percpu(struct trace_buffer_struct);
3011 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3012 		return -ENOMEM;
3013 
3014 	trace_percpu_buffer = buffers;
3015 	return 0;
3016 }
3017 
3018 static int buffers_allocated;
3019 
3020 void trace_printk_init_buffers(void)
3021 {
3022 	if (buffers_allocated)
3023 		return;
3024 
3025 	if (alloc_percpu_trace_buffer())
3026 		return;
3027 
3028 	/* trace_printk() is for debug use only. Don't use it in production. */
3029 
3030 	pr_warn("\n");
3031 	pr_warn("**********************************************************\n");
3032 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3033 	pr_warn("**                                                      **\n");
3034 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3035 	pr_warn("**                                                      **\n");
3036 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3037 	pr_warn("** unsafe for production use.                           **\n");
3038 	pr_warn("**                                                      **\n");
3039 	pr_warn("** If you see this message and you are not debugging    **\n");
3040 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3041 	pr_warn("**                                                      **\n");
3042 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3043 	pr_warn("**********************************************************\n");
3044 
3045 	/* Expand the buffers to set size */
3046 	tracing_update_buffers();
3047 
3048 	buffers_allocated = 1;
3049 
3050 	/*
3051 	 * trace_printk_init_buffers() can be called by modules.
3052 	 * If that happens, then we need to start cmdline recording
3053 	 * directly here. If the global_trace.buffer is already
3054 	 * allocated here, then this was called by module code.
3055 	 */
3056 	if (global_trace.trace_buffer.buffer)
3057 		tracing_start_cmdline_record();
3058 }
3059 
3060 void trace_printk_start_comm(void)
3061 {
3062 	/* Start tracing comms if trace printk is set */
3063 	if (!buffers_allocated)
3064 		return;
3065 	tracing_start_cmdline_record();
3066 }
3067 
3068 static void trace_printk_start_stop_comm(int enabled)
3069 {
3070 	if (!buffers_allocated)
3071 		return;
3072 
3073 	if (enabled)
3074 		tracing_start_cmdline_record();
3075 	else
3076 		tracing_stop_cmdline_record();
3077 }
3078 
3079 /**
3080  * trace_vbprintk - write binary msg to tracing buffer
3081  *
3082  */
3083 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3084 {
3085 	struct trace_event_call *call = &event_bprint;
3086 	struct ring_buffer_event *event;
3087 	struct ring_buffer *buffer;
3088 	struct trace_array *tr = &global_trace;
3089 	struct bprint_entry *entry;
3090 	unsigned long flags;
3091 	char *tbuffer;
3092 	int len = 0, size, pc;
3093 
3094 	if (unlikely(tracing_selftest_running || tracing_disabled))
3095 		return 0;
3096 
3097 	/* Don't pollute graph traces with trace_vprintk internals */
3098 	pause_graph_tracing();
3099 
3100 	pc = preempt_count();
3101 	preempt_disable_notrace();
3102 
3103 	tbuffer = get_trace_buf();
3104 	if (!tbuffer) {
3105 		len = 0;
3106 		goto out_nobuffer;
3107 	}
3108 
3109 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3110 
3111 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3112 		goto out;
3113 
3114 	local_save_flags(flags);
3115 	size = sizeof(*entry) + sizeof(u32) * len;
3116 	buffer = tr->trace_buffer.buffer;
3117 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3118 					    flags, pc);
3119 	if (!event)
3120 		goto out;
3121 	entry = ring_buffer_event_data(event);
3122 	entry->ip			= ip;
3123 	entry->fmt			= fmt;
3124 
3125 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3126 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3127 		__buffer_unlock_commit(buffer, event);
3128 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3129 	}
3130 
3131 out:
3132 	put_trace_buf();
3133 
3134 out_nobuffer:
3135 	preempt_enable_notrace();
3136 	unpause_graph_tracing();
3137 
3138 	return len;
3139 }
3140 EXPORT_SYMBOL_GPL(trace_vbprintk);
3141 
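/*
 * Example (illustrative sketch only, not compiled): trace_vbprintk() is
 * what trace_printk() ends up calling when the format string is a
 * build-time constant and can be recorded in binary form; a typical debug
 * call site just uses the macro. debug_napi_budget() is a hypothetical
 * helper.
 */
#if 0	/* example only */
static void debug_napi_budget(int budget, int done)
{
	/* lands in the ring buffer, readable via the tracefs "trace" file */
	trace_printk("budget=%d done=%d\n", budget, done);
}
#endif
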
3142 __printf(3, 0)
3143 static int
3144 __trace_array_vprintk(struct ring_buffer *buffer,
3145 		      unsigned long ip, const char *fmt, va_list args)
3146 {
3147 	struct trace_event_call *call = &event_print;
3148 	struct ring_buffer_event *event;
3149 	int len = 0, size, pc;
3150 	struct print_entry *entry;
3151 	unsigned long flags;
3152 	char *tbuffer;
3153 
3154 	if (tracing_disabled || tracing_selftest_running)
3155 		return 0;
3156 
3157 	/* Don't pollute graph traces with trace_vprintk internals */
3158 	pause_graph_tracing();
3159 
3160 	pc = preempt_count();
3161 	preempt_disable_notrace();
3162 
3163 
3164 	tbuffer = get_trace_buf();
3165 	if (!tbuffer) {
3166 		len = 0;
3167 		goto out_nobuffer;
3168 	}
3169 
3170 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3171 
3172 	local_save_flags(flags);
3173 	size = sizeof(*entry) + len + 1;
3174 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3175 					    flags, pc);
3176 	if (!event)
3177 		goto out;
3178 	entry = ring_buffer_event_data(event);
3179 	entry->ip = ip;
3180 
3181 	memcpy(&entry->buf, tbuffer, len + 1);
3182 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3183 		__buffer_unlock_commit(buffer, event);
3184 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3185 	}
3186 
3187 out:
3188 	put_trace_buf();
3189 
3190 out_nobuffer:
3191 	preempt_enable_notrace();
3192 	unpause_graph_tracing();
3193 
3194 	return len;
3195 }
3196 
3197 __printf(3, 0)
3198 int trace_array_vprintk(struct trace_array *tr,
3199 			unsigned long ip, const char *fmt, va_list args)
3200 {
3201 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3202 }
3203 
3204 __printf(3, 0)
3205 int trace_array_printk(struct trace_array *tr,
3206 		       unsigned long ip, const char *fmt, ...)
3207 {
3208 	int ret;
3209 	va_list ap;
3210 
3211 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3212 		return 0;
3213 
3214 	va_start(ap, fmt);
3215 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3216 	va_end(ap);
3217 	return ret;
3218 }
3219 
3220 __printf(3, 4)
3221 int trace_array_printk_buf(struct ring_buffer *buffer,
3222 			   unsigned long ip, const char *fmt, ...)
3223 {
3224 	int ret;
3225 	va_list ap;
3226 
3227 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3228 		return 0;
3229 
3230 	va_start(ap, fmt);
3231 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3232 	va_end(ap);
3233 	return ret;
3234 }
3235 
3236 __printf(2, 0)
3237 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3238 {
3239 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3240 }
3241 EXPORT_SYMBOL_GPL(trace_vprintk);
3242 
3243 static void trace_iterator_increment(struct trace_iterator *iter)
3244 {
3245 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3246 
3247 	iter->idx++;
3248 	if (buf_iter)
3249 		ring_buffer_read(buf_iter, NULL);
3250 }
3251 
3252 static struct trace_entry *
3253 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3254 		unsigned long *lost_events)
3255 {
3256 	struct ring_buffer_event *event;
3257 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3258 
3259 	if (buf_iter)
3260 		event = ring_buffer_iter_peek(buf_iter, ts);
3261 	else
3262 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3263 					 lost_events);
3264 
3265 	if (event) {
3266 		iter->ent_size = ring_buffer_event_length(event);
3267 		return ring_buffer_event_data(event);
3268 	}
3269 	iter->ent_size = 0;
3270 	return NULL;
3271 }
3272 
3273 static struct trace_entry *
3274 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3275 		  unsigned long *missing_events, u64 *ent_ts)
3276 {
3277 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3278 	struct trace_entry *ent, *next = NULL;
3279 	unsigned long lost_events = 0, next_lost = 0;
3280 	int cpu_file = iter->cpu_file;
3281 	u64 next_ts = 0, ts;
3282 	int next_cpu = -1;
3283 	int next_size = 0;
3284 	int cpu;
3285 
3286 	/*
3287 	 * If we are in a per_cpu trace file, don't bother iterating over
3288 	 * all CPUs, just peek at that one directly.
3289 	 */
3290 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3291 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3292 			return NULL;
3293 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3294 		if (ent_cpu)
3295 			*ent_cpu = cpu_file;
3296 
3297 		return ent;
3298 	}
3299 
3300 	for_each_tracing_cpu(cpu) {
3301 
3302 		if (ring_buffer_empty_cpu(buffer, cpu))
3303 			continue;
3304 
3305 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3306 
3307 		/*
3308 		 * Pick the entry with the smallest timestamp:
3309 		 */
3310 		if (ent && (!next || ts < next_ts)) {
3311 			next = ent;
3312 			next_cpu = cpu;
3313 			next_ts = ts;
3314 			next_lost = lost_events;
3315 			next_size = iter->ent_size;
3316 		}
3317 	}
3318 
3319 	iter->ent_size = next_size;
3320 
3321 	if (ent_cpu)
3322 		*ent_cpu = next_cpu;
3323 
3324 	if (ent_ts)
3325 		*ent_ts = next_ts;
3326 
3327 	if (missing_events)
3328 		*missing_events = next_lost;
3329 
3330 	return next;
3331 }
3332 
3333 /* Find the next real entry, without updating the iterator itself */
3334 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3335 					  int *ent_cpu, u64 *ent_ts)
3336 {
3337 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3338 }
3339 
3340 /* Find the next real entry, and increment the iterator to the next entry */
3341 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3342 {
3343 	iter->ent = __find_next_entry(iter, &iter->cpu,
3344 				      &iter->lost_events, &iter->ts);
3345 
3346 	if (iter->ent)
3347 		trace_iterator_increment(iter);
3348 
3349 	return iter->ent ? iter : NULL;
3350 }
3351 
3352 static void trace_consume(struct trace_iterator *iter)
3353 {
3354 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3355 			    &iter->lost_events);
3356 }
3357 
3358 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3359 {
3360 	struct trace_iterator *iter = m->private;
3361 	int i = (int)*pos;
3362 	void *ent;
3363 
3364 	WARN_ON_ONCE(iter->leftover);
3365 
3366 	(*pos)++;
3367 
3368 	/* can't go backwards */
3369 	if (iter->idx > i)
3370 		return NULL;
3371 
3372 	if (iter->idx < 0)
3373 		ent = trace_find_next_entry_inc(iter);
3374 	else
3375 		ent = iter;
3376 
3377 	while (ent && iter->idx < i)
3378 		ent = trace_find_next_entry_inc(iter);
3379 
3380 	iter->pos = *pos;
3381 
3382 	return ent;
3383 }
3384 
3385 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3386 {
3387 	struct ring_buffer_event *event;
3388 	struct ring_buffer_iter *buf_iter;
3389 	unsigned long entries = 0;
3390 	u64 ts;
3391 
3392 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3393 
3394 	buf_iter = trace_buffer_iter(iter, cpu);
3395 	if (!buf_iter)
3396 		return;
3397 
3398 	ring_buffer_iter_reset(buf_iter);
3399 
3400 	/*
3401 	 * With the max latency tracers, it is possible that a reset
3402 	 * never took place on a cpu. This is evident when the
3403 	 * timestamp is before the start of the buffer.
3404 	 */
3405 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3406 		if (ts >= iter->trace_buffer->time_start)
3407 			break;
3408 		entries++;
3409 		ring_buffer_read(buf_iter, NULL);
3410 	}
3411 
3412 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3413 }
3414 
3415 /*
3416  * The current tracer is copied to avoid taking a global lock
3417  * all around.
3418  */
3419 static void *s_start(struct seq_file *m, loff_t *pos)
3420 {
3421 	struct trace_iterator *iter = m->private;
3422 	struct trace_array *tr = iter->tr;
3423 	int cpu_file = iter->cpu_file;
3424 	void *p = NULL;
3425 	loff_t l = 0;
3426 	int cpu;
3427 
3428 	/*
3429 	 * copy the tracer to avoid using a global lock all around.
3430 	 * iter->trace is a copy of current_trace, the pointer to the
3431 	 * name may be used instead of a strcmp(), as iter->trace->name
3432 	 * will point to the same string as current_trace->name.
3433 	 */
3434 	mutex_lock(&trace_types_lock);
3435 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3436 		*iter->trace = *tr->current_trace;
3437 	mutex_unlock(&trace_types_lock);
3438 
3439 #ifdef CONFIG_TRACER_MAX_TRACE
3440 	if (iter->snapshot && iter->trace->use_max_tr)
3441 		return ERR_PTR(-EBUSY);
3442 #endif
3443 
3444 	if (!iter->snapshot)
3445 		atomic_inc(&trace_record_taskinfo_disabled);
3446 
3447 	if (*pos != iter->pos) {
3448 		iter->ent = NULL;
3449 		iter->cpu = 0;
3450 		iter->idx = -1;
3451 
3452 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3453 			for_each_tracing_cpu(cpu)
3454 				tracing_iter_reset(iter, cpu);
3455 		} else
3456 			tracing_iter_reset(iter, cpu_file);
3457 
3458 		iter->leftover = 0;
3459 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3460 			;
3461 
3462 	} else {
3463 		/*
3464 		 * If we overflowed the seq_file before, then we want
3465 		 * to just reuse the trace_seq buffer again.
3466 		 */
3467 		if (iter->leftover)
3468 			p = iter;
3469 		else {
3470 			l = *pos - 1;
3471 			p = s_next(m, p, &l);
3472 		}
3473 	}
3474 
3475 	trace_event_read_lock();
3476 	trace_access_lock(cpu_file);
3477 	return p;
3478 }
3479 
3480 static void s_stop(struct seq_file *m, void *p)
3481 {
3482 	struct trace_iterator *iter = m->private;
3483 
3484 #ifdef CONFIG_TRACER_MAX_TRACE
3485 	if (iter->snapshot && iter->trace->use_max_tr)
3486 		return;
3487 #endif
3488 
3489 	if (!iter->snapshot)
3490 		atomic_dec(&trace_record_taskinfo_disabled);
3491 
3492 	trace_access_unlock(iter->cpu_file);
3493 	trace_event_read_unlock();
3494 }
3495 
3496 static void
3497 get_total_entries(struct trace_buffer *buf,
3498 		  unsigned long *total, unsigned long *entries)
3499 {
3500 	unsigned long count;
3501 	int cpu;
3502 
3503 	*total = 0;
3504 	*entries = 0;
3505 
3506 	for_each_tracing_cpu(cpu) {
3507 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3508 		/*
3509 		 * If this buffer has skipped entries, then we hold all
3510 		 * entries for the trace and we need to ignore the
3511 		 * ones before the time stamp.
3512 		 */
3513 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3514 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3515 			/* total is the same as the entries */
3516 			*total += count;
3517 		} else
3518 			*total += count +
3519 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3520 		*entries += count;
3521 	}
3522 }
3523 
3524 static void print_lat_help_header(struct seq_file *m)
3525 {
3526 	seq_puts(m, "#                  _------=> CPU#            \n"
3527 		    "#                 / _-----=> irqs-off        \n"
3528 		    "#                | / _----=> need-resched    \n"
3529 		    "#                || / _---=> hardirq/softirq \n"
3530 		    "#                ||| / _--=> preempt-depth   \n"
3531 		    "#                |||| /     delay            \n"
3532 		    "#  cmd     pid   ||||| time  |   caller      \n"
3533 		    "#     \\   /      |||||  \\    |   /         \n");
3534 }
3535 
3536 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3537 {
3538 	unsigned long total;
3539 	unsigned long entries;
3540 
3541 	get_total_entries(buf, &total, &entries);
3542 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3543 		   entries, total, num_online_cpus());
3544 	seq_puts(m, "#\n");
3545 }
3546 
3547 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3548 				   unsigned int flags)
3549 {
3550 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551 
3552 	print_event_info(buf, m);
3553 
3554 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3555 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3556 }
3557 
3558 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3559 				       unsigned int flags)
3560 {
3561 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3562 	const char tgid_space[] = "          ";
3563 	const char space[] = "  ";
3564 
3565 	print_event_info(buf, m);
3566 
3567 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3568 		   tgid ? tgid_space : space);
3569 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3570 		   tgid ? tgid_space : space);
3571 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3572 		   tgid ? tgid_space : space);
3573 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3574 		   tgid ? tgid_space : space);
3575 	seq_printf(m, "#                          %s||| /     delay\n",
3576 		   tgid ? tgid_space : space);
3577 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3578 		   tgid ? "   TGID   " : space);
3579 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3580 		   tgid ? "     |    " : space);
3581 }
3582 
3583 void
3584 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3585 {
3586 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3587 	struct trace_buffer *buf = iter->trace_buffer;
3588 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3589 	struct tracer *type = iter->trace;
3590 	unsigned long entries;
3591 	unsigned long total;
3592 	const char *name = "preemption";
3593 
3594 	name = type->name;
3595 
3596 	get_total_entries(buf, &total, &entries);
3597 
3598 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3599 		   name, UTS_RELEASE);
3600 	seq_puts(m, "# -----------------------------------"
3601 		 "---------------------------------\n");
3602 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3603 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3604 		   nsecs_to_usecs(data->saved_latency),
3605 		   entries,
3606 		   total,
3607 		   buf->cpu,
3608 #if defined(CONFIG_PREEMPT_NONE)
3609 		   "server",
3610 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3611 		   "desktop",
3612 #elif defined(CONFIG_PREEMPT)
3613 		   "preempt",
3614 #else
3615 		   "unknown",
3616 #endif
3617 		   /* These are reserved for later use */
3618 		   0, 0, 0, 0);
3619 #ifdef CONFIG_SMP
3620 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3621 #else
3622 	seq_puts(m, ")\n");
3623 #endif
3624 	seq_puts(m, "#    -----------------\n");
3625 	seq_printf(m, "#    | task: %.16s-%d "
3626 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3627 		   data->comm, data->pid,
3628 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3629 		   data->policy, data->rt_priority);
3630 	seq_puts(m, "#    -----------------\n");
3631 
3632 	if (data->critical_start) {
3633 		seq_puts(m, "#  => started at: ");
3634 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3635 		trace_print_seq(m, &iter->seq);
3636 		seq_puts(m, "\n#  => ended at:   ");
3637 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3638 		trace_print_seq(m, &iter->seq);
3639 		seq_puts(m, "\n#\n");
3640 	}
3641 
3642 	seq_puts(m, "#\n");
3643 }
3644 
3645 static void test_cpu_buff_start(struct trace_iterator *iter)
3646 {
3647 	struct trace_seq *s = &iter->seq;
3648 	struct trace_array *tr = iter->tr;
3649 
3650 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3651 		return;
3652 
3653 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3654 		return;
3655 
3656 	if (cpumask_available(iter->started) &&
3657 	    cpumask_test_cpu(iter->cpu, iter->started))
3658 		return;
3659 
3660 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3661 		return;
3662 
3663 	if (cpumask_available(iter->started))
3664 		cpumask_set_cpu(iter->cpu, iter->started);
3665 
3666 	/* Don't print started cpu buffer for the first entry of the trace */
3667 	if (iter->idx > 1)
3668 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3669 				iter->cpu);
3670 }
3671 
3672 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3673 {
3674 	struct trace_array *tr = iter->tr;
3675 	struct trace_seq *s = &iter->seq;
3676 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3677 	struct trace_entry *entry;
3678 	struct trace_event *event;
3679 
3680 	entry = iter->ent;
3681 
3682 	test_cpu_buff_start(iter);
3683 
3684 	event = ftrace_find_event(entry->type);
3685 
3686 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3687 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3688 			trace_print_lat_context(iter);
3689 		else
3690 			trace_print_context(iter);
3691 	}
3692 
3693 	if (trace_seq_has_overflowed(s))
3694 		return TRACE_TYPE_PARTIAL_LINE;
3695 
3696 	if (event)
3697 		return event->funcs->trace(iter, sym_flags, event);
3698 
3699 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3700 
3701 	return trace_handle_return(s);
3702 }
3703 
3704 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3705 {
3706 	struct trace_array *tr = iter->tr;
3707 	struct trace_seq *s = &iter->seq;
3708 	struct trace_entry *entry;
3709 	struct trace_event *event;
3710 
3711 	entry = iter->ent;
3712 
3713 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3714 		trace_seq_printf(s, "%d %d %llu ",
3715 				 entry->pid, iter->cpu, iter->ts);
3716 
3717 	if (trace_seq_has_overflowed(s))
3718 		return TRACE_TYPE_PARTIAL_LINE;
3719 
3720 	event = ftrace_find_event(entry->type);
3721 	if (event)
3722 		return event->funcs->raw(iter, 0, event);
3723 
3724 	trace_seq_printf(s, "%d ?\n", entry->type);
3725 
3726 	return trace_handle_return(s);
3727 }
3728 
3729 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3730 {
3731 	struct trace_array *tr = iter->tr;
3732 	struct trace_seq *s = &iter->seq;
3733 	unsigned char newline = '\n';
3734 	struct trace_entry *entry;
3735 	struct trace_event *event;
3736 
3737 	entry = iter->ent;
3738 
3739 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3740 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3741 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3742 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3743 		if (trace_seq_has_overflowed(s))
3744 			return TRACE_TYPE_PARTIAL_LINE;
3745 	}
3746 
3747 	event = ftrace_find_event(entry->type);
3748 	if (event) {
3749 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3750 		if (ret != TRACE_TYPE_HANDLED)
3751 			return ret;
3752 	}
3753 
3754 	SEQ_PUT_FIELD(s, newline);
3755 
3756 	return trace_handle_return(s);
3757 }
3758 
3759 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3760 {
3761 	struct trace_array *tr = iter->tr;
3762 	struct trace_seq *s = &iter->seq;
3763 	struct trace_entry *entry;
3764 	struct trace_event *event;
3765 
3766 	entry = iter->ent;
3767 
3768 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3769 		SEQ_PUT_FIELD(s, entry->pid);
3770 		SEQ_PUT_FIELD(s, iter->cpu);
3771 		SEQ_PUT_FIELD(s, iter->ts);
3772 		if (trace_seq_has_overflowed(s))
3773 			return TRACE_TYPE_PARTIAL_LINE;
3774 	}
3775 
3776 	event = ftrace_find_event(entry->type);
3777 	return event ? event->funcs->binary(iter, 0, event) :
3778 		TRACE_TYPE_HANDLED;
3779 }
3780 
3781 int trace_empty(struct trace_iterator *iter)
3782 {
3783 	struct ring_buffer_iter *buf_iter;
3784 	int cpu;
3785 
3786 	/* If we are looking at one CPU buffer, only check that one */
3787 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3788 		cpu = iter->cpu_file;
3789 		buf_iter = trace_buffer_iter(iter, cpu);
3790 		if (buf_iter) {
3791 			if (!ring_buffer_iter_empty(buf_iter))
3792 				return 0;
3793 		} else {
3794 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3795 				return 0;
3796 		}
3797 		return 1;
3798 	}
3799 
3800 	for_each_tracing_cpu(cpu) {
3801 		buf_iter = trace_buffer_iter(iter, cpu);
3802 		if (buf_iter) {
3803 			if (!ring_buffer_iter_empty(buf_iter))
3804 				return 0;
3805 		} else {
3806 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3807 				return 0;
3808 		}
3809 	}
3810 
3811 	return 1;
3812 }
3813 
3814 /*  Called with trace_event_read_lock() held. */
3815 enum print_line_t print_trace_line(struct trace_iterator *iter)
3816 {
3817 	struct trace_array *tr = iter->tr;
3818 	unsigned long trace_flags = tr->trace_flags;
3819 	enum print_line_t ret;
3820 
3821 	if (iter->lost_events) {
3822 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3823 				 iter->cpu, iter->lost_events);
3824 		if (trace_seq_has_overflowed(&iter->seq))
3825 			return TRACE_TYPE_PARTIAL_LINE;
3826 	}
3827 
3828 	if (iter->trace && iter->trace->print_line) {
3829 		ret = iter->trace->print_line(iter);
3830 		if (ret != TRACE_TYPE_UNHANDLED)
3831 			return ret;
3832 	}
3833 
3834 	if (iter->ent->type == TRACE_BPUTS &&
3835 			trace_flags & TRACE_ITER_PRINTK &&
3836 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3837 		return trace_print_bputs_msg_only(iter);
3838 
3839 	if (iter->ent->type == TRACE_BPRINT &&
3840 			trace_flags & TRACE_ITER_PRINTK &&
3841 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3842 		return trace_print_bprintk_msg_only(iter);
3843 
3844 	if (iter->ent->type == TRACE_PRINT &&
3845 			trace_flags & TRACE_ITER_PRINTK &&
3846 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3847 		return trace_print_printk_msg_only(iter);
3848 
3849 	if (trace_flags & TRACE_ITER_BIN)
3850 		return print_bin_fmt(iter);
3851 
3852 	if (trace_flags & TRACE_ITER_HEX)
3853 		return print_hex_fmt(iter);
3854 
3855 	if (trace_flags & TRACE_ITER_RAW)
3856 		return print_raw_fmt(iter);
3857 
3858 	return print_trace_fmt(iter);
3859 }
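
/*
 * Summary of the precedence implemented above: a tracer's private
 * ->print_line() callback gets first shot, the PRINTK_MSGONLY cases
 * short-circuit for bputs/bprint/print entries, and only then does the
 * generic bin -> hex -> raw -> default format selection apply.
 */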
3860 
3861 void trace_latency_header(struct seq_file *m)
3862 {
3863 	struct trace_iterator *iter = m->private;
3864 	struct trace_array *tr = iter->tr;
3865 
3866 	/* print nothing if the buffers are empty */
3867 	if (trace_empty(iter))
3868 		return;
3869 
3870 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3871 		print_trace_header(m, iter);
3872 
3873 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3874 		print_lat_help_header(m);
3875 }
3876 
3877 void trace_default_header(struct seq_file *m)
3878 {
3879 	struct trace_iterator *iter = m->private;
3880 	struct trace_array *tr = iter->tr;
3881 	unsigned long trace_flags = tr->trace_flags;
3882 
3883 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3884 		return;
3885 
3886 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3887 		/* print nothing if the buffers are empty */
3888 		if (trace_empty(iter))
3889 			return;
3890 		print_trace_header(m, iter);
3891 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3892 			print_lat_help_header(m);
3893 	} else {
3894 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3895 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3896 				print_func_help_header_irq(iter->trace_buffer,
3897 							   m, trace_flags);
3898 			else
3899 				print_func_help_header(iter->trace_buffer, m,
3900 						       trace_flags);
3901 		}
3902 	}
3903 }
3904 
3905 static void test_ftrace_alive(struct seq_file *m)
3906 {
3907 	if (!ftrace_is_dead())
3908 		return;
3909 	seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3910 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3911 }
3912 
3913 #ifdef CONFIG_TRACER_MAX_TRACE
3914 static void show_snapshot_main_help(struct seq_file *m)
3915 {
3916 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3917 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3918 		    "#                      Takes a snapshot of the main buffer.\n"
3919 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3920 		    "#                      (Doesn't have to be '2'; works with any number\n"
3921 		    "#                       that is not a '0' or '1')\n");
3922 }
3923 
3924 static void show_snapshot_percpu_help(struct seq_file *m)
3925 {
3926 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3927 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3928 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3929 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3930 #else
3931 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3932 		    "#                     Must use main snapshot file to allocate.\n");
3933 #endif
3934 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3935 		    "#                      (Doesn't have to be '2'; works with any number\n"
3936 		    "#                       that is not a '0' or '1')\n");
3937 }
3938 
3939 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3940 {
3941 	if (iter->tr->allocated_snapshot)
3942 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3943 	else
3944 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3945 
3946 	seq_puts(m, "# Snapshot commands:\n");
3947 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3948 		show_snapshot_main_help(m);
3949 	else
3950 		show_snapshot_percpu_help(m);
3951 }
3952 #else
3953 /* Should never be called */
3954 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3955 #endif
3956 
3957 static int s_show(struct seq_file *m, void *v)
3958 {
3959 	struct trace_iterator *iter = v;
3960 	int ret;
3961 
3962 	if (iter->ent == NULL) {
3963 		if (iter->tr) {
3964 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3965 			seq_puts(m, "#\n");
3966 			test_ftrace_alive(m);
3967 		}
3968 		if (iter->snapshot && trace_empty(iter))
3969 			print_snapshot_help(m, iter);
3970 		else if (iter->trace && iter->trace->print_header)
3971 			iter->trace->print_header(m);
3972 		else
3973 			trace_default_header(m);
3974 
3975 	} else if (iter->leftover) {
3976 		/*
3977 		 * If we filled the seq_file buffer earlier, we
3978 		 * want to just show it now.
3979 		 */
3980 		ret = trace_print_seq(m, &iter->seq);
3981 
3982 		/* ret should this time be zero, but you never know */
3983 		iter->leftover = ret;
3984 
3985 	} else {
3986 		print_trace_line(iter);
3987 		ret = trace_print_seq(m, &iter->seq);
3988 		/*
3989 		 * If we overflow the seq_file buffer, then it will
3990 		 * ask us for this data again at start up.
3991 		 * Use that instead.
3992 		 *  ret is 0 if seq_file write succeeded.
3993 		 *        -1 otherwise.
3994 		 */
3995 		iter->leftover = ret;
3996 	}
3997 
3998 	return 0;
3999 }
4000 
4001 /*
4002  * Should be used after trace_array_get(); trace_types_lock
4003  * ensures that i_cdev has already been initialized.
4004  */
4005 static inline int tracing_get_cpu(struct inode *inode)
4006 {
4007 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4008 		return (long)inode->i_cdev - 1;
4009 	return RING_BUFFER_ALL_CPUS;
4010 }
4011 
4012 static const struct seq_operations tracer_seq_ops = {
4013 	.start		= s_start,
4014 	.next		= s_next,
4015 	.stop		= s_stop,
4016 	.show		= s_show,
4017 };
4018 
4019 static struct trace_iterator *
4020 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4021 {
4022 	struct trace_array *tr = inode->i_private;
4023 	struct trace_iterator *iter;
4024 	int cpu;
4025 
4026 	if (tracing_disabled)
4027 		return ERR_PTR(-ENODEV);
4028 
4029 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4030 	if (!iter)
4031 		return ERR_PTR(-ENOMEM);
4032 
4033 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4034 				    GFP_KERNEL);
4035 	if (!iter->buffer_iter)
4036 		goto release;
4037 
4038 	/*
4039 	 * We make a copy of the current tracer to avoid concurrent
4040 	 * changes on it while we are reading.
4041 	 */
4042 	mutex_lock(&trace_types_lock);
4043 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4044 	if (!iter->trace)
4045 		goto fail;
4046 
4047 	*iter->trace = *tr->current_trace;
4048 
4049 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4050 		goto fail;
4051 
4052 	iter->tr = tr;
4053 
4054 #ifdef CONFIG_TRACER_MAX_TRACE
4055 	/* Currently only the top directory has a snapshot */
4056 	if (tr->current_trace->print_max || snapshot)
4057 		iter->trace_buffer = &tr->max_buffer;
4058 	else
4059 #endif
4060 		iter->trace_buffer = &tr->trace_buffer;
4061 	iter->snapshot = snapshot;
4062 	iter->pos = -1;
4063 	iter->cpu_file = tracing_get_cpu(inode);
4064 	mutex_init(&iter->mutex);
4065 
4066 	/* Notify the tracer early; before we stop tracing. */
4067 	if (iter->trace && iter->trace->open)
4068 		iter->trace->open(iter);
4069 
4070 	/* Annotate start of buffers if we had overruns */
4071 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4072 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4073 
4074 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4075 	if (trace_clocks[tr->clock_id].in_ns)
4076 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4077 
4078 	/* stop the trace while dumping if we are not opening "snapshot" */
4079 	if (!iter->snapshot)
4080 		tracing_stop_tr(tr);
4081 
4082 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4083 		for_each_tracing_cpu(cpu) {
4084 			iter->buffer_iter[cpu] =
4085 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4086 							 cpu, GFP_KERNEL);
4087 		}
4088 		ring_buffer_read_prepare_sync();
4089 		for_each_tracing_cpu(cpu) {
4090 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4091 			tracing_iter_reset(iter, cpu);
4092 		}
4093 	} else {
4094 		cpu = iter->cpu_file;
4095 		iter->buffer_iter[cpu] =
4096 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4097 						 cpu, GFP_KERNEL);
4098 		ring_buffer_read_prepare_sync();
4099 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4100 		tracing_iter_reset(iter, cpu);
4101 	}
4102 
4103 	mutex_unlock(&trace_types_lock);
4104 
4105 	return iter;
4106 
4107  fail:
4108 	mutex_unlock(&trace_types_lock);
4109 	kfree(iter->trace);
4110 	kfree(iter->buffer_iter);
4111 release:
4112 	seq_release_private(inode, file);
4113 	return ERR_PTR(-ENOMEM);
4114 }
4115 
4116 int tracing_open_generic(struct inode *inode, struct file *filp)
4117 {
4118 	if (tracing_disabled)
4119 		return -ENODEV;
4120 
4121 	filp->private_data = inode->i_private;
4122 	return 0;
4123 }
4124 
4125 bool tracing_is_disabled(void)
4126 {
4127 	return (tracing_disabled) ? true : false;
4128 }
4129 
4130 /*
4131  * Open and update trace_array ref count.
4132  * Must have the current trace_array passed to it.
4133  */
4134 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4135 {
4136 	struct trace_array *tr = inode->i_private;
4137 
4138 	if (tracing_disabled)
4139 		return -ENODEV;
4140 
4141 	if (trace_array_get(tr) < 0)
4142 		return -ENODEV;
4143 
4144 	filp->private_data = inode->i_private;
4145 
4146 	return 0;
4147 }
4148 
4149 static int tracing_release(struct inode *inode, struct file *file)
4150 {
4151 	struct trace_array *tr = inode->i_private;
4152 	struct seq_file *m = file->private_data;
4153 	struct trace_iterator *iter;
4154 	int cpu;
4155 
4156 	if (!(file->f_mode & FMODE_READ)) {
4157 		trace_array_put(tr);
4158 		return 0;
4159 	}
4160 
4161 	/* Writes do not use seq_file */
4162 	iter = m->private;
4163 	mutex_lock(&trace_types_lock);
4164 
4165 	for_each_tracing_cpu(cpu) {
4166 		if (iter->buffer_iter[cpu])
4167 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4168 	}
4169 
4170 	if (iter->trace && iter->trace->close)
4171 		iter->trace->close(iter);
4172 
4173 	if (!iter->snapshot)
4174 		/* reenable tracing if it was previously enabled */
4175 		tracing_start_tr(tr);
4176 
4177 	__trace_array_put(tr);
4178 
4179 	mutex_unlock(&trace_types_lock);
4180 
4181 	mutex_destroy(&iter->mutex);
4182 	free_cpumask_var(iter->started);
4183 	kfree(iter->trace);
4184 	kfree(iter->buffer_iter);
4185 	seq_release_private(inode, file);
4186 
4187 	return 0;
4188 }
4189 
4190 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4191 {
4192 	struct trace_array *tr = inode->i_private;
4193 
4194 	trace_array_put(tr);
4195 	return 0;
4196 }
4197 
4198 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4199 {
4200 	struct trace_array *tr = inode->i_private;
4201 
4202 	trace_array_put(tr);
4203 
4204 	return single_release(inode, file);
4205 }
4206 
4207 static int tracing_open(struct inode *inode, struct file *file)
4208 {
4209 	struct trace_array *tr = inode->i_private;
4210 	struct trace_iterator *iter;
4211 	int ret = 0;
4212 
4213 	if (trace_array_get(tr) < 0)
4214 		return -ENODEV;
4215 
4216 	/* If this file was open for write, then erase contents */
4217 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4218 		int cpu = tracing_get_cpu(inode);
4219 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4220 
4221 #ifdef CONFIG_TRACER_MAX_TRACE
4222 		if (tr->current_trace->print_max)
4223 			trace_buf = &tr->max_buffer;
4224 #endif
4225 
4226 		if (cpu == RING_BUFFER_ALL_CPUS)
4227 			tracing_reset_online_cpus(trace_buf);
4228 		else
4229 			tracing_reset(trace_buf, cpu);
4230 	}
4231 
4232 	if (file->f_mode & FMODE_READ) {
4233 		iter = __tracing_open(inode, file, false);
4234 		if (IS_ERR(iter))
4235 			ret = PTR_ERR(iter);
4236 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4237 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4238 	}
4239 
4240 	if (ret < 0)
4241 		trace_array_put(tr);
4242 
4243 	return ret;
4244 }
4245 
4246 /*
4247  * Some tracers are not suitable for instance buffers.
4248  * A tracer is always available for the global array (toplevel)
4249  * or if it explicitly states that it is.
4250  */
4251 static bool
4252 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4253 {
4254 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4255 }
4256 
4257 /* Find the next tracer that this trace array may use */
4258 static struct tracer *
4259 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4260 {
4261 	while (t && !trace_ok_for_array(t, tr))
4262 		t = t->next;
4263 
4264 	return t;
4265 }
4266 
4267 static void *
4268 t_next(struct seq_file *m, void *v, loff_t *pos)
4269 {
4270 	struct trace_array *tr = m->private;
4271 	struct tracer *t = v;
4272 
4273 	(*pos)++;
4274 
4275 	if (t)
4276 		t = get_tracer_for_array(tr, t->next);
4277 
4278 	return t;
4279 }
4280 
4281 static void *t_start(struct seq_file *m, loff_t *pos)
4282 {
4283 	struct trace_array *tr = m->private;
4284 	struct tracer *t;
4285 	loff_t l = 0;
4286 
4287 	mutex_lock(&trace_types_lock);
4288 
4289 	t = get_tracer_for_array(tr, trace_types);
4290 	for (; t && l < *pos; t = t_next(m, t, &l))
4291 			;
4292 
4293 	return t;
4294 }
4295 
4296 static void t_stop(struct seq_file *m, void *p)
4297 {
4298 	mutex_unlock(&trace_types_lock);
4299 }
4300 
4301 static int t_show(struct seq_file *m, void *v)
4302 {
4303 	struct tracer *t = v;
4304 
4305 	if (!t)
4306 		return 0;
4307 
4308 	seq_puts(m, t->name);
4309 	if (t->next)
4310 		seq_putc(m, ' ');
4311 	else
4312 		seq_putc(m, '\n');
4313 
4314 	return 0;
4315 }
4316 
4317 static const struct seq_operations show_traces_seq_ops = {
4318 	.start		= t_start,
4319 	.next		= t_next,
4320 	.stop		= t_stop,
4321 	.show		= t_show,
4322 };
4323 
4324 static int show_traces_open(struct inode *inode, struct file *file)
4325 {
4326 	struct trace_array *tr = inode->i_private;
4327 	struct seq_file *m;
4328 	int ret;
4329 
4330 	if (tracing_disabled)
4331 		return -ENODEV;
4332 
4333 	ret = seq_open(file, &show_traces_seq_ops);
4334 	if (ret)
4335 		return ret;
4336 
4337 	m = file->private_data;
4338 	m->private = tr;
4339 
4340 	return 0;
4341 }
4342 
4343 static ssize_t
4344 tracing_write_stub(struct file *filp, const char __user *ubuf,
4345 		   size_t count, loff_t *ppos)
4346 {
4347 	return count;
4348 }
4349 
4350 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4351 {
4352 	int ret;
4353 
4354 	if (file->f_mode & FMODE_READ)
4355 		ret = seq_lseek(file, offset, whence);
4356 	else
4357 		file->f_pos = ret = 0;
4358 
4359 	return ret;
4360 }
4361 
4362 static const struct file_operations tracing_fops = {
4363 	.open		= tracing_open,
4364 	.read		= seq_read,
4365 	.write		= tracing_write_stub,
4366 	.llseek		= tracing_lseek,
4367 	.release	= tracing_release,
4368 };
4369 
4370 static const struct file_operations show_traces_fops = {
4371 	.open		= show_traces_open,
4372 	.read		= seq_read,
4373 	.release	= seq_release,
4374 	.llseek		= seq_lseek,
4375 };
4376 
4377 static ssize_t
4378 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4379 		     size_t count, loff_t *ppos)
4380 {
4381 	struct trace_array *tr = file_inode(filp)->i_private;
4382 	char *mask_str;
4383 	int len;
4384 
4385 	len = snprintf(NULL, 0, "%*pb\n",
4386 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4387 	mask_str = kmalloc(len, GFP_KERNEL);
4388 	if (!mask_str)
4389 		return -ENOMEM;
4390 
4391 	len = snprintf(mask_str, len, "%*pb\n",
4392 		       cpumask_pr_args(tr->tracing_cpumask));
4393 	if (len >= count) {
4394 		count = -EINVAL;
4395 		goto out_err;
4396 	}
4397 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4398 
4399 out_err:
4400 	kfree(mask_str);
4401 
4402 	return count;
4403 }
4404 
4405 static ssize_t
4406 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4407 		      size_t count, loff_t *ppos)
4408 {
4409 	struct trace_array *tr = file_inode(filp)->i_private;
4410 	cpumask_var_t tracing_cpumask_new;
4411 	int err, cpu;
4412 
4413 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4414 		return -ENOMEM;
4415 
4416 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4417 	if (err)
4418 		goto err_unlock;
4419 
4420 	local_irq_disable();
4421 	arch_spin_lock(&tr->max_lock);
4422 	for_each_tracing_cpu(cpu) {
4423 		/*
4424 		 * Increase/decrease the disabled counter if we are
4425 		 * about to flip a bit in the cpumask:
4426 		 */
4427 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4428 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4429 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4430 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4431 		}
4432 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4433 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4434 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4435 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4436 		}
4437 	}
4438 	arch_spin_unlock(&tr->max_lock);
4439 	local_irq_enable();
4440 
4441 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4442 	free_cpumask_var(tracing_cpumask_new);
4443 
4444 	return count;
4445 
4446 err_unlock:
4447 	free_cpumask_var(tracing_cpumask_new);
4448 
4449 	return err;
4450 }
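
/*
 * For example (illustrative), tracing could be limited to CPUs 0 and 1
 * through the file served by the handlers above; the mask is a hex
 * bitmap as accepted by cpumask_parse_user():
 *
 *	# echo 3 > tracing_cpumask
 *
 * Reading the file back prints the currently traced CPUs in the same
 * hex bitmap format.
 */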
4451 
4452 static const struct file_operations tracing_cpumask_fops = {
4453 	.open		= tracing_open_generic_tr,
4454 	.read		= tracing_cpumask_read,
4455 	.write		= tracing_cpumask_write,
4456 	.release	= tracing_release_generic_tr,
4457 	.llseek		= generic_file_llseek,
4458 };
4459 
4460 static int tracing_trace_options_show(struct seq_file *m, void *v)
4461 {
4462 	struct tracer_opt *trace_opts;
4463 	struct trace_array *tr = m->private;
4464 	u32 tracer_flags;
4465 	int i;
4466 
4467 	mutex_lock(&trace_types_lock);
4468 	tracer_flags = tr->current_trace->flags->val;
4469 	trace_opts = tr->current_trace->flags->opts;
4470 
4471 	for (i = 0; trace_options[i]; i++) {
4472 		if (tr->trace_flags & (1 << i))
4473 			seq_printf(m, "%s\n", trace_options[i]);
4474 		else
4475 			seq_printf(m, "no%s\n", trace_options[i]);
4476 	}
4477 
4478 	for (i = 0; trace_opts[i].name; i++) {
4479 		if (tracer_flags & trace_opts[i].bit)
4480 			seq_printf(m, "%s\n", trace_opts[i].name);
4481 		else
4482 			seq_printf(m, "no%s\n", trace_opts[i].name);
4483 	}
4484 	mutex_unlock(&trace_types_lock);
4485 
4486 	return 0;
4487 }
4488 
4489 static int __set_tracer_option(struct trace_array *tr,
4490 			       struct tracer_flags *tracer_flags,
4491 			       struct tracer_opt *opts, int neg)
4492 {
4493 	struct tracer *trace = tracer_flags->trace;
4494 	int ret;
4495 
4496 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4497 	if (ret)
4498 		return ret;
4499 
4500 	if (neg)
4501 		tracer_flags->val &= ~opts->bit;
4502 	else
4503 		tracer_flags->val |= opts->bit;
4504 	return 0;
4505 }
4506 
4507 /* Try to assign a tracer specific option */
4508 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4509 {
4510 	struct tracer *trace = tr->current_trace;
4511 	struct tracer_flags *tracer_flags = trace->flags;
4512 	struct tracer_opt *opts = NULL;
4513 	int i;
4514 
4515 	for (i = 0; tracer_flags->opts[i].name; i++) {
4516 		opts = &tracer_flags->opts[i];
4517 
4518 		if (strcmp(cmp, opts->name) == 0)
4519 			return __set_tracer_option(tr, trace->flags, opts, neg);
4520 	}
4521 
4522 	return -EINVAL;
4523 }
4524 
4525 /* Some tracers require overwrite to stay enabled */
4526 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4527 {
4528 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4529 		return -1;
4530 
4531 	return 0;
4532 }
4533 
4534 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4535 {
4536 	/* do nothing if flag is already set */
4537 	if (!!(tr->trace_flags & mask) == !!enabled)
4538 		return 0;
4539 
4540 	/* Give the tracer a chance to approve the change */
4541 	if (tr->current_trace->flag_changed)
4542 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4543 			return -EINVAL;
4544 
4545 	if (enabled)
4546 		tr->trace_flags |= mask;
4547 	else
4548 		tr->trace_flags &= ~mask;
4549 
4550 	if (mask == TRACE_ITER_RECORD_CMD)
4551 		trace_event_enable_cmd_record(enabled);
4552 
4553 	if (mask == TRACE_ITER_RECORD_TGID) {
4554 		if (!tgid_map)
4555 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4556 					   sizeof(*tgid_map),
4557 					   GFP_KERNEL);
4558 		if (!tgid_map) {
4559 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4560 			return -ENOMEM;
4561 		}
4562 
4563 		trace_event_enable_tgid_record(enabled);
4564 	}
4565 
4566 	if (mask == TRACE_ITER_EVENT_FORK)
4567 		trace_event_follow_fork(tr, enabled);
4568 
4569 	if (mask == TRACE_ITER_FUNC_FORK)
4570 		ftrace_pid_follow_fork(tr, enabled);
4571 
4572 	if (mask == TRACE_ITER_OVERWRITE) {
4573 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4574 #ifdef CONFIG_TRACER_MAX_TRACE
4575 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4576 #endif
4577 	}
4578 
4579 	if (mask == TRACE_ITER_PRINTK) {
4580 		trace_printk_start_stop_comm(enabled);
4581 		trace_printk_control(enabled);
4582 	}
4583 
4584 	return 0;
4585 }
4586 
4587 static int trace_set_options(struct trace_array *tr, char *option)
4588 {
4589 	char *cmp;
4590 	int neg = 0;
4591 	int ret;
4592 	size_t orig_len = strlen(option);
4593 	int len;
4594 
4595 	cmp = strstrip(option);
4596 
4597 	len = str_has_prefix(cmp, "no");
4598 	if (len)
4599 		neg = 1;
4600 
4601 	cmp += len;
4602 
4603 	mutex_lock(&trace_types_lock);
4604 
4605 	ret = match_string(trace_options, -1, cmp);
4606 	/* If no option could be set, test the specific tracer options */
4607 	if (ret < 0)
4608 		ret = set_tracer_option(tr, cmp, neg);
4609 	else
4610 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4611 
4612 	mutex_unlock(&trace_types_lock);
4613 
4614 	/*
4615 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4616 	 * turn it back into a space.
4617 	 */
4618 	if (orig_len > strlen(option))
4619 		option[strlen(option)] = ' ';
4620 
4621 	return ret;
4622 }
4623 
4624 static void __init apply_trace_boot_options(void)
4625 {
4626 	char *buf = trace_boot_options_buf;
4627 	char *option;
4628 
4629 	while (true) {
4630 		option = strsep(&buf, ",");
4631 
4632 		if (!option)
4633 			break;
4634 
4635 		if (*option)
4636 			trace_set_options(&global_trace, option);
4637 
4638 		/* Put back the comma to allow this to be called again */
4639 		if (buf)
4640 			*(buf - 1) = ',';
4641 	}
4642 }
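
/*
 * The boot-time string parsed above is a comma separated list, e.g.
 * something like:
 *
 *	trace_options=sym-offset,noirq-info
 *
 * Each element is passed to trace_set_options() exactly as a write to
 * the trace_options file would be.
 */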
4643 
4644 static ssize_t
4645 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4646 			size_t cnt, loff_t *ppos)
4647 {
4648 	struct seq_file *m = filp->private_data;
4649 	struct trace_array *tr = m->private;
4650 	char buf[64];
4651 	int ret;
4652 
4653 	if (cnt >= sizeof(buf))
4654 		return -EINVAL;
4655 
4656 	if (copy_from_user(buf, ubuf, cnt))
4657 		return -EFAULT;
4658 
4659 	buf[cnt] = 0;
4660 
4661 	ret = trace_set_options(tr, buf);
4662 	if (ret < 0)
4663 		return ret;
4664 
4665 	*ppos += cnt;
4666 
4667 	return cnt;
4668 }
4669 
4670 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4671 {
4672 	struct trace_array *tr = inode->i_private;
4673 	int ret;
4674 
4675 	if (tracing_disabled)
4676 		return -ENODEV;
4677 
4678 	if (trace_array_get(tr) < 0)
4679 		return -ENODEV;
4680 
4681 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4682 	if (ret < 0)
4683 		trace_array_put(tr);
4684 
4685 	return ret;
4686 }
4687 
4688 static const struct file_operations tracing_iter_fops = {
4689 	.open		= tracing_trace_options_open,
4690 	.read		= seq_read,
4691 	.llseek		= seq_lseek,
4692 	.release	= tracing_single_release_tr,
4693 	.write		= tracing_trace_options_write,
4694 };
4695 
4696 static const char readme_msg[] =
4697 	"tracing mini-HOWTO:\n\n"
4698 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4699 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4700 	" Important files:\n"
4701 	"  trace\t\t\t- The static contents of the buffer\n"
4702 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4703 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4704 	"  current_tracer\t- function and latency tracers\n"
4705 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4706 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4707 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4708 	"  trace_clock\t\t- change the clock used to order events\n"
4709 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4710 	"      global:   Synced across CPUs but slows tracing down.\n"
4711 	"     counter:   Not a clock, but just an increment\n"
4712 	"      uptime:   Jiffy counter from time of boot\n"
4713 	"        perf:   Same clock that perf events use\n"
4714 #ifdef CONFIG_X86_64
4715 	"     x86-tsc:   TSC cycle counter\n"
4716 #endif
4717 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4718 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4719 	"    absolute:   Absolute (standalone) timestamp\n"
4720 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4721 	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4722 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4723 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4724 	"\t\t\t  Remove sub-buffer with rmdir\n"
4725 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4726 	"\t\t\t  Disable an option by prefixing the option name\n"
4727 	"\t\t\t  with 'no'\n"
4728 	"  saved_cmdlines_size\t- echo the number of comm-pid entries to cache here\n"
4729 #ifdef CONFIG_DYNAMIC_FTRACE
4730 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4731 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4732 	"\t\t\t  functions\n"
4733 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4734 	"\t     modules: Can select a group via module\n"
4735 	"\t      Format: :mod:<module-name>\n"
4736 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4737 	"\t    triggers: a command to perform when function is hit\n"
4738 	"\t      Format: <function>:<trigger>[:count]\n"
4739 	"\t     trigger: traceon, traceoff\n"
4740 	"\t\t      enable_event:<system>:<event>\n"
4741 	"\t\t      disable_event:<system>:<event>\n"
4742 #ifdef CONFIG_STACKTRACE
4743 	"\t\t      stacktrace\n"
4744 #endif
4745 #ifdef CONFIG_TRACER_SNAPSHOT
4746 	"\t\t      snapshot\n"
4747 #endif
4748 	"\t\t      dump\n"
4749 	"\t\t      cpudump\n"
4750 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4751 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4752 	"\t     The first one will disable tracing every time do_fault is hit\n"
4753 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4754 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4755 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4756 	"\t       the counter will not decrement. It only decrements when the\n"
4757 	"\t       trigger did work\n"
4758 	"\t     To remove trigger without count:\n"
4759 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4760 	"\t     To remove trigger with a count:\n"
4761 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4762 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4763 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4764 	"\t    modules: Can select a group via module command :mod:\n"
4765 	"\t    Does not accept triggers\n"
4766 #endif /* CONFIG_DYNAMIC_FTRACE */
4767 #ifdef CONFIG_FUNCTION_TRACER
4768 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4769 	"\t\t    (function)\n"
4770 #endif
4771 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4772 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4773 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4774 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4775 #endif
4776 #ifdef CONFIG_TRACER_SNAPSHOT
4777 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4778 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4779 	"\t\t\t  information\n"
4780 #endif
4781 #ifdef CONFIG_STACK_TRACER
4782 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4783 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4784 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4785 	"\t\t\t  new trace)\n"
4786 #ifdef CONFIG_DYNAMIC_FTRACE
4787 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4788 	"\t\t\t  traces\n"
4789 #endif
4790 #endif /* CONFIG_STACK_TRACER */
4791 #ifdef CONFIG_DYNAMIC_EVENTS
4792 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4793 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4794 #endif
4795 #ifdef CONFIG_KPROBE_EVENTS
4796 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4797 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4798 #endif
4799 #ifdef CONFIG_UPROBE_EVENTS
4800 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4801 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4802 #endif
4803 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4804 	"\t  accepts: event-definitions (one definition per line)\n"
4805 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4806 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4807 #ifdef CONFIG_HIST_TRIGGERS
4808 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4809 #endif
4810 	"\t           -:[<group>/]<event>\n"
4811 #ifdef CONFIG_KPROBE_EVENTS
4812 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4813   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4814 #endif
4815 #ifdef CONFIG_UPROBE_EVENTS
4816   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4817 #endif
4818 	"\t     args: <name>=fetcharg[:type]\n"
4819 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4820 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4821 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4822 #else
4823 	"\t           $stack<index>, $stack, $retval, $comm\n"
4824 #endif
4825 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4826 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4827 	"\t           <type>\\[<array-size>\\]\n"
4828 #ifdef CONFIG_HIST_TRIGGERS
4829 	"\t    field: <stype> <name>;\n"
4830 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4831 	"\t           [unsigned] char/int/long\n"
4832 #endif
4833 #endif
4834 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4835 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4836 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4837 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4838 	"\t\t\t  events\n"
4839 	"      filter\t\t- If set, only events passing filter are traced\n"
4840 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4841 	"\t\t\t  <event>:\n"
4842 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4843 	"      filter\t\t- If set, only events passing filter are traced\n"
4844 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4845 	"\t    Format: <trigger>[:count][if <filter>]\n"
4846 	"\t   trigger: traceon, traceoff\n"
4847 	"\t            enable_event:<system>:<event>\n"
4848 	"\t            disable_event:<system>:<event>\n"
4849 #ifdef CONFIG_HIST_TRIGGERS
4850 	"\t            enable_hist:<system>:<event>\n"
4851 	"\t            disable_hist:<system>:<event>\n"
4852 #endif
4853 #ifdef CONFIG_STACKTRACE
4854 	"\t\t    stacktrace\n"
4855 #endif
4856 #ifdef CONFIG_TRACER_SNAPSHOT
4857 	"\t\t    snapshot\n"
4858 #endif
4859 #ifdef CONFIG_HIST_TRIGGERS
4860 	"\t\t    hist (see below)\n"
4861 #endif
4862 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4863 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4864 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4865 	"\t                  events/block/block_unplug/trigger\n"
4866 	"\t   The first disables tracing every time block_unplug is hit.\n"
4867 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4868 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4869 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4870 	"\t   Like function triggers, the counter is only decremented if it\n"
4871 	"\t    enabled or disabled tracing.\n"
4872 	"\t   To remove a trigger without a count:\n"
4873 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4874 	"\t   To remove a trigger with a count:\n"
4875 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4876 	"\t   Filters can be ignored when removing a trigger.\n"
4877 #ifdef CONFIG_HIST_TRIGGERS
4878 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4879 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4880 	"\t            [:values=<field1[,field2,...]>]\n"
4881 	"\t            [:sort=<field1[,field2,...]>]\n"
4882 	"\t            [:size=#entries]\n"
4883 	"\t            [:pause][:continue][:clear]\n"
4884 	"\t            [:name=histname1]\n"
4885 	"\t            [:<handler>.<action>]\n"
4886 	"\t            [if <filter>]\n\n"
4887 	"\t    When a matching event is hit, an entry is added to a hash\n"
4888 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4889 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4890 	"\t    correspond to fields in the event's format description.  Keys\n"
4891 	"\t    can be any field, or the special string 'stacktrace'.\n"
4892 	"\t    Compound keys consisting of up to two fields can be specified\n"
4893 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4894 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4895 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4896 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4897 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4898 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4899 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4900 	"\t    its histogram data will be shared with other triggers of the\n"
4901 	"\t    same name, and trigger hits will update this common data.\n\n"
4902 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4903 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4904 	"\t    triggers attached to an event, there will be a table for each\n"
4905 	"\t    trigger in the output.  The table displayed for a named\n"
4906 	"\t    trigger will be the same as any other instance having the\n"
4907 	"\t    same name.  The default format used to display a given field\n"
4908 	"\t    can be modified by appending any of the following modifiers\n"
4909 	"\t    to the field name, as applicable:\n\n"
4910 	"\t            .hex        display a number as a hex value\n"
4911 	"\t            .sym        display an address as a symbol\n"
4912 	"\t            .sym-offset display an address as a symbol and offset\n"
4913 	"\t            .execname   display a common_pid as a program name\n"
4914 	"\t            .syscall    display a syscall id as a syscall name\n"
4915 	"\t            .log2       display log2 value rather than raw number\n"
4916 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4917 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4918 	"\t    trigger or to start a hist trigger but not log any events\n"
4919 	"\t    until told to do so.  'continue' can be used to start or\n"
4920 	"\t    restart a paused hist trigger.\n\n"
4921 	"\t    The 'clear' parameter will clear the contents of a running\n"
4922 	"\t    hist trigger and leave its current paused/active state\n"
4923 	"\t    unchanged.\n\n"
4924 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4925 	"\t    have one event conditionally start and stop another event's\n"
4926 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4927 	"\t    the enable_event and disable_event triggers.\n\n"
4928 	"\t    Hist trigger handlers and actions are executed whenever\n"
4929 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4930 	"\t        <handler>.<action>\n\n"
4931 	"\t    The available handlers are:\n\n"
4932 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4933 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4934 	"\t        onchange(var)            - invoke action if var changes\n\n"
4935 	"\t    The available actions are:\n\n"
4936 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4937 	"\t        save(field,...)                      - save current event fields\n"
4938 #ifdef CONFIG_TRACER_SNAPSHOT
4939 	"\t        snapshot()                           - snapshot the trace buffer\n"
4940 #endif
4941 #endif
4942 ;
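
/*
 * As one concrete illustration of the trigger syntax documented above
 * (the event and field names are just examples), a hist trigger could
 * be attached and read back with:
 *
 *	# echo 'hist:keys=call_site.sym:values=bytes_req' > \
 *		events/kmem/kmalloc/trigger
 *	# cat events/kmem/kmalloc/hist
 */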
4943 
4944 static ssize_t
4945 tracing_readme_read(struct file *filp, char __user *ubuf,
4946 		       size_t cnt, loff_t *ppos)
4947 {
4948 	return simple_read_from_buffer(ubuf, cnt, ppos,
4949 					readme_msg, strlen(readme_msg));
4950 }
4951 
4952 static const struct file_operations tracing_readme_fops = {
4953 	.open		= tracing_open_generic,
4954 	.read		= tracing_readme_read,
4955 	.llseek		= generic_file_llseek,
4956 };
4957 
4958 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4959 {
4960 	int *ptr = v;
4961 
4962 	if (*pos || m->count)
4963 		ptr++;
4964 
4965 	(*pos)++;
4966 
4967 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4968 		if (trace_find_tgid(*ptr))
4969 			return ptr;
4970 	}
4971 
4972 	return NULL;
4973 }
4974 
4975 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4976 {
4977 	void *v;
4978 	loff_t l = 0;
4979 
4980 	if (!tgid_map)
4981 		return NULL;
4982 
4983 	v = &tgid_map[0];
4984 	while (l <= *pos) {
4985 		v = saved_tgids_next(m, v, &l);
4986 		if (!v)
4987 			return NULL;
4988 	}
4989 
4990 	return v;
4991 }
4992 
4993 static void saved_tgids_stop(struct seq_file *m, void *v)
4994 {
4995 }
4996 
4997 static int saved_tgids_show(struct seq_file *m, void *v)
4998 {
4999 	int pid = (int *)v - tgid_map;
5000 
5001 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5002 	return 0;
5003 }
5004 
5005 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5006 	.start		= saved_tgids_start,
5007 	.stop		= saved_tgids_stop,
5008 	.next		= saved_tgids_next,
5009 	.show		= saved_tgids_show,
5010 };
5011 
5012 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5013 {
5014 	if (tracing_disabled)
5015 		return -ENODEV;
5016 
5017 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5018 }
5019 
5020 
5021 static const struct file_operations tracing_saved_tgids_fops = {
5022 	.open		= tracing_saved_tgids_open,
5023 	.read		= seq_read,
5024 	.llseek		= seq_lseek,
5025 	.release	= seq_release,
5026 };
5027 
5028 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5029 {
5030 	unsigned int *ptr = v;
5031 
5032 	if (*pos || m->count)
5033 		ptr++;
5034 
5035 	(*pos)++;
5036 
5037 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5038 	     ptr++) {
5039 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5040 			continue;
5041 
5042 		return ptr;
5043 	}
5044 
5045 	return NULL;
5046 }
5047 
5048 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5049 {
5050 	void *v;
5051 	loff_t l = 0;
5052 
5053 	preempt_disable();
5054 	arch_spin_lock(&trace_cmdline_lock);
5055 
5056 	v = &savedcmd->map_cmdline_to_pid[0];
5057 	while (l <= *pos) {
5058 		v = saved_cmdlines_next(m, v, &l);
5059 		if (!v)
5060 			return NULL;
5061 	}
5062 
5063 	return v;
5064 }
5065 
5066 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5067 {
5068 	arch_spin_unlock(&trace_cmdline_lock);
5069 	preempt_enable();
5070 }
5071 
5072 static int saved_cmdlines_show(struct seq_file *m, void *v)
5073 {
5074 	char buf[TASK_COMM_LEN];
5075 	unsigned int *pid = v;
5076 
5077 	__trace_find_cmdline(*pid, buf);
5078 	seq_printf(m, "%d %s\n", *pid, buf);
5079 	return 0;
5080 }
5081 
5082 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5083 	.start		= saved_cmdlines_start,
5084 	.next		= saved_cmdlines_next,
5085 	.stop		= saved_cmdlines_stop,
5086 	.show		= saved_cmdlines_show,
5087 };
5088 
5089 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5090 {
5091 	if (tracing_disabled)
5092 		return -ENODEV;
5093 
5094 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5095 }
5096 
5097 static const struct file_operations tracing_saved_cmdlines_fops = {
5098 	.open		= tracing_saved_cmdlines_open,
5099 	.read		= seq_read,
5100 	.llseek		= seq_lseek,
5101 	.release	= seq_release,
5102 };
5103 
5104 static ssize_t
5105 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5106 				 size_t cnt, loff_t *ppos)
5107 {
5108 	char buf[64];
5109 	int r;
5110 
5111 	arch_spin_lock(&trace_cmdline_lock);
5112 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5113 	arch_spin_unlock(&trace_cmdline_lock);
5114 
5115 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5116 }
5117 
5118 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5119 {
5120 	kfree(s->saved_cmdlines);
5121 	kfree(s->map_cmdline_to_pid);
5122 	kfree(s);
5123 }
5124 
5125 static int tracing_resize_saved_cmdlines(unsigned int val)
5126 {
5127 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5128 
5129 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5130 	if (!s)
5131 		return -ENOMEM;
5132 
5133 	if (allocate_cmdlines_buffer(val, s) < 0) {
5134 		kfree(s);
5135 		return -ENOMEM;
5136 	}
5137 
5138 	arch_spin_lock(&trace_cmdline_lock);
5139 	savedcmd_temp = savedcmd;
5140 	savedcmd = s;
5141 	arch_spin_unlock(&trace_cmdline_lock);
5142 	free_saved_cmdlines_buffer(savedcmd_temp);
5143 
5144 	return 0;
5145 }
5146 
5147 static ssize_t
5148 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5149 				  size_t cnt, loff_t *ppos)
5150 {
5151 	unsigned long val;
5152 	int ret;
5153 
5154 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5155 	if (ret)
5156 		return ret;
5157 
5158 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5159 	if (!val || val > PID_MAX_DEFAULT)
5160 		return -EINVAL;
5161 
5162 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5163 	if (ret < 0)
5164 		return ret;
5165 
5166 	*ppos += cnt;
5167 
5168 	return cnt;
5169 }
5170 
5171 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5172 	.open		= tracing_open_generic,
5173 	.read		= tracing_saved_cmdlines_size_read,
5174 	.write		= tracing_saved_cmdlines_size_write,
5175 };
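
/*
 * For example, growing the cached comm list managed above to 4096
 * entries and reading the new size back:
 *
 *	# echo 4096 > saved_cmdlines_size
 *	# cat saved_cmdlines_size
 *	4096
 */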
5176 
5177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5178 static union trace_eval_map_item *
5179 update_eval_map(union trace_eval_map_item *ptr)
5180 {
5181 	if (!ptr->map.eval_string) {
5182 		if (ptr->tail.next) {
5183 			ptr = ptr->tail.next;
5184 			/* Set ptr to the next real item (skip head) */
5185 			ptr++;
5186 		} else
5187 			return NULL;
5188 	}
5189 	return ptr;
5190 }
5191 
5192 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5193 {
5194 	union trace_eval_map_item *ptr = v;
5195 
5196 	/*
5197 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5198 	 * This really should never happen.
5199 	 */
5200 	ptr = update_eval_map(ptr);
5201 	if (WARN_ON_ONCE(!ptr))
5202 		return NULL;
5203 
5204 	ptr++;
5205 
5206 	(*pos)++;
5207 
5208 	ptr = update_eval_map(ptr);
5209 
5210 	return ptr;
5211 }
5212 
5213 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5214 {
5215 	union trace_eval_map_item *v;
5216 	loff_t l = 0;
5217 
5218 	mutex_lock(&trace_eval_mutex);
5219 
5220 	v = trace_eval_maps;
5221 	if (v)
5222 		v++;
5223 
5224 	while (v && l < *pos) {
5225 		v = eval_map_next(m, v, &l);
5226 	}
5227 
5228 	return v;
5229 }
5230 
5231 static void eval_map_stop(struct seq_file *m, void *v)
5232 {
5233 	mutex_unlock(&trace_eval_mutex);
5234 }
5235 
5236 static int eval_map_show(struct seq_file *m, void *v)
5237 {
5238 	union trace_eval_map_item *ptr = v;
5239 
5240 	seq_printf(m, "%s %ld (%s)\n",
5241 		   ptr->map.eval_string, ptr->map.eval_value,
5242 		   ptr->map.system);
5243 
5244 	return 0;
5245 }
5246 
5247 static const struct seq_operations tracing_eval_map_seq_ops = {
5248 	.start		= eval_map_start,
5249 	.next		= eval_map_next,
5250 	.stop		= eval_map_stop,
5251 	.show		= eval_map_show,
5252 };
5253 
5254 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5255 {
5256 	if (tracing_disabled)
5257 		return -ENODEV;
5258 
5259 	return seq_open(filp, &tracing_eval_map_seq_ops);
5260 }
5261 
5262 static const struct file_operations tracing_eval_map_fops = {
5263 	.open		= tracing_eval_map_open,
5264 	.read		= seq_read,
5265 	.llseek		= seq_lseek,
5266 	.release	= seq_release,
5267 };
5268 
5269 static inline union trace_eval_map_item *
5270 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5271 {
5272 	/* Return tail of array given the head */
5273 	return ptr + ptr->head.length + 1;
5274 }
5275 
5276 static void
5277 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5278 			   int len)
5279 {
5280 	struct trace_eval_map **stop;
5281 	struct trace_eval_map **map;
5282 	union trace_eval_map_item *map_array;
5283 	union trace_eval_map_item *ptr;
5284 
5285 	stop = start + len;
5286 
5287 	/*
5288 	 * The trace_eval_maps contains the map plus a head and tail item,
5289 	 * where the head holds the module and length of array, and the
5290 	 * tail holds a pointer to the next list.
5291 	 */
5292 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5293 	if (!map_array) {
5294 		pr_warn("Unable to allocate trace eval mapping\n");
5295 		return;
5296 	}
5297 
5298 	mutex_lock(&trace_eval_mutex);
5299 
5300 	if (!trace_eval_maps)
5301 		trace_eval_maps = map_array;
5302 	else {
5303 		ptr = trace_eval_maps;
5304 		for (;;) {
5305 			ptr = trace_eval_jmp_to_tail(ptr);
5306 			if (!ptr->tail.next)
5307 				break;
5308 			ptr = ptr->tail.next;
5309 
5310 		}
5311 		ptr->tail.next = map_array;
5312 	}
5313 	map_array->head.mod = mod;
5314 	map_array->head.length = len;
5315 	map_array++;
5316 
5317 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5318 		map_array->map = **map;
5319 		map_array++;
5320 	}
5321 	memset(map_array, 0, sizeof(*map_array));
5322 
5323 	mutex_unlock(&trace_eval_mutex);
5324 }
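
/*
 * Resulting layout of one chunk appended above (len + 2 items):
 *
 *	[ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() skips from a head to its tail entry, and the
 * zeroed tail terminates the list until another module chains onto it.
 */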
5325 
5326 static void trace_create_eval_file(struct dentry *d_tracer)
5327 {
5328 	trace_create_file("eval_map", 0444, d_tracer,
5329 			  NULL, &tracing_eval_map_fops);
5330 }
5331 
5332 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5333 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5334 static inline void trace_insert_eval_map_file(struct module *mod,
5335 			      struct trace_eval_map **start, int len) { }
5336 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5337 
5338 static void trace_insert_eval_map(struct module *mod,
5339 				  struct trace_eval_map **start, int len)
5340 {
5341 	struct trace_eval_map **map;
5342 
5343 	if (len <= 0)
5344 		return;
5345 
5346 	map = start;
5347 
5348 	trace_event_eval_update(map, len);
5349 
5350 	trace_insert_eval_map_file(mod, start, len);
5351 }
5352 
5353 static ssize_t
5354 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5355 		       size_t cnt, loff_t *ppos)
5356 {
5357 	struct trace_array *tr = filp->private_data;
5358 	char buf[MAX_TRACER_SIZE+2];
5359 	int r;
5360 
5361 	mutex_lock(&trace_types_lock);
5362 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5363 	mutex_unlock(&trace_types_lock);
5364 
5365 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5366 }
5367 
5368 int tracer_init(struct tracer *t, struct trace_array *tr)
5369 {
5370 	tracing_reset_online_cpus(&tr->trace_buffer);
5371 	return t->init(tr);
5372 }
5373 
5374 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5375 {
5376 	int cpu;
5377 
5378 	for_each_tracing_cpu(cpu)
5379 		per_cpu_ptr(buf->data, cpu)->entries = val;
5380 }
5381 
5382 #ifdef CONFIG_TRACER_MAX_TRACE
5383 /* resize @trace_buf's per-cpu entries to match @size_buf's per-cpu entries */
5384 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5385 					struct trace_buffer *size_buf, int cpu_id)
5386 {
5387 	int cpu, ret = 0;
5388 
5389 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5390 		for_each_tracing_cpu(cpu) {
5391 			ret = ring_buffer_resize(trace_buf->buffer,
5392 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5393 			if (ret < 0)
5394 				break;
5395 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5396 				per_cpu_ptr(size_buf->data, cpu)->entries;
5397 		}
5398 	} else {
5399 		ret = ring_buffer_resize(trace_buf->buffer,
5400 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5401 		if (ret == 0)
5402 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5403 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5404 	}
5405 
5406 	return ret;
5407 }
5408 #endif /* CONFIG_TRACER_MAX_TRACE */
5409 
5410 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5411 					unsigned long size, int cpu)
5412 {
5413 	int ret;
5414 
5415 	/*
5416 	 * If kernel or user changes the size of the ring buffer
5417 	 * we use the size that was given, and we can forget about
5418 	 * expanding it later.
5419 	 */
5420 	ring_buffer_expanded = true;
5421 
5422 	/* May be called before buffers are initialized */
5423 	if (!tr->trace_buffer.buffer)
5424 		return 0;
5425 
5426 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5427 	if (ret < 0)
5428 		return ret;
5429 
5430 #ifdef CONFIG_TRACER_MAX_TRACE
5431 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5432 	    !tr->current_trace->use_max_tr)
5433 		goto out;
5434 
5435 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5436 	if (ret < 0) {
5437 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5438 						     &tr->trace_buffer, cpu);
5439 		if (r < 0) {
5440 			/*
5441 			 * AARGH! We are left with different
5442 			 * size max buffer!!!!
5443 			 * The max buffer is our "snapshot" buffer.
5444 			 * When a tracer needs a snapshot (one of the
5445 			 * latency tracers), it swaps the max buffer
5446 			 * with the saved snapshot. We succeeded in updating
5447 			 * the size of the main buffer, but failed to update
5448 			 * the size of the max buffer. Then, when we tried to
5449 			 * reset the main buffer to its original size, we
5450 			 * failed there too. This is very unlikely to
5451 			 * happen, but if it does, warn and kill all
5452 			 * tracing.
5453 			 */
5454 			WARN_ON(1);
5455 			tracing_disabled = 1;
5456 		}
5457 		return ret;
5458 	}
5459 
5460 	if (cpu == RING_BUFFER_ALL_CPUS)
5461 		set_buffer_entries(&tr->max_buffer, size);
5462 	else
5463 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5464 
5465  out:
5466 #endif /* CONFIG_TRACER_MAX_TRACE */
5467 
5468 	if (cpu == RING_BUFFER_ALL_CPUS)
5469 		set_buffer_entries(&tr->trace_buffer, size);
5470 	else
5471 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5472 
5473 	return ret;
5474 }
5475 
5476 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5477 					  unsigned long size, int cpu_id)
5478 {
5479 	int ret = size;
5480 
5481 	mutex_lock(&trace_types_lock);
5482 
5483 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5484 		/* make sure this CPU is enabled in the mask */
5485 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5486 			ret = -EINVAL;
5487 			goto out;
5488 		}
5489 	}
5490 
5491 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5492 	if (ret < 0)
5493 		ret = -ENOMEM;
5494 
5495 out:
5496 	mutex_unlock(&trace_types_lock);
5497 
5498 	return ret;
5499 }
5500 
5501 
5502 /**
5503  * tracing_update_buffers - used by tracing facility to expand ring buffers
5504  *
5505  * To save on memory when the tracing is never used on a system with it
5506  * configured in. The ring buffers are set to a minimum size. But once
5507  * a user starts to use the tracing facility, then they need to grow
5508  * to their default size.
5509  *
5510  * This function is to be called when a tracer is about to be used.
5511  */
5512 int tracing_update_buffers(void)
5513 {
5514 	int ret = 0;
5515 
5516 	mutex_lock(&trace_types_lock);
5517 	if (!ring_buffer_expanded)
5518 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5519 						RING_BUFFER_ALL_CPUS);
5520 	mutex_unlock(&trace_types_lock);
5521 
5522 	return ret;
5523 }
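
/*
 * A typical caller pattern (illustrative) before enabling a tracer or
 * an event, so the boot-time minimal buffers are expanded first:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */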
5524 
5525 struct trace_option_dentry;
5526 
5527 static void
5528 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5529 
5530 /*
5531  * Used to clear out the tracer before deletion of an instance.
5532  * Must have trace_types_lock held.
5533  */
5534 static void tracing_set_nop(struct trace_array *tr)
5535 {
5536 	if (tr->current_trace == &nop_trace)
5537 		return;
5538 
5539 	tr->current_trace->enabled--;
5540 
5541 	if (tr->current_trace->reset)
5542 		tr->current_trace->reset(tr);
5543 
5544 	tr->current_trace = &nop_trace;
5545 }
5546 
5547 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5548 {
5549 	/* Only enable if the directory has been created already. */
5550 	if (!tr->dir)
5551 		return;
5552 
5553 	create_trace_option_files(tr, t);
5554 }
5555 
5556 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5557 {
5558 	struct tracer *t;
5559 #ifdef CONFIG_TRACER_MAX_TRACE
5560 	bool had_max_tr;
5561 #endif
5562 	int ret = 0;
5563 
5564 	mutex_lock(&trace_types_lock);
5565 
5566 	if (!ring_buffer_expanded) {
5567 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5568 						RING_BUFFER_ALL_CPUS);
5569 		if (ret < 0)
5570 			goto out;
5571 		ret = 0;
5572 	}
5573 
5574 	for (t = trace_types; t; t = t->next) {
5575 		if (strcmp(t->name, buf) == 0)
5576 			break;
5577 	}
5578 	if (!t) {
5579 		ret = -EINVAL;
5580 		goto out;
5581 	}
5582 	if (t == tr->current_trace)
5583 		goto out;
5584 
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586 	if (t->use_max_tr) {
5587 		arch_spin_lock(&tr->max_lock);
5588 		if (tr->cond_snapshot)
5589 			ret = -EBUSY;
5590 		arch_spin_unlock(&tr->max_lock);
5591 		if (ret)
5592 			goto out;
5593 	}
5594 #endif
5595 	/* Some tracers won't work on kernel command line */
5596 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5597 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5598 			t->name);
5599 		goto out;
5600 	}
5601 
5602 	/* Some tracers are only allowed for the top level buffer */
5603 	if (!trace_ok_for_array(t, tr)) {
5604 		ret = -EINVAL;
5605 		goto out;
5606 	}
5607 
5608 	/* If trace pipe files are being read, we can't change the tracer */
5609 	if (tr->current_trace->ref) {
5610 		ret = -EBUSY;
5611 		goto out;
5612 	}
5613 
5614 	trace_branch_disable();
5615 
5616 	tr->current_trace->enabled--;
5617 
5618 	if (tr->current_trace->reset)
5619 		tr->current_trace->reset(tr);
5620 
5621 	/* Current trace needs to be nop_trace before synchronize_rcu */
5622 	tr->current_trace = &nop_trace;
5623 
5624 #ifdef CONFIG_TRACER_MAX_TRACE
5625 	had_max_tr = tr->allocated_snapshot;
5626 
5627 	if (had_max_tr && !t->use_max_tr) {
5628 		/*
5629 		 * We need to make sure that the update_max_tr sees that
5630 		 * current_trace changed to nop_trace to keep it from
5631 		 * swapping the buffers after we resize it.
5632 		 * The update_max_tr is called with interrupts disabled,
5633 		 * so a synchronize_rcu() is sufficient.
5634 		 */
5635 		synchronize_rcu();
5636 		free_snapshot(tr);
5637 	}
5638 #endif
5639 
5640 #ifdef CONFIG_TRACER_MAX_TRACE
5641 	if (t->use_max_tr && !had_max_tr) {
5642 		ret = tracing_alloc_snapshot_instance(tr);
5643 		if (ret < 0)
5644 			goto out;
5645 	}
5646 #endif
5647 
5648 	if (t->init) {
5649 		ret = tracer_init(t, tr);
5650 		if (ret)
5651 			goto out;
5652 	}
5653 
5654 	tr->current_trace = t;
5655 	tr->current_trace->enabled++;
5656 	trace_branch_enable(tr);
5657  out:
5658 	mutex_unlock(&trace_types_lock);
5659 
5660 	return ret;
5661 }
5662 
5663 static ssize_t
5664 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5665 			size_t cnt, loff_t *ppos)
5666 {
5667 	struct trace_array *tr = filp->private_data;
5668 	char buf[MAX_TRACER_SIZE+1];
5669 	int i;
5670 	size_t ret;
5671 	int err;
5672 
5673 	ret = cnt;
5674 
5675 	if (cnt > MAX_TRACER_SIZE)
5676 		cnt = MAX_TRACER_SIZE;
5677 
5678 	if (copy_from_user(buf, ubuf, cnt))
5679 		return -EFAULT;
5680 
5681 	buf[cnt] = 0;
5682 
5683 	/* Strip trailing whitespace. */
5684 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5685 		buf[i] = 0;
5686 
5687 	err = tracing_set_tracer(tr, buf);
5688 	if (err)
5689 		return err;
5690 
5691 	*ppos += ret;
5692 
5693 	return ret;
5694 }
5695 
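/*
 * Helpers for tracefs files that store nanoseconds but are read and
 * written in microseconds: reads convert the stored value with
 * nsecs_to_usecs(), writes multiply the user value by 1000. Used for
 * tracing_thresh and, when configured in, the max latency file.
 */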
5696 static ssize_t
5697 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5698 		   size_t cnt, loff_t *ppos)
5699 {
5700 	char buf[64];
5701 	int r;
5702 
5703 	r = snprintf(buf, sizeof(buf), "%ld\n",
5704 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5705 	if (r > sizeof(buf))
5706 		r = sizeof(buf);
5707 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5708 }
5709 
5710 static ssize_t
5711 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5712 		    size_t cnt, loff_t *ppos)
5713 {
5714 	unsigned long val;
5715 	int ret;
5716 
5717 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5718 	if (ret)
5719 		return ret;
5720 
5721 	*ptr = val * 1000;
5722 
5723 	return cnt;
5724 }
5725 
5726 static ssize_t
5727 tracing_thresh_read(struct file *filp, char __user *ubuf,
5728 		    size_t cnt, loff_t *ppos)
5729 {
5730 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5731 }
5732 
5733 static ssize_t
5734 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5735 		     size_t cnt, loff_t *ppos)
5736 {
5737 	struct trace_array *tr = filp->private_data;
5738 	int ret;
5739 
5740 	mutex_lock(&trace_types_lock);
5741 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5742 	if (ret < 0)
5743 		goto out;
5744 
5745 	if (tr->current_trace->update_thresh) {
5746 		ret = tr->current_trace->update_thresh(tr);
5747 		if (ret < 0)
5748 			goto out;
5749 	}
5750 
5751 	ret = cnt;
5752 out:
5753 	mutex_unlock(&trace_types_lock);
5754 
5755 	return ret;
5756 }
5757 
5758 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5759 
5760 static ssize_t
5761 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5762 		     size_t cnt, loff_t *ppos)
5763 {
5764 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5765 }
5766 
5767 static ssize_t
5768 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5769 		      size_t cnt, loff_t *ppos)
5770 {
5771 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5772 }
5773 
5774 #endif
5775 
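/*
 * Open handler for the "trace_pipe" file. While a pipe reader is open,
 * the current tracer is pinned (current_trace->ref is bumped), which is
 * why tracing_set_tracer() refuses to switch tracers with -EBUSY.
 */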
5776 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5777 {
5778 	struct trace_array *tr = inode->i_private;
5779 	struct trace_iterator *iter;
5780 	int ret = 0;
5781 
5782 	if (tracing_disabled)
5783 		return -ENODEV;
5784 
5785 	if (trace_array_get(tr) < 0)
5786 		return -ENODEV;
5787 
5788 	mutex_lock(&trace_types_lock);
5789 
5790 	/* create a buffer to store the information to pass to userspace */
5791 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5792 	if (!iter) {
5793 		ret = -ENOMEM;
5794 		__trace_array_put(tr);
5795 		goto out;
5796 	}
5797 
5798 	trace_seq_init(&iter->seq);
5799 	iter->trace = tr->current_trace;
5800 
5801 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5802 		ret = -ENOMEM;
5803 		goto fail;
5804 	}
5805 
5806 	/* trace pipe does not show start of buffer */
5807 	cpumask_setall(iter->started);
5808 
5809 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5810 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5811 
5812 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5813 	if (trace_clocks[tr->clock_id].in_ns)
5814 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5815 
5816 	iter->tr = tr;
5817 	iter->trace_buffer = &tr->trace_buffer;
5818 	iter->cpu_file = tracing_get_cpu(inode);
5819 	mutex_init(&iter->mutex);
5820 	filp->private_data = iter;
5821 
5822 	if (iter->trace->pipe_open)
5823 		iter->trace->pipe_open(iter);
5824 
5825 	nonseekable_open(inode, filp);
5826 
5827 	tr->current_trace->ref++;
5828 out:
5829 	mutex_unlock(&trace_types_lock);
5830 	return ret;
5831 
5832 fail:
5833 	kfree(iter);
5834 	__trace_array_put(tr);
5835 	mutex_unlock(&trace_types_lock);
5836 	return ret;
5837 }
5838 
5839 static int tracing_release_pipe(struct inode *inode, struct file *file)
5840 {
5841 	struct trace_iterator *iter = file->private_data;
5842 	struct trace_array *tr = inode->i_private;
5843 
5844 	mutex_lock(&trace_types_lock);
5845 
5846 	tr->current_trace->ref--;
5847 
5848 	if (iter->trace->pipe_close)
5849 		iter->trace->pipe_close(iter);
5850 
5851 	mutex_unlock(&trace_types_lock);
5852 
5853 	free_cpumask_var(iter->started);
5854 	mutex_destroy(&iter->mutex);
5855 	kfree(iter);
5856 
5857 	trace_array_put(tr);
5858 
5859 	return 0;
5860 }
5861 
5862 static __poll_t
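/*
 * Common poll implementation for trace_pipe and trace_pipe_raw. Iterators
 * with static per-cpu buffer iterators are always reported readable, as is
 * everything when the "block" trace option is set; otherwise the decision
 * is deferred to ring_buffer_poll_wait().
 */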
5863 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5864 {
5865 	struct trace_array *tr = iter->tr;
5866 
5867 	/* Iterators are static, they should be filled or empty */
5868 	if (trace_buffer_iter(iter, iter->cpu_file))
5869 		return EPOLLIN | EPOLLRDNORM;
5870 
5871 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5872 		/*
5873 		 * Always select as readable when in blocking mode
5874 		 */
5875 		return EPOLLIN | EPOLLRDNORM;
5876 	else
5877 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5878 					     filp, poll_table);
5879 }
5880 
5881 static __poll_t
5882 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5883 {
5884 	struct trace_iterator *iter = filp->private_data;
5885 
5886 	return trace_poll(iter, filp, poll_table);
5887 }
5888 
5889 /* Must be called with iter->mutex held. */
5890 static int tracing_wait_pipe(struct file *filp)
5891 {
5892 	struct trace_iterator *iter = filp->private_data;
5893 	int ret;
5894 
5895 	while (trace_empty(iter)) {
5896 
5897 		if ((filp->f_flags & O_NONBLOCK)) {
5898 			return -EAGAIN;
5899 		}
5900 
5901 		/*
5902 		 * We only return EOF after we have read something and tracing
5903 		 * has been disabled. If tracing is disabled but nothing has
5904 		 * been read yet, keep blocking; this allows a user to cat this
5905 		 * file and then enable tracing. Once something has been read,
5906 		 * EOF is returned when tracing is disabled again.
5907 		 *
5908 		 * iter->pos will be 0 if we haven't read anything.
5909 		 */
5910 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5911 			break;
5912 
5913 		mutex_unlock(&iter->mutex);
5914 
5915 		ret = wait_on_pipe(iter, 0);
5916 
5917 		mutex_lock(&iter->mutex);
5918 
5919 		if (ret)
5920 			return ret;
5921 	}
5922 
5923 	return 1;
5924 }
5925 
5926 /*
5927  * Consumer reader: entries copied to userspace are consumed from the buffer.
5928  */
5929 static ssize_t
5930 tracing_read_pipe(struct file *filp, char __user *ubuf,
5931 		  size_t cnt, loff_t *ppos)
5932 {
5933 	struct trace_iterator *iter = filp->private_data;
5934 	ssize_t sret;
5935 
5936 	/*
5937 	 * Avoid more than one consumer on a single file descriptor.
5938 	 * This is just a matter of trace coherency; the ring buffer itself
5939 	 * is protected.
5940 	 */
5941 	mutex_lock(&iter->mutex);
5942 
5943 	/* return any leftover data */
5944 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5945 	if (sret != -EBUSY)
5946 		goto out;
5947 
5948 	trace_seq_init(&iter->seq);
5949 
5950 	if (iter->trace->read) {
5951 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5952 		if (sret)
5953 			goto out;
5954 	}
5955 
5956 waitagain:
5957 	sret = tracing_wait_pipe(filp);
5958 	if (sret <= 0)
5959 		goto out;
5960 
5961 	/* stop when tracing is finished */
5962 	if (trace_empty(iter)) {
5963 		sret = 0;
5964 		goto out;
5965 	}
5966 
5967 	if (cnt >= PAGE_SIZE)
5968 		cnt = PAGE_SIZE - 1;
5969 
5970 	/* reset all but tr, trace, and overruns */
5971 	memset(&iter->seq, 0,
5972 	       sizeof(struct trace_iterator) -
5973 	       offsetof(struct trace_iterator, seq));
5974 	cpumask_clear(iter->started);
5975 	iter->pos = -1;
5976 
5977 	trace_event_read_lock();
5978 	trace_access_lock(iter->cpu_file);
5979 	while (trace_find_next_entry_inc(iter) != NULL) {
5980 		enum print_line_t ret;
5981 		int save_len = iter->seq.seq.len;
5982 
5983 		ret = print_trace_line(iter);
5984 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5985 			/* don't print partial lines */
5986 			iter->seq.seq.len = save_len;
5987 			break;
5988 		}
5989 		if (ret != TRACE_TYPE_NO_CONSUME)
5990 			trace_consume(iter);
5991 
5992 		if (trace_seq_used(&iter->seq) >= cnt)
5993 			break;
5994 
5995 		/*
5996 		 * Setting the full flag means we reached the trace_seq buffer
5997 		 * size and should have left via the partial-output condition
5998 		 * above. One of the trace_seq_* functions is not used properly.
5999 		 */
6000 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6001 			  iter->ent->type);
6002 	}
6003 	trace_access_unlock(iter->cpu_file);
6004 	trace_event_read_unlock();
6005 
6006 	/* Now copy what we have to the user */
6007 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6008 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6009 		trace_seq_init(&iter->seq);
6010 
6011 	/*
6012 	 * If there was nothing to send to user, in spite of consuming trace
6013 	 * entries, go back to wait for more entries.
6014 	 */
6015 	if (sret == -EBUSY)
6016 		goto waitagain;
6017 
6018 out:
6019 	mutex_unlock(&iter->mutex);
6020 
6021 	return sret;
6022 }
6023 
6024 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6025 				     unsigned int idx)
6026 {
6027 	__free_page(spd->pages[idx]);
6028 }
6029 
6030 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6031 	.confirm		= generic_pipe_buf_confirm,
6032 	.release		= generic_pipe_buf_release,
6033 	.steal			= generic_pipe_buf_steal,
6034 	.get			= generic_pipe_buf_get,
6035 };
6036 
6037 static size_t
6038 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6039 {
6040 	size_t count;
6041 	int save_len;
6042 	int ret;
6043 
6044 	/* Seq buffer is page-sized, exactly what we need. */
6045 	for (;;) {
6046 		save_len = iter->seq.seq.len;
6047 		ret = print_trace_line(iter);
6048 
6049 		if (trace_seq_has_overflowed(&iter->seq)) {
6050 			iter->seq.seq.len = save_len;
6051 			break;
6052 		}
6053 
6054 		/*
6055 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6056 		 * should only be returned if iter->seq overflowed. But check
6057 		 * it anyway to be safe.
6058 		 */
6059 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6060 			iter->seq.seq.len = save_len;
6061 			break;
6062 		}
6063 
6064 		count = trace_seq_used(&iter->seq) - save_len;
6065 		if (rem < count) {
6066 			rem = 0;
6067 			iter->seq.seq.len = save_len;
6068 			break;
6069 		}
6070 
6071 		if (ret != TRACE_TYPE_NO_CONSUME)
6072 			trace_consume(iter);
6073 		rem -= count;
6074 		if (!trace_find_next_entry_inc(iter))	{
6075 			rem = 0;
6076 			iter->ent = NULL;
6077 			break;
6078 		}
6079 	}
6080 
6081 	return rem;
6082 }
6083 
6084 static ssize_t tracing_splice_read_pipe(struct file *filp,
6085 					loff_t *ppos,
6086 					struct pipe_inode_info *pipe,
6087 					size_t len,
6088 					unsigned int flags)
6089 {
6090 	struct page *pages_def[PIPE_DEF_BUFFERS];
6091 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6092 	struct trace_iterator *iter = filp->private_data;
6093 	struct splice_pipe_desc spd = {
6094 		.pages		= pages_def,
6095 		.partial	= partial_def,
6096 		.nr_pages	= 0, /* This gets updated below. */
6097 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6098 		.ops		= &tracing_pipe_buf_ops,
6099 		.spd_release	= tracing_spd_release_pipe,
6100 	};
6101 	ssize_t ret;
6102 	size_t rem;
6103 	unsigned int i;
6104 
6105 	if (splice_grow_spd(pipe, &spd))
6106 		return -ENOMEM;
6107 
6108 	mutex_lock(&iter->mutex);
6109 
6110 	if (iter->trace->splice_read) {
6111 		ret = iter->trace->splice_read(iter, filp,
6112 					       ppos, pipe, len, flags);
6113 		if (ret)
6114 			goto out_err;
6115 	}
6116 
6117 	ret = tracing_wait_pipe(filp);
6118 	if (ret <= 0)
6119 		goto out_err;
6120 
6121 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6122 		ret = -EFAULT;
6123 		goto out_err;
6124 	}
6125 
6126 	trace_event_read_lock();
6127 	trace_access_lock(iter->cpu_file);
6128 
6129 	/* Fill as many pages as possible. */
6130 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6131 		spd.pages[i] = alloc_page(GFP_KERNEL);
6132 		if (!spd.pages[i])
6133 			break;
6134 
6135 		rem = tracing_fill_pipe_page(rem, iter);
6136 
6137 		/* Copy the data into the page, so we can start over. */
6138 		ret = trace_seq_to_buffer(&iter->seq,
6139 					  page_address(spd.pages[i]),
6140 					  trace_seq_used(&iter->seq));
6141 		if (ret < 0) {
6142 			__free_page(spd.pages[i]);
6143 			break;
6144 		}
6145 		spd.partial[i].offset = 0;
6146 		spd.partial[i].len = trace_seq_used(&iter->seq);
6147 
6148 		trace_seq_init(&iter->seq);
6149 	}
6150 
6151 	trace_access_unlock(iter->cpu_file);
6152 	trace_event_read_unlock();
6153 	mutex_unlock(&iter->mutex);
6154 
6155 	spd.nr_pages = i;
6156 
6157 	if (i)
6158 		ret = splice_to_pipe(pipe, &spd);
6159 	else
6160 		ret = 0;
6161 out:
6162 	splice_shrink_spd(&spd);
6163 	return ret;
6164 
6165 out_err:
6166 	mutex_unlock(&iter->mutex);
6167 	goto out;
6168 }
6169 
6170 static ssize_t
6171 tracing_entries_read(struct file *filp, char __user *ubuf,
6172 		     size_t cnt, loff_t *ppos)
6173 {
6174 	struct inode *inode = file_inode(filp);
6175 	struct trace_array *tr = inode->i_private;
6176 	int cpu = tracing_get_cpu(inode);
6177 	char buf[64];
6178 	int r = 0;
6179 	ssize_t ret;
6180 
6181 	mutex_lock(&trace_types_lock);
6182 
6183 	if (cpu == RING_BUFFER_ALL_CPUS) {
6184 		int cpu, buf_size_same;
6185 		unsigned long size;
6186 
6187 		size = 0;
6188 		buf_size_same = 1;
6189 		/* Check if all per-cpu buffer sizes are the same. */
6190 		for_each_tracing_cpu(cpu) {
6191 			/* Fill in the size from the first enabled cpu. */
6192 			if (size == 0)
6193 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6194 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6195 				buf_size_same = 0;
6196 				break;
6197 			}
6198 		}
6199 
6200 		if (buf_size_same) {
6201 			if (!ring_buffer_expanded)
6202 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6203 					    size >> 10,
6204 					    trace_buf_size >> 10);
6205 			else
6206 				r = sprintf(buf, "%lu\n", size >> 10);
6207 		} else
6208 			r = sprintf(buf, "X\n");
6209 	} else
6210 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6211 
6212 	mutex_unlock(&trace_types_lock);
6213 
6214 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6215 	return ret;
6216 }
6217 
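/*
 * Write handler for the "buffer_size_kb" files (whole buffer or per cpu).
 * The value is interpreted in kilobytes, e.g.
 *
 *   echo 4096 > buffer_size_kb
 *
 * resizes each selected CPU's ring buffer to 4 MB.
 */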
6218 static ssize_t
6219 tracing_entries_write(struct file *filp, const char __user *ubuf,
6220 		      size_t cnt, loff_t *ppos)
6221 {
6222 	struct inode *inode = file_inode(filp);
6223 	struct trace_array *tr = inode->i_private;
6224 	unsigned long val;
6225 	int ret;
6226 
6227 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6228 	if (ret)
6229 		return ret;
6230 
6231 	/* must have at least 1 entry */
6232 	if (!val)
6233 		return -EINVAL;
6234 
6235 	/* value is in KB */
6236 	val <<= 10;
6237 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6238 	if (ret < 0)
6239 		return ret;
6240 
6241 	*ppos += cnt;
6242 
6243 	return cnt;
6244 }
6245 
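/*
 * Backs the total buffer size file (buffer_total_size_kb): reports the
 * sum of all per-cpu buffer sizes in KB and, if the ring buffer has not
 * been expanded from its boot-time minimum yet, the expanded size too.
 */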
6246 static ssize_t
6247 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6248 				size_t cnt, loff_t *ppos)
6249 {
6250 	struct trace_array *tr = filp->private_data;
6251 	char buf[64];
6252 	int r, cpu;
6253 	unsigned long size = 0, expanded_size = 0;
6254 
6255 	mutex_lock(&trace_types_lock);
6256 	for_each_tracing_cpu(cpu) {
6257 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6258 		if (!ring_buffer_expanded)
6259 			expanded_size += trace_buf_size >> 10;
6260 	}
6261 	if (ring_buffer_expanded)
6262 		r = sprintf(buf, "%lu\n", size);
6263 	else
6264 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6265 	mutex_unlock(&trace_types_lock);
6266 
6267 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6268 }
6269 
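/*
 * The "free_buffer" file: writes are accepted but ignored (so a plain
 * "echo > free_buffer" works), and on release the ring buffer is resized
 * to zero. If TRACE_ITER_STOP_ON_FREE is set, tracing is turned off first.
 */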
6270 static ssize_t
6271 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6272 			  size_t cnt, loff_t *ppos)
6273 {
6274 	/*
6275 	 * There is no need to read what the user has written; this function
6276 	 * only exists so that using "echo" on this file does not return an error.
6277 	 */
6278 
6279 	*ppos += cnt;
6280 
6281 	return cnt;
6282 }
6283 
6284 static int
6285 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6286 {
6287 	struct trace_array *tr = inode->i_private;
6288 
6289 	/* Disable tracing? */
6290 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6291 		tracer_tracing_off(tr);
6292 	/* resize the ring buffer to 0 */
6293 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6294 
6295 	trace_array_put(tr);
6296 
6297 	return 0;
6298 }
6299 
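/*
 * Write handler for the "trace_marker" file. Userspace can inject an
 * arbitrary string into the trace buffer, e.g.
 *
 *   echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * (tracefs may also be mounted at /sys/kernel/debug/tracing). The string
 * is stored as a TRACE_PRINT event and may fire triggers attached to the
 * trace_marker event.
 */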
6300 static ssize_t
6301 tracing_mark_write(struct file *filp, const char __user *ubuf,
6302 					size_t cnt, loff_t *fpos)
6303 {
6304 	struct trace_array *tr = filp->private_data;
6305 	struct ring_buffer_event *event;
6306 	enum event_trigger_type tt = ETT_NONE;
6307 	struct ring_buffer *buffer;
6308 	struct print_entry *entry;
6309 	unsigned long irq_flags;
6310 	const char faulted[] = "<faulted>";
6311 	ssize_t written;
6312 	int size;
6313 	int len;
6314 
6315 /* Used in tracing_mark_raw_write() as well */
6316 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6317 
6318 	if (tracing_disabled)
6319 		return -EINVAL;
6320 
6321 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6322 		return -EINVAL;
6323 
6324 	if (cnt > TRACE_BUF_SIZE)
6325 		cnt = TRACE_BUF_SIZE;
6326 
6327 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6328 
6329 	local_save_flags(irq_flags);
6330 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6331 
6332 	/* If shorter than "<faulted>", make sure we can still store that string */
6333 	if (cnt < FAULTED_SIZE)
6334 		size += FAULTED_SIZE - cnt;
6335 
6336 	buffer = tr->trace_buffer.buffer;
6337 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6338 					    irq_flags, preempt_count());
6339 	if (unlikely(!event))
6340 		/* Ring buffer disabled, return as if not open for write */
6341 		return -EBADF;
6342 
6343 	entry = ring_buffer_event_data(event);
6344 	entry->ip = _THIS_IP_;
6345 
6346 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6347 	if (len) {
6348 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6349 		cnt = FAULTED_SIZE;
6350 		written = -EFAULT;
6351 	} else
6352 		written = cnt;
6353 	len = cnt;
6354 
6355 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6356 		/* do not add \n before testing triggers, but add \0 */
6357 		entry->buf[cnt] = '\0';
6358 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6359 	}
6360 
6361 	if (entry->buf[cnt - 1] != '\n') {
6362 		entry->buf[cnt] = '\n';
6363 		entry->buf[cnt + 1] = '\0';
6364 	} else
6365 		entry->buf[cnt] = '\0';
6366 
6367 	__buffer_unlock_commit(buffer, event);
6368 
6369 	if (tt)
6370 		event_triggers_post_call(tr->trace_marker_file, tt);
6371 
6372 	if (written > 0)
6373 		*fpos += written;
6374 
6375 	return written;
6376 }
6377 
6378 /* Limit it for now to 3K (including tag) */
6379 #define RAW_DATA_MAX_SIZE (1024*3)
6380 
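/*
 * Write handler for the "trace_marker_raw" file: the payload must start
 * with an integer tag id, followed by raw binary data, and is recorded
 * as a TRACE_RAW_DATA event.
 */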
6381 static ssize_t
6382 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6383 					size_t cnt, loff_t *fpos)
6384 {
6385 	struct trace_array *tr = filp->private_data;
6386 	struct ring_buffer_event *event;
6387 	struct ring_buffer *buffer;
6388 	struct raw_data_entry *entry;
6389 	const char faulted[] = "<faulted>";
6390 	unsigned long irq_flags;
6391 	ssize_t written;
6392 	int size;
6393 	int len;
6394 
6395 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6396 
6397 	if (tracing_disabled)
6398 		return -EINVAL;
6399 
6400 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6401 		return -EINVAL;
6402 
6403 	/* The marker must at least have a tag id */
6404 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6405 		return -EINVAL;
6406 
6407 	if (cnt > TRACE_BUF_SIZE)
6408 		cnt = TRACE_BUF_SIZE;
6409 
6410 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6411 
6412 	local_save_flags(irq_flags);
6413 	size = sizeof(*entry) + cnt;
6414 	if (cnt < FAULT_SIZE_ID)
6415 		size += FAULT_SIZE_ID - cnt;
6416 
6417 	buffer = tr->trace_buffer.buffer;
6418 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6419 					    irq_flags, preempt_count());
6420 	if (!event)
6421 		/* Ring buffer disabled, return as if not open for write */
6422 		return -EBADF;
6423 
6424 	entry = ring_buffer_event_data(event);
6425 
6426 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6427 	if (len) {
6428 		entry->id = -1;
6429 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6430 		written = -EFAULT;
6431 	} else
6432 		written = cnt;
6433 
6434 	__buffer_unlock_commit(buffer, event);
6435 
6436 	if (written > 0)
6437 		*fpos += written;
6438 
6439 	return written;
6440 }
6441 
6442 static int tracing_clock_show(struct seq_file *m, void *v)
6443 {
6444 	struct trace_array *tr = m->private;
6445 	int i;
6446 
6447 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6448 		seq_printf(m,
6449 			"%s%s%s%s", i ? " " : "",
6450 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6451 			i == tr->clock_id ? "]" : "");
6452 	seq_putc(m, '\n');
6453 
6454 	return 0;
6455 }
6456 
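/*
 * Select the clock used to timestamp ring buffer events. This backs writes
 * to the "trace_clock" file, e.g.
 *
 *   echo global > trace_clock
 *
 * Both the main buffer and, if allocated, the max/snapshot buffer are reset
 * afterwards, since timestamps from different clocks are not comparable.
 */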
6457 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6458 {
6459 	int i;
6460 
6461 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6462 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6463 			break;
6464 	}
6465 	if (i == ARRAY_SIZE(trace_clocks))
6466 		return -EINVAL;
6467 
6468 	mutex_lock(&trace_types_lock);
6469 
6470 	tr->clock_id = i;
6471 
6472 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6473 
6474 	/*
6475 	 * The new clock may not be consistent with the previous clock.
6476 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6477 	 */
6478 	tracing_reset_online_cpus(&tr->trace_buffer);
6479 
6480 #ifdef CONFIG_TRACER_MAX_TRACE
6481 	if (tr->max_buffer.buffer)
6482 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6483 	tracing_reset_online_cpus(&tr->max_buffer);
6484 #endif
6485 
6486 	mutex_unlock(&trace_types_lock);
6487 
6488 	return 0;
6489 }
6490 
6491 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6492 				   size_t cnt, loff_t *fpos)
6493 {
6494 	struct seq_file *m = filp->private_data;
6495 	struct trace_array *tr = m->private;
6496 	char buf[64];
6497 	const char *clockstr;
6498 	int ret;
6499 
6500 	if (cnt >= sizeof(buf))
6501 		return -EINVAL;
6502 
6503 	if (copy_from_user(buf, ubuf, cnt))
6504 		return -EFAULT;
6505 
6506 	buf[cnt] = 0;
6507 
6508 	clockstr = strstrip(buf);
6509 
6510 	ret = tracing_set_clock(tr, clockstr);
6511 	if (ret)
6512 		return ret;
6513 
6514 	*fpos += cnt;
6515 
6516 	return cnt;
6517 }
6518 
6519 static int tracing_clock_open(struct inode *inode, struct file *file)
6520 {
6521 	struct trace_array *tr = inode->i_private;
6522 	int ret;
6523 
6524 	if (tracing_disabled)
6525 		return -ENODEV;
6526 
6527 	if (trace_array_get(tr))
6528 		return -ENODEV;
6529 
6530 	ret = single_open(file, tracing_clock_show, inode->i_private);
6531 	if (ret < 0)
6532 		trace_array_put(tr);
6533 
6534 	return ret;
6535 }
6536 
6537 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6538 {
6539 	struct trace_array *tr = m->private;
6540 
6541 	mutex_lock(&trace_types_lock);
6542 
6543 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6544 		seq_puts(m, "delta [absolute]\n");
6545 	else
6546 		seq_puts(m, "[delta] absolute\n");
6547 
6548 	mutex_unlock(&trace_types_lock);
6549 
6550 	return 0;
6551 }
6552 
6553 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6554 {
6555 	struct trace_array *tr = inode->i_private;
6556 	int ret;
6557 
6558 	if (tracing_disabled)
6559 		return -ENODEV;
6560 
6561 	if (trace_array_get(tr))
6562 		return -ENODEV;
6563 
6564 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6565 	if (ret < 0)
6566 		trace_array_put(tr);
6567 
6568 	return ret;
6569 }
6570 
6571 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6572 {
6573 	int ret = 0;
6574 
6575 	mutex_lock(&trace_types_lock);
6576 
6577 	if (abs && tr->time_stamp_abs_ref++)
6578 		goto out;
6579 
6580 	if (!abs) {
6581 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6582 			ret = -EINVAL;
6583 			goto out;
6584 		}
6585 
6586 		if (--tr->time_stamp_abs_ref)
6587 			goto out;
6588 	}
6589 
6590 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6591 
6592 #ifdef CONFIG_TRACER_MAX_TRACE
6593 	if (tr->max_buffer.buffer)
6594 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6595 #endif
6596  out:
6597 	mutex_unlock(&trace_types_lock);
6598 
6599 	return ret;
6600 }
6601 
6602 struct ftrace_buffer_info {
6603 	struct trace_iterator	iter;
6604 	void			*spare;
6605 	unsigned int		spare_cpu;
6606 	unsigned int		read;
6607 };
6608 
6609 #ifdef CONFIG_TRACER_SNAPSHOT
6610 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6611 {
6612 	struct trace_array *tr = inode->i_private;
6613 	struct trace_iterator *iter;
6614 	struct seq_file *m;
6615 	int ret = 0;
6616 
6617 	if (trace_array_get(tr) < 0)
6618 		return -ENODEV;
6619 
6620 	if (file->f_mode & FMODE_READ) {
6621 		iter = __tracing_open(inode, file, true);
6622 		if (IS_ERR(iter))
6623 			ret = PTR_ERR(iter);
6624 	} else {
6625 		/* Writes still need the seq_file to hold the private data */
6626 		ret = -ENOMEM;
6627 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6628 		if (!m)
6629 			goto out;
6630 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6631 		if (!iter) {
6632 			kfree(m);
6633 			goto out;
6634 		}
6635 		ret = 0;
6636 
6637 		iter->tr = tr;
6638 		iter->trace_buffer = &tr->max_buffer;
6639 		iter->cpu_file = tracing_get_cpu(inode);
6640 		m->private = iter;
6641 		file->private_data = m;
6642 	}
6643 out:
6644 	if (ret < 0)
6645 		trace_array_put(tr);
6646 
6647 	return ret;
6648 }
6649 
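/*
 * Write handler for the "snapshot" files. The written value selects the
 * action:
 *   0  - free the snapshot buffer (whole-buffer file only)
 *   1  - allocate the snapshot buffer if needed and swap it with the live
 *        buffer (take a snapshot)
 *   >1 - clear out the snapshot buffer contents
 */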
6650 static ssize_t
6651 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6652 		       loff_t *ppos)
6653 {
6654 	struct seq_file *m = filp->private_data;
6655 	struct trace_iterator *iter = m->private;
6656 	struct trace_array *tr = iter->tr;
6657 	unsigned long val;
6658 	int ret;
6659 
6660 	ret = tracing_update_buffers();
6661 	if (ret < 0)
6662 		return ret;
6663 
6664 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6665 	if (ret)
6666 		return ret;
6667 
6668 	mutex_lock(&trace_types_lock);
6669 
6670 	if (tr->current_trace->use_max_tr) {
6671 		ret = -EBUSY;
6672 		goto out;
6673 	}
6674 
6675 	arch_spin_lock(&tr->max_lock);
6676 	if (tr->cond_snapshot)
6677 		ret = -EBUSY;
6678 	arch_spin_unlock(&tr->max_lock);
6679 	if (ret)
6680 		goto out;
6681 
6682 	switch (val) {
6683 	case 0:
6684 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6685 			ret = -EINVAL;
6686 			break;
6687 		}
6688 		if (tr->allocated_snapshot)
6689 			free_snapshot(tr);
6690 		break;
6691 	case 1:
6692 /* Only allow per-cpu swap if the ring buffer supports it */
6693 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6694 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6695 			ret = -EINVAL;
6696 			break;
6697 		}
6698 #endif
6699 		if (!tr->allocated_snapshot) {
6700 			ret = tracing_alloc_snapshot_instance(tr);
6701 			if (ret < 0)
6702 				break;
6703 		}
6704 		local_irq_disable();
6705 		/* Now, we're going to swap */
6706 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6707 			update_max_tr(tr, current, smp_processor_id(), NULL);
6708 		else
6709 			update_max_tr_single(tr, current, iter->cpu_file);
6710 		local_irq_enable();
6711 		break;
6712 	default:
6713 		if (tr->allocated_snapshot) {
6714 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6715 				tracing_reset_online_cpus(&tr->max_buffer);
6716 			else
6717 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6718 		}
6719 		break;
6720 	}
6721 
6722 	if (ret >= 0) {
6723 		*ppos += cnt;
6724 		ret = cnt;
6725 	}
6726 out:
6727 	mutex_unlock(&trace_types_lock);
6728 	return ret;
6729 }
6730 
6731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6732 {
6733 	struct seq_file *m = file->private_data;
6734 	int ret;
6735 
6736 	ret = tracing_release(inode, file);
6737 
6738 	if (file->f_mode & FMODE_READ)
6739 		return ret;
6740 
6741 	/* If write only, the seq_file is just a stub */
6742 	if (m)
6743 		kfree(m->private);
6744 	kfree(m);
6745 
6746 	return 0;
6747 }
6748 
6749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6751 				    size_t count, loff_t *ppos);
6752 static int tracing_buffers_release(struct inode *inode, struct file *file);
6753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6754 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6755 
6756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6757 {
6758 	struct ftrace_buffer_info *info;
6759 	int ret;
6760 
6761 	ret = tracing_buffers_open(inode, filp);
6762 	if (ret < 0)
6763 		return ret;
6764 
6765 	info = filp->private_data;
6766 
6767 	if (info->iter.trace->use_max_tr) {
6768 		tracing_buffers_release(inode, filp);
6769 		return -EBUSY;
6770 	}
6771 
6772 	info->iter.snapshot = true;
6773 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6774 
6775 	return ret;
6776 }
6777 
6778 #endif /* CONFIG_TRACER_SNAPSHOT */
6779 
6780 
6781 static const struct file_operations tracing_thresh_fops = {
6782 	.open		= tracing_open_generic,
6783 	.read		= tracing_thresh_read,
6784 	.write		= tracing_thresh_write,
6785 	.llseek		= generic_file_llseek,
6786 };
6787 
6788 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6789 static const struct file_operations tracing_max_lat_fops = {
6790 	.open		= tracing_open_generic,
6791 	.read		= tracing_max_lat_read,
6792 	.write		= tracing_max_lat_write,
6793 	.llseek		= generic_file_llseek,
6794 };
6795 #endif
6796 
6797 static const struct file_operations set_tracer_fops = {
6798 	.open		= tracing_open_generic,
6799 	.read		= tracing_set_trace_read,
6800 	.write		= tracing_set_trace_write,
6801 	.llseek		= generic_file_llseek,
6802 };
6803 
6804 static const struct file_operations tracing_pipe_fops = {
6805 	.open		= tracing_open_pipe,
6806 	.poll		= tracing_poll_pipe,
6807 	.read		= tracing_read_pipe,
6808 	.splice_read	= tracing_splice_read_pipe,
6809 	.release	= tracing_release_pipe,
6810 	.llseek		= no_llseek,
6811 };
6812 
6813 static const struct file_operations tracing_entries_fops = {
6814 	.open		= tracing_open_generic_tr,
6815 	.read		= tracing_entries_read,
6816 	.write		= tracing_entries_write,
6817 	.llseek		= generic_file_llseek,
6818 	.release	= tracing_release_generic_tr,
6819 };
6820 
6821 static const struct file_operations tracing_total_entries_fops = {
6822 	.open		= tracing_open_generic_tr,
6823 	.read		= tracing_total_entries_read,
6824 	.llseek		= generic_file_llseek,
6825 	.release	= tracing_release_generic_tr,
6826 };
6827 
6828 static const struct file_operations tracing_free_buffer_fops = {
6829 	.open		= tracing_open_generic_tr,
6830 	.write		= tracing_free_buffer_write,
6831 	.release	= tracing_free_buffer_release,
6832 };
6833 
6834 static const struct file_operations tracing_mark_fops = {
6835 	.open		= tracing_open_generic_tr,
6836 	.write		= tracing_mark_write,
6837 	.llseek		= generic_file_llseek,
6838 	.release	= tracing_release_generic_tr,
6839 };
6840 
6841 static const struct file_operations tracing_mark_raw_fops = {
6842 	.open		= tracing_open_generic_tr,
6843 	.write		= tracing_mark_raw_write,
6844 	.llseek		= generic_file_llseek,
6845 	.release	= tracing_release_generic_tr,
6846 };
6847 
6848 static const struct file_operations trace_clock_fops = {
6849 	.open		= tracing_clock_open,
6850 	.read		= seq_read,
6851 	.llseek		= seq_lseek,
6852 	.release	= tracing_single_release_tr,
6853 	.write		= tracing_clock_write,
6854 };
6855 
6856 static const struct file_operations trace_time_stamp_mode_fops = {
6857 	.open		= tracing_time_stamp_mode_open,
6858 	.read		= seq_read,
6859 	.llseek		= seq_lseek,
6860 	.release	= tracing_single_release_tr,
6861 };
6862 
6863 #ifdef CONFIG_TRACER_SNAPSHOT
6864 static const struct file_operations snapshot_fops = {
6865 	.open		= tracing_snapshot_open,
6866 	.read		= seq_read,
6867 	.write		= tracing_snapshot_write,
6868 	.llseek		= tracing_lseek,
6869 	.release	= tracing_snapshot_release,
6870 };
6871 
6872 static const struct file_operations snapshot_raw_fops = {
6873 	.open		= snapshot_raw_open,
6874 	.read		= tracing_buffers_read,
6875 	.release	= tracing_buffers_release,
6876 	.splice_read	= tracing_buffers_splice_read,
6877 	.llseek		= no_llseek,
6878 };
6879 
6880 #endif /* CONFIG_TRACER_SNAPSHOT */
6881 
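/*
 * Open handler for the per-cpu "trace_pipe_raw" files: reads return raw
 * ring buffer pages rather than formatted text. As with trace_pipe, the
 * current tracer is pinned while the file is open.
 */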
6882 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6883 {
6884 	struct trace_array *tr = inode->i_private;
6885 	struct ftrace_buffer_info *info;
6886 	int ret;
6887 
6888 	if (tracing_disabled)
6889 		return -ENODEV;
6890 
6891 	if (trace_array_get(tr) < 0)
6892 		return -ENODEV;
6893 
6894 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6895 	if (!info) {
6896 		trace_array_put(tr);
6897 		return -ENOMEM;
6898 	}
6899 
6900 	mutex_lock(&trace_types_lock);
6901 
6902 	info->iter.tr		= tr;
6903 	info->iter.cpu_file	= tracing_get_cpu(inode);
6904 	info->iter.trace	= tr->current_trace;
6905 	info->iter.trace_buffer = &tr->trace_buffer;
6906 	info->spare		= NULL;
6907 	/* Force reading ring buffer for first read */
6908 	info->read		= (unsigned int)-1;
6909 
6910 	filp->private_data = info;
6911 
6912 	tr->current_trace->ref++;
6913 
6914 	mutex_unlock(&trace_types_lock);
6915 
6916 	ret = nonseekable_open(inode, filp);
6917 	if (ret < 0)
6918 		trace_array_put(tr);
6919 
6920 	return ret;
6921 }
6922 
6923 static __poll_t
6924 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6925 {
6926 	struct ftrace_buffer_info *info = filp->private_data;
6927 	struct trace_iterator *iter = &info->iter;
6928 
6929 	return trace_poll(iter, filp, poll_table);
6930 }
6931 
6932 static ssize_t
6933 tracing_buffers_read(struct file *filp, char __user *ubuf,
6934 		     size_t count, loff_t *ppos)
6935 {
6936 	struct ftrace_buffer_info *info = filp->private_data;
6937 	struct trace_iterator *iter = &info->iter;
6938 	ssize_t ret = 0;
6939 	ssize_t size;
6940 
6941 	if (!count)
6942 		return 0;
6943 
6944 #ifdef CONFIG_TRACER_MAX_TRACE
6945 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6946 		return -EBUSY;
6947 #endif
6948 
6949 	if (!info->spare) {
6950 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6951 							  iter->cpu_file);
6952 		if (IS_ERR(info->spare)) {
6953 			ret = PTR_ERR(info->spare);
6954 			info->spare = NULL;
6955 		} else {
6956 			info->spare_cpu = iter->cpu_file;
6957 		}
6958 	}
6959 	if (!info->spare)
6960 		return ret;
6961 
6962 	/* Is there data left over from a previous read? */
6963 	if (info->read < PAGE_SIZE)
6964 		goto read;
6965 
6966  again:
6967 	trace_access_lock(iter->cpu_file);
6968 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6969 				    &info->spare,
6970 				    count,
6971 				    iter->cpu_file, 0);
6972 	trace_access_unlock(iter->cpu_file);
6973 
6974 	if (ret < 0) {
6975 		if (trace_empty(iter)) {
6976 			if ((filp->f_flags & O_NONBLOCK))
6977 				return -EAGAIN;
6978 
6979 			ret = wait_on_pipe(iter, 0);
6980 			if (ret)
6981 				return ret;
6982 
6983 			goto again;
6984 		}
6985 		return 0;
6986 	}
6987 
6988 	info->read = 0;
6989  read:
6990 	size = PAGE_SIZE - info->read;
6991 	if (size > count)
6992 		size = count;
6993 
6994 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6995 	if (ret == size)
6996 		return -EFAULT;
6997 
6998 	size -= ret;
6999 
7000 	*ppos += size;
7001 	info->read += size;
7002 
7003 	return size;
7004 }
7005 
7006 static int tracing_buffers_release(struct inode *inode, struct file *file)
7007 {
7008 	struct ftrace_buffer_info *info = file->private_data;
7009 	struct trace_iterator *iter = &info->iter;
7010 
7011 	mutex_lock(&trace_types_lock);
7012 
7013 	iter->tr->current_trace->ref--;
7014 
7015 	__trace_array_put(iter->tr);
7016 
7017 	if (info->spare)
7018 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7019 					   info->spare_cpu, info->spare);
7020 	kfree(info);
7021 
7022 	mutex_unlock(&trace_types_lock);
7023 
7024 	return 0;
7025 }
7026 
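/*
 * A buffer_ref pins one ring buffer page that has been handed to a pipe by
 * tracing_buffers_splice_read(). The page is only given back to the ring
 * buffer once the last reference (pipe buffer or spd entry) is released.
 */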
7027 struct buffer_ref {
7028 	struct ring_buffer	*buffer;
7029 	void			*page;
7030 	int			cpu;
7031 	refcount_t		refcount;
7032 };
7033 
7034 static void buffer_ref_release(struct buffer_ref *ref)
7035 {
7036 	if (!refcount_dec_and_test(&ref->refcount))
7037 		return;
7038 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7039 	kfree(ref);
7040 }
7041 
7042 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7043 				    struct pipe_buffer *buf)
7044 {
7045 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7046 
7047 	buffer_ref_release(ref);
7048 	buf->private = 0;
7049 }
7050 
7051 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7052 				struct pipe_buffer *buf)
7053 {
7054 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7055 
7056 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7057 		return false;
7058 
7059 	refcount_inc(&ref->refcount);
7060 	return true;
7061 }
7062 
7063 /* Pipe buffer operations for a buffer. */
7064 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7065 	.confirm		= generic_pipe_buf_confirm,
7066 	.release		= buffer_pipe_buf_release,
7067 	.steal			= generic_pipe_buf_nosteal,
7068 	.get			= buffer_pipe_buf_get,
7069 };
7070 
7071 /*
7072  * Callback from splice_to_pipe(): releases any pages left at the end of
7073  * the spd if we errored out while filling the pipe.
7074  */
7075 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7076 {
7077 	struct buffer_ref *ref =
7078 		(struct buffer_ref *)spd->partial[i].private;
7079 
7080 	buffer_ref_release(ref);
7081 	spd->partial[i].private = 0;
7082 }
7083 
7084 static ssize_t
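/*
 * Splice handler for trace_pipe_raw: whole ring buffer pages are handed to
 * the pipe without copying, each wrapped in a refcounted buffer_ref so the
 * page survives until every pipe reader is done with it.
 */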
7085 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7086 			    struct pipe_inode_info *pipe, size_t len,
7087 			    unsigned int flags)
7088 {
7089 	struct ftrace_buffer_info *info = file->private_data;
7090 	struct trace_iterator *iter = &info->iter;
7091 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7092 	struct page *pages_def[PIPE_DEF_BUFFERS];
7093 	struct splice_pipe_desc spd = {
7094 		.pages		= pages_def,
7095 		.partial	= partial_def,
7096 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7097 		.ops		= &buffer_pipe_buf_ops,
7098 		.spd_release	= buffer_spd_release,
7099 	};
7100 	struct buffer_ref *ref;
7101 	int entries, i;
7102 	ssize_t ret = 0;
7103 
7104 #ifdef CONFIG_TRACER_MAX_TRACE
7105 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7106 		return -EBUSY;
7107 #endif
7108 
7109 	if (*ppos & (PAGE_SIZE - 1))
7110 		return -EINVAL;
7111 
7112 	if (len & (PAGE_SIZE - 1)) {
7113 		if (len < PAGE_SIZE)
7114 			return -EINVAL;
7115 		len &= PAGE_MASK;
7116 	}
7117 
7118 	if (splice_grow_spd(pipe, &spd))
7119 		return -ENOMEM;
7120 
7121  again:
7122 	trace_access_lock(iter->cpu_file);
7123 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7124 
7125 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7126 		struct page *page;
7127 		int r;
7128 
7129 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7130 		if (!ref) {
7131 			ret = -ENOMEM;
7132 			break;
7133 		}
7134 
7135 		refcount_set(&ref->refcount, 1);
7136 		ref->buffer = iter->trace_buffer->buffer;
7137 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7138 		if (IS_ERR(ref->page)) {
7139 			ret = PTR_ERR(ref->page);
7140 			ref->page = NULL;
7141 			kfree(ref);
7142 			break;
7143 		}
7144 		ref->cpu = iter->cpu_file;
7145 
7146 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7147 					  len, iter->cpu_file, 1);
7148 		if (r < 0) {
7149 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7150 						   ref->page);
7151 			kfree(ref);
7152 			break;
7153 		}
7154 
7155 		page = virt_to_page(ref->page);
7156 
7157 		spd.pages[i] = page;
7158 		spd.partial[i].len = PAGE_SIZE;
7159 		spd.partial[i].offset = 0;
7160 		spd.partial[i].private = (unsigned long)ref;
7161 		spd.nr_pages++;
7162 		*ppos += PAGE_SIZE;
7163 
7164 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7165 	}
7166 
7167 	trace_access_unlock(iter->cpu_file);
7168 	spd.nr_pages = i;
7169 
7170 	/* did we read anything? */
7171 	if (!spd.nr_pages) {
7172 		if (ret)
7173 			goto out;
7174 
7175 		ret = -EAGAIN;
7176 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7177 			goto out;
7178 
7179 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7180 		if (ret)
7181 			goto out;
7182 
7183 		goto again;
7184 	}
7185 
7186 	ret = splice_to_pipe(pipe, &spd);
7187 out:
7188 	splice_shrink_spd(&spd);
7189 
7190 	return ret;
7191 }
7192 
7193 static const struct file_operations tracing_buffers_fops = {
7194 	.open		= tracing_buffers_open,
7195 	.read		= tracing_buffers_read,
7196 	.poll		= tracing_buffers_poll,
7197 	.release	= tracing_buffers_release,
7198 	.splice_read	= tracing_buffers_splice_read,
7199 	.llseek		= no_llseek,
7200 };
7201 
7202 static ssize_t
7203 tracing_stats_read(struct file *filp, char __user *ubuf,
7204 		   size_t count, loff_t *ppos)
7205 {
7206 	struct inode *inode = file_inode(filp);
7207 	struct trace_array *tr = inode->i_private;
7208 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7209 	int cpu = tracing_get_cpu(inode);
7210 	struct trace_seq *s;
7211 	unsigned long cnt;
7212 	unsigned long long t;
7213 	unsigned long usec_rem;
7214 
7215 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7216 	if (!s)
7217 		return -ENOMEM;
7218 
7219 	trace_seq_init(s);
7220 
7221 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7222 	trace_seq_printf(s, "entries: %ld\n", cnt);
7223 
7224 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7225 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7226 
7227 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7228 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7229 
7230 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7231 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7232 
7233 	if (trace_clocks[tr->clock_id].in_ns) {
7234 		/* local or global for trace_clock */
7235 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7236 		usec_rem = do_div(t, USEC_PER_SEC);
7237 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7238 								t, usec_rem);
7239 
7240 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7241 		usec_rem = do_div(t, USEC_PER_SEC);
7242 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7243 	} else {
7244 		/* counter or tsc mode for trace_clock */
7245 		trace_seq_printf(s, "oldest event ts: %llu\n",
7246 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7247 
7248 		trace_seq_printf(s, "now ts: %llu\n",
7249 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7250 	}
7251 
7252 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7253 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7254 
7255 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7256 	trace_seq_printf(s, "read events: %ld\n", cnt);
7257 
7258 	count = simple_read_from_buffer(ubuf, count, ppos,
7259 					s->buffer, trace_seq_used(s));
7260 
7261 	kfree(s);
7262 
7263 	return count;
7264 }
7265 
7266 static const struct file_operations tracing_stats_fops = {
7267 	.open		= tracing_open_generic_tr,
7268 	.read		= tracing_stats_read,
7269 	.llseek		= generic_file_llseek,
7270 	.release	= tracing_release_generic_tr,
7271 };
7272 
7273 #ifdef CONFIG_DYNAMIC_FTRACE
7274 
7275 static ssize_t
7276 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7277 		  size_t cnt, loff_t *ppos)
7278 {
7279 	unsigned long *p = filp->private_data;
7280 	char buf[64]; /* Not too big for a shallow stack */
7281 	int r;
7282 
7283 	r = scnprintf(buf, 63, "%ld", *p);
7284 	buf[r++] = '\n';
7285 
7286 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7287 }
7288 
7289 static const struct file_operations tracing_dyn_info_fops = {
7290 	.open		= tracing_open_generic,
7291 	.read		= tracing_read_dyn_info,
7292 	.llseek		= generic_file_llseek,
7293 };
7294 #endif /* CONFIG_DYNAMIC_FTRACE */
7295 
7296 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7297 static void
7298 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7299 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7300 		void *data)
7301 {
7302 	tracing_snapshot_instance(tr);
7303 }
7304 
7305 static void
7306 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7307 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7308 		      void *data)
7309 {
7310 	struct ftrace_func_mapper *mapper = data;
7311 	long *count = NULL;
7312 
7313 	if (mapper)
7314 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7315 
7316 	if (count) {
7317 
7318 		if (*count <= 0)
7319 			return;
7320 
7321 		(*count)--;
7322 	}
7323 
7324 	tracing_snapshot_instance(tr);
7325 }
7326 
7327 static int
7328 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7329 		      struct ftrace_probe_ops *ops, void *data)
7330 {
7331 	struct ftrace_func_mapper *mapper = data;
7332 	long *count = NULL;
7333 
7334 	seq_printf(m, "%ps:", (void *)ip);
7335 
7336 	seq_puts(m, "snapshot");
7337 
7338 	if (mapper)
7339 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7340 
7341 	if (count)
7342 		seq_printf(m, ":count=%ld\n", *count);
7343 	else
7344 		seq_puts(m, ":unlimited\n");
7345 
7346 	return 0;
7347 }
7348 
7349 static int
7350 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7351 		     unsigned long ip, void *init_data, void **data)
7352 {
7353 	struct ftrace_func_mapper *mapper = *data;
7354 
7355 	if (!mapper) {
7356 		mapper = allocate_ftrace_func_mapper();
7357 		if (!mapper)
7358 			return -ENOMEM;
7359 		*data = mapper;
7360 	}
7361 
7362 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7363 }
7364 
7365 static void
7366 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7367 		     unsigned long ip, void *data)
7368 {
7369 	struct ftrace_func_mapper *mapper = data;
7370 
7371 	if (!ip) {
7372 		if (!mapper)
7373 			return;
7374 		free_ftrace_func_mapper(mapper, NULL);
7375 		return;
7376 	}
7377 
7378 	ftrace_func_mapper_remove_ip(mapper, ip);
7379 }
7380 
7381 static struct ftrace_probe_ops snapshot_probe_ops = {
7382 	.func			= ftrace_snapshot,
7383 	.print			= ftrace_snapshot_print,
7384 };
7385 
7386 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7387 	.func			= ftrace_count_snapshot,
7388 	.print			= ftrace_snapshot_print,
7389 	.init			= ftrace_snapshot_init,
7390 	.free			= ftrace_snapshot_free,
7391 };
7392 
7393 static int
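/*
 * Implements the "snapshot" function command for set_ftrace_filter, e.g.
 *
 *   echo 'some_function:snapshot:5' > set_ftrace_filter
 *
 * arms a probe that takes a snapshot the first 5 times some_function is
 * hit ("some_function" here is just a placeholder). Without a count the
 * snapshot is taken on every hit, and a leading '!' removes the probe.
 */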
7394 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7395 			       char *glob, char *cmd, char *param, int enable)
7396 {
7397 	struct ftrace_probe_ops *ops;
7398 	void *count = (void *)-1;
7399 	char *number;
7400 	int ret;
7401 
7402 	if (!tr)
7403 		return -ENODEV;
7404 
7405 	/* hash funcs only work with set_ftrace_filter */
7406 	if (!enable)
7407 		return -EINVAL;
7408 
7409 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7410 
7411 	if (glob[0] == '!')
7412 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7413 
7414 	if (!param)
7415 		goto out_reg;
7416 
7417 	number = strsep(&param, ":");
7418 
7419 	if (!strlen(number))
7420 		goto out_reg;
7421 
7422 	/*
7423 	 * We use the callback data field (which is a pointer)
7424 	 * as our counter.
7425 	 */
7426 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7427 	if (ret)
7428 		return ret;
7429 
7430  out_reg:
7431 	ret = tracing_alloc_snapshot_instance(tr);
7432 	if (ret < 0)
7433 		goto out;
7434 
7435 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7436 
7437  out:
7438 	return ret < 0 ? ret : 0;
7439 }
7440 
7441 static struct ftrace_func_command ftrace_snapshot_cmd = {
7442 	.name			= "snapshot",
7443 	.func			= ftrace_trace_snapshot_callback,
7444 };
7445 
7446 static __init int register_snapshot_cmd(void)
7447 {
7448 	return register_ftrace_command(&ftrace_snapshot_cmd);
7449 }
7450 #else
7451 static inline __init int register_snapshot_cmd(void) { return 0; }
7452 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7453 
7454 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7455 {
7456 	if (WARN_ON(!tr->dir))
7457 		return ERR_PTR(-ENODEV);
7458 
7459 	/* Top directory uses NULL as the parent */
7460 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7461 		return NULL;
7462 
7463 	/* All sub buffers have a descriptor */
7464 	return tr->dir;
7465 }
7466 
7467 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7468 {
7469 	struct dentry *d_tracer;
7470 
7471 	if (tr->percpu_dir)
7472 		return tr->percpu_dir;
7473 
7474 	d_tracer = tracing_get_dentry(tr);
7475 	if (IS_ERR(d_tracer))
7476 		return NULL;
7477 
7478 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7479 
7480 	WARN_ONCE(!tr->percpu_dir,
7481 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7482 
7483 	return tr->percpu_dir;
7484 }
7485 
7486 static struct dentry *
7487 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7488 		      void *data, long cpu, const struct file_operations *fops)
7489 {
7490 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7491 
7492 	if (ret) /* See tracing_get_cpu() */
7493 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7494 	return ret;
7495 }
7496 
7497 static void
7498 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7499 {
7500 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7501 	struct dentry *d_cpu;
7502 	char cpu_dir[30]; /* 30 characters should be more than enough */
7503 
7504 	if (!d_percpu)
7505 		return;
7506 
7507 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7508 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7509 	if (!d_cpu) {
7510 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7511 		return;
7512 	}
7513 
7514 	/* per cpu trace_pipe */
7515 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7516 				tr, cpu, &tracing_pipe_fops);
7517 
7518 	/* per cpu trace */
7519 	trace_create_cpu_file("trace", 0644, d_cpu,
7520 				tr, cpu, &tracing_fops);
7521 
7522 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7523 				tr, cpu, &tracing_buffers_fops);
7524 
7525 	trace_create_cpu_file("stats", 0444, d_cpu,
7526 				tr, cpu, &tracing_stats_fops);
7527 
7528 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7529 				tr, cpu, &tracing_entries_fops);
7530 
7531 #ifdef CONFIG_TRACER_SNAPSHOT
7532 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7533 				tr, cpu, &snapshot_fops);
7534 
7535 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7536 				tr, cpu, &snapshot_raw_fops);
7537 #endif
7538 }
7539 
7540 #ifdef CONFIG_FTRACE_SELFTEST
7541 /* Let selftest have access to static functions in this file */
7542 #include "trace_selftest.c"
7543 #endif
7544 
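/*
 * Read/write handlers for the tracer-specific flag files created under the
 * "options" directory; writing 0 or 1 toggles the corresponding tracer
 * option (e.g. "echo 1 > options/<flag>").
 */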
7545 static ssize_t
7546 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7547 			loff_t *ppos)
7548 {
7549 	struct trace_option_dentry *topt = filp->private_data;
7550 	char *buf;
7551 
7552 	if (topt->flags->val & topt->opt->bit)
7553 		buf = "1\n";
7554 	else
7555 		buf = "0\n";
7556 
7557 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7558 }
7559 
7560 static ssize_t
7561 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7562 			 loff_t *ppos)
7563 {
7564 	struct trace_option_dentry *topt = filp->private_data;
7565 	unsigned long val;
7566 	int ret;
7567 
7568 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7569 	if (ret)
7570 		return ret;
7571 
7572 	if (val != 0 && val != 1)
7573 		return -EINVAL;
7574 
7575 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7576 		mutex_lock(&trace_types_lock);
7577 		ret = __set_tracer_option(topt->tr, topt->flags,
7578 					  topt->opt, !val);
7579 		mutex_unlock(&trace_types_lock);
7580 		if (ret)
7581 			return ret;
7582 	}
7583 
7584 	*ppos += cnt;
7585 
7586 	return cnt;
7587 }
7588 
7589 
7590 static const struct file_operations trace_options_fops = {
7591 	.open = tracing_open_generic,
7592 	.read = trace_options_read,
7593 	.write = trace_options_write,
7594 	.llseek	= generic_file_llseek,
7595 };
7596 
7597 /*
7598  * In order to pass in both the trace_array descriptor as well as the index
7599  * to the flag that the trace option file represents, the trace_array
7600  * has a character array of trace_flags_index[], which holds the index
7601  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7602  * The address of this character array is passed to the flag option file
7603  * read/write callbacks.
7604  *
7605  * In order to extract both the index and the trace_array descriptor,
7606  * get_tr_index() uses the following algorithm.
7607  *
7608  *   idx = *ptr;
7609  *
7610  * This works because each element of the index array holds its own
7611  * position (remember index[1] == 1), so dereferencing yields the index.
7612  *
7613  * Then, to get the trace_array descriptor, subtract that index from
7614  * the pointer, which lands at the start of the index array itself:
7615  *
7616  *   ptr - idx == &index[0]
7617  *
7618  * Then a simple container_of() from that pointer gets us to the
7619  * trace_array descriptor.
7620  */
7621 static void get_tr_index(void *data, struct trace_array **ptr,
7622 			 unsigned int *pindex)
7623 {
7624 	*pindex = *(unsigned char *)data;
7625 
7626 	*ptr = container_of(data - *pindex, struct trace_array,
7627 			    trace_flags_index);
7628 }
7629 
7630 static ssize_t
7631 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7632 			loff_t *ppos)
7633 {
7634 	void *tr_index = filp->private_data;
7635 	struct trace_array *tr;
7636 	unsigned int index;
7637 	char *buf;
7638 
7639 	get_tr_index(tr_index, &tr, &index);
7640 
7641 	if (tr->trace_flags & (1 << index))
7642 		buf = "1\n";
7643 	else
7644 		buf = "0\n";
7645 
7646 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7647 }
7648 
7649 static ssize_t
7650 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7651 			 loff_t *ppos)
7652 {
7653 	void *tr_index = filp->private_data;
7654 	struct trace_array *tr;
7655 	unsigned int index;
7656 	unsigned long val;
7657 	int ret;
7658 
7659 	get_tr_index(tr_index, &tr, &index);
7660 
7661 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7662 	if (ret)
7663 		return ret;
7664 
7665 	if (val != 0 && val != 1)
7666 		return -EINVAL;
7667 
7668 	mutex_lock(&trace_types_lock);
7669 	ret = set_tracer_flag(tr, 1 << index, val);
7670 	mutex_unlock(&trace_types_lock);
7671 
7672 	if (ret < 0)
7673 		return ret;
7674 
7675 	*ppos += cnt;
7676 
7677 	return cnt;
7678 }
7679 
7680 static const struct file_operations trace_options_core_fops = {
7681 	.open = tracing_open_generic,
7682 	.read = trace_options_core_read,
7683 	.write = trace_options_core_write,
7684 	.llseek = generic_file_llseek,
7685 };
7686 
7687 struct dentry *trace_create_file(const char *name,
7688 				 umode_t mode,
7689 				 struct dentry *parent,
7690 				 void *data,
7691 				 const struct file_operations *fops)
7692 {
7693 	struct dentry *ret;
7694 
7695 	ret = tracefs_create_file(name, mode, parent, data, fops);
7696 	if (!ret)
7697 		pr_warn("Could not create tracefs '%s' entry\n", name);
7698 
7699 	return ret;
7700 }
7701 
7702 
7703 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7704 {
7705 	struct dentry *d_tracer;
7706 
7707 	if (tr->options)
7708 		return tr->options;
7709 
7710 	d_tracer = tracing_get_dentry(tr);
7711 	if (IS_ERR(d_tracer))
7712 		return NULL;
7713 
7714 	tr->options = tracefs_create_dir("options", d_tracer);
7715 	if (!tr->options) {
7716 		pr_warn("Could not create tracefs directory 'options'\n");
7717 		return NULL;
7718 	}
7719 
7720 	return tr->options;
7721 }
7722 
7723 static void
7724 create_trace_option_file(struct trace_array *tr,
7725 			 struct trace_option_dentry *topt,
7726 			 struct tracer_flags *flags,
7727 			 struct tracer_opt *opt)
7728 {
7729 	struct dentry *t_options;
7730 
7731 	t_options = trace_options_init_dentry(tr);
7732 	if (!t_options)
7733 		return;
7734 
7735 	topt->flags = flags;
7736 	topt->opt = opt;
7737 	topt->tr = tr;
7738 
7739 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7740 				    &trace_options_fops);
7741 
7742 }
7743 
7744 static void
7745 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7746 {
7747 	struct trace_option_dentry *topts;
7748 	struct trace_options *tr_topts;
7749 	struct tracer_flags *flags;
7750 	struct tracer_opt *opts;
7751 	int cnt;
7752 	int i;
7753 
7754 	if (!tracer)
7755 		return;
7756 
7757 	flags = tracer->flags;
7758 
7759 	if (!flags || !flags->opts)
7760 		return;
7761 
7762 	/*
7763 	 * If this is an instance, only create flags for tracers
7764 	 * the instance may have.
7765 	 */
7766 	if (!trace_ok_for_array(tracer, tr))
7767 		return;
7768 
7769 	for (i = 0; i < tr->nr_topts; i++) {
7770 		/* Make sure there are no duplicate flags. */
7771 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7772 			return;
7773 	}
7774 
7775 	opts = flags->opts;
7776 
7777 	for (cnt = 0; opts[cnt].name; cnt++)
7778 		;
7779 
7780 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7781 	if (!topts)
7782 		return;
7783 
7784 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7785 			    GFP_KERNEL);
7786 	if (!tr_topts) {
7787 		kfree(topts);
7788 		return;
7789 	}
7790 
7791 	tr->topts = tr_topts;
7792 	tr->topts[tr->nr_topts].tracer = tracer;
7793 	tr->topts[tr->nr_topts].topts = topts;
7794 	tr->nr_topts++;
7795 
7796 	for (cnt = 0; opts[cnt].name; cnt++) {
7797 		create_trace_option_file(tr, &topts[cnt], flags,
7798 					 &opts[cnt]);
7799 		WARN_ONCE(topts[cnt].entry == NULL,
7800 			  "Failed to create trace option: %s",
7801 			  opts[cnt].name);
7802 	}
7803 }
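
/*
 * Illustration (editor's sketch, not from the original source): the
 * counting loop above depends on tracers terminating their option array
 * with an empty entry, along these lines (my_opts, my_flags and the
 * "my-verbose" option are made-up names for this sketch only):
 *
 *	static struct tracer_opt my_opts[] = {
 *		{ TRACER_OPT(my-verbose, 0x1) },
 *		{ }	// sentinel: .name == NULL stops the count
 *	};
 *
 *	static struct tracer_flags my_flags = {
 *		.val  = 0,
 *		.opts = my_opts,
 *	};
 */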
7804 
7805 static struct dentry *
7806 create_trace_option_core_file(struct trace_array *tr,
7807 			      const char *option, long index)
7808 {
7809 	struct dentry *t_options;
7810 
7811 	t_options = trace_options_init_dentry(tr);
7812 	if (!t_options)
7813 		return NULL;
7814 
7815 	return trace_create_file(option, 0644, t_options,
7816 				 (void *)&tr->trace_flags_index[index],
7817 				 &trace_options_core_fops);
7818 }
7819 
7820 static void create_trace_options_dir(struct trace_array *tr)
7821 {
7822 	struct dentry *t_options;
7823 	bool top_level = tr == &global_trace;
7824 	int i;
7825 
7826 	t_options = trace_options_init_dentry(tr);
7827 	if (!t_options)
7828 		return;
7829 
7830 	for (i = 0; trace_options[i]; i++) {
7831 		if (top_level ||
7832 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7833 			create_trace_option_core_file(tr, trace_options[i], i);
7834 	}
7835 }
7836 
7837 static ssize_t
7838 rb_simple_read(struct file *filp, char __user *ubuf,
7839 	       size_t cnt, loff_t *ppos)
7840 {
7841 	struct trace_array *tr = filp->private_data;
7842 	char buf[64];
7843 	int r;
7844 
7845 	r = tracer_tracing_is_on(tr);
7846 	r = sprintf(buf, "%d\n", r);
7847 
7848 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7849 }
7850 
7851 static ssize_t
7852 rb_simple_write(struct file *filp, const char __user *ubuf,
7853 		size_t cnt, loff_t *ppos)
7854 {
7855 	struct trace_array *tr = filp->private_data;
7856 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7857 	unsigned long val;
7858 	int ret;
7859 
7860 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7861 	if (ret)
7862 		return ret;
7863 
7864 	if (buffer) {
7865 		mutex_lock(&trace_types_lock);
7866 		if (!!val == tracer_tracing_is_on(tr)) {
7867 			val = 0; /* do nothing */
7868 		} else if (val) {
7869 			tracer_tracing_on(tr);
7870 			if (tr->current_trace->start)
7871 				tr->current_trace->start(tr);
7872 		} else {
7873 			tracer_tracing_off(tr);
7874 			if (tr->current_trace->stop)
7875 				tr->current_trace->stop(tr);
7876 		}
7877 		mutex_unlock(&trace_types_lock);
7878 	}
7879 
7880 	(*ppos)++;
7881 
7882 	return cnt;
7883 }
7884 
7885 static const struct file_operations rb_simple_fops = {
7886 	.open		= tracing_open_generic_tr,
7887 	.read		= rb_simple_read,
7888 	.write		= rb_simple_write,
7889 	.release	= tracing_release_generic_tr,
7890 	.llseek		= default_llseek,
7891 };
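
/*
 * Usage note (editor's addition): rb_simple_fops backs the per-instance
 * "tracing_on" file created in init_tracer_tracefs() below. A hedged
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	calls tracer_tracing_off()
 *	# echo 1 > /sys/kernel/tracing/tracing_on	calls tracer_tracing_on()
 *	# cat /sys/kernel/tracing/tracing_on		prints "0" or "1"
 *
 * Writing the value the buffer already has is accepted but changes
 * nothing, since rb_simple_write() checks tracer_tracing_is_on() first.
 */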
7892 
7893 static ssize_t
7894 buffer_percent_read(struct file *filp, char __user *ubuf,
7895 		    size_t cnt, loff_t *ppos)
7896 {
7897 	struct trace_array *tr = filp->private_data;
7898 	char buf[64];
7899 	int r;
7900 
7901 	r = tr->buffer_percent;
7902 	r = sprintf(buf, "%d\n", r);
7903 
7904 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7905 }
7906 
7907 static ssize_t
7908 buffer_percent_write(struct file *filp, const char __user *ubuf,
7909 		     size_t cnt, loff_t *ppos)
7910 {
7911 	struct trace_array *tr = filp->private_data;
7912 	unsigned long val;
7913 	int ret;
7914 
7915 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7916 	if (ret)
7917 		return ret;
7918 
7919 	if (val > 100)
7920 		return -EINVAL;
7921 
7922 	if (!val)
7923 		val = 1;
7924 
7925 	tr->buffer_percent = val;
7926 
7927 	(*ppos)++;
7928 
7929 	return cnt;
7930 }
7931 
7932 static const struct file_operations buffer_percent_fops = {
7933 	.open		= tracing_open_generic_tr,
7934 	.read		= buffer_percent_read,
7935 	.write		= buffer_percent_write,
7936 	.release	= tracing_release_generic_tr,
7937 	.llseek		= default_llseek,
7938 };
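
/*
 * Usage note (editor's addition): "buffer_percent" is the watermark for
 * how full the ring buffer must be before a blocked reader is woken up;
 * init_tracer_tracefs() below defaults it to 50. A hedged sketch,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 100 > /sys/kernel/tracing/buffer_percent	wake only when full
 *	# echo 0   > /sys/kernel/tracing/buffer_percent	stored as 1, see above
 *
 * Values above 100 are rejected with -EINVAL.
 */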
7939 
7940 struct dentry *trace_instance_dir;
7941 
7942 static void
7943 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7944 
7945 static int
7946 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7947 {
7948 	enum ring_buffer_flags rb_flags;
7949 
7950 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7951 
7952 	buf->tr = tr;
7953 
7954 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7955 	if (!buf->buffer)
7956 		return -ENOMEM;
7957 
7958 	buf->data = alloc_percpu(struct trace_array_cpu);
7959 	if (!buf->data) {
7960 		ring_buffer_free(buf->buffer);
7961 		buf->buffer = NULL;
7962 		return -ENOMEM;
7963 	}
7964 
7965 	/* Allocate the first page for all buffers */
7966 	set_buffer_entries(&tr->trace_buffer,
7967 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7968 
7969 	return 0;
7970 }
7971 
7972 static int allocate_trace_buffers(struct trace_array *tr, int size)
7973 {
7974 	int ret;
7975 
7976 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7977 	if (ret)
7978 		return ret;
7979 
7980 #ifdef CONFIG_TRACER_MAX_TRACE
7981 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7982 				    allocate_snapshot ? size : 1);
7983 	if (WARN_ON(ret)) {
7984 		ring_buffer_free(tr->trace_buffer.buffer);
7985 		tr->trace_buffer.buffer = NULL;
7986 		free_percpu(tr->trace_buffer.data);
7987 		tr->trace_buffer.data = NULL;
7988 		return -ENOMEM;
7989 	}
7990 	tr->allocated_snapshot = allocate_snapshot;
7991 
7992 	/*
7993 	 * Only the top level trace array gets its snapshot allocated
7994 	 * from the kernel command line.
7995 	 */
7996 	allocate_snapshot = false;
7997 #endif
7998 	return 0;
7999 }
8000 
8001 static void free_trace_buffer(struct trace_buffer *buf)
8002 {
8003 	if (buf->buffer) {
8004 		ring_buffer_free(buf->buffer);
8005 		buf->buffer = NULL;
8006 		free_percpu(buf->data);
8007 		buf->data = NULL;
8008 	}
8009 }
8010 
8011 static void free_trace_buffers(struct trace_array *tr)
8012 {
8013 	if (!tr)
8014 		return;
8015 
8016 	free_trace_buffer(&tr->trace_buffer);
8017 
8018 #ifdef CONFIG_TRACER_MAX_TRACE
8019 	free_trace_buffer(&tr->max_buffer);
8020 #endif
8021 }
8022 
8023 static void init_trace_flags_index(struct trace_array *tr)
8024 {
8025 	int i;
8026 
8027 	/* Used by the trace options files */
8028 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8029 		tr->trace_flags_index[i] = i;
8030 }
8031 
8032 static void __update_tracer_options(struct trace_array *tr)
8033 {
8034 	struct tracer *t;
8035 
8036 	for (t = trace_types; t; t = t->next)
8037 		add_tracer_options(tr, t);
8038 }
8039 
8040 static void update_tracer_options(struct trace_array *tr)
8041 {
8042 	mutex_lock(&trace_types_lock);
8043 	__update_tracer_options(tr);
8044 	mutex_unlock(&trace_types_lock);
8045 }
8046 
8047 static int instance_mkdir(const char *name)
8048 {
8049 	struct trace_array *tr;
8050 	int ret;
8051 
8052 	mutex_lock(&event_mutex);
8053 	mutex_lock(&trace_types_lock);
8054 
8055 	ret = -EEXIST;
8056 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8057 		if (tr->name && strcmp(tr->name, name) == 0)
8058 			goto out_unlock;
8059 	}
8060 
8061 	ret = -ENOMEM;
8062 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8063 	if (!tr)
8064 		goto out_unlock;
8065 
8066 	tr->name = kstrdup(name, GFP_KERNEL);
8067 	if (!tr->name)
8068 		goto out_free_tr;
8069 
8070 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8071 		goto out_free_tr;
8072 
8073 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8074 
8075 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8076 
8077 	raw_spin_lock_init(&tr->start_lock);
8078 
8079 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8080 
8081 	tr->current_trace = &nop_trace;
8082 
8083 	INIT_LIST_HEAD(&tr->systems);
8084 	INIT_LIST_HEAD(&tr->events);
8085 	INIT_LIST_HEAD(&tr->hist_vars);
8086 
8087 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8088 		goto out_free_tr;
8089 
8090 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8091 	if (!tr->dir)
8092 		goto out_free_tr;
8093 
8094 	ret = event_trace_add_tracer(tr->dir, tr);
8095 	if (ret) {
8096 		tracefs_remove_recursive(tr->dir);
8097 		goto out_free_tr;
8098 	}
8099 
8100 	ftrace_init_trace_array(tr);
8101 
8102 	init_tracer_tracefs(tr, tr->dir);
8103 	init_trace_flags_index(tr);
8104 	__update_tracer_options(tr);
8105 
8106 	list_add(&tr->list, &ftrace_trace_arrays);
8107 
8108 	mutex_unlock(&trace_types_lock);
8109 	mutex_unlock(&event_mutex);
8110 
8111 	return 0;
8112 
8113  out_free_tr:
8114 	free_trace_buffers(tr);
8115 	free_cpumask_var(tr->tracing_cpumask);
8116 	kfree(tr->name);
8117 	kfree(tr);
8118 
8119  out_unlock:
8120 	mutex_unlock(&trace_types_lock);
8121 	mutex_unlock(&event_mutex);
8122 
8123 	return ret;
8124 
8125 }
8126 
8127 static int instance_rmdir(const char *name)
8128 {
8129 	struct trace_array *tr;
8130 	int found = 0;
8131 	int ret;
8132 	int i;
8133 
8134 	mutex_lock(&event_mutex);
8135 	mutex_lock(&trace_types_lock);
8136 
8137 	ret = -ENODEV;
8138 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8139 		if (tr->name && strcmp(tr->name, name) == 0) {
8140 			found = 1;
8141 			break;
8142 		}
8143 	}
8144 	if (!found)
8145 		goto out_unlock;
8146 
8147 	ret = -EBUSY;
8148 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8149 		goto out_unlock;
8150 
8151 	list_del(&tr->list);
8152 
8153 	/* Disable all the flags that were enabled coming in */
8154 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8155 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8156 			set_tracer_flag(tr, 1 << i, 0);
8157 	}
8158 
8159 	tracing_set_nop(tr);
8160 	clear_ftrace_function_probes(tr);
8161 	event_trace_del_tracer(tr);
8162 	ftrace_clear_pids(tr);
8163 	ftrace_destroy_function_files(tr);
8164 	tracefs_remove_recursive(tr->dir);
8165 	free_trace_buffers(tr);
8166 
8167 	for (i = 0; i < tr->nr_topts; i++) {
8168 		kfree(tr->topts[i].topts);
8169 	}
8170 	kfree(tr->topts);
8171 
8172 	free_cpumask_var(tr->tracing_cpumask);
8173 	kfree(tr->name);
8174 	kfree(tr);
8175 
8176 	ret = 0;
8177 
8178  out_unlock:
8179 	mutex_unlock(&trace_types_lock);
8180 	mutex_unlock(&event_mutex);
8181 
8182 	return ret;
8183 }
8184 
8185 static __init void create_trace_instances(struct dentry *d_tracer)
8186 {
8187 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8188 							 instance_mkdir,
8189 							 instance_rmdir);
8190 	if (WARN_ON(!trace_instance_dir))
8191 		return;
8192 }
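
/*
 * Usage note (editor's addition): instance_mkdir()/instance_rmdir() above
 * are invoked by tracefs when a directory is created or removed under
 * "instances". A hedged sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo	allocates a new trace_array
 *	# rmdir /sys/kernel/tracing/instances/foo	tears it down (-EBUSY if in use)
 *
 * Each instance gets its own ring buffer plus its own copy of the files
 * created by init_tracer_tracefs().
 */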
8193 
8194 static void
8195 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8196 {
8197 	struct trace_event_file *file;
8198 	int cpu;
8199 
8200 	trace_create_file("available_tracers", 0444, d_tracer,
8201 			tr, &show_traces_fops);
8202 
8203 	trace_create_file("current_tracer", 0644, d_tracer,
8204 			tr, &set_tracer_fops);
8205 
8206 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8207 			  tr, &tracing_cpumask_fops);
8208 
8209 	trace_create_file("trace_options", 0644, d_tracer,
8210 			  tr, &tracing_iter_fops);
8211 
8212 	trace_create_file("trace", 0644, d_tracer,
8213 			  tr, &tracing_fops);
8214 
8215 	trace_create_file("trace_pipe", 0444, d_tracer,
8216 			  tr, &tracing_pipe_fops);
8217 
8218 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8219 			  tr, &tracing_entries_fops);
8220 
8221 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8222 			  tr, &tracing_total_entries_fops);
8223 
8224 	trace_create_file("free_buffer", 0200, d_tracer,
8225 			  tr, &tracing_free_buffer_fops);
8226 
8227 	trace_create_file("trace_marker", 0220, d_tracer,
8228 			  tr, &tracing_mark_fops);
8229 
8230 	file = __find_event_file(tr, "ftrace", "print");
8231 	if (file && file->dir)
8232 		trace_create_file("trigger", 0644, file->dir, file,
8233 				  &event_trigger_fops);
8234 	tr->trace_marker_file = file;
8235 
8236 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8237 			  tr, &tracing_mark_raw_fops);
8238 
8239 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8240 			  &trace_clock_fops);
8241 
8242 	trace_create_file("tracing_on", 0644, d_tracer,
8243 			  tr, &rb_simple_fops);
8244 
8245 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8246 			  &trace_time_stamp_mode_fops);
8247 
8248 	tr->buffer_percent = 50;
8249 
8250 	trace_create_file("buffer_percent", 0444, d_tracer,
8251 			tr, &buffer_percent_fops);
8252 
8253 	create_trace_options_dir(tr);
8254 
8255 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8256 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8257 			&tr->max_latency, &tracing_max_lat_fops);
8258 #endif
8259 
8260 	if (ftrace_create_function_files(tr, d_tracer))
8261 		WARN(1, "Could not allocate function filter files");
8262 
8263 #ifdef CONFIG_TRACER_SNAPSHOT
8264 	trace_create_file("snapshot", 0644, d_tracer,
8265 			  tr, &snapshot_fops);
8266 #endif
8267 
8268 	for_each_tracing_cpu(cpu)
8269 		tracing_init_tracefs_percpu(tr, cpu);
8270 
8271 	ftrace_init_tracefs(tr, d_tracer);
8272 }
8273 
8274 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8275 {
8276 	struct vfsmount *mnt;
8277 	struct file_system_type *type;
8278 
8279 	/*
8280 	 * To maintain backward compatibility for tools that mount
8281 	 * debugfs to get to the tracing facility, tracefs is automatically
8282 	 * mounted to the debugfs/tracing directory.
8283 	 */
8284 	type = get_fs_type("tracefs");
8285 	if (!type)
8286 		return NULL;
8287 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8288 	put_filesystem(type);
8289 	if (IS_ERR(mnt))
8290 		return NULL;
8291 	mntget(mnt);
8292 
8293 	return mnt;
8294 }
8295 
8296 /**
8297  * tracing_init_dentry - initialize top level trace array
8298  *
8299  * This is called when creating files or directories in the tracing
8300  * directory. It is called via fs_initcall() by any of the boot up code
8301  * and expects to return the dentry of the top level tracing directory.
8302  */
8303 struct dentry *tracing_init_dentry(void)
8304 {
8305 	struct trace_array *tr = &global_trace;
8306 
8307 	/* The top level trace array uses  NULL as parent */
8308 	if (tr->dir)
8309 		return NULL;
8310 
8311 	if (WARN_ON(!tracefs_initialized()) ||
8312 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8313 		 WARN_ON(!debugfs_initialized())))
8314 		return ERR_PTR(-ENODEV);
8315 
8316 	/*
8317 	 * As there may still be users that expect the tracing
8318 	 * files to exist in debugfs/tracing, we must automount
8319 	 * the tracefs file system there, so older tools still
8320 	 * work with the newer kernel.
8321 	 */
8322 	tr->dir = debugfs_create_automount("tracing", NULL,
8323 					   trace_automount, NULL);
8324 	if (!tr->dir) {
8325 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8326 		return ERR_PTR(-ENOMEM);
8327 	}
8328 
8329 	return NULL;
8330 }
8331 
8332 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8333 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8334 
8335 static void __init trace_eval_init(void)
8336 {
8337 	int len;
8338 
8339 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8340 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8341 }
8342 
8343 #ifdef CONFIG_MODULES
8344 static void trace_module_add_evals(struct module *mod)
8345 {
8346 	if (!mod->num_trace_evals)
8347 		return;
8348 
8349 	/*
8350 	 * Modules with bad taint do not have events created, so do
8351 	 * not bother with their enums either.
8352 	 */
8353 	if (trace_module_has_bad_taint(mod))
8354 		return;
8355 
8356 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8357 }
8358 
8359 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8360 static void trace_module_remove_evals(struct module *mod)
8361 {
8362 	union trace_eval_map_item *map;
8363 	union trace_eval_map_item **last = &trace_eval_maps;
8364 
8365 	if (!mod->num_trace_evals)
8366 		return;
8367 
8368 	mutex_lock(&trace_eval_mutex);
8369 
8370 	map = trace_eval_maps;
8371 
8372 	while (map) {
8373 		if (map->head.mod == mod)
8374 			break;
8375 		map = trace_eval_jmp_to_tail(map);
8376 		last = &map->tail.next;
8377 		map = map->tail.next;
8378 	}
8379 	if (!map)
8380 		goto out;
8381 
8382 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8383 	kfree(map);
8384  out:
8385 	mutex_unlock(&trace_eval_mutex);
8386 }
8387 #else
8388 static inline void trace_module_remove_evals(struct module *mod) { }
8389 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8390 
8391 static int trace_module_notify(struct notifier_block *self,
8392 			       unsigned long val, void *data)
8393 {
8394 	struct module *mod = data;
8395 
8396 	switch (val) {
8397 	case MODULE_STATE_COMING:
8398 		trace_module_add_evals(mod);
8399 		break;
8400 	case MODULE_STATE_GOING:
8401 		trace_module_remove_evals(mod);
8402 		break;
8403 	}
8404 
8405 	return 0;
8406 }
8407 
8408 static struct notifier_block trace_module_nb = {
8409 	.notifier_call = trace_module_notify,
8410 	.priority = 0,
8411 };
8412 #endif /* CONFIG_MODULES */
8413 
8414 static __init int tracer_init_tracefs(void)
8415 {
8416 	struct dentry *d_tracer;
8417 
8418 	trace_access_lock_init();
8419 
8420 	d_tracer = tracing_init_dentry();
8421 	if (IS_ERR(d_tracer))
8422 		return 0;
8423 
8424 	event_trace_init();
8425 
8426 	init_tracer_tracefs(&global_trace, d_tracer);
8427 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8428 
8429 	trace_create_file("tracing_thresh", 0644, d_tracer,
8430 			&global_trace, &tracing_thresh_fops);
8431 
8432 	trace_create_file("README", 0444, d_tracer,
8433 			NULL, &tracing_readme_fops);
8434 
8435 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8436 			NULL, &tracing_saved_cmdlines_fops);
8437 
8438 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8439 			  NULL, &tracing_saved_cmdlines_size_fops);
8440 
8441 	trace_create_file("saved_tgids", 0444, d_tracer,
8442 			NULL, &tracing_saved_tgids_fops);
8443 
8444 	trace_eval_init();
8445 
8446 	trace_create_eval_file(d_tracer);
8447 
8448 #ifdef CONFIG_MODULES
8449 	register_module_notifier(&trace_module_nb);
8450 #endif
8451 
8452 #ifdef CONFIG_DYNAMIC_FTRACE
8453 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8454 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8455 #endif
8456 
8457 	create_trace_instances(d_tracer);
8458 
8459 	update_tracer_options(&global_trace);
8460 
8461 	return 0;
8462 }
8463 
8464 static int trace_panic_handler(struct notifier_block *this,
8465 			       unsigned long event, void *unused)
8466 {
8467 	if (ftrace_dump_on_oops)
8468 		ftrace_dump(ftrace_dump_on_oops);
8469 	return NOTIFY_OK;
8470 }
8471 
8472 static struct notifier_block trace_panic_notifier = {
8473 	.notifier_call  = trace_panic_handler,
8474 	.next           = NULL,
8475 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8476 };
8477 
8478 static int trace_die_handler(struct notifier_block *self,
8479 			     unsigned long val,
8480 			     void *data)
8481 {
8482 	switch (val) {
8483 	case DIE_OOPS:
8484 		if (ftrace_dump_on_oops)
8485 			ftrace_dump(ftrace_dump_on_oops);
8486 		break;
8487 	default:
8488 		break;
8489 	}
8490 	return NOTIFY_OK;
8491 }
8492 
8493 static struct notifier_block trace_die_notifier = {
8494 	.notifier_call = trace_die_handler,
8495 	.priority = 200
8496 };
8497 
8498 /*
8499  * printk is set to a max of 1024; we really don't need it that big.
8500  * Nothing should be printing 1000 characters anyway.
8501  */
8502 #define TRACE_MAX_PRINT		1000
8503 
8504 /*
8505  * Define here KERN_TRACE so that we have one place to modify
8506  * it if we decide to change what log level the ftrace dump
8507  * should be at.
8508  */
8509 #define KERN_TRACE		KERN_EMERG
8510 
8511 void
8512 trace_printk_seq(struct trace_seq *s)
8513 {
8514 	/* Probably should print a warning here. */
8515 	if (s->seq.len >= TRACE_MAX_PRINT)
8516 		s->seq.len = TRACE_MAX_PRINT;
8517 
8518 	/*
8519 	 * More paranoid code. Although the buffer size is set to
8520 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8521 	 * an extra layer of protection.
8522 	 */
8523 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8524 		s->seq.len = s->seq.size - 1;
8525 
8526 	/* should be zero terminated, but we are paranoid. */
8527 	s->buffer[s->seq.len] = 0;
8528 
8529 	printk(KERN_TRACE "%s", s->buffer);
8530 
8531 	trace_seq_init(s);
8532 }
8533 
8534 void trace_init_global_iter(struct trace_iterator *iter)
8535 {
8536 	iter->tr = &global_trace;
8537 	iter->trace = iter->tr->current_trace;
8538 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8539 	iter->trace_buffer = &global_trace.trace_buffer;
8540 
8541 	if (iter->trace && iter->trace->open)
8542 		iter->trace->open(iter);
8543 
8544 	/* Annotate start of buffers if we had overruns */
8545 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8546 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8547 
8548 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8549 	if (trace_clocks[iter->tr->clock_id].in_ns)
8550 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8551 }
8552 
8553 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8554 {
8555 	/* use static because iter can be a bit big for the stack */
8556 	static struct trace_iterator iter;
8557 	static atomic_t dump_running;
8558 	struct trace_array *tr = &global_trace;
8559 	unsigned int old_userobj;
8560 	unsigned long flags;
8561 	int cnt = 0, cpu;
8562 
8563 	/* Only allow one dump user at a time. */
8564 	if (atomic_inc_return(&dump_running) != 1) {
8565 		atomic_dec(&dump_running);
8566 		return;
8567 	}
8568 
8569 	/*
8570 	 * Always turn off tracing when we dump.
8571 	 * We don't need to show trace output of what happens
8572 	 * between multiple crashes.
8573 	 *
8574 	 * If the user does a sysrq-z, then they can re-enable
8575 	 * tracing with echo 1 > tracing_on.
8576 	 */
8577 	tracing_off();
8578 
8579 	local_irq_save(flags);
8580 	printk_nmi_direct_enter();
8581 
8582 	/* Simulate the iterator */
8583 	trace_init_global_iter(&iter);
8584 
8585 	for_each_tracing_cpu(cpu) {
8586 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8587 	}
8588 
8589 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8590 
8591 	/* don't look at user memory in panic mode */
8592 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8593 
8594 	switch (oops_dump_mode) {
8595 	case DUMP_ALL:
8596 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8597 		break;
8598 	case DUMP_ORIG:
8599 		iter.cpu_file = raw_smp_processor_id();
8600 		break;
8601 	case DUMP_NONE:
8602 		goto out_enable;
8603 	default:
8604 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8605 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8606 	}
8607 
8608 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8609 
8610 	/* Did function tracer already get disabled? */
8611 	if (ftrace_is_dead()) {
8612 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8613 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8614 	}
8615 
8616 	/*
8617 	 * We need to stop all tracing on all CPUs to read
8618 	 * the next buffer. This is a bit expensive, but is
8619 	 * not done often. We read everything we can,
8620 	 * and then release the locks again.
8621 	 */
8622 
8623 	while (!trace_empty(&iter)) {
8624 
8625 		if (!cnt)
8626 			printk(KERN_TRACE "---------------------------------\n");
8627 
8628 		cnt++;
8629 
8630 		/* reset all but tr, trace, and overruns */
8631 		memset(&iter.seq, 0,
8632 		       sizeof(struct trace_iterator) -
8633 		       offsetof(struct trace_iterator, seq));
8634 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8635 		iter.pos = -1;
8636 
8637 		if (trace_find_next_entry_inc(&iter) != NULL) {
8638 			int ret;
8639 
8640 			ret = print_trace_line(&iter);
8641 			if (ret != TRACE_TYPE_NO_CONSUME)
8642 				trace_consume(&iter);
8643 		}
8644 		touch_nmi_watchdog();
8645 
8646 		trace_printk_seq(&iter.seq);
8647 	}
8648 
8649 	if (!cnt)
8650 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8651 	else
8652 		printk(KERN_TRACE "---------------------------------\n");
8653 
8654  out_enable:
8655 	tr->trace_flags |= old_userobj;
8656 
8657 	for_each_tracing_cpu(cpu) {
8658 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8659 	}
8660 	atomic_dec(&dump_running);
8661 	printk_nmi_direct_exit();
8662 	local_irq_restore(flags);
8663 }
8664 EXPORT_SYMBOL_GPL(ftrace_dump);
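
/*
 * Usage note (editor's addition): ftrace_dump() is normally reached from
 * trace_panic_handler()/trace_die_handler() above when ftrace_dump_on_oops
 * is set (e.g. via the "ftrace_dump_on_oops" kernel command line option),
 * but exported code may also call it directly. A hedged sketch:
 *
 *	ftrace_dump(DUMP_ALL);	// dump every CPU's buffer to the console
 *	ftrace_dump(DUMP_ORIG);	// dump only the calling CPU's buffer
 *
 * Tracing stays disabled afterwards; it can be re-enabled with
 * "echo 1 > tracing_on" as the comment in ftrace_dump() notes.
 */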
8665 
8666 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8667 {
8668 	char **argv;
8669 	int argc, ret;
8670 
8671 	argc = 0;
8672 	ret = 0;
8673 	argv = argv_split(GFP_KERNEL, buf, &argc);
8674 	if (!argv)
8675 		return -ENOMEM;
8676 
8677 	if (argc)
8678 		ret = createfn(argc, argv);
8679 
8680 	argv_free(argv);
8681 
8682 	return ret;
8683 }
8684 
8685 #define WRITE_BUFSIZE  4096
8686 
8687 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8688 				size_t count, loff_t *ppos,
8689 				int (*createfn)(int, char **))
8690 {
8691 	char *kbuf, *buf, *tmp;
8692 	int ret = 0;
8693 	size_t done = 0;
8694 	size_t size;
8695 
8696 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8697 	if (!kbuf)
8698 		return -ENOMEM;
8699 
8700 	while (done < count) {
8701 		size = count - done;
8702 
8703 		if (size >= WRITE_BUFSIZE)
8704 			size = WRITE_BUFSIZE - 1;
8705 
8706 		if (copy_from_user(kbuf, buffer + done, size)) {
8707 			ret = -EFAULT;
8708 			goto out;
8709 		}
8710 		kbuf[size] = '\0';
8711 		buf = kbuf;
8712 		do {
8713 			tmp = strchr(buf, '\n');
8714 			if (tmp) {
8715 				*tmp = '\0';
8716 				size = tmp - buf + 1;
8717 			} else {
8718 				size = strlen(buf);
8719 				if (done + size < count) {
8720 					if (buf != kbuf)
8721 						break;
8722 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8723 					pr_warn("Line length is too long: Should be less than %d\n",
8724 						WRITE_BUFSIZE - 2);
8725 					ret = -EINVAL;
8726 					goto out;
8727 				}
8728 			}
8729 			done += size;
8730 
8731 			/* Remove comments */
8732 			tmp = strchr(buf, '#');
8733 
8734 			if (tmp)
8735 				*tmp = '\0';
8736 
8737 			ret = trace_run_command(buf, createfn);
8738 			if (ret)
8739 				goto out;
8740 			buf += size;
8741 
8742 		} while (done < count);
8743 	}
8744 	ret = done;
8745 
8746 out:
8747 	kfree(kbuf);
8748 
8749 	return ret;
8750 }
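
/*
 * Illustration (editor's sketch, not from the original source): a write of
 * the hypothetical buffer
 *
 *	"p:myprobe do_sys_open\n# a comment\np:other vfs_read\n"
 *
 * is split on '\n', stripped of anything from '#' onward, and handed to
 * createfn() once per remaining non-empty line via trace_run_command():
 *
 *	createfn(2, {"p:myprobe", "do_sys_open"});
 *	createfn(2, {"p:other", "vfs_read"});
 *
 * A single line longer than WRITE_BUFSIZE - 2 bytes is rejected with
 * -EINVAL.
 */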
8751 
8752 __init static int tracer_alloc_buffers(void)
8753 {
8754 	int ring_buf_size;
8755 	int ret = -ENOMEM;
8756 
8757 	/*
8758 	 * Make sure we don't accidentally add more trace options
8759 	 * than we have bits for.
8760 	 */
8761 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8762 
8763 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8764 		goto out;
8765 
8766 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8767 		goto out_free_buffer_mask;
8768 
8769 	/* Only allocate trace_printk buffers if a trace_printk exists */
8770 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8771 		/* Must be called before global_trace.buffer is allocated */
8772 		trace_printk_init_buffers();
8773 
8774 	/* To save memory, keep the ring buffer size to its minimum */
8775 	if (ring_buffer_expanded)
8776 		ring_buf_size = trace_buf_size;
8777 	else
8778 		ring_buf_size = 1;
8779 
8780 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8781 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8782 
8783 	raw_spin_lock_init(&global_trace.start_lock);
8784 
8785 	/*
8786 	 * The prepare callback allocates some memory for the ring buffer. We
8787 	 * don't free the buffer if the CPU goes down. If we were to free
8788 	 * the buffer, then the user would lose any trace that was in the
8789 	 * buffer. The memory will be removed once the "instance" is removed.
8790 	 */
8791 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8792 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8793 				      NULL);
8794 	if (ret < 0)
8795 		goto out_free_cpumask;
8796 	/* Used for event triggers */
8797 	ret = -ENOMEM;
8798 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8799 	if (!temp_buffer)
8800 		goto out_rm_hp_state;
8801 
8802 	if (trace_create_savedcmd() < 0)
8803 		goto out_free_temp_buffer;
8804 
8805 	/* TODO: make the number of buffers hot pluggable with CPUs */
8806 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8807 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8808 		WARN_ON(1);
8809 		goto out_free_savedcmd;
8810 	}
8811 
8812 	if (global_trace.buffer_disabled)
8813 		tracing_off();
8814 
8815 	if (trace_boot_clock) {
8816 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8817 		if (ret < 0)
8818 			pr_warn("Trace clock %s not defined, going back to default\n",
8819 				trace_boot_clock);
8820 	}
8821 
8822 	/*
8823 	 * register_tracer() might reference current_trace, so it
8824 	 * needs to be set before we register anything. This is
8825 	 * just a bootstrap of current_trace anyway.
8826 	 */
8827 	global_trace.current_trace = &nop_trace;
8828 
8829 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8830 
8831 	ftrace_init_global_array_ops(&global_trace);
8832 
8833 	init_trace_flags_index(&global_trace);
8834 
8835 	register_tracer(&nop_trace);
8836 
8837 	/* Function tracing may start here (via kernel command line) */
8838 	init_function_trace();
8839 
8840 	/* All seems OK, enable tracing */
8841 	tracing_disabled = 0;
8842 
8843 	atomic_notifier_chain_register(&panic_notifier_list,
8844 				       &trace_panic_notifier);
8845 
8846 	register_die_notifier(&trace_die_notifier);
8847 
8848 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8849 
8850 	INIT_LIST_HEAD(&global_trace.systems);
8851 	INIT_LIST_HEAD(&global_trace.events);
8852 	INIT_LIST_HEAD(&global_trace.hist_vars);
8853 	list_add(&global_trace.list, &ftrace_trace_arrays);
8854 
8855 	apply_trace_boot_options();
8856 
8857 	register_snapshot_cmd();
8858 
8859 	return 0;
8860 
8861 out_free_savedcmd:
8862 	free_saved_cmdlines_buffer(savedcmd);
8863 out_free_temp_buffer:
8864 	ring_buffer_free(temp_buffer);
8865 out_rm_hp_state:
8866 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8867 out_free_cpumask:
8868 	free_cpumask_var(global_trace.tracing_cpumask);
8869 out_free_buffer_mask:
8870 	free_cpumask_var(tracing_buffer_mask);
8871 out:
8872 	return ret;
8873 }
8874 
8875 void __init early_trace_init(void)
8876 {
8877 	if (tracepoint_printk) {
8878 		tracepoint_print_iter =
8879 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8880 		if (WARN_ON(!tracepoint_print_iter))
8881 			tracepoint_printk = 0;
8882 		else
8883 			static_key_enable(&tracepoint_printk_key.key);
8884 	}
8885 	tracer_alloc_buffers();
8886 }
8887 
8888 void __init trace_init(void)
8889 {
8890 	trace_event_init();
8891 }
8892 
8893 __init static int clear_boot_tracer(void)
8894 {
8895 	/*
8896 	 * The default bootup tracer name is kept in an init section buffer.
8897 	 * This function is called at late_initcall time. If we did not
8898 	 * find the boot tracer, then clear it out, to prevent
8899 	 * later registration from accessing the buffer that is
8900 	 * about to be freed.
8901 	 */
8902 	if (!default_bootup_tracer)
8903 		return 0;
8904 
8905 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8906 	       default_bootup_tracer);
8907 	default_bootup_tracer = NULL;
8908 
8909 	return 0;
8910 }
8911 
8912 fs_initcall(tracer_init_tracefs);
8913 late_initcall_sync(clear_boot_tracer);
8914 
8915 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8916 __init static int tracing_set_default_clock(void)
8917 {
8918 	/* sched_clock_stable() is determined in late_initcall */
8919 	if (!trace_boot_clock && !sched_clock_stable()) {
8920 		printk(KERN_WARNING
8921 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8922 		       "If you want to keep using the local clock, then add:\n"
8923 		       "  \"trace_clock=local\"\n"
8924 		       "on the kernel command line\n");
8925 		tracing_set_clock(&global_trace, "global");
8926 	}
8927 
8928 	return 0;
8929 }
8930 late_initcall_sync(tracing_set_default_clock);
8931 #endif
8932