xref: /openbmc/linux/kernel/trace/trace.c (revision f3a8b664)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will lurk into the ring-buffer to count the
57  * entries inserted during the selftest although some concurrent
58  * insertions into the ring-buffer such as trace_printk could occurred
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputing it to a
107  * serial console.
108  *
109  * It is default off, but you can enable it with either specifying
110  * "ftrace_dump_on_oops" in the kernel command line, or setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * than "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186 
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual prevous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
476 
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always recreate a new array. The write is an all or nothing
498 	 * operation. Always create a new array when adding new pids by
499 	 * the user. If the operation fails, then the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
573 
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If the dump on oops happens, it will be much appreciated
620  * to not have to wait for all that output. Anyway this can be
621  * boot time and run time configurable.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
635 /*
636  * serialize the access of the ring buffer
637  *
638  * ring buffer serializes readers, but it is low level protection.
639  * The validity of the events (which returns by ring_buffer_peek() ..etc)
640  * are not protected by ring buffer.
641  *
642  * The content of events may become garbage if we allow other process consumes
643  * these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not reader page) in ring buffer, and this page will be rewrited
646  *      by events producer.
647  *   B) The page of the consumed events may become a page for splice_read,
648  *      and this page will be returned to system.
649  *
650  * These primitives allow multi process access to different cpu ring buffer
651  * concurrently.
652  *
653  * These primitives don't distinguish read-only and read-consume access.
654  * Multi read-only access are also serialized.
655  */
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
711 static inline void trace_access_lock_init(void)
712 {
713 }
714 
715 #endif
716 
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 					unsigned long flags,
729 					int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 				      struct ring_buffer *buffer,
734 				      unsigned long flags,
735 				      int skip, int pc, struct pt_regs *regs)
736 {
737 }
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races of where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * trace_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * a tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer. instead, resize it because
934 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
965 /**
966  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to trace_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978 	int ret;
979 
980 	ret = tracing_alloc_snapshot();
981 	if (ret < 0)
982 		return;
983 
984 	tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
988 void tracing_snapshot(void)
989 {
990 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 	/* Give warning */
1002 	tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races of where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr : the trace array to know if ring buffer is enabled
1047  *
1048  * Shows real state of the ring buffer if it is enabled or not.
1049  */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062 	return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 	return nsecs / 1000;
1099 }
1100 
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109 
1110 /* These must match the bit postions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 	TRACE_FLAGS
1113 	NULL
1114 };
1115 
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	ARCH_TRACE_CLOCKS
1129 };
1130 
1131 /*
1132  * trace_parser_get_init - gets the buffer for trace parser
1133  */
1134 int trace_parser_get_init(struct trace_parser *parser, int size)
1135 {
1136 	memset(parser, 0, sizeof(*parser));
1137 
1138 	parser->buffer = kmalloc(size, GFP_KERNEL);
1139 	if (!parser->buffer)
1140 		return 1;
1141 
1142 	parser->size = size;
1143 	return 0;
1144 }
1145 
1146 /*
1147  * trace_parser_put - frees the buffer for trace parser
1148  */
1149 void trace_parser_put(struct trace_parser *parser)
1150 {
1151 	kfree(parser->buffer);
1152 }
1153 
1154 /*
1155  * trace_get_user - reads the user input string separated by  space
1156  * (matched by isspace(ch))
1157  *
1158  * For each string found the 'struct trace_parser' is updated,
1159  * and the function returns.
1160  *
1161  * Returns number of bytes read.
1162  *
1163  * See kernel/trace/trace.h for 'struct trace_parser' details.
1164  */
1165 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166 	size_t cnt, loff_t *ppos)
1167 {
1168 	char ch;
1169 	size_t read = 0;
1170 	ssize_t ret;
1171 
1172 	if (!*ppos)
1173 		trace_parser_clear(parser);
1174 
1175 	ret = get_user(ch, ubuf++);
1176 	if (ret)
1177 		goto out;
1178 
1179 	read++;
1180 	cnt--;
1181 
1182 	/*
1183 	 * The parser is not finished with the last write,
1184 	 * continue reading the user input without skipping spaces.
1185 	 */
1186 	if (!parser->cont) {
1187 		/* skip white space */
1188 		while (cnt && isspace(ch)) {
1189 			ret = get_user(ch, ubuf++);
1190 			if (ret)
1191 				goto out;
1192 			read++;
1193 			cnt--;
1194 		}
1195 
1196 		/* only spaces were written */
1197 		if (isspace(ch)) {
1198 			*ppos += read;
1199 			ret = read;
1200 			goto out;
1201 		}
1202 
1203 		parser->idx = 0;
1204 	}
1205 
1206 	/* read the non-space input */
1207 	while (cnt && !isspace(ch)) {
1208 		if (parser->idx < parser->size - 1)
1209 			parser->buffer[parser->idx++] = ch;
1210 		else {
1211 			ret = -EINVAL;
1212 			goto out;
1213 		}
1214 		ret = get_user(ch, ubuf++);
1215 		if (ret)
1216 			goto out;
1217 		read++;
1218 		cnt--;
1219 	}
1220 
1221 	/* We either got finished input or we have to wait for another call. */
1222 	if (isspace(ch)) {
1223 		parser->buffer[parser->idx] = 0;
1224 		parser->cont = false;
1225 	} else if (parser->idx < parser->size - 1) {
1226 		parser->cont = true;
1227 		parser->buffer[parser->idx++] = ch;
1228 	} else {
1229 		ret = -EINVAL;
1230 		goto out;
1231 	}
1232 
1233 	*ppos += read;
1234 	ret = read;
1235 
1236 out:
1237 	return ret;
1238 }
1239 
1240 /* TODO add a seq_buf_to_buffer() */
1241 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242 {
1243 	int len;
1244 
1245 	if (trace_seq_used(s) <= s->seq.readpos)
1246 		return -EBUSY;
1247 
1248 	len = trace_seq_used(s) - s->seq.readpos;
1249 	if (cnt > len)
1250 		cnt = len;
1251 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252 
1253 	s->seq.readpos += cnt;
1254 	return cnt;
1255 }
1256 
1257 unsigned long __read_mostly	tracing_thresh;
1258 
1259 #ifdef CONFIG_TRACER_MAX_TRACE
1260 /*
1261  * Copy the new maximum trace into the separate maximum-trace
1262  * structure. (this way the maximum trace is permanently saved,
1263  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264  */
1265 static void
1266 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267 {
1268 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1269 	struct trace_buffer *max_buf = &tr->max_buffer;
1270 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272 
1273 	max_buf->cpu = cpu;
1274 	max_buf->time_start = data->preempt_timestamp;
1275 
1276 	max_data->saved_latency = tr->max_latency;
1277 	max_data->critical_start = data->critical_start;
1278 	max_data->critical_end = data->critical_end;
1279 
1280 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281 	max_data->pid = tsk->pid;
1282 	/*
1283 	 * If tsk == current, then use current_uid(), as that does not use
1284 	 * RCU. The irq tracer can be called out of RCU scope.
1285 	 */
1286 	if (tsk == current)
1287 		max_data->uid = current_uid();
1288 	else
1289 		max_data->uid = task_uid(tsk);
1290 
1291 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292 	max_data->policy = tsk->policy;
1293 	max_data->rt_priority = tsk->rt_priority;
1294 
1295 	/* record this tasks comm */
1296 	tracing_record_cmdline(tsk);
1297 }
1298 
1299 /**
1300  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301  * @tr: tracer
1302  * @tsk: the task with the latency
1303  * @cpu: The cpu that initiated the trace.
1304  *
1305  * Flip the buffers between the @tr and the max_tr and record information
1306  * about which task was the cause of this latency.
1307  */
1308 void
1309 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct ring_buffer *buf;
1312 
1313 	if (tr->stop_count)
1314 		return;
1315 
1316 	WARN_ON_ONCE(!irqs_disabled());
1317 
1318 	if (!tr->allocated_snapshot) {
1319 		/* Only the nop tracer should hit this when disabling */
1320 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321 		return;
1322 	}
1323 
1324 	arch_spin_lock(&tr->max_lock);
1325 
1326 	buf = tr->trace_buffer.buffer;
1327 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328 	tr->max_buffer.buffer = buf;
1329 
1330 	__update_max_tr(tr, tsk, cpu);
1331 	arch_spin_unlock(&tr->max_lock);
1332 }
1333 
1334 /**
1335  * update_max_tr_single - only copy one trace over, and reset the rest
1336  * @tr - tracer
1337  * @tsk - task with the latency
1338  * @cpu - the cpu of the buffer to copy.
1339  *
1340  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341  */
1342 void
1343 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344 {
1345 	int ret;
1346 
1347 	if (tr->stop_count)
1348 		return;
1349 
1350 	WARN_ON_ONCE(!irqs_disabled());
1351 	if (!tr->allocated_snapshot) {
1352 		/* Only the nop tracer should hit this when disabling */
1353 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354 		return;
1355 	}
1356 
1357 	arch_spin_lock(&tr->max_lock);
1358 
1359 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360 
1361 	if (ret == -EBUSY) {
1362 		/*
1363 		 * We failed to swap the buffer due to a commit taking
1364 		 * place on this CPU. We fail to record, but we reset
1365 		 * the max trace buffer (no one writes directly to it)
1366 		 * and flag that it failed.
1367 		 */
1368 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369 			"Failed to swap buffers due to commit in progress\n");
1370 	}
1371 
1372 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373 
1374 	__update_max_tr(tr, tsk, cpu);
1375 	arch_spin_unlock(&tr->max_lock);
1376 }
1377 #endif /* CONFIG_TRACER_MAX_TRACE */
1378 
1379 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380 {
1381 	/* Iterators are static, they should be filled or empty */
1382 	if (trace_buffer_iter(iter, iter->cpu_file))
1383 		return 0;
1384 
1385 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386 				full);
1387 }
1388 
1389 #ifdef CONFIG_FTRACE_STARTUP_TEST
1390 static int run_tracer_selftest(struct tracer *type)
1391 {
1392 	struct trace_array *tr = &global_trace;
1393 	struct tracer *saved_tracer = tr->current_trace;
1394 	int ret;
1395 
1396 	if (!type->selftest || tracing_selftest_disabled)
1397 		return 0;
1398 
1399 	/*
1400 	 * Run a selftest on this tracer.
1401 	 * Here we reset the trace buffer, and set the current
1402 	 * tracer to be this tracer. The tracer can then run some
1403 	 * internal tracing to verify that everything is in order.
1404 	 * If we fail, we do not register this tracer.
1405 	 */
1406 	tracing_reset_online_cpus(&tr->trace_buffer);
1407 
1408 	tr->current_trace = type;
1409 
1410 #ifdef CONFIG_TRACER_MAX_TRACE
1411 	if (type->use_max_tr) {
1412 		/* If we expanded the buffers, make sure the max is expanded too */
1413 		if (ring_buffer_expanded)
1414 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415 					   RING_BUFFER_ALL_CPUS);
1416 		tr->allocated_snapshot = true;
1417 	}
1418 #endif
1419 
1420 	/* the test is responsible for initializing and enabling */
1421 	pr_info("Testing tracer %s: ", type->name);
1422 	ret = type->selftest(type, tr);
1423 	/* the test is responsible for resetting too */
1424 	tr->current_trace = saved_tracer;
1425 	if (ret) {
1426 		printk(KERN_CONT "FAILED!\n");
1427 		/* Add the warning after printing 'FAILED' */
1428 		WARN_ON(1);
1429 		return -1;
1430 	}
1431 	/* Only reset on passing, to avoid touching corrupted buffers */
1432 	tracing_reset_online_cpus(&tr->trace_buffer);
1433 
1434 #ifdef CONFIG_TRACER_MAX_TRACE
1435 	if (type->use_max_tr) {
1436 		tr->allocated_snapshot = false;
1437 
1438 		/* Shrink the max buffer again */
1439 		if (ring_buffer_expanded)
1440 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1441 					   RING_BUFFER_ALL_CPUS);
1442 	}
1443 #endif
1444 
1445 	printk(KERN_CONT "PASSED\n");
1446 	return 0;
1447 }
1448 #else
1449 static inline int run_tracer_selftest(struct tracer *type)
1450 {
1451 	return 0;
1452 }
1453 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1454 
1455 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1456 
1457 static void __init apply_trace_boot_options(void);
1458 
1459 /**
1460  * register_tracer - register a tracer with the ftrace system.
1461  * @type - the plugin for the tracer
1462  *
1463  * Register a new plugin tracer.
1464  */
1465 int __init register_tracer(struct tracer *type)
1466 {
1467 	struct tracer *t;
1468 	int ret = 0;
1469 
1470 	if (!type->name) {
1471 		pr_info("Tracer must have a name\n");
1472 		return -1;
1473 	}
1474 
1475 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477 		return -1;
1478 	}
1479 
1480 	mutex_lock(&trace_types_lock);
1481 
1482 	tracing_selftest_running = true;
1483 
1484 	for (t = trace_types; t; t = t->next) {
1485 		if (strcmp(type->name, t->name) == 0) {
1486 			/* already found */
1487 			pr_info("Tracer %s already registered\n",
1488 				type->name);
1489 			ret = -1;
1490 			goto out;
1491 		}
1492 	}
1493 
1494 	if (!type->set_flag)
1495 		type->set_flag = &dummy_set_flag;
1496 	if (!type->flags) {
1497 		/*allocate a dummy tracer_flags*/
1498 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499 		if (!type->flags) {
1500 			ret = -ENOMEM;
1501 			goto out;
1502 		}
1503 		type->flags->val = 0;
1504 		type->flags->opts = dummy_tracer_opt;
1505 	} else
1506 		if (!type->flags->opts)
1507 			type->flags->opts = dummy_tracer_opt;
1508 
1509 	/* store the tracer for __set_tracer_option */
1510 	type->flags->trace = type;
1511 
1512 	ret = run_tracer_selftest(type);
1513 	if (ret < 0)
1514 		goto out;
1515 
1516 	type->next = trace_types;
1517 	trace_types = type;
1518 	add_tracer_options(&global_trace, type);
1519 
1520  out:
1521 	tracing_selftest_running = false;
1522 	mutex_unlock(&trace_types_lock);
1523 
1524 	if (ret || !default_bootup_tracer)
1525 		goto out_unlock;
1526 
1527 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528 		goto out_unlock;
1529 
1530 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531 	/* Do we want this tracer to start on bootup? */
1532 	tracing_set_tracer(&global_trace, type->name);
1533 	default_bootup_tracer = NULL;
1534 
1535 	apply_trace_boot_options();
1536 
1537 	/* disable other selftests, since this will break it. */
1538 	tracing_selftest_disabled = true;
1539 #ifdef CONFIG_FTRACE_STARTUP_TEST
1540 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541 	       type->name);
1542 #endif
1543 
1544  out_unlock:
1545 	return ret;
1546 }
1547 
1548 void tracing_reset(struct trace_buffer *buf, int cpu)
1549 {
1550 	struct ring_buffer *buffer = buf->buffer;
1551 
1552 	if (!buffer)
1553 		return;
1554 
1555 	ring_buffer_record_disable(buffer);
1556 
1557 	/* Make sure all commits have finished */
1558 	synchronize_sched();
1559 	ring_buffer_reset_cpu(buffer, cpu);
1560 
1561 	ring_buffer_record_enable(buffer);
1562 }
1563 
1564 void tracing_reset_online_cpus(struct trace_buffer *buf)
1565 {
1566 	struct ring_buffer *buffer = buf->buffer;
1567 	int cpu;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_sched();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	for_each_online_cpu(cpu)
1580 		ring_buffer_reset_cpu(buffer, cpu);
1581 
1582 	ring_buffer_record_enable(buffer);
1583 }
1584 
1585 /* Must have trace_types_lock held */
1586 void tracing_reset_all_online_cpus(void)
1587 {
1588 	struct trace_array *tr;
1589 
1590 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591 		tracing_reset_online_cpus(&tr->trace_buffer);
1592 #ifdef CONFIG_TRACER_MAX_TRACE
1593 		tracing_reset_online_cpus(&tr->max_buffer);
1594 #endif
1595 	}
1596 }
1597 
1598 #define SAVED_CMDLINES_DEFAULT 128
1599 #define NO_CMDLINE_MAP UINT_MAX
1600 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1601 struct saved_cmdlines_buffer {
1602 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1603 	unsigned *map_cmdline_to_pid;
1604 	unsigned cmdline_num;
1605 	int cmdline_idx;
1606 	char *saved_cmdlines;
1607 };
1608 static struct saved_cmdlines_buffer *savedcmd;
1609 
1610 /* temporary disable recording */
1611 static atomic_t trace_record_cmdline_disabled __read_mostly;
1612 
1613 static inline char *get_saved_cmdlines(int idx)
1614 {
1615 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616 }
1617 
1618 static inline void set_cmdline(int idx, const char *cmdline)
1619 {
1620 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621 }
1622 
1623 static int allocate_cmdlines_buffer(unsigned int val,
1624 				    struct saved_cmdlines_buffer *s)
1625 {
1626 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627 					GFP_KERNEL);
1628 	if (!s->map_cmdline_to_pid)
1629 		return -ENOMEM;
1630 
1631 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632 	if (!s->saved_cmdlines) {
1633 		kfree(s->map_cmdline_to_pid);
1634 		return -ENOMEM;
1635 	}
1636 
1637 	s->cmdline_idx = 0;
1638 	s->cmdline_num = val;
1639 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640 	       sizeof(s->map_pid_to_cmdline));
1641 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642 	       val * sizeof(*s->map_cmdline_to_pid));
1643 
1644 	return 0;
1645 }
1646 
1647 static int trace_create_savedcmd(void)
1648 {
1649 	int ret;
1650 
1651 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652 	if (!savedcmd)
1653 		return -ENOMEM;
1654 
1655 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656 	if (ret < 0) {
1657 		kfree(savedcmd);
1658 		savedcmd = NULL;
1659 		return -ENOMEM;
1660 	}
1661 
1662 	return 0;
1663 }
1664 
1665 int is_tracing_stopped(void)
1666 {
1667 	return global_trace.stop_count;
1668 }
1669 
1670 /**
1671  * tracing_start - quick start of the tracer
1672  *
1673  * If tracing is enabled but was stopped by tracing_stop,
1674  * this will start the tracer back up.
1675  */
1676 void tracing_start(void)
1677 {
1678 	struct ring_buffer *buffer;
1679 	unsigned long flags;
1680 
1681 	if (tracing_disabled)
1682 		return;
1683 
1684 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685 	if (--global_trace.stop_count) {
1686 		if (global_trace.stop_count < 0) {
1687 			/* Someone screwed up their debugging */
1688 			WARN_ON_ONCE(1);
1689 			global_trace.stop_count = 0;
1690 		}
1691 		goto out;
1692 	}
1693 
1694 	/* Prevent the buffers from switching */
1695 	arch_spin_lock(&global_trace.max_lock);
1696 
1697 	buffer = global_trace.trace_buffer.buffer;
1698 	if (buffer)
1699 		ring_buffer_record_enable(buffer);
1700 
1701 #ifdef CONFIG_TRACER_MAX_TRACE
1702 	buffer = global_trace.max_buffer.buffer;
1703 	if (buffer)
1704 		ring_buffer_record_enable(buffer);
1705 #endif
1706 
1707 	arch_spin_unlock(&global_trace.max_lock);
1708 
1709  out:
1710 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711 }
1712 
1713 static void tracing_start_tr(struct trace_array *tr)
1714 {
1715 	struct ring_buffer *buffer;
1716 	unsigned long flags;
1717 
1718 	if (tracing_disabled)
1719 		return;
1720 
1721 	/* If global, we need to also start the max tracer */
1722 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723 		return tracing_start();
1724 
1725 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1726 
1727 	if (--tr->stop_count) {
1728 		if (tr->stop_count < 0) {
1729 			/* Someone screwed up their debugging */
1730 			WARN_ON_ONCE(1);
1731 			tr->stop_count = 0;
1732 		}
1733 		goto out;
1734 	}
1735 
1736 	buffer = tr->trace_buffer.buffer;
1737 	if (buffer)
1738 		ring_buffer_record_enable(buffer);
1739 
1740  out:
1741 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742 }
1743 
1744 /**
1745  * tracing_stop - quick stop of the tracer
1746  *
1747  * Light weight way to stop tracing. Use in conjunction with
1748  * tracing_start.
1749  */
1750 void tracing_stop(void)
1751 {
1752 	struct ring_buffer *buffer;
1753 	unsigned long flags;
1754 
1755 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756 	if (global_trace.stop_count++)
1757 		goto out;
1758 
1759 	/* Prevent the buffers from switching */
1760 	arch_spin_lock(&global_trace.max_lock);
1761 
1762 	buffer = global_trace.trace_buffer.buffer;
1763 	if (buffer)
1764 		ring_buffer_record_disable(buffer);
1765 
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 	buffer = global_trace.max_buffer.buffer;
1768 	if (buffer)
1769 		ring_buffer_record_disable(buffer);
1770 #endif
1771 
1772 	arch_spin_unlock(&global_trace.max_lock);
1773 
1774  out:
1775 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776 }
1777 
1778 static void tracing_stop_tr(struct trace_array *tr)
1779 {
1780 	struct ring_buffer *buffer;
1781 	unsigned long flags;
1782 
1783 	/* If global, we need to also stop the max tracer */
1784 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785 		return tracing_stop();
1786 
1787 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1788 	if (tr->stop_count++)
1789 		goto out;
1790 
1791 	buffer = tr->trace_buffer.buffer;
1792 	if (buffer)
1793 		ring_buffer_record_disable(buffer);
1794 
1795  out:
1796 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797 }
1798 
1799 void trace_stop_cmdline_recording(void);
1800 
1801 static int trace_save_cmdline(struct task_struct *tsk)
1802 {
1803 	unsigned pid, idx;
1804 
1805 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806 		return 0;
1807 
1808 	/*
1809 	 * It's not the end of the world if we don't get
1810 	 * the lock, but we also don't want to spin
1811 	 * nor do we want to disable interrupts,
1812 	 * so if we miss here, then better luck next time.
1813 	 */
1814 	if (!arch_spin_trylock(&trace_cmdline_lock))
1815 		return 0;
1816 
1817 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818 	if (idx == NO_CMDLINE_MAP) {
1819 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820 
1821 		/*
1822 		 * Check whether the cmdline buffer at idx has a pid
1823 		 * mapped. We are going to overwrite that entry so we
1824 		 * need to clear the map_pid_to_cmdline. Otherwise we
1825 		 * would read the new comm for the old pid.
1826 		 */
1827 		pid = savedcmd->map_cmdline_to_pid[idx];
1828 		if (pid != NO_CMDLINE_MAP)
1829 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830 
1831 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833 
1834 		savedcmd->cmdline_idx = idx;
1835 	}
1836 
1837 	set_cmdline(idx, tsk->comm);
1838 
1839 	arch_spin_unlock(&trace_cmdline_lock);
1840 
1841 	return 1;
1842 }
1843 
1844 static void __trace_find_cmdline(int pid, char comm[])
1845 {
1846 	unsigned map;
1847 
1848 	if (!pid) {
1849 		strcpy(comm, "<idle>");
1850 		return;
1851 	}
1852 
1853 	if (WARN_ON_ONCE(pid < 0)) {
1854 		strcpy(comm, "<XXX>");
1855 		return;
1856 	}
1857 
1858 	if (pid > PID_MAX_DEFAULT) {
1859 		strcpy(comm, "<...>");
1860 		return;
1861 	}
1862 
1863 	map = savedcmd->map_pid_to_cmdline[pid];
1864 	if (map != NO_CMDLINE_MAP)
1865 		strcpy(comm, get_saved_cmdlines(map));
1866 	else
1867 		strcpy(comm, "<...>");
1868 }
1869 
1870 void trace_find_cmdline(int pid, char comm[])
1871 {
1872 	preempt_disable();
1873 	arch_spin_lock(&trace_cmdline_lock);
1874 
1875 	__trace_find_cmdline(pid, comm);
1876 
1877 	arch_spin_unlock(&trace_cmdline_lock);
1878 	preempt_enable();
1879 }
1880 
1881 void tracing_record_cmdline(struct task_struct *tsk)
1882 {
1883 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884 		return;
1885 
1886 	if (!__this_cpu_read(trace_cmdline_save))
1887 		return;
1888 
1889 	if (trace_save_cmdline(tsk))
1890 		__this_cpu_write(trace_cmdline_save, false);
1891 }
1892 
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895 			     int pc)
1896 {
1897 	struct task_struct *tsk = current;
1898 
1899 	entry->preempt_count		= pc & 0xff;
1900 	entry->pid			= (tsk) ? tsk->pid : 0;
1901 	entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905 		TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914 
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917 		  int type, unsigned long flags, int pc)
1918 {
1919 	struct trace_entry *ent = ring_buffer_event_data(event);
1920 
1921 	tracing_generic_entry_update(ent, flags, pc);
1922 	ent->type = type;
1923 }
1924 
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927 			  int type,
1928 			  unsigned long len,
1929 			  unsigned long flags, int pc)
1930 {
1931 	struct ring_buffer_event *event;
1932 
1933 	event = ring_buffer_lock_reserve(buffer, len);
1934 	if (event != NULL)
1935 		trace_event_setup(event, type, flags, pc);
1936 
1937 	return event;
1938 }
1939 
1940 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942 static int trace_buffered_event_ref;
1943 
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to use a temporary
1948  * buffer to write the event data into if there's a likely chance
1949  * that it will not be committed. The discard of the ring buffer
1950  * is not as fast as committing, and is much slower than copying
1951  * a commit.
1952  *
1953  * When an event is to be filtered, allocate per cpu buffers to
1954  * write the event data into, and if the event is filtered and discarded
1955  * it is simply dropped, otherwise, the entire data is to be committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960 	struct ring_buffer_event *event;
1961 	struct page *page;
1962 	int cpu;
1963 
1964 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965 
1966 	if (trace_buffered_event_ref++)
1967 		return;
1968 
1969 	for_each_tracing_cpu(cpu) {
1970 		page = alloc_pages_node(cpu_to_node(cpu),
1971 					GFP_KERNEL | __GFP_NORETRY, 0);
1972 		if (!page)
1973 			goto failed;
1974 
1975 		event = page_address(page);
1976 		memset(event, 0, sizeof(*event));
1977 
1978 		per_cpu(trace_buffered_event, cpu) = event;
1979 
1980 		preempt_disable();
1981 		if (cpu == smp_processor_id() &&
1982 		    this_cpu_read(trace_buffered_event) !=
1983 		    per_cpu(trace_buffered_event, cpu))
1984 			WARN_ON_ONCE(1);
1985 		preempt_enable();
1986 	}
1987 
1988 	return;
1989  failed:
1990 	trace_buffered_event_disable();
1991 }
1992 
1993 static void enable_trace_buffered_event(void *data)
1994 {
1995 	/* Probably not needed, but do it anyway */
1996 	smp_rmb();
1997 	this_cpu_dec(trace_buffered_event_cnt);
1998 }
1999 
2000 static void disable_trace_buffered_event(void *data)
2001 {
2002 	this_cpu_inc(trace_buffered_event_cnt);
2003 }
2004 
2005 /**
2006  * trace_buffered_event_disable - disable buffering events
2007  *
2008  * When a filter is removed, it is faster to not use the buffered
2009  * events, and to commit directly into the ring buffer. Free up
2010  * the temp buffers when there are no more users. This requires
2011  * special synchronization with current events.
2012  */
2013 void trace_buffered_event_disable(void)
2014 {
2015 	int cpu;
2016 
2017 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018 
2019 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020 		return;
2021 
2022 	if (--trace_buffered_event_ref)
2023 		return;
2024 
2025 	preempt_disable();
2026 	/* For each CPU, set the buffer as used. */
2027 	smp_call_function_many(tracing_buffer_mask,
2028 			       disable_trace_buffered_event, NULL, 1);
2029 	preempt_enable();
2030 
2031 	/* Wait for all current users to finish */
2032 	synchronize_sched();
2033 
2034 	for_each_tracing_cpu(cpu) {
2035 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036 		per_cpu(trace_buffered_event, cpu) = NULL;
2037 	}
2038 	/*
2039 	 * Make sure trace_buffered_event is NULL before clearing
2040 	 * trace_buffered_event_cnt.
2041 	 */
2042 	smp_wmb();
2043 
2044 	preempt_disable();
2045 	/* Do the work on each cpu */
2046 	smp_call_function_many(tracing_buffer_mask,
2047 			       enable_trace_buffered_event, NULL, 1);
2048 	preempt_enable();
2049 }
2050 
2051 void
2052 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053 {
2054 	__this_cpu_write(trace_cmdline_save, true);
2055 
2056 	/* If this is the temp buffer, we need to commit fully */
2057 	if (this_cpu_read(trace_buffered_event) == event) {
2058 		/* Length is in event->array[0] */
2059 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060 		/* Release the temp buffer */
2061 		this_cpu_dec(trace_buffered_event_cnt);
2062 	} else
2063 		ring_buffer_unlock_commit(buffer, event);
2064 }
2065 
2066 static struct ring_buffer *temp_buffer;
2067 
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070 			  struct trace_event_file *trace_file,
2071 			  int type, unsigned long len,
2072 			  unsigned long flags, int pc)
2073 {
2074 	struct ring_buffer_event *entry;
2075 	int val;
2076 
2077 	*current_rb = trace_file->tr->trace_buffer.buffer;
2078 
2079 	if ((trace_file->flags &
2080 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081 	    (entry = this_cpu_read(trace_buffered_event))) {
2082 		/* Try to use the per cpu buffer first */
2083 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2084 		if (val == 1) {
2085 			trace_event_setup(entry, type, flags, pc);
2086 			entry->array[0] = len;
2087 			return entry;
2088 		}
2089 		this_cpu_dec(trace_buffered_event_cnt);
2090 	}
2091 
2092 	entry = trace_buffer_lock_reserve(*current_rb,
2093 					 type, len, flags, pc);
2094 	/*
2095 	 * If tracing is off, but we have triggers enabled
2096 	 * we still need to look at the event data. Use the temp_buffer
2097 	 * to store the trace event for the tigger to use. It's recusive
2098 	 * safe and will not be recorded anywhere.
2099 	 */
2100 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101 		*current_rb = temp_buffer;
2102 		entry = trace_buffer_lock_reserve(*current_rb,
2103 						  type, len, flags, pc);
2104 	}
2105 	return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2108 
2109 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110 				     struct ring_buffer *buffer,
2111 				     struct ring_buffer_event *event,
2112 				     unsigned long flags, int pc,
2113 				     struct pt_regs *regs)
2114 {
2115 	__buffer_unlock_commit(buffer, event);
2116 
2117 	/*
2118 	 * If regs is not set, then skip the following callers:
2119 	 *   trace_buffer_unlock_commit_regs
2120 	 *   event_trigger_unlock_commit
2121 	 *   trace_event_buffer_commit
2122 	 *   trace_event_raw_event_sched_switch
2123 	 * Note, we can still get here via blktrace, wakeup tracer
2124 	 * and mmiotrace, but that's ok if they lose a function or
2125 	 * two. They are that meaningful.
2126 	 */
2127 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128 	ftrace_trace_userstack(buffer, flags, pc);
2129 }
2130 
2131 void
2132 trace_function(struct trace_array *tr,
2133 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134 	       int pc)
2135 {
2136 	struct trace_event_call *call = &event_function;
2137 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138 	struct ring_buffer_event *event;
2139 	struct ftrace_entry *entry;
2140 
2141 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142 					  flags, pc);
2143 	if (!event)
2144 		return;
2145 	entry	= ring_buffer_event_data(event);
2146 	entry->ip			= ip;
2147 	entry->parent_ip		= parent_ip;
2148 
2149 	if (!call_filter_check_discard(call, entry, buffer, event))
2150 		__buffer_unlock_commit(buffer, event);
2151 }
2152 
2153 #ifdef CONFIG_STACKTRACE
2154 
2155 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156 struct ftrace_stack {
2157 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2158 };
2159 
2160 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162 
2163 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164 				 unsigned long flags,
2165 				 int skip, int pc, struct pt_regs *regs)
2166 {
2167 	struct trace_event_call *call = &event_kernel_stack;
2168 	struct ring_buffer_event *event;
2169 	struct stack_entry *entry;
2170 	struct stack_trace trace;
2171 	int use_stack;
2172 	int size = FTRACE_STACK_ENTRIES;
2173 
2174 	trace.nr_entries	= 0;
2175 	trace.skip		= skip;
2176 
2177 	/*
2178 	 * Add two, for this function and the call to save_stack_trace()
2179 	 * If regs is set, then these functions will not be in the way.
2180 	 */
2181 	if (!regs)
2182 		trace.skip += 2;
2183 
2184 	/*
2185 	 * Since events can happen in NMIs there's no safe way to
2186 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2187 	 * or NMI comes in, it will just have to use the default
2188 	 * FTRACE_STACK_SIZE.
2189 	 */
2190 	preempt_disable_notrace();
2191 
2192 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193 	/*
2194 	 * We don't need any atomic variables, just a barrier.
2195 	 * If an interrupt comes in, we don't care, because it would
2196 	 * have exited and put the counter back to what we want.
2197 	 * We just need a barrier to keep gcc from moving things
2198 	 * around.
2199 	 */
2200 	barrier();
2201 	if (use_stack == 1) {
2202 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2203 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2204 
2205 		if (regs)
2206 			save_stack_trace_regs(regs, &trace);
2207 		else
2208 			save_stack_trace(&trace);
2209 
2210 		if (trace.nr_entries > size)
2211 			size = trace.nr_entries;
2212 	} else
2213 		/* From now on, use_stack is a boolean */
2214 		use_stack = 0;
2215 
2216 	size *= sizeof(unsigned long);
2217 
2218 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219 					  sizeof(*entry) + size, flags, pc);
2220 	if (!event)
2221 		goto out;
2222 	entry = ring_buffer_event_data(event);
2223 
2224 	memset(&entry->caller, 0, size);
2225 
2226 	if (use_stack)
2227 		memcpy(&entry->caller, trace.entries,
2228 		       trace.nr_entries * sizeof(unsigned long));
2229 	else {
2230 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2231 		trace.entries		= entry->caller;
2232 		if (regs)
2233 			save_stack_trace_regs(regs, &trace);
2234 		else
2235 			save_stack_trace(&trace);
2236 	}
2237 
2238 	entry->size = trace.nr_entries;
2239 
2240 	if (!call_filter_check_discard(call, entry, buffer, event))
2241 		__buffer_unlock_commit(buffer, event);
2242 
2243  out:
2244 	/* Again, don't let gcc optimize things here */
2245 	barrier();
2246 	__this_cpu_dec(ftrace_stack_reserve);
2247 	preempt_enable_notrace();
2248 
2249 }
2250 
2251 static inline void ftrace_trace_stack(struct trace_array *tr,
2252 				      struct ring_buffer *buffer,
2253 				      unsigned long flags,
2254 				      int skip, int pc, struct pt_regs *regs)
2255 {
2256 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257 		return;
2258 
2259 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260 }
2261 
2262 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263 		   int pc)
2264 {
2265 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266 }
2267 
2268 /**
2269  * trace_dump_stack - record a stack back trace in the trace buffer
2270  * @skip: Number of functions to skip (helper handlers)
2271  */
2272 void trace_dump_stack(int skip)
2273 {
2274 	unsigned long flags;
2275 
2276 	if (tracing_disabled || tracing_selftest_running)
2277 		return;
2278 
2279 	local_save_flags(flags);
2280 
2281 	/*
2282 	 * Skip 3 more, seems to get us at the caller of
2283 	 * this function.
2284 	 */
2285 	skip += 3;
2286 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287 			     flags, skip, preempt_count(), NULL);
2288 }
2289 
2290 static DEFINE_PER_CPU(int, user_stack_count);
2291 
2292 void
2293 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294 {
2295 	struct trace_event_call *call = &event_user_stack;
2296 	struct ring_buffer_event *event;
2297 	struct userstack_entry *entry;
2298 	struct stack_trace trace;
2299 
2300 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301 		return;
2302 
2303 	/*
2304 	 * NMIs can not handle page faults, even with fix ups.
2305 	 * The save user stack can (and often does) fault.
2306 	 */
2307 	if (unlikely(in_nmi()))
2308 		return;
2309 
2310 	/*
2311 	 * prevent recursion, since the user stack tracing may
2312 	 * trigger other kernel events.
2313 	 */
2314 	preempt_disable();
2315 	if (__this_cpu_read(user_stack_count))
2316 		goto out;
2317 
2318 	__this_cpu_inc(user_stack_count);
2319 
2320 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321 					  sizeof(*entry), flags, pc);
2322 	if (!event)
2323 		goto out_drop_count;
2324 	entry	= ring_buffer_event_data(event);
2325 
2326 	entry->tgid		= current->tgid;
2327 	memset(&entry->caller, 0, sizeof(entry->caller));
2328 
2329 	trace.nr_entries	= 0;
2330 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2331 	trace.skip		= 0;
2332 	trace.entries		= entry->caller;
2333 
2334 	save_stack_trace_user(&trace);
2335 	if (!call_filter_check_discard(call, entry, buffer, event))
2336 		__buffer_unlock_commit(buffer, event);
2337 
2338  out_drop_count:
2339 	__this_cpu_dec(user_stack_count);
2340  out:
2341 	preempt_enable();
2342 }
2343 
2344 #ifdef UNUSED
2345 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2346 {
2347 	ftrace_trace_userstack(tr, flags, preempt_count());
2348 }
2349 #endif /* UNUSED */
2350 
2351 #endif /* CONFIG_STACKTRACE */
2352 
2353 /* created for use with alloc_percpu */
2354 struct trace_buffer_struct {
2355 	int nesting;
2356 	char buffer[4][TRACE_BUF_SIZE];
2357 };
2358 
2359 static struct trace_buffer_struct *trace_percpu_buffer;
2360 
2361 /*
2362  * Thise allows for lockless recording.  If we're nested too deeply, then
2363  * this returns NULL.
2364  */
2365 static char *get_trace_buf(void)
2366 {
2367 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368 
2369 	if (!buffer || buffer->nesting >= 4)
2370 		return NULL;
2371 
2372 	return &buffer->buffer[buffer->nesting++][0];
2373 }
2374 
2375 static void put_trace_buf(void)
2376 {
2377 	this_cpu_dec(trace_percpu_buffer->nesting);
2378 }
2379 
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382 	struct trace_buffer_struct *buffers;
2383 
2384 	buffers = alloc_percpu(struct trace_buffer_struct);
2385 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386 		return -ENOMEM;
2387 
2388 	trace_percpu_buffer = buffers;
2389 	return 0;
2390 }
2391 
2392 static int buffers_allocated;
2393 
2394 void trace_printk_init_buffers(void)
2395 {
2396 	if (buffers_allocated)
2397 		return;
2398 
2399 	if (alloc_percpu_trace_buffer())
2400 		return;
2401 
2402 	/* trace_printk() is for debug use only. Don't use it in production. */
2403 
2404 	pr_warn("\n");
2405 	pr_warn("**********************************************************\n");
2406 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407 	pr_warn("**                                                      **\n");
2408 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409 	pr_warn("**                                                      **\n");
2410 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411 	pr_warn("** unsafe for production use.                           **\n");
2412 	pr_warn("**                                                      **\n");
2413 	pr_warn("** If you see this message and you are not debugging    **\n");
2414 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415 	pr_warn("**                                                      **\n");
2416 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417 	pr_warn("**********************************************************\n");
2418 
2419 	/* Expand the buffers to set size */
2420 	tracing_update_buffers();
2421 
2422 	buffers_allocated = 1;
2423 
2424 	/*
2425 	 * trace_printk_init_buffers() can be called by modules.
2426 	 * If that happens, then we need to start cmdline recording
2427 	 * directly here. If the global_trace.buffer is already
2428 	 * allocated here, then this was called by module code.
2429 	 */
2430 	if (global_trace.trace_buffer.buffer)
2431 		tracing_start_cmdline_record();
2432 }
2433 
2434 void trace_printk_start_comm(void)
2435 {
2436 	/* Start tracing comms if trace printk is set */
2437 	if (!buffers_allocated)
2438 		return;
2439 	tracing_start_cmdline_record();
2440 }
2441 
2442 static void trace_printk_start_stop_comm(int enabled)
2443 {
2444 	if (!buffers_allocated)
2445 		return;
2446 
2447 	if (enabled)
2448 		tracing_start_cmdline_record();
2449 	else
2450 		tracing_stop_cmdline_record();
2451 }
2452 
2453 /**
2454  * trace_vbprintk - write binary msg to tracing buffer
2455  *
2456  */
2457 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458 {
2459 	struct trace_event_call *call = &event_bprint;
2460 	struct ring_buffer_event *event;
2461 	struct ring_buffer *buffer;
2462 	struct trace_array *tr = &global_trace;
2463 	struct bprint_entry *entry;
2464 	unsigned long flags;
2465 	char *tbuffer;
2466 	int len = 0, size, pc;
2467 
2468 	if (unlikely(tracing_selftest_running || tracing_disabled))
2469 		return 0;
2470 
2471 	/* Don't pollute graph traces with trace_vprintk internals */
2472 	pause_graph_tracing();
2473 
2474 	pc = preempt_count();
2475 	preempt_disable_notrace();
2476 
2477 	tbuffer = get_trace_buf();
2478 	if (!tbuffer) {
2479 		len = 0;
2480 		goto out_nobuffer;
2481 	}
2482 
2483 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484 
2485 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486 		goto out;
2487 
2488 	local_save_flags(flags);
2489 	size = sizeof(*entry) + sizeof(u32) * len;
2490 	buffer = tr->trace_buffer.buffer;
2491 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492 					  flags, pc);
2493 	if (!event)
2494 		goto out;
2495 	entry = ring_buffer_event_data(event);
2496 	entry->ip			= ip;
2497 	entry->fmt			= fmt;
2498 
2499 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2501 		__buffer_unlock_commit(buffer, event);
2502 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503 	}
2504 
2505 out:
2506 	put_trace_buf();
2507 
2508 out_nobuffer:
2509 	preempt_enable_notrace();
2510 	unpause_graph_tracing();
2511 
2512 	return len;
2513 }
2514 EXPORT_SYMBOL_GPL(trace_vbprintk);
2515 
2516 static int
2517 __trace_array_vprintk(struct ring_buffer *buffer,
2518 		      unsigned long ip, const char *fmt, va_list args)
2519 {
2520 	struct trace_event_call *call = &event_print;
2521 	struct ring_buffer_event *event;
2522 	int len = 0, size, pc;
2523 	struct print_entry *entry;
2524 	unsigned long flags;
2525 	char *tbuffer;
2526 
2527 	if (tracing_disabled || tracing_selftest_running)
2528 		return 0;
2529 
2530 	/* Don't pollute graph traces with trace_vprintk internals */
2531 	pause_graph_tracing();
2532 
2533 	pc = preempt_count();
2534 	preempt_disable_notrace();
2535 
2536 
2537 	tbuffer = get_trace_buf();
2538 	if (!tbuffer) {
2539 		len = 0;
2540 		goto out_nobuffer;
2541 	}
2542 
2543 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544 
2545 	local_save_flags(flags);
2546 	size = sizeof(*entry) + len + 1;
2547 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548 					  flags, pc);
2549 	if (!event)
2550 		goto out;
2551 	entry = ring_buffer_event_data(event);
2552 	entry->ip = ip;
2553 
2554 	memcpy(&entry->buf, tbuffer, len + 1);
2555 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2556 		__buffer_unlock_commit(buffer, event);
2557 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558 	}
2559 
2560 out:
2561 	put_trace_buf();
2562 
2563 out_nobuffer:
2564 	preempt_enable_notrace();
2565 	unpause_graph_tracing();
2566 
2567 	return len;
2568 }
2569 
2570 int trace_array_vprintk(struct trace_array *tr,
2571 			unsigned long ip, const char *fmt, va_list args)
2572 {
2573 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574 }
2575 
2576 int trace_array_printk(struct trace_array *tr,
2577 		       unsigned long ip, const char *fmt, ...)
2578 {
2579 	int ret;
2580 	va_list ap;
2581 
2582 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583 		return 0;
2584 
2585 	va_start(ap, fmt);
2586 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2587 	va_end(ap);
2588 	return ret;
2589 }
2590 
2591 int trace_array_printk_buf(struct ring_buffer *buffer,
2592 			   unsigned long ip, const char *fmt, ...)
2593 {
2594 	int ret;
2595 	va_list ap;
2596 
2597 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598 		return 0;
2599 
2600 	va_start(ap, fmt);
2601 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602 	va_end(ap);
2603 	return ret;
2604 }
2605 
2606 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607 {
2608 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2609 }
2610 EXPORT_SYMBOL_GPL(trace_vprintk);
2611 
2612 static void trace_iterator_increment(struct trace_iterator *iter)
2613 {
2614 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615 
2616 	iter->idx++;
2617 	if (buf_iter)
2618 		ring_buffer_read(buf_iter, NULL);
2619 }
2620 
2621 static struct trace_entry *
2622 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623 		unsigned long *lost_events)
2624 {
2625 	struct ring_buffer_event *event;
2626 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627 
2628 	if (buf_iter)
2629 		event = ring_buffer_iter_peek(buf_iter, ts);
2630 	else
2631 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632 					 lost_events);
2633 
2634 	if (event) {
2635 		iter->ent_size = ring_buffer_event_length(event);
2636 		return ring_buffer_event_data(event);
2637 	}
2638 	iter->ent_size = 0;
2639 	return NULL;
2640 }
2641 
2642 static struct trace_entry *
2643 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644 		  unsigned long *missing_events, u64 *ent_ts)
2645 {
2646 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647 	struct trace_entry *ent, *next = NULL;
2648 	unsigned long lost_events = 0, next_lost = 0;
2649 	int cpu_file = iter->cpu_file;
2650 	u64 next_ts = 0, ts;
2651 	int next_cpu = -1;
2652 	int next_size = 0;
2653 	int cpu;
2654 
2655 	/*
2656 	 * If we are in a per_cpu trace file, don't bother by iterating over
2657 	 * all cpu and peek directly.
2658 	 */
2659 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2661 			return NULL;
2662 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663 		if (ent_cpu)
2664 			*ent_cpu = cpu_file;
2665 
2666 		return ent;
2667 	}
2668 
2669 	for_each_tracing_cpu(cpu) {
2670 
2671 		if (ring_buffer_empty_cpu(buffer, cpu))
2672 			continue;
2673 
2674 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675 
2676 		/*
2677 		 * Pick the entry with the smallest timestamp:
2678 		 */
2679 		if (ent && (!next || ts < next_ts)) {
2680 			next = ent;
2681 			next_cpu = cpu;
2682 			next_ts = ts;
2683 			next_lost = lost_events;
2684 			next_size = iter->ent_size;
2685 		}
2686 	}
2687 
2688 	iter->ent_size = next_size;
2689 
2690 	if (ent_cpu)
2691 		*ent_cpu = next_cpu;
2692 
2693 	if (ent_ts)
2694 		*ent_ts = next_ts;
2695 
2696 	if (missing_events)
2697 		*missing_events = next_lost;
2698 
2699 	return next;
2700 }
2701 
2702 /* Find the next real entry, without updating the iterator itself */
2703 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704 					  int *ent_cpu, u64 *ent_ts)
2705 {
2706 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707 }
2708 
2709 /* Find the next real entry, and increment the iterator to the next entry */
2710 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711 {
2712 	iter->ent = __find_next_entry(iter, &iter->cpu,
2713 				      &iter->lost_events, &iter->ts);
2714 
2715 	if (iter->ent)
2716 		trace_iterator_increment(iter);
2717 
2718 	return iter->ent ? iter : NULL;
2719 }
2720 
2721 static void trace_consume(struct trace_iterator *iter)
2722 {
2723 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724 			    &iter->lost_events);
2725 }
2726 
2727 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728 {
2729 	struct trace_iterator *iter = m->private;
2730 	int i = (int)*pos;
2731 	void *ent;
2732 
2733 	WARN_ON_ONCE(iter->leftover);
2734 
2735 	(*pos)++;
2736 
2737 	/* can't go backwards */
2738 	if (iter->idx > i)
2739 		return NULL;
2740 
2741 	if (iter->idx < 0)
2742 		ent = trace_find_next_entry_inc(iter);
2743 	else
2744 		ent = iter;
2745 
2746 	while (ent && iter->idx < i)
2747 		ent = trace_find_next_entry_inc(iter);
2748 
2749 	iter->pos = *pos;
2750 
2751 	return ent;
2752 }
2753 
2754 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755 {
2756 	struct ring_buffer_event *event;
2757 	struct ring_buffer_iter *buf_iter;
2758 	unsigned long entries = 0;
2759 	u64 ts;
2760 
2761 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762 
2763 	buf_iter = trace_buffer_iter(iter, cpu);
2764 	if (!buf_iter)
2765 		return;
2766 
2767 	ring_buffer_iter_reset(buf_iter);
2768 
2769 	/*
2770 	 * We could have the case with the max latency tracers
2771 	 * that a reset never took place on a cpu. This is evident
2772 	 * by the timestamp being before the start of the buffer.
2773 	 */
2774 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775 		if (ts >= iter->trace_buffer->time_start)
2776 			break;
2777 		entries++;
2778 		ring_buffer_read(buf_iter, NULL);
2779 	}
2780 
2781 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782 }
2783 
2784 /*
2785  * The current tracer is copied to avoid a global locking
2786  * all around.
2787  */
2788 static void *s_start(struct seq_file *m, loff_t *pos)
2789 {
2790 	struct trace_iterator *iter = m->private;
2791 	struct trace_array *tr = iter->tr;
2792 	int cpu_file = iter->cpu_file;
2793 	void *p = NULL;
2794 	loff_t l = 0;
2795 	int cpu;
2796 
2797 	/*
2798 	 * copy the tracer to avoid using a global lock all around.
2799 	 * iter->trace is a copy of current_trace, the pointer to the
2800 	 * name may be used instead of a strcmp(), as iter->trace->name
2801 	 * will point to the same string as current_trace->name.
2802 	 */
2803 	mutex_lock(&trace_types_lock);
2804 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805 		*iter->trace = *tr->current_trace;
2806 	mutex_unlock(&trace_types_lock);
2807 
2808 #ifdef CONFIG_TRACER_MAX_TRACE
2809 	if (iter->snapshot && iter->trace->use_max_tr)
2810 		return ERR_PTR(-EBUSY);
2811 #endif
2812 
2813 	if (!iter->snapshot)
2814 		atomic_inc(&trace_record_cmdline_disabled);
2815 
2816 	if (*pos != iter->pos) {
2817 		iter->ent = NULL;
2818 		iter->cpu = 0;
2819 		iter->idx = -1;
2820 
2821 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822 			for_each_tracing_cpu(cpu)
2823 				tracing_iter_reset(iter, cpu);
2824 		} else
2825 			tracing_iter_reset(iter, cpu_file);
2826 
2827 		iter->leftover = 0;
2828 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829 			;
2830 
2831 	} else {
2832 		/*
2833 		 * If we overflowed the seq_file before, then we want
2834 		 * to just reuse the trace_seq buffer again.
2835 		 */
2836 		if (iter->leftover)
2837 			p = iter;
2838 		else {
2839 			l = *pos - 1;
2840 			p = s_next(m, p, &l);
2841 		}
2842 	}
2843 
2844 	trace_event_read_lock();
2845 	trace_access_lock(cpu_file);
2846 	return p;
2847 }
2848 
2849 static void s_stop(struct seq_file *m, void *p)
2850 {
2851 	struct trace_iterator *iter = m->private;
2852 
2853 #ifdef CONFIG_TRACER_MAX_TRACE
2854 	if (iter->snapshot && iter->trace->use_max_tr)
2855 		return;
2856 #endif
2857 
2858 	if (!iter->snapshot)
2859 		atomic_dec(&trace_record_cmdline_disabled);
2860 
2861 	trace_access_unlock(iter->cpu_file);
2862 	trace_event_read_unlock();
2863 }
2864 
2865 static void
2866 get_total_entries(struct trace_buffer *buf,
2867 		  unsigned long *total, unsigned long *entries)
2868 {
2869 	unsigned long count;
2870 	int cpu;
2871 
2872 	*total = 0;
2873 	*entries = 0;
2874 
2875 	for_each_tracing_cpu(cpu) {
2876 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877 		/*
2878 		 * If this buffer has skipped entries, then we hold all
2879 		 * entries for the trace and we need to ignore the
2880 		 * ones before the time stamp.
2881 		 */
2882 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884 			/* total is the same as the entries */
2885 			*total += count;
2886 		} else
2887 			*total += count +
2888 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2889 		*entries += count;
2890 	}
2891 }
2892 
2893 static void print_lat_help_header(struct seq_file *m)
2894 {
2895 	seq_puts(m, "#                  _------=> CPU#            \n"
2896 		    "#                 / _-----=> irqs-off        \n"
2897 		    "#                | / _----=> need-resched    \n"
2898 		    "#                || / _---=> hardirq/softirq \n"
2899 		    "#                ||| / _--=> preempt-depth   \n"
2900 		    "#                |||| /     delay            \n"
2901 		    "#  cmd     pid   ||||| time  |   caller      \n"
2902 		    "#     \\   /      |||||  \\    |   /         \n");
2903 }
2904 
2905 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2906 {
2907 	unsigned long total;
2908 	unsigned long entries;
2909 
2910 	get_total_entries(buf, &total, &entries);
2911 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2912 		   entries, total, num_online_cpus());
2913 	seq_puts(m, "#\n");
2914 }
2915 
2916 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2917 {
2918 	print_event_info(buf, m);
2919 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2920 		    "#              | |       |          |         |\n");
2921 }
2922 
2923 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2924 {
2925 	print_event_info(buf, m);
2926 	seq_puts(m, "#                              _-----=> irqs-off\n"
2927 		    "#                             / _----=> need-resched\n"
2928 		    "#                            | / _---=> hardirq/softirq\n"
2929 		    "#                            || / _--=> preempt-depth\n"
2930 		    "#                            ||| /     delay\n"
2931 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2932 		    "#              | |       |   ||||       |         |\n");
2933 }
2934 
2935 void
2936 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937 {
2938 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939 	struct trace_buffer *buf = iter->trace_buffer;
2940 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941 	struct tracer *type = iter->trace;
2942 	unsigned long entries;
2943 	unsigned long total;
2944 	const char *name = "preemption";
2945 
2946 	name = type->name;
2947 
2948 	get_total_entries(buf, &total, &entries);
2949 
2950 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951 		   name, UTS_RELEASE);
2952 	seq_puts(m, "# -----------------------------------"
2953 		 "---------------------------------\n");
2954 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956 		   nsecs_to_usecs(data->saved_latency),
2957 		   entries,
2958 		   total,
2959 		   buf->cpu,
2960 #if defined(CONFIG_PREEMPT_NONE)
2961 		   "server",
2962 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963 		   "desktop",
2964 #elif defined(CONFIG_PREEMPT)
2965 		   "preempt",
2966 #else
2967 		   "unknown",
2968 #endif
2969 		   /* These are reserved for later use */
2970 		   0, 0, 0, 0);
2971 #ifdef CONFIG_SMP
2972 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2973 #else
2974 	seq_puts(m, ")\n");
2975 #endif
2976 	seq_puts(m, "#    -----------------\n");
2977 	seq_printf(m, "#    | task: %.16s-%d "
2978 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979 		   data->comm, data->pid,
2980 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981 		   data->policy, data->rt_priority);
2982 	seq_puts(m, "#    -----------------\n");
2983 
2984 	if (data->critical_start) {
2985 		seq_puts(m, "#  => started at: ");
2986 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987 		trace_print_seq(m, &iter->seq);
2988 		seq_puts(m, "\n#  => ended at:   ");
2989 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990 		trace_print_seq(m, &iter->seq);
2991 		seq_puts(m, "\n#\n");
2992 	}
2993 
2994 	seq_puts(m, "#\n");
2995 }
2996 
2997 static void test_cpu_buff_start(struct trace_iterator *iter)
2998 {
2999 	struct trace_seq *s = &iter->seq;
3000 	struct trace_array *tr = iter->tr;
3001 
3002 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003 		return;
3004 
3005 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006 		return;
3007 
3008 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009 		return;
3010 
3011 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012 		return;
3013 
3014 	if (iter->started)
3015 		cpumask_set_cpu(iter->cpu, iter->started);
3016 
3017 	/* Don't print started cpu buffer for the first entry of the trace */
3018 	if (iter->idx > 1)
3019 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020 				iter->cpu);
3021 }
3022 
3023 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024 {
3025 	struct trace_array *tr = iter->tr;
3026 	struct trace_seq *s = &iter->seq;
3027 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028 	struct trace_entry *entry;
3029 	struct trace_event *event;
3030 
3031 	entry = iter->ent;
3032 
3033 	test_cpu_buff_start(iter);
3034 
3035 	event = ftrace_find_event(entry->type);
3036 
3037 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039 			trace_print_lat_context(iter);
3040 		else
3041 			trace_print_context(iter);
3042 	}
3043 
3044 	if (trace_seq_has_overflowed(s))
3045 		return TRACE_TYPE_PARTIAL_LINE;
3046 
3047 	if (event)
3048 		return event->funcs->trace(iter, sym_flags, event);
3049 
3050 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051 
3052 	return trace_handle_return(s);
3053 }
3054 
3055 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056 {
3057 	struct trace_array *tr = iter->tr;
3058 	struct trace_seq *s = &iter->seq;
3059 	struct trace_entry *entry;
3060 	struct trace_event *event;
3061 
3062 	entry = iter->ent;
3063 
3064 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065 		trace_seq_printf(s, "%d %d %llu ",
3066 				 entry->pid, iter->cpu, iter->ts);
3067 
3068 	if (trace_seq_has_overflowed(s))
3069 		return TRACE_TYPE_PARTIAL_LINE;
3070 
3071 	event = ftrace_find_event(entry->type);
3072 	if (event)
3073 		return event->funcs->raw(iter, 0, event);
3074 
3075 	trace_seq_printf(s, "%d ?\n", entry->type);
3076 
3077 	return trace_handle_return(s);
3078 }
3079 
3080 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081 {
3082 	struct trace_array *tr = iter->tr;
3083 	struct trace_seq *s = &iter->seq;
3084 	unsigned char newline = '\n';
3085 	struct trace_entry *entry;
3086 	struct trace_event *event;
3087 
3088 	entry = iter->ent;
3089 
3090 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3092 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3094 		if (trace_seq_has_overflowed(s))
3095 			return TRACE_TYPE_PARTIAL_LINE;
3096 	}
3097 
3098 	event = ftrace_find_event(entry->type);
3099 	if (event) {
3100 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101 		if (ret != TRACE_TYPE_HANDLED)
3102 			return ret;
3103 	}
3104 
3105 	SEQ_PUT_FIELD(s, newline);
3106 
3107 	return trace_handle_return(s);
3108 }
3109 
3110 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111 {
3112 	struct trace_array *tr = iter->tr;
3113 	struct trace_seq *s = &iter->seq;
3114 	struct trace_entry *entry;
3115 	struct trace_event *event;
3116 
3117 	entry = iter->ent;
3118 
3119 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120 		SEQ_PUT_FIELD(s, entry->pid);
3121 		SEQ_PUT_FIELD(s, iter->cpu);
3122 		SEQ_PUT_FIELD(s, iter->ts);
3123 		if (trace_seq_has_overflowed(s))
3124 			return TRACE_TYPE_PARTIAL_LINE;
3125 	}
3126 
3127 	event = ftrace_find_event(entry->type);
3128 	return event ? event->funcs->binary(iter, 0, event) :
3129 		TRACE_TYPE_HANDLED;
3130 }
3131 
3132 int trace_empty(struct trace_iterator *iter)
3133 {
3134 	struct ring_buffer_iter *buf_iter;
3135 	int cpu;
3136 
3137 	/* If we are looking at one CPU buffer, only check that one */
3138 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139 		cpu = iter->cpu_file;
3140 		buf_iter = trace_buffer_iter(iter, cpu);
3141 		if (buf_iter) {
3142 			if (!ring_buffer_iter_empty(buf_iter))
3143 				return 0;
3144 		} else {
3145 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146 				return 0;
3147 		}
3148 		return 1;
3149 	}
3150 
3151 	for_each_tracing_cpu(cpu) {
3152 		buf_iter = trace_buffer_iter(iter, cpu);
3153 		if (buf_iter) {
3154 			if (!ring_buffer_iter_empty(buf_iter))
3155 				return 0;
3156 		} else {
3157 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158 				return 0;
3159 		}
3160 	}
3161 
3162 	return 1;
3163 }
3164 
3165 /*  Called with trace_event_read_lock() held. */
3166 enum print_line_t print_trace_line(struct trace_iterator *iter)
3167 {
3168 	struct trace_array *tr = iter->tr;
3169 	unsigned long trace_flags = tr->trace_flags;
3170 	enum print_line_t ret;
3171 
3172 	if (iter->lost_events) {
3173 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174 				 iter->cpu, iter->lost_events);
3175 		if (trace_seq_has_overflowed(&iter->seq))
3176 			return TRACE_TYPE_PARTIAL_LINE;
3177 	}
3178 
3179 	if (iter->trace && iter->trace->print_line) {
3180 		ret = iter->trace->print_line(iter);
3181 		if (ret != TRACE_TYPE_UNHANDLED)
3182 			return ret;
3183 	}
3184 
3185 	if (iter->ent->type == TRACE_BPUTS &&
3186 			trace_flags & TRACE_ITER_PRINTK &&
3187 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188 		return trace_print_bputs_msg_only(iter);
3189 
3190 	if (iter->ent->type == TRACE_BPRINT &&
3191 			trace_flags & TRACE_ITER_PRINTK &&
3192 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193 		return trace_print_bprintk_msg_only(iter);
3194 
3195 	if (iter->ent->type == TRACE_PRINT &&
3196 			trace_flags & TRACE_ITER_PRINTK &&
3197 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198 		return trace_print_printk_msg_only(iter);
3199 
3200 	if (trace_flags & TRACE_ITER_BIN)
3201 		return print_bin_fmt(iter);
3202 
3203 	if (trace_flags & TRACE_ITER_HEX)
3204 		return print_hex_fmt(iter);
3205 
3206 	if (trace_flags & TRACE_ITER_RAW)
3207 		return print_raw_fmt(iter);
3208 
3209 	return print_trace_fmt(iter);
3210 }
3211 
3212 void trace_latency_header(struct seq_file *m)
3213 {
3214 	struct trace_iterator *iter = m->private;
3215 	struct trace_array *tr = iter->tr;
3216 
3217 	/* print nothing if the buffers are empty */
3218 	if (trace_empty(iter))
3219 		return;
3220 
3221 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222 		print_trace_header(m, iter);
3223 
3224 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225 		print_lat_help_header(m);
3226 }
3227 
3228 void trace_default_header(struct seq_file *m)
3229 {
3230 	struct trace_iterator *iter = m->private;
3231 	struct trace_array *tr = iter->tr;
3232 	unsigned long trace_flags = tr->trace_flags;
3233 
3234 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235 		return;
3236 
3237 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238 		/* print nothing if the buffers are empty */
3239 		if (trace_empty(iter))
3240 			return;
3241 		print_trace_header(m, iter);
3242 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3243 			print_lat_help_header(m);
3244 	} else {
3245 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3247 				print_func_help_header_irq(iter->trace_buffer, m);
3248 			else
3249 				print_func_help_header(iter->trace_buffer, m);
3250 		}
3251 	}
3252 }
3253 
3254 static void test_ftrace_alive(struct seq_file *m)
3255 {
3256 	if (!ftrace_is_dead())
3257 		return;
3258 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3259 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3260 }
3261 
3262 #ifdef CONFIG_TRACER_MAX_TRACE
3263 static void show_snapshot_main_help(struct seq_file *m)
3264 {
3265 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3266 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3267 		    "#                      Takes a snapshot of the main buffer.\n"
3268 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3269 		    "#                      (Doesn't have to be '2' works with any number that\n"
3270 		    "#                       is not a '0' or '1')\n");
3271 }
3272 
3273 static void show_snapshot_percpu_help(struct seq_file *m)
3274 {
3275 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3276 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3277 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3279 #else
3280 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3281 		    "#                     Must use main snapshot file to allocate.\n");
3282 #endif
3283 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3284 		    "#                      (Doesn't have to be '2' works with any number that\n"
3285 		    "#                       is not a '0' or '1')\n");
3286 }
3287 
3288 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289 {
3290 	if (iter->tr->allocated_snapshot)
3291 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292 	else
3293 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294 
3295 	seq_puts(m, "# Snapshot commands:\n");
3296 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297 		show_snapshot_main_help(m);
3298 	else
3299 		show_snapshot_percpu_help(m);
3300 }
3301 #else
3302 /* Should never be called */
3303 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3304 #endif
3305 
3306 static int s_show(struct seq_file *m, void *v)
3307 {
3308 	struct trace_iterator *iter = v;
3309 	int ret;
3310 
3311 	if (iter->ent == NULL) {
3312 		if (iter->tr) {
3313 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314 			seq_puts(m, "#\n");
3315 			test_ftrace_alive(m);
3316 		}
3317 		if (iter->snapshot && trace_empty(iter))
3318 			print_snapshot_help(m, iter);
3319 		else if (iter->trace && iter->trace->print_header)
3320 			iter->trace->print_header(m);
3321 		else
3322 			trace_default_header(m);
3323 
3324 	} else if (iter->leftover) {
3325 		/*
3326 		 * If we filled the seq_file buffer earlier, we
3327 		 * want to just show it now.
3328 		 */
3329 		ret = trace_print_seq(m, &iter->seq);
3330 
3331 		/* ret should this time be zero, but you never know */
3332 		iter->leftover = ret;
3333 
3334 	} else {
3335 		print_trace_line(iter);
3336 		ret = trace_print_seq(m, &iter->seq);
3337 		/*
3338 		 * If we overflow the seq_file buffer, then it will
3339 		 * ask us for this data again at start up.
3340 		 * Use that instead.
3341 		 *  ret is 0 if seq_file write succeeded.
3342 		 *        -1 otherwise.
3343 		 */
3344 		iter->leftover = ret;
3345 	}
3346 
3347 	return 0;
3348 }
3349 
3350 /*
3351  * Should be used after trace_array_get(), trace_types_lock
3352  * ensures that i_cdev was already initialized.
3353  */
3354 static inline int tracing_get_cpu(struct inode *inode)
3355 {
3356 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3357 		return (long)inode->i_cdev - 1;
3358 	return RING_BUFFER_ALL_CPUS;
3359 }
3360 
3361 static const struct seq_operations tracer_seq_ops = {
3362 	.start		= s_start,
3363 	.next		= s_next,
3364 	.stop		= s_stop,
3365 	.show		= s_show,
3366 };
3367 
3368 static struct trace_iterator *
3369 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370 {
3371 	struct trace_array *tr = inode->i_private;
3372 	struct trace_iterator *iter;
3373 	int cpu;
3374 
3375 	if (tracing_disabled)
3376 		return ERR_PTR(-ENODEV);
3377 
3378 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379 	if (!iter)
3380 		return ERR_PTR(-ENOMEM);
3381 
3382 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383 				    GFP_KERNEL);
3384 	if (!iter->buffer_iter)
3385 		goto release;
3386 
3387 	/*
3388 	 * We make a copy of the current tracer to avoid concurrent
3389 	 * changes on it while we are reading.
3390 	 */
3391 	mutex_lock(&trace_types_lock);
3392 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393 	if (!iter->trace)
3394 		goto fail;
3395 
3396 	*iter->trace = *tr->current_trace;
3397 
3398 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399 		goto fail;
3400 
3401 	iter->tr = tr;
3402 
3403 #ifdef CONFIG_TRACER_MAX_TRACE
3404 	/* Currently only the top directory has a snapshot */
3405 	if (tr->current_trace->print_max || snapshot)
3406 		iter->trace_buffer = &tr->max_buffer;
3407 	else
3408 #endif
3409 		iter->trace_buffer = &tr->trace_buffer;
3410 	iter->snapshot = snapshot;
3411 	iter->pos = -1;
3412 	iter->cpu_file = tracing_get_cpu(inode);
3413 	mutex_init(&iter->mutex);
3414 
3415 	/* Notify the tracer early; before we stop tracing. */
3416 	if (iter->trace && iter->trace->open)
3417 		iter->trace->open(iter);
3418 
3419 	/* Annotate start of buffers if we had overruns */
3420 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422 
3423 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424 	if (trace_clocks[tr->clock_id].in_ns)
3425 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426 
3427 	/* stop the trace while dumping if we are not opening "snapshot" */
3428 	if (!iter->snapshot)
3429 		tracing_stop_tr(tr);
3430 
3431 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432 		for_each_tracing_cpu(cpu) {
3433 			iter->buffer_iter[cpu] =
3434 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435 		}
3436 		ring_buffer_read_prepare_sync();
3437 		for_each_tracing_cpu(cpu) {
3438 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3439 			tracing_iter_reset(iter, cpu);
3440 		}
3441 	} else {
3442 		cpu = iter->cpu_file;
3443 		iter->buffer_iter[cpu] =
3444 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445 		ring_buffer_read_prepare_sync();
3446 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3447 		tracing_iter_reset(iter, cpu);
3448 	}
3449 
3450 	mutex_unlock(&trace_types_lock);
3451 
3452 	return iter;
3453 
3454  fail:
3455 	mutex_unlock(&trace_types_lock);
3456 	kfree(iter->trace);
3457 	kfree(iter->buffer_iter);
3458 release:
3459 	seq_release_private(inode, file);
3460 	return ERR_PTR(-ENOMEM);
3461 }
3462 
3463 int tracing_open_generic(struct inode *inode, struct file *filp)
3464 {
3465 	if (tracing_disabled)
3466 		return -ENODEV;
3467 
3468 	filp->private_data = inode->i_private;
3469 	return 0;
3470 }
3471 
3472 bool tracing_is_disabled(void)
3473 {
3474 	return (tracing_disabled) ? true: false;
3475 }
3476 
3477 /*
3478  * Open and update trace_array ref count.
3479  * Must have the current trace_array passed to it.
3480  */
3481 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482 {
3483 	struct trace_array *tr = inode->i_private;
3484 
3485 	if (tracing_disabled)
3486 		return -ENODEV;
3487 
3488 	if (trace_array_get(tr) < 0)
3489 		return -ENODEV;
3490 
3491 	filp->private_data = inode->i_private;
3492 
3493 	return 0;
3494 }
3495 
3496 static int tracing_release(struct inode *inode, struct file *file)
3497 {
3498 	struct trace_array *tr = inode->i_private;
3499 	struct seq_file *m = file->private_data;
3500 	struct trace_iterator *iter;
3501 	int cpu;
3502 
3503 	if (!(file->f_mode & FMODE_READ)) {
3504 		trace_array_put(tr);
3505 		return 0;
3506 	}
3507 
3508 	/* Writes do not use seq_file */
3509 	iter = m->private;
3510 	mutex_lock(&trace_types_lock);
3511 
3512 	for_each_tracing_cpu(cpu) {
3513 		if (iter->buffer_iter[cpu])
3514 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515 	}
3516 
3517 	if (iter->trace && iter->trace->close)
3518 		iter->trace->close(iter);
3519 
3520 	if (!iter->snapshot)
3521 		/* reenable tracing if it was previously enabled */
3522 		tracing_start_tr(tr);
3523 
3524 	__trace_array_put(tr);
3525 
3526 	mutex_unlock(&trace_types_lock);
3527 
3528 	mutex_destroy(&iter->mutex);
3529 	free_cpumask_var(iter->started);
3530 	kfree(iter->trace);
3531 	kfree(iter->buffer_iter);
3532 	seq_release_private(inode, file);
3533 
3534 	return 0;
3535 }
3536 
3537 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538 {
3539 	struct trace_array *tr = inode->i_private;
3540 
3541 	trace_array_put(tr);
3542 	return 0;
3543 }
3544 
3545 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546 {
3547 	struct trace_array *tr = inode->i_private;
3548 
3549 	trace_array_put(tr);
3550 
3551 	return single_release(inode, file);
3552 }
3553 
3554 static int tracing_open(struct inode *inode, struct file *file)
3555 {
3556 	struct trace_array *tr = inode->i_private;
3557 	struct trace_iterator *iter;
3558 	int ret = 0;
3559 
3560 	if (trace_array_get(tr) < 0)
3561 		return -ENODEV;
3562 
3563 	/* If this file was open for write, then erase contents */
3564 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565 		int cpu = tracing_get_cpu(inode);
3566 
3567 		if (cpu == RING_BUFFER_ALL_CPUS)
3568 			tracing_reset_online_cpus(&tr->trace_buffer);
3569 		else
3570 			tracing_reset(&tr->trace_buffer, cpu);
3571 	}
3572 
3573 	if (file->f_mode & FMODE_READ) {
3574 		iter = __tracing_open(inode, file, false);
3575 		if (IS_ERR(iter))
3576 			ret = PTR_ERR(iter);
3577 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579 	}
3580 
3581 	if (ret < 0)
3582 		trace_array_put(tr);
3583 
3584 	return ret;
3585 }
3586 
3587 /*
3588  * Some tracers are not suitable for instance buffers.
3589  * A tracer is always available for the global array (toplevel)
3590  * or if it explicitly states that it is.
3591  */
3592 static bool
3593 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594 {
3595 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596 }
3597 
3598 /* Find the next tracer that this trace array may use */
3599 static struct tracer *
3600 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601 {
3602 	while (t && !trace_ok_for_array(t, tr))
3603 		t = t->next;
3604 
3605 	return t;
3606 }
3607 
3608 static void *
3609 t_next(struct seq_file *m, void *v, loff_t *pos)
3610 {
3611 	struct trace_array *tr = m->private;
3612 	struct tracer *t = v;
3613 
3614 	(*pos)++;
3615 
3616 	if (t)
3617 		t = get_tracer_for_array(tr, t->next);
3618 
3619 	return t;
3620 }
3621 
3622 static void *t_start(struct seq_file *m, loff_t *pos)
3623 {
3624 	struct trace_array *tr = m->private;
3625 	struct tracer *t;
3626 	loff_t l = 0;
3627 
3628 	mutex_lock(&trace_types_lock);
3629 
3630 	t = get_tracer_for_array(tr, trace_types);
3631 	for (; t && l < *pos; t = t_next(m, t, &l))
3632 			;
3633 
3634 	return t;
3635 }
3636 
3637 static void t_stop(struct seq_file *m, void *p)
3638 {
3639 	mutex_unlock(&trace_types_lock);
3640 }
3641 
3642 static int t_show(struct seq_file *m, void *v)
3643 {
3644 	struct tracer *t = v;
3645 
3646 	if (!t)
3647 		return 0;
3648 
3649 	seq_puts(m, t->name);
3650 	if (t->next)
3651 		seq_putc(m, ' ');
3652 	else
3653 		seq_putc(m, '\n');
3654 
3655 	return 0;
3656 }
3657 
3658 static const struct seq_operations show_traces_seq_ops = {
3659 	.start		= t_start,
3660 	.next		= t_next,
3661 	.stop		= t_stop,
3662 	.show		= t_show,
3663 };
3664 
3665 static int show_traces_open(struct inode *inode, struct file *file)
3666 {
3667 	struct trace_array *tr = inode->i_private;
3668 	struct seq_file *m;
3669 	int ret;
3670 
3671 	if (tracing_disabled)
3672 		return -ENODEV;
3673 
3674 	ret = seq_open(file, &show_traces_seq_ops);
3675 	if (ret)
3676 		return ret;
3677 
3678 	m = file->private_data;
3679 	m->private = tr;
3680 
3681 	return 0;
3682 }
3683 
3684 static ssize_t
3685 tracing_write_stub(struct file *filp, const char __user *ubuf,
3686 		   size_t count, loff_t *ppos)
3687 {
3688 	return count;
3689 }
3690 
3691 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692 {
3693 	int ret;
3694 
3695 	if (file->f_mode & FMODE_READ)
3696 		ret = seq_lseek(file, offset, whence);
3697 	else
3698 		file->f_pos = ret = 0;
3699 
3700 	return ret;
3701 }
3702 
3703 static const struct file_operations tracing_fops = {
3704 	.open		= tracing_open,
3705 	.read		= seq_read,
3706 	.write		= tracing_write_stub,
3707 	.llseek		= tracing_lseek,
3708 	.release	= tracing_release,
3709 };
3710 
3711 static const struct file_operations show_traces_fops = {
3712 	.open		= show_traces_open,
3713 	.read		= seq_read,
3714 	.release	= seq_release,
3715 	.llseek		= seq_lseek,
3716 };
3717 
3718 /*
3719  * The tracer itself will not take this lock, but still we want
3720  * to provide a consistent cpumask to user-space:
3721  */
3722 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723 
3724 /*
3725  * Temporary storage for the character representation of the
3726  * CPU bitmask (and one more byte for the newline):
3727  */
3728 static char mask_str[NR_CPUS + 1];
3729 
3730 static ssize_t
3731 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732 		     size_t count, loff_t *ppos)
3733 {
3734 	struct trace_array *tr = file_inode(filp)->i_private;
3735 	int len;
3736 
3737 	mutex_lock(&tracing_cpumask_update_lock);
3738 
3739 	len = snprintf(mask_str, count, "%*pb\n",
3740 		       cpumask_pr_args(tr->tracing_cpumask));
3741 	if (len >= count) {
3742 		count = -EINVAL;
3743 		goto out_err;
3744 	}
3745 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3746 
3747 out_err:
3748 	mutex_unlock(&tracing_cpumask_update_lock);
3749 
3750 	return count;
3751 }
3752 
3753 static ssize_t
3754 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755 		      size_t count, loff_t *ppos)
3756 {
3757 	struct trace_array *tr = file_inode(filp)->i_private;
3758 	cpumask_var_t tracing_cpumask_new;
3759 	int err, cpu;
3760 
3761 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762 		return -ENOMEM;
3763 
3764 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765 	if (err)
3766 		goto err_unlock;
3767 
3768 	mutex_lock(&tracing_cpumask_update_lock);
3769 
3770 	local_irq_disable();
3771 	arch_spin_lock(&tr->max_lock);
3772 	for_each_tracing_cpu(cpu) {
3773 		/*
3774 		 * Increase/decrease the disabled counter if we are
3775 		 * about to flip a bit in the cpumask:
3776 		 */
3777 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781 		}
3782 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786 		}
3787 	}
3788 	arch_spin_unlock(&tr->max_lock);
3789 	local_irq_enable();
3790 
3791 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792 
3793 	mutex_unlock(&tracing_cpumask_update_lock);
3794 	free_cpumask_var(tracing_cpumask_new);
3795 
3796 	return count;
3797 
3798 err_unlock:
3799 	free_cpumask_var(tracing_cpumask_new);
3800 
3801 	return err;
3802 }
3803 
3804 static const struct file_operations tracing_cpumask_fops = {
3805 	.open		= tracing_open_generic_tr,
3806 	.read		= tracing_cpumask_read,
3807 	.write		= tracing_cpumask_write,
3808 	.release	= tracing_release_generic_tr,
3809 	.llseek		= generic_file_llseek,
3810 };
3811 
3812 static int tracing_trace_options_show(struct seq_file *m, void *v)
3813 {
3814 	struct tracer_opt *trace_opts;
3815 	struct trace_array *tr = m->private;
3816 	u32 tracer_flags;
3817 	int i;
3818 
3819 	mutex_lock(&trace_types_lock);
3820 	tracer_flags = tr->current_trace->flags->val;
3821 	trace_opts = tr->current_trace->flags->opts;
3822 
3823 	for (i = 0; trace_options[i]; i++) {
3824 		if (tr->trace_flags & (1 << i))
3825 			seq_printf(m, "%s\n", trace_options[i]);
3826 		else
3827 			seq_printf(m, "no%s\n", trace_options[i]);
3828 	}
3829 
3830 	for (i = 0; trace_opts[i].name; i++) {
3831 		if (tracer_flags & trace_opts[i].bit)
3832 			seq_printf(m, "%s\n", trace_opts[i].name);
3833 		else
3834 			seq_printf(m, "no%s\n", trace_opts[i].name);
3835 	}
3836 	mutex_unlock(&trace_types_lock);
3837 
3838 	return 0;
3839 }
3840 
3841 static int __set_tracer_option(struct trace_array *tr,
3842 			       struct tracer_flags *tracer_flags,
3843 			       struct tracer_opt *opts, int neg)
3844 {
3845 	struct tracer *trace = tracer_flags->trace;
3846 	int ret;
3847 
3848 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849 	if (ret)
3850 		return ret;
3851 
3852 	if (neg)
3853 		tracer_flags->val &= ~opts->bit;
3854 	else
3855 		tracer_flags->val |= opts->bit;
3856 	return 0;
3857 }
3858 
3859 /* Try to assign a tracer specific option */
3860 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861 {
3862 	struct tracer *trace = tr->current_trace;
3863 	struct tracer_flags *tracer_flags = trace->flags;
3864 	struct tracer_opt *opts = NULL;
3865 	int i;
3866 
3867 	for (i = 0; tracer_flags->opts[i].name; i++) {
3868 		opts = &tracer_flags->opts[i];
3869 
3870 		if (strcmp(cmp, opts->name) == 0)
3871 			return __set_tracer_option(tr, trace->flags, opts, neg);
3872 	}
3873 
3874 	return -EINVAL;
3875 }
3876 
3877 /* Some tracers require overwrite to stay enabled */
3878 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879 {
3880 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881 		return -1;
3882 
3883 	return 0;
3884 }
3885 
3886 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887 {
3888 	/* do nothing if flag is already set */
3889 	if (!!(tr->trace_flags & mask) == !!enabled)
3890 		return 0;
3891 
3892 	/* Give the tracer a chance to approve the change */
3893 	if (tr->current_trace->flag_changed)
3894 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895 			return -EINVAL;
3896 
3897 	if (enabled)
3898 		tr->trace_flags |= mask;
3899 	else
3900 		tr->trace_flags &= ~mask;
3901 
3902 	if (mask == TRACE_ITER_RECORD_CMD)
3903 		trace_event_enable_cmd_record(enabled);
3904 
3905 	if (mask == TRACE_ITER_EVENT_FORK)
3906 		trace_event_follow_fork(tr, enabled);
3907 
3908 	if (mask == TRACE_ITER_OVERWRITE) {
3909 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912 #endif
3913 	}
3914 
3915 	if (mask == TRACE_ITER_PRINTK) {
3916 		trace_printk_start_stop_comm(enabled);
3917 		trace_printk_control(enabled);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
3923 static int trace_set_options(struct trace_array *tr, char *option)
3924 {
3925 	char *cmp;
3926 	int neg = 0;
3927 	int ret = -ENODEV;
3928 	int i;
3929 	size_t orig_len = strlen(option);
3930 
3931 	cmp = strstrip(option);
3932 
3933 	if (strncmp(cmp, "no", 2) == 0) {
3934 		neg = 1;
3935 		cmp += 2;
3936 	}
3937 
3938 	mutex_lock(&trace_types_lock);
3939 
3940 	for (i = 0; trace_options[i]; i++) {
3941 		if (strcmp(cmp, trace_options[i]) == 0) {
3942 			ret = set_tracer_flag(tr, 1 << i, !neg);
3943 			break;
3944 		}
3945 	}
3946 
3947 	/* If no option could be set, test the specific tracer options */
3948 	if (!trace_options[i])
3949 		ret = set_tracer_option(tr, cmp, neg);
3950 
3951 	mutex_unlock(&trace_types_lock);
3952 
3953 	/*
3954 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3955 	 * turn it back into a space.
3956 	 */
3957 	if (orig_len > strlen(option))
3958 		option[strlen(option)] = ' ';
3959 
3960 	return ret;
3961 }
3962 
3963 static void __init apply_trace_boot_options(void)
3964 {
3965 	char *buf = trace_boot_options_buf;
3966 	char *option;
3967 
3968 	while (true) {
3969 		option = strsep(&buf, ",");
3970 
3971 		if (!option)
3972 			break;
3973 
3974 		if (*option)
3975 			trace_set_options(&global_trace, option);
3976 
3977 		/* Put back the comma to allow this to be called again */
3978 		if (buf)
3979 			*(buf - 1) = ',';
3980 	}
3981 }
3982 
3983 static ssize_t
3984 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985 			size_t cnt, loff_t *ppos)
3986 {
3987 	struct seq_file *m = filp->private_data;
3988 	struct trace_array *tr = m->private;
3989 	char buf[64];
3990 	int ret;
3991 
3992 	if (cnt >= sizeof(buf))
3993 		return -EINVAL;
3994 
3995 	if (copy_from_user(buf, ubuf, cnt))
3996 		return -EFAULT;
3997 
3998 	buf[cnt] = 0;
3999 
4000 	ret = trace_set_options(tr, buf);
4001 	if (ret < 0)
4002 		return ret;
4003 
4004 	*ppos += cnt;
4005 
4006 	return cnt;
4007 }
4008 
4009 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010 {
4011 	struct trace_array *tr = inode->i_private;
4012 	int ret;
4013 
4014 	if (tracing_disabled)
4015 		return -ENODEV;
4016 
4017 	if (trace_array_get(tr) < 0)
4018 		return -ENODEV;
4019 
4020 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021 	if (ret < 0)
4022 		trace_array_put(tr);
4023 
4024 	return ret;
4025 }
4026 
4027 static const struct file_operations tracing_iter_fops = {
4028 	.open		= tracing_trace_options_open,
4029 	.read		= seq_read,
4030 	.llseek		= seq_lseek,
4031 	.release	= tracing_single_release_tr,
4032 	.write		= tracing_trace_options_write,
4033 };
4034 
4035 static const char readme_msg[] =
4036 	"tracing mini-HOWTO:\n\n"
4037 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4038 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4039 	" Important files:\n"
4040 	"  trace\t\t\t- The static contents of the buffer\n"
4041 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4042 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4043 	"  current_tracer\t- function and latency tracers\n"
4044 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4045 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4046 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4047 	"  trace_clock\t\t-change the clock used to order events\n"
4048 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4049 	"      global:   Synced across CPUs but slows tracing down.\n"
4050 	"     counter:   Not a clock, but just an increment\n"
4051 	"      uptime:   Jiffy counter from time of boot\n"
4052 	"        perf:   Same clock that perf events use\n"
4053 #ifdef CONFIG_X86_64
4054 	"     x86-tsc:   TSC cycle counter\n"
4055 #endif
4056 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4057 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4058 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4059 	"\t\t\t  Remove sub-buffer with rmdir\n"
4060 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4061 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4062 	"\t\t\t  option name\n"
4063 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4064 #ifdef CONFIG_DYNAMIC_FTRACE
4065 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4066 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4067 	"\t\t\t  functions\n"
4068 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4069 	"\t     modules: Can select a group via module\n"
4070 	"\t      Format: :mod:<module-name>\n"
4071 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4072 	"\t    triggers: a command to perform when function is hit\n"
4073 	"\t      Format: <function>:<trigger>[:count]\n"
4074 	"\t     trigger: traceon, traceoff\n"
4075 	"\t\t      enable_event:<system>:<event>\n"
4076 	"\t\t      disable_event:<system>:<event>\n"
4077 #ifdef CONFIG_STACKTRACE
4078 	"\t\t      stacktrace\n"
4079 #endif
4080 #ifdef CONFIG_TRACER_SNAPSHOT
4081 	"\t\t      snapshot\n"
4082 #endif
4083 	"\t\t      dump\n"
4084 	"\t\t      cpudump\n"
4085 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4086 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4087 	"\t     The first one will disable tracing every time do_fault is hit\n"
4088 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4089 	"\t       The first time do trap is hit and it disables tracing, the\n"
4090 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4091 	"\t       the counter will not decrement. It only decrements when the\n"
4092 	"\t       trigger did work\n"
4093 	"\t     To remove trigger without count:\n"
4094 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4095 	"\t     To remove trigger with a count:\n"
4096 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4097 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4098 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4099 	"\t    modules: Can select a group via module command :mod:\n"
4100 	"\t    Does not accept triggers\n"
4101 #endif /* CONFIG_DYNAMIC_FTRACE */
4102 #ifdef CONFIG_FUNCTION_TRACER
4103 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4104 	"\t\t    (function)\n"
4105 #endif
4106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4107 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4108 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4109 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4110 #endif
4111 #ifdef CONFIG_TRACER_SNAPSHOT
4112 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4113 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4114 	"\t\t\t  information\n"
4115 #endif
4116 #ifdef CONFIG_STACK_TRACER
4117 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4118 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4119 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4120 	"\t\t\t  new trace)\n"
4121 #ifdef CONFIG_DYNAMIC_FTRACE
4122 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4123 	"\t\t\t  traces\n"
4124 #endif
4125 #endif /* CONFIG_STACK_TRACER */
4126 #ifdef CONFIG_KPROBE_EVENT
4127 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4128 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4129 #endif
4130 #ifdef CONFIG_UPROBE_EVENT
4131 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4132 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4133 #endif
4134 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4135 	"\t  accepts: event-definitions (one definition per line)\n"
4136 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4137 	"\t           -:[<group>/]<event>\n"
4138 #ifdef CONFIG_KPROBE_EVENT
4139 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4140 #endif
4141 #ifdef CONFIG_UPROBE_EVENT
4142 	"\t    place: <path>:<offset>\n"
4143 #endif
4144 	"\t     args: <name>=fetcharg[:type]\n"
4145 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4146 	"\t           $stack<index>, $stack, $retval, $comm\n"
4147 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4148 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4149 #endif
4150 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4151 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4152 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4153 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4154 	"\t\t\t  events\n"
4155 	"      filter\t\t- If set, only events passing filter are traced\n"
4156 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4157 	"\t\t\t  <event>:\n"
4158 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4159 	"      filter\t\t- If set, only events passing filter are traced\n"
4160 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4161 	"\t    Format: <trigger>[:count][if <filter>]\n"
4162 	"\t   trigger: traceon, traceoff\n"
4163 	"\t            enable_event:<system>:<event>\n"
4164 	"\t            disable_event:<system>:<event>\n"
4165 #ifdef CONFIG_HIST_TRIGGERS
4166 	"\t            enable_hist:<system>:<event>\n"
4167 	"\t            disable_hist:<system>:<event>\n"
4168 #endif
4169 #ifdef CONFIG_STACKTRACE
4170 	"\t\t    stacktrace\n"
4171 #endif
4172 #ifdef CONFIG_TRACER_SNAPSHOT
4173 	"\t\t    snapshot\n"
4174 #endif
4175 #ifdef CONFIG_HIST_TRIGGERS
4176 	"\t\t    hist (see below)\n"
4177 #endif
4178 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4179 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4180 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4181 	"\t                  events/block/block_unplug/trigger\n"
4182 	"\t   The first disables tracing every time block_unplug is hit.\n"
4183 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4184 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4185 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4186 	"\t   Like function triggers, the counter is only decremented if it\n"
4187 	"\t    enabled or disabled tracing.\n"
4188 	"\t   To remove a trigger without a count:\n"
4189 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4190 	"\t   To remove a trigger with a count:\n"
4191 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4192 	"\t   Filters can be ignored when removing a trigger.\n"
4193 #ifdef CONFIG_HIST_TRIGGERS
4194 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4195 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4196 	"\t            [:values=<field1[,field2,...]>]\n"
4197 	"\t            [:sort=<field1[,field2,...]>]\n"
4198 	"\t            [:size=#entries]\n"
4199 	"\t            [:pause][:continue][:clear]\n"
4200 	"\t            [:name=histname1]\n"
4201 	"\t            [if <filter>]\n\n"
4202 	"\t    When a matching event is hit, an entry is added to a hash\n"
4203 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4204 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4205 	"\t    correspond to fields in the event's format description.  Keys\n"
4206 	"\t    can be any field, or the special string 'stacktrace'.\n"
4207 	"\t    Compound keys consisting of up to two fields can be specified\n"
4208 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4209 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4210 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4211 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4212 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4213 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4214 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4215 	"\t    its histogram data will be shared with other triggers of the\n"
4216 	"\t    same name, and trigger hits will update this common data.\n\n"
4217 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4218 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4219 	"\t    triggers attached to an event, there will be a table for each\n"
4220 	"\t    trigger in the output.  The table displayed for a named\n"
4221 	"\t    trigger will be the same as any other instance having the\n"
4222 	"\t    same name.  The default format used to display a given field\n"
4223 	"\t    can be modified by appending any of the following modifiers\n"
4224 	"\t    to the field name, as applicable:\n\n"
4225 	"\t            .hex        display a number as a hex value\n"
4226 	"\t            .sym        display an address as a symbol\n"
4227 	"\t            .sym-offset display an address as a symbol and offset\n"
4228 	"\t            .execname   display a common_pid as a program name\n"
4229 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4230 	"\t            .log2       display log2 value rather than raw number\n\n"
4231 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4232 	"\t    trigger or to start a hist trigger but not log any events\n"
4233 	"\t    until told to do so.  'continue' can be used to start or\n"
4234 	"\t    restart a paused hist trigger.\n\n"
4235 	"\t    The 'clear' parameter will clear the contents of a running\n"
4236 	"\t    hist trigger and leave its current paused/active state\n"
4237 	"\t    unchanged.\n\n"
4238 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4239 	"\t    have one event conditionally start and stop another event's\n"
4240 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4241 	"\t    the enable_event and disable_event triggers.\n"
4242 #endif
4243 ;
4244 
4245 static ssize_t
4246 tracing_readme_read(struct file *filp, char __user *ubuf,
4247 		       size_t cnt, loff_t *ppos)
4248 {
4249 	return simple_read_from_buffer(ubuf, cnt, ppos,
4250 					readme_msg, strlen(readme_msg));
4251 }
4252 
4253 static const struct file_operations tracing_readme_fops = {
4254 	.open		= tracing_open_generic,
4255 	.read		= tracing_readme_read,
4256 	.llseek		= generic_file_llseek,
4257 };
4258 
4259 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4260 {
4261 	unsigned int *ptr = v;
4262 
4263 	if (*pos || m->count)
4264 		ptr++;
4265 
4266 	(*pos)++;
4267 
4268 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4269 	     ptr++) {
4270 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4271 			continue;
4272 
4273 		return ptr;
4274 	}
4275 
4276 	return NULL;
4277 }
4278 
4279 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4280 {
4281 	void *v;
4282 	loff_t l = 0;
4283 
4284 	preempt_disable();
4285 	arch_spin_lock(&trace_cmdline_lock);
4286 
4287 	v = &savedcmd->map_cmdline_to_pid[0];
4288 	while (l <= *pos) {
4289 		v = saved_cmdlines_next(m, v, &l);
4290 		if (!v)
4291 			return NULL;
4292 	}
4293 
4294 	return v;
4295 }
4296 
4297 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4298 {
4299 	arch_spin_unlock(&trace_cmdline_lock);
4300 	preempt_enable();
4301 }
4302 
4303 static int saved_cmdlines_show(struct seq_file *m, void *v)
4304 {
4305 	char buf[TASK_COMM_LEN];
4306 	unsigned int *pid = v;
4307 
4308 	__trace_find_cmdline(*pid, buf);
4309 	seq_printf(m, "%d %s\n", *pid, buf);
4310 	return 0;
4311 }
4312 
4313 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4314 	.start		= saved_cmdlines_start,
4315 	.next		= saved_cmdlines_next,
4316 	.stop		= saved_cmdlines_stop,
4317 	.show		= saved_cmdlines_show,
4318 };
4319 
4320 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4321 {
4322 	if (tracing_disabled)
4323 		return -ENODEV;
4324 
4325 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4326 }
4327 
4328 static const struct file_operations tracing_saved_cmdlines_fops = {
4329 	.open		= tracing_saved_cmdlines_open,
4330 	.read		= seq_read,
4331 	.llseek		= seq_lseek,
4332 	.release	= seq_release,
4333 };
4334 
4335 static ssize_t
4336 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4337 				 size_t cnt, loff_t *ppos)
4338 {
4339 	char buf[64];
4340 	int r;
4341 
4342 	arch_spin_lock(&trace_cmdline_lock);
4343 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4344 	arch_spin_unlock(&trace_cmdline_lock);
4345 
4346 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4347 }
4348 
4349 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4350 {
4351 	kfree(s->saved_cmdlines);
4352 	kfree(s->map_cmdline_to_pid);
4353 	kfree(s);
4354 }
4355 
4356 static int tracing_resize_saved_cmdlines(unsigned int val)
4357 {
4358 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4359 
4360 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4361 	if (!s)
4362 		return -ENOMEM;
4363 
4364 	if (allocate_cmdlines_buffer(val, s) < 0) {
4365 		kfree(s);
4366 		return -ENOMEM;
4367 	}
4368 
4369 	arch_spin_lock(&trace_cmdline_lock);
4370 	savedcmd_temp = savedcmd;
4371 	savedcmd = s;
4372 	arch_spin_unlock(&trace_cmdline_lock);
4373 	free_saved_cmdlines_buffer(savedcmd_temp);
4374 
4375 	return 0;
4376 }
4377 
4378 static ssize_t
4379 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4380 				  size_t cnt, loff_t *ppos)
4381 {
4382 	unsigned long val;
4383 	int ret;
4384 
4385 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4386 	if (ret)
4387 		return ret;
4388 
4389 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4390 	if (!val || val > PID_MAX_DEFAULT)
4391 		return -EINVAL;
4392 
4393 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4394 	if (ret < 0)
4395 		return ret;
4396 
4397 	*ppos += cnt;
4398 
4399 	return cnt;
4400 }
4401 
4402 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4403 	.open		= tracing_open_generic,
4404 	.read		= tracing_saved_cmdlines_size_read,
4405 	.write		= tracing_saved_cmdlines_size_write,
4406 };
4407 
4408 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4409 static union trace_enum_map_item *
4410 update_enum_map(union trace_enum_map_item *ptr)
4411 {
4412 	if (!ptr->map.enum_string) {
4413 		if (ptr->tail.next) {
4414 			ptr = ptr->tail.next;
4415 			/* Set ptr to the next real item (skip head) */
4416 			ptr++;
4417 		} else
4418 			return NULL;
4419 	}
4420 	return ptr;
4421 }
4422 
4423 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4424 {
4425 	union trace_enum_map_item *ptr = v;
4426 
4427 	/*
4428 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4429 	 * This really should never happen.
4430 	 */
4431 	ptr = update_enum_map(ptr);
4432 	if (WARN_ON_ONCE(!ptr))
4433 		return NULL;
4434 
4435 	ptr++;
4436 
4437 	(*pos)++;
4438 
4439 	ptr = update_enum_map(ptr);
4440 
4441 	return ptr;
4442 }
4443 
4444 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4445 {
4446 	union trace_enum_map_item *v;
4447 	loff_t l = 0;
4448 
4449 	mutex_lock(&trace_enum_mutex);
4450 
4451 	v = trace_enum_maps;
4452 	if (v)
4453 		v++;
4454 
4455 	while (v && l < *pos) {
4456 		v = enum_map_next(m, v, &l);
4457 	}
4458 
4459 	return v;
4460 }
4461 
4462 static void enum_map_stop(struct seq_file *m, void *v)
4463 {
4464 	mutex_unlock(&trace_enum_mutex);
4465 }
4466 
4467 static int enum_map_show(struct seq_file *m, void *v)
4468 {
4469 	union trace_enum_map_item *ptr = v;
4470 
4471 	seq_printf(m, "%s %ld (%s)\n",
4472 		   ptr->map.enum_string, ptr->map.enum_value,
4473 		   ptr->map.system);
4474 
4475 	return 0;
4476 }
4477 
4478 static const struct seq_operations tracing_enum_map_seq_ops = {
4479 	.start		= enum_map_start,
4480 	.next		= enum_map_next,
4481 	.stop		= enum_map_stop,
4482 	.show		= enum_map_show,
4483 };
4484 
4485 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4486 {
4487 	if (tracing_disabled)
4488 		return -ENODEV;
4489 
4490 	return seq_open(filp, &tracing_enum_map_seq_ops);
4491 }
4492 
4493 static const struct file_operations tracing_enum_map_fops = {
4494 	.open		= tracing_enum_map_open,
4495 	.read		= seq_read,
4496 	.llseek		= seq_lseek,
4497 	.release	= seq_release,
4498 };
4499 
4500 static inline union trace_enum_map_item *
4501 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4502 {
4503 	/* Return tail of array given the head */
4504 	return ptr + ptr->head.length + 1;
4505 }
4506 
4507 static void
4508 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4509 			   int len)
4510 {
4511 	struct trace_enum_map **stop;
4512 	struct trace_enum_map **map;
4513 	union trace_enum_map_item *map_array;
4514 	union trace_enum_map_item *ptr;
4515 
4516 	stop = start + len;
4517 
4518 	/*
4519 	 * The trace_enum_maps contains the map plus a head and tail item,
4520 	 * where the head holds the module and length of array, and the
4521 	 * tail holds a pointer to the next list.
4522 	 */
4523 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4524 	if (!map_array) {
4525 		pr_warn("Unable to allocate trace enum mapping\n");
4526 		return;
4527 	}
4528 
4529 	mutex_lock(&trace_enum_mutex);
4530 
4531 	if (!trace_enum_maps)
4532 		trace_enum_maps = map_array;
4533 	else {
4534 		ptr = trace_enum_maps;
4535 		for (;;) {
4536 			ptr = trace_enum_jmp_to_tail(ptr);
4537 			if (!ptr->tail.next)
4538 				break;
4539 			ptr = ptr->tail.next;
4540 
4541 		}
4542 		ptr->tail.next = map_array;
4543 	}
4544 	map_array->head.mod = mod;
4545 	map_array->head.length = len;
4546 	map_array++;
4547 
4548 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4549 		map_array->map = **map;
4550 		map_array++;
4551 	}
4552 	memset(map_array, 0, sizeof(*map_array));
4553 
4554 	mutex_unlock(&trace_enum_mutex);
4555 }
4556 
4557 static void trace_create_enum_file(struct dentry *d_tracer)
4558 {
4559 	trace_create_file("enum_map", 0444, d_tracer,
4560 			  NULL, &tracing_enum_map_fops);
4561 }
4562 
4563 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4564 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4565 static inline void trace_insert_enum_map_file(struct module *mod,
4566 			      struct trace_enum_map **start, int len) { }
4567 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4568 
4569 static void trace_insert_enum_map(struct module *mod,
4570 				  struct trace_enum_map **start, int len)
4571 {
4572 	struct trace_enum_map **map;
4573 
4574 	if (len <= 0)
4575 		return;
4576 
4577 	map = start;
4578 
4579 	trace_event_enum_update(map, len);
4580 
4581 	trace_insert_enum_map_file(mod, start, len);
4582 }
4583 
4584 static ssize_t
4585 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4586 		       size_t cnt, loff_t *ppos)
4587 {
4588 	struct trace_array *tr = filp->private_data;
4589 	char buf[MAX_TRACER_SIZE+2];
4590 	int r;
4591 
4592 	mutex_lock(&trace_types_lock);
4593 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4594 	mutex_unlock(&trace_types_lock);
4595 
4596 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4597 }
4598 
4599 int tracer_init(struct tracer *t, struct trace_array *tr)
4600 {
4601 	tracing_reset_online_cpus(&tr->trace_buffer);
4602 	return t->init(tr);
4603 }
4604 
4605 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4606 {
4607 	int cpu;
4608 
4609 	for_each_tracing_cpu(cpu)
4610 		per_cpu_ptr(buf->data, cpu)->entries = val;
4611 }
4612 
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 /* resize @tr's buffer to the size of @size_tr's entries */
4615 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4616 					struct trace_buffer *size_buf, int cpu_id)
4617 {
4618 	int cpu, ret = 0;
4619 
4620 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4621 		for_each_tracing_cpu(cpu) {
4622 			ret = ring_buffer_resize(trace_buf->buffer,
4623 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4624 			if (ret < 0)
4625 				break;
4626 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4627 				per_cpu_ptr(size_buf->data, cpu)->entries;
4628 		}
4629 	} else {
4630 		ret = ring_buffer_resize(trace_buf->buffer,
4631 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4632 		if (ret == 0)
4633 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4634 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4635 	}
4636 
4637 	return ret;
4638 }
4639 #endif /* CONFIG_TRACER_MAX_TRACE */
4640 
4641 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4642 					unsigned long size, int cpu)
4643 {
4644 	int ret;
4645 
4646 	/*
4647 	 * If kernel or user changes the size of the ring buffer
4648 	 * we use the size that was given, and we can forget about
4649 	 * expanding it later.
4650 	 */
4651 	ring_buffer_expanded = true;
4652 
4653 	/* May be called before buffers are initialized */
4654 	if (!tr->trace_buffer.buffer)
4655 		return 0;
4656 
4657 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4658 	if (ret < 0)
4659 		return ret;
4660 
4661 #ifdef CONFIG_TRACER_MAX_TRACE
4662 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4663 	    !tr->current_trace->use_max_tr)
4664 		goto out;
4665 
4666 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4667 	if (ret < 0) {
4668 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4669 						     &tr->trace_buffer, cpu);
4670 		if (r < 0) {
4671 			/*
4672 			 * AARGH! We are left with different
4673 			 * size max buffer!!!!
4674 			 * The max buffer is our "snapshot" buffer.
4675 			 * When a tracer needs a snapshot (one of the
4676 			 * latency tracers), it swaps the max buffer
4677 			 * with the saved snap shot. We succeeded to
4678 			 * update the size of the main buffer, but failed to
4679 			 * update the size of the max buffer. But when we tried
4680 			 * to reset the main buffer to the original size, we
4681 			 * failed there too. This is very unlikely to
4682 			 * happen, but if it does, warn and kill all
4683 			 * tracing.
4684 			 */
4685 			WARN_ON(1);
4686 			tracing_disabled = 1;
4687 		}
4688 		return ret;
4689 	}
4690 
4691 	if (cpu == RING_BUFFER_ALL_CPUS)
4692 		set_buffer_entries(&tr->max_buffer, size);
4693 	else
4694 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4695 
4696  out:
4697 #endif /* CONFIG_TRACER_MAX_TRACE */
4698 
4699 	if (cpu == RING_BUFFER_ALL_CPUS)
4700 		set_buffer_entries(&tr->trace_buffer, size);
4701 	else
4702 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4703 
4704 	return ret;
4705 }
4706 
4707 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4708 					  unsigned long size, int cpu_id)
4709 {
4710 	int ret = size;
4711 
4712 	mutex_lock(&trace_types_lock);
4713 
4714 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4715 		/* make sure, this cpu is enabled in the mask */
4716 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4717 			ret = -EINVAL;
4718 			goto out;
4719 		}
4720 	}
4721 
4722 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4723 	if (ret < 0)
4724 		ret = -ENOMEM;
4725 
4726 out:
4727 	mutex_unlock(&trace_types_lock);
4728 
4729 	return ret;
4730 }
4731 
4732 
4733 /**
4734  * tracing_update_buffers - used by tracing facility to expand ring buffers
4735  *
4736  * To save on memory when the tracing is never used on a system with it
4737  * configured in. The ring buffers are set to a minimum size. But once
4738  * a user starts to use the tracing facility, then they need to grow
4739  * to their default size.
4740  *
4741  * This function is to be called when a tracer is about to be used.
4742  */
4743 int tracing_update_buffers(void)
4744 {
4745 	int ret = 0;
4746 
4747 	mutex_lock(&trace_types_lock);
4748 	if (!ring_buffer_expanded)
4749 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4750 						RING_BUFFER_ALL_CPUS);
4751 	mutex_unlock(&trace_types_lock);
4752 
4753 	return ret;
4754 }
4755 
4756 struct trace_option_dentry;
4757 
4758 static void
4759 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4760 
4761 /*
4762  * Used to clear out the tracer before deletion of an instance.
4763  * Must have trace_types_lock held.
4764  */
4765 static void tracing_set_nop(struct trace_array *tr)
4766 {
4767 	if (tr->current_trace == &nop_trace)
4768 		return;
4769 
4770 	tr->current_trace->enabled--;
4771 
4772 	if (tr->current_trace->reset)
4773 		tr->current_trace->reset(tr);
4774 
4775 	tr->current_trace = &nop_trace;
4776 }
4777 
4778 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4779 {
4780 	/* Only enable if the directory has been created already. */
4781 	if (!tr->dir)
4782 		return;
4783 
4784 	create_trace_option_files(tr, t);
4785 }
4786 
4787 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4788 {
4789 	struct tracer *t;
4790 #ifdef CONFIG_TRACER_MAX_TRACE
4791 	bool had_max_tr;
4792 #endif
4793 	int ret = 0;
4794 
4795 	mutex_lock(&trace_types_lock);
4796 
4797 	if (!ring_buffer_expanded) {
4798 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4799 						RING_BUFFER_ALL_CPUS);
4800 		if (ret < 0)
4801 			goto out;
4802 		ret = 0;
4803 	}
4804 
4805 	for (t = trace_types; t; t = t->next) {
4806 		if (strcmp(t->name, buf) == 0)
4807 			break;
4808 	}
4809 	if (!t) {
4810 		ret = -EINVAL;
4811 		goto out;
4812 	}
4813 	if (t == tr->current_trace)
4814 		goto out;
4815 
4816 	/* Some tracers are only allowed for the top level buffer */
4817 	if (!trace_ok_for_array(t, tr)) {
4818 		ret = -EINVAL;
4819 		goto out;
4820 	}
4821 
4822 	/* If trace pipe files are being read, we can't change the tracer */
4823 	if (tr->current_trace->ref) {
4824 		ret = -EBUSY;
4825 		goto out;
4826 	}
4827 
4828 	trace_branch_disable();
4829 
4830 	tr->current_trace->enabled--;
4831 
4832 	if (tr->current_trace->reset)
4833 		tr->current_trace->reset(tr);
4834 
4835 	/* Current trace needs to be nop_trace before synchronize_sched */
4836 	tr->current_trace = &nop_trace;
4837 
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839 	had_max_tr = tr->allocated_snapshot;
4840 
4841 	if (had_max_tr && !t->use_max_tr) {
4842 		/*
4843 		 * We need to make sure that the update_max_tr sees that
4844 		 * current_trace changed to nop_trace to keep it from
4845 		 * swapping the buffers after we resize it.
4846 		 * The update_max_tr is called from interrupts disabled
4847 		 * so a synchronized_sched() is sufficient.
4848 		 */
4849 		synchronize_sched();
4850 		free_snapshot(tr);
4851 	}
4852 #endif
4853 
4854 #ifdef CONFIG_TRACER_MAX_TRACE
4855 	if (t->use_max_tr && !had_max_tr) {
4856 		ret = alloc_snapshot(tr);
4857 		if (ret < 0)
4858 			goto out;
4859 	}
4860 #endif
4861 
4862 	if (t->init) {
4863 		ret = tracer_init(t, tr);
4864 		if (ret)
4865 			goto out;
4866 	}
4867 
4868 	tr->current_trace = t;
4869 	tr->current_trace->enabled++;
4870 	trace_branch_enable(tr);
4871  out:
4872 	mutex_unlock(&trace_types_lock);
4873 
4874 	return ret;
4875 }
4876 
4877 static ssize_t
4878 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4879 			size_t cnt, loff_t *ppos)
4880 {
4881 	struct trace_array *tr = filp->private_data;
4882 	char buf[MAX_TRACER_SIZE+1];
4883 	int i;
4884 	size_t ret;
4885 	int err;
4886 
4887 	ret = cnt;
4888 
4889 	if (cnt > MAX_TRACER_SIZE)
4890 		cnt = MAX_TRACER_SIZE;
4891 
4892 	if (copy_from_user(buf, ubuf, cnt))
4893 		return -EFAULT;
4894 
4895 	buf[cnt] = 0;
4896 
4897 	/* strip ending whitespace. */
4898 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4899 		buf[i] = 0;
4900 
4901 	err = tracing_set_tracer(tr, buf);
4902 	if (err)
4903 		return err;
4904 
4905 	*ppos += ret;
4906 
4907 	return ret;
4908 }
4909 
4910 static ssize_t
4911 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4912 		   size_t cnt, loff_t *ppos)
4913 {
4914 	char buf[64];
4915 	int r;
4916 
4917 	r = snprintf(buf, sizeof(buf), "%ld\n",
4918 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4919 	if (r > sizeof(buf))
4920 		r = sizeof(buf);
4921 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4922 }
4923 
4924 static ssize_t
4925 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4926 		    size_t cnt, loff_t *ppos)
4927 {
4928 	unsigned long val;
4929 	int ret;
4930 
4931 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4932 	if (ret)
4933 		return ret;
4934 
4935 	*ptr = val * 1000;
4936 
4937 	return cnt;
4938 }
4939 
4940 static ssize_t
4941 tracing_thresh_read(struct file *filp, char __user *ubuf,
4942 		    size_t cnt, loff_t *ppos)
4943 {
4944 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4945 }
4946 
4947 static ssize_t
4948 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4949 		     size_t cnt, loff_t *ppos)
4950 {
4951 	struct trace_array *tr = filp->private_data;
4952 	int ret;
4953 
4954 	mutex_lock(&trace_types_lock);
4955 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4956 	if (ret < 0)
4957 		goto out;
4958 
4959 	if (tr->current_trace->update_thresh) {
4960 		ret = tr->current_trace->update_thresh(tr);
4961 		if (ret < 0)
4962 			goto out;
4963 	}
4964 
4965 	ret = cnt;
4966 out:
4967 	mutex_unlock(&trace_types_lock);
4968 
4969 	return ret;
4970 }
4971 
4972 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4973 
4974 static ssize_t
4975 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4976 		     size_t cnt, loff_t *ppos)
4977 {
4978 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4979 }
4980 
4981 static ssize_t
4982 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4983 		      size_t cnt, loff_t *ppos)
4984 {
4985 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4986 }
4987 
4988 #endif
4989 
4990 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4991 {
4992 	struct trace_array *tr = inode->i_private;
4993 	struct trace_iterator *iter;
4994 	int ret = 0;
4995 
4996 	if (tracing_disabled)
4997 		return -ENODEV;
4998 
4999 	if (trace_array_get(tr) < 0)
5000 		return -ENODEV;
5001 
5002 	mutex_lock(&trace_types_lock);
5003 
5004 	/* create a buffer to store the information to pass to userspace */
5005 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5006 	if (!iter) {
5007 		ret = -ENOMEM;
5008 		__trace_array_put(tr);
5009 		goto out;
5010 	}
5011 
5012 	trace_seq_init(&iter->seq);
5013 	iter->trace = tr->current_trace;
5014 
5015 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5016 		ret = -ENOMEM;
5017 		goto fail;
5018 	}
5019 
5020 	/* trace pipe does not show start of buffer */
5021 	cpumask_setall(iter->started);
5022 
5023 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5024 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5025 
5026 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5027 	if (trace_clocks[tr->clock_id].in_ns)
5028 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5029 
5030 	iter->tr = tr;
5031 	iter->trace_buffer = &tr->trace_buffer;
5032 	iter->cpu_file = tracing_get_cpu(inode);
5033 	mutex_init(&iter->mutex);
5034 	filp->private_data = iter;
5035 
5036 	if (iter->trace->pipe_open)
5037 		iter->trace->pipe_open(iter);
5038 
5039 	nonseekable_open(inode, filp);
5040 
5041 	tr->current_trace->ref++;
5042 out:
5043 	mutex_unlock(&trace_types_lock);
5044 	return ret;
5045 
5046 fail:
5047 	kfree(iter->trace);
5048 	kfree(iter);
5049 	__trace_array_put(tr);
5050 	mutex_unlock(&trace_types_lock);
5051 	return ret;
5052 }
5053 
5054 static int tracing_release_pipe(struct inode *inode, struct file *file)
5055 {
5056 	struct trace_iterator *iter = file->private_data;
5057 	struct trace_array *tr = inode->i_private;
5058 
5059 	mutex_lock(&trace_types_lock);
5060 
5061 	tr->current_trace->ref--;
5062 
5063 	if (iter->trace->pipe_close)
5064 		iter->trace->pipe_close(iter);
5065 
5066 	mutex_unlock(&trace_types_lock);
5067 
5068 	free_cpumask_var(iter->started);
5069 	mutex_destroy(&iter->mutex);
5070 	kfree(iter);
5071 
5072 	trace_array_put(tr);
5073 
5074 	return 0;
5075 }
5076 
5077 static unsigned int
5078 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5079 {
5080 	struct trace_array *tr = iter->tr;
5081 
5082 	/* Iterators are static, they should be filled or empty */
5083 	if (trace_buffer_iter(iter, iter->cpu_file))
5084 		return POLLIN | POLLRDNORM;
5085 
5086 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5087 		/*
5088 		 * Always select as readable when in blocking mode
5089 		 */
5090 		return POLLIN | POLLRDNORM;
5091 	else
5092 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5093 					     filp, poll_table);
5094 }
5095 
5096 static unsigned int
5097 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5098 {
5099 	struct trace_iterator *iter = filp->private_data;
5100 
5101 	return trace_poll(iter, filp, poll_table);
5102 }
5103 
5104 /* Must be called with iter->mutex held. */
5105 static int tracing_wait_pipe(struct file *filp)
5106 {
5107 	struct trace_iterator *iter = filp->private_data;
5108 	int ret;
5109 
5110 	while (trace_empty(iter)) {
5111 
5112 		if ((filp->f_flags & O_NONBLOCK)) {
5113 			return -EAGAIN;
5114 		}
5115 
5116 		/*
5117 		 * We block until we read something and tracing is disabled.
5118 		 * We still block if tracing is disabled, but we have never
5119 		 * read anything. This allows a user to cat this file, and
5120 		 * then enable tracing. But after we have read something,
5121 		 * we give an EOF when tracing is again disabled.
5122 		 *
5123 		 * iter->pos will be 0 if we haven't read anything.
5124 		 */
5125 		if (!tracing_is_on() && iter->pos)
5126 			break;
5127 
5128 		mutex_unlock(&iter->mutex);
5129 
5130 		ret = wait_on_pipe(iter, false);
5131 
5132 		mutex_lock(&iter->mutex);
5133 
5134 		if (ret)
5135 			return ret;
5136 	}
5137 
5138 	return 1;
5139 }
5140 
5141 /*
5142  * Consumer reader.
5143  */
5144 static ssize_t
5145 tracing_read_pipe(struct file *filp, char __user *ubuf,
5146 		  size_t cnt, loff_t *ppos)
5147 {
5148 	struct trace_iterator *iter = filp->private_data;
5149 	ssize_t sret;
5150 
5151 	/*
5152 	 * Avoid more than one consumer on a single file descriptor
5153 	 * This is just a matter of traces coherency, the ring buffer itself
5154 	 * is protected.
5155 	 */
5156 	mutex_lock(&iter->mutex);
5157 
5158 	/* return any leftover data */
5159 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5160 	if (sret != -EBUSY)
5161 		goto out;
5162 
5163 	trace_seq_init(&iter->seq);
5164 
5165 	if (iter->trace->read) {
5166 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5167 		if (sret)
5168 			goto out;
5169 	}
5170 
5171 waitagain:
5172 	sret = tracing_wait_pipe(filp);
5173 	if (sret <= 0)
5174 		goto out;
5175 
5176 	/* stop when tracing is finished */
5177 	if (trace_empty(iter)) {
5178 		sret = 0;
5179 		goto out;
5180 	}
5181 
5182 	if (cnt >= PAGE_SIZE)
5183 		cnt = PAGE_SIZE - 1;
5184 
5185 	/* reset all but tr, trace, and overruns */
5186 	memset(&iter->seq, 0,
5187 	       sizeof(struct trace_iterator) -
5188 	       offsetof(struct trace_iterator, seq));
5189 	cpumask_clear(iter->started);
5190 	iter->pos = -1;
5191 
5192 	trace_event_read_lock();
5193 	trace_access_lock(iter->cpu_file);
5194 	while (trace_find_next_entry_inc(iter) != NULL) {
5195 		enum print_line_t ret;
5196 		int save_len = iter->seq.seq.len;
5197 
5198 		ret = print_trace_line(iter);
5199 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5200 			/* don't print partial lines */
5201 			iter->seq.seq.len = save_len;
5202 			break;
5203 		}
5204 		if (ret != TRACE_TYPE_NO_CONSUME)
5205 			trace_consume(iter);
5206 
5207 		if (trace_seq_used(&iter->seq) >= cnt)
5208 			break;
5209 
5210 		/*
5211 		 * Setting the full flag means we reached the trace_seq buffer
5212 		 * size and we should leave by partial output condition above.
5213 		 * One of the trace_seq_* functions is not used properly.
5214 		 */
5215 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5216 			  iter->ent->type);
5217 	}
5218 	trace_access_unlock(iter->cpu_file);
5219 	trace_event_read_unlock();
5220 
5221 	/* Now copy what we have to the user */
5222 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5223 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5224 		trace_seq_init(&iter->seq);
5225 
5226 	/*
5227 	 * If there was nothing to send to user, in spite of consuming trace
5228 	 * entries, go back to wait for more entries.
5229 	 */
5230 	if (sret == -EBUSY)
5231 		goto waitagain;
5232 
5233 out:
5234 	mutex_unlock(&iter->mutex);
5235 
5236 	return sret;
5237 }
5238 
5239 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5240 				     unsigned int idx)
5241 {
5242 	__free_page(spd->pages[idx]);
5243 }
5244 
5245 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5246 	.can_merge		= 0,
5247 	.confirm		= generic_pipe_buf_confirm,
5248 	.release		= generic_pipe_buf_release,
5249 	.steal			= generic_pipe_buf_steal,
5250 	.get			= generic_pipe_buf_get,
5251 };
5252 
5253 static size_t
5254 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5255 {
5256 	size_t count;
5257 	int save_len;
5258 	int ret;
5259 
5260 	/* Seq buffer is page-sized, exactly what we need. */
5261 	for (;;) {
5262 		save_len = iter->seq.seq.len;
5263 		ret = print_trace_line(iter);
5264 
5265 		if (trace_seq_has_overflowed(&iter->seq)) {
5266 			iter->seq.seq.len = save_len;
5267 			break;
5268 		}
5269 
5270 		/*
5271 		 * This should not be hit, because it should only
5272 		 * be set if the iter->seq overflowed. But check it
5273 		 * anyway to be safe.
5274 		 */
5275 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5276 			iter->seq.seq.len = save_len;
5277 			break;
5278 		}
5279 
5280 		count = trace_seq_used(&iter->seq) - save_len;
5281 		if (rem < count) {
5282 			rem = 0;
5283 			iter->seq.seq.len = save_len;
5284 			break;
5285 		}
5286 
5287 		if (ret != TRACE_TYPE_NO_CONSUME)
5288 			trace_consume(iter);
5289 		rem -= count;
5290 		if (!trace_find_next_entry_inc(iter))	{
5291 			rem = 0;
5292 			iter->ent = NULL;
5293 			break;
5294 		}
5295 	}
5296 
5297 	return rem;
5298 }
5299 
5300 static ssize_t tracing_splice_read_pipe(struct file *filp,
5301 					loff_t *ppos,
5302 					struct pipe_inode_info *pipe,
5303 					size_t len,
5304 					unsigned int flags)
5305 {
5306 	struct page *pages_def[PIPE_DEF_BUFFERS];
5307 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5308 	struct trace_iterator *iter = filp->private_data;
5309 	struct splice_pipe_desc spd = {
5310 		.pages		= pages_def,
5311 		.partial	= partial_def,
5312 		.nr_pages	= 0, /* This gets updated below. */
5313 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5314 		.flags		= flags,
5315 		.ops		= &tracing_pipe_buf_ops,
5316 		.spd_release	= tracing_spd_release_pipe,
5317 	};
5318 	ssize_t ret;
5319 	size_t rem;
5320 	unsigned int i;
5321 
5322 	if (splice_grow_spd(pipe, &spd))
5323 		return -ENOMEM;
5324 
5325 	mutex_lock(&iter->mutex);
5326 
5327 	if (iter->trace->splice_read) {
5328 		ret = iter->trace->splice_read(iter, filp,
5329 					       ppos, pipe, len, flags);
5330 		if (ret)
5331 			goto out_err;
5332 	}
5333 
5334 	ret = tracing_wait_pipe(filp);
5335 	if (ret <= 0)
5336 		goto out_err;
5337 
5338 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5339 		ret = -EFAULT;
5340 		goto out_err;
5341 	}
5342 
5343 	trace_event_read_lock();
5344 	trace_access_lock(iter->cpu_file);
5345 
5346 	/* Fill as many pages as possible. */
5347 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5348 		spd.pages[i] = alloc_page(GFP_KERNEL);
5349 		if (!spd.pages[i])
5350 			break;
5351 
5352 		rem = tracing_fill_pipe_page(rem, iter);
5353 
5354 		/* Copy the data into the page, so we can start over. */
5355 		ret = trace_seq_to_buffer(&iter->seq,
5356 					  page_address(spd.pages[i]),
5357 					  trace_seq_used(&iter->seq));
5358 		if (ret < 0) {
5359 			__free_page(spd.pages[i]);
5360 			break;
5361 		}
5362 		spd.partial[i].offset = 0;
5363 		spd.partial[i].len = trace_seq_used(&iter->seq);
5364 
5365 		trace_seq_init(&iter->seq);
5366 	}
5367 
5368 	trace_access_unlock(iter->cpu_file);
5369 	trace_event_read_unlock();
5370 	mutex_unlock(&iter->mutex);
5371 
5372 	spd.nr_pages = i;
5373 
5374 	if (i)
5375 		ret = splice_to_pipe(pipe, &spd);
5376 	else
5377 		ret = 0;
5378 out:
5379 	splice_shrink_spd(&spd);
5380 	return ret;
5381 
5382 out_err:
5383 	mutex_unlock(&iter->mutex);
5384 	goto out;
5385 }
5386 
5387 static ssize_t
5388 tracing_entries_read(struct file *filp, char __user *ubuf,
5389 		     size_t cnt, loff_t *ppos)
5390 {
5391 	struct inode *inode = file_inode(filp);
5392 	struct trace_array *tr = inode->i_private;
5393 	int cpu = tracing_get_cpu(inode);
5394 	char buf[64];
5395 	int r = 0;
5396 	ssize_t ret;
5397 
5398 	mutex_lock(&trace_types_lock);
5399 
5400 	if (cpu == RING_BUFFER_ALL_CPUS) {
5401 		int cpu, buf_size_same;
5402 		unsigned long size;
5403 
5404 		size = 0;
5405 		buf_size_same = 1;
5406 		/* check if all cpu sizes are same */
5407 		for_each_tracing_cpu(cpu) {
5408 			/* fill in the size from first enabled cpu */
5409 			if (size == 0)
5410 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5411 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5412 				buf_size_same = 0;
5413 				break;
5414 			}
5415 		}
5416 
5417 		if (buf_size_same) {
5418 			if (!ring_buffer_expanded)
5419 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5420 					    size >> 10,
5421 					    trace_buf_size >> 10);
5422 			else
5423 				r = sprintf(buf, "%lu\n", size >> 10);
5424 		} else
5425 			r = sprintf(buf, "X\n");
5426 	} else
5427 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5428 
5429 	mutex_unlock(&trace_types_lock);
5430 
5431 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5432 	return ret;
5433 }
5434 
5435 static ssize_t
5436 tracing_entries_write(struct file *filp, const char __user *ubuf,
5437 		      size_t cnt, loff_t *ppos)
5438 {
5439 	struct inode *inode = file_inode(filp);
5440 	struct trace_array *tr = inode->i_private;
5441 	unsigned long val;
5442 	int ret;
5443 
5444 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5445 	if (ret)
5446 		return ret;
5447 
5448 	/* must have at least 1 entry */
5449 	if (!val)
5450 		return -EINVAL;
5451 
5452 	/* value is in KB */
5453 	val <<= 10;
5454 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5455 	if (ret < 0)
5456 		return ret;
5457 
5458 	*ppos += cnt;
5459 
5460 	return cnt;
5461 }
5462 
5463 static ssize_t
5464 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5465 				size_t cnt, loff_t *ppos)
5466 {
5467 	struct trace_array *tr = filp->private_data;
5468 	char buf[64];
5469 	int r, cpu;
5470 	unsigned long size = 0, expanded_size = 0;
5471 
5472 	mutex_lock(&trace_types_lock);
5473 	for_each_tracing_cpu(cpu) {
5474 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5475 		if (!ring_buffer_expanded)
5476 			expanded_size += trace_buf_size >> 10;
5477 	}
5478 	if (ring_buffer_expanded)
5479 		r = sprintf(buf, "%lu\n", size);
5480 	else
5481 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5482 	mutex_unlock(&trace_types_lock);
5483 
5484 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5485 }
5486 
5487 static ssize_t
5488 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5489 			  size_t cnt, loff_t *ppos)
5490 {
5491 	/*
5492 	 * There is no need to read what the user has written, this function
5493 	 * is just to make sure that there is no error when "echo" is used
5494 	 */
5495 
5496 	*ppos += cnt;
5497 
5498 	return cnt;
5499 }
5500 
5501 static int
5502 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5503 {
5504 	struct trace_array *tr = inode->i_private;
5505 
5506 	/* disable tracing ? */
5507 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5508 		tracer_tracing_off(tr);
5509 	/* resize the ring buffer to 0 */
5510 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5511 
5512 	trace_array_put(tr);
5513 
5514 	return 0;
5515 }
5516 
5517 static ssize_t
5518 tracing_mark_write(struct file *filp, const char __user *ubuf,
5519 					size_t cnt, loff_t *fpos)
5520 {
5521 	unsigned long addr = (unsigned long)ubuf;
5522 	struct trace_array *tr = filp->private_data;
5523 	struct ring_buffer_event *event;
5524 	struct ring_buffer *buffer;
5525 	struct print_entry *entry;
5526 	unsigned long irq_flags;
5527 	struct page *pages[2];
5528 	void *map_page[2];
5529 	int nr_pages = 1;
5530 	ssize_t written;
5531 	int offset;
5532 	int size;
5533 	int len;
5534 	int ret;
5535 	int i;
5536 
5537 	if (tracing_disabled)
5538 		return -EINVAL;
5539 
5540 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5541 		return -EINVAL;
5542 
5543 	if (cnt > TRACE_BUF_SIZE)
5544 		cnt = TRACE_BUF_SIZE;
5545 
5546 	/*
5547 	 * Userspace is injecting traces into the kernel trace buffer.
5548 	 * We want to be as non intrusive as possible.
5549 	 * To do so, we do not want to allocate any special buffers
5550 	 * or take any locks, but instead write the userspace data
5551 	 * straight into the ring buffer.
5552 	 *
5553 	 * First we need to pin the userspace buffer into memory,
5554 	 * which, most likely it is, because it just referenced it.
5555 	 * But there's no guarantee that it is. By using get_user_pages_fast()
5556 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5557 	 * pages directly. We then write the data directly into the
5558 	 * ring buffer.
5559 	 */
5560 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5561 
5562 	/* check if we cross pages */
5563 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5564 		nr_pages = 2;
5565 
5566 	offset = addr & (PAGE_SIZE - 1);
5567 	addr &= PAGE_MASK;
5568 
5569 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5570 	if (ret < nr_pages) {
5571 		while (--ret >= 0)
5572 			put_page(pages[ret]);
5573 		written = -EFAULT;
5574 		goto out;
5575 	}
5576 
5577 	for (i = 0; i < nr_pages; i++)
5578 		map_page[i] = kmap_atomic(pages[i]);
5579 
5580 	local_save_flags(irq_flags);
5581 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5582 	buffer = tr->trace_buffer.buffer;
5583 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5584 					  irq_flags, preempt_count());
5585 	if (!event) {
5586 		/* Ring buffer disabled, return as if not open for write */
5587 		written = -EBADF;
5588 		goto out_unlock;
5589 	}
5590 
5591 	entry = ring_buffer_event_data(event);
5592 	entry->ip = _THIS_IP_;
5593 
5594 	if (nr_pages == 2) {
5595 		len = PAGE_SIZE - offset;
5596 		memcpy(&entry->buf, map_page[0] + offset, len);
5597 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5598 	} else
5599 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5600 
5601 	if (entry->buf[cnt - 1] != '\n') {
5602 		entry->buf[cnt] = '\n';
5603 		entry->buf[cnt + 1] = '\0';
5604 	} else
5605 		entry->buf[cnt] = '\0';
5606 
5607 	__buffer_unlock_commit(buffer, event);
5608 
5609 	written = cnt;
5610 
5611 	*fpos += written;
5612 
5613  out_unlock:
5614 	for (i = nr_pages - 1; i >= 0; i--) {
5615 		kunmap_atomic(map_page[i]);
5616 		put_page(pages[i]);
5617 	}
5618  out:
5619 	return written;
5620 }
5621 
5622 static int tracing_clock_show(struct seq_file *m, void *v)
5623 {
5624 	struct trace_array *tr = m->private;
5625 	int i;
5626 
5627 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5628 		seq_printf(m,
5629 			"%s%s%s%s", i ? " " : "",
5630 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5631 			i == tr->clock_id ? "]" : "");
5632 	seq_putc(m, '\n');
5633 
5634 	return 0;
5635 }
5636 
5637 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5638 {
5639 	int i;
5640 
5641 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5642 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5643 			break;
5644 	}
5645 	if (i == ARRAY_SIZE(trace_clocks))
5646 		return -EINVAL;
5647 
5648 	mutex_lock(&trace_types_lock);
5649 
5650 	tr->clock_id = i;
5651 
5652 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5653 
5654 	/*
5655 	 * New clock may not be consistent with the previous clock.
5656 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5657 	 */
5658 	tracing_reset_online_cpus(&tr->trace_buffer);
5659 
5660 #ifdef CONFIG_TRACER_MAX_TRACE
5661 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5662 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5663 	tracing_reset_online_cpus(&tr->max_buffer);
5664 #endif
5665 
5666 	mutex_unlock(&trace_types_lock);
5667 
5668 	return 0;
5669 }
5670 
5671 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5672 				   size_t cnt, loff_t *fpos)
5673 {
5674 	struct seq_file *m = filp->private_data;
5675 	struct trace_array *tr = m->private;
5676 	char buf[64];
5677 	const char *clockstr;
5678 	int ret;
5679 
5680 	if (cnt >= sizeof(buf))
5681 		return -EINVAL;
5682 
5683 	if (copy_from_user(buf, ubuf, cnt))
5684 		return -EFAULT;
5685 
5686 	buf[cnt] = 0;
5687 
5688 	clockstr = strstrip(buf);
5689 
5690 	ret = tracing_set_clock(tr, clockstr);
5691 	if (ret)
5692 		return ret;
5693 
5694 	*fpos += cnt;
5695 
5696 	return cnt;
5697 }
5698 
5699 static int tracing_clock_open(struct inode *inode, struct file *file)
5700 {
5701 	struct trace_array *tr = inode->i_private;
5702 	int ret;
5703 
5704 	if (tracing_disabled)
5705 		return -ENODEV;
5706 
5707 	if (trace_array_get(tr))
5708 		return -ENODEV;
5709 
5710 	ret = single_open(file, tracing_clock_show, inode->i_private);
5711 	if (ret < 0)
5712 		trace_array_put(tr);
5713 
5714 	return ret;
5715 }
5716 
5717 struct ftrace_buffer_info {
5718 	struct trace_iterator	iter;
5719 	void			*spare;
5720 	unsigned int		read;
5721 };
5722 
5723 #ifdef CONFIG_TRACER_SNAPSHOT
5724 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5725 {
5726 	struct trace_array *tr = inode->i_private;
5727 	struct trace_iterator *iter;
5728 	struct seq_file *m;
5729 	int ret = 0;
5730 
5731 	if (trace_array_get(tr) < 0)
5732 		return -ENODEV;
5733 
5734 	if (file->f_mode & FMODE_READ) {
5735 		iter = __tracing_open(inode, file, true);
5736 		if (IS_ERR(iter))
5737 			ret = PTR_ERR(iter);
5738 	} else {
5739 		/* Writes still need the seq_file to hold the private data */
5740 		ret = -ENOMEM;
5741 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5742 		if (!m)
5743 			goto out;
5744 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5745 		if (!iter) {
5746 			kfree(m);
5747 			goto out;
5748 		}
5749 		ret = 0;
5750 
5751 		iter->tr = tr;
5752 		iter->trace_buffer = &tr->max_buffer;
5753 		iter->cpu_file = tracing_get_cpu(inode);
5754 		m->private = iter;
5755 		file->private_data = m;
5756 	}
5757 out:
5758 	if (ret < 0)
5759 		trace_array_put(tr);
5760 
5761 	return ret;
5762 }
5763 
5764 static ssize_t
5765 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5766 		       loff_t *ppos)
5767 {
5768 	struct seq_file *m = filp->private_data;
5769 	struct trace_iterator *iter = m->private;
5770 	struct trace_array *tr = iter->tr;
5771 	unsigned long val;
5772 	int ret;
5773 
5774 	ret = tracing_update_buffers();
5775 	if (ret < 0)
5776 		return ret;
5777 
5778 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5779 	if (ret)
5780 		return ret;
5781 
5782 	mutex_lock(&trace_types_lock);
5783 
5784 	if (tr->current_trace->use_max_tr) {
5785 		ret = -EBUSY;
5786 		goto out;
5787 	}
5788 
5789 	switch (val) {
5790 	case 0:
5791 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5792 			ret = -EINVAL;
5793 			break;
5794 		}
5795 		if (tr->allocated_snapshot)
5796 			free_snapshot(tr);
5797 		break;
5798 	case 1:
5799 /* Only allow per-cpu swap if the ring buffer supports it */
5800 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5801 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5802 			ret = -EINVAL;
5803 			break;
5804 		}
5805 #endif
5806 		if (!tr->allocated_snapshot) {
5807 			ret = alloc_snapshot(tr);
5808 			if (ret < 0)
5809 				break;
5810 		}
5811 		local_irq_disable();
5812 		/* Now, we're going to swap */
5813 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5814 			update_max_tr(tr, current, smp_processor_id());
5815 		else
5816 			update_max_tr_single(tr, current, iter->cpu_file);
5817 		local_irq_enable();
5818 		break;
5819 	default:
5820 		if (tr->allocated_snapshot) {
5821 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5822 				tracing_reset_online_cpus(&tr->max_buffer);
5823 			else
5824 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5825 		}
5826 		break;
5827 	}
5828 
5829 	if (ret >= 0) {
5830 		*ppos += cnt;
5831 		ret = cnt;
5832 	}
5833 out:
5834 	mutex_unlock(&trace_types_lock);
5835 	return ret;
5836 }
5837 
5838 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5839 {
5840 	struct seq_file *m = file->private_data;
5841 	int ret;
5842 
5843 	ret = tracing_release(inode, file);
5844 
5845 	if (file->f_mode & FMODE_READ)
5846 		return ret;
5847 
5848 	/* If write only, the seq_file is just a stub */
5849 	if (m)
5850 		kfree(m->private);
5851 	kfree(m);
5852 
5853 	return 0;
5854 }
5855 
5856 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5857 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5858 				    size_t count, loff_t *ppos);
5859 static int tracing_buffers_release(struct inode *inode, struct file *file);
5860 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5861 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5862 
5863 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5864 {
5865 	struct ftrace_buffer_info *info;
5866 	int ret;
5867 
5868 	ret = tracing_buffers_open(inode, filp);
5869 	if (ret < 0)
5870 		return ret;
5871 
5872 	info = filp->private_data;
5873 
5874 	if (info->iter.trace->use_max_tr) {
5875 		tracing_buffers_release(inode, filp);
5876 		return -EBUSY;
5877 	}
5878 
5879 	info->iter.snapshot = true;
5880 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5881 
5882 	return ret;
5883 }
5884 
5885 #endif /* CONFIG_TRACER_SNAPSHOT */
5886 
5887 
5888 static const struct file_operations tracing_thresh_fops = {
5889 	.open		= tracing_open_generic,
5890 	.read		= tracing_thresh_read,
5891 	.write		= tracing_thresh_write,
5892 	.llseek		= generic_file_llseek,
5893 };
5894 
5895 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5896 static const struct file_operations tracing_max_lat_fops = {
5897 	.open		= tracing_open_generic,
5898 	.read		= tracing_max_lat_read,
5899 	.write		= tracing_max_lat_write,
5900 	.llseek		= generic_file_llseek,
5901 };
5902 #endif
5903 
5904 static const struct file_operations set_tracer_fops = {
5905 	.open		= tracing_open_generic,
5906 	.read		= tracing_set_trace_read,
5907 	.write		= tracing_set_trace_write,
5908 	.llseek		= generic_file_llseek,
5909 };
5910 
5911 static const struct file_operations tracing_pipe_fops = {
5912 	.open		= tracing_open_pipe,
5913 	.poll		= tracing_poll_pipe,
5914 	.read		= tracing_read_pipe,
5915 	.splice_read	= tracing_splice_read_pipe,
5916 	.release	= tracing_release_pipe,
5917 	.llseek		= no_llseek,
5918 };
5919 
5920 static const struct file_operations tracing_entries_fops = {
5921 	.open		= tracing_open_generic_tr,
5922 	.read		= tracing_entries_read,
5923 	.write		= tracing_entries_write,
5924 	.llseek		= generic_file_llseek,
5925 	.release	= tracing_release_generic_tr,
5926 };
5927 
5928 static const struct file_operations tracing_total_entries_fops = {
5929 	.open		= tracing_open_generic_tr,
5930 	.read		= tracing_total_entries_read,
5931 	.llseek		= generic_file_llseek,
5932 	.release	= tracing_release_generic_tr,
5933 };
5934 
5935 static const struct file_operations tracing_free_buffer_fops = {
5936 	.open		= tracing_open_generic_tr,
5937 	.write		= tracing_free_buffer_write,
5938 	.release	= tracing_free_buffer_release,
5939 };
5940 
5941 static const struct file_operations tracing_mark_fops = {
5942 	.open		= tracing_open_generic_tr,
5943 	.write		= tracing_mark_write,
5944 	.llseek		= generic_file_llseek,
5945 	.release	= tracing_release_generic_tr,
5946 };
5947 
5948 static const struct file_operations trace_clock_fops = {
5949 	.open		= tracing_clock_open,
5950 	.read		= seq_read,
5951 	.llseek		= seq_lseek,
5952 	.release	= tracing_single_release_tr,
5953 	.write		= tracing_clock_write,
5954 };
5955 
5956 #ifdef CONFIG_TRACER_SNAPSHOT
5957 static const struct file_operations snapshot_fops = {
5958 	.open		= tracing_snapshot_open,
5959 	.read		= seq_read,
5960 	.write		= tracing_snapshot_write,
5961 	.llseek		= tracing_lseek,
5962 	.release	= tracing_snapshot_release,
5963 };
5964 
5965 static const struct file_operations snapshot_raw_fops = {
5966 	.open		= snapshot_raw_open,
5967 	.read		= tracing_buffers_read,
5968 	.release	= tracing_buffers_release,
5969 	.splice_read	= tracing_buffers_splice_read,
5970 	.llseek		= no_llseek,
5971 };
5972 
5973 #endif /* CONFIG_TRACER_SNAPSHOT */
5974 
5975 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5976 {
5977 	struct trace_array *tr = inode->i_private;
5978 	struct ftrace_buffer_info *info;
5979 	int ret;
5980 
5981 	if (tracing_disabled)
5982 		return -ENODEV;
5983 
5984 	if (trace_array_get(tr) < 0)
5985 		return -ENODEV;
5986 
5987 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5988 	if (!info) {
5989 		trace_array_put(tr);
5990 		return -ENOMEM;
5991 	}
5992 
5993 	mutex_lock(&trace_types_lock);
5994 
5995 	info->iter.tr		= tr;
5996 	info->iter.cpu_file	= tracing_get_cpu(inode);
5997 	info->iter.trace	= tr->current_trace;
5998 	info->iter.trace_buffer = &tr->trace_buffer;
5999 	info->spare		= NULL;
6000 	/* Force reading ring buffer for first read */
6001 	info->read		= (unsigned int)-1;
6002 
6003 	filp->private_data = info;
6004 
6005 	tr->current_trace->ref++;
6006 
6007 	mutex_unlock(&trace_types_lock);
6008 
6009 	ret = nonseekable_open(inode, filp);
6010 	if (ret < 0)
6011 		trace_array_put(tr);
6012 
6013 	return ret;
6014 }
6015 
6016 static unsigned int
6017 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6018 {
6019 	struct ftrace_buffer_info *info = filp->private_data;
6020 	struct trace_iterator *iter = &info->iter;
6021 
6022 	return trace_poll(iter, filp, poll_table);
6023 }
6024 
6025 static ssize_t
6026 tracing_buffers_read(struct file *filp, char __user *ubuf,
6027 		     size_t count, loff_t *ppos)
6028 {
6029 	struct ftrace_buffer_info *info = filp->private_data;
6030 	struct trace_iterator *iter = &info->iter;
6031 	ssize_t ret;
6032 	ssize_t size;
6033 
6034 	if (!count)
6035 		return 0;
6036 
6037 #ifdef CONFIG_TRACER_MAX_TRACE
6038 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6039 		return -EBUSY;
6040 #endif
6041 
6042 	if (!info->spare)
6043 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6044 							  iter->cpu_file);
6045 	if (!info->spare)
6046 		return -ENOMEM;
6047 
6048 	/* Do we have previous read data to read? */
6049 	if (info->read < PAGE_SIZE)
6050 		goto read;
6051 
6052  again:
6053 	trace_access_lock(iter->cpu_file);
6054 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6055 				    &info->spare,
6056 				    count,
6057 				    iter->cpu_file, 0);
6058 	trace_access_unlock(iter->cpu_file);
6059 
6060 	if (ret < 0) {
6061 		if (trace_empty(iter)) {
6062 			if ((filp->f_flags & O_NONBLOCK))
6063 				return -EAGAIN;
6064 
6065 			ret = wait_on_pipe(iter, false);
6066 			if (ret)
6067 				return ret;
6068 
6069 			goto again;
6070 		}
6071 		return 0;
6072 	}
6073 
6074 	info->read = 0;
6075  read:
6076 	size = PAGE_SIZE - info->read;
6077 	if (size > count)
6078 		size = count;
6079 
6080 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6081 	if (ret == size)
6082 		return -EFAULT;
6083 
6084 	size -= ret;
6085 
6086 	*ppos += size;
6087 	info->read += size;
6088 
6089 	return size;
6090 }
6091 
6092 static int tracing_buffers_release(struct inode *inode, struct file *file)
6093 {
6094 	struct ftrace_buffer_info *info = file->private_data;
6095 	struct trace_iterator *iter = &info->iter;
6096 
6097 	mutex_lock(&trace_types_lock);
6098 
6099 	iter->tr->current_trace->ref--;
6100 
6101 	__trace_array_put(iter->tr);
6102 
6103 	if (info->spare)
6104 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6105 	kfree(info);
6106 
6107 	mutex_unlock(&trace_types_lock);
6108 
6109 	return 0;
6110 }
6111 
6112 struct buffer_ref {
6113 	struct ring_buffer	*buffer;
6114 	void			*page;
6115 	int			ref;
6116 };
6117 
6118 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6119 				    struct pipe_buffer *buf)
6120 {
6121 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6122 
6123 	if (--ref->ref)
6124 		return;
6125 
6126 	ring_buffer_free_read_page(ref->buffer, ref->page);
6127 	kfree(ref);
6128 	buf->private = 0;
6129 }
6130 
6131 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6132 				struct pipe_buffer *buf)
6133 {
6134 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6135 
6136 	ref->ref++;
6137 }
6138 
6139 /* Pipe buffer operations for a buffer. */
6140 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6141 	.can_merge		= 0,
6142 	.confirm		= generic_pipe_buf_confirm,
6143 	.release		= buffer_pipe_buf_release,
6144 	.steal			= generic_pipe_buf_steal,
6145 	.get			= buffer_pipe_buf_get,
6146 };
6147 
6148 /*
6149  * Callback from splice_to_pipe(), if we need to release some pages
6150  * at the end of the spd in case we error'ed out in filling the pipe.
6151  */
6152 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6153 {
6154 	struct buffer_ref *ref =
6155 		(struct buffer_ref *)spd->partial[i].private;
6156 
6157 	if (--ref->ref)
6158 		return;
6159 
6160 	ring_buffer_free_read_page(ref->buffer, ref->page);
6161 	kfree(ref);
6162 	spd->partial[i].private = 0;
6163 }
6164 
6165 static ssize_t
6166 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6167 			    struct pipe_inode_info *pipe, size_t len,
6168 			    unsigned int flags)
6169 {
6170 	struct ftrace_buffer_info *info = file->private_data;
6171 	struct trace_iterator *iter = &info->iter;
6172 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6173 	struct page *pages_def[PIPE_DEF_BUFFERS];
6174 	struct splice_pipe_desc spd = {
6175 		.pages		= pages_def,
6176 		.partial	= partial_def,
6177 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6178 		.flags		= flags,
6179 		.ops		= &buffer_pipe_buf_ops,
6180 		.spd_release	= buffer_spd_release,
6181 	};
6182 	struct buffer_ref *ref;
6183 	int entries, size, i;
6184 	ssize_t ret = 0;
6185 
6186 #ifdef CONFIG_TRACER_MAX_TRACE
6187 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6188 		return -EBUSY;
6189 #endif
6190 
6191 	if (*ppos & (PAGE_SIZE - 1))
6192 		return -EINVAL;
6193 
6194 	if (len & (PAGE_SIZE - 1)) {
6195 		if (len < PAGE_SIZE)
6196 			return -EINVAL;
6197 		len &= PAGE_MASK;
6198 	}
6199 
6200 	if (splice_grow_spd(pipe, &spd))
6201 		return -ENOMEM;
6202 
6203  again:
6204 	trace_access_lock(iter->cpu_file);
6205 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6206 
6207 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6208 		struct page *page;
6209 		int r;
6210 
6211 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6212 		if (!ref) {
6213 			ret = -ENOMEM;
6214 			break;
6215 		}
6216 
6217 		ref->ref = 1;
6218 		ref->buffer = iter->trace_buffer->buffer;
6219 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6220 		if (!ref->page) {
6221 			ret = -ENOMEM;
6222 			kfree(ref);
6223 			break;
6224 		}
6225 
6226 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6227 					  len, iter->cpu_file, 1);
6228 		if (r < 0) {
6229 			ring_buffer_free_read_page(ref->buffer, ref->page);
6230 			kfree(ref);
6231 			break;
6232 		}
6233 
6234 		/*
6235 		 * zero out any left over data, this is going to
6236 		 * user land.
6237 		 */
6238 		size = ring_buffer_page_len(ref->page);
6239 		if (size < PAGE_SIZE)
6240 			memset(ref->page + size, 0, PAGE_SIZE - size);
6241 
6242 		page = virt_to_page(ref->page);
6243 
6244 		spd.pages[i] = page;
6245 		spd.partial[i].len = PAGE_SIZE;
6246 		spd.partial[i].offset = 0;
6247 		spd.partial[i].private = (unsigned long)ref;
6248 		spd.nr_pages++;
6249 		*ppos += PAGE_SIZE;
6250 
6251 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6252 	}
6253 
6254 	trace_access_unlock(iter->cpu_file);
6255 	spd.nr_pages = i;
6256 
6257 	/* did we read anything? */
6258 	if (!spd.nr_pages) {
6259 		if (ret)
6260 			goto out;
6261 
6262 		ret = -EAGAIN;
6263 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6264 			goto out;
6265 
6266 		ret = wait_on_pipe(iter, true);
6267 		if (ret)
6268 			goto out;
6269 
6270 		goto again;
6271 	}
6272 
6273 	ret = splice_to_pipe(pipe, &spd);
6274 out:
6275 	splice_shrink_spd(&spd);
6276 
6277 	return ret;
6278 }
6279 
6280 static const struct file_operations tracing_buffers_fops = {
6281 	.open		= tracing_buffers_open,
6282 	.read		= tracing_buffers_read,
6283 	.poll		= tracing_buffers_poll,
6284 	.release	= tracing_buffers_release,
6285 	.splice_read	= tracing_buffers_splice_read,
6286 	.llseek		= no_llseek,
6287 };
6288 
6289 static ssize_t
6290 tracing_stats_read(struct file *filp, char __user *ubuf,
6291 		   size_t count, loff_t *ppos)
6292 {
6293 	struct inode *inode = file_inode(filp);
6294 	struct trace_array *tr = inode->i_private;
6295 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6296 	int cpu = tracing_get_cpu(inode);
6297 	struct trace_seq *s;
6298 	unsigned long cnt;
6299 	unsigned long long t;
6300 	unsigned long usec_rem;
6301 
6302 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6303 	if (!s)
6304 		return -ENOMEM;
6305 
6306 	trace_seq_init(s);
6307 
6308 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6309 	trace_seq_printf(s, "entries: %ld\n", cnt);
6310 
6311 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6312 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6313 
6314 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6315 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6316 
6317 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6318 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6319 
6320 	if (trace_clocks[tr->clock_id].in_ns) {
6321 		/* local or global for trace_clock */
6322 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6323 		usec_rem = do_div(t, USEC_PER_SEC);
6324 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6325 								t, usec_rem);
6326 
6327 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6328 		usec_rem = do_div(t, USEC_PER_SEC);
6329 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6330 	} else {
6331 		/* counter or tsc mode for trace_clock */
6332 		trace_seq_printf(s, "oldest event ts: %llu\n",
6333 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6334 
6335 		trace_seq_printf(s, "now ts: %llu\n",
6336 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6337 	}
6338 
6339 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6340 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6341 
6342 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6343 	trace_seq_printf(s, "read events: %ld\n", cnt);
6344 
6345 	count = simple_read_from_buffer(ubuf, count, ppos,
6346 					s->buffer, trace_seq_used(s));
6347 
6348 	kfree(s);
6349 
6350 	return count;
6351 }
6352 
6353 static const struct file_operations tracing_stats_fops = {
6354 	.open		= tracing_open_generic_tr,
6355 	.read		= tracing_stats_read,
6356 	.llseek		= generic_file_llseek,
6357 	.release	= tracing_release_generic_tr,
6358 };
6359 
6360 #ifdef CONFIG_DYNAMIC_FTRACE
6361 
6362 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6363 {
6364 	return 0;
6365 }
6366 
6367 static ssize_t
6368 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6369 		  size_t cnt, loff_t *ppos)
6370 {
6371 	static char ftrace_dyn_info_buffer[1024];
6372 	static DEFINE_MUTEX(dyn_info_mutex);
6373 	unsigned long *p = filp->private_data;
6374 	char *buf = ftrace_dyn_info_buffer;
6375 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6376 	int r;
6377 
6378 	mutex_lock(&dyn_info_mutex);
6379 	r = sprintf(buf, "%ld ", *p);
6380 
6381 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6382 	buf[r++] = '\n';
6383 
6384 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6385 
6386 	mutex_unlock(&dyn_info_mutex);
6387 
6388 	return r;
6389 }
6390 
6391 static const struct file_operations tracing_dyn_info_fops = {
6392 	.open		= tracing_open_generic,
6393 	.read		= tracing_read_dyn_info,
6394 	.llseek		= generic_file_llseek,
6395 };
6396 #endif /* CONFIG_DYNAMIC_FTRACE */
6397 
6398 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6399 static void
6400 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6401 {
6402 	tracing_snapshot();
6403 }
6404 
6405 static void
6406 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6407 {
6408 	unsigned long *count = (long *)data;
6409 
6410 	if (!*count)
6411 		return;
6412 
6413 	if (*count != -1)
6414 		(*count)--;
6415 
6416 	tracing_snapshot();
6417 }
6418 
6419 static int
6420 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6421 		      struct ftrace_probe_ops *ops, void *data)
6422 {
6423 	long count = (long)data;
6424 
6425 	seq_printf(m, "%ps:", (void *)ip);
6426 
6427 	seq_puts(m, "snapshot");
6428 
6429 	if (count == -1)
6430 		seq_puts(m, ":unlimited\n");
6431 	else
6432 		seq_printf(m, ":count=%ld\n", count);
6433 
6434 	return 0;
6435 }
6436 
6437 static struct ftrace_probe_ops snapshot_probe_ops = {
6438 	.func			= ftrace_snapshot,
6439 	.print			= ftrace_snapshot_print,
6440 };
6441 
6442 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6443 	.func			= ftrace_count_snapshot,
6444 	.print			= ftrace_snapshot_print,
6445 };
6446 
6447 static int
6448 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6449 			       char *glob, char *cmd, char *param, int enable)
6450 {
6451 	struct ftrace_probe_ops *ops;
6452 	void *count = (void *)-1;
6453 	char *number;
6454 	int ret;
6455 
6456 	/* hash funcs only work with set_ftrace_filter */
6457 	if (!enable)
6458 		return -EINVAL;
6459 
6460 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6461 
6462 	if (glob[0] == '!') {
6463 		unregister_ftrace_function_probe_func(glob+1, ops);
6464 		return 0;
6465 	}
6466 
6467 	if (!param)
6468 		goto out_reg;
6469 
6470 	number = strsep(&param, ":");
6471 
6472 	if (!strlen(number))
6473 		goto out_reg;
6474 
6475 	/*
6476 	 * We use the callback data field (which is a pointer)
6477 	 * as our counter.
6478 	 */
6479 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6480 	if (ret)
6481 		return ret;
6482 
6483  out_reg:
6484 	ret = register_ftrace_function_probe(glob, ops, count);
6485 
6486 	if (ret >= 0)
6487 		alloc_snapshot(&global_trace);
6488 
6489 	return ret < 0 ? ret : 0;
6490 }
6491 
6492 static struct ftrace_func_command ftrace_snapshot_cmd = {
6493 	.name			= "snapshot",
6494 	.func			= ftrace_trace_snapshot_callback,
6495 };
6496 
6497 static __init int register_snapshot_cmd(void)
6498 {
6499 	return register_ftrace_command(&ftrace_snapshot_cmd);
6500 }
6501 #else
6502 static inline __init int register_snapshot_cmd(void) { return 0; }
6503 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6504 
6505 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6506 {
6507 	if (WARN_ON(!tr->dir))
6508 		return ERR_PTR(-ENODEV);
6509 
6510 	/* Top directory uses NULL as the parent */
6511 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6512 		return NULL;
6513 
6514 	/* All sub buffers have a descriptor */
6515 	return tr->dir;
6516 }
6517 
6518 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6519 {
6520 	struct dentry *d_tracer;
6521 
6522 	if (tr->percpu_dir)
6523 		return tr->percpu_dir;
6524 
6525 	d_tracer = tracing_get_dentry(tr);
6526 	if (IS_ERR(d_tracer))
6527 		return NULL;
6528 
6529 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6530 
6531 	WARN_ONCE(!tr->percpu_dir,
6532 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6533 
6534 	return tr->percpu_dir;
6535 }
6536 
6537 static struct dentry *
6538 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6539 		      void *data, long cpu, const struct file_operations *fops)
6540 {
6541 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6542 
6543 	if (ret) /* See tracing_get_cpu() */
6544 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6545 	return ret;
6546 }
6547 
6548 static void
6549 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6550 {
6551 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6552 	struct dentry *d_cpu;
6553 	char cpu_dir[30]; /* 30 characters should be more than enough */
6554 
6555 	if (!d_percpu)
6556 		return;
6557 
6558 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6559 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6560 	if (!d_cpu) {
6561 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6562 		return;
6563 	}
6564 
6565 	/* per cpu trace_pipe */
6566 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6567 				tr, cpu, &tracing_pipe_fops);
6568 
6569 	/* per cpu trace */
6570 	trace_create_cpu_file("trace", 0644, d_cpu,
6571 				tr, cpu, &tracing_fops);
6572 
6573 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6574 				tr, cpu, &tracing_buffers_fops);
6575 
6576 	trace_create_cpu_file("stats", 0444, d_cpu,
6577 				tr, cpu, &tracing_stats_fops);
6578 
6579 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6580 				tr, cpu, &tracing_entries_fops);
6581 
6582 #ifdef CONFIG_TRACER_SNAPSHOT
6583 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6584 				tr, cpu, &snapshot_fops);
6585 
6586 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6587 				tr, cpu, &snapshot_raw_fops);
6588 #endif
6589 }
6590 
6591 #ifdef CONFIG_FTRACE_SELFTEST
6592 /* Let selftest have access to static functions in this file */
6593 #include "trace_selftest.c"
6594 #endif
6595 
6596 static ssize_t
6597 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6598 			loff_t *ppos)
6599 {
6600 	struct trace_option_dentry *topt = filp->private_data;
6601 	char *buf;
6602 
6603 	if (topt->flags->val & topt->opt->bit)
6604 		buf = "1\n";
6605 	else
6606 		buf = "0\n";
6607 
6608 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6609 }
6610 
6611 static ssize_t
6612 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6613 			 loff_t *ppos)
6614 {
6615 	struct trace_option_dentry *topt = filp->private_data;
6616 	unsigned long val;
6617 	int ret;
6618 
6619 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6620 	if (ret)
6621 		return ret;
6622 
6623 	if (val != 0 && val != 1)
6624 		return -EINVAL;
6625 
6626 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6627 		mutex_lock(&trace_types_lock);
6628 		ret = __set_tracer_option(topt->tr, topt->flags,
6629 					  topt->opt, !val);
6630 		mutex_unlock(&trace_types_lock);
6631 		if (ret)
6632 			return ret;
6633 	}
6634 
6635 	*ppos += cnt;
6636 
6637 	return cnt;
6638 }
6639 
6640 
6641 static const struct file_operations trace_options_fops = {
6642 	.open = tracing_open_generic,
6643 	.read = trace_options_read,
6644 	.write = trace_options_write,
6645 	.llseek	= generic_file_llseek,
6646 };
6647 
6648 /*
6649  * In order to pass in both the trace_array descriptor as well as the index
6650  * to the flag that the trace option file represents, the trace_array
6651  * has a character array of trace_flags_index[], which holds the index
6652  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6653  * The address of this character array is passed to the flag option file
6654  * read/write callbacks.
6655  *
6656  * In order to extract both the index and the trace_array descriptor,
6657  * get_tr_index() uses the following algorithm.
6658  *
6659  *   idx = *ptr;
6660  *
6661  * As the pointer itself contains the address of the index (remember
6662  * index[1] == 1).
6663  *
6664  * Then to get the trace_array descriptor, by subtracting that index
6665  * from the ptr, we get to the start of the index itself.
6666  *
6667  *   ptr - idx == &index[0]
6668  *
6669  * Then a simple container_of() from that pointer gets us to the
6670  * trace_array descriptor.
6671  */
6672 static void get_tr_index(void *data, struct trace_array **ptr,
6673 			 unsigned int *pindex)
6674 {
6675 	*pindex = *(unsigned char *)data;
6676 
6677 	*ptr = container_of(data - *pindex, struct trace_array,
6678 			    trace_flags_index);
6679 }
6680 
6681 static ssize_t
6682 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6683 			loff_t *ppos)
6684 {
6685 	void *tr_index = filp->private_data;
6686 	struct trace_array *tr;
6687 	unsigned int index;
6688 	char *buf;
6689 
6690 	get_tr_index(tr_index, &tr, &index);
6691 
6692 	if (tr->trace_flags & (1 << index))
6693 		buf = "1\n";
6694 	else
6695 		buf = "0\n";
6696 
6697 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6698 }
6699 
6700 static ssize_t
6701 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6702 			 loff_t *ppos)
6703 {
6704 	void *tr_index = filp->private_data;
6705 	struct trace_array *tr;
6706 	unsigned int index;
6707 	unsigned long val;
6708 	int ret;
6709 
6710 	get_tr_index(tr_index, &tr, &index);
6711 
6712 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6713 	if (ret)
6714 		return ret;
6715 
6716 	if (val != 0 && val != 1)
6717 		return -EINVAL;
6718 
6719 	mutex_lock(&trace_types_lock);
6720 	ret = set_tracer_flag(tr, 1 << index, val);
6721 	mutex_unlock(&trace_types_lock);
6722 
6723 	if (ret < 0)
6724 		return ret;
6725 
6726 	*ppos += cnt;
6727 
6728 	return cnt;
6729 }
6730 
6731 static const struct file_operations trace_options_core_fops = {
6732 	.open = tracing_open_generic,
6733 	.read = trace_options_core_read,
6734 	.write = trace_options_core_write,
6735 	.llseek = generic_file_llseek,
6736 };
6737 
6738 struct dentry *trace_create_file(const char *name,
6739 				 umode_t mode,
6740 				 struct dentry *parent,
6741 				 void *data,
6742 				 const struct file_operations *fops)
6743 {
6744 	struct dentry *ret;
6745 
6746 	ret = tracefs_create_file(name, mode, parent, data, fops);
6747 	if (!ret)
6748 		pr_warn("Could not create tracefs '%s' entry\n", name);
6749 
6750 	return ret;
6751 }
6752 
6753 
6754 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6755 {
6756 	struct dentry *d_tracer;
6757 
6758 	if (tr->options)
6759 		return tr->options;
6760 
6761 	d_tracer = tracing_get_dentry(tr);
6762 	if (IS_ERR(d_tracer))
6763 		return NULL;
6764 
6765 	tr->options = tracefs_create_dir("options", d_tracer);
6766 	if (!tr->options) {
6767 		pr_warn("Could not create tracefs directory 'options'\n");
6768 		return NULL;
6769 	}
6770 
6771 	return tr->options;
6772 }
6773 
6774 static void
6775 create_trace_option_file(struct trace_array *tr,
6776 			 struct trace_option_dentry *topt,
6777 			 struct tracer_flags *flags,
6778 			 struct tracer_opt *opt)
6779 {
6780 	struct dentry *t_options;
6781 
6782 	t_options = trace_options_init_dentry(tr);
6783 	if (!t_options)
6784 		return;
6785 
6786 	topt->flags = flags;
6787 	topt->opt = opt;
6788 	topt->tr = tr;
6789 
6790 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6791 				    &trace_options_fops);
6792 
6793 }
6794 
6795 static void
6796 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6797 {
6798 	struct trace_option_dentry *topts;
6799 	struct trace_options *tr_topts;
6800 	struct tracer_flags *flags;
6801 	struct tracer_opt *opts;
6802 	int cnt;
6803 	int i;
6804 
6805 	if (!tracer)
6806 		return;
6807 
6808 	flags = tracer->flags;
6809 
6810 	if (!flags || !flags->opts)
6811 		return;
6812 
6813 	/*
6814 	 * If this is an instance, only create flags for tracers
6815 	 * the instance may have.
6816 	 */
6817 	if (!trace_ok_for_array(tracer, tr))
6818 		return;
6819 
6820 	for (i = 0; i < tr->nr_topts; i++) {
6821 		/* Make sure there's no duplicate flags. */
6822 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6823 			return;
6824 	}
6825 
6826 	opts = flags->opts;
6827 
6828 	for (cnt = 0; opts[cnt].name; cnt++)
6829 		;
6830 
6831 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6832 	if (!topts)
6833 		return;
6834 
6835 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6836 			    GFP_KERNEL);
6837 	if (!tr_topts) {
6838 		kfree(topts);
6839 		return;
6840 	}
6841 
6842 	tr->topts = tr_topts;
6843 	tr->topts[tr->nr_topts].tracer = tracer;
6844 	tr->topts[tr->nr_topts].topts = topts;
6845 	tr->nr_topts++;
6846 
6847 	for (cnt = 0; opts[cnt].name; cnt++) {
6848 		create_trace_option_file(tr, &topts[cnt], flags,
6849 					 &opts[cnt]);
6850 		WARN_ONCE(topts[cnt].entry == NULL,
6851 			  "Failed to create trace option: %s",
6852 			  opts[cnt].name);
6853 	}
6854 }
6855 
6856 static struct dentry *
6857 create_trace_option_core_file(struct trace_array *tr,
6858 			      const char *option, long index)
6859 {
6860 	struct dentry *t_options;
6861 
6862 	t_options = trace_options_init_dentry(tr);
6863 	if (!t_options)
6864 		return NULL;
6865 
6866 	return trace_create_file(option, 0644, t_options,
6867 				 (void *)&tr->trace_flags_index[index],
6868 				 &trace_options_core_fops);
6869 }
6870 
6871 static void create_trace_options_dir(struct trace_array *tr)
6872 {
6873 	struct dentry *t_options;
6874 	bool top_level = tr == &global_trace;
6875 	int i;
6876 
6877 	t_options = trace_options_init_dentry(tr);
6878 	if (!t_options)
6879 		return;
6880 
6881 	for (i = 0; trace_options[i]; i++) {
6882 		if (top_level ||
6883 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6884 			create_trace_option_core_file(tr, trace_options[i], i);
6885 	}
6886 }
6887 
6888 static ssize_t
6889 rb_simple_read(struct file *filp, char __user *ubuf,
6890 	       size_t cnt, loff_t *ppos)
6891 {
6892 	struct trace_array *tr = filp->private_data;
6893 	char buf[64];
6894 	int r;
6895 
6896 	r = tracer_tracing_is_on(tr);
6897 	r = sprintf(buf, "%d\n", r);
6898 
6899 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6900 }
6901 
6902 static ssize_t
6903 rb_simple_write(struct file *filp, const char __user *ubuf,
6904 		size_t cnt, loff_t *ppos)
6905 {
6906 	struct trace_array *tr = filp->private_data;
6907 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6908 	unsigned long val;
6909 	int ret;
6910 
6911 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6912 	if (ret)
6913 		return ret;
6914 
6915 	if (buffer) {
6916 		mutex_lock(&trace_types_lock);
6917 		if (val) {
6918 			tracer_tracing_on(tr);
6919 			if (tr->current_trace->start)
6920 				tr->current_trace->start(tr);
6921 		} else {
6922 			tracer_tracing_off(tr);
6923 			if (tr->current_trace->stop)
6924 				tr->current_trace->stop(tr);
6925 		}
6926 		mutex_unlock(&trace_types_lock);
6927 	}
6928 
6929 	(*ppos)++;
6930 
6931 	return cnt;
6932 }
6933 
6934 static const struct file_operations rb_simple_fops = {
6935 	.open		= tracing_open_generic_tr,
6936 	.read		= rb_simple_read,
6937 	.write		= rb_simple_write,
6938 	.release	= tracing_release_generic_tr,
6939 	.llseek		= default_llseek,
6940 };
6941 
6942 struct dentry *trace_instance_dir;
6943 
6944 static void
6945 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6946 
6947 static int
6948 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6949 {
6950 	enum ring_buffer_flags rb_flags;
6951 
6952 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6953 
6954 	buf->tr = tr;
6955 
6956 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6957 	if (!buf->buffer)
6958 		return -ENOMEM;
6959 
6960 	buf->data = alloc_percpu(struct trace_array_cpu);
6961 	if (!buf->data) {
6962 		ring_buffer_free(buf->buffer);
6963 		return -ENOMEM;
6964 	}
6965 
6966 	/* Allocate the first page for all buffers */
6967 	set_buffer_entries(&tr->trace_buffer,
6968 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6969 
6970 	return 0;
6971 }
6972 
6973 static int allocate_trace_buffers(struct trace_array *tr, int size)
6974 {
6975 	int ret;
6976 
6977 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6978 	if (ret)
6979 		return ret;
6980 
6981 #ifdef CONFIG_TRACER_MAX_TRACE
6982 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6983 				    allocate_snapshot ? size : 1);
6984 	if (WARN_ON(ret)) {
6985 		ring_buffer_free(tr->trace_buffer.buffer);
6986 		free_percpu(tr->trace_buffer.data);
6987 		return -ENOMEM;
6988 	}
6989 	tr->allocated_snapshot = allocate_snapshot;
6990 
6991 	/*
6992 	 * Only the top level trace array gets its snapshot allocated
6993 	 * from the kernel command line.
6994 	 */
6995 	allocate_snapshot = false;
6996 #endif
6997 	return 0;
6998 }
6999 
7000 static void free_trace_buffer(struct trace_buffer *buf)
7001 {
7002 	if (buf->buffer) {
7003 		ring_buffer_free(buf->buffer);
7004 		buf->buffer = NULL;
7005 		free_percpu(buf->data);
7006 		buf->data = NULL;
7007 	}
7008 }
7009 
7010 static void free_trace_buffers(struct trace_array *tr)
7011 {
7012 	if (!tr)
7013 		return;
7014 
7015 	free_trace_buffer(&tr->trace_buffer);
7016 
7017 #ifdef CONFIG_TRACER_MAX_TRACE
7018 	free_trace_buffer(&tr->max_buffer);
7019 #endif
7020 }
7021 
7022 static void init_trace_flags_index(struct trace_array *tr)
7023 {
7024 	int i;
7025 
7026 	/* Used by the trace options files */
7027 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7028 		tr->trace_flags_index[i] = i;
7029 }
7030 
7031 static void __update_tracer_options(struct trace_array *tr)
7032 {
7033 	struct tracer *t;
7034 
7035 	for (t = trace_types; t; t = t->next)
7036 		add_tracer_options(tr, t);
7037 }
7038 
7039 static void update_tracer_options(struct trace_array *tr)
7040 {
7041 	mutex_lock(&trace_types_lock);
7042 	__update_tracer_options(tr);
7043 	mutex_unlock(&trace_types_lock);
7044 }
7045 
7046 static int instance_mkdir(const char *name)
7047 {
7048 	struct trace_array *tr;
7049 	int ret;
7050 
7051 	mutex_lock(&trace_types_lock);
7052 
7053 	ret = -EEXIST;
7054 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7055 		if (tr->name && strcmp(tr->name, name) == 0)
7056 			goto out_unlock;
7057 	}
7058 
7059 	ret = -ENOMEM;
7060 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7061 	if (!tr)
7062 		goto out_unlock;
7063 
7064 	tr->name = kstrdup(name, GFP_KERNEL);
7065 	if (!tr->name)
7066 		goto out_free_tr;
7067 
7068 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7069 		goto out_free_tr;
7070 
7071 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7072 
7073 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7074 
7075 	raw_spin_lock_init(&tr->start_lock);
7076 
7077 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7078 
7079 	tr->current_trace = &nop_trace;
7080 
7081 	INIT_LIST_HEAD(&tr->systems);
7082 	INIT_LIST_HEAD(&tr->events);
7083 
7084 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7085 		goto out_free_tr;
7086 
7087 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7088 	if (!tr->dir)
7089 		goto out_free_tr;
7090 
7091 	ret = event_trace_add_tracer(tr->dir, tr);
7092 	if (ret) {
7093 		tracefs_remove_recursive(tr->dir);
7094 		goto out_free_tr;
7095 	}
7096 
7097 	init_tracer_tracefs(tr, tr->dir);
7098 	init_trace_flags_index(tr);
7099 	__update_tracer_options(tr);
7100 
7101 	list_add(&tr->list, &ftrace_trace_arrays);
7102 
7103 	mutex_unlock(&trace_types_lock);
7104 
7105 	return 0;
7106 
7107  out_free_tr:
7108 	free_trace_buffers(tr);
7109 	free_cpumask_var(tr->tracing_cpumask);
7110 	kfree(tr->name);
7111 	kfree(tr);
7112 
7113  out_unlock:
7114 	mutex_unlock(&trace_types_lock);
7115 
7116 	return ret;
7117 
7118 }
7119 
7120 static int instance_rmdir(const char *name)
7121 {
7122 	struct trace_array *tr;
7123 	int found = 0;
7124 	int ret;
7125 	int i;
7126 
7127 	mutex_lock(&trace_types_lock);
7128 
7129 	ret = -ENODEV;
7130 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7131 		if (tr->name && strcmp(tr->name, name) == 0) {
7132 			found = 1;
7133 			break;
7134 		}
7135 	}
7136 	if (!found)
7137 		goto out_unlock;
7138 
7139 	ret = -EBUSY;
7140 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7141 		goto out_unlock;
7142 
7143 	list_del(&tr->list);
7144 
7145 	/* Disable all the flags that were enabled coming in */
7146 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7147 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7148 			set_tracer_flag(tr, 1 << i, 0);
7149 	}
7150 
7151 	tracing_set_nop(tr);
7152 	event_trace_del_tracer(tr);
7153 	ftrace_destroy_function_files(tr);
7154 	tracefs_remove_recursive(tr->dir);
7155 	free_trace_buffers(tr);
7156 
7157 	for (i = 0; i < tr->nr_topts; i++) {
7158 		kfree(tr->topts[i].topts);
7159 	}
7160 	kfree(tr->topts);
7161 
7162 	kfree(tr->name);
7163 	kfree(tr);
7164 
7165 	ret = 0;
7166 
7167  out_unlock:
7168 	mutex_unlock(&trace_types_lock);
7169 
7170 	return ret;
7171 }
7172 
7173 static __init void create_trace_instances(struct dentry *d_tracer)
7174 {
7175 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7176 							 instance_mkdir,
7177 							 instance_rmdir);
7178 	if (WARN_ON(!trace_instance_dir))
7179 		return;
7180 }
7181 
7182 static void
7183 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7184 {
7185 	int cpu;
7186 
7187 	trace_create_file("available_tracers", 0444, d_tracer,
7188 			tr, &show_traces_fops);
7189 
7190 	trace_create_file("current_tracer", 0644, d_tracer,
7191 			tr, &set_tracer_fops);
7192 
7193 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7194 			  tr, &tracing_cpumask_fops);
7195 
7196 	trace_create_file("trace_options", 0644, d_tracer,
7197 			  tr, &tracing_iter_fops);
7198 
7199 	trace_create_file("trace", 0644, d_tracer,
7200 			  tr, &tracing_fops);
7201 
7202 	trace_create_file("trace_pipe", 0444, d_tracer,
7203 			  tr, &tracing_pipe_fops);
7204 
7205 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7206 			  tr, &tracing_entries_fops);
7207 
7208 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7209 			  tr, &tracing_total_entries_fops);
7210 
7211 	trace_create_file("free_buffer", 0200, d_tracer,
7212 			  tr, &tracing_free_buffer_fops);
7213 
7214 	trace_create_file("trace_marker", 0220, d_tracer,
7215 			  tr, &tracing_mark_fops);
7216 
7217 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7218 			  &trace_clock_fops);
7219 
7220 	trace_create_file("tracing_on", 0644, d_tracer,
7221 			  tr, &rb_simple_fops);
7222 
7223 	create_trace_options_dir(tr);
7224 
7225 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7226 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7227 			&tr->max_latency, &tracing_max_lat_fops);
7228 #endif
7229 
7230 	if (ftrace_create_function_files(tr, d_tracer))
7231 		WARN(1, "Could not allocate function filter files");
7232 
7233 #ifdef CONFIG_TRACER_SNAPSHOT
7234 	trace_create_file("snapshot", 0644, d_tracer,
7235 			  tr, &snapshot_fops);
7236 #endif
7237 
7238 	for_each_tracing_cpu(cpu)
7239 		tracing_init_tracefs_percpu(tr, cpu);
7240 
7241 	ftrace_init_tracefs(tr, d_tracer);
7242 }
7243 
7244 static struct vfsmount *trace_automount(void *ingore)
7245 {
7246 	struct vfsmount *mnt;
7247 	struct file_system_type *type;
7248 
7249 	/*
7250 	 * To maintain backward compatibility for tools that mount
7251 	 * debugfs to get to the tracing facility, tracefs is automatically
7252 	 * mounted to the debugfs/tracing directory.
7253 	 */
7254 	type = get_fs_type("tracefs");
7255 	if (!type)
7256 		return NULL;
7257 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7258 	put_filesystem(type);
7259 	if (IS_ERR(mnt))
7260 		return NULL;
7261 	mntget(mnt);
7262 
7263 	return mnt;
7264 }
7265 
7266 /**
7267  * tracing_init_dentry - initialize top level trace array
7268  *
7269  * This is called when creating files or directories in the tracing
7270  * directory. It is called via fs_initcall() by any of the boot up code
7271  * and expects to return the dentry of the top level tracing directory.
7272  */
7273 struct dentry *tracing_init_dentry(void)
7274 {
7275 	struct trace_array *tr = &global_trace;
7276 
7277 	/* The top level trace array uses  NULL as parent */
7278 	if (tr->dir)
7279 		return NULL;
7280 
7281 	if (WARN_ON(!tracefs_initialized()) ||
7282 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7283 		 WARN_ON(!debugfs_initialized())))
7284 		return ERR_PTR(-ENODEV);
7285 
7286 	/*
7287 	 * As there may still be users that expect the tracing
7288 	 * files to exist in debugfs/tracing, we must automount
7289 	 * the tracefs file system there, so older tools still
7290 	 * work with the newer kerenl.
7291 	 */
7292 	tr->dir = debugfs_create_automount("tracing", NULL,
7293 					   trace_automount, NULL);
7294 	if (!tr->dir) {
7295 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7296 		return ERR_PTR(-ENOMEM);
7297 	}
7298 
7299 	return NULL;
7300 }
7301 
7302 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7303 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7304 
7305 static void __init trace_enum_init(void)
7306 {
7307 	int len;
7308 
7309 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7310 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7311 }
7312 
7313 #ifdef CONFIG_MODULES
7314 static void trace_module_add_enums(struct module *mod)
7315 {
7316 	if (!mod->num_trace_enums)
7317 		return;
7318 
7319 	/*
7320 	 * Modules with bad taint do not have events created, do
7321 	 * not bother with enums either.
7322 	 */
7323 	if (trace_module_has_bad_taint(mod))
7324 		return;
7325 
7326 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7327 }
7328 
7329 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7330 static void trace_module_remove_enums(struct module *mod)
7331 {
7332 	union trace_enum_map_item *map;
7333 	union trace_enum_map_item **last = &trace_enum_maps;
7334 
7335 	if (!mod->num_trace_enums)
7336 		return;
7337 
7338 	mutex_lock(&trace_enum_mutex);
7339 
7340 	map = trace_enum_maps;
7341 
7342 	while (map) {
7343 		if (map->head.mod == mod)
7344 			break;
7345 		map = trace_enum_jmp_to_tail(map);
7346 		last = &map->tail.next;
7347 		map = map->tail.next;
7348 	}
7349 	if (!map)
7350 		goto out;
7351 
7352 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7353 	kfree(map);
7354  out:
7355 	mutex_unlock(&trace_enum_mutex);
7356 }
7357 #else
7358 static inline void trace_module_remove_enums(struct module *mod) { }
7359 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7360 
7361 static int trace_module_notify(struct notifier_block *self,
7362 			       unsigned long val, void *data)
7363 {
7364 	struct module *mod = data;
7365 
7366 	switch (val) {
7367 	case MODULE_STATE_COMING:
7368 		trace_module_add_enums(mod);
7369 		break;
7370 	case MODULE_STATE_GOING:
7371 		trace_module_remove_enums(mod);
7372 		break;
7373 	}
7374 
7375 	return 0;
7376 }
7377 
7378 static struct notifier_block trace_module_nb = {
7379 	.notifier_call = trace_module_notify,
7380 	.priority = 0,
7381 };
7382 #endif /* CONFIG_MODULES */
7383 
7384 static __init int tracer_init_tracefs(void)
7385 {
7386 	struct dentry *d_tracer;
7387 
7388 	trace_access_lock_init();
7389 
7390 	d_tracer = tracing_init_dentry();
7391 	if (IS_ERR(d_tracer))
7392 		return 0;
7393 
7394 	init_tracer_tracefs(&global_trace, d_tracer);
7395 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7396 
7397 	trace_create_file("tracing_thresh", 0644, d_tracer,
7398 			&global_trace, &tracing_thresh_fops);
7399 
7400 	trace_create_file("README", 0444, d_tracer,
7401 			NULL, &tracing_readme_fops);
7402 
7403 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7404 			NULL, &tracing_saved_cmdlines_fops);
7405 
7406 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7407 			  NULL, &tracing_saved_cmdlines_size_fops);
7408 
7409 	trace_enum_init();
7410 
7411 	trace_create_enum_file(d_tracer);
7412 
7413 #ifdef CONFIG_MODULES
7414 	register_module_notifier(&trace_module_nb);
7415 #endif
7416 
7417 #ifdef CONFIG_DYNAMIC_FTRACE
7418 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7419 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7420 #endif
7421 
7422 	create_trace_instances(d_tracer);
7423 
7424 	update_tracer_options(&global_trace);
7425 
7426 	return 0;
7427 }
7428 
7429 static int trace_panic_handler(struct notifier_block *this,
7430 			       unsigned long event, void *unused)
7431 {
7432 	if (ftrace_dump_on_oops)
7433 		ftrace_dump(ftrace_dump_on_oops);
7434 	return NOTIFY_OK;
7435 }
7436 
7437 static struct notifier_block trace_panic_notifier = {
7438 	.notifier_call  = trace_panic_handler,
7439 	.next           = NULL,
7440 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7441 };
7442 
7443 static int trace_die_handler(struct notifier_block *self,
7444 			     unsigned long val,
7445 			     void *data)
7446 {
7447 	switch (val) {
7448 	case DIE_OOPS:
7449 		if (ftrace_dump_on_oops)
7450 			ftrace_dump(ftrace_dump_on_oops);
7451 		break;
7452 	default:
7453 		break;
7454 	}
7455 	return NOTIFY_OK;
7456 }
7457 
7458 static struct notifier_block trace_die_notifier = {
7459 	.notifier_call = trace_die_handler,
7460 	.priority = 200
7461 };
7462 
7463 /*
7464  * printk is set to max of 1024, we really don't need it that big.
7465  * Nothing should be printing 1000 characters anyway.
7466  */
7467 #define TRACE_MAX_PRINT		1000
7468 
7469 /*
7470  * Define here KERN_TRACE so that we have one place to modify
7471  * it if we decide to change what log level the ftrace dump
7472  * should be at.
7473  */
7474 #define KERN_TRACE		KERN_EMERG
7475 
7476 void
7477 trace_printk_seq(struct trace_seq *s)
7478 {
7479 	/* Probably should print a warning here. */
7480 	if (s->seq.len >= TRACE_MAX_PRINT)
7481 		s->seq.len = TRACE_MAX_PRINT;
7482 
7483 	/*
7484 	 * More paranoid code. Although the buffer size is set to
7485 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7486 	 * an extra layer of protection.
7487 	 */
7488 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7489 		s->seq.len = s->seq.size - 1;
7490 
7491 	/* should be zero ended, but we are paranoid. */
7492 	s->buffer[s->seq.len] = 0;
7493 
7494 	printk(KERN_TRACE "%s", s->buffer);
7495 
7496 	trace_seq_init(s);
7497 }
7498 
7499 void trace_init_global_iter(struct trace_iterator *iter)
7500 {
7501 	iter->tr = &global_trace;
7502 	iter->trace = iter->tr->current_trace;
7503 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7504 	iter->trace_buffer = &global_trace.trace_buffer;
7505 
7506 	if (iter->trace && iter->trace->open)
7507 		iter->trace->open(iter);
7508 
7509 	/* Annotate start of buffers if we had overruns */
7510 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7511 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7512 
7513 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7514 	if (trace_clocks[iter->tr->clock_id].in_ns)
7515 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7516 }
7517 
7518 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7519 {
7520 	/* use static because iter can be a bit big for the stack */
7521 	static struct trace_iterator iter;
7522 	static atomic_t dump_running;
7523 	struct trace_array *tr = &global_trace;
7524 	unsigned int old_userobj;
7525 	unsigned long flags;
7526 	int cnt = 0, cpu;
7527 
7528 	/* Only allow one dump user at a time. */
7529 	if (atomic_inc_return(&dump_running) != 1) {
7530 		atomic_dec(&dump_running);
7531 		return;
7532 	}
7533 
7534 	/*
7535 	 * Always turn off tracing when we dump.
7536 	 * We don't need to show trace output of what happens
7537 	 * between multiple crashes.
7538 	 *
7539 	 * If the user does a sysrq-z, then they can re-enable
7540 	 * tracing with echo 1 > tracing_on.
7541 	 */
7542 	tracing_off();
7543 
7544 	local_irq_save(flags);
7545 
7546 	/* Simulate the iterator */
7547 	trace_init_global_iter(&iter);
7548 
7549 	for_each_tracing_cpu(cpu) {
7550 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7551 	}
7552 
7553 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7554 
7555 	/* don't look at user memory in panic mode */
7556 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7557 
7558 	switch (oops_dump_mode) {
7559 	case DUMP_ALL:
7560 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7561 		break;
7562 	case DUMP_ORIG:
7563 		iter.cpu_file = raw_smp_processor_id();
7564 		break;
7565 	case DUMP_NONE:
7566 		goto out_enable;
7567 	default:
7568 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7569 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7570 	}
7571 
7572 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7573 
7574 	/* Did function tracer already get disabled? */
7575 	if (ftrace_is_dead()) {
7576 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7577 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7578 	}
7579 
7580 	/*
7581 	 * We need to stop all tracing on all CPUS to read the
7582 	 * the next buffer. This is a bit expensive, but is
7583 	 * not done often. We fill all what we can read,
7584 	 * and then release the locks again.
7585 	 */
7586 
7587 	while (!trace_empty(&iter)) {
7588 
7589 		if (!cnt)
7590 			printk(KERN_TRACE "---------------------------------\n");
7591 
7592 		cnt++;
7593 
7594 		/* reset all but tr, trace, and overruns */
7595 		memset(&iter.seq, 0,
7596 		       sizeof(struct trace_iterator) -
7597 		       offsetof(struct trace_iterator, seq));
7598 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7599 		iter.pos = -1;
7600 
7601 		if (trace_find_next_entry_inc(&iter) != NULL) {
7602 			int ret;
7603 
7604 			ret = print_trace_line(&iter);
7605 			if (ret != TRACE_TYPE_NO_CONSUME)
7606 				trace_consume(&iter);
7607 		}
7608 		touch_nmi_watchdog();
7609 
7610 		trace_printk_seq(&iter.seq);
7611 	}
7612 
7613 	if (!cnt)
7614 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7615 	else
7616 		printk(KERN_TRACE "---------------------------------\n");
7617 
7618  out_enable:
7619 	tr->trace_flags |= old_userobj;
7620 
7621 	for_each_tracing_cpu(cpu) {
7622 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7623 	}
7624  	atomic_dec(&dump_running);
7625 	local_irq_restore(flags);
7626 }
7627 EXPORT_SYMBOL_GPL(ftrace_dump);
7628 
7629 __init static int tracer_alloc_buffers(void)
7630 {
7631 	int ring_buf_size;
7632 	int ret = -ENOMEM;
7633 
7634 	/*
7635 	 * Make sure we don't accidently add more trace options
7636 	 * than we have bits for.
7637 	 */
7638 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7639 
7640 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7641 		goto out;
7642 
7643 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7644 		goto out_free_buffer_mask;
7645 
7646 	/* Only allocate trace_printk buffers if a trace_printk exists */
7647 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7648 		/* Must be called before global_trace.buffer is allocated */
7649 		trace_printk_init_buffers();
7650 
7651 	/* To save memory, keep the ring buffer size to its minimum */
7652 	if (ring_buffer_expanded)
7653 		ring_buf_size = trace_buf_size;
7654 	else
7655 		ring_buf_size = 1;
7656 
7657 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7658 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7659 
7660 	raw_spin_lock_init(&global_trace.start_lock);
7661 
7662 	/* Used for event triggers */
7663 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7664 	if (!temp_buffer)
7665 		goto out_free_cpumask;
7666 
7667 	if (trace_create_savedcmd() < 0)
7668 		goto out_free_temp_buffer;
7669 
7670 	/* TODO: make the number of buffers hot pluggable with CPUS */
7671 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7672 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7673 		WARN_ON(1);
7674 		goto out_free_savedcmd;
7675 	}
7676 
7677 	if (global_trace.buffer_disabled)
7678 		tracing_off();
7679 
7680 	if (trace_boot_clock) {
7681 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7682 		if (ret < 0)
7683 			pr_warn("Trace clock %s not defined, going back to default\n",
7684 				trace_boot_clock);
7685 	}
7686 
7687 	/*
7688 	 * register_tracer() might reference current_trace, so it
7689 	 * needs to be set before we register anything. This is
7690 	 * just a bootstrap of current_trace anyway.
7691 	 */
7692 	global_trace.current_trace = &nop_trace;
7693 
7694 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7695 
7696 	ftrace_init_global_array_ops(&global_trace);
7697 
7698 	init_trace_flags_index(&global_trace);
7699 
7700 	register_tracer(&nop_trace);
7701 
7702 	/* All seems OK, enable tracing */
7703 	tracing_disabled = 0;
7704 
7705 	atomic_notifier_chain_register(&panic_notifier_list,
7706 				       &trace_panic_notifier);
7707 
7708 	register_die_notifier(&trace_die_notifier);
7709 
7710 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7711 
7712 	INIT_LIST_HEAD(&global_trace.systems);
7713 	INIT_LIST_HEAD(&global_trace.events);
7714 	list_add(&global_trace.list, &ftrace_trace_arrays);
7715 
7716 	apply_trace_boot_options();
7717 
7718 	register_snapshot_cmd();
7719 
7720 	return 0;
7721 
7722 out_free_savedcmd:
7723 	free_saved_cmdlines_buffer(savedcmd);
7724 out_free_temp_buffer:
7725 	ring_buffer_free(temp_buffer);
7726 out_free_cpumask:
7727 	free_cpumask_var(global_trace.tracing_cpumask);
7728 out_free_buffer_mask:
7729 	free_cpumask_var(tracing_buffer_mask);
7730 out:
7731 	return ret;
7732 }
7733 
7734 void __init trace_init(void)
7735 {
7736 	if (tracepoint_printk) {
7737 		tracepoint_print_iter =
7738 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7739 		if (WARN_ON(!tracepoint_print_iter))
7740 			tracepoint_printk = 0;
7741 	}
7742 	tracer_alloc_buffers();
7743 	trace_event_init();
7744 }
7745 
7746 __init static int clear_boot_tracer(void)
7747 {
7748 	/*
7749 	 * The default tracer at boot buffer is an init section.
7750 	 * This function is called in lateinit. If we did not
7751 	 * find the boot tracer, then clear it out, to prevent
7752 	 * later registration from accessing the buffer that is
7753 	 * about to be freed.
7754 	 */
7755 	if (!default_bootup_tracer)
7756 		return 0;
7757 
7758 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7759 	       default_bootup_tracer);
7760 	default_bootup_tracer = NULL;
7761 
7762 	return 0;
7763 }
7764 
7765 fs_initcall(tracer_init_tracefs);
7766 late_initcall(clear_boot_tracer);
7767