xref: /openbmc/linux/kernel/trace/trace.c (revision b08918fb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will peek into the ring-buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring-buffer, such as trace_printk, could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but is set back to zero when the initialization
97  * of the tracer succeeds. That is the only place that sets
98  * it back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163 				   unsigned long flags, int pc);
164 
165 #define MAX_TRACER_SIZE		100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 	default_bootup_tracer = bootup_tracer_buf;
175 	/* We are using ftrace early, expand it */
176 	ring_buffer_expanded = true;
177 	return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 	if (*str++ != '=' || !*str) {
184 		ftrace_dump_on_oops = DUMP_ALL;
185 		return 1;
186 	}
187 
188 	if (!strcmp("orig_cpu", str)) {
189 		ftrace_dump_on_oops = DUMP_ORIG;
190 		return 1;
191 	}
192 
193 	return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 		__disable_trace_on_warning = 1;
201 	return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 	allocate_snapshot = true;
208 	/* We also need the main ring buffer expanded */
209 	ring_buffer_expanded = true;
210 	return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 	return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 	trace_boot_clock = trace_boot_clock_buf;
231 	return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 		tracepoint_printk = 1;
239 	return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(u64 nsec)
244 {
245 	nsec += 500;
246 	do_div(nsec, 1000);
247 	return nsec;
248 }
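
/*
 * Worked example (illustrative): ns2usecs() rounds to the nearest
 * microsecond, so ns2usecs(1500) returns 2 while ns2usecs(1499) returns 1.
 */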
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS						\
252 	(FUNCTION_DEFAULT_FLAGS |					\
253 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
254 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
255 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
256 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
260 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265 
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271 	.trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273 
274 LIST_HEAD(ftrace_trace_arrays);
275 
276 int trace_array_get(struct trace_array *this_tr)
277 {
278 	struct trace_array *tr;
279 	int ret = -ENODEV;
280 
281 	mutex_lock(&trace_types_lock);
282 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283 		if (tr == this_tr) {
284 			tr->ref++;
285 			ret = 0;
286 			break;
287 		}
288 	}
289 	mutex_unlock(&trace_types_lock);
290 
291 	return ret;
292 }
293 
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296 	WARN_ON(!this_tr->ref);
297 	this_tr->ref--;
298 }
299 
300 void trace_array_put(struct trace_array *this_tr)
301 {
302 	mutex_lock(&trace_types_lock);
303 	__trace_array_put(this_tr);
304 	mutex_unlock(&trace_types_lock);
305 }
306 
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308 			      struct ring_buffer *buffer,
309 			      struct ring_buffer_event *event)
310 {
311 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312 	    !filter_match_preds(call->filter, rec)) {
313 		__trace_event_discard_commit(buffer, event);
314 		return 1;
315 	}
316 
317 	return 0;
318 }
319 
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322 	vfree(pid_list->pids);
323 	kfree(pid_list);
324 }
325 
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336 	/*
337 	 * If pid_max changed after filtered_pids was created, we
338 	 * by default ignore all pids greater than the previous pid_max.
339 	 */
340 	if (search_pid >= filtered_pids->pid_max)
341 		return false;
342 
343 	return test_bit(search_pid, filtered_pids->pids);
344 }
345 
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358 	/*
359 	 * Return false, because if filtered_pids does not exist,
360 	 * all pids are good to trace.
361 	 */
362 	if (!filtered_pids)
363 		return false;
364 
365 	return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
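
/*
 * Illustrative caller (a sketch only; the real callers live in other
 * files under kernel/trace/): a tracing hook typically bails out early
 * when the current task is filtered away:
 *
 *	if (trace_ignore_this_task(pid_list, current))
 *		return;
 */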
367 
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * If adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381 				  struct task_struct *self,
382 				  struct task_struct *task)
383 {
384 	if (!pid_list)
385 		return;
386 
387 	/* For forks, we only add if the forking task is listed */
388 	if (self) {
389 		if (!trace_find_filtered_pid(pid_list, self->pid))
390 			return;
391 	}
392 
393 	/* Sorry, but we don't support pid_max changing after setting */
394 	if (task->pid >= pid_list->pid_max)
395 		return;
396 
397 	/* "self" is set for forks, and NULL for exits */
398 	if (self)
399 		set_bit(task->pid, pid_list->pids);
400 	else
401 		clear_bit(task->pid, pid_list->pids);
402 }
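
/*
 * Illustrative use (sketch; the actual callers are the fork/exit
 * tracepoint handlers elsewhere in kernel/trace/):
 *
 *	// on fork: add the child only if the forking task is listed
 *	trace_filter_add_remove_task(pid_list, self, task);
 *
 *	// on exit: drop the exiting task from the list
 *	trace_filter_add_remove_task(pid_list, NULL, task);
 */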
403 
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418 	unsigned long pid = (unsigned long)v;
419 
420 	(*pos)++;
421 
422 	/* pid already is +1 of the actual previous bit */
423 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424 
425 	/* Return pid + 1 to allow zero to be represented */
426 	if (pid < pid_list->pid_max)
427 		return (void *)(pid + 1);
428 
429 	return NULL;
430 }
431 
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445 	unsigned long pid;
446 	loff_t l = 0;
447 
448 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449 	if (pid >= pid_list->pid_max)
450 		return NULL;
451 
452 	/* Return pid + 1 so that zero can be the exit value */
453 	for (pid++; pid && l < *pos;
454 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455 		;
456 	return (void *)pid;
457 }
458 
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469 	unsigned long pid = (unsigned long)v - 1;
470 
471 	seq_printf(m, "%lu\n", pid);
472 	return 0;
473 }
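
/*
 * Illustrative wiring (sketch; the "example_*" names are hypothetical):
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to
 * back a seq_file interface, roughly like:
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,	// resolves its pid_list, calls trace_pid_start()
 *		.next	= example_pid_next,	// calls trace_pid_next()
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */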
474 
475 /* 128 bytes (PID_BUF_SIZE + 1, as handed to the parser) should be much more than enough */
476 #define PID_BUF_SIZE		127
477 
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479 		    struct trace_pid_list **new_pid_list,
480 		    const char __user *ubuf, size_t cnt)
481 {
482 	struct trace_pid_list *pid_list;
483 	struct trace_parser parser;
484 	unsigned long val;
485 	int nr_pids = 0;
486 	ssize_t read = 0;
487 	ssize_t ret = 0;
488 	loff_t pos;
489 	pid_t pid;
490 
491 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492 		return -ENOMEM;
493 
494 	/*
495 	 * Always create a new array. The write is an all-or-nothing
496 	 * operation: a new array is built for the pids the user adds,
497 	 * and if the operation fails, the current list is left
498 	 * unmodified.
499 	 */
500 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501 	if (!pid_list) {
502 		trace_parser_put(&parser);
503 		return -ENOMEM;
504 	}
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		trace_parser_put(&parser);
515 		kfree(pid_list);
516 		return -ENOMEM;
517 	}
518 
519 	if (filtered_pids) {
520 		/* copy the current bits to the new max */
521 		for_each_set_bit(pid, filtered_pids->pids,
522 				 filtered_pids->pid_max) {
523 			set_bit(pid, pid_list->pids);
524 			nr_pids++;
525 		}
526 	}
527 
528 	while (cnt > 0) {
529 
530 		pos = 0;
531 
532 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
533 		if (ret < 0 || !trace_parser_loaded(&parser))
534 			break;
535 
536 		read += ret;
537 		ubuf += ret;
538 		cnt -= ret;
539 
540 		ret = -EINVAL;
541 		if (kstrtoul(parser.buffer, 0, &val))
542 			break;
543 		if (val >= pid_list->pid_max)
544 			break;
545 
546 		pid = (pid_t)val;
547 
548 		set_bit(pid, pid_list->pids);
549 		nr_pids++;
550 
551 		trace_parser_clear(&parser);
552 		ret = 0;
553 	}
554 	trace_parser_put(&parser);
555 
556 	if (ret < 0) {
557 		trace_free_pid_list(pid_list);
558 		return ret;
559 	}
560 
561 	if (!nr_pids) {
562 		/* Cleared the list of pids */
563 		trace_free_pid_list(pid_list);
564 		read = ret;
565 		pid_list = NULL;
566 	}
567 
568 	*new_pid_list = pid_list;
569 
570 	return read;
571 }
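
/*
 * Illustrative use (simplified sketch; names and the surrounding locking
 * are hypothetical): a write handler builds a new list, publishes it, and
 * frees the old one after readers are done:
 *
 *	ret = trace_pid_write(filtered_pids, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	rcu_assign_pointer(tr->filtered_pids, new_list);
 *	synchronize_rcu();
 *	if (filtered_pids)
 *		trace_free_pid_list(filtered_pids);
 */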
572 
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575 	u64 ts;
576 
577 	/* Early boot up does not have a buffer yet */
578 	if (!buf->buffer)
579 		return trace_clock_local();
580 
581 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
582 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583 
584 	return ts;
585 }
586 
587 u64 ftrace_now(int cpu)
588 {
589 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591 
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
596  * mirror flag "buffer_disabled" to be used in fast paths such as for
597  * the irqsoff tracer. But it may be inaccurate due to races. If you
598  * need to know the accurate state, use tracing_is_on() which is a little
599  * slower, but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603 	/*
604 	 * For quick access (irqsoff uses this in fast path), just
605 	 * return the mirror variable of the state of the ring buffer.
606 	 * It's a little racy, but we don't really care.
607 	 */
608 	smp_rmb();
609 	return !global_trace.buffer_disabled;
610 }
611 
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to a low value of 16384.
618  * If a dump on oops happens, it is much appreciated not to
619  * have to wait for all that output. Anyway, this is
620  * configurable at both boot time and run time.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
623 
624 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625 
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer		*trace_types __read_mostly;
628 
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633 
634 /*
635  * Serialize access to the ring buffer.
636  *
637  * The ring buffer serializes readers, but that is only low-level protection.
638  * The validity of the events (returned by ring_buffer_peek() etc.)
639  * is not protected by the ring buffer.
640  *
641  * The content of events may become garbage if we allow other processes to
642  * consume these events concurrently:
643  *   A) the page of the consumed events may become a normal page
644  *      (not a reader page) in the ring buffer, and this page will be
645  *      rewritten by the events producer.
646  *   B) the page of the consumed events may become a page for splice_read,
647  *      and this page will be returned to the system.
648  *
649  * These primitives allow multiple processes to access different CPU ring
650  * buffers concurrently.
651  *
652  * These primitives don't distinguish read-only and read-consume access.
653  * Multiple read-only accesses are also serialized.
654  */
655 
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659 
660 static inline void trace_access_lock(int cpu)
661 {
662 	if (cpu == RING_BUFFER_ALL_CPUS) {
663 		/* gain it for accessing the whole ring buffer. */
664 		down_write(&all_cpu_access_lock);
665 	} else {
666 		/* gain it for accessing a cpu ring buffer. */
667 
668 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669 		down_read(&all_cpu_access_lock);
670 
671 		/* Secondly block other access to this @cpu ring buffer. */
672 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
673 	}
674 }
675 
676 static inline void trace_access_unlock(int cpu)
677 {
678 	if (cpu == RING_BUFFER_ALL_CPUS) {
679 		up_write(&all_cpu_access_lock);
680 	} else {
681 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682 		up_read(&all_cpu_access_lock);
683 	}
684 }
685 
686 static inline void trace_access_lock_init(void)
687 {
688 	int cpu;
689 
690 	for_each_possible_cpu(cpu)
691 		mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693 
694 #else
695 
696 static DEFINE_MUTEX(access_lock);
697 
698 static inline void trace_access_lock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_lock(&access_lock);
702 }
703 
704 static inline void trace_access_unlock(int cpu)
705 {
706 	(void)cpu;
707 	mutex_unlock(&access_lock);
708 }
709 
710 static inline void trace_access_lock_init(void)
711 {
712 }
713 
714 #endif
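
/*
 * Illustrative pairing (sketch): a consuming reader brackets its access
 * to one CPU buffer (or to all of them with RING_BUFFER_ALL_CPUS):
 *
 *	trace_access_lock(iter->cpu_file);
 *	// ... consume events from that CPU's ring buffer ...
 *	trace_access_unlock(iter->cpu_file);
 */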
715 
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718 				 unsigned long flags,
719 				 int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721 				      struct ring_buffer *buffer,
722 				      unsigned long flags,
723 				      int skip, int pc, struct pt_regs *regs);
724 
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727 					unsigned long flags,
728 					int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732 				      struct ring_buffer *buffer,
733 				      unsigned long flags,
734 				      int skip, int pc, struct pt_regs *regs)
735 {
736 }
737 
738 #endif
739 
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742 		  int type, unsigned long flags, int pc)
743 {
744 	struct trace_entry *ent = ring_buffer_event_data(event);
745 
746 	tracing_generic_entry_update(ent, type, flags, pc);
747 }
748 
749 static __always_inline struct ring_buffer_event *
750 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
751 			  int type,
752 			  unsigned long len,
753 			  unsigned long flags, int pc)
754 {
755 	struct ring_buffer_event *event;
756 
757 	event = ring_buffer_lock_reserve(buffer, len);
758 	if (event != NULL)
759 		trace_event_setup(event, type, flags, pc);
760 
761 	return event;
762 }
763 
764 void tracer_tracing_on(struct trace_array *tr)
765 {
766 	if (tr->trace_buffer.buffer)
767 		ring_buffer_record_on(tr->trace_buffer.buffer);
768 	/*
769 	 * This flag is looked at when buffers haven't been allocated
770 	 * yet, or by some tracers (like irqsoff) that just want to
771 	 * know if the ring buffer has been disabled, and it can handle
772 	 * races where it gets disabled while we still do a record.
773 	 * As the check is in the fast path of the tracers, it is more
774 	 * important to be fast than accurate.
775 	 */
776 	tr->buffer_disabled = 0;
777 	/* Make the flag seen by readers */
778 	smp_wmb();
779 }
780 
781 /**
782  * tracing_on - enable tracing buffers
783  *
784  * This function enables tracing buffers that may have been
785  * disabled with tracing_off.
786  */
787 void tracing_on(void)
788 {
789 	tracer_tracing_on(&global_trace);
790 }
791 EXPORT_SYMBOL_GPL(tracing_on);
792 
793 
794 static __always_inline void
795 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
796 {
797 	__this_cpu_write(trace_taskinfo_save, true);
798 
799 	/* If this is the temp buffer, we need to commit fully */
800 	if (this_cpu_read(trace_buffered_event) == event) {
801 		/* Length is in event->array[0] */
802 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
803 		/* Release the temp buffer */
804 		this_cpu_dec(trace_buffered_event_cnt);
805 	} else
806 		ring_buffer_unlock_commit(buffer, event);
807 }
808 
809 /**
810  * __trace_puts - write a constant string into the trace buffer.
811  * @ip:	   The address of the caller
812  * @str:   The constant string to write
813  * @size:  The size of the string.
814  */
815 int __trace_puts(unsigned long ip, const char *str, int size)
816 {
817 	struct ring_buffer_event *event;
818 	struct ring_buffer *buffer;
819 	struct print_entry *entry;
820 	unsigned long irq_flags;
821 	int alloc;
822 	int pc;
823 
824 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
825 		return 0;
826 
827 	pc = preempt_count();
828 
829 	if (unlikely(tracing_selftest_running || tracing_disabled))
830 		return 0;
831 
832 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
833 
834 	local_save_flags(irq_flags);
835 	buffer = global_trace.trace_buffer.buffer;
836 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
837 					    irq_flags, pc);
838 	if (!event)
839 		return 0;
840 
841 	entry = ring_buffer_event_data(event);
842 	entry->ip = ip;
843 
844 	memcpy(&entry->buf, str, size);
845 
846 	/* Add a newline if necessary */
847 	if (entry->buf[size - 1] != '\n') {
848 		entry->buf[size] = '\n';
849 		entry->buf[size + 1] = '\0';
850 	} else
851 		entry->buf[size] = '\0';
852 
853 	__buffer_unlock_commit(buffer, event);
854 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
855 
856 	return size;
857 }
858 EXPORT_SYMBOL_GPL(__trace_puts);
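
/*
 * Illustrative use (sketch): __trace_puts() is normally reached through
 * the trace_puts() macro (defined in a kernel header), which supplies
 * _THIS_IP_ and the string length:
 *
 *	trace_puts("reached the fast path\n");
 */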
859 
860 /**
861  * __trace_bputs - write the pointer to a constant string into trace buffer
862  * @ip:	   The address of the caller
863  * @str:   The constant string whose address is written into the buffer
864  */
865 int __trace_bputs(unsigned long ip, const char *str)
866 {
867 	struct ring_buffer_event *event;
868 	struct ring_buffer *buffer;
869 	struct bputs_entry *entry;
870 	unsigned long irq_flags;
871 	int size = sizeof(struct bputs_entry);
872 	int pc;
873 
874 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
875 		return 0;
876 
877 	pc = preempt_count();
878 
879 	if (unlikely(tracing_selftest_running || tracing_disabled))
880 		return 0;
881 
882 	local_save_flags(irq_flags);
883 	buffer = global_trace.trace_buffer.buffer;
884 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885 					    irq_flags, pc);
886 	if (!event)
887 		return 0;
888 
889 	entry = ring_buffer_event_data(event);
890 	entry->ip			= ip;
891 	entry->str			= str;
892 
893 	__buffer_unlock_commit(buffer, event);
894 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
895 
896 	return 1;
897 }
898 EXPORT_SYMBOL_GPL(__trace_bputs);
899 
900 #ifdef CONFIG_TRACER_SNAPSHOT
901 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
902 {
903 	struct tracer *tracer = tr->current_trace;
904 	unsigned long flags;
905 
906 	if (in_nmi()) {
907 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
908 		internal_trace_puts("*** snapshot is being ignored        ***\n");
909 		return;
910 	}
911 
912 	if (!tr->allocated_snapshot) {
913 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
914 		internal_trace_puts("*** stopping trace here!   ***\n");
915 		tracing_off();
916 		return;
917 	}
918 
919 	/* Note, snapshot can not be used when the tracer uses it */
920 	if (tracer->use_max_tr) {
921 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
922 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
923 		return;
924 	}
925 
926 	local_irq_save(flags);
927 	update_max_tr(tr, current, smp_processor_id(), cond_data);
928 	local_irq_restore(flags);
929 }
930 
931 void tracing_snapshot_instance(struct trace_array *tr)
932 {
933 	tracing_snapshot_instance_cond(tr, NULL);
934 }
935 
936 /**
937  * tracing_snapshot - take a snapshot of the current buffer.
938  *
939  * This causes a swap between the snapshot buffer and the current live
940  * tracing buffer. You can use this to take snapshots of the live
941  * trace when some condition is triggered, but continue to trace.
942  *
943  * Note, make sure to allocate the snapshot either with
944  * tracing_snapshot_alloc(), or by doing it manually
945  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
946  *
947  * If the snapshot buffer is not allocated, it will stop tracing.
948  * Basically making a permanent snapshot.
949  */
950 void tracing_snapshot(void)
951 {
952 	struct trace_array *tr = &global_trace;
953 
954 	tracing_snapshot_instance(tr);
955 }
956 EXPORT_SYMBOL_GPL(tracing_snapshot);
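
/*
 * Illustrative use (sketch; the condition is hypothetical): once the
 * snapshot buffer is allocated, a debugging hook can capture the trace
 * leading up to an event without stopping tracing:
 *
 *	if (unlikely(saw_bad_state))
 *		tracing_snapshot();
 */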
957 
958 /**
959  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
960  * @tr:		The tracing instance to snapshot
961  * @cond_data:	The data to be tested conditionally, and possibly saved
962  *
963  * This is the same as tracing_snapshot() except that the snapshot is
964  * conditional - the snapshot will only happen if the
965  * cond_snapshot.update() implementation receiving the cond_data
966  * returns true, which means that the trace array's cond_snapshot
967  * update() operation used the cond_data to determine whether the
968  * snapshot should be taken, and if it was, presumably saved it along
969  * with the snapshot.
970  */
971 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
972 {
973 	tracing_snapshot_instance_cond(tr, cond_data);
974 }
975 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
976 
977 /**
978  * tracing_snapshot_cond_data - get the user data associated with a snapshot
979  * @tr:		The tracing instance
980  *
981  * When the user enables a conditional snapshot using
982  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
983  * with the snapshot.  This accessor is used to retrieve it.
984  *
985  * Should not be called from cond_snapshot.update(), since it takes
986  * the tr->max_lock lock, which the code calling
987  * cond_snapshot.update() has already done.
988  *
989  * Returns the cond_data associated with the trace array's snapshot.
990  */
991 void *tracing_cond_snapshot_data(struct trace_array *tr)
992 {
993 	void *cond_data = NULL;
994 
995 	arch_spin_lock(&tr->max_lock);
996 
997 	if (tr->cond_snapshot)
998 		cond_data = tr->cond_snapshot->cond_data;
999 
1000 	arch_spin_unlock(&tr->max_lock);
1001 
1002 	return cond_data;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1005 
1006 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1007 					struct trace_buffer *size_buf, int cpu_id);
1008 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1009 
1010 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1011 {
1012 	int ret;
1013 
1014 	if (!tr->allocated_snapshot) {
1015 
1016 		/* allocate spare buffer */
1017 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1018 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1019 		if (ret < 0)
1020 			return ret;
1021 
1022 		tr->allocated_snapshot = true;
1023 	}
1024 
1025 	return 0;
1026 }
1027 
1028 static void free_snapshot(struct trace_array *tr)
1029 {
1030 	/*
1031 	 * We don't free the ring buffer; instead, we resize it because
1032 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1033 	 * we want to preserve it.
1034 	 */
1035 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1036 	set_buffer_entries(&tr->max_buffer, 1);
1037 	tracing_reset_online_cpus(&tr->max_buffer);
1038 	tr->allocated_snapshot = false;
1039 }
1040 
1041 /**
1042  * tracing_alloc_snapshot - allocate snapshot buffer.
1043  *
1044  * This only allocates the snapshot buffer if it isn't already
1045  * allocated - it doesn't also take a snapshot.
1046  *
1047  * This is meant to be used in cases where the snapshot buffer needs
1048  * to be set up for events that can't sleep but need to be able to
1049  * trigger a snapshot.
1050  */
1051 int tracing_alloc_snapshot(void)
1052 {
1053 	struct trace_array *tr = &global_trace;
1054 	int ret;
1055 
1056 	ret = tracing_alloc_snapshot_instance(tr);
1057 	WARN_ON(ret < 0);
1058 
1059 	return ret;
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1062 
1063 /**
1064  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1065  *
1066  * This is similar to tracing_snapshot(), but it will allocate the
1067  * snapshot buffer if it isn't already allocated. Use this only
1068  * where it is safe to sleep, as the allocation may sleep.
1069  *
1070  * This causes a swap between the snapshot buffer and the current live
1071  * tracing buffer. You can use this to take snapshots of the live
1072  * trace when some condition is triggered, but continue to trace.
1073  */
1074 void tracing_snapshot_alloc(void)
1075 {
1076 	int ret;
1077 
1078 	ret = tracing_alloc_snapshot();
1079 	if (ret < 0)
1080 		return;
1081 
1082 	tracing_snapshot();
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1085 
1086 /**
1087  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1088  * @tr:		The tracing instance
1089  * @cond_data:	User data to associate with the snapshot
1090  * @update:	Implementation of the cond_snapshot update function
1091  *
1092  * Check whether the conditional snapshot for the given instance has
1093  * already been enabled, or if the current tracer is already using a
1094  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1095  * save the cond_data and update function inside.
1096  *
1097  * Returns 0 if successful, error otherwise.
1098  */
1099 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1100 				 cond_update_fn_t update)
1101 {
1102 	struct cond_snapshot *cond_snapshot;
1103 	int ret = 0;
1104 
1105 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1106 	if (!cond_snapshot)
1107 		return -ENOMEM;
1108 
1109 	cond_snapshot->cond_data = cond_data;
1110 	cond_snapshot->update = update;
1111 
1112 	mutex_lock(&trace_types_lock);
1113 
1114 	ret = tracing_alloc_snapshot_instance(tr);
1115 	if (ret)
1116 		goto fail_unlock;
1117 
1118 	if (tr->current_trace->use_max_tr) {
1119 		ret = -EBUSY;
1120 		goto fail_unlock;
1121 	}
1122 
1123 	/*
1124 	 * The cond_snapshot can only change to NULL without the
1125 	 * trace_types_lock. We don't care if we race with it going
1126 	 * to NULL, but we want to make sure that it's not set to
1127 	 * something other than NULL when we get here, which we can
1128 	 * do safely with only holding the trace_types_lock and not
1129 	 * having to take the max_lock.
1130 	 */
1131 	if (tr->cond_snapshot) {
1132 		ret = -EBUSY;
1133 		goto fail_unlock;
1134 	}
1135 
1136 	arch_spin_lock(&tr->max_lock);
1137 	tr->cond_snapshot = cond_snapshot;
1138 	arch_spin_unlock(&tr->max_lock);
1139 
1140 	mutex_unlock(&trace_types_lock);
1141 
1142 	return ret;
1143 
1144  fail_unlock:
1145 	mutex_unlock(&trace_types_lock);
1146 	kfree(cond_snapshot);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
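
/*
 * Illustrative use (sketch; all "example_*" names are hypothetical): the
 * caller supplies an update callback that decides, per call, whether the
 * snapshot swap should happen:
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return READ_ONCE(*(bool *)cond_data);
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &example_flag, example_update);
 *	...
 *	tracing_snapshot_cond(tr, &example_flag);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */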
1150 
1151 /**
1152  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1153  * @tr:		The tracing instance
1154  *
1155  * Check whether the conditional snapshot for the given instance is
1156  * enabled; if so, free the cond_snapshot associated with it,
1157  * otherwise return -EINVAL.
1158  *
1159  * Returns 0 if successful, error otherwise.
1160  */
1161 int tracing_snapshot_cond_disable(struct trace_array *tr)
1162 {
1163 	int ret = 0;
1164 
1165 	arch_spin_lock(&tr->max_lock);
1166 
1167 	if (!tr->cond_snapshot)
1168 		ret = -EINVAL;
1169 	else {
1170 		kfree(tr->cond_snapshot);
1171 		tr->cond_snapshot = NULL;
1172 	}
1173 
1174 	arch_spin_unlock(&tr->max_lock);
1175 
1176 	return ret;
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1179 #else
1180 void tracing_snapshot(void)
1181 {
1182 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1183 }
1184 EXPORT_SYMBOL_GPL(tracing_snapshot);
1185 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1186 {
1187 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1190 int tracing_alloc_snapshot(void)
1191 {
1192 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1193 	return -ENODEV;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1196 void tracing_snapshot_alloc(void)
1197 {
1198 	/* Give warning */
1199 	tracing_snapshot();
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204 	return NULL;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1208 {
1209 	return -ENODEV;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1212 int tracing_snapshot_cond_disable(struct trace_array *tr)
1213 {
1214 	return false;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1217 #endif /* CONFIG_TRACER_SNAPSHOT */
1218 
1219 void tracer_tracing_off(struct trace_array *tr)
1220 {
1221 	if (tr->trace_buffer.buffer)
1222 		ring_buffer_record_off(tr->trace_buffer.buffer);
1223 	/*
1224 	 * This flag is looked at when buffers haven't been allocated
1225 	 * yet, or by some tracers (like irqsoff) that just want to
1226 	 * know if the ring buffer has been disabled, and it can handle
1227 	 * races where it gets disabled while we still do a record.
1228 	 * As the check is in the fast path of the tracers, it is more
1229 	 * important to be fast than accurate.
1230 	 */
1231 	tr->buffer_disabled = 1;
1232 	/* Make the flag seen by readers */
1233 	smp_wmb();
1234 }
1235 
1236 /**
1237  * tracing_off - turn off tracing buffers
1238  *
1239  * This function stops the tracing buffers from recording data.
1240  * It does not disable any overhead the tracers themselves may
1241  * be causing. This function simply causes all recording to
1242  * the ring buffers to fail.
1243  */
1244 void tracing_off(void)
1245 {
1246 	tracer_tracing_off(&global_trace);
1247 }
1248 EXPORT_SYMBOL_GPL(tracing_off);
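
/*
 * Illustrative pairing (sketch; the check is hypothetical): tracing_off()
 * is often used to freeze the buffers at the moment of interest so the
 * trace leading up to it is preserved for later inspection:
 *
 *	if (detected_suspicious_state())
 *		tracing_off();
 */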
1249 
1250 void disable_trace_on_warning(void)
1251 {
1252 	if (__disable_trace_on_warning)
1253 		tracing_off();
1254 }
1255 
1256 /**
1257  * tracer_tracing_is_on - show real state of ring buffer enabled
1258  * @tr: the trace array to check whether its ring buffer is enabled
1259  *
1260  * Shows real state of the ring buffer if it is enabled or not.
1261  */
1262 bool tracer_tracing_is_on(struct trace_array *tr)
1263 {
1264 	if (tr->trace_buffer.buffer)
1265 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1266 	return !tr->buffer_disabled;
1267 }
1268 
1269 /**
1270  * tracing_is_on - show state of ring buffers enabled
1271  */
1272 int tracing_is_on(void)
1273 {
1274 	return tracer_tracing_is_on(&global_trace);
1275 }
1276 EXPORT_SYMBOL_GPL(tracing_is_on);
1277 
1278 static int __init set_buf_size(char *str)
1279 {
1280 	unsigned long buf_size;
1281 
1282 	if (!str)
1283 		return 0;
1284 	buf_size = memparse(str, &str);
1285 	/* nr_entries can not be zero */
1286 	if (buf_size == 0)
1287 		return 0;
1288 	trace_buf_size = buf_size;
1289 	return 1;
1290 }
1291 __setup("trace_buf_size=", set_buf_size);
1292 
1293 static int __init set_tracing_thresh(char *str)
1294 {
1295 	unsigned long threshold;
1296 	int ret;
1297 
1298 	if (!str)
1299 		return 0;
1300 	ret = kstrtoul(str, 0, &threshold);
1301 	if (ret < 0)
1302 		return 0;
1303 	tracing_thresh = threshold * 1000;
1304 	return 1;
1305 }
1306 __setup("tracing_thresh=", set_tracing_thresh);
1307 
1308 unsigned long nsecs_to_usecs(unsigned long nsecs)
1309 {
1310 	return nsecs / 1000;
1311 }
1312 
1313 /*
1314  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1315  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1316  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1317  * of strings in the order that the evals (enum) were defined.
1318  */
1319 #undef C
1320 #define C(a, b) b
1321 
1322 /* These must match the bit positions in trace_iterator_flags */
1323 static const char *trace_options[] = {
1324 	TRACE_FLAGS
1325 	NULL
1326 };
1327 
1328 static struct {
1329 	u64 (*func)(void);
1330 	const char *name;
1331 	int in_ns;		/* is this clock in nanoseconds? */
1332 } trace_clocks[] = {
1333 	{ trace_clock_local,		"local",	1 },
1334 	{ trace_clock_global,		"global",	1 },
1335 	{ trace_clock_counter,		"counter",	0 },
1336 	{ trace_clock_jiffies,		"uptime",	0 },
1337 	{ trace_clock,			"perf",		1 },
1338 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1339 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1340 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1341 	ARCH_TRACE_CLOCKS
1342 };
1343 
1344 bool trace_clock_in_ns(struct trace_array *tr)
1345 {
1346 	if (trace_clocks[tr->clock_id].in_ns)
1347 		return true;
1348 
1349 	return false;
1350 }
1351 
1352 /*
1353  * trace_parser_get_init - gets the buffer for trace parser
1354  */
1355 int trace_parser_get_init(struct trace_parser *parser, int size)
1356 {
1357 	memset(parser, 0, sizeof(*parser));
1358 
1359 	parser->buffer = kmalloc(size, GFP_KERNEL);
1360 	if (!parser->buffer)
1361 		return 1;
1362 
1363 	parser->size = size;
1364 	return 0;
1365 }
1366 
1367 /*
1368  * trace_parser_put - frees the buffer for trace parser
1369  */
1370 void trace_parser_put(struct trace_parser *parser)
1371 {
1372 	kfree(parser->buffer);
1373 	parser->buffer = NULL;
1374 }
1375 
1376 /*
1377  * trace_get_user - reads the user input string separated by space
1378  * (matched by isspace(ch))
1379  *
1380  * For each string found the 'struct trace_parser' is updated,
1381  * and the function returns.
1382  *
1383  * Returns number of bytes read.
1384  *
1385  * See kernel/trace/trace.h for 'struct trace_parser' details.
1386  */
1387 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1388 	size_t cnt, loff_t *ppos)
1389 {
1390 	char ch;
1391 	size_t read = 0;
1392 	ssize_t ret;
1393 
1394 	if (!*ppos)
1395 		trace_parser_clear(parser);
1396 
1397 	ret = get_user(ch, ubuf++);
1398 	if (ret)
1399 		goto out;
1400 
1401 	read++;
1402 	cnt--;
1403 
1404 	/*
1405 	 * If the parser was not finished with the last write,
1406 	 * continue reading the user input without skipping spaces.
1407 	 */
1408 	if (!parser->cont) {
1409 		/* skip white space */
1410 		while (cnt && isspace(ch)) {
1411 			ret = get_user(ch, ubuf++);
1412 			if (ret)
1413 				goto out;
1414 			read++;
1415 			cnt--;
1416 		}
1417 
1418 		parser->idx = 0;
1419 
1420 		/* only spaces were written */
1421 		if (isspace(ch) || !ch) {
1422 			*ppos += read;
1423 			ret = read;
1424 			goto out;
1425 		}
1426 	}
1427 
1428 	/* read the non-space input */
1429 	while (cnt && !isspace(ch) && ch) {
1430 		if (parser->idx < parser->size - 1)
1431 			parser->buffer[parser->idx++] = ch;
1432 		else {
1433 			ret = -EINVAL;
1434 			goto out;
1435 		}
1436 		ret = get_user(ch, ubuf++);
1437 		if (ret)
1438 			goto out;
1439 		read++;
1440 		cnt--;
1441 	}
1442 
1443 	/* We either got finished input or we have to wait for another call. */
1444 	if (isspace(ch) || !ch) {
1445 		parser->buffer[parser->idx] = 0;
1446 		parser->cont = false;
1447 	} else if (parser->idx < parser->size - 1) {
1448 		parser->cont = true;
1449 		parser->buffer[parser->idx++] = ch;
1450 		/* Make sure the parsed string always terminates with '\0'. */
1451 		parser->buffer[parser->idx] = 0;
1452 	} else {
1453 		ret = -EINVAL;
1454 		goto out;
1455 	}
1456 
1457 	*ppos += read;
1458 	ret = read;
1459 
1460 out:
1461 	return ret;
1462 }
1463 
1464 /* TODO add a seq_buf_to_buffer() */
1465 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1466 {
1467 	int len;
1468 
1469 	if (trace_seq_used(s) <= s->seq.readpos)
1470 		return -EBUSY;
1471 
1472 	len = trace_seq_used(s) - s->seq.readpos;
1473 	if (cnt > len)
1474 		cnt = len;
1475 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1476 
1477 	s->seq.readpos += cnt;
1478 	return cnt;
1479 }
1480 
1481 unsigned long __read_mostly	tracing_thresh;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 /*
1485  * Copy the new maximum trace into the separate maximum-trace
1486  * structure. (this way the maximum trace is permanently saved,
1487  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1488  */
1489 static void
1490 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1491 {
1492 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1493 	struct trace_buffer *max_buf = &tr->max_buffer;
1494 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1495 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1496 
1497 	max_buf->cpu = cpu;
1498 	max_buf->time_start = data->preempt_timestamp;
1499 
1500 	max_data->saved_latency = tr->max_latency;
1501 	max_data->critical_start = data->critical_start;
1502 	max_data->critical_end = data->critical_end;
1503 
1504 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1505 	max_data->pid = tsk->pid;
1506 	/*
1507 	 * If tsk == current, then use current_uid(), as that does not use
1508 	 * RCU. The irq tracer can be called out of RCU scope.
1509 	 */
1510 	if (tsk == current)
1511 		max_data->uid = current_uid();
1512 	else
1513 		max_data->uid = task_uid(tsk);
1514 
1515 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1516 	max_data->policy = tsk->policy;
1517 	max_data->rt_priority = tsk->rt_priority;
1518 
1519 	/* record this task's comm */
1520 	tracing_record_cmdline(tsk);
1521 }
1522 
1523 /**
1524  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1525  * @tr: the trace array
1526  * @tsk: the task with the latency
1527  * @cpu: The cpu that initiated the trace.
1528  * @cond_data: User data associated with a conditional snapshot
1529  *
1530  * Flip the buffers between the @tr and the max_tr and record information
1531  * about which task was the cause of this latency.
1532  */
1533 void
1534 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1535 	      void *cond_data)
1536 {
1537 	if (tr->stop_count)
1538 		return;
1539 
1540 	WARN_ON_ONCE(!irqs_disabled());
1541 
1542 	if (!tr->allocated_snapshot) {
1543 		/* Only the nop tracer should hit this when disabling */
1544 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1545 		return;
1546 	}
1547 
1548 	arch_spin_lock(&tr->max_lock);
1549 
1550 	/* Inherit the recordable setting from trace_buffer */
1551 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1552 		ring_buffer_record_on(tr->max_buffer.buffer);
1553 	else
1554 		ring_buffer_record_off(tr->max_buffer.buffer);
1555 
1556 #ifdef CONFIG_TRACER_SNAPSHOT
1557 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1558 		goto out_unlock;
1559 #endif
1560 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1561 
1562 	__update_max_tr(tr, tsk, cpu);
1563 
1564  out_unlock:
1565 	arch_spin_unlock(&tr->max_lock);
1566 }
1567 
1568 /**
1569  * update_max_tr_single - only copy one trace over, and reset the rest
1570  * @tr: the trace array
1571  * @tsk: task with the latency
1572  * @cpu: the cpu of the buffer to copy.
1573  *
1574  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1575  */
1576 void
1577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1578 {
1579 	int ret;
1580 
1581 	if (tr->stop_count)
1582 		return;
1583 
1584 	WARN_ON_ONCE(!irqs_disabled());
1585 	if (!tr->allocated_snapshot) {
1586 		/* Only the nop tracer should hit this when disabling */
1587 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1588 		return;
1589 	}
1590 
1591 	arch_spin_lock(&tr->max_lock);
1592 
1593 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1594 
1595 	if (ret == -EBUSY) {
1596 		/*
1597 		 * We failed to swap the buffer due to a commit taking
1598 		 * place on this CPU. We fail to record, but we reset
1599 		 * the max trace buffer (no one writes directly to it)
1600 		 * and flag that it failed.
1601 		 */
1602 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1603 			"Failed to swap buffers due to commit in progress\n");
1604 	}
1605 
1606 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1607 
1608 	__update_max_tr(tr, tsk, cpu);
1609 	arch_spin_unlock(&tr->max_lock);
1610 }
1611 #endif /* CONFIG_TRACER_MAX_TRACE */
1612 
1613 static int wait_on_pipe(struct trace_iterator *iter, int full)
1614 {
1615 	/* Iterators are static; they should be either filled or empty */
1616 	if (trace_buffer_iter(iter, iter->cpu_file))
1617 		return 0;
1618 
1619 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1620 				full);
1621 }
1622 
1623 #ifdef CONFIG_FTRACE_STARTUP_TEST
1624 static bool selftests_can_run;
1625 
1626 struct trace_selftests {
1627 	struct list_head		list;
1628 	struct tracer			*type;
1629 };
1630 
1631 static LIST_HEAD(postponed_selftests);
1632 
1633 static int save_selftest(struct tracer *type)
1634 {
1635 	struct trace_selftests *selftest;
1636 
1637 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1638 	if (!selftest)
1639 		return -ENOMEM;
1640 
1641 	selftest->type = type;
1642 	list_add(&selftest->list, &postponed_selftests);
1643 	return 0;
1644 }
1645 
1646 static int run_tracer_selftest(struct tracer *type)
1647 {
1648 	struct trace_array *tr = &global_trace;
1649 	struct tracer *saved_tracer = tr->current_trace;
1650 	int ret;
1651 
1652 	if (!type->selftest || tracing_selftest_disabled)
1653 		return 0;
1654 
1655 	/*
1656 	 * If a tracer registers early in boot up (before scheduling is
1657 	 * initialized and such), then do not run its selftests yet.
1658 	 * Instead, run it a little later in the boot process.
1659 	 */
1660 	if (!selftests_can_run)
1661 		return save_selftest(type);
1662 
1663 	/*
1664 	 * Run a selftest on this tracer.
1665 	 * Here we reset the trace buffer, and set the current
1666 	 * tracer to be this tracer. The tracer can then run some
1667 	 * internal tracing to verify that everything is in order.
1668 	 * If we fail, we do not register this tracer.
1669 	 */
1670 	tracing_reset_online_cpus(&tr->trace_buffer);
1671 
1672 	tr->current_trace = type;
1673 
1674 #ifdef CONFIG_TRACER_MAX_TRACE
1675 	if (type->use_max_tr) {
1676 		/* If we expanded the buffers, make sure the max is expanded too */
1677 		if (ring_buffer_expanded)
1678 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1679 					   RING_BUFFER_ALL_CPUS);
1680 		tr->allocated_snapshot = true;
1681 	}
1682 #endif
1683 
1684 	/* the test is responsible for initializing and enabling */
1685 	pr_info("Testing tracer %s: ", type->name);
1686 	ret = type->selftest(type, tr);
1687 	/* the test is responsible for resetting too */
1688 	tr->current_trace = saved_tracer;
1689 	if (ret) {
1690 		printk(KERN_CONT "FAILED!\n");
1691 		/* Add the warning after printing 'FAILED' */
1692 		WARN_ON(1);
1693 		return -1;
1694 	}
1695 	/* Only reset on passing, to avoid touching corrupted buffers */
1696 	tracing_reset_online_cpus(&tr->trace_buffer);
1697 
1698 #ifdef CONFIG_TRACER_MAX_TRACE
1699 	if (type->use_max_tr) {
1700 		tr->allocated_snapshot = false;
1701 
1702 		/* Shrink the max buffer again */
1703 		if (ring_buffer_expanded)
1704 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1705 					   RING_BUFFER_ALL_CPUS);
1706 	}
1707 #endif
1708 
1709 	printk(KERN_CONT "PASSED\n");
1710 	return 0;
1711 }
1712 
1713 static __init int init_trace_selftests(void)
1714 {
1715 	struct trace_selftests *p, *n;
1716 	struct tracer *t, **last;
1717 	int ret;
1718 
1719 	selftests_can_run = true;
1720 
1721 	mutex_lock(&trace_types_lock);
1722 
1723 	if (list_empty(&postponed_selftests))
1724 		goto out;
1725 
1726 	pr_info("Running postponed tracer tests:\n");
1727 
1728 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1729 		/* This loop can take minutes when sanitizers are enabled, so
1730 		 * let's make sure we allow RCU processing.
1731 		 */
1732 		cond_resched();
1733 		ret = run_tracer_selftest(p->type);
1734 		/* If the test fails, then warn and remove from available_tracers */
1735 		if (ret < 0) {
1736 			WARN(1, "tracer: %s failed selftest, disabling\n",
1737 			     p->type->name);
1738 			last = &trace_types;
1739 			for (t = trace_types; t; t = t->next) {
1740 				if (t == p->type) {
1741 					*last = t->next;
1742 					break;
1743 				}
1744 				last = &t->next;
1745 			}
1746 		}
1747 		list_del(&p->list);
1748 		kfree(p);
1749 	}
1750 
1751  out:
1752 	mutex_unlock(&trace_types_lock);
1753 
1754 	return 0;
1755 }
1756 core_initcall(init_trace_selftests);
1757 #else
1758 static inline int run_tracer_selftest(struct tracer *type)
1759 {
1760 	return 0;
1761 }
1762 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1763 
1764 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1765 
1766 static void __init apply_trace_boot_options(void);
1767 
1768 /**
1769  * register_tracer - register a tracer with the ftrace system.
1770  * @type: the plugin for the tracer
1771  *
1772  * Register a new plugin tracer.
1773  */
1774 int __init register_tracer(struct tracer *type)
1775 {
1776 	struct tracer *t;
1777 	int ret = 0;
1778 
1779 	if (!type->name) {
1780 		pr_info("Tracer must have a name\n");
1781 		return -1;
1782 	}
1783 
1784 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1785 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1786 		return -1;
1787 	}
1788 
1789 	mutex_lock(&trace_types_lock);
1790 
1791 	tracing_selftest_running = true;
1792 
1793 	for (t = trace_types; t; t = t->next) {
1794 		if (strcmp(type->name, t->name) == 0) {
1795 			/* already found */
1796 			pr_info("Tracer %s already registered\n",
1797 				type->name);
1798 			ret = -1;
1799 			goto out;
1800 		}
1801 	}
1802 
1803 	if (!type->set_flag)
1804 		type->set_flag = &dummy_set_flag;
1805 	if (!type->flags) {
1806 		/* allocate a dummy tracer_flags */
1807 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1808 		if (!type->flags) {
1809 			ret = -ENOMEM;
1810 			goto out;
1811 		}
1812 		type->flags->val = 0;
1813 		type->flags->opts = dummy_tracer_opt;
1814 	} else
1815 		if (!type->flags->opts)
1816 			type->flags->opts = dummy_tracer_opt;
1817 
1818 	/* store the tracer for __set_tracer_option */
1819 	type->flags->trace = type;
1820 
1821 	ret = run_tracer_selftest(type);
1822 	if (ret < 0)
1823 		goto out;
1824 
1825 	type->next = trace_types;
1826 	trace_types = type;
1827 	add_tracer_options(&global_trace, type);
1828 
1829  out:
1830 	tracing_selftest_running = false;
1831 	mutex_unlock(&trace_types_lock);
1832 
1833 	if (ret || !default_bootup_tracer)
1834 		goto out_unlock;
1835 
1836 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1837 		goto out_unlock;
1838 
1839 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1840 	/* Do we want this tracer to start on bootup? */
1841 	tracing_set_tracer(&global_trace, type->name);
1842 	default_bootup_tracer = NULL;
1843 
1844 	apply_trace_boot_options();
1845 
1846 	/* disable other selftests, since running them would break this tracer. */
1847 	tracing_selftest_disabled = true;
1848 #ifdef CONFIG_FTRACE_STARTUP_TEST
1849 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1850 	       type->name);
1851 #endif
1852 
1853  out_unlock:
1854 	return ret;
1855 }
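
/*
 * Illustrative registration (sketch; the "example_*" names are hypothetical,
 * see the in-tree tracers for complete implementations). Since
 * register_tracer() is __init, it must be called from init code:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */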
1856 
1857 void tracing_reset(struct trace_buffer *buf, int cpu)
1858 {
1859 	struct ring_buffer *buffer = buf->buffer;
1860 
1861 	if (!buffer)
1862 		return;
1863 
1864 	ring_buffer_record_disable(buffer);
1865 
1866 	/* Make sure all commits have finished */
1867 	synchronize_rcu();
1868 	ring_buffer_reset_cpu(buffer, cpu);
1869 
1870 	ring_buffer_record_enable(buffer);
1871 }
1872 
1873 void tracing_reset_online_cpus(struct trace_buffer *buf)
1874 {
1875 	struct ring_buffer *buffer = buf->buffer;
1876 	int cpu;
1877 
1878 	if (!buffer)
1879 		return;
1880 
1881 	ring_buffer_record_disable(buffer);
1882 
1883 	/* Make sure all commits have finished */
1884 	synchronize_rcu();
1885 
1886 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1887 
1888 	for_each_online_cpu(cpu)
1889 		ring_buffer_reset_cpu(buffer, cpu);
1890 
1891 	ring_buffer_record_enable(buffer);
1892 }
1893 
1894 /* Must have trace_types_lock held */
1895 void tracing_reset_all_online_cpus(void)
1896 {
1897 	struct trace_array *tr;
1898 
1899 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1900 		if (!tr->clear_trace)
1901 			continue;
1902 		tr->clear_trace = false;
1903 		tracing_reset_online_cpus(&tr->trace_buffer);
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 		tracing_reset_online_cpus(&tr->max_buffer);
1906 #endif
1907 	}
1908 }
1909 
1910 static int *tgid_map;
1911 
1912 #define SAVED_CMDLINES_DEFAULT 128
1913 #define NO_CMDLINE_MAP UINT_MAX
1914 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1915 struct saved_cmdlines_buffer {
1916 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1917 	unsigned *map_cmdline_to_pid;
1918 	unsigned cmdline_num;
1919 	int cmdline_idx;
1920 	char *saved_cmdlines;
1921 };
1922 static struct saved_cmdlines_buffer *savedcmd;
1923 
1924 /* temporarily disable recording */
1925 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1926 
1927 static inline char *get_saved_cmdlines(int idx)
1928 {
1929 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1930 }
1931 
1932 static inline void set_cmdline(int idx, const char *cmdline)
1933 {
1934 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1935 }
1936 
1937 static int allocate_cmdlines_buffer(unsigned int val,
1938 				    struct saved_cmdlines_buffer *s)
1939 {
1940 	s->map_cmdline_to_pid = kmalloc_array(val,
1941 					      sizeof(*s->map_cmdline_to_pid),
1942 					      GFP_KERNEL);
1943 	if (!s->map_cmdline_to_pid)
1944 		return -ENOMEM;
1945 
1946 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1947 	if (!s->saved_cmdlines) {
1948 		kfree(s->map_cmdline_to_pid);
1949 		return -ENOMEM;
1950 	}
1951 
1952 	s->cmdline_idx = 0;
1953 	s->cmdline_num = val;
1954 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1955 	       sizeof(s->map_pid_to_cmdline));
1956 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1957 	       val * sizeof(*s->map_cmdline_to_pid));
1958 
1959 	return 0;
1960 }
1961 
1962 static int trace_create_savedcmd(void)
1963 {
1964 	int ret;
1965 
1966 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1967 	if (!savedcmd)
1968 		return -ENOMEM;
1969 
1970 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1971 	if (ret < 0) {
1972 		kfree(savedcmd);
1973 		savedcmd = NULL;
1974 		return -ENOMEM;
1975 	}
1976 
1977 	return 0;
1978 }
1979 
1980 int is_tracing_stopped(void)
1981 {
1982 	return global_trace.stop_count;
1983 }
1984 
1985 /**
1986  * tracing_start - quick start of the tracer
1987  *
1988  * If tracing is enabled but was stopped by tracing_stop,
1989  * this will start the tracer back up.
1990  */
1991 void tracing_start(void)
1992 {
1993 	struct ring_buffer *buffer;
1994 	unsigned long flags;
1995 
1996 	if (tracing_disabled)
1997 		return;
1998 
1999 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2000 	if (--global_trace.stop_count) {
2001 		if (global_trace.stop_count < 0) {
2002 			/* Someone screwed up their debugging */
2003 			WARN_ON_ONCE(1);
2004 			global_trace.stop_count = 0;
2005 		}
2006 		goto out;
2007 	}
2008 
2009 	/* Prevent the buffers from switching */
2010 	arch_spin_lock(&global_trace.max_lock);
2011 
2012 	buffer = global_trace.trace_buffer.buffer;
2013 	if (buffer)
2014 		ring_buffer_record_enable(buffer);
2015 
2016 #ifdef CONFIG_TRACER_MAX_TRACE
2017 	buffer = global_trace.max_buffer.buffer;
2018 	if (buffer)
2019 		ring_buffer_record_enable(buffer);
2020 #endif
2021 
2022 	arch_spin_unlock(&global_trace.max_lock);
2023 
2024  out:
2025 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2026 }
2027 
2028 static void tracing_start_tr(struct trace_array *tr)
2029 {
2030 	struct ring_buffer *buffer;
2031 	unsigned long flags;
2032 
2033 	if (tracing_disabled)
2034 		return;
2035 
2036 	/* If global, we need to also start the max tracer */
2037 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2038 		return tracing_start();
2039 
2040 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2041 
2042 	if (--tr->stop_count) {
2043 		if (tr->stop_count < 0) {
2044 			/* Someone screwed up their debugging */
2045 			WARN_ON_ONCE(1);
2046 			tr->stop_count = 0;
2047 		}
2048 		goto out;
2049 	}
2050 
2051 	buffer = tr->trace_buffer.buffer;
2052 	if (buffer)
2053 		ring_buffer_record_enable(buffer);
2054 
2055  out:
2056 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2057 }
2058 
2059 /**
2060  * tracing_stop - quick stop of the tracer
2061  *
2062  * Light weight way to stop tracing. Use in conjunction with
2063  * tracing_start.
2064  */
2065 void tracing_stop(void)
2066 {
2067 	struct ring_buffer *buffer;
2068 	unsigned long flags;
2069 
2070 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071 	if (global_trace.stop_count++)
2072 		goto out;
2073 
2074 	/* Prevent the buffers from switching */
2075 	arch_spin_lock(&global_trace.max_lock);
2076 
2077 	buffer = global_trace.trace_buffer.buffer;
2078 	if (buffer)
2079 		ring_buffer_record_disable(buffer);
2080 
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082 	buffer = global_trace.max_buffer.buffer;
2083 	if (buffer)
2084 		ring_buffer_record_disable(buffer);
2085 #endif
2086 
2087 	arch_spin_unlock(&global_trace.max_lock);
2088 
2089  out:
2090 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2091 }
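
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * tracing_stop() and tracing_start() are meant to be used as a pair
 * around a region whose trace output should be frozen, for example
 * while inspecting the buffers after a problem has been detected:
 *
 *	tracing_stop();
 *	(examine or dump the now-quiescent trace buffers)
 *	tracing_start();
 *
 * The calls nest via stop_count, so balanced stop/start pairs from
 * different paths are safe.
 */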
2092 
2093 static void tracing_stop_tr(struct trace_array *tr)
2094 {
2095 	struct ring_buffer *buffer;
2096 	unsigned long flags;
2097 
2098 	/* If global, we need to also stop the max tracer */
2099 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2100 		return tracing_stop();
2101 
2102 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2103 	if (tr->stop_count++)
2104 		goto out;
2105 
2106 	buffer = tr->trace_buffer.buffer;
2107 	if (buffer)
2108 		ring_buffer_record_disable(buffer);
2109 
2110  out:
2111 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2112 }
2113 
2114 static int trace_save_cmdline(struct task_struct *tsk)
2115 {
2116 	unsigned pid, idx;
2117 
2118 	/* treat recording of idle task as a success */
2119 	if (!tsk->pid)
2120 		return 1;
2121 
2122 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2123 		return 0;
2124 
2125 	/*
2126 	 * It's not the end of the world if we don't get
2127 	 * the lock, but we also don't want to spin
2128 	 * nor do we want to disable interrupts,
2129 	 * so if we miss here, then better luck next time.
2130 	 */
2131 	if (!arch_spin_trylock(&trace_cmdline_lock))
2132 		return 0;
2133 
2134 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2135 	if (idx == NO_CMDLINE_MAP) {
2136 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2137 
2138 		/*
2139 		 * Check whether the cmdline buffer at idx has a pid
2140 		 * mapped. We are going to overwrite that entry so we
2141 		 * need to clear the map_pid_to_cmdline. Otherwise we
2142 		 * would read the new comm for the old pid.
2143 		 */
2144 		pid = savedcmd->map_cmdline_to_pid[idx];
2145 		if (pid != NO_CMDLINE_MAP)
2146 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2147 
2148 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2149 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2150 
2151 		savedcmd->cmdline_idx = idx;
2152 	}
2153 
2154 	set_cmdline(idx, tsk->comm);
2155 
2156 	arch_spin_unlock(&trace_cmdline_lock);
2157 
2158 	return 1;
2159 }
2160 
2161 static void __trace_find_cmdline(int pid, char comm[])
2162 {
2163 	unsigned map;
2164 
2165 	if (!pid) {
2166 		strcpy(comm, "<idle>");
2167 		return;
2168 	}
2169 
2170 	if (WARN_ON_ONCE(pid < 0)) {
2171 		strcpy(comm, "<XXX>");
2172 		return;
2173 	}
2174 
2175 	if (pid > PID_MAX_DEFAULT) {
2176 		strcpy(comm, "<...>");
2177 		return;
2178 	}
2179 
2180 	map = savedcmd->map_pid_to_cmdline[pid];
2181 	if (map != NO_CMDLINE_MAP)
2182 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2183 	else
2184 		strcpy(comm, "<...>");
2185 }
2186 
2187 void trace_find_cmdline(int pid, char comm[])
2188 {
2189 	preempt_disable();
2190 	arch_spin_lock(&trace_cmdline_lock);
2191 
2192 	__trace_find_cmdline(pid, comm);
2193 
2194 	arch_spin_unlock(&trace_cmdline_lock);
2195 	preempt_enable();
2196 }
2197 
2198 int trace_find_tgid(int pid)
2199 {
2200 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2201 		return 0;
2202 
2203 	return tgid_map[pid];
2204 }
2205 
2206 static int trace_save_tgid(struct task_struct *tsk)
2207 {
2208 	/* treat recording of idle task as a success */
2209 	if (!tsk->pid)
2210 		return 1;
2211 
2212 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2213 		return 0;
2214 
2215 	tgid_map[tsk->pid] = tsk->tgid;
2216 	return 1;
2217 }
2218 
2219 static bool tracing_record_taskinfo_skip(int flags)
2220 {
2221 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2222 		return true;
2223 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2224 		return true;
2225 	if (!__this_cpu_read(trace_taskinfo_save))
2226 		return true;
2227 	return false;
2228 }
2229 
2230 /**
2231  * tracing_record_taskinfo - record the task info of a task
2232  *
2233  * @task:  task to record
2234  * @flags: TRACE_RECORD_CMDLINE for recording comm
2235  *         TRACE_RECORD_TGID for recording tgid
2236  */
2237 void tracing_record_taskinfo(struct task_struct *task, int flags)
2238 {
2239 	bool done;
2240 
2241 	if (tracing_record_taskinfo_skip(flags))
2242 		return;
2243 
2244 	/*
2245 	 * Record as much task information as possible. If some fail, continue
2246 	 * to try to record the others.
2247 	 */
2248 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2249 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2250 
2251 	/* If recording any information failed, retry again soon. */
2252 	if (!done)
2253 		return;
2254 
2255 	__this_cpu_write(trace_taskinfo_save, false);
2256 }
2257 
2258 /**
2259  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2260  *
2261  * @prev: previous task during sched_switch
2262  * @next: next task during sched_switch
2263  * @flags: TRACE_RECORD_CMDLINE for recording comm
2264  *         TRACE_RECORD_TGID for recording tgid
2265  */
2266 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2267 					  struct task_struct *next, int flags)
2268 {
2269 	bool done;
2270 
2271 	if (tracing_record_taskinfo_skip(flags))
2272 		return;
2273 
2274 	/*
2275 	 * Record as much task information as possible. If some fail, continue
2276 	 * to try to record the others.
2277 	 */
2278 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2279 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2280 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2281 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2282 
2283 	/* If recording any information failed, retry again soon. */
2284 	if (!done)
2285 		return;
2286 
2287 	__this_cpu_write(trace_taskinfo_save, false);
2288 }
2289 
2290 /* Helpers to record a specific task information */
2291 void tracing_record_cmdline(struct task_struct *task)
2292 {
2293 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2294 }
2295 
2296 void tracing_record_tgid(struct task_struct *task)
2297 {
2298 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2299 }
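
/*
 * Illustrative sketch (hypothetical caller): a tracer that emits events
 * referencing a task's pid usually records the task info first, so that
 * later output can resolve the pid to a comm and tgid:
 *
 *	tracing_record_cmdline(current);
 *	tracing_record_tgid(current);
 *	(then write the event that carries current->pid)
 */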
2300 
2301 /*
2302  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2303  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2304  * simplifies those functions and keeps them in sync.
2305  */
2306 enum print_line_t trace_handle_return(struct trace_seq *s)
2307 {
2308 	return trace_seq_has_overflowed(s) ?
2309 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_handle_return);
2312 
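
/*
 * Illustrative sketch (hypothetical event; "sample_*" names are made up):
 * an event output callback typically ends with trace_handle_return() so
 * that an overflowed trace_seq is reported as a partial line:
 *
 *	static enum print_line_t
 *	sample_event_trace(struct trace_iterator *iter, int flags,
 *			   struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "sample: cpu=%d\n", iter->cpu);
 *		return trace_handle_return(s);
 *	}
 */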
2313 void
2314 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2315 			     unsigned long flags, int pc)
2316 {
2317 	struct task_struct *tsk = current;
2318 
2319 	entry->preempt_count		= pc & 0xff;
2320 	entry->pid			= (tsk) ? tsk->pid : 0;
2321 	entry->type			= type;
2322 	entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326 		TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2329 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
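
/*
 * Illustrative sketch (hypothetical caller): code that reserves its own
 * ring buffer event fills in the common fields with the helper above
 * before the type-specific payload, roughly:
 *
 *	entry = ring_buffer_event_data(event);
 *	tracing_generic_entry_update(&entry->ent, type, flags, pc);
 *	(then fill in the type-specific fields of *entry)
 *
 * where "entry" points to a trace entry whose first member is a
 * struct trace_entry named "ent", as the entry types in this file are.
 */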
2335 
2336 struct ring_buffer_event *
2337 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2338 			  int type,
2339 			  unsigned long len,
2340 			  unsigned long flags, int pc)
2341 {
2342 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2343 }
2344 
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348 
2349 /**
2350  * trace_buffered_event_enable - enable buffering events
2351  *
2352  * When events are being filtered, it is quicker to use a temporary
2353  * that it will not be committed. Discarding an event from the ring
2354  * buffer is not as fast as committing it, and is much slower than
2355  * copying the data and then committing it.
2356  *
2357  * So when events are being filtered, allocate per-CPU buffers to
2358  * write the event data into. If the event is filtered and discarded,
2359  * it is simply dropped; otherwise, the entire data is committed to
2360  * the ring buffer in one shot.
2361  * in one shot.
2362  */
2363 void trace_buffered_event_enable(void)
2364 {
2365 	struct ring_buffer_event *event;
2366 	struct page *page;
2367 	int cpu;
2368 
2369 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370 
2371 	if (trace_buffered_event_ref++)
2372 		return;
2373 
2374 	for_each_tracing_cpu(cpu) {
2375 		page = alloc_pages_node(cpu_to_node(cpu),
2376 					GFP_KERNEL | __GFP_NORETRY, 0);
2377 		if (!page)
2378 			goto failed;
2379 
2380 		event = page_address(page);
2381 		memset(event, 0, sizeof(*event));
2382 
2383 		per_cpu(trace_buffered_event, cpu) = event;
2384 
2385 		preempt_disable();
2386 		if (cpu == smp_processor_id() &&
2387 		    this_cpu_read(trace_buffered_event) !=
2388 		    per_cpu(trace_buffered_event, cpu))
2389 			WARN_ON_ONCE(1);
2390 		preempt_enable();
2391 	}
2392 
2393 	return;
2394  failed:
2395 	trace_buffered_event_disable();
2396 }
2397 
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400 	/* Probably not needed, but do it anyway */
2401 	smp_rmb();
2402 	this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404 
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407 	this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409 
2410 /**
2411  * trace_buffered_event_disable - disable buffering events
2412  *
2413  * When a filter is removed, it is faster to not use the buffered
2414  * events, and to commit directly into the ring buffer. Free up
2415  * the temp buffers when there are no more users. This requires
2416  * special synchronization with current events.
2417  */
2418 void trace_buffered_event_disable(void)
2419 {
2420 	int cpu;
2421 
2422 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423 
2424 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425 		return;
2426 
2427 	if (--trace_buffered_event_ref)
2428 		return;
2429 
2430 	preempt_disable();
2431 	/* For each CPU, set the buffer as used. */
2432 	smp_call_function_many(tracing_buffer_mask,
2433 			       disable_trace_buffered_event, NULL, 1);
2434 	preempt_enable();
2435 
2436 	/* Wait for all current users to finish */
2437 	synchronize_rcu();
2438 
2439 	for_each_tracing_cpu(cpu) {
2440 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441 		per_cpu(trace_buffered_event, cpu) = NULL;
2442 	}
2443 	/*
2444 	 * Make sure trace_buffered_event is NULL before clearing
2445 	 * trace_buffered_event_cnt.
2446 	 */
2447 	smp_wmb();
2448 
2449 	preempt_disable();
2450 	/* Do the work on each cpu */
2451 	smp_call_function_many(tracing_buffer_mask,
2452 			       enable_trace_buffered_event, NULL, 1);
2453 	preempt_enable();
2454 }
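
/*
 * Illustrative sketch (hypothetical caller): both functions above expect
 * event_mutex to be held and the enable/disable calls to be balanced,
 * roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	(install the event filter)
 *	mutex_unlock(&event_mutex);
 *
 * and later, when the filter is removed:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */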
2455 
2456 static struct ring_buffer *temp_buffer;
2457 
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460 			  struct trace_event_file *trace_file,
2461 			  int type, unsigned long len,
2462 			  unsigned long flags, int pc)
2463 {
2464 	struct ring_buffer_event *entry;
2465 	int val;
2466 
2467 	*current_rb = trace_file->tr->trace_buffer.buffer;
2468 
2469 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471 	    (entry = this_cpu_read(trace_buffered_event))) {
2472 		/* Try to use the per cpu buffer first */
2473 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2474 		if (val == 1) {
2475 			trace_event_setup(entry, type, flags, pc);
2476 			entry->array[0] = len;
2477 			return entry;
2478 		}
2479 		this_cpu_dec(trace_buffered_event_cnt);
2480 	}
2481 
2482 	entry = __trace_buffer_lock_reserve(*current_rb,
2483 					    type, len, flags, pc);
2484 	/*
2485 	 * If tracing is off, but we have triggers enabled
2486 	 * we still need to look at the event data. Use the temp_buffer
2487 	 * to store the trace event for the trigger to use. It's recursion
2488 	 * safe and will not be recorded anywhere.
2489 	 */
2490 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491 		*current_rb = temp_buffer;
2492 		entry = __trace_buffer_lock_reserve(*current_rb,
2493 						    type, len, flags, pc);
2494 	}
2495 	return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2498 
2499 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2500 static DEFINE_MUTEX(tracepoint_printk_mutex);
2501 
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504 	struct trace_event_call *event_call;
2505 	struct trace_event *event;
2506 	unsigned long flags;
2507 	struct trace_iterator *iter = tracepoint_print_iter;
2508 
2509 	/* We should never get here if iter is NULL */
2510 	if (WARN_ON_ONCE(!iter))
2511 		return;
2512 
2513 	event_call = fbuffer->trace_file->event_call;
2514 	if (!event_call || !event_call->event.funcs ||
2515 	    !event_call->event.funcs->trace)
2516 		return;
2517 
2518 	event = &fbuffer->trace_file->event_call->event;
2519 
2520 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521 	trace_seq_init(&iter->seq);
2522 	iter->ent = fbuffer->entry;
2523 	event_call->event.funcs->trace(iter, 0, event);
2524 	trace_seq_putc(&iter->seq, 0);
2525 	printk("%s", iter->seq.buffer);
2526 
2527 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529 
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531 			     void __user *buffer, size_t *lenp,
2532 			     loff_t *ppos)
2533 {
2534 	int save_tracepoint_printk;
2535 	int ret;
2536 
2537 	mutex_lock(&tracepoint_printk_mutex);
2538 	save_tracepoint_printk = tracepoint_printk;
2539 
2540 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541 
2542 	/*
2543 	 * This will force exiting early, as tracepoint_printk
2544 	 * is always zero when tracepoint_print_iter is not allocated.
2545 	 */
2546 	if (!tracepoint_print_iter)
2547 		tracepoint_printk = 0;
2548 
2549 	if (save_tracepoint_printk == tracepoint_printk)
2550 		goto out;
2551 
2552 	if (tracepoint_printk)
2553 		static_key_enable(&tracepoint_printk_key.key);
2554 	else
2555 		static_key_disable(&tracepoint_printk_key.key);
2556 
2557  out:
2558 	mutex_unlock(&tracepoint_printk_mutex);
2559 
2560 	return ret;
2561 }
2562 
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565 	if (static_key_false(&tracepoint_printk_key.key))
2566 		output_printk(fbuffer);
2567 
2568 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569 				    fbuffer->event, fbuffer->entry,
2570 				    fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573 
2574 /*
2575  * Skip 3:
2576  *
2577  *   trace_buffer_unlock_commit_regs()
2578  *   trace_event_buffer_commit()
2579  *   trace_event_raw_event_xxx()
2580  */
2581 # define STACK_SKIP 3
2582 
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584 				     struct ring_buffer *buffer,
2585 				     struct ring_buffer_event *event,
2586 				     unsigned long flags, int pc,
2587 				     struct pt_regs *regs)
2588 {
2589 	__buffer_unlock_commit(buffer, event);
2590 
2591 	/*
2592 	 * If regs is not set, then skip the necessary functions.
2593 	 * Note, we can still get here via blktrace, wakeup tracer
2594 	 * and mmiotrace, but that's ok if they lose a function or
2595 	 * two. They are not that meaningful.
2596 	 */
2597 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598 	ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600 
2601 /*
2602  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2603  */
2604 void
2605 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2606 				   struct ring_buffer_event *event)
2607 {
2608 	__buffer_unlock_commit(buffer, event);
2609 }
2610 
2611 static void
2612 trace_process_export(struct trace_export *export,
2613 	       struct ring_buffer_event *event)
2614 {
2615 	struct trace_entry *entry;
2616 	unsigned int size = 0;
2617 
2618 	entry = ring_buffer_event_data(event);
2619 	size = ring_buffer_event_length(event);
2620 	export->write(export, entry, size);
2621 }
2622 
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624 
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626 
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628 
2629 static inline void ftrace_exports_enable(void)
2630 {
2631 	static_branch_enable(&ftrace_exports_enabled);
2632 }
2633 
2634 static inline void ftrace_exports_disable(void)
2635 {
2636 	static_branch_disable(&ftrace_exports_enabled);
2637 }
2638 
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641 	struct trace_export *export;
2642 
2643 	preempt_disable_notrace();
2644 
2645 	export = rcu_dereference_raw_check(ftrace_exports_list);
2646 	while (export) {
2647 		trace_process_export(export, event);
2648 		export = rcu_dereference_raw_check(export->next);
2649 	}
2650 
2651 	preempt_enable_notrace();
2652 }
2653 
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657 	rcu_assign_pointer(export->next, *list);
2658 	/*
2659 	 * We are entering export into the list but another
2660 	 * CPU might be walking that list. We need to make sure
2661 	 * the export->next pointer is valid before another CPU sees
2662 	 * the export pointer inserted into the list.
2663 	 */
2664 	rcu_assign_pointer(*list, export);
2665 }
2666 
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670 	struct trace_export **p;
2671 
2672 	for (p = list; *p != NULL; p = &(*p)->next)
2673 		if (*p == export)
2674 			break;
2675 
2676 	if (*p != export)
2677 		return -1;
2678 
2679 	rcu_assign_pointer(*p, (*p)->next);
2680 
2681 	return 0;
2682 }
2683 
2684 static inline void
2685 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 	if (*list == NULL)
2688 		ftrace_exports_enable();
2689 
2690 	add_trace_export(list, export);
2691 }
2692 
2693 static inline int
2694 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2695 {
2696 	int ret;
2697 
2698 	ret = rm_trace_export(list, export);
2699 	if (*list == NULL)
2700 		ftrace_exports_disable();
2701 
2702 	return ret;
2703 }
2704 
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707 	if (WARN_ON_ONCE(!export->write))
2708 		return -1;
2709 
2710 	mutex_lock(&ftrace_export_lock);
2711 
2712 	add_ftrace_export(&ftrace_exports_list, export);
2713 
2714 	mutex_unlock(&ftrace_export_lock);
2715 
2716 	return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719 
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722 	int ret;
2723 
2724 	mutex_lock(&ftrace_export_lock);
2725 
2726 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2727 
2728 	mutex_unlock(&ftrace_export_lock);
2729 
2730 	return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
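
/*
 * Illustrative sketch (hypothetical consumer; "sample_*" names are made
 * up): a minimal trace_export only needs to provide ->write(), which
 * receives the raw trace entry and its length as passed by
 * trace_process_export() above:
 *
 *	static void sample_export_write(struct trace_export *export,
 *					const void *entry, unsigned int size)
 *	{
 *		(forward the entry bytes to some out-of-band channel)
 *	}
 *
 *	static struct trace_export sample_export = {
 *		.write	= sample_export_write,
 *	};
 *
 *	register_ftrace_export(&sample_export);
 *	...
 *	unregister_ftrace_export(&sample_export);
 */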
2733 
2734 void
2735 trace_function(struct trace_array *tr,
2736 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737 	       int pc)
2738 {
2739 	struct trace_event_call *call = &event_function;
2740 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741 	struct ring_buffer_event *event;
2742 	struct ftrace_entry *entry;
2743 
2744 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745 					    flags, pc);
2746 	if (!event)
2747 		return;
2748 	entry	= ring_buffer_event_data(event);
2749 	entry->ip			= ip;
2750 	entry->parent_ip		= parent_ip;
2751 
2752 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2753 		if (static_branch_unlikely(&ftrace_exports_enabled))
2754 			ftrace_exports(event);
2755 		__buffer_unlock_commit(buffer, event);
2756 	}
2757 }
2758 
2759 #ifdef CONFIG_STACKTRACE
2760 
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING	4
2763 
2764 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765 
2766 struct ftrace_stack {
2767 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769 
2770 
2771 struct ftrace_stacks {
2772 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2773 };
2774 
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777 
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779 				 unsigned long flags,
2780 				 int skip, int pc, struct pt_regs *regs)
2781 {
2782 	struct trace_event_call *call = &event_kernel_stack;
2783 	struct ring_buffer_event *event;
2784 	unsigned int size, nr_entries;
2785 	struct ftrace_stack *fstack;
2786 	struct stack_entry *entry;
2787 	int stackidx;
2788 
2789 	/*
2790 	 * Add one, for this function and the call to stack_trace_save().
2791 	 * If regs is set, then these functions will not be in the way.
2792 	 */
2793 #ifndef CONFIG_UNWINDER_ORC
2794 	if (!regs)
2795 		skip++;
2796 #endif
2797 
2798 	/*
2799 	 * Since events can happen in NMIs there's no safe way to
2800 	 * hand out a single per-CPU stack buffer. Instead, each nesting
2801 	 * context (normal, softirq, irq, NMI) reserves its own slot in
2802 	 * the per-CPU ftrace_stacks array via the reserve counter below.
2803 	 */
2804 	preempt_disable_notrace();
2805 
2806 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807 
2808 	/* This should never happen. If it does, yell once and skip */
2809 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2810 		goto out;
2811 
2812 	/*
2813 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814 	 * interrupt will either see the value pre increment or post
2815 	 * increment. If the interrupt happens pre increment it will have
2816 	 * restored the counter when it returns.  We just need a barrier to
2817 	 * keep gcc from moving things around.
2818 	 */
2819 	barrier();
2820 
2821 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822 	size = ARRAY_SIZE(fstack->calls);
2823 
2824 	if (regs) {
2825 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826 						   size, skip);
2827 	} else {
2828 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2829 	}
2830 
2831 	size = nr_entries * sizeof(unsigned long);
2832 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833 					    sizeof(*entry) + size, flags, pc);
2834 	if (!event)
2835 		goto out;
2836 	entry = ring_buffer_event_data(event);
2837 
2838 	memcpy(&entry->caller, fstack->calls, size);
2839 	entry->size = nr_entries;
2840 
2841 	if (!call_filter_check_discard(call, entry, buffer, event))
2842 		__buffer_unlock_commit(buffer, event);
2843 
2844  out:
2845 	/* Again, don't let gcc optimize things here */
2846 	barrier();
2847 	__this_cpu_dec(ftrace_stack_reserve);
2848 	preempt_enable_notrace();
2849 
2850 }
2851 
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853 				      struct ring_buffer *buffer,
2854 				      unsigned long flags,
2855 				      int skip, int pc, struct pt_regs *regs)
2856 {
2857 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858 		return;
2859 
2860 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862 
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864 		   int pc)
2865 {
2866 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867 
2868 	if (rcu_is_watching()) {
2869 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870 		return;
2871 	}
2872 
2873 	/*
2874 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875 	 * but if the above rcu_is_watching() failed, then the NMI
2876 	 * triggered someplace critical, and rcu_irq_enter() should
2877 	 * not be called from NMI.
2878 	 */
2879 	if (unlikely(in_nmi()))
2880 		return;
2881 
2882 	rcu_irq_enter_irqson();
2883 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884 	rcu_irq_exit_irqson();
2885 }
2886 
2887 /**
2888  * trace_dump_stack - record a stack back trace in the trace buffer
2889  * @skip: Number of functions to skip (helper handlers)
2890  */
2891 void trace_dump_stack(int skip)
2892 {
2893 	unsigned long flags;
2894 
2895 	if (tracing_disabled || tracing_selftest_running)
2896 		return;
2897 
2898 	local_save_flags(flags);
2899 
2900 #ifndef CONFIG_UNWINDER_ORC
2901 	/* Skip 1 to skip this function. */
2902 	skip++;
2903 #endif
2904 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905 			     flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
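
/*
 * Illustrative sketch (hypothetical caller): trace_dump_stack() is usually
 * dropped into code being debugged to record how a particular point was
 * reached, e.g.:
 *
 *	if (suspect_condition)
 *		trace_dump_stack(0);
 *
 * A non-zero @skip drops that many callers from the top of the recorded
 * stack, which is useful when calling from a shared helper.
 */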
2908 
2909 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911 
2912 static void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915 	struct trace_event_call *call = &event_user_stack;
2916 	struct ring_buffer_event *event;
2917 	struct userstack_entry *entry;
2918 
2919 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2920 		return;
2921 
2922 	/*
2923 	 * NMIs can not handle page faults, even with fixups.
2924 	 * Saving the user stack can (and often does) fault.
2925 	 */
2926 	if (unlikely(in_nmi()))
2927 		return;
2928 
2929 	/*
2930 	 * prevent recursion, since the user stack tracing may
2931 	 * trigger other kernel events.
2932 	 */
2933 	preempt_disable();
2934 	if (__this_cpu_read(user_stack_count))
2935 		goto out;
2936 
2937 	__this_cpu_inc(user_stack_count);
2938 
2939 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2940 					    sizeof(*entry), flags, pc);
2941 	if (!event)
2942 		goto out_drop_count;
2943 	entry	= ring_buffer_event_data(event);
2944 
2945 	entry->tgid		= current->tgid;
2946 	memset(&entry->caller, 0, sizeof(entry->caller));
2947 
2948 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2949 	if (!call_filter_check_discard(call, entry, buffer, event))
2950 		__buffer_unlock_commit(buffer, event);
2951 
2952  out_drop_count:
2953 	__this_cpu_dec(user_stack_count);
2954  out:
2955 	preempt_enable();
2956 }
2957 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2958 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2959 				   unsigned long flags, int pc)
2960 {
2961 }
2962 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963 
2964 #endif /* CONFIG_STACKTRACE */
2965 
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968 	int nesting;
2969 	char buffer[4][TRACE_BUF_SIZE];
2970 };
2971 
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973 
2974 /*
2975  * This allows for lockless recording.  If we're nested too deeply, then
2976  * this returns NULL.
2977  */
2978 static char *get_trace_buf(void)
2979 {
2980 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981 
2982 	if (!buffer || buffer->nesting >= 4)
2983 		return NULL;
2984 
2985 	buffer->nesting++;
2986 
2987 	/* Interrupts must see nesting incremented before we use the buffer */
2988 	barrier();
2989 	return &buffer->buffer[buffer->nesting][0];
2990 }
2991 
2992 static void put_trace_buf(void)
2993 {
2994 	/* Don't let the decrement of nesting leak before this */
2995 	barrier();
2996 	this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
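
/*
 * Illustrative sketch: the pattern used by the trace_printk() helpers
 * below.  The buffer must be released in the same context that acquired
 * it, with preemption disabled in between:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		(format at most TRACE_BUF_SIZE bytes into buf)
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */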
2998 
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001 	struct trace_buffer_struct *buffers;
3002 
3003 	buffers = alloc_percpu(struct trace_buffer_struct);
3004 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005 		return -ENOMEM;
3006 
3007 	trace_percpu_buffer = buffers;
3008 	return 0;
3009 }
3010 
3011 static int buffers_allocated;
3012 
3013 void trace_printk_init_buffers(void)
3014 {
3015 	if (buffers_allocated)
3016 		return;
3017 
3018 	if (alloc_percpu_trace_buffer())
3019 		return;
3020 
3021 	/* trace_printk() is for debug use only. Don't use it in production. */
3022 
3023 	pr_warn("\n");
3024 	pr_warn("**********************************************************\n");
3025 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3026 	pr_warn("**                                                      **\n");
3027 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3028 	pr_warn("**                                                      **\n");
3029 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3030 	pr_warn("** unsafe for production use.                           **\n");
3031 	pr_warn("**                                                      **\n");
3032 	pr_warn("** If you see this message and you are not debugging    **\n");
3033 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3034 	pr_warn("**                                                      **\n");
3035 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3036 	pr_warn("**********************************************************\n");
3037 
3038 	/* Expand the buffers to set size */
3039 	tracing_update_buffers();
3040 
3041 	buffers_allocated = 1;
3042 
3043 	/*
3044 	 * trace_printk_init_buffers() can be called by modules.
3045 	 * If that happens, then we need to start cmdline recording
3046 	 * directly here. If global_trace.trace_buffer.buffer is already
3047 	 * allocated here, then this was called by module code.
3048 	 */
3049 	if (global_trace.trace_buffer.buffer)
3050 		tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053 
3054 void trace_printk_start_comm(void)
3055 {
3056 	/* Start tracing comms if trace printk is set */
3057 	if (!buffers_allocated)
3058 		return;
3059 	tracing_start_cmdline_record();
3060 }
3061 
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064 	if (!buffers_allocated)
3065 		return;
3066 
3067 	if (enabled)
3068 		tracing_start_cmdline_record();
3069 	else
3070 		tracing_stop_cmdline_record();
3071 }
3072 
3073 /**
3074  * trace_vbprintk - write binary msg to tracing buffer
3075  * @ip:    The address of the caller
3076  * @fmt:   The string format to write to the buffer
3077  * @args:  Arguments for @fmt
3078  */
3079 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3080 {
3081 	struct trace_event_call *call = &event_bprint;
3082 	struct ring_buffer_event *event;
3083 	struct ring_buffer *buffer;
3084 	struct trace_array *tr = &global_trace;
3085 	struct bprint_entry *entry;
3086 	unsigned long flags;
3087 	char *tbuffer;
3088 	int len = 0, size, pc;
3089 
3090 	if (unlikely(tracing_selftest_running || tracing_disabled))
3091 		return 0;
3092 
3093 	/* Don't pollute graph traces with trace_vprintk internals */
3094 	pause_graph_tracing();
3095 
3096 	pc = preempt_count();
3097 	preempt_disable_notrace();
3098 
3099 	tbuffer = get_trace_buf();
3100 	if (!tbuffer) {
3101 		len = 0;
3102 		goto out_nobuffer;
3103 	}
3104 
3105 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3106 
3107 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3108 		goto out;
3109 
3110 	local_save_flags(flags);
3111 	size = sizeof(*entry) + sizeof(u32) * len;
3112 	buffer = tr->trace_buffer.buffer;
3113 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3114 					    flags, pc);
3115 	if (!event)
3116 		goto out;
3117 	entry = ring_buffer_event_data(event);
3118 	entry->ip			= ip;
3119 	entry->fmt			= fmt;
3120 
3121 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3122 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3123 		__buffer_unlock_commit(buffer, event);
3124 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3125 	}
3126 
3127 out:
3128 	put_trace_buf();
3129 
3130 out_nobuffer:
3131 	preempt_enable_notrace();
3132 	unpause_graph_tracing();
3133 
3134 	return len;
3135 }
3136 EXPORT_SYMBOL_GPL(trace_vbprintk);
3137 
3138 __printf(3, 0)
3139 static int
3140 __trace_array_vprintk(struct ring_buffer *buffer,
3141 		      unsigned long ip, const char *fmt, va_list args)
3142 {
3143 	struct trace_event_call *call = &event_print;
3144 	struct ring_buffer_event *event;
3145 	int len = 0, size, pc;
3146 	struct print_entry *entry;
3147 	unsigned long flags;
3148 	char *tbuffer;
3149 
3150 	if (tracing_disabled || tracing_selftest_running)
3151 		return 0;
3152 
3153 	/* Don't pollute graph traces with trace_vprintk internals */
3154 	pause_graph_tracing();
3155 
3156 	pc = preempt_count();
3157 	preempt_disable_notrace();
3158 
3159 
3160 	tbuffer = get_trace_buf();
3161 	if (!tbuffer) {
3162 		len = 0;
3163 		goto out_nobuffer;
3164 	}
3165 
3166 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3167 
3168 	local_save_flags(flags);
3169 	size = sizeof(*entry) + len + 1;
3170 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3171 					    flags, pc);
3172 	if (!event)
3173 		goto out;
3174 	entry = ring_buffer_event_data(event);
3175 	entry->ip = ip;
3176 
3177 	memcpy(&entry->buf, tbuffer, len + 1);
3178 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3179 		__buffer_unlock_commit(buffer, event);
3180 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3181 	}
3182 
3183 out:
3184 	put_trace_buf();
3185 
3186 out_nobuffer:
3187 	preempt_enable_notrace();
3188 	unpause_graph_tracing();
3189 
3190 	return len;
3191 }
3192 
3193 __printf(3, 0)
3194 int trace_array_vprintk(struct trace_array *tr,
3195 			unsigned long ip, const char *fmt, va_list args)
3196 {
3197 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3198 }
3199 
3200 __printf(3, 0)
3201 int trace_array_printk(struct trace_array *tr,
3202 		       unsigned long ip, const char *fmt, ...)
3203 {
3204 	int ret;
3205 	va_list ap;
3206 
3207 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3208 		return 0;
3209 
3210 	va_start(ap, fmt);
3211 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3212 	va_end(ap);
3213 	return ret;
3214 }
3215 EXPORT_SYMBOL_GPL(trace_array_printk);
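
/*
 * Illustrative sketch (hypothetical caller): trace_array_printk() writes
 * into a specific trace instance rather than the global buffer.  Assuming
 * "tr" is a valid trace_array for an existing instance (how it is obtained
 * is outside this sketch):
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 */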
3216 
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219 			   unsigned long ip, const char *fmt, ...)
3220 {
3221 	int ret;
3222 	va_list ap;
3223 
3224 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 		return 0;
3226 
3227 	va_start(ap, fmt);
3228 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 	va_end(ap);
3230 	return ret;
3231 }
3232 
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239 
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243 
3244 	iter->idx++;
3245 	if (buf_iter)
3246 		ring_buffer_read(buf_iter, NULL);
3247 }
3248 
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 		unsigned long *lost_events)
3252 {
3253 	struct ring_buffer_event *event;
3254 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255 
3256 	if (buf_iter)
3257 		event = ring_buffer_iter_peek(buf_iter, ts);
3258 	else
3259 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 					 lost_events);
3261 
3262 	if (event) {
3263 		iter->ent_size = ring_buffer_event_length(event);
3264 		return ring_buffer_event_data(event);
3265 	}
3266 	iter->ent_size = 0;
3267 	return NULL;
3268 }
3269 
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 		  unsigned long *missing_events, u64 *ent_ts)
3273 {
3274 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 	struct trace_entry *ent, *next = NULL;
3276 	unsigned long lost_events = 0, next_lost = 0;
3277 	int cpu_file = iter->cpu_file;
3278 	u64 next_ts = 0, ts;
3279 	int next_cpu = -1;
3280 	int next_size = 0;
3281 	int cpu;
3282 
3283 	/*
3284 	 * If we are in a per_cpu trace file, don't bother iterating over
3285 	 * all CPUs; just peek at that CPU directly.
3286 	 */
3287 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 			return NULL;
3290 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 		if (ent_cpu)
3292 			*ent_cpu = cpu_file;
3293 
3294 		return ent;
3295 	}
3296 
3297 	for_each_tracing_cpu(cpu) {
3298 
3299 		if (ring_buffer_empty_cpu(buffer, cpu))
3300 			continue;
3301 
3302 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303 
3304 		/*
3305 		 * Pick the entry with the smallest timestamp:
3306 		 */
3307 		if (ent && (!next || ts < next_ts)) {
3308 			next = ent;
3309 			next_cpu = cpu;
3310 			next_ts = ts;
3311 			next_lost = lost_events;
3312 			next_size = iter->ent_size;
3313 		}
3314 	}
3315 
3316 	iter->ent_size = next_size;
3317 
3318 	if (ent_cpu)
3319 		*ent_cpu = next_cpu;
3320 
3321 	if (ent_ts)
3322 		*ent_ts = next_ts;
3323 
3324 	if (missing_events)
3325 		*missing_events = next_lost;
3326 
3327 	return next;
3328 }
3329 
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 					  int *ent_cpu, u64 *ent_ts)
3333 {
3334 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336 
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340 	iter->ent = __find_next_entry(iter, &iter->cpu,
3341 				      &iter->lost_events, &iter->ts);
3342 
3343 	if (iter->ent)
3344 		trace_iterator_increment(iter);
3345 
3346 	return iter->ent ? iter : NULL;
3347 }
3348 
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 			    &iter->lost_events);
3353 }
3354 
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357 	struct trace_iterator *iter = m->private;
3358 	int i = (int)*pos;
3359 	void *ent;
3360 
3361 	WARN_ON_ONCE(iter->leftover);
3362 
3363 	(*pos)++;
3364 
3365 	/* can't go backwards */
3366 	if (iter->idx > i)
3367 		return NULL;
3368 
3369 	if (iter->idx < 0)
3370 		ent = trace_find_next_entry_inc(iter);
3371 	else
3372 		ent = iter;
3373 
3374 	while (ent && iter->idx < i)
3375 		ent = trace_find_next_entry_inc(iter);
3376 
3377 	iter->pos = *pos;
3378 
3379 	return ent;
3380 }
3381 
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384 	struct ring_buffer_event *event;
3385 	struct ring_buffer_iter *buf_iter;
3386 	unsigned long entries = 0;
3387 	u64 ts;
3388 
3389 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390 
3391 	buf_iter = trace_buffer_iter(iter, cpu);
3392 	if (!buf_iter)
3393 		return;
3394 
3395 	ring_buffer_iter_reset(buf_iter);
3396 
3397 	/*
3398 	 * With the max latency tracers, it is possible that a reset
3399 	 * never took place on a CPU. This is evident when an entry's
3400 	 * timestamp is before the start of the buffer.
3401 	 */
3402 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 		if (ts >= iter->trace_buffer->time_start)
3404 			break;
3405 		entries++;
3406 		ring_buffer_read(buf_iter, NULL);
3407 	}
3408 
3409 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411 
3412 /*
3413  * The current tracer is copied to avoid taking a global lock
3414  * all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418 	struct trace_iterator *iter = m->private;
3419 	struct trace_array *tr = iter->tr;
3420 	int cpu_file = iter->cpu_file;
3421 	void *p = NULL;
3422 	loff_t l = 0;
3423 	int cpu;
3424 
3425 	/*
3426 	 * Copy the tracer to avoid using a global lock all around.
3427 	 * iter->trace is a copy of current_trace, so the name pointer
3428 	 * may be compared instead of doing a strcmp(), as iter->trace->name
3429 	 * will point to the same string as current_trace->name.
3430 	 */
3431 	mutex_lock(&trace_types_lock);
3432 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 		*iter->trace = *tr->current_trace;
3434 	mutex_unlock(&trace_types_lock);
3435 
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 	if (iter->snapshot && iter->trace->use_max_tr)
3438 		return ERR_PTR(-EBUSY);
3439 #endif
3440 
3441 	if (!iter->snapshot)
3442 		atomic_inc(&trace_record_taskinfo_disabled);
3443 
3444 	if (*pos != iter->pos) {
3445 		iter->ent = NULL;
3446 		iter->cpu = 0;
3447 		iter->idx = -1;
3448 
3449 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 			for_each_tracing_cpu(cpu)
3451 				tracing_iter_reset(iter, cpu);
3452 		} else
3453 			tracing_iter_reset(iter, cpu_file);
3454 
3455 		iter->leftover = 0;
3456 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 			;
3458 
3459 	} else {
3460 		/*
3461 		 * If we overflowed the seq_file before, then we want
3462 		 * to just reuse the trace_seq buffer again.
3463 		 */
3464 		if (iter->leftover)
3465 			p = iter;
3466 		else {
3467 			l = *pos - 1;
3468 			p = s_next(m, p, &l);
3469 		}
3470 	}
3471 
3472 	trace_event_read_lock();
3473 	trace_access_lock(cpu_file);
3474 	return p;
3475 }
3476 
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479 	struct trace_iterator *iter = m->private;
3480 
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482 	if (iter->snapshot && iter->trace->use_max_tr)
3483 		return;
3484 #endif
3485 
3486 	if (!iter->snapshot)
3487 		atomic_dec(&trace_record_taskinfo_disabled);
3488 
3489 	trace_access_unlock(iter->cpu_file);
3490 	trace_event_read_unlock();
3491 }
3492 
3493 static void
3494 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3495 		      unsigned long *entries, int cpu)
3496 {
3497 	unsigned long count;
3498 
3499 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3500 	/*
3501 	 * If this buffer has skipped entries, then we hold all
3502 	 * entries for the trace and we need to ignore the
3503 	 * ones before the time stamp.
3504 	 */
3505 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3506 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3507 		/* total is the same as the entries */
3508 		*total = count;
3509 	} else
3510 		*total = count +
3511 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3512 	*entries = count;
3513 }
3514 
3515 static void
3516 get_total_entries(struct trace_buffer *buf,
3517 		  unsigned long *total, unsigned long *entries)
3518 {
3519 	unsigned long t, e;
3520 	int cpu;
3521 
3522 	*total = 0;
3523 	*entries = 0;
3524 
3525 	for_each_tracing_cpu(cpu) {
3526 		get_total_entries_cpu(buf, &t, &e, cpu);
3527 		*total += t;
3528 		*entries += e;
3529 	}
3530 }
3531 
3532 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3533 {
3534 	unsigned long total, entries;
3535 
3536 	if (!tr)
3537 		tr = &global_trace;
3538 
3539 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3540 
3541 	return entries;
3542 }
3543 
3544 unsigned long trace_total_entries(struct trace_array *tr)
3545 {
3546 	unsigned long total, entries;
3547 
3548 	if (!tr)
3549 		tr = &global_trace;
3550 
3551 	get_total_entries(&tr->trace_buffer, &total, &entries);
3552 
3553 	return entries;
3554 }
3555 
3556 static void print_lat_help_header(struct seq_file *m)
3557 {
3558 	seq_puts(m, "#                  _------=> CPU#            \n"
3559 		    "#                 / _-----=> irqs-off        \n"
3560 		    "#                | / _----=> need-resched    \n"
3561 		    "#                || / _---=> hardirq/softirq \n"
3562 		    "#                ||| / _--=> preempt-depth   \n"
3563 		    "#                |||| /     delay            \n"
3564 		    "#  cmd     pid   ||||| time  |   caller      \n"
3565 		    "#     \\   /      |||||  \\    |   /         \n");
3566 }
3567 
3568 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3569 {
3570 	unsigned long total;
3571 	unsigned long entries;
3572 
3573 	get_total_entries(buf, &total, &entries);
3574 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3575 		   entries, total, num_online_cpus());
3576 	seq_puts(m, "#\n");
3577 }
3578 
3579 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3580 				   unsigned int flags)
3581 {
3582 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3583 
3584 	print_event_info(buf, m);
3585 
3586 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3587 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3588 }
3589 
3590 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3591 				       unsigned int flags)
3592 {
3593 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3594 	const char *space = "          ";
3595 	int prec = tgid ? 10 : 2;
3596 
3597 	print_event_info(buf, m);
3598 
3599 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3600 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3601 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3602 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3603 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3604 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3605 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3606 }
3607 
3608 void
3609 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3610 {
3611 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3612 	struct trace_buffer *buf = iter->trace_buffer;
3613 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3614 	struct tracer *type = iter->trace;
3615 	unsigned long entries;
3616 	unsigned long total;
3617 	const char *name = type->name;
3620 
3621 	get_total_entries(buf, &total, &entries);
3622 
3623 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3624 		   name, UTS_RELEASE);
3625 	seq_puts(m, "# -----------------------------------"
3626 		 "---------------------------------\n");
3627 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3628 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3629 		   nsecs_to_usecs(data->saved_latency),
3630 		   entries,
3631 		   total,
3632 		   buf->cpu,
3633 #if defined(CONFIG_PREEMPT_NONE)
3634 		   "server",
3635 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3636 		   "desktop",
3637 #elif defined(CONFIG_PREEMPT)
3638 		   "preempt",
3639 #else
3640 		   "unknown",
3641 #endif
3642 		   /* These are reserved for later use */
3643 		   0, 0, 0, 0);
3644 #ifdef CONFIG_SMP
3645 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3646 #else
3647 	seq_puts(m, ")\n");
3648 #endif
3649 	seq_puts(m, "#    -----------------\n");
3650 	seq_printf(m, "#    | task: %.16s-%d "
3651 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3652 		   data->comm, data->pid,
3653 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3654 		   data->policy, data->rt_priority);
3655 	seq_puts(m, "#    -----------------\n");
3656 
3657 	if (data->critical_start) {
3658 		seq_puts(m, "#  => started at: ");
3659 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3660 		trace_print_seq(m, &iter->seq);
3661 		seq_puts(m, "\n#  => ended at:   ");
3662 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3663 		trace_print_seq(m, &iter->seq);
3664 		seq_puts(m, "\n#\n");
3665 	}
3666 
3667 	seq_puts(m, "#\n");
3668 }
3669 
3670 static void test_cpu_buff_start(struct trace_iterator *iter)
3671 {
3672 	struct trace_seq *s = &iter->seq;
3673 	struct trace_array *tr = iter->tr;
3674 
3675 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3676 		return;
3677 
3678 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3679 		return;
3680 
3681 	if (cpumask_available(iter->started) &&
3682 	    cpumask_test_cpu(iter->cpu, iter->started))
3683 		return;
3684 
3685 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3686 		return;
3687 
3688 	if (cpumask_available(iter->started))
3689 		cpumask_set_cpu(iter->cpu, iter->started);
3690 
3691 	/* Don't print started cpu buffer for the first entry of the trace */
3692 	if (iter->idx > 1)
3693 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3694 				iter->cpu);
3695 }
3696 
3697 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3698 {
3699 	struct trace_array *tr = iter->tr;
3700 	struct trace_seq *s = &iter->seq;
3701 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3702 	struct trace_entry *entry;
3703 	struct trace_event *event;
3704 
3705 	entry = iter->ent;
3706 
3707 	test_cpu_buff_start(iter);
3708 
3709 	event = ftrace_find_event(entry->type);
3710 
3711 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3712 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3713 			trace_print_lat_context(iter);
3714 		else
3715 			trace_print_context(iter);
3716 	}
3717 
3718 	if (trace_seq_has_overflowed(s))
3719 		return TRACE_TYPE_PARTIAL_LINE;
3720 
3721 	if (event)
3722 		return event->funcs->trace(iter, sym_flags, event);
3723 
3724 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3725 
3726 	return trace_handle_return(s);
3727 }
3728 
3729 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3730 {
3731 	struct trace_array *tr = iter->tr;
3732 	struct trace_seq *s = &iter->seq;
3733 	struct trace_entry *entry;
3734 	struct trace_event *event;
3735 
3736 	entry = iter->ent;
3737 
3738 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3739 		trace_seq_printf(s, "%d %d %llu ",
3740 				 entry->pid, iter->cpu, iter->ts);
3741 
3742 	if (trace_seq_has_overflowed(s))
3743 		return TRACE_TYPE_PARTIAL_LINE;
3744 
3745 	event = ftrace_find_event(entry->type);
3746 	if (event)
3747 		return event->funcs->raw(iter, 0, event);
3748 
3749 	trace_seq_printf(s, "%d ?\n", entry->type);
3750 
3751 	return trace_handle_return(s);
3752 }
3753 
3754 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3755 {
3756 	struct trace_array *tr = iter->tr;
3757 	struct trace_seq *s = &iter->seq;
3758 	unsigned char newline = '\n';
3759 	struct trace_entry *entry;
3760 	struct trace_event *event;
3761 
3762 	entry = iter->ent;
3763 
3764 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3765 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3766 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3767 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3768 		if (trace_seq_has_overflowed(s))
3769 			return TRACE_TYPE_PARTIAL_LINE;
3770 	}
3771 
3772 	event = ftrace_find_event(entry->type);
3773 	if (event) {
3774 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3775 		if (ret != TRACE_TYPE_HANDLED)
3776 			return ret;
3777 	}
3778 
3779 	SEQ_PUT_FIELD(s, newline);
3780 
3781 	return trace_handle_return(s);
3782 }
3783 
3784 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3785 {
3786 	struct trace_array *tr = iter->tr;
3787 	struct trace_seq *s = &iter->seq;
3788 	struct trace_entry *entry;
3789 	struct trace_event *event;
3790 
3791 	entry = iter->ent;
3792 
3793 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3794 		SEQ_PUT_FIELD(s, entry->pid);
3795 		SEQ_PUT_FIELD(s, iter->cpu);
3796 		SEQ_PUT_FIELD(s, iter->ts);
3797 		if (trace_seq_has_overflowed(s))
3798 			return TRACE_TYPE_PARTIAL_LINE;
3799 	}
3800 
3801 	event = ftrace_find_event(entry->type);
3802 	return event ? event->funcs->binary(iter, 0, event) :
3803 		TRACE_TYPE_HANDLED;
3804 }
3805 
3806 int trace_empty(struct trace_iterator *iter)
3807 {
3808 	struct ring_buffer_iter *buf_iter;
3809 	int cpu;
3810 
3811 	/* If we are looking at one CPU buffer, only check that one */
3812 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3813 		cpu = iter->cpu_file;
3814 		buf_iter = trace_buffer_iter(iter, cpu);
3815 		if (buf_iter) {
3816 			if (!ring_buffer_iter_empty(buf_iter))
3817 				return 0;
3818 		} else {
3819 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3820 				return 0;
3821 		}
3822 		return 1;
3823 	}
3824 
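	/* Otherwise the trace is empty only if every per-cpu buffer is empty */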
3825 	for_each_tracing_cpu(cpu) {
3826 		buf_iter = trace_buffer_iter(iter, cpu);
3827 		if (buf_iter) {
3828 			if (!ring_buffer_iter_empty(buf_iter))
3829 				return 0;
3830 		} else {
3831 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3832 				return 0;
3833 		}
3834 	}
3835 
3836 	return 1;
3837 }
3838 
3839 /*  Called with trace_event_read_lock() held. */
3840 enum print_line_t print_trace_line(struct trace_iterator *iter)
3841 {
3842 	struct trace_array *tr = iter->tr;
3843 	unsigned long trace_flags = tr->trace_flags;
3844 	enum print_line_t ret;
3845 
3846 	if (iter->lost_events) {
3847 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3848 				 iter->cpu, iter->lost_events);
3849 		if (trace_seq_has_overflowed(&iter->seq))
3850 			return TRACE_TYPE_PARTIAL_LINE;
3851 	}
3852 
3853 	if (iter->trace && iter->trace->print_line) {
3854 		ret = iter->trace->print_line(iter);
3855 		if (ret != TRACE_TYPE_UNHANDLED)
3856 			return ret;
3857 	}
3858 
3859 	if (iter->ent->type == TRACE_BPUTS &&
3860 			trace_flags & TRACE_ITER_PRINTK &&
3861 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3862 		return trace_print_bputs_msg_only(iter);
3863 
3864 	if (iter->ent->type == TRACE_BPRINT &&
3865 			trace_flags & TRACE_ITER_PRINTK &&
3866 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3867 		return trace_print_bprintk_msg_only(iter);
3868 
3869 	if (iter->ent->type == TRACE_PRINT &&
3870 			trace_flags & TRACE_ITER_PRINTK &&
3871 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3872 		return trace_print_printk_msg_only(iter);
3873 
3874 	if (trace_flags & TRACE_ITER_BIN)
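	/*
	 * Binary, hex and raw output modes are checked in that order
	 * before falling back to the default format.
	 */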
3875 		return print_bin_fmt(iter);
3876 
3877 	if (trace_flags & TRACE_ITER_HEX)
3878 		return print_hex_fmt(iter);
3879 
3880 	if (trace_flags & TRACE_ITER_RAW)
3881 		return print_raw_fmt(iter);
3882 
3883 	return print_trace_fmt(iter);
3884 }
3885 
3886 void trace_latency_header(struct seq_file *m)
3887 {
3888 	struct trace_iterator *iter = m->private;
3889 	struct trace_array *tr = iter->tr;
3890 
3891 	/* print nothing if the buffers are empty */
3892 	if (trace_empty(iter))
3893 		return;
3894 
3895 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3896 		print_trace_header(m, iter);
3897 
3898 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3899 		print_lat_help_header(m);
3900 }
3901 
3902 void trace_default_header(struct seq_file *m)
3903 {
3904 	struct trace_iterator *iter = m->private;
3905 	struct trace_array *tr = iter->tr;
3906 	unsigned long trace_flags = tr->trace_flags;
3907 
3908 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3909 		return;
3910 
3911 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3912 		/* print nothing if the buffers are empty */
3913 		if (trace_empty(iter))
3914 			return;
3915 		print_trace_header(m, iter);
3916 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3917 			print_lat_help_header(m);
3918 	} else {
3919 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3920 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3921 				print_func_help_header_irq(iter->trace_buffer,
3922 							   m, trace_flags);
3923 			else
3924 				print_func_help_header(iter->trace_buffer, m,
3925 						       trace_flags);
3926 		}
3927 	}
3928 }
3929 
3930 static void test_ftrace_alive(struct seq_file *m)
3931 {
3932 	if (!ftrace_is_dead())
3933 		return;
3934 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3935 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3936 }
3937 
3938 #ifdef CONFIG_TRACER_MAX_TRACE
3939 static void show_snapshot_main_help(struct seq_file *m)
3940 {
3941 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3942 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3943 		    "#                      Takes a snapshot of the main buffer.\n"
3944 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3945 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3946 		    "#                       is not a '0' or '1')\n");
3947 }
3948 
3949 static void show_snapshot_percpu_help(struct seq_file *m)
3950 {
3951 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3952 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3953 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3954 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3955 #else
3956 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3957 		    "#                     Must use main snapshot file to allocate.\n");
3958 #endif
3959 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3960 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3961 		    "#                       is not a '0' or '1')\n");
3962 }
3963 
3964 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3965 {
3966 	if (iter->tr->allocated_snapshot)
3967 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3968 	else
3969 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3970 
3971 	seq_puts(m, "# Snapshot commands:\n");
3972 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3973 		show_snapshot_main_help(m);
3974 	else
3975 		show_snapshot_percpu_help(m);
3976 }
3977 #else
3978 /* Should never be called */
3979 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3980 #endif
3981 
3982 static int s_show(struct seq_file *m, void *v)
3983 {
3984 	struct trace_iterator *iter = v;
3985 	int ret;
3986 
3987 	if (iter->ent == NULL) {
3988 		if (iter->tr) {
3989 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3990 			seq_puts(m, "#\n");
3991 			test_ftrace_alive(m);
3992 		}
3993 		if (iter->snapshot && trace_empty(iter))
3994 			print_snapshot_help(m, iter);
3995 		else if (iter->trace && iter->trace->print_header)
3996 			iter->trace->print_header(m);
3997 		else
3998 			trace_default_header(m);
3999 
4000 	} else if (iter->leftover) {
4001 		/*
4002 		 * If we filled the seq_file buffer earlier, we
4003 		 * want to just show it now.
4004 		 */
4005 		ret = trace_print_seq(m, &iter->seq);
4006 
4007 		/* ret should this time be zero, but you never know */
4008 		iter->leftover = ret;
4009 
4010 	} else {
4011 		print_trace_line(iter);
4012 		ret = trace_print_seq(m, &iter->seq);
4013 		/*
4014 		 * If we overflow the seq_file buffer, then it will
4015 		 * ask us for this data again at start up.
4016 		 * Use that instead.
4017 		 *  ret is 0 if seq_file write succeeded.
4018 		 *        -1 otherwise.
4019 		 */
4020 		iter->leftover = ret;
4021 	}
4022 
4023 	return 0;
4024 }
4025 
4026 /*
4027  * Should be used after trace_array_get(); trace_types_lock
4028  * ensures that i_cdev has already been initialized.
4029  */
4030 static inline int tracing_get_cpu(struct inode *inode)
4031 {
4032 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4033 		return (long)inode->i_cdev - 1;
4034 	return RING_BUFFER_ALL_CPUS;
4035 }
4036 
4037 static const struct seq_operations tracer_seq_ops = {
4038 	.start		= s_start,
4039 	.next		= s_next,
4040 	.stop		= s_stop,
4041 	.show		= s_show,
4042 };
4043 
4044 static struct trace_iterator *
4045 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4046 {
4047 	struct trace_array *tr = inode->i_private;
4048 	struct trace_iterator *iter;
4049 	int cpu;
4050 
4051 	if (tracing_disabled)
4052 		return ERR_PTR(-ENODEV);
4053 
4054 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4055 	if (!iter)
4056 		return ERR_PTR(-ENOMEM);
4057 
4058 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4059 				    GFP_KERNEL);
4060 	if (!iter->buffer_iter)
4061 		goto release;
4062 
4063 	/*
4064 	 * We make a copy of the current tracer to avoid concurrent
4065 	 * changes on it while we are reading.
4066 	 */
4067 	mutex_lock(&trace_types_lock);
4068 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4069 	if (!iter->trace)
4070 		goto fail;
4071 
4072 	*iter->trace = *tr->current_trace;
4073 
4074 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4075 		goto fail;
4076 
4077 	iter->tr = tr;
4078 
4079 #ifdef CONFIG_TRACER_MAX_TRACE
4080 	/* Currently only the top directory has a snapshot */
4081 	if (tr->current_trace->print_max || snapshot)
4082 		iter->trace_buffer = &tr->max_buffer;
4083 	else
4084 #endif
4085 		iter->trace_buffer = &tr->trace_buffer;
4086 	iter->snapshot = snapshot;
4087 	iter->pos = -1;
4088 	iter->cpu_file = tracing_get_cpu(inode);
4089 	mutex_init(&iter->mutex);
4090 
4091 	/* Notify the tracer early, before we stop tracing. */
4092 	if (iter->trace && iter->trace->open)
4093 		iter->trace->open(iter);
4094 
4095 	/* Annotate start of buffers if we had overruns */
4096 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4097 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4098 
4099 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4100 	if (trace_clocks[tr->clock_id].in_ns)
4101 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4102 
4103 	/* stop the trace while dumping if we are not opening "snapshot" */
4104 	if (!iter->snapshot)
4105 		tracing_stop_tr(tr);
4106 
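	/*
	 * For all CPUs: prepare every per-cpu iterator first, do the
	 * synchronization once for all of them, then start reading.
	 */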
4107 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4108 		for_each_tracing_cpu(cpu) {
4109 			iter->buffer_iter[cpu] =
4110 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4111 							 cpu, GFP_KERNEL);
4112 		}
4113 		ring_buffer_read_prepare_sync();
4114 		for_each_tracing_cpu(cpu) {
4115 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4116 			tracing_iter_reset(iter, cpu);
4117 		}
4118 	} else {
4119 		cpu = iter->cpu_file;
4120 		iter->buffer_iter[cpu] =
4121 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4122 						 cpu, GFP_KERNEL);
4123 		ring_buffer_read_prepare_sync();
4124 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4125 		tracing_iter_reset(iter, cpu);
4126 	}
4127 
4128 	mutex_unlock(&trace_types_lock);
4129 
4130 	return iter;
4131 
4132  fail:
4133 	mutex_unlock(&trace_types_lock);
4134 	kfree(iter->trace);
4135 	kfree(iter->buffer_iter);
4136 release:
4137 	seq_release_private(inode, file);
4138 	return ERR_PTR(-ENOMEM);
4139 }
4140 
4141 int tracing_open_generic(struct inode *inode, struct file *filp)
4142 {
4143 	if (tracing_disabled)
4144 		return -ENODEV;
4145 
4146 	filp->private_data = inode->i_private;
4147 	return 0;
4148 }
4149 
4150 bool tracing_is_disabled(void)
4151 {
4152 	return (tracing_disabled) ? true : false;
4153 }
4154 
4155 /*
4156  * Open and update trace_array ref count.
4157  * Must have the current trace_array passed to it.
4158  */
4159 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4160 {
4161 	struct trace_array *tr = inode->i_private;
4162 
4163 	if (tracing_disabled)
4164 		return -ENODEV;
4165 
4166 	if (trace_array_get(tr) < 0)
4167 		return -ENODEV;
4168 
4169 	filp->private_data = inode->i_private;
4170 
4171 	return 0;
4172 }
4173 
4174 static int tracing_release(struct inode *inode, struct file *file)
4175 {
4176 	struct trace_array *tr = inode->i_private;
4177 	struct seq_file *m = file->private_data;
4178 	struct trace_iterator *iter;
4179 	int cpu;
4180 
4181 	if (!(file->f_mode & FMODE_READ)) {
4182 		trace_array_put(tr);
4183 		return 0;
4184 	}
4185 
4186 	/* Writes do not use seq_file */
4187 	iter = m->private;
4188 	mutex_lock(&trace_types_lock);
4189 
4190 	for_each_tracing_cpu(cpu) {
4191 		if (iter->buffer_iter[cpu])
4192 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4193 	}
4194 
4195 	if (iter->trace && iter->trace->close)
4196 		iter->trace->close(iter);
4197 
4198 	if (!iter->snapshot)
4199 		/* reenable tracing if it was previously enabled */
4200 		tracing_start_tr(tr);
4201 
4202 	__trace_array_put(tr);
4203 
4204 	mutex_unlock(&trace_types_lock);
4205 
4206 	mutex_destroy(&iter->mutex);
4207 	free_cpumask_var(iter->started);
4208 	kfree(iter->trace);
4209 	kfree(iter->buffer_iter);
4210 	seq_release_private(inode, file);
4211 
4212 	return 0;
4213 }
4214 
4215 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4216 {
4217 	struct trace_array *tr = inode->i_private;
4218 
4219 	trace_array_put(tr);
4220 	return 0;
4221 }
4222 
4223 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4224 {
4225 	struct trace_array *tr = inode->i_private;
4226 
4227 	trace_array_put(tr);
4228 
4229 	return single_release(inode, file);
4230 }
4231 
4232 static int tracing_open(struct inode *inode, struct file *file)
4233 {
4234 	struct trace_array *tr = inode->i_private;
4235 	struct trace_iterator *iter;
4236 	int ret = 0;
4237 
4238 	if (trace_array_get(tr) < 0)
4239 		return -ENODEV;
4240 
4241 	/* If this file was opened for write, then erase its contents */
4242 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4243 		int cpu = tracing_get_cpu(inode);
4244 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4245 
4246 #ifdef CONFIG_TRACER_MAX_TRACE
4247 		if (tr->current_trace->print_max)
4248 			trace_buf = &tr->max_buffer;
4249 #endif
4250 
4251 		if (cpu == RING_BUFFER_ALL_CPUS)
4252 			tracing_reset_online_cpus(trace_buf);
4253 		else
4254 			tracing_reset(trace_buf, cpu);
4255 	}
4256 
4257 	if (file->f_mode & FMODE_READ) {
4258 		iter = __tracing_open(inode, file, false);
4259 		if (IS_ERR(iter))
4260 			ret = PTR_ERR(iter);
4261 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4262 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4263 	}
4264 
4265 	if (ret < 0)
4266 		trace_array_put(tr);
4267 
4268 	return ret;
4269 }
4270 
4271 /*
4272  * Some tracers are not suitable for instance buffers.
4273  * A tracer is always available for the global array (toplevel),
4274  * or for an instance if it explicitly allows it.
4275  */
4276 static bool
4277 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4278 {
4279 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4280 }
4281 
4282 /* Find the next tracer that this trace array may use */
4283 static struct tracer *
4284 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4285 {
4286 	while (t && !trace_ok_for_array(t, tr))
4287 		t = t->next;
4288 
4289 	return t;
4290 }
4291 
4292 static void *
4293 t_next(struct seq_file *m, void *v, loff_t *pos)
4294 {
4295 	struct trace_array *tr = m->private;
4296 	struct tracer *t = v;
4297 
4298 	(*pos)++;
4299 
4300 	if (t)
4301 		t = get_tracer_for_array(tr, t->next);
4302 
4303 	return t;
4304 }
4305 
4306 static void *t_start(struct seq_file *m, loff_t *pos)
4307 {
4308 	struct trace_array *tr = m->private;
4309 	struct tracer *t;
4310 	loff_t l = 0;
4311 
4312 	mutex_lock(&trace_types_lock);
4313 
4314 	t = get_tracer_for_array(tr, trace_types);
4315 	for (; t && l < *pos; t = t_next(m, t, &l))
4316 			;
4317 
4318 	return t;
4319 }
4320 
4321 static void t_stop(struct seq_file *m, void *p)
4322 {
4323 	mutex_unlock(&trace_types_lock);
4324 }
4325 
4326 static int t_show(struct seq_file *m, void *v)
4327 {
4328 	struct tracer *t = v;
4329 
4330 	if (!t)
4331 		return 0;
4332 
4333 	seq_puts(m, t->name);
4334 	if (t->next)
4335 		seq_putc(m, ' ');
4336 	else
4337 		seq_putc(m, '\n');
4338 
4339 	return 0;
4340 }
4341 
4342 static const struct seq_operations show_traces_seq_ops = {
4343 	.start		= t_start,
4344 	.next		= t_next,
4345 	.stop		= t_stop,
4346 	.show		= t_show,
4347 };
4348 
4349 static int show_traces_open(struct inode *inode, struct file *file)
4350 {
4351 	struct trace_array *tr = inode->i_private;
4352 	struct seq_file *m;
4353 	int ret;
4354 
4355 	if (tracing_disabled)
4356 		return -ENODEV;
4357 
4358 	ret = seq_open(file, &show_traces_seq_ops);
4359 	if (ret)
4360 		return ret;
4361 
4362 	m = file->private_data;
4363 	m->private = tr;
4364 
4365 	return 0;
4366 }
4367 
4368 static ssize_t
4369 tracing_write_stub(struct file *filp, const char __user *ubuf,
4370 		   size_t count, loff_t *ppos)
4371 {
4372 	return count;
4373 }
4374 
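/*
 * Seeking only makes sense when the file was opened for reading
 * (and is thus backed by a seq_file); for write-only opens the
 * file position is simply reset to zero.
 */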
4375 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4376 {
4377 	int ret;
4378 
4379 	if (file->f_mode & FMODE_READ)
4380 		ret = seq_lseek(file, offset, whence);
4381 	else
4382 		file->f_pos = ret = 0;
4383 
4384 	return ret;
4385 }
4386 
4387 static const struct file_operations tracing_fops = {
4388 	.open		= tracing_open,
4389 	.read		= seq_read,
4390 	.write		= tracing_write_stub,
4391 	.llseek		= tracing_lseek,
4392 	.release	= tracing_release,
4393 };
4394 
4395 static const struct file_operations show_traces_fops = {
4396 	.open		= show_traces_open,
4397 	.read		= seq_read,
4398 	.release	= seq_release,
4399 	.llseek		= seq_lseek,
4400 };
4401 
4402 static ssize_t
4403 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4404 		     size_t count, loff_t *ppos)
4405 {
4406 	struct trace_array *tr = file_inode(filp)->i_private;
4407 	char *mask_str;
4408 	int len;
4409 
4410 	len = snprintf(NULL, 0, "%*pb\n",
4411 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4412 	mask_str = kmalloc(len, GFP_KERNEL);
4413 	if (!mask_str)
4414 		return -ENOMEM;
4415 
4416 	len = snprintf(mask_str, len, "%*pb\n",
4417 		       cpumask_pr_args(tr->tracing_cpumask));
4418 	if (len >= count) {
4419 		count = -EINVAL;
4420 		goto out_err;
4421 	}
4422 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4423 
4424 out_err:
4425 	kfree(mask_str);
4426 
4427 	return count;
4428 }
4429 
4430 static ssize_t
4431 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4432 		      size_t count, loff_t *ppos)
4433 {
4434 	struct trace_array *tr = file_inode(filp)->i_private;
4435 	cpumask_var_t tracing_cpumask_new;
4436 	int err, cpu;
4437 
4438 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4439 		return -ENOMEM;
4440 
4441 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4442 	if (err)
4443 		goto err_unlock;
4444 
4445 	local_irq_disable();
4446 	arch_spin_lock(&tr->max_lock);
4447 	for_each_tracing_cpu(cpu) {
4448 		/*
4449 		 * Increase/decrease the disabled counter if we are
4450 		 * about to flip a bit in the cpumask:
4451 		 */
4452 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4453 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4454 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4455 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4456 		}
4457 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4458 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4459 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4460 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4461 		}
4462 	}
4463 	arch_spin_unlock(&tr->max_lock);
4464 	local_irq_enable();
4465 
4466 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4467 	free_cpumask_var(tracing_cpumask_new);
4468 
4469 	return count;
4470 
4471 err_unlock:
4472 	free_cpumask_var(tracing_cpumask_new);
4473 
4474 	return err;
4475 }
4476 
4477 static const struct file_operations tracing_cpumask_fops = {
4478 	.open		= tracing_open_generic_tr,
4479 	.read		= tracing_cpumask_read,
4480 	.write		= tracing_cpumask_write,
4481 	.release	= tracing_release_generic_tr,
4482 	.llseek		= generic_file_llseek,
4483 };
4484 
4485 static int tracing_trace_options_show(struct seq_file *m, void *v)
4486 {
4487 	struct tracer_opt *trace_opts;
4488 	struct trace_array *tr = m->private;
4489 	u32 tracer_flags;
4490 	int i;
4491 
4492 	mutex_lock(&trace_types_lock);
4493 	tracer_flags = tr->current_trace->flags->val;
4494 	trace_opts = tr->current_trace->flags->opts;
4495 
4496 	for (i = 0; trace_options[i]; i++) {
4497 		if (tr->trace_flags & (1 << i))
4498 			seq_printf(m, "%s\n", trace_options[i]);
4499 		else
4500 			seq_printf(m, "no%s\n", trace_options[i]);
4501 	}
4502 
4503 	for (i = 0; trace_opts[i].name; i++) {
4504 		if (tracer_flags & trace_opts[i].bit)
4505 			seq_printf(m, "%s\n", trace_opts[i].name);
4506 		else
4507 			seq_printf(m, "no%s\n", trace_opts[i].name);
4508 	}
4509 	mutex_unlock(&trace_types_lock);
4510 
4511 	return 0;
4512 }
4513 
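/*
 * Ask the tracer to validate the change via its set_flag() callback
 * before the option bit is actually updated.
 */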
4514 static int __set_tracer_option(struct trace_array *tr,
4515 			       struct tracer_flags *tracer_flags,
4516 			       struct tracer_opt *opts, int neg)
4517 {
4518 	struct tracer *trace = tracer_flags->trace;
4519 	int ret;
4520 
4521 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4522 	if (ret)
4523 		return ret;
4524 
4525 	if (neg)
4526 		tracer_flags->val &= ~opts->bit;
4527 	else
4528 		tracer_flags->val |= opts->bit;
4529 	return 0;
4530 }
4531 
4532 /* Try to assign a tracer-specific option */
4533 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4534 {
4535 	struct tracer *trace = tr->current_trace;
4536 	struct tracer_flags *tracer_flags = trace->flags;
4537 	struct tracer_opt *opts = NULL;
4538 	int i;
4539 
4540 	for (i = 0; tracer_flags->opts[i].name; i++) {
4541 		opts = &tracer_flags->opts[i];
4542 
4543 		if (strcmp(cmp, opts->name) == 0)
4544 			return __set_tracer_option(tr, trace->flags, opts, neg);
4545 	}
4546 
4547 	return -EINVAL;
4548 }
4549 
4550 /* Some tracers require overwrite to stay enabled */
4551 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4552 {
4553 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4554 		return -1;
4555 
4556 	return 0;
4557 }
4558 
4559 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4560 {
4561 	/* do nothing if the flag already has the requested value */
4562 	if (!!(tr->trace_flags & mask) == !!enabled)
4563 		return 0;
4564 
4565 	/* Give the tracer a chance to approve the change */
4566 	if (tr->current_trace->flag_changed)
4567 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4568 			return -EINVAL;
4569 
4570 	if (enabled)
4571 		tr->trace_flags |= mask;
4572 	else
4573 		tr->trace_flags &= ~mask;
4574 
4575 	if (mask == TRACE_ITER_RECORD_CMD)
4576 		trace_event_enable_cmd_record(enabled);
4577 
4578 	if (mask == TRACE_ITER_RECORD_TGID) {
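	/* The pid-to-tgid map is allocated lazily, on first use of this flag */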
4579 		if (!tgid_map)
4580 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4581 					   sizeof(*tgid_map),
4582 					   GFP_KERNEL);
4583 		if (!tgid_map) {
4584 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4585 			return -ENOMEM;
4586 		}
4587 
4588 		trace_event_enable_tgid_record(enabled);
4589 	}
4590 
4591 	if (mask == TRACE_ITER_EVENT_FORK)
4592 		trace_event_follow_fork(tr, enabled);
4593 
4594 	if (mask == TRACE_ITER_FUNC_FORK)
4595 		ftrace_pid_follow_fork(tr, enabled);
4596 
4597 	if (mask == TRACE_ITER_OVERWRITE) {
4598 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4599 #ifdef CONFIG_TRACER_MAX_TRACE
4600 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4601 #endif
4602 	}
4603 
4604 	if (mask == TRACE_ITER_PRINTK) {
4605 		trace_printk_start_stop_comm(enabled);
4606 		trace_printk_control(enabled);
4607 	}
4608 
4609 	return 0;
4610 }
4611 
4612 static int trace_set_options(struct trace_array *tr, char *option)
4613 {
4614 	char *cmp;
4615 	int neg = 0;
4616 	int ret;
4617 	size_t orig_len = strlen(option);
4618 	int len;
4619 
4620 	cmp = strstrip(option);
4621 
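	/* Options prefixed with 'no' clear the flag instead of setting it */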
4622 	len = str_has_prefix(cmp, "no");
4623 	if (len)
4624 		neg = 1;
4625 
4626 	cmp += len;
4627 
4628 	mutex_lock(&trace_types_lock);
4629 
4630 	ret = match_string(trace_options, -1, cmp);
4631 	/* If no core option matched, try the tracer-specific options */
4632 	if (ret < 0)
4633 		ret = set_tracer_option(tr, cmp, neg);
4634 	else
4635 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4636 
4637 	mutex_unlock(&trace_types_lock);
4638 
4639 	/*
4640 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4641 	 * turn it back into a space.
4642 	 */
4643 	if (orig_len > strlen(option))
4644 		option[strlen(option)] = ' ';
4645 
4646 	return ret;
4647 }
4648 
4649 static void __init apply_trace_boot_options(void)
4650 {
4651 	char *buf = trace_boot_options_buf;
4652 	char *option;
4653 
4654 	while (true) {
4655 		option = strsep(&buf, ",");
4656 
4657 		if (!option)
4658 			break;
4659 
4660 		if (*option)
4661 			trace_set_options(&global_trace, option);
4662 
4663 		/* Put back the comma to allow this to be called again */
4664 		if (buf)
4665 			*(buf - 1) = ',';
4666 	}
4667 }
4668 
4669 static ssize_t
4670 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4671 			size_t cnt, loff_t *ppos)
4672 {
4673 	struct seq_file *m = filp->private_data;
4674 	struct trace_array *tr = m->private;
4675 	char buf[64];
4676 	int ret;
4677 
4678 	if (cnt >= sizeof(buf))
4679 		return -EINVAL;
4680 
4681 	if (copy_from_user(buf, ubuf, cnt))
4682 		return -EFAULT;
4683 
4684 	buf[cnt] = 0;
4685 
4686 	ret = trace_set_options(tr, buf);
4687 	if (ret < 0)
4688 		return ret;
4689 
4690 	*ppos += cnt;
4691 
4692 	return cnt;
4693 }
4694 
4695 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4696 {
4697 	struct trace_array *tr = inode->i_private;
4698 	int ret;
4699 
4700 	if (tracing_disabled)
4701 		return -ENODEV;
4702 
4703 	if (trace_array_get(tr) < 0)
4704 		return -ENODEV;
4705 
4706 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4707 	if (ret < 0)
4708 		trace_array_put(tr);
4709 
4710 	return ret;
4711 }
4712 
4713 static const struct file_operations tracing_iter_fops = {
4714 	.open		= tracing_trace_options_open,
4715 	.read		= seq_read,
4716 	.llseek		= seq_lseek,
4717 	.release	= tracing_single_release_tr,
4718 	.write		= tracing_trace_options_write,
4719 };
4720 
4721 static const char readme_msg[] =
4722 	"tracing mini-HOWTO:\n\n"
4723 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4724 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4725 	" Important files:\n"
4726 	"  trace\t\t\t- The static contents of the buffer\n"
4727 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4728 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4729 	"  current_tracer\t- function and latency tracers\n"
4730 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4731 	"  error_log\t- error log for failed commands (that support it)\n"
4732 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4733 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4734 	"  trace_clock\t\t- change the clock used to order events\n"
4735 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4736 	"      global:   Synced across CPUs but slows tracing down.\n"
4737 	"     counter:   Not a clock, but just an increment\n"
4738 	"      uptime:   Jiffy counter from time of boot\n"
4739 	"        perf:   Same clock that perf events use\n"
4740 #ifdef CONFIG_X86_64
4741 	"     x86-tsc:   TSC cycle counter\n"
4742 #endif
4743 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4744 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4745 	"    absolute:   Absolute (standalone) timestamp\n"
4746 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4747 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4748 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4749 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4750 	"\t\t\t  Remove sub-buffer with rmdir\n"
4751 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4752 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4753 	"\t\t\t  option name\n"
4754 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4755 #ifdef CONFIG_DYNAMIC_FTRACE
4756 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4757 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4758 	"\t\t\t  functions\n"
4759 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4760 	"\t     modules: Can select a group via module\n"
4761 	"\t      Format: :mod:<module-name>\n"
4762 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4763 	"\t    triggers: a command to perform when function is hit\n"
4764 	"\t      Format: <function>:<trigger>[:count]\n"
4765 	"\t     trigger: traceon, traceoff\n"
4766 	"\t\t      enable_event:<system>:<event>\n"
4767 	"\t\t      disable_event:<system>:<event>\n"
4768 #ifdef CONFIG_STACKTRACE
4769 	"\t\t      stacktrace\n"
4770 #endif
4771 #ifdef CONFIG_TRACER_SNAPSHOT
4772 	"\t\t      snapshot\n"
4773 #endif
4774 	"\t\t      dump\n"
4775 	"\t\t      cpudump\n"
4776 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4777 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4778 	"\t     The first one will disable tracing every time do_fault is hit\n"
4779 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4780 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4781 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4782 	"\t       the counter will not decrement. It only decrements when the\n"
4783 	"\t       trigger did work\n"
4784 	"\t     To remove trigger without count:\n"
4785 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4786 	"\t     To remove trigger with a count:\n"
4787 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4788 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4789 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4790 	"\t    modules: Can select a group via module command :mod:\n"
4791 	"\t    Does not accept triggers\n"
4792 #endif /* CONFIG_DYNAMIC_FTRACE */
4793 #ifdef CONFIG_FUNCTION_TRACER
4794 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4795 	"\t\t    (function)\n"
4796 #endif
4797 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4798 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4799 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4800 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4801 #endif
4802 #ifdef CONFIG_TRACER_SNAPSHOT
4803 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4804 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4805 	"\t\t\t  information\n"
4806 #endif
4807 #ifdef CONFIG_STACK_TRACER
4808 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4809 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4810 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4811 	"\t\t\t  new trace)\n"
4812 #ifdef CONFIG_DYNAMIC_FTRACE
4813 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4814 	"\t\t\t  traces\n"
4815 #endif
4816 #endif /* CONFIG_STACK_TRACER */
4817 #ifdef CONFIG_DYNAMIC_EVENTS
4818 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4819 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4820 #endif
4821 #ifdef CONFIG_KPROBE_EVENTS
4822 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4823 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4824 #endif
4825 #ifdef CONFIG_UPROBE_EVENTS
4826 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4827 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4828 #endif
4829 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4830 	"\t  accepts: event-definitions (one definition per line)\n"
4831 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4832 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4833 #ifdef CONFIG_HIST_TRIGGERS
4834 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4835 #endif
4836 	"\t           -:[<group>/]<event>\n"
4837 #ifdef CONFIG_KPROBE_EVENTS
4838 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4839   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4840 #endif
4841 #ifdef CONFIG_UPROBE_EVENTS
4842   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4843 #endif
4844 	"\t     args: <name>=fetcharg[:type]\n"
4845 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4846 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4847 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4848 #else
4849 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4850 #endif
4851 	"\t           +|-[u]<offset>(<fetcharg>)\n"
4852 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4853 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4854 	"\t           <type>\\[<array-size>\\]\n"
4855 #ifdef CONFIG_HIST_TRIGGERS
4856 	"\t    field: <stype> <name>;\n"
4857 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4858 	"\t           [unsigned] char/int/long\n"
4859 #endif
4860 #endif
4861 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4862 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4863 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4864 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4865 	"\t\t\t  events\n"
4866 	"      filter\t\t- If set, only events passing filter are traced\n"
4867 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4868 	"\t\t\t  <event>:\n"
4869 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4870 	"      filter\t\t- If set, only events passing filter are traced\n"
4871 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4872 	"\t    Format: <trigger>[:count][if <filter>]\n"
4873 	"\t   trigger: traceon, traceoff\n"
4874 	"\t            enable_event:<system>:<event>\n"
4875 	"\t            disable_event:<system>:<event>\n"
4876 #ifdef CONFIG_HIST_TRIGGERS
4877 	"\t            enable_hist:<system>:<event>\n"
4878 	"\t            disable_hist:<system>:<event>\n"
4879 #endif
4880 #ifdef CONFIG_STACKTRACE
4881 	"\t\t    stacktrace\n"
4882 #endif
4883 #ifdef CONFIG_TRACER_SNAPSHOT
4884 	"\t\t    snapshot\n"
4885 #endif
4886 #ifdef CONFIG_HIST_TRIGGERS
4887 	"\t\t    hist (see below)\n"
4888 #endif
4889 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4890 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4891 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4892 	"\t                  events/block/block_unplug/trigger\n"
4893 	"\t   The first disables tracing every time block_unplug is hit.\n"
4894 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4895 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4896 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4897 	"\t   Like function triggers, the counter is only decremented if it\n"
4898 	"\t    enabled or disabled tracing.\n"
4899 	"\t   To remove a trigger without a count:\n"
4900 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4901 	"\t   To remove a trigger with a count:\n"
4902 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4903 	"\t   Filters can be ignored when removing a trigger.\n"
4904 #ifdef CONFIG_HIST_TRIGGERS
4905 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4906 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4907 	"\t            [:values=<field1[,field2,...]>]\n"
4908 	"\t            [:sort=<field1[,field2,...]>]\n"
4909 	"\t            [:size=#entries]\n"
4910 	"\t            [:pause][:continue][:clear]\n"
4911 	"\t            [:name=histname1]\n"
4912 	"\t            [:<handler>.<action>]\n"
4913 	"\t            [if <filter>]\n\n"
4914 	"\t    When a matching event is hit, an entry is added to a hash\n"
4915 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4916 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4917 	"\t    correspond to fields in the event's format description.  Keys\n"
4918 	"\t    can be any field, or the special string 'stacktrace'.\n"
4919 	"\t    Compound keys consisting of up to two fields can be specified\n"
4920 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4921 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4922 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4923 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4924 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4925 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4926 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4927 	"\t    its histogram data will be shared with other triggers of the\n"
4928 	"\t    same name, and trigger hits will update this common data.\n\n"
4929 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4930 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4931 	"\t    triggers attached to an event, there will be a table for each\n"
4932 	"\t    trigger in the output.  The table displayed for a named\n"
4933 	"\t    trigger will be the same as any other instance having the\n"
4934 	"\t    same name.  The default format used to display a given field\n"
4935 	"\t    can be modified by appending any of the following modifiers\n"
4936 	"\t    to the field name, as applicable:\n\n"
4937 	"\t            .hex        display a number as a hex value\n"
4938 	"\t            .sym        display an address as a symbol\n"
4939 	"\t            .sym-offset display an address as a symbol and offset\n"
4940 	"\t            .execname   display a common_pid as a program name\n"
4941 	"\t            .syscall    display a syscall id as a syscall name\n"
4942 	"\t            .log2       display log2 value rather than raw number\n"
4943 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4944 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4945 	"\t    trigger or to start a hist trigger but not log any events\n"
4946 	"\t    until told to do so.  'continue' can be used to start or\n"
4947 	"\t    restart a paused hist trigger.\n\n"
4948 	"\t    The 'clear' parameter will clear the contents of a running\n"
4949 	"\t    hist trigger and leave its current paused/active state\n"
4950 	"\t    unchanged.\n\n"
4951 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4952 	"\t    have one event conditionally start and stop another event's\n"
4953 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4954 	"\t    the enable_event and disable_event triggers.\n\n"
4955 	"\t    Hist trigger handlers and actions are executed whenever\n"
4956 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4957 	"\t        <handler>.<action>\n\n"
4958 	"\t    The available handlers are:\n\n"
4959 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4960 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4961 	"\t        onchange(var)            - invoke action if var changes\n\n"
4962 	"\t    The available actions are:\n\n"
4963 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4964 	"\t        save(field,...)                      - save current event fields\n"
4965 #ifdef CONFIG_TRACER_SNAPSHOT
4966 	"\t        snapshot()                           - snapshot the trace buffer\n"
4967 #endif
4968 #endif
4969 ;
4970 
4971 static ssize_t
4972 tracing_readme_read(struct file *filp, char __user *ubuf,
4973 		       size_t cnt, loff_t *ppos)
4974 {
4975 	return simple_read_from_buffer(ubuf, cnt, ppos,
4976 					readme_msg, strlen(readme_msg));
4977 }
4978 
4979 static const struct file_operations tracing_readme_fops = {
4980 	.open		= tracing_open_generic,
4981 	.read		= tracing_readme_read,
4982 	.llseek		= generic_file_llseek,
4983 };
4984 
4985 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4986 {
4987 	int *ptr = v;
4988 
4989 	if (*pos || m->count)
4990 		ptr++;
4991 
4992 	(*pos)++;
4993 
4994 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4995 		if (trace_find_tgid(*ptr))
4996 			return ptr;
4997 	}
4998 
4999 	return NULL;
5000 }
5001 
5002 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5003 {
5004 	void *v;
5005 	loff_t l = 0;
5006 
5007 	if (!tgid_map)
5008 		return NULL;
5009 
5010 	v = &tgid_map[0];
5011 	while (l <= *pos) {
5012 		v = saved_tgids_next(m, v, &l);
5013 		if (!v)
5014 			return NULL;
5015 	}
5016 
5017 	return v;
5018 }
5019 
5020 static void saved_tgids_stop(struct seq_file *m, void *v)
5021 {
5022 }
5023 
5024 static int saved_tgids_show(struct seq_file *m, void *v)
5025 {
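	/* v points into tgid_map, so the pid is simply the array index */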
5026 	int pid = (int *)v - tgid_map;
5027 
5028 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5029 	return 0;
5030 }
5031 
5032 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5033 	.start		= saved_tgids_start,
5034 	.stop		= saved_tgids_stop,
5035 	.next		= saved_tgids_next,
5036 	.show		= saved_tgids_show,
5037 };
5038 
5039 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5040 {
5041 	if (tracing_disabled)
5042 		return -ENODEV;
5043 
5044 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5045 }
5046 
5047 
5048 static const struct file_operations tracing_saved_tgids_fops = {
5049 	.open		= tracing_saved_tgids_open,
5050 	.read		= seq_read,
5051 	.llseek		= seq_lseek,
5052 	.release	= seq_release,
5053 };
5054 
5055 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5056 {
5057 	unsigned int *ptr = v;
5058 
5059 	if (*pos || m->count)
5060 		ptr++;
5061 
5062 	(*pos)++;
5063 
5064 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5065 	     ptr++) {
5066 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5067 			continue;
5068 
5069 		return ptr;
5070 	}
5071 
5072 	return NULL;
5073 }
5074 
5075 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5076 {
5077 	void *v;
5078 	loff_t l = 0;
5079 
5080 	preempt_disable();
5081 	arch_spin_lock(&trace_cmdline_lock);
5082 
5083 	v = &savedcmd->map_cmdline_to_pid[0];
5084 	while (l <= *pos) {
5085 		v = saved_cmdlines_next(m, v, &l);
5086 		if (!v)
5087 			return NULL;
5088 	}
5089 
5090 	return v;
5091 }
5092 
5093 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5094 {
5095 	arch_spin_unlock(&trace_cmdline_lock);
5096 	preempt_enable();
5097 }
5098 
5099 static int saved_cmdlines_show(struct seq_file *m, void *v)
5100 {
5101 	char buf[TASK_COMM_LEN];
5102 	unsigned int *pid = v;
5103 
5104 	__trace_find_cmdline(*pid, buf);
5105 	seq_printf(m, "%d %s\n", *pid, buf);
5106 	return 0;
5107 }
5108 
5109 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5110 	.start		= saved_cmdlines_start,
5111 	.next		= saved_cmdlines_next,
5112 	.stop		= saved_cmdlines_stop,
5113 	.show		= saved_cmdlines_show,
5114 };
5115 
5116 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5117 {
5118 	if (tracing_disabled)
5119 		return -ENODEV;
5120 
5121 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5122 }
5123 
5124 static const struct file_operations tracing_saved_cmdlines_fops = {
5125 	.open		= tracing_saved_cmdlines_open,
5126 	.read		= seq_read,
5127 	.llseek		= seq_lseek,
5128 	.release	= seq_release,
5129 };
5130 
5131 static ssize_t
5132 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5133 				 size_t cnt, loff_t *ppos)
5134 {
5135 	char buf[64];
5136 	int r;
5137 
5138 	arch_spin_lock(&trace_cmdline_lock);
5139 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5140 	arch_spin_unlock(&trace_cmdline_lock);
5141 
5142 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5143 }
5144 
5145 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5146 {
5147 	kfree(s->saved_cmdlines);
5148 	kfree(s->map_cmdline_to_pid);
5149 	kfree(s);
5150 }
5151 
5152 static int tracing_resize_saved_cmdlines(unsigned int val)
5153 {
5154 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5155 
5156 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5157 	if (!s)
5158 		return -ENOMEM;
5159 
5160 	if (allocate_cmdlines_buffer(val, s) < 0) {
5161 		kfree(s);
5162 		return -ENOMEM;
5163 	}
5164 
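	/* Swap in the new buffer under the lock; free the old one after dropping it */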
5165 	arch_spin_lock(&trace_cmdline_lock);
5166 	savedcmd_temp = savedcmd;
5167 	savedcmd = s;
5168 	arch_spin_unlock(&trace_cmdline_lock);
5169 	free_saved_cmdlines_buffer(savedcmd_temp);
5170 
5171 	return 0;
5172 }
5173 
5174 static ssize_t
5175 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5176 				  size_t cnt, loff_t *ppos)
5177 {
5178 	unsigned long val;
5179 	int ret;
5180 
5181 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5182 	if (ret)
5183 		return ret;
5184 
5185 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
5186 	if (!val || val > PID_MAX_DEFAULT)
5187 		return -EINVAL;
5188 
5189 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5190 	if (ret < 0)
5191 		return ret;
5192 
5193 	*ppos += cnt;
5194 
5195 	return cnt;
5196 }
5197 
5198 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5199 	.open		= tracing_open_generic,
5200 	.read		= tracing_saved_cmdlines_size_read,
5201 	.write		= tracing_saved_cmdlines_size_write,
5202 };
5203 
5204 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
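/*
 * If @ptr sits on the tail marker of the current array, follow the
 * link to the next array and skip over its head item.  Returns NULL
 * when the end of the list is reached.
 */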
5205 static union trace_eval_map_item *
5206 update_eval_map(union trace_eval_map_item *ptr)
5207 {
5208 	if (!ptr->map.eval_string) {
5209 		if (ptr->tail.next) {
5210 			ptr = ptr->tail.next;
5211 			/* Set ptr to the next real item (skip head) */
5212 			ptr++;
5213 		} else
5214 			return NULL;
5215 	}
5216 	return ptr;
5217 }
5218 
5219 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5220 {
5221 	union trace_eval_map_item *ptr = v;
5222 
5223 	/*
5224 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5225 	 * This really should never happen.
5226 	 */
5227 	ptr = update_eval_map(ptr);
5228 	if (WARN_ON_ONCE(!ptr))
5229 		return NULL;
5230 
5231 	ptr++;
5232 
5233 	(*pos)++;
5234 
5235 	ptr = update_eval_map(ptr);
5236 
5237 	return ptr;
5238 }
5239 
5240 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5241 {
5242 	union trace_eval_map_item *v;
5243 	loff_t l = 0;
5244 
5245 	mutex_lock(&trace_eval_mutex);
5246 
5247 	v = trace_eval_maps;
5248 	if (v)
5249 		v++;
5250 
5251 	while (v && l < *pos) {
5252 		v = eval_map_next(m, v, &l);
5253 	}
5254 
5255 	return v;
5256 }
5257 
5258 static void eval_map_stop(struct seq_file *m, void *v)
5259 {
5260 	mutex_unlock(&trace_eval_mutex);
5261 }
5262 
5263 static int eval_map_show(struct seq_file *m, void *v)
5264 {
5265 	union trace_eval_map_item *ptr = v;
5266 
5267 	seq_printf(m, "%s %ld (%s)\n",
5268 		   ptr->map.eval_string, ptr->map.eval_value,
5269 		   ptr->map.system);
5270 
5271 	return 0;
5272 }
5273 
5274 static const struct seq_operations tracing_eval_map_seq_ops = {
5275 	.start		= eval_map_start,
5276 	.next		= eval_map_next,
5277 	.stop		= eval_map_stop,
5278 	.show		= eval_map_show,
5279 };
5280 
5281 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5282 {
5283 	if (tracing_disabled)
5284 		return -ENODEV;
5285 
5286 	return seq_open(filp, &tracing_eval_map_seq_ops);
5287 }
5288 
5289 static const struct file_operations tracing_eval_map_fops = {
5290 	.open		= tracing_eval_map_open,
5291 	.read		= seq_read,
5292 	.llseek		= seq_lseek,
5293 	.release	= seq_release,
5294 };
5295 
5296 static inline union trace_eval_map_item *
5297 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5298 {
5299 	/* Return tail of array given the head */
5300 	return ptr + ptr->head.length + 1;
5301 }
5302 
5303 static void
5304 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5305 			   int len)
5306 {
5307 	struct trace_eval_map **stop;
5308 	struct trace_eval_map **map;
5309 	union trace_eval_map_item *map_array;
5310 	union trace_eval_map_item *ptr;
5311 
5312 	stop = start + len;
5313 
5314 	/*
5315 	 * The trace_eval_maps contains the map plus a head and tail item,
5316 	 * where the head holds the module and the length of the array, and the
5317 	 * tail holds a pointer to the next list.
5318 	 */
5319 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5320 	if (!map_array) {
5321 		pr_warn("Unable to allocate trace eval mapping\n");
5322 		return;
5323 	}
5324 
5325 	mutex_lock(&trace_eval_mutex);
5326 
5327 	if (!trace_eval_maps)
5328 		trace_eval_maps = map_array;
5329 	else {
5330 		ptr = trace_eval_maps;
5331 		for (;;) {
5332 			ptr = trace_eval_jmp_to_tail(ptr);
5333 			if (!ptr->tail.next)
5334 				break;
5335 			ptr = ptr->tail.next;
5336 
5337 		}
5338 		ptr->tail.next = map_array;
5339 	}
5340 	map_array->head.mod = mod;
5341 	map_array->head.length = len;
5342 	map_array++;
5343 
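	/* Copy the module's maps, then terminate the array with a zeroed item */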
5344 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5345 		map_array->map = **map;
5346 		map_array++;
5347 	}
5348 	memset(map_array, 0, sizeof(*map_array));
5349 
5350 	mutex_unlock(&trace_eval_mutex);
5351 }
5352 
5353 static void trace_create_eval_file(struct dentry *d_tracer)
5354 {
5355 	trace_create_file("eval_map", 0444, d_tracer,
5356 			  NULL, &tracing_eval_map_fops);
5357 }
5358 
5359 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5360 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5361 static inline void trace_insert_eval_map_file(struct module *mod,
5362 			      struct trace_eval_map **start, int len) { }
5363 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5364 
5365 static void trace_insert_eval_map(struct module *mod,
5366 				  struct trace_eval_map **start, int len)
5367 {
5368 	struct trace_eval_map **map;
5369 
5370 	if (len <= 0)
5371 		return;
5372 
5373 	map = start;
5374 
5375 	trace_event_eval_update(map, len);
5376 
5377 	trace_insert_eval_map_file(mod, start, len);
5378 }
5379 
5380 static ssize_t
5381 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5382 		       size_t cnt, loff_t *ppos)
5383 {
5384 	struct trace_array *tr = filp->private_data;
5385 	char buf[MAX_TRACER_SIZE+2];
5386 	int r;
5387 
5388 	mutex_lock(&trace_types_lock);
5389 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5390 	mutex_unlock(&trace_types_lock);
5391 
5392 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5393 }
5394 
5395 int tracer_init(struct tracer *t, struct trace_array *tr)
5396 {
5397 	tracing_reset_online_cpus(&tr->trace_buffer);
5398 	return t->init(tr);
5399 }
5400 
5401 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5402 {
5403 	int cpu;
5404 
5405 	for_each_tracing_cpu(cpu)
5406 		per_cpu_ptr(buf->data, cpu)->entries = val;
5407 }
5408 
5409 #ifdef CONFIG_TRACER_MAX_TRACE
5410 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5411 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5412 					struct trace_buffer *size_buf, int cpu_id)
5413 {
5414 	int cpu, ret = 0;
5415 
5416 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5417 		for_each_tracing_cpu(cpu) {
5418 			ret = ring_buffer_resize(trace_buf->buffer,
5419 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5420 			if (ret < 0)
5421 				break;
5422 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5423 				per_cpu_ptr(size_buf->data, cpu)->entries;
5424 		}
5425 	} else {
5426 		ret = ring_buffer_resize(trace_buf->buffer,
5427 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5428 		if (ret == 0)
5429 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5430 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5431 	}
5432 
5433 	return ret;
5434 }
5435 #endif /* CONFIG_TRACER_MAX_TRACE */
5436 
5437 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5438 					unsigned long size, int cpu)
5439 {
5440 	int ret;
5441 
5442 	/*
5443 	 * If the kernel or the user changes the size of the ring buffer,
5444 	 * we use the size that was given, and we can forget about
5445 	 * expanding it later.
5446 	 */
5447 	ring_buffer_expanded = true;
5448 
5449 	/* May be called before buffers are initialized */
5450 	if (!tr->trace_buffer.buffer)
5451 		return 0;
5452 
5453 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5454 	if (ret < 0)
5455 		return ret;
5456 
5457 #ifdef CONFIG_TRACER_MAX_TRACE
5458 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5459 	    !tr->current_trace->use_max_tr)
5460 		goto out;
5461 
5462 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5463 	if (ret < 0) {
5464 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5465 						     &tr->trace_buffer, cpu);
5466 		if (r < 0) {
5467 			/*
5468 			 * AARGH! We are left with different
5469 			 * size max buffer!!!!
5470 			 * The max buffer is our "snapshot" buffer.
5471 			 * When a tracer needs a snapshot (one of the
5472 			 * latency tracers), it swaps the max buffer
5473 		 * with the saved snapshot. We succeeded in updating
5474 		 * the size of the main buffer, but failed to
5475 			 * update the size of the max buffer. But when we tried
5476 			 * to reset the main buffer to the original size, we
5477 			 * failed there too. This is very unlikely to
5478 			 * happen, but if it does, warn and kill all
5479 			 * tracing.
5480 			 */
5481 			WARN_ON(1);
5482 			tracing_disabled = 1;
5483 		}
5484 		return ret;
5485 	}
5486 
5487 	if (cpu == RING_BUFFER_ALL_CPUS)
5488 		set_buffer_entries(&tr->max_buffer, size);
5489 	else
5490 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5491 
5492  out:
5493 #endif /* CONFIG_TRACER_MAX_TRACE */
5494 
5495 	if (cpu == RING_BUFFER_ALL_CPUS)
5496 		set_buffer_entries(&tr->trace_buffer, size);
5497 	else
5498 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5499 
5500 	return ret;
5501 }
5502 
5503 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5504 					  unsigned long size, int cpu_id)
5505 {
5506 	int ret = size;
5507 
5508 	mutex_lock(&trace_types_lock);
5509 
5510 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5511 		/* make sure this cpu is enabled in the mask */
5512 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5513 			ret = -EINVAL;
5514 			goto out;
5515 		}
5516 	}
5517 
5518 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5519 	if (ret < 0)
5520 		ret = -ENOMEM;
5521 
5522 out:
5523 	mutex_unlock(&trace_types_lock);
5524 
5525 	return ret;
5526 }
5527 
5528 
5529 /**
5530  * tracing_update_buffers - used by tracing facility to expand ring buffers
5531  *
5532  * To save memory when tracing is configured in but never used, the
5533  * ring buffers are initially set to a minimum size. Once a user
5534  * starts to use the tracing facility, the buffers need to grow to
5535  * their default size.
5536  *
5537  * This function is to be called when a tracer is about to be used.
5538  */
5539 int tracing_update_buffers(void)
5540 {
5541 	int ret = 0;
5542 
5543 	mutex_lock(&trace_types_lock);
5544 	if (!ring_buffer_expanded)
5545 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5546 						RING_BUFFER_ALL_CPUS);
5547 	mutex_unlock(&trace_types_lock);
5548 
5549 	return ret;
5550 }
5551 
5552 struct trace_option_dentry;
5553 
5554 static void
5555 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5556 
5557 /*
5558  * Used to clear out the tracer before deletion of an instance.
5559  * Must have trace_types_lock held.
5560  */
5561 static void tracing_set_nop(struct trace_array *tr)
5562 {
5563 	if (tr->current_trace == &nop_trace)
5564 		return;
5565 
5566 	tr->current_trace->enabled--;
5567 
5568 	if (tr->current_trace->reset)
5569 		tr->current_trace->reset(tr);
5570 
5571 	tr->current_trace = &nop_trace;
5572 }
5573 
5574 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5575 {
5576 	/* Only enable if the directory has been created already. */
5577 	if (!tr->dir)
5578 		return;
5579 
5580 	create_trace_option_files(tr, t);
5581 }
5582 
5583 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5584 {
5585 	struct tracer *t;
5586 #ifdef CONFIG_TRACER_MAX_TRACE
5587 	bool had_max_tr;
5588 #endif
5589 	int ret = 0;
5590 
5591 	mutex_lock(&trace_types_lock);
5592 
5593 	if (!ring_buffer_expanded) {
5594 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5595 						RING_BUFFER_ALL_CPUS);
5596 		if (ret < 0)
5597 			goto out;
5598 		ret = 0;
5599 	}
5600 
5601 	for (t = trace_types; t; t = t->next) {
5602 		if (strcmp(t->name, buf) == 0)
5603 			break;
5604 	}
5605 	if (!t) {
5606 		ret = -EINVAL;
5607 		goto out;
5608 	}
5609 	if (t == tr->current_trace)
5610 		goto out;
5611 
5612 #ifdef CONFIG_TRACER_SNAPSHOT
5613 	if (t->use_max_tr) {
5614 		arch_spin_lock(&tr->max_lock);
5615 		if (tr->cond_snapshot)
5616 			ret = -EBUSY;
5617 		arch_spin_unlock(&tr->max_lock);
5618 		if (ret)
5619 			goto out;
5620 	}
5621 #endif
5622 	/* Some tracers won't work if enabled from the kernel command line */
5623 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5624 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5625 			t->name);
5626 		goto out;
5627 	}
5628 
5629 	/* Some tracers are only allowed for the top level buffer */
5630 	if (!trace_ok_for_array(t, tr)) {
5631 		ret = -EINVAL;
5632 		goto out;
5633 	}
5634 
5635 	/* If trace pipe files are being read, we can't change the tracer */
5636 	if (tr->current_trace->ref) {
5637 		ret = -EBUSY;
5638 		goto out;
5639 	}
5640 
5641 	trace_branch_disable();
5642 
5643 	tr->current_trace->enabled--;
5644 
5645 	if (tr->current_trace->reset)
5646 		tr->current_trace->reset(tr);
5647 
5648 	/* Current trace needs to be nop_trace before synchronize_rcu */
5649 	tr->current_trace = &nop_trace;
5650 
5651 #ifdef CONFIG_TRACER_MAX_TRACE
5652 	had_max_tr = tr->allocated_snapshot;
5653 
5654 	if (had_max_tr && !t->use_max_tr) {
5655 		/*
5656 		 * We need to make sure that update_max_tr() sees that
5657 		 * current_trace changed to nop_trace to keep it from
5658 		 * swapping the buffers after we resize it.
5659 		 * update_max_tr() is called with interrupts disabled,
5660 		 * so a synchronize_rcu() is sufficient.
5661 		 */
5662 		synchronize_rcu();
5663 		free_snapshot(tr);
5664 	}
5665 #endif
5666 
5667 #ifdef CONFIG_TRACER_MAX_TRACE
5668 	if (t->use_max_tr && !had_max_tr) {
5669 		ret = tracing_alloc_snapshot_instance(tr);
5670 		if (ret < 0)
5671 			goto out;
5672 	}
5673 #endif
5674 
5675 	if (t->init) {
5676 		ret = tracer_init(t, tr);
5677 		if (ret)
5678 			goto out;
5679 	}
5680 
5681 	tr->current_trace = t;
5682 	tr->current_trace->enabled++;
5683 	trace_branch_enable(tr);
5684  out:
5685 	mutex_unlock(&trace_types_lock);
5686 
5687 	return ret;
5688 }
5689 
5690 static ssize_t
5691 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5692 			size_t cnt, loff_t *ppos)
5693 {
5694 	struct trace_array *tr = filp->private_data;
5695 	char buf[MAX_TRACER_SIZE+1];
5696 	int i;
5697 	size_t ret;
5698 	int err;
5699 
5700 	ret = cnt;
5701 
5702 	if (cnt > MAX_TRACER_SIZE)
5703 		cnt = MAX_TRACER_SIZE;
5704 
5705 	if (copy_from_user(buf, ubuf, cnt))
5706 		return -EFAULT;
5707 
5708 	buf[cnt] = 0;
5709 
5710 	/* strip trailing whitespace. */
5711 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5712 		buf[i] = 0;
5713 
5714 	err = tracing_set_tracer(tr, buf);
5715 	if (err)
5716 		return err;
5717 
5718 	*ppos += ret;
5719 
5720 	return ret;
5721 }
5722 
5723 static ssize_t
5724 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5725 		   size_t cnt, loff_t *ppos)
5726 {
5727 	char buf[64];
5728 	int r;
5729 
5730 	r = snprintf(buf, sizeof(buf), "%ld\n",
5731 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5732 	if (r > sizeof(buf))
5733 		r = sizeof(buf);
5734 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5735 }
5736 
5737 static ssize_t
5738 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5739 		    size_t cnt, loff_t *ppos)
5740 {
5741 	unsigned long val;
5742 	int ret;
5743 
5744 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5745 	if (ret)
5746 		return ret;
5747 
5748 	*ptr = val * 1000;
5749 
5750 	return cnt;
5751 }
5752 
5753 static ssize_t
5754 tracing_thresh_read(struct file *filp, char __user *ubuf,
5755 		    size_t cnt, loff_t *ppos)
5756 {
5757 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5758 }
5759 
5760 static ssize_t
5761 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5762 		     size_t cnt, loff_t *ppos)
5763 {
5764 	struct trace_array *tr = filp->private_data;
5765 	int ret;
5766 
5767 	mutex_lock(&trace_types_lock);
5768 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5769 	if (ret < 0)
5770 		goto out;
5771 
5772 	if (tr->current_trace->update_thresh) {
5773 		ret = tr->current_trace->update_thresh(tr);
5774 		if (ret < 0)
5775 			goto out;
5776 	}
5777 
5778 	ret = cnt;
5779 out:
5780 	mutex_unlock(&trace_types_lock);
5781 
5782 	return ret;
5783 }
5784 
5785 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5786 
5787 static ssize_t
5788 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5789 		     size_t cnt, loff_t *ppos)
5790 {
5791 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5792 }
5793 
5794 static ssize_t
5795 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5796 		      size_t cnt, loff_t *ppos)
5797 {
5798 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5799 }
5800 
5801 #endif
5802 
5803 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5804 {
5805 	struct trace_array *tr = inode->i_private;
5806 	struct trace_iterator *iter;
5807 	int ret = 0;
5808 
5809 	if (tracing_disabled)
5810 		return -ENODEV;
5811 
5812 	if (trace_array_get(tr) < 0)
5813 		return -ENODEV;
5814 
5815 	mutex_lock(&trace_types_lock);
5816 
5817 	/* create a buffer to store the information to pass to userspace */
5818 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5819 	if (!iter) {
5820 		ret = -ENOMEM;
5821 		__trace_array_put(tr);
5822 		goto out;
5823 	}
5824 
5825 	trace_seq_init(&iter->seq);
5826 	iter->trace = tr->current_trace;
5827 
5828 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5829 		ret = -ENOMEM;
5830 		goto fail;
5831 	}
5832 
5833 	/* trace pipe does not show start of buffer */
5834 	cpumask_setall(iter->started);
5835 
5836 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5837 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5838 
5839 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5840 	if (trace_clocks[tr->clock_id].in_ns)
5841 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5842 
5843 	iter->tr = tr;
5844 	iter->trace_buffer = &tr->trace_buffer;
5845 	iter->cpu_file = tracing_get_cpu(inode);
5846 	mutex_init(&iter->mutex);
5847 	filp->private_data = iter;
5848 
5849 	if (iter->trace->pipe_open)
5850 		iter->trace->pipe_open(iter);
5851 
5852 	nonseekable_open(inode, filp);
5853 
5854 	tr->current_trace->ref++;
5855 out:
5856 	mutex_unlock(&trace_types_lock);
5857 	return ret;
5858 
5859 fail:
5860 	kfree(iter);
5861 	__trace_array_put(tr);
5862 	mutex_unlock(&trace_types_lock);
5863 	return ret;
5864 }
5865 
5866 static int tracing_release_pipe(struct inode *inode, struct file *file)
5867 {
5868 	struct trace_iterator *iter = file->private_data;
5869 	struct trace_array *tr = inode->i_private;
5870 
5871 	mutex_lock(&trace_types_lock);
5872 
5873 	tr->current_trace->ref--;
5874 
5875 	if (iter->trace->pipe_close)
5876 		iter->trace->pipe_close(iter);
5877 
5878 	mutex_unlock(&trace_types_lock);
5879 
5880 	free_cpumask_var(iter->started);
5881 	mutex_destroy(&iter->mutex);
5882 	kfree(iter);
5883 
5884 	trace_array_put(tr);
5885 
5886 	return 0;
5887 }
5888 
5889 static __poll_t
5890 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5891 {
5892 	struct trace_array *tr = iter->tr;
5893 
5894 	/* Iterators are static; they are either filled or empty */
5895 	if (trace_buffer_iter(iter, iter->cpu_file))
5896 		return EPOLLIN | EPOLLRDNORM;
5897 
5898 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5899 		/*
5900 		 * Always select as readable when in blocking mode
5901 		 */
5902 		return EPOLLIN | EPOLLRDNORM;
5903 	else
5904 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5905 					     filp, poll_table);
5906 }
5907 
5908 static __poll_t
5909 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5910 {
5911 	struct trace_iterator *iter = filp->private_data;
5912 
5913 	return trace_poll(iter, filp, poll_table);
5914 }
5915 
5916 /* Must be called with iter->mutex held. */
5917 static int tracing_wait_pipe(struct file *filp)
5918 {
5919 	struct trace_iterator *iter = filp->private_data;
5920 	int ret;
5921 
5922 	while (trace_empty(iter)) {
5923 
5924 		if ((filp->f_flags & O_NONBLOCK)) {
5925 			return -EAGAIN;
5926 		}
5927 
5928 		/*
5929 		 * We give an EOF only once tracing has been disabled and we
5930 		 * have read something. We still block if tracing is disabled
5931 		 * but we have never read anything. This allows a user to cat
5932 		 * this file, and then enable tracing. But after we have read
5933 		 * something, we give an EOF when tracing is again disabled.
5934 		 *
5935 		 * iter->pos will be 0 if we haven't read anything.
5936 		 */
5937 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5938 			break;
5939 
5940 		mutex_unlock(&iter->mutex);
5941 
5942 		ret = wait_on_pipe(iter, 0);
5943 
5944 		mutex_lock(&iter->mutex);
5945 
5946 		if (ret)
5947 			return ret;
5948 	}
5949 
5950 	return 1;
5951 }
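
/*
 * Illustrative userspace sketch, not part of this file: a minimal
 * consumer of trace_pipe showing the blocking semantics implemented in
 * tracing_wait_pipe(). read() blocks while the buffer is empty and only
 * returns 0 (EOF) once tracing has been disabled and something has
 * already been read. The mount point is an assumption (tracefs is
 * commonly mounted at /sys/kernel/tracing).
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// Consuming read: events are removed as they are read.
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */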
5952 
5953 /*
5954  * Consumer reader.
5955  */
5956 static ssize_t
5957 tracing_read_pipe(struct file *filp, char __user *ubuf,
5958 		  size_t cnt, loff_t *ppos)
5959 {
5960 	struct trace_iterator *iter = filp->private_data;
5961 	ssize_t sret;
5962 
5963 	/*
5964 	 * Avoid more than one consumer on a single file descriptor.
5965 	 * This is just a matter of trace coherency; the ring buffer itself
5966 	 * is protected.
5967 	 */
5968 	mutex_lock(&iter->mutex);
5969 
5970 	/* return any leftover data */
5971 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5972 	if (sret != -EBUSY)
5973 		goto out;
5974 
5975 	trace_seq_init(&iter->seq);
5976 
5977 	if (iter->trace->read) {
5978 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5979 		if (sret)
5980 			goto out;
5981 	}
5982 
5983 waitagain:
5984 	sret = tracing_wait_pipe(filp);
5985 	if (sret <= 0)
5986 		goto out;
5987 
5988 	/* stop when tracing is finished */
5989 	if (trace_empty(iter)) {
5990 		sret = 0;
5991 		goto out;
5992 	}
5993 
5994 	if (cnt >= PAGE_SIZE)
5995 		cnt = PAGE_SIZE - 1;
5996 
5997 	/* reset all but tr, trace, and overruns */
5998 	memset(&iter->seq, 0,
5999 	       sizeof(struct trace_iterator) -
6000 	       offsetof(struct trace_iterator, seq));
6001 	cpumask_clear(iter->started);
6002 	iter->pos = -1;
6003 
6004 	trace_event_read_lock();
6005 	trace_access_lock(iter->cpu_file);
6006 	while (trace_find_next_entry_inc(iter) != NULL) {
6007 		enum print_line_t ret;
6008 		int save_len = iter->seq.seq.len;
6009 
6010 		ret = print_trace_line(iter);
6011 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6012 			/* don't print partial lines */
6013 			iter->seq.seq.len = save_len;
6014 			break;
6015 		}
6016 		if (ret != TRACE_TYPE_NO_CONSUME)
6017 			trace_consume(iter);
6018 
6019 		if (trace_seq_used(&iter->seq) >= cnt)
6020 			break;
6021 
6022 		/*
6023 		 * Setting the full flag means we reached the trace_seq buffer
6024 		 * size and should have left via the partial-line condition
6025 		 * above. One of the trace_seq_* functions is not used properly.
6026 		 */
6027 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6028 			  iter->ent->type);
6029 	}
6030 	trace_access_unlock(iter->cpu_file);
6031 	trace_event_read_unlock();
6032 
6033 	/* Now copy what we have to the user */
6034 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6035 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6036 		trace_seq_init(&iter->seq);
6037 
6038 	/*
6039 	 * If there was nothing to send to user, in spite of consuming trace
6040 	 * entries, go back to wait for more entries.
6041 	 */
6042 	if (sret == -EBUSY)
6043 		goto waitagain;
6044 
6045 out:
6046 	mutex_unlock(&iter->mutex);
6047 
6048 	return sret;
6049 }
6050 
6051 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6052 				     unsigned int idx)
6053 {
6054 	__free_page(spd->pages[idx]);
6055 }
6056 
6057 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6058 	.confirm		= generic_pipe_buf_confirm,
6059 	.release		= generic_pipe_buf_release,
6060 	.steal			= generic_pipe_buf_steal,
6061 	.get			= generic_pipe_buf_get,
6062 };
6063 
6064 static size_t
6065 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6066 {
6067 	size_t count;
6068 	int save_len;
6069 	int ret;
6070 
6071 	/* Seq buffer is page-sized, exactly what we need. */
6072 	for (;;) {
6073 		save_len = iter->seq.seq.len;
6074 		ret = print_trace_line(iter);
6075 
6076 		if (trace_seq_has_overflowed(&iter->seq)) {
6077 			iter->seq.seq.len = save_len;
6078 			break;
6079 		}
6080 
6081 		/*
6082 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6083 		 * should only be returned if iter->seq overflowed. But
6084 		 * check it anyway to be safe.
6085 		 */
6086 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6087 			iter->seq.seq.len = save_len;
6088 			break;
6089 		}
6090 
6091 		count = trace_seq_used(&iter->seq) - save_len;
6092 		if (rem < count) {
6093 			rem = 0;
6094 			iter->seq.seq.len = save_len;
6095 			break;
6096 		}
6097 
6098 		if (ret != TRACE_TYPE_NO_CONSUME)
6099 			trace_consume(iter);
6100 		rem -= count;
6101 		if (!trace_find_next_entry_inc(iter))	{
6102 			rem = 0;
6103 			iter->ent = NULL;
6104 			break;
6105 		}
6106 	}
6107 
6108 	return rem;
6109 }
6110 
6111 static ssize_t tracing_splice_read_pipe(struct file *filp,
6112 					loff_t *ppos,
6113 					struct pipe_inode_info *pipe,
6114 					size_t len,
6115 					unsigned int flags)
6116 {
6117 	struct page *pages_def[PIPE_DEF_BUFFERS];
6118 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6119 	struct trace_iterator *iter = filp->private_data;
6120 	struct splice_pipe_desc spd = {
6121 		.pages		= pages_def,
6122 		.partial	= partial_def,
6123 		.nr_pages	= 0, /* This gets updated below. */
6124 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6125 		.ops		= &tracing_pipe_buf_ops,
6126 		.spd_release	= tracing_spd_release_pipe,
6127 	};
6128 	ssize_t ret;
6129 	size_t rem;
6130 	unsigned int i;
6131 
6132 	if (splice_grow_spd(pipe, &spd))
6133 		return -ENOMEM;
6134 
6135 	mutex_lock(&iter->mutex);
6136 
6137 	if (iter->trace->splice_read) {
6138 		ret = iter->trace->splice_read(iter, filp,
6139 					       ppos, pipe, len, flags);
6140 		if (ret)
6141 			goto out_err;
6142 	}
6143 
6144 	ret = tracing_wait_pipe(filp);
6145 	if (ret <= 0)
6146 		goto out_err;
6147 
6148 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6149 		ret = -EFAULT;
6150 		goto out_err;
6151 	}
6152 
6153 	trace_event_read_lock();
6154 	trace_access_lock(iter->cpu_file);
6155 
6156 	/* Fill as many pages as possible. */
6157 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6158 		spd.pages[i] = alloc_page(GFP_KERNEL);
6159 		if (!spd.pages[i])
6160 			break;
6161 
6162 		rem = tracing_fill_pipe_page(rem, iter);
6163 
6164 		/* Copy the data into the page, so we can start over. */
6165 		ret = trace_seq_to_buffer(&iter->seq,
6166 					  page_address(spd.pages[i]),
6167 					  trace_seq_used(&iter->seq));
6168 		if (ret < 0) {
6169 			__free_page(spd.pages[i]);
6170 			break;
6171 		}
6172 		spd.partial[i].offset = 0;
6173 		spd.partial[i].len = trace_seq_used(&iter->seq);
6174 
6175 		trace_seq_init(&iter->seq);
6176 	}
6177 
6178 	trace_access_unlock(iter->cpu_file);
6179 	trace_event_read_unlock();
6180 	mutex_unlock(&iter->mutex);
6181 
6182 	spd.nr_pages = i;
6183 
6184 	if (i)
6185 		ret = splice_to_pipe(pipe, &spd);
6186 	else
6187 		ret = 0;
6188 out:
6189 	splice_shrink_spd(&spd);
6190 	return ret;
6191 
6192 out_err:
6193 	mutex_unlock(&iter->mutex);
6194 	goto out;
6195 }
6196 
6197 static ssize_t
6198 tracing_entries_read(struct file *filp, char __user *ubuf,
6199 		     size_t cnt, loff_t *ppos)
6200 {
6201 	struct inode *inode = file_inode(filp);
6202 	struct trace_array *tr = inode->i_private;
6203 	int cpu = tracing_get_cpu(inode);
6204 	char buf[64];
6205 	int r = 0;
6206 	ssize_t ret;
6207 
6208 	mutex_lock(&trace_types_lock);
6209 
6210 	if (cpu == RING_BUFFER_ALL_CPUS) {
6211 		int cpu, buf_size_same;
6212 		unsigned long size;
6213 
6214 		size = 0;
6215 		buf_size_same = 1;
6216 		/* check if all cpu sizes are same */
6217 		for_each_tracing_cpu(cpu) {
6218 			/* fill in the size from first enabled cpu */
6219 			if (size == 0)
6220 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6221 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6222 				buf_size_same = 0;
6223 				break;
6224 			}
6225 		}
6226 
6227 		if (buf_size_same) {
6228 			if (!ring_buffer_expanded)
6229 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6230 					    size >> 10,
6231 					    trace_buf_size >> 10);
6232 			else
6233 				r = sprintf(buf, "%lu\n", size >> 10);
6234 		} else
6235 			r = sprintf(buf, "X\n");
6236 	} else
6237 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6238 
6239 	mutex_unlock(&trace_types_lock);
6240 
6241 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6242 	return ret;
6243 }
6244 
6245 static ssize_t
6246 tracing_entries_write(struct file *filp, const char __user *ubuf,
6247 		      size_t cnt, loff_t *ppos)
6248 {
6249 	struct inode *inode = file_inode(filp);
6250 	struct trace_array *tr = inode->i_private;
6251 	unsigned long val;
6252 	int ret;
6253 
6254 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6255 	if (ret)
6256 		return ret;
6257 
6258 	/* must have at least 1 entry */
6259 	if (!val)
6260 		return -EINVAL;
6261 
6262 	/* value is in KB */
6263 	val <<= 10;
6264 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6265 	if (ret < 0)
6266 		return ret;
6267 
6268 	*ppos += cnt;
6269 
6270 	return cnt;
6271 }
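
/*
 * Illustrative userspace sketch, not part of this file: the value
 * written above is interpreted in kilobytes per CPU, so writing "4096"
 * requests 4 MB per CPU. The mount point and helper name are
 * assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *kb)
 *	{
 *		int ret = -1;
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, kb, strlen(kb)) > 0)
 *			ret = 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 *	// e.g. set_buffer_size_kb("4096");
 */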
6272 
6273 static ssize_t
6274 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6275 				size_t cnt, loff_t *ppos)
6276 {
6277 	struct trace_array *tr = filp->private_data;
6278 	char buf[64];
6279 	int r, cpu;
6280 	unsigned long size = 0, expanded_size = 0;
6281 
6282 	mutex_lock(&trace_types_lock);
6283 	for_each_tracing_cpu(cpu) {
6284 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6285 		if (!ring_buffer_expanded)
6286 			expanded_size += trace_buf_size >> 10;
6287 	}
6288 	if (ring_buffer_expanded)
6289 		r = sprintf(buf, "%lu\n", size);
6290 	else
6291 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6292 	mutex_unlock(&trace_types_lock);
6293 
6294 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6295 }
6296 
6297 static ssize_t
6298 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6299 			  size_t cnt, loff_t *ppos)
6300 {
6301 	/*
6302 	 * There is no need to read what the user has written; this function
6303 	 * just makes sure that there is no error when "echo" is used.
6304 	 */
6305 
6306 	*ppos += cnt;
6307 
6308 	return cnt;
6309 }
6310 
6311 static int
6312 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6313 {
6314 	struct trace_array *tr = inode->i_private;
6315 
6316 	/* disable tracing ? */
6317 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6318 		tracer_tracing_off(tr);
6319 	/* resize the ring buffer to 0 */
6320 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6321 
6322 	trace_array_put(tr);
6323 
6324 	return 0;
6325 }
6326 
6327 static ssize_t
6328 tracing_mark_write(struct file *filp, const char __user *ubuf,
6329 					size_t cnt, loff_t *fpos)
6330 {
6331 	struct trace_array *tr = filp->private_data;
6332 	struct ring_buffer_event *event;
6333 	enum event_trigger_type tt = ETT_NONE;
6334 	struct ring_buffer *buffer;
6335 	struct print_entry *entry;
6336 	unsigned long irq_flags;
6337 	ssize_t written;
6338 	int size;
6339 	int len;
6340 
6341 /* Used in tracing_mark_raw_write() as well */
6342 #define FAULTED_STR "<faulted>"
6343 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6344 
6345 	if (tracing_disabled)
6346 		return -EINVAL;
6347 
6348 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6349 		return -EINVAL;
6350 
6351 	if (cnt > TRACE_BUF_SIZE)
6352 		cnt = TRACE_BUF_SIZE;
6353 
6354 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6355 
6356 	local_save_flags(irq_flags);
6357 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6358 
6359 	/* If shorter than "<faulted>", make sure we can still store that */
6360 	if (cnt < FAULTED_SIZE)
6361 		size += FAULTED_SIZE - cnt;
6362 
6363 	buffer = tr->trace_buffer.buffer;
6364 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6365 					    irq_flags, preempt_count());
6366 	if (unlikely(!event))
6367 		/* Ring buffer disabled, return as if not open for write */
6368 		return -EBADF;
6369 
6370 	entry = ring_buffer_event_data(event);
6371 	entry->ip = _THIS_IP_;
6372 
6373 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6374 	if (len) {
6375 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6376 		cnt = FAULTED_SIZE;
6377 		written = -EFAULT;
6378 	} else
6379 		written = cnt;
6380 	len = cnt;
6381 
6382 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6383 		/* do not add \n before testing triggers, but add \0 */
6384 		entry->buf[cnt] = '\0';
6385 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6386 	}
6387 
6388 	if (entry->buf[cnt - 1] != '\n') {
6389 		entry->buf[cnt] = '\n';
6390 		entry->buf[cnt + 1] = '\0';
6391 	} else
6392 		entry->buf[cnt] = '\0';
6393 
6394 	__buffer_unlock_commit(buffer, event);
6395 
6396 	if (tt)
6397 		event_triggers_post_call(tr->trace_marker_file, tt);
6398 
6399 	if (written > 0)
6400 		*fpos += written;
6401 
6402 	return written;
6403 }
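
/*
 * Illustrative userspace sketch, not part of this file: writing a string
 * to trace_marker records it as a TRACE_PRINT event; as the code above
 * shows, a trailing newline is appended when the caller does not supply
 * one. The mount point and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void write_trace_marker(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		if (write(fd, msg, strlen(msg)) < 0)
 *			perror("trace_marker");
 *		close(fd);
 *	}
 */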
6404 
6405 /* Limit it for now to 3K (including tag) */
6406 #define RAW_DATA_MAX_SIZE (1024*3)
6407 
6408 static ssize_t
6409 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6410 					size_t cnt, loff_t *fpos)
6411 {
6412 	struct trace_array *tr = filp->private_data;
6413 	struct ring_buffer_event *event;
6414 	struct ring_buffer *buffer;
6415 	struct raw_data_entry *entry;
6416 	unsigned long irq_flags;
6417 	ssize_t written;
6418 	int size;
6419 	int len;
6420 
6421 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6422 
6423 	if (tracing_disabled)
6424 		return -EINVAL;
6425 
6426 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6427 		return -EINVAL;
6428 
6429 	/* The marker must at least have a tag id */
6430 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6431 		return -EINVAL;
6432 
6433 	if (cnt > TRACE_BUF_SIZE)
6434 		cnt = TRACE_BUF_SIZE;
6435 
6436 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6437 
6438 	local_save_flags(irq_flags);
6439 	size = sizeof(*entry) + cnt;
6440 	if (cnt < FAULT_SIZE_ID)
6441 		size += FAULT_SIZE_ID - cnt;
6442 
6443 	buffer = tr->trace_buffer.buffer;
6444 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6445 					    irq_flags, preempt_count());
6446 	if (!event)
6447 		/* Ring buffer disabled, return as if not open for write */
6448 		return -EBADF;
6449 
6450 	entry = ring_buffer_event_data(event);
6451 
6452 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6453 	if (len) {
6454 		entry->id = -1;
6455 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6456 		written = -EFAULT;
6457 	} else
6458 		written = cnt;
6459 
6460 	__buffer_unlock_commit(buffer, event);
6461 
6462 	if (written > 0)
6463 		*fpos += written;
6464 
6465 	return written;
6466 }
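
/*
 * Illustrative userspace sketch, not part of this file: trace_marker_raw
 * expects a binary payload whose first sizeof(unsigned int) bytes are the
 * tag id (stored in entry->id above), followed by raw data, with the
 * whole write capped at RAW_DATA_MAX_SIZE. The struct layout, helper
 * name and mount point are assumptions for illustration only.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	struct raw_marker {
 *		unsigned int	id;		// tag id
 *		char		data[32];	// payload
 *	};
 *
 *	static void write_raw_marker(unsigned int id, const void *buf, size_t len)
 *	{
 *		struct raw_marker m = { .id = id };
 *		int fd;
 *
 *		if (len > sizeof(m.data))
 *			return;
 *		memcpy(m.data, buf, len);
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return;
 *		write(fd, &m, sizeof(m.id) + len);
 *		close(fd);
 *	}
 */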
6467 
6468 static int tracing_clock_show(struct seq_file *m, void *v)
6469 {
6470 	struct trace_array *tr = m->private;
6471 	int i;
6472 
6473 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6474 		seq_printf(m,
6475 			"%s%s%s%s", i ? " " : "",
6476 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6477 			i == tr->clock_id ? "]" : "");
6478 	seq_putc(m, '\n');
6479 
6480 	return 0;
6481 }
6482 
6483 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6484 {
6485 	int i;
6486 
6487 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6488 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6489 			break;
6490 	}
6491 	if (i == ARRAY_SIZE(trace_clocks))
6492 		return -EINVAL;
6493 
6494 	mutex_lock(&trace_types_lock);
6495 
6496 	tr->clock_id = i;
6497 
6498 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6499 
6500 	/*
6501 	 * New clock may not be consistent with the previous clock.
6502 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6503 	 */
6504 	tracing_reset_online_cpus(&tr->trace_buffer);
6505 
6506 #ifdef CONFIG_TRACER_MAX_TRACE
6507 	if (tr->max_buffer.buffer)
6508 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6509 	tracing_reset_online_cpus(&tr->max_buffer);
6510 #endif
6511 
6512 	mutex_unlock(&trace_types_lock);
6513 
6514 	return 0;
6515 }
6516 
6517 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6518 				   size_t cnt, loff_t *fpos)
6519 {
6520 	struct seq_file *m = filp->private_data;
6521 	struct trace_array *tr = m->private;
6522 	char buf[64];
6523 	const char *clockstr;
6524 	int ret;
6525 
6526 	if (cnt >= sizeof(buf))
6527 		return -EINVAL;
6528 
6529 	if (copy_from_user(buf, ubuf, cnt))
6530 		return -EFAULT;
6531 
6532 	buf[cnt] = 0;
6533 
6534 	clockstr = strstrip(buf);
6535 
6536 	ret = tracing_set_clock(tr, clockstr);
6537 	if (ret)
6538 		return ret;
6539 
6540 	*fpos += cnt;
6541 
6542 	return cnt;
6543 }
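
/*
 * Illustrative userspace sketch, not part of this file: a clock is
 * selected by writing its name (as listed by reading trace_clock) to the
 * same file. Note from tracing_set_clock() above that switching clocks
 * resets the ring buffer, so existing trace data is discarded. The mount
 * point, helper name and clock name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int select_trace_clock(const char *name)
 *	{
 *		int ret = -1;
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, name, strlen(name)) > 0)
 *			ret = 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 *	// e.g. select_trace_clock("mono");
 */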
6544 
6545 static int tracing_clock_open(struct inode *inode, struct file *file)
6546 {
6547 	struct trace_array *tr = inode->i_private;
6548 	int ret;
6549 
6550 	if (tracing_disabled)
6551 		return -ENODEV;
6552 
6553 	if (trace_array_get(tr))
6554 		return -ENODEV;
6555 
6556 	ret = single_open(file, tracing_clock_show, inode->i_private);
6557 	if (ret < 0)
6558 		trace_array_put(tr);
6559 
6560 	return ret;
6561 }
6562 
6563 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6564 {
6565 	struct trace_array *tr = m->private;
6566 
6567 	mutex_lock(&trace_types_lock);
6568 
6569 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6570 		seq_puts(m, "delta [absolute]\n");
6571 	else
6572 		seq_puts(m, "[delta] absolute\n");
6573 
6574 	mutex_unlock(&trace_types_lock);
6575 
6576 	return 0;
6577 }
6578 
6579 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6580 {
6581 	struct trace_array *tr = inode->i_private;
6582 	int ret;
6583 
6584 	if (tracing_disabled)
6585 		return -ENODEV;
6586 
6587 	if (trace_array_get(tr))
6588 		return -ENODEV;
6589 
6590 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6591 	if (ret < 0)
6592 		trace_array_put(tr);
6593 
6594 	return ret;
6595 }
6596 
6597 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6598 {
6599 	int ret = 0;
6600 
6601 	mutex_lock(&trace_types_lock);
6602 
6603 	if (abs && tr->time_stamp_abs_ref++)
6604 		goto out;
6605 
6606 	if (!abs) {
6607 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6608 			ret = -EINVAL;
6609 			goto out;
6610 		}
6611 
6612 		if (--tr->time_stamp_abs_ref)
6613 			goto out;
6614 	}
6615 
6616 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6617 
6618 #ifdef CONFIG_TRACER_MAX_TRACE
6619 	if (tr->max_buffer.buffer)
6620 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6621 #endif
6622  out:
6623 	mutex_unlock(&trace_types_lock);
6624 
6625 	return ret;
6626 }
6627 
6628 struct ftrace_buffer_info {
6629 	struct trace_iterator	iter;
6630 	void			*spare;
6631 	unsigned int		spare_cpu;
6632 	unsigned int		read;
6633 };
6634 
6635 #ifdef CONFIG_TRACER_SNAPSHOT
6636 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6637 {
6638 	struct trace_array *tr = inode->i_private;
6639 	struct trace_iterator *iter;
6640 	struct seq_file *m;
6641 	int ret = 0;
6642 
6643 	if (trace_array_get(tr) < 0)
6644 		return -ENODEV;
6645 
6646 	if (file->f_mode & FMODE_READ) {
6647 		iter = __tracing_open(inode, file, true);
6648 		if (IS_ERR(iter))
6649 			ret = PTR_ERR(iter);
6650 	} else {
6651 		/* Writes still need the seq_file to hold the private data */
6652 		ret = -ENOMEM;
6653 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6654 		if (!m)
6655 			goto out;
6656 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6657 		if (!iter) {
6658 			kfree(m);
6659 			goto out;
6660 		}
6661 		ret = 0;
6662 
6663 		iter->tr = tr;
6664 		iter->trace_buffer = &tr->max_buffer;
6665 		iter->cpu_file = tracing_get_cpu(inode);
6666 		m->private = iter;
6667 		file->private_data = m;
6668 	}
6669 out:
6670 	if (ret < 0)
6671 		trace_array_put(tr);
6672 
6673 	return ret;
6674 }
6675 
6676 static ssize_t
6677 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6678 		       loff_t *ppos)
6679 {
6680 	struct seq_file *m = filp->private_data;
6681 	struct trace_iterator *iter = m->private;
6682 	struct trace_array *tr = iter->tr;
6683 	unsigned long val;
6684 	int ret;
6685 
6686 	ret = tracing_update_buffers();
6687 	if (ret < 0)
6688 		return ret;
6689 
6690 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6691 	if (ret)
6692 		return ret;
6693 
6694 	mutex_lock(&trace_types_lock);
6695 
6696 	if (tr->current_trace->use_max_tr) {
6697 		ret = -EBUSY;
6698 		goto out;
6699 	}
6700 
6701 	arch_spin_lock(&tr->max_lock);
6702 	if (tr->cond_snapshot)
6703 		ret = -EBUSY;
6704 	arch_spin_unlock(&tr->max_lock);
6705 	if (ret)
6706 		goto out;
6707 
6708 	switch (val) {
6709 	case 0:
6710 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6711 			ret = -EINVAL;
6712 			break;
6713 		}
6714 		if (tr->allocated_snapshot)
6715 			free_snapshot(tr);
6716 		break;
6717 	case 1:
6718 /* Only allow per-cpu swap if the ring buffer supports it */
6719 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6720 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6721 			ret = -EINVAL;
6722 			break;
6723 		}
6724 #endif
6725 		if (tr->allocated_snapshot)
6726 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6727 					&tr->trace_buffer, iter->cpu_file);
6728 		else
6729 			ret = tracing_alloc_snapshot_instance(tr);
6730 		if (ret < 0)
6731 			break;
6732 		local_irq_disable();
6733 		/* Now, we're going to swap */
6734 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6735 			update_max_tr(tr, current, smp_processor_id(), NULL);
6736 		else
6737 			update_max_tr_single(tr, current, iter->cpu_file);
6738 		local_irq_enable();
6739 		break;
6740 	default:
6741 		if (tr->allocated_snapshot) {
6742 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6743 				tracing_reset_online_cpus(&tr->max_buffer);
6744 			else
6745 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6746 		}
6747 		break;
6748 	}
6749 
6750 	if (ret >= 0) {
6751 		*ppos += cnt;
6752 		ret = cnt;
6753 	}
6754 out:
6755 	mutex_unlock(&trace_types_lock);
6756 	return ret;
6757 }
6758 
6759 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6760 {
6761 	struct seq_file *m = file->private_data;
6762 	int ret;
6763 
6764 	ret = tracing_release(inode, file);
6765 
6766 	if (file->f_mode & FMODE_READ)
6767 		return ret;
6768 
6769 	/* If write only, the seq_file is just a stub */
6770 	if (m)
6771 		kfree(m->private);
6772 	kfree(m);
6773 
6774 	return 0;
6775 }
6776 
6777 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6778 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6779 				    size_t count, loff_t *ppos);
6780 static int tracing_buffers_release(struct inode *inode, struct file *file);
6781 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6782 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6783 
6784 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6785 {
6786 	struct ftrace_buffer_info *info;
6787 	int ret;
6788 
6789 	ret = tracing_buffers_open(inode, filp);
6790 	if (ret < 0)
6791 		return ret;
6792 
6793 	info = filp->private_data;
6794 
6795 	if (info->iter.trace->use_max_tr) {
6796 		tracing_buffers_release(inode, filp);
6797 		return -EBUSY;
6798 	}
6799 
6800 	info->iter.snapshot = true;
6801 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6802 
6803 	return ret;
6804 }
6805 
6806 #endif /* CONFIG_TRACER_SNAPSHOT */
6807 
6808 
6809 static const struct file_operations tracing_thresh_fops = {
6810 	.open		= tracing_open_generic,
6811 	.read		= tracing_thresh_read,
6812 	.write		= tracing_thresh_write,
6813 	.llseek		= generic_file_llseek,
6814 };
6815 
6816 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6817 static const struct file_operations tracing_max_lat_fops = {
6818 	.open		= tracing_open_generic,
6819 	.read		= tracing_max_lat_read,
6820 	.write		= tracing_max_lat_write,
6821 	.llseek		= generic_file_llseek,
6822 };
6823 #endif
6824 
6825 static const struct file_operations set_tracer_fops = {
6826 	.open		= tracing_open_generic,
6827 	.read		= tracing_set_trace_read,
6828 	.write		= tracing_set_trace_write,
6829 	.llseek		= generic_file_llseek,
6830 };
6831 
6832 static const struct file_operations tracing_pipe_fops = {
6833 	.open		= tracing_open_pipe,
6834 	.poll		= tracing_poll_pipe,
6835 	.read		= tracing_read_pipe,
6836 	.splice_read	= tracing_splice_read_pipe,
6837 	.release	= tracing_release_pipe,
6838 	.llseek		= no_llseek,
6839 };
6840 
6841 static const struct file_operations tracing_entries_fops = {
6842 	.open		= tracing_open_generic_tr,
6843 	.read		= tracing_entries_read,
6844 	.write		= tracing_entries_write,
6845 	.llseek		= generic_file_llseek,
6846 	.release	= tracing_release_generic_tr,
6847 };
6848 
6849 static const struct file_operations tracing_total_entries_fops = {
6850 	.open		= tracing_open_generic_tr,
6851 	.read		= tracing_total_entries_read,
6852 	.llseek		= generic_file_llseek,
6853 	.release	= tracing_release_generic_tr,
6854 };
6855 
6856 static const struct file_operations tracing_free_buffer_fops = {
6857 	.open		= tracing_open_generic_tr,
6858 	.write		= tracing_free_buffer_write,
6859 	.release	= tracing_free_buffer_release,
6860 };
6861 
6862 static const struct file_operations tracing_mark_fops = {
6863 	.open		= tracing_open_generic_tr,
6864 	.write		= tracing_mark_write,
6865 	.llseek		= generic_file_llseek,
6866 	.release	= tracing_release_generic_tr,
6867 };
6868 
6869 static const struct file_operations tracing_mark_raw_fops = {
6870 	.open		= tracing_open_generic_tr,
6871 	.write		= tracing_mark_raw_write,
6872 	.llseek		= generic_file_llseek,
6873 	.release	= tracing_release_generic_tr,
6874 };
6875 
6876 static const struct file_operations trace_clock_fops = {
6877 	.open		= tracing_clock_open,
6878 	.read		= seq_read,
6879 	.llseek		= seq_lseek,
6880 	.release	= tracing_single_release_tr,
6881 	.write		= tracing_clock_write,
6882 };
6883 
6884 static const struct file_operations trace_time_stamp_mode_fops = {
6885 	.open		= tracing_time_stamp_mode_open,
6886 	.read		= seq_read,
6887 	.llseek		= seq_lseek,
6888 	.release	= tracing_single_release_tr,
6889 };
6890 
6891 #ifdef CONFIG_TRACER_SNAPSHOT
6892 static const struct file_operations snapshot_fops = {
6893 	.open		= tracing_snapshot_open,
6894 	.read		= seq_read,
6895 	.write		= tracing_snapshot_write,
6896 	.llseek		= tracing_lseek,
6897 	.release	= tracing_snapshot_release,
6898 };
6899 
6900 static const struct file_operations snapshot_raw_fops = {
6901 	.open		= snapshot_raw_open,
6902 	.read		= tracing_buffers_read,
6903 	.release	= tracing_buffers_release,
6904 	.splice_read	= tracing_buffers_splice_read,
6905 	.llseek		= no_llseek,
6906 };
6907 
6908 #endif /* CONFIG_TRACER_SNAPSHOT */
6909 
6910 #define TRACING_LOG_ERRS_MAX	8
6911 #define TRACING_LOG_LOC_MAX	128
6912 
6913 #define CMD_PREFIX "  Command: "
6914 
6915 struct err_info {
6916 	const char	**errs;	/* ptr to loc-specific array of err strings */
6917 	u8		type;	/* index into errs -> specific err string */
6918 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
6919 	u64		ts;
6920 };
6921 
6922 struct tracing_log_err {
6923 	struct list_head	list;
6924 	struct err_info		info;
6925 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
6926 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6927 };
6928 
6929 static DEFINE_MUTEX(tracing_err_log_lock);
6930 
6931 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6932 {
6933 	struct tracing_log_err *err;
6934 
6935 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6936 		err = kzalloc(sizeof(*err), GFP_KERNEL);
6937 		if (!err)
6938 			err = ERR_PTR(-ENOMEM);
6939 		tr->n_err_log_entries++;
6940 
6941 		return err;
6942 	}
6943 
6944 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6945 	list_del(&err->list);
6946 
6947 	return err;
6948 }
6949 
6950 /**
6951  * err_pos - find the position of a string within a command for error careting
6952  * @cmd: The tracing command that caused the error
6953  * @str: The string to position the caret at within @cmd
6954  *
6955  * Finds the position of the first occurrence of @str within @cmd.  The
6956  * return value can be passed to tracing_log_err() for caret placement
6957  * within @cmd.
6958  *
6959  * Returns the index within @cmd of the first occurence of @str or 0
6960  * Returns the index within @cmd of the first occurrence of @str or 0
6961  */
6962 unsigned int err_pos(char *cmd, const char *str)
6963 {
6964 	char *found;
6965 
6966 	if (WARN_ON(!strlen(cmd)))
6967 		return 0;
6968 
6969 	found = strstr(cmd, str);
6970 	if (found)
6971 		return found - cmd;
6972 
6973 	return 0;
6974 }
6975 
6976 /**
6977  * tracing_log_err - write an error to the tracing error log
6978  * @tr: The associated trace array for the error (NULL for top level array)
6979  * @loc: A string describing where the error occurred
6980  * @cmd: The tracing command that caused the error
6981  * @errs: The array of loc-specific static error strings
6982  * @type: The index into errs[], which produces the specific static err string
6983  * @pos: The position the caret should be placed in the cmd
6984  *
6985  * Writes an error into tracing/error_log of the form:
6986  *
6987  * <loc>: error: <text>
6988  *   Command: <cmd>
6989  *              ^
6990  *
6991  * tracing/error_log is a small log file containing the last
6992  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
6993  * unless there has been a tracing error, and the error log can be
6994  * cleared and have its memory freed by writing the empty string in
6995  * truncation mode to it i.e. echo > tracing/error_log.
6996  * truncation mode to it, i.e. echo > tracing/error_log.
6997  * NOTE: the @errs array along with the @type param are used to
6998  * produce a static error string - this string is not copied and saved
6999  * when the error is logged - only a pointer to it is saved.  See
7000  * existing callers for examples of how static strings are typically
7001  * defined for use with tracing_log_err().
7002  */
7003 void tracing_log_err(struct trace_array *tr,
7004 		     const char *loc, const char *cmd,
7005 		     const char **errs, u8 type, u8 pos)
7006 {
7007 	struct tracing_log_err *err;
7008 
7009 	if (!tr)
7010 		tr = &global_trace;
7011 
7012 	mutex_lock(&tracing_err_log_lock);
7013 	err = get_tracing_log_err(tr);
7014 	if (PTR_ERR(err) == -ENOMEM) {
7015 		mutex_unlock(&tracing_err_log_lock);
7016 		return;
7017 	}
7018 
7019 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7020 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7021 
7022 	err->info.errs = errs;
7023 	err->info.type = type;
7024 	err->info.pos = pos;
7025 	err->info.ts = local_clock();
7026 
7027 	list_add_tail(&err->list, &tr->err_log);
7028 	mutex_unlock(&tracing_err_log_lock);
7029 }
7030 
7031 static void clear_tracing_err_log(struct trace_array *tr)
7032 {
7033 	struct tracing_log_err *err, *next;
7034 
7035 	mutex_lock(&tracing_err_log_lock);
7036 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7037 		list_del(&err->list);
7038 		kfree(err);
7039 	}
7040 
7041 	tr->n_err_log_entries = 0;
7042 	mutex_unlock(&tracing_err_log_lock);
7043 }
7044 
7045 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7046 {
7047 	struct trace_array *tr = m->private;
7048 
7049 	mutex_lock(&tracing_err_log_lock);
7050 
7051 	return seq_list_start(&tr->err_log, *pos);
7052 }
7053 
7054 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7055 {
7056 	struct trace_array *tr = m->private;
7057 
7058 	return seq_list_next(v, &tr->err_log, pos);
7059 }
7060 
7061 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7062 {
7063 	mutex_unlock(&tracing_err_log_lock);
7064 }
7065 
7066 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7067 {
7068 	u8 i;
7069 
7070 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7071 		seq_putc(m, ' ');
7072 	for (i = 0; i < pos; i++)
7073 		seq_putc(m, ' ');
7074 	seq_puts(m, "^\n");
7075 }
7076 
7077 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7078 {
7079 	struct tracing_log_err *err = v;
7080 
7081 	if (err) {
7082 		const char *err_text = err->info.errs[err->info.type];
7083 		u64 sec = err->info.ts;
7084 		u32 nsec;
7085 
7086 		nsec = do_div(sec, NSEC_PER_SEC);
7087 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7088 			   err->loc, err_text);
7089 		seq_printf(m, "%s", err->cmd);
7090 		tracing_err_log_show_pos(m, err->info.pos);
7091 	}
7092 
7093 	return 0;
7094 }
7095 
7096 static const struct seq_operations tracing_err_log_seq_ops = {
7097 	.start  = tracing_err_log_seq_start,
7098 	.next   = tracing_err_log_seq_next,
7099 	.stop   = tracing_err_log_seq_stop,
7100 	.show   = tracing_err_log_seq_show
7101 };
7102 
7103 static int tracing_err_log_open(struct inode *inode, struct file *file)
7104 {
7105 	struct trace_array *tr = inode->i_private;
7106 	int ret = 0;
7107 
7108 	if (trace_array_get(tr) < 0)
7109 		return -ENODEV;
7110 
7111 	/* If this file was opened for write, then erase contents */
7112 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7113 		clear_tracing_err_log(tr);
7114 
7115 	if (file->f_mode & FMODE_READ) {
7116 		ret = seq_open(file, &tracing_err_log_seq_ops);
7117 		if (!ret) {
7118 			struct seq_file *m = file->private_data;
7119 			m->private = tr;
7120 		} else {
7121 			trace_array_put(tr);
7122 		}
7123 	}
7124 	return ret;
7125 }
7126 
7127 static ssize_t tracing_err_log_write(struct file *file,
7128 				     const char __user *buffer,
7129 				     size_t count, loff_t *ppos)
7130 {
7131 	return count;
7132 }
7133 
7134 static int tracing_err_log_release(struct inode *inode, struct file *file)
7135 {
7136 	struct trace_array *tr = inode->i_private;
7137 
7138 	trace_array_put(tr);
7139 
7140 	if (file->f_mode & FMODE_READ)
7141 		seq_release(inode, file);
7142 
7143 	return 0;
7144 }
7145 
7146 static const struct file_operations tracing_err_log_fops = {
7147 	.open           = tracing_err_log_open,
7148 	.write		= tracing_err_log_write,
7149 	.read           = seq_read,
7150 	.llseek         = seq_lseek,
7151 	.release        = tracing_err_log_release,
7152 };
7153 
7154 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7155 {
7156 	struct trace_array *tr = inode->i_private;
7157 	struct ftrace_buffer_info *info;
7158 	int ret;
7159 
7160 	if (tracing_disabled)
7161 		return -ENODEV;
7162 
7163 	if (trace_array_get(tr) < 0)
7164 		return -ENODEV;
7165 
7166 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7167 	if (!info) {
7168 		trace_array_put(tr);
7169 		return -ENOMEM;
7170 	}
7171 
7172 	mutex_lock(&trace_types_lock);
7173 
7174 	info->iter.tr		= tr;
7175 	info->iter.cpu_file	= tracing_get_cpu(inode);
7176 	info->iter.trace	= tr->current_trace;
7177 	info->iter.trace_buffer = &tr->trace_buffer;
7178 	info->spare		= NULL;
7179 	/* Force reading ring buffer for first read */
7180 	info->read		= (unsigned int)-1;
7181 
7182 	filp->private_data = info;
7183 
7184 	tr->current_trace->ref++;
7185 
7186 	mutex_unlock(&trace_types_lock);
7187 
7188 	ret = nonseekable_open(inode, filp);
7189 	if (ret < 0)
7190 		trace_array_put(tr);
7191 
7192 	return ret;
7193 }
7194 
7195 static __poll_t
7196 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7197 {
7198 	struct ftrace_buffer_info *info = filp->private_data;
7199 	struct trace_iterator *iter = &info->iter;
7200 
7201 	return trace_poll(iter, filp, poll_table);
7202 }
7203 
7204 static ssize_t
7205 tracing_buffers_read(struct file *filp, char __user *ubuf,
7206 		     size_t count, loff_t *ppos)
7207 {
7208 	struct ftrace_buffer_info *info = filp->private_data;
7209 	struct trace_iterator *iter = &info->iter;
7210 	ssize_t ret = 0;
7211 	ssize_t size;
7212 
7213 	if (!count)
7214 		return 0;
7215 
7216 #ifdef CONFIG_TRACER_MAX_TRACE
7217 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7218 		return -EBUSY;
7219 #endif
7220 
7221 	if (!info->spare) {
7222 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7223 							  iter->cpu_file);
7224 		if (IS_ERR(info->spare)) {
7225 			ret = PTR_ERR(info->spare);
7226 			info->spare = NULL;
7227 		} else {
7228 			info->spare_cpu = iter->cpu_file;
7229 		}
7230 	}
7231 	if (!info->spare)
7232 		return ret;
7233 
7234 	/* Do we have previous read data to read? */
7235 	if (info->read < PAGE_SIZE)
7236 		goto read;
7237 
7238  again:
7239 	trace_access_lock(iter->cpu_file);
7240 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7241 				    &info->spare,
7242 				    count,
7243 				    iter->cpu_file, 0);
7244 	trace_access_unlock(iter->cpu_file);
7245 
7246 	if (ret < 0) {
7247 		if (trace_empty(iter)) {
7248 			if ((filp->f_flags & O_NONBLOCK))
7249 				return -EAGAIN;
7250 
7251 			ret = wait_on_pipe(iter, 0);
7252 			if (ret)
7253 				return ret;
7254 
7255 			goto again;
7256 		}
7257 		return 0;
7258 	}
7259 
7260 	info->read = 0;
7261  read:
7262 	size = PAGE_SIZE - info->read;
7263 	if (size > count)
7264 		size = count;
7265 
7266 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7267 	if (ret == size)
7268 		return -EFAULT;
7269 
7270 	size -= ret;
7271 
7272 	*ppos += size;
7273 	info->read += size;
7274 
7275 	return size;
7276 }
7277 
7278 static int tracing_buffers_release(struct inode *inode, struct file *file)
7279 {
7280 	struct ftrace_buffer_info *info = file->private_data;
7281 	struct trace_iterator *iter = &info->iter;
7282 
7283 	mutex_lock(&trace_types_lock);
7284 
7285 	iter->tr->current_trace->ref--;
7286 
7287 	__trace_array_put(iter->tr);
7288 
7289 	if (info->spare)
7290 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7291 					   info->spare_cpu, info->spare);
7292 	kfree(info);
7293 
7294 	mutex_unlock(&trace_types_lock);
7295 
7296 	return 0;
7297 }
7298 
7299 struct buffer_ref {
7300 	struct ring_buffer	*buffer;
7301 	void			*page;
7302 	int			cpu;
7303 	refcount_t		refcount;
7304 };
7305 
7306 static void buffer_ref_release(struct buffer_ref *ref)
7307 {
7308 	if (!refcount_dec_and_test(&ref->refcount))
7309 		return;
7310 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7311 	kfree(ref);
7312 }
7313 
7314 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7315 				    struct pipe_buffer *buf)
7316 {
7317 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7318 
7319 	buffer_ref_release(ref);
7320 	buf->private = 0;
7321 }
7322 
7323 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7324 				struct pipe_buffer *buf)
7325 {
7326 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7327 
7328 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7329 		return false;
7330 
7331 	refcount_inc(&ref->refcount);
7332 	return true;
7333 }
7334 
7335 /* Pipe buffer operations for a buffer. */
7336 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7337 	.confirm		= generic_pipe_buf_confirm,
7338 	.release		= buffer_pipe_buf_release,
7339 	.steal			= generic_pipe_buf_nosteal,
7340 	.get			= buffer_pipe_buf_get,
7341 };
7342 
7343 /*
7344  * Callback from splice_to_pipe(): release any pages left in the spd
7345  * in case we errored out while filling the pipe.
7346  */
7347 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7348 {
7349 	struct buffer_ref *ref =
7350 		(struct buffer_ref *)spd->partial[i].private;
7351 
7352 	buffer_ref_release(ref);
7353 	spd->partial[i].private = 0;
7354 }
7355 
7356 static ssize_t
7357 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7358 			    struct pipe_inode_info *pipe, size_t len,
7359 			    unsigned int flags)
7360 {
7361 	struct ftrace_buffer_info *info = file->private_data;
7362 	struct trace_iterator *iter = &info->iter;
7363 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7364 	struct page *pages_def[PIPE_DEF_BUFFERS];
7365 	struct splice_pipe_desc spd = {
7366 		.pages		= pages_def,
7367 		.partial	= partial_def,
7368 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7369 		.ops		= &buffer_pipe_buf_ops,
7370 		.spd_release	= buffer_spd_release,
7371 	};
7372 	struct buffer_ref *ref;
7373 	int entries, i;
7374 	ssize_t ret = 0;
7375 
7376 #ifdef CONFIG_TRACER_MAX_TRACE
7377 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7378 		return -EBUSY;
7379 #endif
7380 
7381 	if (*ppos & (PAGE_SIZE - 1))
7382 		return -EINVAL;
7383 
7384 	if (len & (PAGE_SIZE - 1)) {
7385 		if (len < PAGE_SIZE)
7386 			return -EINVAL;
7387 		len &= PAGE_MASK;
7388 	}
7389 
7390 	if (splice_grow_spd(pipe, &spd))
7391 		return -ENOMEM;
7392 
7393  again:
7394 	trace_access_lock(iter->cpu_file);
7395 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7396 
7397 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7398 		struct page *page;
7399 		int r;
7400 
7401 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7402 		if (!ref) {
7403 			ret = -ENOMEM;
7404 			break;
7405 		}
7406 
7407 		refcount_set(&ref->refcount, 1);
7408 		ref->buffer = iter->trace_buffer->buffer;
7409 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7410 		if (IS_ERR(ref->page)) {
7411 			ret = PTR_ERR(ref->page);
7412 			ref->page = NULL;
7413 			kfree(ref);
7414 			break;
7415 		}
7416 		ref->cpu = iter->cpu_file;
7417 
7418 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7419 					  len, iter->cpu_file, 1);
7420 		if (r < 0) {
7421 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7422 						   ref->page);
7423 			kfree(ref);
7424 			break;
7425 		}
7426 
7427 		page = virt_to_page(ref->page);
7428 
7429 		spd.pages[i] = page;
7430 		spd.partial[i].len = PAGE_SIZE;
7431 		spd.partial[i].offset = 0;
7432 		spd.partial[i].private = (unsigned long)ref;
7433 		spd.nr_pages++;
7434 		*ppos += PAGE_SIZE;
7435 
7436 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7437 	}
7438 
7439 	trace_access_unlock(iter->cpu_file);
7440 	spd.nr_pages = i;
7441 
7442 	/* did we read anything? */
7443 	if (!spd.nr_pages) {
7444 		if (ret)
7445 			goto out;
7446 
7447 		ret = -EAGAIN;
7448 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7449 			goto out;
7450 
7451 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7452 		if (ret)
7453 			goto out;
7454 
7455 		goto again;
7456 	}
7457 
7458 	ret = splice_to_pipe(pipe, &spd);
7459 out:
7460 	splice_shrink_spd(&spd);
7461 
7462 	return ret;
7463 }
7464 
7465 static const struct file_operations tracing_buffers_fops = {
7466 	.open		= tracing_buffers_open,
7467 	.read		= tracing_buffers_read,
7468 	.poll		= tracing_buffers_poll,
7469 	.release	= tracing_buffers_release,
7470 	.splice_read	= tracing_buffers_splice_read,
7471 	.llseek		= no_llseek,
7472 };
7473 
7474 static ssize_t
7475 tracing_stats_read(struct file *filp, char __user *ubuf,
7476 		   size_t count, loff_t *ppos)
7477 {
7478 	struct inode *inode = file_inode(filp);
7479 	struct trace_array *tr = inode->i_private;
7480 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7481 	int cpu = tracing_get_cpu(inode);
7482 	struct trace_seq *s;
7483 	unsigned long cnt;
7484 	unsigned long long t;
7485 	unsigned long usec_rem;
7486 
7487 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7488 	if (!s)
7489 		return -ENOMEM;
7490 
7491 	trace_seq_init(s);
7492 
7493 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7494 	trace_seq_printf(s, "entries: %ld\n", cnt);
7495 
7496 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7497 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7498 
7499 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7500 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7501 
7502 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7503 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7504 
7505 	if (trace_clocks[tr->clock_id].in_ns) {
7506 		/* local or global for trace_clock */
7507 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7508 		usec_rem = do_div(t, USEC_PER_SEC);
7509 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7510 								t, usec_rem);
7511 
7512 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7513 		usec_rem = do_div(t, USEC_PER_SEC);
7514 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7515 	} else {
7516 		/* counter or tsc mode for trace_clock */
7517 		trace_seq_printf(s, "oldest event ts: %llu\n",
7518 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7519 
7520 		trace_seq_printf(s, "now ts: %llu\n",
7521 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7522 	}
7523 
7524 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7525 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7526 
7527 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7528 	trace_seq_printf(s, "read events: %ld\n", cnt);
7529 
7530 	count = simple_read_from_buffer(ubuf, count, ppos,
7531 					s->buffer, trace_seq_used(s));
7532 
7533 	kfree(s);
7534 
7535 	return count;
7536 }
7537 
7538 static const struct file_operations tracing_stats_fops = {
7539 	.open		= tracing_open_generic_tr,
7540 	.read		= tracing_stats_read,
7541 	.llseek		= generic_file_llseek,
7542 	.release	= tracing_release_generic_tr,
7543 };
7544 
7545 #ifdef CONFIG_DYNAMIC_FTRACE
7546 
7547 static ssize_t
7548 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7549 		  size_t cnt, loff_t *ppos)
7550 {
7551 	unsigned long *p = filp->private_data;
7552 	char buf[64]; /* Not too big for a shallow stack */
7553 	int r;
7554 
7555 	r = scnprintf(buf, 63, "%ld", *p);
7556 	buf[r++] = '\n';
7557 
7558 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7559 }
7560 
7561 static const struct file_operations tracing_dyn_info_fops = {
7562 	.open		= tracing_open_generic,
7563 	.read		= tracing_read_dyn_info,
7564 	.llseek		= generic_file_llseek,
7565 };
7566 #endif /* CONFIG_DYNAMIC_FTRACE */
7567 
7568 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7569 static void
7570 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7571 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7572 		void *data)
7573 {
7574 	tracing_snapshot_instance(tr);
7575 }
7576 
7577 static void
7578 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7579 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7580 		      void *data)
7581 {
7582 	struct ftrace_func_mapper *mapper = data;
7583 	long *count = NULL;
7584 
7585 	if (mapper)
7586 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7587 
7588 	if (count) {
7589 
7590 		if (*count <= 0)
7591 			return;
7592 
7593 		(*count)--;
7594 	}
7595 
7596 	tracing_snapshot_instance(tr);
7597 }
7598 
7599 static int
7600 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7601 		      struct ftrace_probe_ops *ops, void *data)
7602 {
7603 	struct ftrace_func_mapper *mapper = data;
7604 	long *count = NULL;
7605 
7606 	seq_printf(m, "%ps:", (void *)ip);
7607 
7608 	seq_puts(m, "snapshot");
7609 
7610 	if (mapper)
7611 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7612 
7613 	if (count)
7614 		seq_printf(m, ":count=%ld\n", *count);
7615 	else
7616 		seq_puts(m, ":unlimited\n");
7617 
7618 	return 0;
7619 }
7620 
7621 static int
7622 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7623 		     unsigned long ip, void *init_data, void **data)
7624 {
7625 	struct ftrace_func_mapper *mapper = *data;
7626 
7627 	if (!mapper) {
7628 		mapper = allocate_ftrace_func_mapper();
7629 		if (!mapper)
7630 			return -ENOMEM;
7631 		*data = mapper;
7632 	}
7633 
7634 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7635 }
7636 
7637 static void
7638 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7639 		     unsigned long ip, void *data)
7640 {
7641 	struct ftrace_func_mapper *mapper = data;
7642 
7643 	if (!ip) {
7644 		if (!mapper)
7645 			return;
7646 		free_ftrace_func_mapper(mapper, NULL);
7647 		return;
7648 	}
7649 
7650 	ftrace_func_mapper_remove_ip(mapper, ip);
7651 }
7652 
7653 static struct ftrace_probe_ops snapshot_probe_ops = {
7654 	.func			= ftrace_snapshot,
7655 	.print			= ftrace_snapshot_print,
7656 };
7657 
7658 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7659 	.func			= ftrace_count_snapshot,
7660 	.print			= ftrace_snapshot_print,
7661 	.init			= ftrace_snapshot_init,
7662 	.free			= ftrace_snapshot_free,
7663 };
7664 
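/*
 * Handler for the "snapshot" command registered below. The command is
 * written to set_ftrace_filter with an optional count, for example
 * (using an arbitrary traceable function):
 *
 *   echo 'do_sys_open:snapshot' > set_ftrace_filter
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *   echo '!do_sys_open:snapshot' > set_ftrace_filter
 *
 * The first form takes a snapshot on every hit of the function, the
 * second only for the first five hits, and the leading '!' form
 * unregisters the probe again.
 */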
7665 static int
7666 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7667 			       char *glob, char *cmd, char *param, int enable)
7668 {
7669 	struct ftrace_probe_ops *ops;
7670 	void *count = (void *)-1;
7671 	char *number;
7672 	int ret;
7673 
7674 	if (!tr)
7675 		return -ENODEV;
7676 
7677 	/* hash funcs only work with set_ftrace_filter */
7678 	if (!enable)
7679 		return -EINVAL;
7680 
7681 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7682 
7683 	if (glob[0] == '!')
7684 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7685 
7686 	if (!param)
7687 		goto out_reg;
7688 
7689 	number = strsep(&param, ":");
7690 
7691 	if (!strlen(number))
7692 		goto out_reg;
7693 
7694 	/*
7695 	 * We use the callback data field (which is a pointer)
7696 	 * as our counter.
7697 	 */
7698 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7699 	if (ret)
7700 		return ret;
7701 
7702  out_reg:
7703 	ret = tracing_alloc_snapshot_instance(tr);
7704 	if (ret < 0)
7705 		goto out;
7706 
7707 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7708 
7709  out:
7710 	return ret < 0 ? ret : 0;
7711 }
7712 
7713 static struct ftrace_func_command ftrace_snapshot_cmd = {
7714 	.name			= "snapshot",
7715 	.func			= ftrace_trace_snapshot_callback,
7716 };
7717 
7718 static __init int register_snapshot_cmd(void)
7719 {
7720 	return register_ftrace_command(&ftrace_snapshot_cmd);
7721 }
7722 #else
7723 static inline __init int register_snapshot_cmd(void) { return 0; }
7724 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7725 
7726 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7727 {
7728 	if (WARN_ON(!tr->dir))
7729 		return ERR_PTR(-ENODEV);
7730 
7731 	/* Top directory uses NULL as the parent */
7732 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7733 		return NULL;
7734 
7735 	/* All sub buffers have a descriptor */
7736 	return tr->dir;
7737 }
7738 
7739 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7740 {
7741 	struct dentry *d_tracer;
7742 
7743 	if (tr->percpu_dir)
7744 		return tr->percpu_dir;
7745 
7746 	d_tracer = tracing_get_dentry(tr);
7747 	if (IS_ERR(d_tracer))
7748 		return NULL;
7749 
7750 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7751 
7752 	WARN_ONCE(!tr->percpu_dir,
7753 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7754 
7755 	return tr->percpu_dir;
7756 }
7757 
7758 static struct dentry *
7759 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7760 		      void *data, long cpu, const struct file_operations *fops)
7761 {
7762 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7763 
7764 	if (ret) /* See tracing_get_cpu() */
7765 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7766 	return ret;
7767 }
7768 
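/*
 * Create the per_cpu/cpu<N> directory for one CPU. Each CPU gets its own
 * trace, trace_pipe, trace_pipe_raw, stats and buffer_size_kb files
 * (plus snapshot and snapshot_raw with CONFIG_TRACER_SNAPSHOT), all of
 * which operate only on that CPU's part of the ring buffer.
 */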
7769 static void
7770 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7771 {
7772 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7773 	struct dentry *d_cpu;
7774 	char cpu_dir[30]; /* 30 characters should be more than enough */
7775 
7776 	if (!d_percpu)
7777 		return;
7778 
7779 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7780 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7781 	if (!d_cpu) {
7782 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7783 		return;
7784 	}
7785 
7786 	/* per cpu trace_pipe */
7787 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7788 				tr, cpu, &tracing_pipe_fops);
7789 
7790 	/* per cpu trace */
7791 	trace_create_cpu_file("trace", 0644, d_cpu,
7792 				tr, cpu, &tracing_fops);
7793 
7794 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7795 				tr, cpu, &tracing_buffers_fops);
7796 
7797 	trace_create_cpu_file("stats", 0444, d_cpu,
7798 				tr, cpu, &tracing_stats_fops);
7799 
7800 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7801 				tr, cpu, &tracing_entries_fops);
7802 
7803 #ifdef CONFIG_TRACER_SNAPSHOT
7804 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7805 				tr, cpu, &snapshot_fops);
7806 
7807 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7808 				tr, cpu, &snapshot_raw_fops);
7809 #endif
7810 }
7811 
7812 #ifdef CONFIG_FTRACE_SELFTEST
7813 /* Let selftest have access to static functions in this file */
7814 #include "trace_selftest.c"
7815 #endif
7816 
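/*
 * Read/write handlers for the per-tracer files under options/. Reading
 * returns "0\n" or "1\n" depending on whether the option bit is set;
 * writing 0 or 1 updates the bit via __set_tracer_option() while
 * holding trace_types_lock.
 */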
7817 static ssize_t
7818 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7819 			loff_t *ppos)
7820 {
7821 	struct trace_option_dentry *topt = filp->private_data;
7822 	char *buf;
7823 
7824 	if (topt->flags->val & topt->opt->bit)
7825 		buf = "1\n";
7826 	else
7827 		buf = "0\n";
7828 
7829 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7830 }
7831 
7832 static ssize_t
7833 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7834 			 loff_t *ppos)
7835 {
7836 	struct trace_option_dentry *topt = filp->private_data;
7837 	unsigned long val;
7838 	int ret;
7839 
7840 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7841 	if (ret)
7842 		return ret;
7843 
7844 	if (val != 0 && val != 1)
7845 		return -EINVAL;
7846 
7847 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7848 		mutex_lock(&trace_types_lock);
7849 		ret = __set_tracer_option(topt->tr, topt->flags,
7850 					  topt->opt, !val);
7851 		mutex_unlock(&trace_types_lock);
7852 		if (ret)
7853 			return ret;
7854 	}
7855 
7856 	*ppos += cnt;
7857 
7858 	return cnt;
7859 }
7860 
7861 
7862 static const struct file_operations trace_options_fops = {
7863 	.open = tracing_open_generic,
7864 	.read = trace_options_read,
7865 	.write = trace_options_write,
7866 	.llseek	= generic_file_llseek,
7867 };
7868 
7869 /*
7870  * In order to pass in both the trace_array descriptor as well as the index
7871  * to the flag that the trace option file represents, the trace_array
7872  * has a character array of trace_flags_index[], which holds the index
7873  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7874  * The address of this character array is passed to the flag option file
7875  * read/write callbacks.
7876  *
7877  * In order to extract both the index and the trace_array descriptor,
7878  * get_tr_index() uses the following algorithm.
7879  *
7880  *   idx = *ptr;
7881  *
7882  * This works because the pointer holds the address of one entry in
7883  * the index array, and that entry's value is its own index
7884  * (remember, index[1] == 1).
7885  *
7886  * Subtracting that index from the pointer gets us back to the start:
7887  *
7888  *   ptr - idx == &index[0]
7889  *
7890  * Then a simple container_of() from that pointer gets us to the
7891  * trace_array descriptor.
7892  */
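/*
 * Concretely: if data points at trace_flags_index[3], then *data == 3,
 * data - 3 == &trace_flags_index[0], and container_of() on that address
 * yields the enclosing trace_array.
 */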
7893 static void get_tr_index(void *data, struct trace_array **ptr,
7894 			 unsigned int *pindex)
7895 {
7896 	*pindex = *(unsigned char *)data;
7897 
7898 	*ptr = container_of(data - *pindex, struct trace_array,
7899 			    trace_flags_index);
7900 }
7901 
7902 static ssize_t
7903 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7904 			loff_t *ppos)
7905 {
7906 	void *tr_index = filp->private_data;
7907 	struct trace_array *tr;
7908 	unsigned int index;
7909 	char *buf;
7910 
7911 	get_tr_index(tr_index, &tr, &index);
7912 
7913 	if (tr->trace_flags & (1 << index))
7914 		buf = "1\n";
7915 	else
7916 		buf = "0\n";
7917 
7918 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7919 }
7920 
7921 static ssize_t
7922 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7923 			 loff_t *ppos)
7924 {
7925 	void *tr_index = filp->private_data;
7926 	struct trace_array *tr;
7927 	unsigned int index;
7928 	unsigned long val;
7929 	int ret;
7930 
7931 	get_tr_index(tr_index, &tr, &index);
7932 
7933 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7934 	if (ret)
7935 		return ret;
7936 
7937 	if (val != 0 && val != 1)
7938 		return -EINVAL;
7939 
7940 	mutex_lock(&trace_types_lock);
7941 	ret = set_tracer_flag(tr, 1 << index, val);
7942 	mutex_unlock(&trace_types_lock);
7943 
7944 	if (ret < 0)
7945 		return ret;
7946 
7947 	*ppos += cnt;
7948 
7949 	return cnt;
7950 }
7951 
7952 static const struct file_operations trace_options_core_fops = {
7953 	.open = tracing_open_generic,
7954 	.read = trace_options_core_read,
7955 	.write = trace_options_core_write,
7956 	.llseek = generic_file_llseek,
7957 };
7958 
7959 struct dentry *trace_create_file(const char *name,
7960 				 umode_t mode,
7961 				 struct dentry *parent,
7962 				 void *data,
7963 				 const struct file_operations *fops)
7964 {
7965 	struct dentry *ret;
7966 
7967 	ret = tracefs_create_file(name, mode, parent, data, fops);
7968 	if (!ret)
7969 		pr_warn("Could not create tracefs '%s' entry\n", name);
7970 
7971 	return ret;
7972 }
7973 
7974 
7975 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7976 {
7977 	struct dentry *d_tracer;
7978 
7979 	if (tr->options)
7980 		return tr->options;
7981 
7982 	d_tracer = tracing_get_dentry(tr);
7983 	if (IS_ERR(d_tracer))
7984 		return NULL;
7985 
7986 	tr->options = tracefs_create_dir("options", d_tracer);
7987 	if (!tr->options) {
7988 		pr_warn("Could not create tracefs directory 'options'\n");
7989 		return NULL;
7990 	}
7991 
7992 	return tr->options;
7993 }
7994 
7995 static void
7996 create_trace_option_file(struct trace_array *tr,
7997 			 struct trace_option_dentry *topt,
7998 			 struct tracer_flags *flags,
7999 			 struct tracer_opt *opt)
8000 {
8001 	struct dentry *t_options;
8002 
8003 	t_options = trace_options_init_dentry(tr);
8004 	if (!t_options)
8005 		return;
8006 
8007 	topt->flags = flags;
8008 	topt->opt = opt;
8009 	topt->tr = tr;
8010 
8011 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8012 				    &trace_options_fops);
8013 
8014 }
8015 
8016 static void
8017 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8018 {
8019 	struct trace_option_dentry *topts;
8020 	struct trace_options *tr_topts;
8021 	struct tracer_flags *flags;
8022 	struct tracer_opt *opts;
8023 	int cnt;
8024 	int i;
8025 
8026 	if (!tracer)
8027 		return;
8028 
8029 	flags = tracer->flags;
8030 
8031 	if (!flags || !flags->opts)
8032 		return;
8033 
8034 	/*
8035 	 * If this is an instance, only create flags for tracers
8036 	 * the instance may have.
8037 	 */
8038 	if (!trace_ok_for_array(tracer, tr))
8039 		return;
8040 
8041 	for (i = 0; i < tr->nr_topts; i++) {
8042 		/* Make sure there are no duplicate flags. */
8043 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8044 			return;
8045 	}
8046 
8047 	opts = flags->opts;
8048 
8049 	for (cnt = 0; opts[cnt].name; cnt++)
8050 		;
8051 
8052 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8053 	if (!topts)
8054 		return;
8055 
8056 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8057 			    GFP_KERNEL);
8058 	if (!tr_topts) {
8059 		kfree(topts);
8060 		return;
8061 	}
8062 
8063 	tr->topts = tr_topts;
8064 	tr->topts[tr->nr_topts].tracer = tracer;
8065 	tr->topts[tr->nr_topts].topts = topts;
8066 	tr->nr_topts++;
8067 
8068 	for (cnt = 0; opts[cnt].name; cnt++) {
8069 		create_trace_option_file(tr, &topts[cnt], flags,
8070 					 &opts[cnt]);
8071 		WARN_ONCE(topts[cnt].entry == NULL,
8072 			  "Failed to create trace option: %s",
8073 			  opts[cnt].name);
8074 	}
8075 }
8076 
8077 static struct dentry *
8078 create_trace_option_core_file(struct trace_array *tr,
8079 			      const char *option, long index)
8080 {
8081 	struct dentry *t_options;
8082 
8083 	t_options = trace_options_init_dentry(tr);
8084 	if (!t_options)
8085 		return NULL;
8086 
8087 	return trace_create_file(option, 0644, t_options,
8088 				 (void *)&tr->trace_flags_index[index],
8089 				 &trace_options_core_fops);
8090 }
8091 
8092 static void create_trace_options_dir(struct trace_array *tr)
8093 {
8094 	struct dentry *t_options;
8095 	bool top_level = tr == &global_trace;
8096 	int i;
8097 
8098 	t_options = trace_options_init_dentry(tr);
8099 	if (!t_options)
8100 		return;
8101 
8102 	for (i = 0; trace_options[i]; i++) {
8103 		if (top_level ||
8104 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8105 			create_trace_option_core_file(tr, trace_options[i], i);
8106 	}
8107 }
8108 
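/*
 * These handlers back the "tracing_on" file (see init_tracer_tracefs()
 * below). Reading reports whether the ring buffer is currently
 * recording; writing 1 or 0 turns recording on or off and invokes the
 * current tracer's start()/stop() callbacks, e.g. (path depends on the
 * tracefs mount point):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */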
8109 static ssize_t
8110 rb_simple_read(struct file *filp, char __user *ubuf,
8111 	       size_t cnt, loff_t *ppos)
8112 {
8113 	struct trace_array *tr = filp->private_data;
8114 	char buf[64];
8115 	int r;
8116 
8117 	r = tracer_tracing_is_on(tr);
8118 	r = sprintf(buf, "%d\n", r);
8119 
8120 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8121 }
8122 
8123 static ssize_t
8124 rb_simple_write(struct file *filp, const char __user *ubuf,
8125 		size_t cnt, loff_t *ppos)
8126 {
8127 	struct trace_array *tr = filp->private_data;
8128 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8129 	unsigned long val;
8130 	int ret;
8131 
8132 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8133 	if (ret)
8134 		return ret;
8135 
8136 	if (buffer) {
8137 		mutex_lock(&trace_types_lock);
8138 		if (!!val == tracer_tracing_is_on(tr)) {
8139 			val = 0; /* do nothing */
8140 		} else if (val) {
8141 			tracer_tracing_on(tr);
8142 			if (tr->current_trace->start)
8143 				tr->current_trace->start(tr);
8144 		} else {
8145 			tracer_tracing_off(tr);
8146 			if (tr->current_trace->stop)
8147 				tr->current_trace->stop(tr);
8148 		}
8149 		mutex_unlock(&trace_types_lock);
8150 	}
8151 
8152 	(*ppos)++;
8153 
8154 	return cnt;
8155 }
8156 
8157 static const struct file_operations rb_simple_fops = {
8158 	.open		= tracing_open_generic_tr,
8159 	.read		= rb_simple_read,
8160 	.write		= rb_simple_write,
8161 	.release	= tracing_release_generic_tr,
8162 	.llseek		= default_llseek,
8163 };
8164 
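/*
 * These handlers back the "buffer_percent" file. The value is meant as
 * the percentage of the ring buffer that should be filled before
 * blocked readers (e.g. of trace_pipe_raw) are woken up. Writes accept
 * 1..100; anything above 100 is rejected and 0 is treated as 1. The
 * default of 50 is set in init_tracer_tracefs() below.
 */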
8165 static ssize_t
8166 buffer_percent_read(struct file *filp, char __user *ubuf,
8167 		    size_t cnt, loff_t *ppos)
8168 {
8169 	struct trace_array *tr = filp->private_data;
8170 	char buf[64];
8171 	int r;
8172 
8173 	r = tr->buffer_percent;
8174 	r = sprintf(buf, "%d\n", r);
8175 
8176 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8177 }
8178 
8179 static ssize_t
8180 buffer_percent_write(struct file *filp, const char __user *ubuf,
8181 		     size_t cnt, loff_t *ppos)
8182 {
8183 	struct trace_array *tr = filp->private_data;
8184 	unsigned long val;
8185 	int ret;
8186 
8187 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8188 	if (ret)
8189 		return ret;
8190 
8191 	if (val > 100)
8192 		return -EINVAL;
8193 
8194 	if (!val)
8195 		val = 1;
8196 
8197 	tr->buffer_percent = val;
8198 
8199 	(*ppos)++;
8200 
8201 	return cnt;
8202 }
8203 
8204 static const struct file_operations buffer_percent_fops = {
8205 	.open		= tracing_open_generic_tr,
8206 	.read		= buffer_percent_read,
8207 	.write		= buffer_percent_write,
8208 	.release	= tracing_release_generic_tr,
8209 	.llseek		= default_llseek,
8210 };
8211 
8212 static struct dentry *trace_instance_dir;
8213 
8214 static void
8215 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8216 
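/*
 * Allocate one ring buffer plus its per-CPU trace_array_cpu data for the
 * given trace_buffer. The OVERWRITE ring buffer flag follows the trace
 * array's "overwrite" option. allocate_trace_buffers() below uses this
 * for the main buffer and, with CONFIG_TRACER_MAX_TRACE, for the
 * snapshot (max) buffer, which stays at its minimum size unless a
 * snapshot was requested on the kernel command line.
 */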
8217 static int
8218 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8219 {
8220 	enum ring_buffer_flags rb_flags;
8221 
8222 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8223 
8224 	buf->tr = tr;
8225 
8226 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8227 	if (!buf->buffer)
8228 		return -ENOMEM;
8229 
8230 	buf->data = alloc_percpu(struct trace_array_cpu);
8231 	if (!buf->data) {
8232 		ring_buffer_free(buf->buffer);
8233 		buf->buffer = NULL;
8234 		return -ENOMEM;
8235 	}
8236 
8237 	/* Allocate the first page for all buffers */
8238 	set_buffer_entries(&tr->trace_buffer,
8239 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8240 
8241 	return 0;
8242 }
8243 
8244 static int allocate_trace_buffers(struct trace_array *tr, int size)
8245 {
8246 	int ret;
8247 
8248 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8249 	if (ret)
8250 		return ret;
8251 
8252 #ifdef CONFIG_TRACER_MAX_TRACE
8253 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8254 				    allocate_snapshot ? size : 1);
8255 	if (WARN_ON(ret)) {
8256 		ring_buffer_free(tr->trace_buffer.buffer);
8257 		tr->trace_buffer.buffer = NULL;
8258 		free_percpu(tr->trace_buffer.data);
8259 		tr->trace_buffer.data = NULL;
8260 		return -ENOMEM;
8261 	}
8262 	tr->allocated_snapshot = allocate_snapshot;
8263 
8264 	/*
8265 	 * Only the top level trace array gets its snapshot allocated
8266 	 * from the kernel command line.
8267 	 */
8268 	allocate_snapshot = false;
8269 #endif
8270 	return 0;
8271 }
8272 
8273 static void free_trace_buffer(struct trace_buffer *buf)
8274 {
8275 	if (buf->buffer) {
8276 		ring_buffer_free(buf->buffer);
8277 		buf->buffer = NULL;
8278 		free_percpu(buf->data);
8279 		buf->data = NULL;
8280 	}
8281 }
8282 
8283 static void free_trace_buffers(struct trace_array *tr)
8284 {
8285 	if (!tr)
8286 		return;
8287 
8288 	free_trace_buffer(&tr->trace_buffer);
8289 
8290 #ifdef CONFIG_TRACER_MAX_TRACE
8291 	free_trace_buffer(&tr->max_buffer);
8292 #endif
8293 }
8294 
8295 static void init_trace_flags_index(struct trace_array *tr)
8296 {
8297 	int i;
8298 
8299 	/* Used by the trace options files */
8300 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8301 		tr->trace_flags_index[i] = i;
8302 }
8303 
8304 static void __update_tracer_options(struct trace_array *tr)
8305 {
8306 	struct tracer *t;
8307 
8308 	for (t = trace_types; t; t = t->next)
8309 		add_tracer_options(tr, t);
8310 }
8311 
8312 static void update_tracer_options(struct trace_array *tr)
8313 {
8314 	mutex_lock(&trace_types_lock);
8315 	__update_tracer_options(tr);
8316 	mutex_unlock(&trace_types_lock);
8317 }
8318 
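/*
 * Create a new named trace instance with its own buffers and tracefs
 * directory. Fails with -EEXIST if an instance of that name already
 * exists; returns the new trace_array or an ERR_PTR(). Both event_mutex
 * and trace_types_lock are taken here, so callers must not hold them.
 * This is also what instance_mkdir() below uses when a directory is
 * created under instances/.
 */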
8319 struct trace_array *trace_array_create(const char *name)
8320 {
8321 	struct trace_array *tr;
8322 	int ret;
8323 
8324 	mutex_lock(&event_mutex);
8325 	mutex_lock(&trace_types_lock);
8326 
8327 	ret = -EEXIST;
8328 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8329 		if (tr->name && strcmp(tr->name, name) == 0)
8330 			goto out_unlock;
8331 	}
8332 
8333 	ret = -ENOMEM;
8334 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8335 	if (!tr)
8336 		goto out_unlock;
8337 
8338 	tr->name = kstrdup(name, GFP_KERNEL);
8339 	if (!tr->name)
8340 		goto out_free_tr;
8341 
8342 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8343 		goto out_free_tr;
8344 
8345 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8346 
8347 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8348 
8349 	raw_spin_lock_init(&tr->start_lock);
8350 
8351 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8352 
8353 	tr->current_trace = &nop_trace;
8354 
8355 	INIT_LIST_HEAD(&tr->systems);
8356 	INIT_LIST_HEAD(&tr->events);
8357 	INIT_LIST_HEAD(&tr->hist_vars);
8358 	INIT_LIST_HEAD(&tr->err_log);
8359 
8360 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8361 		goto out_free_tr;
8362 
8363 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8364 	if (!tr->dir)
8365 		goto out_free_tr;
8366 
8367 	ret = event_trace_add_tracer(tr->dir, tr);
8368 	if (ret) {
8369 		tracefs_remove_recursive(tr->dir);
8370 		goto out_free_tr;
8371 	}
8372 
8373 	ftrace_init_trace_array(tr);
8374 
8375 	init_tracer_tracefs(tr, tr->dir);
8376 	init_trace_flags_index(tr);
8377 	__update_tracer_options(tr);
8378 
8379 	list_add(&tr->list, &ftrace_trace_arrays);
8380 
8381 	mutex_unlock(&trace_types_lock);
8382 	mutex_unlock(&event_mutex);
8383 
8384 	return tr;
8385 
8386  out_free_tr:
8387 	free_trace_buffers(tr);
8388 	free_cpumask_var(tr->tracing_cpumask);
8389 	kfree(tr->name);
8390 	kfree(tr);
8391 
8392  out_unlock:
8393 	mutex_unlock(&trace_types_lock);
8394 	mutex_unlock(&event_mutex);
8395 
8396 	return ERR_PTR(ret);
8397 }
8398 EXPORT_SYMBOL_GPL(trace_array_create);
8399 
8400 static int instance_mkdir(const char *name)
8401 {
8402 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8403 }
8404 
8405 static int __remove_instance(struct trace_array *tr)
8406 {
8407 	int i;
8408 
8409 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8410 		return -EBUSY;
8411 
8412 	list_del(&tr->list);
8413 
8414 	/* Disable all the flags that were enabled coming in */
8415 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8416 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8417 			set_tracer_flag(tr, 1 << i, 0);
8418 	}
8419 
8420 	tracing_set_nop(tr);
8421 	clear_ftrace_function_probes(tr);
8422 	event_trace_del_tracer(tr);
8423 	ftrace_clear_pids(tr);
8424 	ftrace_destroy_function_files(tr);
8425 	tracefs_remove_recursive(tr->dir);
8426 	free_trace_buffers(tr);
8427 
8428 	for (i = 0; i < tr->nr_topts; i++) {
8429 		kfree(tr->topts[i].topts);
8430 	}
8431 	kfree(tr->topts);
8432 
8433 	free_cpumask_var(tr->tracing_cpumask);
8434 	kfree(tr->name);
8435 	kfree(tr);
8436 	tr = NULL;
8437 
8438 	return 0;
8439 }
8440 
8441 int trace_array_destroy(struct trace_array *tr)
8442 {
8443 	int ret;
8444 
8445 	if (!tr)
8446 		return -EINVAL;
8447 
8448 	mutex_lock(&event_mutex);
8449 	mutex_lock(&trace_types_lock);
8450 
8451 	ret = __remove_instance(tr);
8452 
8453 	mutex_unlock(&trace_types_lock);
8454 	mutex_unlock(&event_mutex);
8455 
8456 	return ret;
8457 }
8458 EXPORT_SYMBOL_GPL(trace_array_destroy);
8459 
8460 static int instance_rmdir(const char *name)
8461 {
8462 	struct trace_array *tr;
8463 	int ret;
8464 
8465 	mutex_lock(&event_mutex);
8466 	mutex_lock(&trace_types_lock);
8467 
8468 	ret = -ENODEV;
8469 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8470 		if (tr->name && strcmp(tr->name, name) == 0) {
8471 			ret = __remove_instance(tr);
8472 			break;
8473 		}
8474 	}
8475 
8476 	mutex_unlock(&trace_types_lock);
8477 	mutex_unlock(&event_mutex);
8478 
8479 	return ret;
8480 }
8481 
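/*
 * Create the "instances" directory. mkdir and rmdir inside it are routed
 * to instance_mkdir() and instance_rmdir() above, so for example
 * (assuming the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * create and tear down an independent trace_array with its own buffers
 * and tracefs files.
 */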
8482 static __init void create_trace_instances(struct dentry *d_tracer)
8483 {
8484 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8485 							 instance_mkdir,
8486 							 instance_rmdir);
8487 	if (WARN_ON(!trace_instance_dir))
8488 		return;
8489 }
8490 
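/*
 * Populate the tracefs directory of a trace_array with its control and
 * output files (current_tracer, trace, trace_pipe, buffer_size_kb,
 * tracing_on, the options/ directory, the per_cpu/ tree, etc.). This is
 * used both for the top level directory and for every instance.
 */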
8491 static void
8492 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8493 {
8494 	struct trace_event_file *file;
8495 	int cpu;
8496 
8497 	trace_create_file("available_tracers", 0444, d_tracer,
8498 			tr, &show_traces_fops);
8499 
8500 	trace_create_file("current_tracer", 0644, d_tracer,
8501 			tr, &set_tracer_fops);
8502 
8503 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8504 			  tr, &tracing_cpumask_fops);
8505 
8506 	trace_create_file("trace_options", 0644, d_tracer,
8507 			  tr, &tracing_iter_fops);
8508 
8509 	trace_create_file("trace", 0644, d_tracer,
8510 			  tr, &tracing_fops);
8511 
8512 	trace_create_file("trace_pipe", 0444, d_tracer,
8513 			  tr, &tracing_pipe_fops);
8514 
8515 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8516 			  tr, &tracing_entries_fops);
8517 
8518 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8519 			  tr, &tracing_total_entries_fops);
8520 
8521 	trace_create_file("free_buffer", 0200, d_tracer,
8522 			  tr, &tracing_free_buffer_fops);
8523 
8524 	trace_create_file("trace_marker", 0220, d_tracer,
8525 			  tr, &tracing_mark_fops);
8526 
8527 	file = __find_event_file(tr, "ftrace", "print");
8528 	if (file && file->dir)
8529 		trace_create_file("trigger", 0644, file->dir, file,
8530 				  &event_trigger_fops);
8531 	tr->trace_marker_file = file;
8532 
8533 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8534 			  tr, &tracing_mark_raw_fops);
8535 
8536 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8537 			  &trace_clock_fops);
8538 
8539 	trace_create_file("tracing_on", 0644, d_tracer,
8540 			  tr, &rb_simple_fops);
8541 
8542 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8543 			  &trace_time_stamp_mode_fops);
8544 
8545 	tr->buffer_percent = 50;
8546 
8547 	trace_create_file("buffer_percent", 0444, d_tracer,
8548 			tr, &buffer_percent_fops);
8549 
8550 	create_trace_options_dir(tr);
8551 
8552 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8553 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8554 			&tr->max_latency, &tracing_max_lat_fops);
8555 #endif
8556 
8557 	if (ftrace_create_function_files(tr, d_tracer))
8558 		WARN(1, "Could not allocate function filter files");
8559 
8560 #ifdef CONFIG_TRACER_SNAPSHOT
8561 	trace_create_file("snapshot", 0644, d_tracer,
8562 			  tr, &snapshot_fops);
8563 #endif
8564 
8565 	trace_create_file("error_log", 0644, d_tracer,
8566 			  tr, &tracing_err_log_fops);
8567 
8568 	for_each_tracing_cpu(cpu)
8569 		tracing_init_tracefs_percpu(tr, cpu);
8570 
8571 	ftrace_init_tracefs(tr, d_tracer);
8572 }
8573 
8574 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8575 {
8576 	struct vfsmount *mnt;
8577 	struct file_system_type *type;
8578 
8579 	/*
8580 	 * To maintain backward compatibility for tools that mount
8581 	 * debugfs to get to the tracing facility, tracefs is automatically
8582 	 * mounted to the debugfs/tracing directory.
8583 	 */
8584 	type = get_fs_type("tracefs");
8585 	if (!type)
8586 		return NULL;
8587 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8588 	put_filesystem(type);
8589 	if (IS_ERR(mnt))
8590 		return NULL;
8591 	mntget(mnt);
8592 
8593 	return mnt;
8594 }
8595 
8596 /**
8597  * tracing_init_dentry - initialize top level trace array
8598  *
8599  * This is called when creating files or directories in the tracing
8600  * directory. It is called via fs_initcall() by any of the boot up code
8601  * and expects to return the dentry of the top level tracing directory.
8602  */
8603 struct dentry *tracing_init_dentry(void)
8604 {
8605 	struct trace_array *tr = &global_trace;
8606 
8607 	/* The top level trace array uses NULL as parent */
8608 	if (tr->dir)
8609 		return NULL;
8610 
8611 	if (WARN_ON(!tracefs_initialized()) ||
8612 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8613 		 WARN_ON(!debugfs_initialized())))
8614 		return ERR_PTR(-ENODEV);
8615 
8616 	/*
8617 	 * As there may still be users that expect the tracing
8618 	 * files to exist in debugfs/tracing, we must automount
8619 	 * the tracefs file system there, so older tools still
8620 	 * work with the newer kernel.
8621 	 */
8622 	tr->dir = debugfs_create_automount("tracing", NULL,
8623 					   trace_automount, NULL);
8624 
8625 	return NULL;
8626 }
8627 
8628 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8629 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8630 
8631 static void __init trace_eval_init(void)
8632 {
8633 	int len;
8634 
8635 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8636 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8637 }
8638 
8639 #ifdef CONFIG_MODULES
8640 static void trace_module_add_evals(struct module *mod)
8641 {
8642 	if (!mod->num_trace_evals)
8643 		return;
8644 
8645 	/*
8646 	 * Modules with bad taint do not have events created, do
8647 	 * not bother with enums either.
8648 	 */
8649 	if (trace_module_has_bad_taint(mod))
8650 		return;
8651 
8652 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8653 }
8654 
8655 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8656 static void trace_module_remove_evals(struct module *mod)
8657 {
8658 	union trace_eval_map_item *map;
8659 	union trace_eval_map_item **last = &trace_eval_maps;
8660 
8661 	if (!mod->num_trace_evals)
8662 		return;
8663 
8664 	mutex_lock(&trace_eval_mutex);
8665 
8666 	map = trace_eval_maps;
8667 
8668 	while (map) {
8669 		if (map->head.mod == mod)
8670 			break;
8671 		map = trace_eval_jmp_to_tail(map);
8672 		last = &map->tail.next;
8673 		map = map->tail.next;
8674 	}
8675 	if (!map)
8676 		goto out;
8677 
8678 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8679 	kfree(map);
8680  out:
8681 	mutex_unlock(&trace_eval_mutex);
8682 }
8683 #else
8684 static inline void trace_module_remove_evals(struct module *mod) { }
8685 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8686 
8687 static int trace_module_notify(struct notifier_block *self,
8688 			       unsigned long val, void *data)
8689 {
8690 	struct module *mod = data;
8691 
8692 	switch (val) {
8693 	case MODULE_STATE_COMING:
8694 		trace_module_add_evals(mod);
8695 		break;
8696 	case MODULE_STATE_GOING:
8697 		trace_module_remove_evals(mod);
8698 		break;
8699 	}
8700 
8701 	return 0;
8702 }
8703 
8704 static struct notifier_block trace_module_nb = {
8705 	.notifier_call = trace_module_notify,
8706 	.priority = 0,
8707 };
8708 #endif /* CONFIG_MODULES */
8709 
8710 static __init int tracer_init_tracefs(void)
8711 {
8712 	struct dentry *d_tracer;
8713 
8714 	trace_access_lock_init();
8715 
8716 	d_tracer = tracing_init_dentry();
8717 	if (IS_ERR(d_tracer))
8718 		return 0;
8719 
8720 	event_trace_init();
8721 
8722 	init_tracer_tracefs(&global_trace, d_tracer);
8723 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8724 
8725 	trace_create_file("tracing_thresh", 0644, d_tracer,
8726 			&global_trace, &tracing_thresh_fops);
8727 
8728 	trace_create_file("README", 0444, d_tracer,
8729 			NULL, &tracing_readme_fops);
8730 
8731 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8732 			NULL, &tracing_saved_cmdlines_fops);
8733 
8734 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8735 			  NULL, &tracing_saved_cmdlines_size_fops);
8736 
8737 	trace_create_file("saved_tgids", 0444, d_tracer,
8738 			NULL, &tracing_saved_tgids_fops);
8739 
8740 	trace_eval_init();
8741 
8742 	trace_create_eval_file(d_tracer);
8743 
8744 #ifdef CONFIG_MODULES
8745 	register_module_notifier(&trace_module_nb);
8746 #endif
8747 
8748 #ifdef CONFIG_DYNAMIC_FTRACE
8749 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8750 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8751 #endif
8752 
8753 	create_trace_instances(d_tracer);
8754 
8755 	update_tracer_options(&global_trace);
8756 
8757 	return 0;
8758 }
8759 
8760 static int trace_panic_handler(struct notifier_block *this,
8761 			       unsigned long event, void *unused)
8762 {
8763 	if (ftrace_dump_on_oops)
8764 		ftrace_dump(ftrace_dump_on_oops);
8765 	return NOTIFY_OK;
8766 }
8767 
8768 static struct notifier_block trace_panic_notifier = {
8769 	.notifier_call  = trace_panic_handler,
8770 	.next           = NULL,
8771 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8772 };
8773 
8774 static int trace_die_handler(struct notifier_block *self,
8775 			     unsigned long val,
8776 			     void *data)
8777 {
8778 	switch (val) {
8779 	case DIE_OOPS:
8780 		if (ftrace_dump_on_oops)
8781 			ftrace_dump(ftrace_dump_on_oops);
8782 		break;
8783 	default:
8784 		break;
8785 	}
8786 	return NOTIFY_OK;
8787 }
8788 
8789 static struct notifier_block trace_die_notifier = {
8790 	.notifier_call = trace_die_handler,
8791 	.priority = 200
8792 };
8793 
8794 /*
8795  * printk is set to a max of 1024; we really don't need it that big.
8796  * Nothing should be printing 1000 characters anyway.
8797  */
8798 #define TRACE_MAX_PRINT		1000
8799 
8800 /*
8801  * Define here KERN_TRACE so that we have one place to modify
8802  * it if we decide to change what log level the ftrace dump
8803  * should be at.
8804  */
8805 #define KERN_TRACE		KERN_EMERG
8806 
8807 void
8808 trace_printk_seq(struct trace_seq *s)
8809 {
8810 	/* Probably should print a warning here. */
8811 	if (s->seq.len >= TRACE_MAX_PRINT)
8812 		s->seq.len = TRACE_MAX_PRINT;
8813 
8814 	/*
8815 	 * More paranoid code. Although the buffer size is set to
8816 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8817 	 * an extra layer of protection.
8818 	 */
8819 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8820 		s->seq.len = s->seq.size - 1;
8821 
8822 	/* Should be zero terminated, but we are paranoid. */
8823 	s->buffer[s->seq.len] = 0;
8824 
8825 	printk(KERN_TRACE "%s", s->buffer);
8826 
8827 	trace_seq_init(s);
8828 }
8829 
8830 void trace_init_global_iter(struct trace_iterator *iter)
8831 {
8832 	iter->tr = &global_trace;
8833 	iter->trace = iter->tr->current_trace;
8834 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8835 	iter->trace_buffer = &global_trace.trace_buffer;
8836 
8837 	if (iter->trace && iter->trace->open)
8838 		iter->trace->open(iter);
8839 
8840 	/* Annotate start of buffers if we had overruns */
8841 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8842 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8843 
8844 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8845 	if (trace_clocks[iter->tr->clock_id].in_ns)
8846 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8847 }
8848 
8849 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8850 {
8851 	/* use static because iter can be a bit big for the stack */
8852 	static struct trace_iterator iter;
8853 	static atomic_t dump_running;
8854 	struct trace_array *tr = &global_trace;
8855 	unsigned int old_userobj;
8856 	unsigned long flags;
8857 	int cnt = 0, cpu;
8858 
8859 	/* Only allow one dump user at a time. */
8860 	if (atomic_inc_return(&dump_running) != 1) {
8861 		atomic_dec(&dump_running);
8862 		return;
8863 	}
8864 
8865 	/*
8866 	 * Always turn off tracing when we dump.
8867 	 * We don't need to show trace output of what happens
8868 	 * between multiple crashes.
8869 	 *
8870 	 * If the user does a sysrq-z, then they can re-enable
8871 	 * tracing with echo 1 > tracing_on.
8872 	 */
8873 	tracing_off();
8874 
8875 	local_irq_save(flags);
8876 	printk_nmi_direct_enter();
8877 
8878 	/* Simulate the iterator */
8879 	trace_init_global_iter(&iter);
8880 
8881 	for_each_tracing_cpu(cpu) {
8882 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8883 	}
8884 
8885 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8886 
8887 	/* don't look at user memory in panic mode */
8888 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8889 
8890 	switch (oops_dump_mode) {
8891 	case DUMP_ALL:
8892 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8893 		break;
8894 	case DUMP_ORIG:
8895 		iter.cpu_file = raw_smp_processor_id();
8896 		break;
8897 	case DUMP_NONE:
8898 		goto out_enable;
8899 	default:
8900 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8901 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8902 	}
8903 
8904 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8905 
8906 	/* Did function tracer already get disabled? */
8907 	if (ftrace_is_dead()) {
8908 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8909 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8910 	}
8911 
8912 	/*
8913 	 * We need to stop all tracing on all CPUS to read
8914 	 * the next buffer. This is a bit expensive, but is
8915 	 * not done often. We fill in all that we can read,
8916 	 * and then release the locks again.
8917 	 */
8918 
8919 	while (!trace_empty(&iter)) {
8920 
8921 		if (!cnt)
8922 			printk(KERN_TRACE "---------------------------------\n");
8923 
8924 		cnt++;
8925 
8926 		trace_iterator_reset(&iter);
8927 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8928 
8929 		if (trace_find_next_entry_inc(&iter) != NULL) {
8930 			int ret;
8931 
8932 			ret = print_trace_line(&iter);
8933 			if (ret != TRACE_TYPE_NO_CONSUME)
8934 				trace_consume(&iter);
8935 		}
8936 		touch_nmi_watchdog();
8937 
8938 		trace_printk_seq(&iter.seq);
8939 	}
8940 
8941 	if (!cnt)
8942 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8943 	else
8944 		printk(KERN_TRACE "---------------------------------\n");
8945 
8946  out_enable:
8947 	tr->trace_flags |= old_userobj;
8948 
8949 	for_each_tracing_cpu(cpu) {
8950 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8951 	}
8952 	atomic_dec(&dump_running);
8953 	printk_nmi_direct_exit();
8954 	local_irq_restore(flags);
8955 }
8956 EXPORT_SYMBOL_GPL(ftrace_dump);
8957 
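/*
 * Split @buf into an argv[] array and hand it to @createfn. An empty
 * line yields argc == 0 and is silently ignored. Returns 0 on success,
 * -ENOMEM if the split fails, or whatever @createfn returned.
 */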
8958 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8959 {
8960 	char **argv;
8961 	int argc, ret;
8962 
8963 	argc = 0;
8964 	ret = 0;
8965 	argv = argv_split(GFP_KERNEL, buf, &argc);
8966 	if (!argv)
8967 		return -ENOMEM;
8968 
8969 	if (argc)
8970 		ret = createfn(argc, argv);
8971 
8972 	argv_free(argv);
8973 
8974 	return ret;
8975 }
8976 
8977 #define WRITE_BUFSIZE  4096
8978 
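/*
 * Helper for write() handlers that accept newline-separated command
 * strings (the kprobe and uprobe event files use it, among others). The
 * user buffer is consumed in WRITE_BUFSIZE chunks, split into lines,
 * stripped of '#' comments, and each resulting line is passed to
 * @createfn via trace_run_command(). Returns the number of bytes
 * consumed or a negative error.
 */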
8979 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8980 				size_t count, loff_t *ppos,
8981 				int (*createfn)(int, char **))
8982 {
8983 	char *kbuf, *buf, *tmp;
8984 	int ret = 0;
8985 	size_t done = 0;
8986 	size_t size;
8987 
8988 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8989 	if (!kbuf)
8990 		return -ENOMEM;
8991 
8992 	while (done < count) {
8993 		size = count - done;
8994 
8995 		if (size >= WRITE_BUFSIZE)
8996 			size = WRITE_BUFSIZE - 1;
8997 
8998 		if (copy_from_user(kbuf, buffer + done, size)) {
8999 			ret = -EFAULT;
9000 			goto out;
9001 		}
9002 		kbuf[size] = '\0';
9003 		buf = kbuf;
9004 		do {
9005 			tmp = strchr(buf, '\n');
9006 			if (tmp) {
9007 				*tmp = '\0';
9008 				size = tmp - buf + 1;
9009 			} else {
9010 				size = strlen(buf);
9011 				if (done + size < count) {
9012 					if (buf != kbuf)
9013 						break;
9014 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9015 					pr_warn("Line length is too long: Should be less than %d\n",
9016 						WRITE_BUFSIZE - 2);
9017 					ret = -EINVAL;
9018 					goto out;
9019 				}
9020 			}
9021 			done += size;
9022 
9023 			/* Remove comments */
9024 			tmp = strchr(buf, '#');
9025 
9026 			if (tmp)
9027 				*tmp = '\0';
9028 
9029 			ret = trace_run_command(buf, createfn);
9030 			if (ret)
9031 				goto out;
9032 			buf += size;
9033 
9034 		} while (done < count);
9035 	}
9036 	ret = done;
9037 
9038 out:
9039 	kfree(kbuf);
9040 
9041 	return ret;
9042 }
9043 
9044 __init static int tracer_alloc_buffers(void)
9045 {
9046 	int ring_buf_size;
9047 	int ret = -ENOMEM;
9048 
9049 	/*
9050 	 * Make sure we don't accidentally add more trace options
9051 	 * than we have bits for.
9052 	 */
9053 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9054 
9055 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9056 		goto out;
9057 
9058 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9059 		goto out_free_buffer_mask;
9060 
9061 	/* Only allocate trace_printk buffers if a trace_printk exists */
9062 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9063 		/* Must be called before global_trace.buffer is allocated */
9064 		trace_printk_init_buffers();
9065 
9066 	/* To save memory, keep the ring buffer size to its minimum */
9067 	if (ring_buffer_expanded)
9068 		ring_buf_size = trace_buf_size;
9069 	else
9070 		ring_buf_size = 1;
9071 
9072 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9073 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9074 
9075 	raw_spin_lock_init(&global_trace.start_lock);
9076 
9077 	/*
9078 	 * The prepare callback allocates some memory for the ring buffer. We
9079 	 * don't free the buffer if the CPU goes down. If we were to free
9080 	 * the buffer, then the user would lose any trace that was in the
9081 	 * buffer. The memory will be removed once the "instance" is removed.
9082 	 */
9083 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9084 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9085 				      NULL);
9086 	if (ret < 0)
9087 		goto out_free_cpumask;
9088 	/* Used for event triggers */
9089 	ret = -ENOMEM;
9090 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9091 	if (!temp_buffer)
9092 		goto out_rm_hp_state;
9093 
9094 	if (trace_create_savedcmd() < 0)
9095 		goto out_free_temp_buffer;
9096 
9097 	/* TODO: make the number of buffers hot pluggable with CPUS */
9098 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9099 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9100 		WARN_ON(1);
9101 		goto out_free_savedcmd;
9102 	}
9103 
9104 	if (global_trace.buffer_disabled)
9105 		tracing_off();
9106 
9107 	if (trace_boot_clock) {
9108 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9109 		if (ret < 0)
9110 			pr_warn("Trace clock %s not defined, going back to default\n",
9111 				trace_boot_clock);
9112 	}
9113 
9114 	/*
9115 	 * register_tracer() might reference current_trace, so it
9116 	 * needs to be set before we register anything. This is
9117 	 * just a bootstrap of current_trace anyway.
9118 	 */
9119 	global_trace.current_trace = &nop_trace;
9120 
9121 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9122 
9123 	ftrace_init_global_array_ops(&global_trace);
9124 
9125 	init_trace_flags_index(&global_trace);
9126 
9127 	register_tracer(&nop_trace);
9128 
9129 	/* Function tracing may start here (via kernel command line) */
9130 	init_function_trace();
9131 
9132 	/* All seems OK, enable tracing */
9133 	tracing_disabled = 0;
9134 
9135 	atomic_notifier_chain_register(&panic_notifier_list,
9136 				       &trace_panic_notifier);
9137 
9138 	register_die_notifier(&trace_die_notifier);
9139 
9140 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9141 
9142 	INIT_LIST_HEAD(&global_trace.systems);
9143 	INIT_LIST_HEAD(&global_trace.events);
9144 	INIT_LIST_HEAD(&global_trace.hist_vars);
9145 	INIT_LIST_HEAD(&global_trace.err_log);
9146 	list_add(&global_trace.list, &ftrace_trace_arrays);
9147 
9148 	apply_trace_boot_options();
9149 
9150 	register_snapshot_cmd();
9151 
9152 	return 0;
9153 
9154 out_free_savedcmd:
9155 	free_saved_cmdlines_buffer(savedcmd);
9156 out_free_temp_buffer:
9157 	ring_buffer_free(temp_buffer);
9158 out_rm_hp_state:
9159 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9160 out_free_cpumask:
9161 	free_cpumask_var(global_trace.tracing_cpumask);
9162 out_free_buffer_mask:
9163 	free_cpumask_var(tracing_buffer_mask);
9164 out:
9165 	return ret;
9166 }
9167 
9168 void __init early_trace_init(void)
9169 {
9170 	if (tracepoint_printk) {
9171 		tracepoint_print_iter =
9172 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9173 		if (WARN_ON(!tracepoint_print_iter))
9174 			tracepoint_printk = 0;
9175 		else
9176 			static_key_enable(&tracepoint_printk_key.key);
9177 	}
9178 	tracer_alloc_buffers();
9179 }
9180 
9181 void __init trace_init(void)
9182 {
9183 	trace_event_init();
9184 }
9185 
9186 __init static int clear_boot_tracer(void)
9187 {
9188 	/*
9189 	 * The default bootup tracer string lives in an init section that
9190 	 * is freed after boot. This function is called in lateinit. If we
9191 	 * did not find the boot tracer, then clear it out, to prevent
9192 	 * later registration from accessing the buffer that is about to
9193 	 * be freed.
9194 	 */
9195 	if (!default_bootup_tracer)
9196 		return 0;
9197 
9198 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9199 	       default_bootup_tracer);
9200 	default_bootup_tracer = NULL;
9201 
9202 	return 0;
9203 }
9204 
9205 fs_initcall(tracer_init_tracefs);
9206 late_initcall_sync(clear_boot_tracer);
9207 
9208 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9209 __init static int tracing_set_default_clock(void)
9210 {
9211 	/* sched_clock_stable() is determined in late_initcall */
9212 	if (!trace_boot_clock && !sched_clock_stable()) {
9213 		printk(KERN_WARNING
9214 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9215 		       "If you want to keep using the local clock, then add:\n"
9216 		       "  \"trace_clock=local\"\n"
9217 		       "on the kernel command line\n");
9218 		tracing_set_clock(&global_trace, "global");
9219 	}
9220 
9221 	return 0;
9222 }
9223 late_initcall_sync(tracing_set_default_clock);
9224 #endif
9225