xref: /openbmc/linux/kernel/trace/trace.c (revision b830f94f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * capturing traces that lead to crashes and outputting them to a
112  * serial console.
113  *
114  * It is off by default, but you can enable it either by specifying
115  * "ftrace_dump_on_oops" on the kernel command line or by setting
116  * /proc/sys/kernel/ftrace_dump_on_oops.
117  * Set it to 1 to dump the buffers of all CPUs.
118  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
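/*
 * Example, based on the handling in set_ftrace_dump_on_oops() below and
 * the sysctl values described above:
 *
 *   ftrace_dump_on_oops                               (kernel command line, DUMP_ALL)
 *   ftrace_dump_on_oops=orig_cpu                      (kernel command line, DUMP_ORIG)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops     (dump all CPUs)
 *   echo 2 > /proc/sys/kernel/ftrace_dump_on_oops     (dump only the oopsing CPU)
 */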
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * from "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
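/*
 * Rough layout of one saved array, following the description above
 * (N == head.length):
 *
 *   trace_eval_maps --> [ head | map 0 | map 1 | ... | map N-1 | tail ]
 *                                                                  |
 *                                  next array of saved maps <------+
 */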
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163 				   unsigned long flags, int pc);
164 
165 #define MAX_TRACER_SIZE		100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 	default_bootup_tracer = bootup_tracer_buf;
175 	/* We are using ftrace early, expand it */
176 	ring_buffer_expanded = true;
177 	return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
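/*
 * Example: booting with "ftrace=function" copies "function" into
 * bootup_tracer_buf; register_tracer() later compares each newly
 * registered tracer's name against it and starts the matching tracer
 * (assuming that tracer is built into the kernel).
 */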
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 	if (*str++ != '=' || !*str) {
184 		ftrace_dump_on_oops = DUMP_ALL;
185 		return 1;
186 	}
187 
188 	if (!strcmp("orig_cpu", str)) {
189 		ftrace_dump_on_oops = DUMP_ORIG;
190 		return 1;
191 	}
192 
193 	return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 		__disable_trace_on_warning = 1;
201 	return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 	allocate_snapshot = true;
208 	/* We also need the main ring buffer expanded */
209 	ring_buffer_expanded = true;
210 	return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 	return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 	trace_boot_clock = trace_boot_clock_buf;
231 	return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 		tracepoint_printk = 1;
239 	return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(u64 nsec)
244 {
245 	nsec += 500;
246 	do_div(nsec, 1000);
247 	return nsec;
248 }
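/*
 * ns2usecs() rounds to the nearest microsecond, for example:
 * ns2usecs(1499) == 1 ((1499 + 500) / 1000) and ns2usecs(1500) == 2.
 */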
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS						\
252 	(FUNCTION_DEFAULT_FLAGS |					\
253 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
254 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
255 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
256 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
260 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265 
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271 	.trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273 
274 LIST_HEAD(ftrace_trace_arrays);
275 
276 int trace_array_get(struct trace_array *this_tr)
277 {
278 	struct trace_array *tr;
279 	int ret = -ENODEV;
280 
281 	mutex_lock(&trace_types_lock);
282 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283 		if (tr == this_tr) {
284 			tr->ref++;
285 			ret = 0;
286 			break;
287 		}
288 	}
289 	mutex_unlock(&trace_types_lock);
290 
291 	return ret;
292 }
293 
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296 	WARN_ON(!this_tr->ref);
297 	this_tr->ref--;
298 }
299 
300 void trace_array_put(struct trace_array *this_tr)
301 {
302 	mutex_lock(&trace_types_lock);
303 	__trace_array_put(this_tr);
304 	mutex_unlock(&trace_types_lock);
305 }
306 
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308 			      struct ring_buffer *buffer,
309 			      struct ring_buffer_event *event)
310 {
311 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312 	    !filter_match_preds(call->filter, rec)) {
313 		__trace_event_discard_commit(buffer, event);
314 		return 1;
315 	}
316 
317 	return 0;
318 }
319 
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322 	vfree(pid_list->pids);
323 	kfree(pid_list);
324 }
325 
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336 	/*
337 	 * If pid_max changed after filtered_pids was created, we
338 	 * by default ignore all pids greater than the previous pid_max.
339 	 */
340 	if (search_pid >= filtered_pids->pid_max)
341 		return false;
342 
343 	return test_bit(search_pid, filtered_pids->pids);
344 }
345 
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358 	/*
359 	 * Return false, because if filtered_pids does not exist,
360 	 * all pids are good to trace.
361 	 */
362 	if (!filtered_pids)
363 		return false;
364 
365 	return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367 
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * When adding a task, if @self is defined, the task is only added if
375  * @self is also included in @pid_list. This happens on fork, where
376  * tasks should only be added when the parent is listed. If @self is
377  * NULL, then the @task pid will be removed from the list, which
378  * happens when a task exits.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381 				  struct task_struct *self,
382 				  struct task_struct *task)
383 {
384 	if (!pid_list)
385 		return;
386 
387 	/* For forks, we only add if the forking task is listed */
388 	if (self) {
389 		if (!trace_find_filtered_pid(pid_list, self->pid))
390 			return;
391 	}
392 
393 	/* Sorry, but we don't support pid_max changing after setting */
394 	if (task->pid >= pid_list->pid_max)
395 		return;
396 
397 	/* "self" is set for forks, and NULL for exits */
398 	if (self)
399 		set_bit(task->pid, pid_list->pids);
400 	else
401 		clear_bit(task->pid, pid_list->pids);
402 }
403 
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418 	unsigned long pid = (unsigned long)v;
419 
420 	(*pos)++;
421 
422 	/* pid is already +1 of the actual previous bit */
423 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424 
425 	/* Return pid + 1 to allow zero to be represented */
426 	if (pid < pid_list->pid_max)
427 		return (void *)(pid + 1);
428 
429 	return NULL;
430 }
431 
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445 	unsigned long pid;
446 	loff_t l = 0;
447 
448 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449 	if (pid >= pid_list->pid_max)
450 		return NULL;
451 
452 	/* Return pid + 1 so that zero can be the exit value */
453 	for (pid++; pid && l < *pos;
454 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455 		;
456 	return (void *)pid;
457 }
458 
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469 	unsigned long pid = (unsigned long)v - 1;
470 
471 	seq_printf(m, "%lu\n", pid);
472 	return 0;
473 }
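/*
 * A minimal sketch (hypothetical names, not taken from this file) of how
 * a seq_file user can wire up the three helpers above; real users such
 * as the event pid filter files fetch their pid_list from the trace
 * array inside the start/next callbacks and drop any locks in ->stop:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(some_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(some_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */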
474 
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE		127
477 
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479 		    struct trace_pid_list **new_pid_list,
480 		    const char __user *ubuf, size_t cnt)
481 {
482 	struct trace_pid_list *pid_list;
483 	struct trace_parser parser;
484 	unsigned long val;
485 	int nr_pids = 0;
486 	ssize_t read = 0;
487 	ssize_t ret = 0;
488 	loff_t pos;
489 	pid_t pid;
490 
491 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492 		return -ENOMEM;
493 
494 	/*
495 	 * Always create a new array. The write is an all-or-nothing
496 	 * operation: a new array is built whenever the user adds pids,
497 	 * and if the operation fails, the current list is left
498 	 * unmodified.
499 	 */
500 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501 	if (!pid_list) {
502 		trace_parser_put(&parser);
503 		return -ENOMEM;
504 	}
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		trace_parser_put(&parser);
515 		kfree(pid_list);
516 		return -ENOMEM;
517 	}
518 
519 	if (filtered_pids) {
520 		/* copy the current bits to the new max */
521 		for_each_set_bit(pid, filtered_pids->pids,
522 				 filtered_pids->pid_max) {
523 			set_bit(pid, pid_list->pids);
524 			nr_pids++;
525 		}
526 	}
527 
528 	while (cnt > 0) {
529 
530 		pos = 0;
531 
532 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
533 		if (ret < 0 || !trace_parser_loaded(&parser))
534 			break;
535 
536 		read += ret;
537 		ubuf += ret;
538 		cnt -= ret;
539 
540 		ret = -EINVAL;
541 		if (kstrtoul(parser.buffer, 0, &val))
542 			break;
543 		if (val >= pid_list->pid_max)
544 			break;
545 
546 		pid = (pid_t)val;
547 
548 		set_bit(pid, pid_list->pids);
549 		nr_pids++;
550 
551 		trace_parser_clear(&parser);
552 		ret = 0;
553 	}
554 	trace_parser_put(&parser);
555 
556 	if (ret < 0) {
557 		trace_free_pid_list(pid_list);
558 		return ret;
559 	}
560 
561 	if (!nr_pids) {
562 		/* Cleared the list of pids */
563 		trace_free_pid_list(pid_list);
564 		read = ret;
565 		pid_list = NULL;
566 	}
567 
568 	*new_pid_list = pid_list;
569 
570 	return read;
571 }
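/*
 * Resulting semantics of the helper above: the bits of the existing
 * @filtered_pids (if any) are copied into the new list before the
 * newly written pids are added, so a successful write extends the
 * current filter rather than replacing it; if the end result contains
 * no pids at all, the filter is dropped (*new_pid_list is set to NULL).
 */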
572 
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575 	u64 ts;
576 
577 	/* Early boot up does not have a buffer yet */
578 	if (!buf->buffer)
579 		return trace_clock_local();
580 
581 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
582 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583 
584 	return ts;
585 }
586 
587 u64 ftrace_now(int cpu)
588 {
589 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591 
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
596  * mirror flag "buffer_disabled" so that it can be checked in fast
597  * paths such as the irqsoff tracer, but it may be inaccurate due to
598  * races. If you need to know the accurate state, use tracing_is_on(),
599  * which is a little slower but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603 	/*
604 	 * For quick access (irqsoff uses this in fast path), just
605 	 * return the mirror variable of the state of the ring buffer.
606 	 * It's a little racy, but we don't really care.
607 	 */
608 	smp_rmb();
609 	return !global_trace.buffer_disabled;
610 }
611 
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to the low value of 16384 so that,
618  * if a dump on oops happens, there is not too much output to wait
619  * for. This size can be configured at both boot time and run
620  * time.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
623 
624 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625 
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer		*trace_types __read_mostly;
628 
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633 
634 /*
635  * serialize the access of the ring buffer
636  *
637  * The ring buffer serializes readers, but that is only low-level
638  * protection. The validity of the events (returned by ring_buffer_peek()
639  * and friends) is not protected by the ring buffer.
640  *
641  * The content of events may become garbage if we allow other processes
642  * to consume these events concurrently:
643  *   A) the page of the consumed events may become a normal page
644  *      (not a reader page) in the ring buffer, and this page will be
645  *      rewritten by the event producer.
646  *   B) the page of the consumed events may become a page for
647  *      splice_read, and this page will be returned to the system.
648  *
649  * These primitives allow multiple processes to access different CPU
650  * ring buffers concurrently.
651  *
652  * These primitives don't distinguish read-only and read-consume access.
653  * Multiple read-only accesses are also serialized.
654  */
655 
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659 
660 static inline void trace_access_lock(int cpu)
661 {
662 	if (cpu == RING_BUFFER_ALL_CPUS) {
663 		/* gain it for accessing the whole ring buffer. */
664 		down_write(&all_cpu_access_lock);
665 	} else {
666 		/* gain it for accessing a cpu ring buffer. */
667 
668 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669 		down_read(&all_cpu_access_lock);
670 
671 		/* Secondly block other access to this @cpu ring buffer. */
672 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
673 	}
674 }
675 
676 static inline void trace_access_unlock(int cpu)
677 {
678 	if (cpu == RING_BUFFER_ALL_CPUS) {
679 		up_write(&all_cpu_access_lock);
680 	} else {
681 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682 		up_read(&all_cpu_access_lock);
683 	}
684 }
685 
686 static inline void trace_access_lock_init(void)
687 {
688 	int cpu;
689 
690 	for_each_possible_cpu(cpu)
691 		mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693 
694 #else
695 
696 static DEFINE_MUTEX(access_lock);
697 
698 static inline void trace_access_lock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_lock(&access_lock);
702 }
703 
704 static inline void trace_access_unlock(int cpu)
705 {
706 	(void)cpu;
707 	mutex_unlock(&access_lock);
708 }
709 
710 static inline void trace_access_lock_init(void)
711 {
712 }
713 
714 #endif
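/*
 * Typical usage of the helpers above, as in the trace file read paths
 * later in this file:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events for that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */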
715 
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718 				 unsigned long flags,
719 				 int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721 				      struct ring_buffer *buffer,
722 				      unsigned long flags,
723 				      int skip, int pc, struct pt_regs *regs);
724 
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727 					unsigned long flags,
728 					int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732 				      struct ring_buffer *buffer,
733 				      unsigned long flags,
734 				      int skip, int pc, struct pt_regs *regs)
735 {
736 }
737 
738 #endif
739 
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742 		  int type, unsigned long flags, int pc)
743 {
744 	struct trace_entry *ent = ring_buffer_event_data(event);
745 
746 	tracing_generic_entry_update(ent, type, flags, pc);
747 }
748 
749 static __always_inline struct ring_buffer_event *
750 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
751 			  int type,
752 			  unsigned long len,
753 			  unsigned long flags, int pc)
754 {
755 	struct ring_buffer_event *event;
756 
757 	event = ring_buffer_lock_reserve(buffer, len);
758 	if (event != NULL)
759 		trace_event_setup(event, type, flags, pc);
760 
761 	return event;
762 }
763 
764 void tracer_tracing_on(struct trace_array *tr)
765 {
766 	if (tr->trace_buffer.buffer)
767 		ring_buffer_record_on(tr->trace_buffer.buffer);
768 	/*
769 	 * This flag is looked at when buffers haven't been allocated
770 	 * yet, or by some tracers (like irqsoff), that just want to
771 	 * know if the ring buffer has been disabled, but it can handle
772 	 * races of where it gets disabled but we still do a record.
773 	 * As the check is in the fast path of the tracers, it is more
774 	 * important to be fast than accurate.
775 	 */
776 	tr->buffer_disabled = 0;
777 	/* Make the flag seen by readers */
778 	smp_wmb();
779 }
780 
781 /**
782  * tracing_on - enable tracing buffers
783  *
784  * This function enables tracing buffers that may have been
785  * disabled with tracing_off.
786  */
787 void tracing_on(void)
788 {
789 	tracer_tracing_on(&global_trace);
790 }
791 EXPORT_SYMBOL_GPL(tracing_on);
792 
793 
794 static __always_inline void
795 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
796 {
797 	__this_cpu_write(trace_taskinfo_save, true);
798 
799 	/* If this is the temp buffer, we need to commit fully */
800 	if (this_cpu_read(trace_buffered_event) == event) {
801 		/* Length is in event->array[0] */
802 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
803 		/* Release the temp buffer */
804 		this_cpu_dec(trace_buffered_event_cnt);
805 	} else
806 		ring_buffer_unlock_commit(buffer, event);
807 }
808 
809 /**
810  * __trace_puts - write a constant string into the trace buffer.
811  * @ip:	   The address of the caller
812  * @str:   The constant string to write
813  * @size:  The size of the string.
814  */
815 int __trace_puts(unsigned long ip, const char *str, int size)
816 {
817 	struct ring_buffer_event *event;
818 	struct ring_buffer *buffer;
819 	struct print_entry *entry;
820 	unsigned long irq_flags;
821 	int alloc;
822 	int pc;
823 
824 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
825 		return 0;
826 
827 	pc = preempt_count();
828 
829 	if (unlikely(tracing_selftest_running || tracing_disabled))
830 		return 0;
831 
832 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
833 
834 	local_save_flags(irq_flags);
835 	buffer = global_trace.trace_buffer.buffer;
836 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
837 					    irq_flags, pc);
838 	if (!event)
839 		return 0;
840 
841 	entry = ring_buffer_event_data(event);
842 	entry->ip = ip;
843 
844 	memcpy(&entry->buf, str, size);
845 
846 	/* Add a newline if necessary */
847 	if (entry->buf[size - 1] != '\n') {
848 		entry->buf[size] = '\n';
849 		entry->buf[size + 1] = '\0';
850 	} else
851 		entry->buf[size] = '\0';
852 
853 	__buffer_unlock_commit(buffer, event);
854 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
855 
856 	return size;
857 }
858 EXPORT_SYMBOL_GPL(__trace_puts);
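/*
 * Callers normally reach this through the trace_puts() macro elsewhere
 * in the kernel headers, which uses __trace_bputs() below for
 * compile-time constant strings and falls back to __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached the interesting branch\n");
 */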
859 
860 /**
861  * __trace_bputs - write the pointer to a constant string into trace buffer
862  * @ip:	   The address of the caller
863  * @str:   The constant string to write to the buffer
864  */
865 int __trace_bputs(unsigned long ip, const char *str)
866 {
867 	struct ring_buffer_event *event;
868 	struct ring_buffer *buffer;
869 	struct bputs_entry *entry;
870 	unsigned long irq_flags;
871 	int size = sizeof(struct bputs_entry);
872 	int pc;
873 
874 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
875 		return 0;
876 
877 	pc = preempt_count();
878 
879 	if (unlikely(tracing_selftest_running || tracing_disabled))
880 		return 0;
881 
882 	local_save_flags(irq_flags);
883 	buffer = global_trace.trace_buffer.buffer;
884 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885 					    irq_flags, pc);
886 	if (!event)
887 		return 0;
888 
889 	entry = ring_buffer_event_data(event);
890 	entry->ip			= ip;
891 	entry->str			= str;
892 
893 	__buffer_unlock_commit(buffer, event);
894 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
895 
896 	return 1;
897 }
898 EXPORT_SYMBOL_GPL(__trace_bputs);
899 
900 #ifdef CONFIG_TRACER_SNAPSHOT
901 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
902 {
903 	struct tracer *tracer = tr->current_trace;
904 	unsigned long flags;
905 
906 	if (in_nmi()) {
907 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
908 		internal_trace_puts("*** snapshot is being ignored        ***\n");
909 		return;
910 	}
911 
912 	if (!tr->allocated_snapshot) {
913 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
914 		internal_trace_puts("*** stopping trace here!   ***\n");
915 		tracing_off();
916 		return;
917 	}
918 
919 	/* Note, snapshot can not be used when the tracer uses it */
920 	if (tracer->use_max_tr) {
921 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
922 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
923 		return;
924 	}
925 
926 	local_irq_save(flags);
927 	update_max_tr(tr, current, smp_processor_id(), cond_data);
928 	local_irq_restore(flags);
929 }
930 
931 void tracing_snapshot_instance(struct trace_array *tr)
932 {
933 	tracing_snapshot_instance_cond(tr, NULL);
934 }
935 
936 /**
937  * tracing_snapshot - take a snapshot of the current buffer.
938  *
939  * This causes a swap between the snapshot buffer and the current live
940  * tracing buffer. You can use this to take snapshots of the live
941  * trace when some condition is triggered, but continue to trace.
942  *
943  * Note, make sure to allocate the snapshot with either
944  * a tracing_snapshot_alloc(), or by doing it manually
945  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
946  *
947  * If the snapshot buffer is not allocated, it will stop tracing.
948  * Basically making a permanent snapshot.
949  */
950 void tracing_snapshot(void)
951 {
952 	struct trace_array *tr = &global_trace;
953 
954 	tracing_snapshot_instance(tr);
955 }
956 EXPORT_SYMBOL_GPL(tracing_snapshot);
957 
958 /**
959  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
960  * @tr:		The tracing instance to snapshot
961  * @cond_data:	The data to be tested conditionally, and possibly saved
962  *
963  * This is the same as tracing_snapshot() except that the snapshot is
964  * conditional - the snapshot will only happen if the
965  * cond_snapshot.update() implementation receiving the cond_data
966  * returns true, which means that the trace array's cond_snapshot
967  * update() operation used the cond_data to determine whether the
968  * snapshot should be taken, and if it was, presumably saved it along
969  * with the snapshot.
970  */
971 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
972 {
973 	tracing_snapshot_instance_cond(tr, cond_data);
974 }
975 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
976 
977 /**
978  * tracing_snapshot_cond_data - get the user data associated with a snapshot
979  * @tr:		The tracing instance
980  *
981  * When the user enables a conditional snapshot using
982  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
983  * with the snapshot.  This accessor is used to retrieve it.
984  *
985  * Should not be called from cond_snapshot.update(), since it takes
986  * the tr->max_lock lock, which the code calling
987  * cond_snapshot.update() has already done.
988  *
989  * Returns the cond_data associated with the trace array's snapshot.
990  */
991 void *tracing_cond_snapshot_data(struct trace_array *tr)
992 {
993 	void *cond_data = NULL;
994 
995 	arch_spin_lock(&tr->max_lock);
996 
997 	if (tr->cond_snapshot)
998 		cond_data = tr->cond_snapshot->cond_data;
999 
1000 	arch_spin_unlock(&tr->max_lock);
1001 
1002 	return cond_data;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1005 
1006 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1007 					struct trace_buffer *size_buf, int cpu_id);
1008 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1009 
1010 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1011 {
1012 	int ret;
1013 
1014 	if (!tr->allocated_snapshot) {
1015 
1016 		/* allocate spare buffer */
1017 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1018 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1019 		if (ret < 0)
1020 			return ret;
1021 
1022 		tr->allocated_snapshot = true;
1023 	}
1024 
1025 	return 0;
1026 }
1027 
1028 static void free_snapshot(struct trace_array *tr)
1029 {
1030 	/*
1031 	 * We don't free the ring buffer; instead, we resize it because
1032 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1033 	 * we want to preserve it.
1034 	 */
1035 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1036 	set_buffer_entries(&tr->max_buffer, 1);
1037 	tracing_reset_online_cpus(&tr->max_buffer);
1038 	tr->allocated_snapshot = false;
1039 }
1040 
1041 /**
1042  * tracing_alloc_snapshot - allocate snapshot buffer.
1043  *
1044  * This only allocates the snapshot buffer if it isn't already
1045  * allocated - it doesn't also take a snapshot.
1046  *
1047  * This is meant to be used in cases where the snapshot buffer needs
1048  * to be set up for events that can't sleep but need to be able to
1049  * trigger a snapshot.
1050  */
1051 int tracing_alloc_snapshot(void)
1052 {
1053 	struct trace_array *tr = &global_trace;
1054 	int ret;
1055 
1056 	ret = tracing_alloc_snapshot_instance(tr);
1057 	WARN_ON(ret < 0);
1058 
1059 	return ret;
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1062 
1063 /**
1064  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1065  *
1066  * This is similar to tracing_snapshot(), but it will allocate the
1067  * snapshot buffer if it isn't already allocated. Use this only
1068  * where it is safe to sleep, as the allocation may sleep.
1069  *
1070  * This causes a swap between the snapshot buffer and the current live
1071  * tracing buffer. You can use this to take snapshots of the live
1072  * trace when some condition is triggered, but continue to trace.
1073  */
1074 void tracing_snapshot_alloc(void)
1075 {
1076 	int ret;
1077 
1078 	ret = tracing_alloc_snapshot();
1079 	if (ret < 0)
1080 		return;
1081 
1082 	tracing_snapshot();
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1085 
1086 /**
1087  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1088  * @tr:		The tracing instance
1089  * @cond_data:	User data to associate with the snapshot
1090  * @update:	Implementation of the cond_snapshot update function
1091  *
1092  * Check whether the conditional snapshot for the given instance has
1093  * already been enabled, or if the current tracer is already using a
1094  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1095  * save the cond_data and update function inside.
1096  *
1097  * Returns 0 if successful, error otherwise.
1098  */
1099 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1100 				 cond_update_fn_t update)
1101 {
1102 	struct cond_snapshot *cond_snapshot;
1103 	int ret = 0;
1104 
1105 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1106 	if (!cond_snapshot)
1107 		return -ENOMEM;
1108 
1109 	cond_snapshot->cond_data = cond_data;
1110 	cond_snapshot->update = update;
1111 
1112 	mutex_lock(&trace_types_lock);
1113 
1114 	ret = tracing_alloc_snapshot_instance(tr);
1115 	if (ret)
1116 		goto fail_unlock;
1117 
1118 	if (tr->current_trace->use_max_tr) {
1119 		ret = -EBUSY;
1120 		goto fail_unlock;
1121 	}
1122 
1123 	/*
1124 	 * The cond_snapshot can only change to NULL without the
1125 	 * trace_types_lock. We don't care if we race with it going
1126 	 * to NULL, but we want to make sure that it's not set to
1127 	 * something other than NULL when we get here, which we can
1128 	 * do safely with only holding the trace_types_lock and not
1129 	 * having to take the max_lock.
1130 	 */
1131 	if (tr->cond_snapshot) {
1132 		ret = -EBUSY;
1133 		goto fail_unlock;
1134 	}
1135 
1136 	arch_spin_lock(&tr->max_lock);
1137 	tr->cond_snapshot = cond_snapshot;
1138 	arch_spin_unlock(&tr->max_lock);
1139 
1140 	mutex_unlock(&trace_types_lock);
1141 
1142 	return ret;
1143 
1144  fail_unlock:
1145 	mutex_unlock(&trace_types_lock);
1146 	kfree(cond_snapshot);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
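/*
 * A minimal sketch of an update callback (hypothetical names; the
 * cond_update_fn_t signature is assumed from the call site in
 * update_max_tr() below): it receives the cond_data that was passed to
 * tracing_snapshot_cond() and returns true only if the snapshot should
 * actually be taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_sample *s = cond_data;
 *
 *		return s->latency > s->threshold;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, my_private_data, my_update);
 */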
1150 
1151 /**
1152  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1153  * @tr:		The tracing instance
1154  *
1155  * Check whether the conditional snapshot for the given instance is
1156  * enabled; if so, free the cond_snapshot associated with it,
1157  * otherwise return -EINVAL.
1158  *
1159  * Returns 0 if successful, error otherwise.
1160  */
1161 int tracing_snapshot_cond_disable(struct trace_array *tr)
1162 {
1163 	int ret = 0;
1164 
1165 	arch_spin_lock(&tr->max_lock);
1166 
1167 	if (!tr->cond_snapshot)
1168 		ret = -EINVAL;
1169 	else {
1170 		kfree(tr->cond_snapshot);
1171 		tr->cond_snapshot = NULL;
1172 	}
1173 
1174 	arch_spin_unlock(&tr->max_lock);
1175 
1176 	return ret;
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1179 #else
1180 void tracing_snapshot(void)
1181 {
1182 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1183 }
1184 EXPORT_SYMBOL_GPL(tracing_snapshot);
1185 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1186 {
1187 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1190 int tracing_alloc_snapshot(void)
1191 {
1192 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1193 	return -ENODEV;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1196 void tracing_snapshot_alloc(void)
1197 {
1198 	/* Give warning */
1199 	tracing_snapshot();
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204 	return NULL;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1208 {
1209 	return -ENODEV;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1212 int tracing_snapshot_cond_disable(struct trace_array *tr)
1213 {
1214 	return false;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1217 #endif /* CONFIG_TRACER_SNAPSHOT */
1218 
1219 void tracer_tracing_off(struct trace_array *tr)
1220 {
1221 	if (tr->trace_buffer.buffer)
1222 		ring_buffer_record_off(tr->trace_buffer.buffer);
1223 	/*
1224 	 * This flag is looked at when buffers haven't been allocated
1225 	 * yet, or by some tracers (like irqsoff), that just want to
1226 	 * know if the ring buffer has been disabled, but it can handle
1227 	 * races of where it gets disabled but we still do a record.
1228 	 * As the check is in the fast path of the tracers, it is more
1229 	 * important to be fast than accurate.
1230 	 */
1231 	tr->buffer_disabled = 1;
1232 	/* Make the flag seen by readers */
1233 	smp_wmb();
1234 }
1235 
1236 /**
1237  * tracing_off - turn off tracing buffers
1238  *
1239  * This function stops the tracing buffers from recording data.
1240  * It does not disable any overhead the tracers themselves may
1241  * be causing. This function simply causes all recording to
1242  * the ring buffers to fail.
1243  */
1244 void tracing_off(void)
1245 {
1246 	tracer_tracing_off(&global_trace);
1247 }
1248 EXPORT_SYMBOL_GPL(tracing_off);
1249 
1250 void disable_trace_on_warning(void)
1251 {
1252 	if (__disable_trace_on_warning)
1253 		tracing_off();
1254 }
1255 
1256 /**
1257  * tracer_tracing_is_on - show real state of ring buffer enabled
1258  * @tr: the trace array whose ring buffer state is checked
1259  *
1260  * Shows real state of the ring buffer if it is enabled or not.
1261  */
1262 bool tracer_tracing_is_on(struct trace_array *tr)
1263 {
1264 	if (tr->trace_buffer.buffer)
1265 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1266 	return !tr->buffer_disabled;
1267 }
1268 
1269 /**
1270  * tracing_is_on - show state of ring buffers enabled
1271  */
1272 int tracing_is_on(void)
1273 {
1274 	return tracer_tracing_is_on(&global_trace);
1275 }
1276 EXPORT_SYMBOL_GPL(tracing_is_on);
1277 
1278 static int __init set_buf_size(char *str)
1279 {
1280 	unsigned long buf_size;
1281 
1282 	if (!str)
1283 		return 0;
1284 	buf_size = memparse(str, &str);
1285 	/* nr_entries can not be zero */
1286 	if (buf_size == 0)
1287 		return 0;
1288 	trace_buf_size = buf_size;
1289 	return 1;
1290 }
1291 __setup("trace_buf_size=", set_buf_size);
1292 
1293 static int __init set_tracing_thresh(char *str)
1294 {
1295 	unsigned long threshold;
1296 	int ret;
1297 
1298 	if (!str)
1299 		return 0;
1300 	ret = kstrtoul(str, 0, &threshold);
1301 	if (ret < 0)
1302 		return 0;
1303 	tracing_thresh = threshold * 1000;
1304 	return 1;
1305 }
1306 __setup("tracing_thresh=", set_tracing_thresh);
1307 
1308 unsigned long nsecs_to_usecs(unsigned long nsecs)
1309 {
1310 	return nsecs / 1000;
1311 }
1312 
1313 /*
1314  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1315  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1316  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1317  * of strings in the order that the evals (enum) were defined.
1318  */
1319 #undef C
1320 #define C(a, b) b
1321 
1322 /* These must match the bit positions in trace_iterator_flags */
1323 static const char *trace_options[] = {
1324 	TRACE_FLAGS
1325 	NULL
1326 };
1327 
1328 static struct {
1329 	u64 (*func)(void);
1330 	const char *name;
1331 	int in_ns;		/* is this clock in nanoseconds? */
1332 } trace_clocks[] = {
1333 	{ trace_clock_local,		"local",	1 },
1334 	{ trace_clock_global,		"global",	1 },
1335 	{ trace_clock_counter,		"counter",	0 },
1336 	{ trace_clock_jiffies,		"uptime",	0 },
1337 	{ trace_clock,			"perf",		1 },
1338 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1339 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1340 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1341 	ARCH_TRACE_CLOCKS
1342 };
1343 
1344 bool trace_clock_in_ns(struct trace_array *tr)
1345 {
1346 	if (trace_clocks[tr->clock_id].in_ns)
1347 		return true;
1348 
1349 	return false;
1350 }
1351 
1352 /*
1353  * trace_parser_get_init - gets the buffer for trace parser
1354  */
1355 int trace_parser_get_init(struct trace_parser *parser, int size)
1356 {
1357 	memset(parser, 0, sizeof(*parser));
1358 
1359 	parser->buffer = kmalloc(size, GFP_KERNEL);
1360 	if (!parser->buffer)
1361 		return 1;
1362 
1363 	parser->size = size;
1364 	return 0;
1365 }
1366 
1367 /*
1368  * trace_parser_put - frees the buffer for trace parser
1369  */
1370 void trace_parser_put(struct trace_parser *parser)
1371 {
1372 	kfree(parser->buffer);
1373 	parser->buffer = NULL;
1374 }
1375 
1376 /*
1377  * trace_get_user - reads the user input string separated by a space
1378  * (matched by isspace(ch))
1379  *
1380  * For each string found the 'struct trace_parser' is updated,
1381  * and the function returns.
1382  *
1383  * Returns number of bytes read.
1384  *
1385  * See kernel/trace/trace.h for 'struct trace_parser' details.
1386  */
1387 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1388 	size_t cnt, loff_t *ppos)
1389 {
1390 	char ch;
1391 	size_t read = 0;
1392 	ssize_t ret;
1393 
1394 	if (!*ppos)
1395 		trace_parser_clear(parser);
1396 
1397 	ret = get_user(ch, ubuf++);
1398 	if (ret)
1399 		goto out;
1400 
1401 	read++;
1402 	cnt--;
1403 
1404 	/*
1405 	 * The parser is not finished with the last write,
1406 	 * continue reading the user input without skipping spaces.
1407 	 */
1408 	if (!parser->cont) {
1409 		/* skip white space */
1410 		while (cnt && isspace(ch)) {
1411 			ret = get_user(ch, ubuf++);
1412 			if (ret)
1413 				goto out;
1414 			read++;
1415 			cnt--;
1416 		}
1417 
1418 		parser->idx = 0;
1419 
1420 		/* only spaces were written */
1421 		if (isspace(ch) || !ch) {
1422 			*ppos += read;
1423 			ret = read;
1424 			goto out;
1425 		}
1426 	}
1427 
1428 	/* read the non-space input */
1429 	while (cnt && !isspace(ch) && ch) {
1430 		if (parser->idx < parser->size - 1)
1431 			parser->buffer[parser->idx++] = ch;
1432 		else {
1433 			ret = -EINVAL;
1434 			goto out;
1435 		}
1436 		ret = get_user(ch, ubuf++);
1437 		if (ret)
1438 			goto out;
1439 		read++;
1440 		cnt--;
1441 	}
1442 
1443 	/* We either got finished input or we have to wait for another call. */
1444 	if (isspace(ch) || !ch) {
1445 		parser->buffer[parser->idx] = 0;
1446 		parser->cont = false;
1447 	} else if (parser->idx < parser->size - 1) {
1448 		parser->cont = true;
1449 		parser->buffer[parser->idx++] = ch;
1450 		/* Make sure the parsed string always terminates with '\0'. */
1451 		parser->buffer[parser->idx] = 0;
1452 	} else {
1453 		ret = -EINVAL;
1454 		goto out;
1455 	}
1456 
1457 	*ppos += read;
1458 	ret = read;
1459 
1460 out:
1461 	return ret;
1462 }
1463 
1464 /* TODO add a seq_buf_to_buffer() */
1465 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1466 {
1467 	int len;
1468 
1469 	if (trace_seq_used(s) <= s->seq.readpos)
1470 		return -EBUSY;
1471 
1472 	len = trace_seq_used(s) - s->seq.readpos;
1473 	if (cnt > len)
1474 		cnt = len;
1475 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1476 
1477 	s->seq.readpos += cnt;
1478 	return cnt;
1479 }
1480 
1481 unsigned long __read_mostly	tracing_thresh;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 /*
1485  * Copy the new maximum trace into the separate maximum-trace
1486  * structure. (This way the maximum trace is permanently saved
1487  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1488  */
1489 static void
1490 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1491 {
1492 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1493 	struct trace_buffer *max_buf = &tr->max_buffer;
1494 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1495 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1496 
1497 	max_buf->cpu = cpu;
1498 	max_buf->time_start = data->preempt_timestamp;
1499 
1500 	max_data->saved_latency = tr->max_latency;
1501 	max_data->critical_start = data->critical_start;
1502 	max_data->critical_end = data->critical_end;
1503 
1504 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1505 	max_data->pid = tsk->pid;
1506 	/*
1507 	 * If tsk == current, then use current_uid(), as that does not use
1508 	 * RCU. The irq tracer can be called out of RCU scope.
1509 	 */
1510 	if (tsk == current)
1511 		max_data->uid = current_uid();
1512 	else
1513 		max_data->uid = task_uid(tsk);
1514 
1515 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1516 	max_data->policy = tsk->policy;
1517 	max_data->rt_priority = tsk->rt_priority;
1518 
1519 	/* record this task's comm */
1520 	tracing_record_cmdline(tsk);
1521 }
1522 
1523 /**
1524  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1525  * @tr: tracer
1526  * @tsk: the task with the latency
1527  * @cpu: The cpu that initiated the trace.
1528  * @cond_data: User data associated with a conditional snapshot
1529  *
1530  * Flip the buffers between the @tr and the max_tr and record information
1531  * about which task was the cause of this latency.
1532  */
1533 void
1534 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1535 	      void *cond_data)
1536 {
1537 	if (tr->stop_count)
1538 		return;
1539 
1540 	WARN_ON_ONCE(!irqs_disabled());
1541 
1542 	if (!tr->allocated_snapshot) {
1543 		/* Only the nop tracer should hit this when disabling */
1544 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1545 		return;
1546 	}
1547 
1548 	arch_spin_lock(&tr->max_lock);
1549 
1550 	/* Inherit the recordable setting from trace_buffer */
1551 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1552 		ring_buffer_record_on(tr->max_buffer.buffer);
1553 	else
1554 		ring_buffer_record_off(tr->max_buffer.buffer);
1555 
1556 #ifdef CONFIG_TRACER_SNAPSHOT
1557 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1558 		goto out_unlock;
1559 #endif
1560 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1561 
1562 	__update_max_tr(tr, tsk, cpu);
1563 
1564  out_unlock:
1565 	arch_spin_unlock(&tr->max_lock);
1566 }
1567 
1568 /**
1569  * update_max_tr_single - only copy one trace over, and reset the rest
1570  * @tr: tracer
1571  * @tsk: task with the latency
1572  * @cpu: the cpu of the buffer to copy.
1573  *
1574  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1575  */
1576 void
1577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1578 {
1579 	int ret;
1580 
1581 	if (tr->stop_count)
1582 		return;
1583 
1584 	WARN_ON_ONCE(!irqs_disabled());
1585 	if (!tr->allocated_snapshot) {
1586 		/* Only the nop tracer should hit this when disabling */
1587 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1588 		return;
1589 	}
1590 
1591 	arch_spin_lock(&tr->max_lock);
1592 
1593 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1594 
1595 	if (ret == -EBUSY) {
1596 		/*
1597 		 * We failed to swap the buffer due to a commit taking
1598 		 * place on this CPU. We fail to record, but we reset
1599 		 * the max trace buffer (no one writes directly to it)
1600 		 * and flag that it failed.
1601 		 */
1602 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1603 			"Failed to swap buffers due to commit in progress\n");
1604 	}
1605 
1606 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1607 
1608 	__update_max_tr(tr, tsk, cpu);
1609 	arch_spin_unlock(&tr->max_lock);
1610 }
1611 #endif /* CONFIG_TRACER_MAX_TRACE */
1612 
1613 static int wait_on_pipe(struct trace_iterator *iter, int full)
1614 {
1615 	/* Iterators are static, they should be filled or empty */
1616 	if (trace_buffer_iter(iter, iter->cpu_file))
1617 		return 0;
1618 
1619 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1620 				full);
1621 }
1622 
1623 #ifdef CONFIG_FTRACE_STARTUP_TEST
1624 static bool selftests_can_run;
1625 
1626 struct trace_selftests {
1627 	struct list_head		list;
1628 	struct tracer			*type;
1629 };
1630 
1631 static LIST_HEAD(postponed_selftests);
1632 
1633 static int save_selftest(struct tracer *type)
1634 {
1635 	struct trace_selftests *selftest;
1636 
1637 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1638 	if (!selftest)
1639 		return -ENOMEM;
1640 
1641 	selftest->type = type;
1642 	list_add(&selftest->list, &postponed_selftests);
1643 	return 0;
1644 }
1645 
1646 static int run_tracer_selftest(struct tracer *type)
1647 {
1648 	struct trace_array *tr = &global_trace;
1649 	struct tracer *saved_tracer = tr->current_trace;
1650 	int ret;
1651 
1652 	if (!type->selftest || tracing_selftest_disabled)
1653 		return 0;
1654 
1655 	/*
1656 	 * If a tracer registers early in boot up (before scheduling is
1657 	 * initialized and such), then do not run its selftests yet.
1658 	 * Instead, run it a little later in the boot process.
1659 	 */
1660 	if (!selftests_can_run)
1661 		return save_selftest(type);
1662 
1663 	/*
1664 	 * Run a selftest on this tracer.
1665 	 * Here we reset the trace buffer, and set the current
1666 	 * tracer to be this tracer. The tracer can then run some
1667 	 * internal tracing to verify that everything is in order.
1668 	 * If we fail, we do not register this tracer.
1669 	 */
1670 	tracing_reset_online_cpus(&tr->trace_buffer);
1671 
1672 	tr->current_trace = type;
1673 
1674 #ifdef CONFIG_TRACER_MAX_TRACE
1675 	if (type->use_max_tr) {
1676 		/* If we expanded the buffers, make sure the max is expanded too */
1677 		if (ring_buffer_expanded)
1678 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1679 					   RING_BUFFER_ALL_CPUS);
1680 		tr->allocated_snapshot = true;
1681 	}
1682 #endif
1683 
1684 	/* the test is responsible for initializing and enabling */
1685 	pr_info("Testing tracer %s: ", type->name);
1686 	ret = type->selftest(type, tr);
1687 	/* the test is responsible for resetting too */
1688 	tr->current_trace = saved_tracer;
1689 	if (ret) {
1690 		printk(KERN_CONT "FAILED!\n");
1691 		/* Add the warning after printing 'FAILED' */
1692 		WARN_ON(1);
1693 		return -1;
1694 	}
1695 	/* Only reset on passing, to avoid touching corrupted buffers */
1696 	tracing_reset_online_cpus(&tr->trace_buffer);
1697 
1698 #ifdef CONFIG_TRACER_MAX_TRACE
1699 	if (type->use_max_tr) {
1700 		tr->allocated_snapshot = false;
1701 
1702 		/* Shrink the max buffer again */
1703 		if (ring_buffer_expanded)
1704 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1705 					   RING_BUFFER_ALL_CPUS);
1706 	}
1707 #endif
1708 
1709 	printk(KERN_CONT "PASSED\n");
1710 	return 0;
1711 }
1712 
1713 static __init int init_trace_selftests(void)
1714 {
1715 	struct trace_selftests *p, *n;
1716 	struct tracer *t, **last;
1717 	int ret;
1718 
1719 	selftests_can_run = true;
1720 
1721 	mutex_lock(&trace_types_lock);
1722 
1723 	if (list_empty(&postponed_selftests))
1724 		goto out;
1725 
1726 	pr_info("Running postponed tracer tests:\n");
1727 
1728 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1729 		/* This loop can take minutes when sanitizers are enabled, so
1730 		 * let's make sure we allow RCU processing.
1731 		 */
1732 		cond_resched();
1733 		ret = run_tracer_selftest(p->type);
1734 		/* If the test fails, then warn and remove from available_tracers */
1735 		if (ret < 0) {
1736 			WARN(1, "tracer: %s failed selftest, disabling\n",
1737 			     p->type->name);
1738 			last = &trace_types;
1739 			for (t = trace_types; t; t = t->next) {
1740 				if (t == p->type) {
1741 					*last = t->next;
1742 					break;
1743 				}
1744 				last = &t->next;
1745 			}
1746 		}
1747 		list_del(&p->list);
1748 		kfree(p);
1749 	}
1750 
1751  out:
1752 	mutex_unlock(&trace_types_lock);
1753 
1754 	return 0;
1755 }
1756 core_initcall(init_trace_selftests);
1757 #else
1758 static inline int run_tracer_selftest(struct tracer *type)
1759 {
1760 	return 0;
1761 }
1762 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1763 
1764 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1765 
1766 static void __init apply_trace_boot_options(void);
1767 
1768 /**
1769  * register_tracer - register a tracer with the ftrace system.
1770  * @type: the plugin for the tracer
1771  *
1772  * Register a new plugin tracer.
1773  */
1774 int __init register_tracer(struct tracer *type)
1775 {
1776 	struct tracer *t;
1777 	int ret = 0;
1778 
1779 	if (!type->name) {
1780 		pr_info("Tracer must have a name\n");
1781 		return -1;
1782 	}
1783 
1784 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1785 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1786 		return -1;
1787 	}
1788 
1789 	mutex_lock(&trace_types_lock);
1790 
1791 	tracing_selftest_running = true;
1792 
1793 	for (t = trace_types; t; t = t->next) {
1794 		if (strcmp(type->name, t->name) == 0) {
1795 			/* already found */
1796 			pr_info("Tracer %s already registered\n",
1797 				type->name);
1798 			ret = -1;
1799 			goto out;
1800 		}
1801 	}
1802 
1803 	if (!type->set_flag)
1804 		type->set_flag = &dummy_set_flag;
1805 	if (!type->flags) {
1806 		/* allocate a dummy tracer_flags */
1807 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1808 		if (!type->flags) {
1809 			ret = -ENOMEM;
1810 			goto out;
1811 		}
1812 		type->flags->val = 0;
1813 		type->flags->opts = dummy_tracer_opt;
1814 	} else
1815 		if (!type->flags->opts)
1816 			type->flags->opts = dummy_tracer_opt;
1817 
1818 	/* store the tracer for __set_tracer_option */
1819 	type->flags->trace = type;
1820 
1821 	ret = run_tracer_selftest(type);
1822 	if (ret < 0)
1823 		goto out;
1824 
1825 	type->next = trace_types;
1826 	trace_types = type;
1827 	add_tracer_options(&global_trace, type);
1828 
1829  out:
1830 	tracing_selftest_running = false;
1831 	mutex_unlock(&trace_types_lock);
1832 
1833 	if (ret || !default_bootup_tracer)
1834 		goto out_unlock;
1835 
1836 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1837 		goto out_unlock;
1838 
1839 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1840 	/* Do we want this tracer to start on bootup? */
1841 	tracing_set_tracer(&global_trace, type->name);
1842 	default_bootup_tracer = NULL;
1843 
1844 	apply_trace_boot_options();
1845 
1846 	/* disable other selftests, since this will break them. */
1847 	tracing_selftest_disabled = true;
1848 #ifdef CONFIG_FTRACE_STARTUP_TEST
1849 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1850 	       type->name);
1851 #endif
1852 
1853  out_unlock:
1854 	return ret;
1855 }
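/*
 * A minimal sketch of how a tracer plugin registers itself (hypothetical
 * names; the fields shown follow the tracers elsewhere in kernel/trace/):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */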
1856 
1857 void tracing_reset(struct trace_buffer *buf, int cpu)
1858 {
1859 	struct ring_buffer *buffer = buf->buffer;
1860 
1861 	if (!buffer)
1862 		return;
1863 
1864 	ring_buffer_record_disable(buffer);
1865 
1866 	/* Make sure all commits have finished */
1867 	synchronize_rcu();
1868 	ring_buffer_reset_cpu(buffer, cpu);
1869 
1870 	ring_buffer_record_enable(buffer);
1871 }
1872 
1873 void tracing_reset_online_cpus(struct trace_buffer *buf)
1874 {
1875 	struct ring_buffer *buffer = buf->buffer;
1876 	int cpu;
1877 
1878 	if (!buffer)
1879 		return;
1880 
1881 	ring_buffer_record_disable(buffer);
1882 
1883 	/* Make sure all commits have finished */
1884 	synchronize_rcu();
1885 
1886 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1887 
1888 	for_each_online_cpu(cpu)
1889 		ring_buffer_reset_cpu(buffer, cpu);
1890 
1891 	ring_buffer_record_enable(buffer);
1892 }
1893 
1894 /* Must have trace_types_lock held */
1895 void tracing_reset_all_online_cpus(void)
1896 {
1897 	struct trace_array *tr;
1898 
1899 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1900 		if (!tr->clear_trace)
1901 			continue;
1902 		tr->clear_trace = false;
1903 		tracing_reset_online_cpus(&tr->trace_buffer);
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 		tracing_reset_online_cpus(&tr->max_buffer);
1906 #endif
1907 	}
1908 }
1909 
1910 static int *tgid_map;
1911 
1912 #define SAVED_CMDLINES_DEFAULT 128
1913 #define NO_CMDLINE_MAP UINT_MAX
1914 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1915 struct saved_cmdlines_buffer {
1916 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1917 	unsigned *map_cmdline_to_pid;
1918 	unsigned cmdline_num;
1919 	int cmdline_idx;
1920 	char *saved_cmdlines;
1921 };
1922 static struct saved_cmdlines_buffer *savedcmd;
1923 
1924 /* temporary disable recording */
1925 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1926 
1927 static inline char *get_saved_cmdlines(int idx)
1928 {
1929 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1930 }
1931 
1932 static inline void set_cmdline(int idx, const char *cmdline)
1933 {
1934 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1935 }
1936 
1937 static int allocate_cmdlines_buffer(unsigned int val,
1938 				    struct saved_cmdlines_buffer *s)
1939 {
1940 	s->map_cmdline_to_pid = kmalloc_array(val,
1941 					      sizeof(*s->map_cmdline_to_pid),
1942 					      GFP_KERNEL);
1943 	if (!s->map_cmdline_to_pid)
1944 		return -ENOMEM;
1945 
1946 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1947 	if (!s->saved_cmdlines) {
1948 		kfree(s->map_cmdline_to_pid);
1949 		return -ENOMEM;
1950 	}
1951 
1952 	s->cmdline_idx = 0;
1953 	s->cmdline_num = val;
1954 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1955 	       sizeof(s->map_pid_to_cmdline));
1956 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1957 	       val * sizeof(*s->map_cmdline_to_pid));
1958 
1959 	return 0;
1960 }
1961 
1962 static int trace_create_savedcmd(void)
1963 {
1964 	int ret;
1965 
1966 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1967 	if (!savedcmd)
1968 		return -ENOMEM;
1969 
1970 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1971 	if (ret < 0) {
1972 		kfree(savedcmd);
1973 		savedcmd = NULL;
1974 		return -ENOMEM;
1975 	}
1976 
1977 	return 0;
1978 }
1979 
1980 int is_tracing_stopped(void)
1981 {
1982 	return global_trace.stop_count;
1983 }
1984 
1985 /**
1986  * tracing_start - quick start of the tracer
1987  *
1988  * If tracing is enabled but was stopped by tracing_stop,
1989  * this will start the tracer back up.
1990  */
1991 void tracing_start(void)
1992 {
1993 	struct ring_buffer *buffer;
1994 	unsigned long flags;
1995 
1996 	if (tracing_disabled)
1997 		return;
1998 
1999 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2000 	if (--global_trace.stop_count) {
2001 		if (global_trace.stop_count < 0) {
2002 			/* Someone screwed up their debugging */
2003 			WARN_ON_ONCE(1);
2004 			global_trace.stop_count = 0;
2005 		}
2006 		goto out;
2007 	}
2008 
2009 	/* Prevent the buffers from switching */
2010 	arch_spin_lock(&global_trace.max_lock);
2011 
2012 	buffer = global_trace.trace_buffer.buffer;
2013 	if (buffer)
2014 		ring_buffer_record_enable(buffer);
2015 
2016 #ifdef CONFIG_TRACER_MAX_TRACE
2017 	buffer = global_trace.max_buffer.buffer;
2018 	if (buffer)
2019 		ring_buffer_record_enable(buffer);
2020 #endif
2021 
2022 	arch_spin_unlock(&global_trace.max_lock);
2023 
2024  out:
2025 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2026 }
2027 
2028 static void tracing_start_tr(struct trace_array *tr)
2029 {
2030 	struct ring_buffer *buffer;
2031 	unsigned long flags;
2032 
2033 	if (tracing_disabled)
2034 		return;
2035 
2036 	/* If global, we need to also start the max tracer */
2037 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2038 		return tracing_start();
2039 
2040 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2041 
2042 	if (--tr->stop_count) {
2043 		if (tr->stop_count < 0) {
2044 			/* Someone screwed up their debugging */
2045 			WARN_ON_ONCE(1);
2046 			tr->stop_count = 0;
2047 		}
2048 		goto out;
2049 	}
2050 
2051 	buffer = tr->trace_buffer.buffer;
2052 	if (buffer)
2053 		ring_buffer_record_enable(buffer);
2054 
2055  out:
2056 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2057 }
2058 
2059 /**
2060  * tracing_stop - quick stop of the tracer
2061  *
2062  * Light weight way to stop tracing. Use in conjunction with
2063  * tracing_start.
2064  */
2065 void tracing_stop(void)
2066 {
2067 	struct ring_buffer *buffer;
2068 	unsigned long flags;
2069 
2070 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071 	if (global_trace.stop_count++)
2072 		goto out;
2073 
2074 	/* Prevent the buffers from switching */
2075 	arch_spin_lock(&global_trace.max_lock);
2076 
2077 	buffer = global_trace.trace_buffer.buffer;
2078 	if (buffer)
2079 		ring_buffer_record_disable(buffer);
2080 
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082 	buffer = global_trace.max_buffer.buffer;
2083 	if (buffer)
2084 		ring_buffer_record_disable(buffer);
2085 #endif
2086 
2087 	arch_spin_unlock(&global_trace.max_lock);
2088 
2089  out:
2090 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2091 }
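
/*
 * Illustrative sketch: tracing_stop() and tracing_start() nest via
 * stop_count, so every stop must be paired with a start.  A debug helper
 * (hypothetical name) that wants to keep its own work out of the trace
 * would bracket it like this.
 */
static void example_untraced_section(void)
{
	tracing_stop();
	/* ... noisy work that should not appear in the ring buffer ... */
	tracing_start();
}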
2092 
2093 static void tracing_stop_tr(struct trace_array *tr)
2094 {
2095 	struct ring_buffer *buffer;
2096 	unsigned long flags;
2097 
2098 	/* If global, we need to also stop the max tracer */
2099 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2100 		return tracing_stop();
2101 
2102 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2103 	if (tr->stop_count++)
2104 		goto out;
2105 
2106 	buffer = tr->trace_buffer.buffer;
2107 	if (buffer)
2108 		ring_buffer_record_disable(buffer);
2109 
2110  out:
2111 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2112 }
2113 
2114 static int trace_save_cmdline(struct task_struct *tsk)
2115 {
2116 	unsigned pid, idx;
2117 
2118 	/* treat recording of idle task as a success */
2119 	if (!tsk->pid)
2120 		return 1;
2121 
2122 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2123 		return 0;
2124 
2125 	/*
2126 	 * It's not the end of the world if we don't get
2127 	 * the lock, but we also don't want to spin
2128 	 * nor do we want to disable interrupts,
2129 	 * so if we miss here, then better luck next time.
2130 	 */
2131 	if (!arch_spin_trylock(&trace_cmdline_lock))
2132 		return 0;
2133 
2134 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2135 	if (idx == NO_CMDLINE_MAP) {
2136 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2137 
2138 		/*
2139 		 * Check whether the cmdline buffer at idx has a pid
2140 		 * mapped. We are going to overwrite that entry so we
2141 		 * need to clear the map_pid_to_cmdline. Otherwise we
2142 		 * would read the new comm for the old pid.
2143 		 */
2144 		pid = savedcmd->map_cmdline_to_pid[idx];
2145 		if (pid != NO_CMDLINE_MAP)
2146 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2147 
2148 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2149 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2150 
2151 		savedcmd->cmdline_idx = idx;
2152 	}
2153 
2154 	set_cmdline(idx, tsk->comm);
2155 
2156 	arch_spin_unlock(&trace_cmdline_lock);
2157 
2158 	return 1;
2159 }
2160 
2161 static void __trace_find_cmdline(int pid, char comm[])
2162 {
2163 	unsigned map;
2164 
2165 	if (!pid) {
2166 		strcpy(comm, "<idle>");
2167 		return;
2168 	}
2169 
2170 	if (WARN_ON_ONCE(pid < 0)) {
2171 		strcpy(comm, "<XXX>");
2172 		return;
2173 	}
2174 
2175 	if (pid > PID_MAX_DEFAULT) {
2176 		strcpy(comm, "<...>");
2177 		return;
2178 	}
2179 
2180 	map = savedcmd->map_pid_to_cmdline[pid];
2181 	if (map != NO_CMDLINE_MAP)
2182 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2183 	else
2184 		strcpy(comm, "<...>");
2185 }
2186 
2187 void trace_find_cmdline(int pid, char comm[])
2188 {
2189 	preempt_disable();
2190 	arch_spin_lock(&trace_cmdline_lock);
2191 
2192 	__trace_find_cmdline(pid, comm);
2193 
2194 	arch_spin_unlock(&trace_cmdline_lock);
2195 	preempt_enable();
2196 }
2197 
2198 int trace_find_tgid(int pid)
2199 {
2200 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2201 		return 0;
2202 
2203 	return tgid_map[pid];
2204 }
2205 
2206 static int trace_save_tgid(struct task_struct *tsk)
2207 {
2208 	/* treat recording of idle task as a success */
2209 	if (!tsk->pid)
2210 		return 1;
2211 
2212 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2213 		return 0;
2214 
2215 	tgid_map[tsk->pid] = tsk->tgid;
2216 	return 1;
2217 }
2218 
2219 static bool tracing_record_taskinfo_skip(int flags)
2220 {
2221 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2222 		return true;
2223 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2224 		return true;
2225 	if (!__this_cpu_read(trace_taskinfo_save))
2226 		return true;
2227 	return false;
2228 }
2229 
2230 /**
2231  * tracing_record_taskinfo - record the task info of a task
2232  *
2233  * @task:  task to record
2234  * @flags: TRACE_RECORD_CMDLINE for recording comm
2235  *         TRACE_RECORD_TGID for recording tgid
2236  */
2237 void tracing_record_taskinfo(struct task_struct *task, int flags)
2238 {
2239 	bool done;
2240 
2241 	if (tracing_record_taskinfo_skip(flags))
2242 		return;
2243 
2244 	/*
2245 	 * Record as much task information as possible. If some fail, continue
2246 	 * to try to record the others.
2247 	 */
2248 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2249 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2250 
2251 	/* If recording any information failed, retry again soon. */
2252 	if (!done)
2253 		return;
2254 
2255 	__this_cpu_write(trace_taskinfo_save, false);
2256 }
2257 
2258 /**
2259  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2260  *
2261  * @prev:  previous task during sched_switch
2262  * @next:  next task during sched_switch
2263  * @flags: TRACE_RECORD_CMDLINE for recording comm
2264  *         TRACE_RECORD_TGID for recording tgid
2265  */
2266 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2267 					  struct task_struct *next, int flags)
2268 {
2269 	bool done;
2270 
2271 	if (tracing_record_taskinfo_skip(flags))
2272 		return;
2273 
2274 	/*
2275 	 * Record as much task information as possible. If some fail, continue
2276 	 * to try to record the others.
2277 	 */
2278 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2279 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2280 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2281 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2282 
2283 	/* If recording any information failed, retry again soon. */
2284 	if (!done)
2285 		return;
2286 
2287 	__this_cpu_write(trace_taskinfo_save, false);
2288 }
2289 
2290 /* Helpers to record a specific task information */
2291 void tracing_record_cmdline(struct task_struct *task)
2292 {
2293 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2294 }
2295 
2296 void tracing_record_tgid(struct task_struct *task)
2297 {
2298 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2299 }
2300 
2301 /*
2302  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2303  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2304  * simplifies those functions and keeps them in sync.
2305  */
2306 enum print_line_t trace_handle_return(struct trace_seq *s)
2307 {
2308 	return trace_seq_has_overflowed(s) ?
2309 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_handle_return);
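
/*
 * Illustrative sketch: the usual shape of an event's ->trace() output
 * callback.  Everything written to iter->seq is checked once at the end
 * via trace_handle_return().  The event and callback are hypothetical.
 */
static enum print_line_t example_event_trace(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event on CPU %d\n", iter->cpu);

	return trace_handle_return(s);
}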
2312 
2313 void
2314 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2315 			     unsigned long flags, int pc)
2316 {
2317 	struct task_struct *tsk = current;
2318 
2319 	entry->preempt_count		= pc & 0xff;
2320 	entry->pid			= (tsk) ? tsk->pid : 0;
2321 	entry->type			= type;
2322 	entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326 		TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2329 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
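
/*
 * Illustrative sketch: callers capture the irq flags and preempt count
 * themselves and hand them to tracing_generic_entry_update() to fill in
 * the common entry header.  The wrapper below is hypothetical.
 */
static void example_fill_header(struct trace_entry *ent, unsigned short type)
{
	unsigned long irq_flags;

	local_save_flags(irq_flags);
	tracing_generic_entry_update(ent, type, irq_flags, preempt_count());
}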
2335 
2336 struct ring_buffer_event *
2337 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2338 			  int type,
2339 			  unsigned long len,
2340 			  unsigned long flags, int pc)
2341 {
2342 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2343 }
2344 
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348 
2349 /**
2350  * trace_buffered_event_enable - enable buffering events
2351  *
2352  * When events are being filtered, it is quicker to write the event
2353  * data into a temporary buffer when there is a good chance that it
2354  * will not be committed: discarding an entry from the ring buffer is
2355  * slower than committing one, and much slower than copying into a
2356  * commit.
2357  *
2358  * When an event is to be filtered, allocate per-cpu buffers to write
2359  * the event data into. If the event is then filtered and discarded,
2360  * it is simply dropped; otherwise, the entire data is committed in
2361  * one shot.
2362  */
2363 void trace_buffered_event_enable(void)
2364 {
2365 	struct ring_buffer_event *event;
2366 	struct page *page;
2367 	int cpu;
2368 
2369 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370 
2371 	if (trace_buffered_event_ref++)
2372 		return;
2373 
2374 	for_each_tracing_cpu(cpu) {
2375 		page = alloc_pages_node(cpu_to_node(cpu),
2376 					GFP_KERNEL | __GFP_NORETRY, 0);
2377 		if (!page)
2378 			goto failed;
2379 
2380 		event = page_address(page);
2381 		memset(event, 0, sizeof(*event));
2382 
2383 		per_cpu(trace_buffered_event, cpu) = event;
2384 
2385 		preempt_disable();
2386 		if (cpu == smp_processor_id() &&
2387 		    this_cpu_read(trace_buffered_event) !=
2388 		    per_cpu(trace_buffered_event, cpu))
2389 			WARN_ON_ONCE(1);
2390 		preempt_enable();
2391 	}
2392 
2393 	return;
2394  failed:
2395 	trace_buffered_event_disable();
2396 }
2397 
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400 	/* Probably not needed, but do it anyway */
2401 	smp_rmb();
2402 	this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404 
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407 	this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409 
2410 /**
2411  * trace_buffered_event_disable - disable buffering events
2412  *
2413  * When a filter is removed, it is faster to not use the buffered
2414  * events, and to commit directly into the ring buffer. Free up
2415  * the temp buffers when there are no more users. This requires
2416  * special synchronization with current events.
2417  */
2418 void trace_buffered_event_disable(void)
2419 {
2420 	int cpu;
2421 
2422 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423 
2424 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425 		return;
2426 
2427 	if (--trace_buffered_event_ref)
2428 		return;
2429 
2430 	preempt_disable();
2431 	/* For each CPU, set the buffer as used. */
2432 	smp_call_function_many(tracing_buffer_mask,
2433 			       disable_trace_buffered_event, NULL, 1);
2434 	preempt_enable();
2435 
2436 	/* Wait for all current users to finish */
2437 	synchronize_rcu();
2438 
2439 	for_each_tracing_cpu(cpu) {
2440 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441 		per_cpu(trace_buffered_event, cpu) = NULL;
2442 	}
2443 	/*
2444 	 * Make sure trace_buffered_event is NULL before clearing
2445 	 * trace_buffered_event_cnt.
2446 	 */
2447 	smp_wmb();
2448 
2449 	preempt_disable();
2450 	/* Do the work on each cpu */
2451 	smp_call_function_many(tracing_buffer_mask,
2452 			       enable_trace_buffered_event, NULL, 1);
2453 	preempt_enable();
2454 }
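
/*
 * Illustrative sketch: the enable/disable pair is reference counted and
 * both sides expect event_mutex to be held (hence the WARNs above).
 * Filter attach/detach code pairs them roughly like this; the helper
 * itself is hypothetical.
 */
static void example_filter_toggle(bool attaching)
{
	mutex_lock(&event_mutex);
	if (attaching)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}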
2455 
2456 static struct ring_buffer *temp_buffer;
2457 
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460 			  struct trace_event_file *trace_file,
2461 			  int type, unsigned long len,
2462 			  unsigned long flags, int pc)
2463 {
2464 	struct ring_buffer_event *entry;
2465 	int val;
2466 
2467 	*current_rb = trace_file->tr->trace_buffer.buffer;
2468 
2469 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471 	    (entry = this_cpu_read(trace_buffered_event))) {
2472 		/* Try to use the per cpu buffer first */
2473 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2474 		if (val == 1) {
2475 			trace_event_setup(entry, type, flags, pc);
2476 			entry->array[0] = len;
2477 			return entry;
2478 		}
2479 		this_cpu_dec(trace_buffered_event_cnt);
2480 	}
2481 
2482 	entry = __trace_buffer_lock_reserve(*current_rb,
2483 					    type, len, flags, pc);
2484 	/*
2485 	 * If tracing is off, but we have triggers enabled
2486 	 * we still need to look at the event data. Use the temp_buffer
2487 	 * to store the trace event for the trigger to use. It's recursion
2488 	 * safe and will not be recorded anywhere.
2489 	 */
2490 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491 		*current_rb = temp_buffer;
2492 		entry = __trace_buffer_lock_reserve(*current_rb,
2493 						    type, len, flags, pc);
2494 	}
2495 	return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
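
/*
 * Illustrative sketch: the reserve/fill/commit sequence that generated
 * TRACE_EVENT code follows.  The entry layout and the reuse of TRACE_PRINT
 * as the type are assumptions made only for this example.
 */
struct example_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void example_emit(struct trace_event_file *trace_file,
			 unsigned long value, unsigned long flags, int pc)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct example_entry *entry;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						TRACE_PRINT, sizeof(*entry),
						flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	event_trigger_unlock_commit(trace_file, buffer, event, entry,
				    flags, pc);
}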
2498 
2499 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2500 static DEFINE_MUTEX(tracepoint_printk_mutex);
2501 
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504 	struct trace_event_call *event_call;
2505 	struct trace_event *event;
2506 	unsigned long flags;
2507 	struct trace_iterator *iter = tracepoint_print_iter;
2508 
2509 	/* We should never get here if iter is NULL */
2510 	if (WARN_ON_ONCE(!iter))
2511 		return;
2512 
2513 	event_call = fbuffer->trace_file->event_call;
2514 	if (!event_call || !event_call->event.funcs ||
2515 	    !event_call->event.funcs->trace)
2516 		return;
2517 
2518 	event = &fbuffer->trace_file->event_call->event;
2519 
2520 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521 	trace_seq_init(&iter->seq);
2522 	iter->ent = fbuffer->entry;
2523 	event_call->event.funcs->trace(iter, 0, event);
2524 	trace_seq_putc(&iter->seq, 0);
2525 	printk("%s", iter->seq.buffer);
2526 
2527 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529 
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531 			     void __user *buffer, size_t *lenp,
2532 			     loff_t *ppos)
2533 {
2534 	int save_tracepoint_printk;
2535 	int ret;
2536 
2537 	mutex_lock(&tracepoint_printk_mutex);
2538 	save_tracepoint_printk = tracepoint_printk;
2539 
2540 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541 
2542 	/*
2543 	 * This will force exiting early, as tracepoint_printk
2544 	 * is always zero when tracepoint_print_iter is not allocated
2545 	 */
2546 	if (!tracepoint_print_iter)
2547 		tracepoint_printk = 0;
2548 
2549 	if (save_tracepoint_printk == tracepoint_printk)
2550 		goto out;
2551 
2552 	if (tracepoint_printk)
2553 		static_key_enable(&tracepoint_printk_key.key);
2554 	else
2555 		static_key_disable(&tracepoint_printk_key.key);
2556 
2557  out:
2558 	mutex_unlock(&tracepoint_printk_mutex);
2559 
2560 	return ret;
2561 }
2562 
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565 	if (static_key_false(&tracepoint_printk_key.key))
2566 		output_printk(fbuffer);
2567 
2568 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569 				    fbuffer->event, fbuffer->entry,
2570 				    fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573 
2574 /*
2575  * Skip 3:
2576  *
2577  *   trace_buffer_unlock_commit_regs()
2578  *   trace_event_buffer_commit()
2579  *   trace_event_raw_event_xxx()
2580  */
2581 # define STACK_SKIP 3
2582 
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584 				     struct ring_buffer *buffer,
2585 				     struct ring_buffer_event *event,
2586 				     unsigned long flags, int pc,
2587 				     struct pt_regs *regs)
2588 {
2589 	__buffer_unlock_commit(buffer, event);
2590 
2591 	/*
2592 	 * If regs is not set, then skip the necessary functions.
2593 	 * Note, we can still get here via blktrace, wakeup tracer
2594 	 * and mmiotrace, but that's ok if they lose a function or
2595 	 * two. They are not that meaningful.
2596 	 */
2597 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598 	ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600 
2601 /*
2602  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2603  */
2604 void
2605 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2606 				   struct ring_buffer_event *event)
2607 {
2608 	__buffer_unlock_commit(buffer, event);
2609 }
2610 
2611 static void
2612 trace_process_export(struct trace_export *export,
2613 	       struct ring_buffer_event *event)
2614 {
2615 	struct trace_entry *entry;
2616 	unsigned int size = 0;
2617 
2618 	entry = ring_buffer_event_data(event);
2619 	size = ring_buffer_event_length(event);
2620 	export->write(export, entry, size);
2621 }
2622 
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624 
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626 
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628 
2629 static inline void ftrace_exports_enable(void)
2630 {
2631 	static_branch_enable(&ftrace_exports_enabled);
2632 }
2633 
2634 static inline void ftrace_exports_disable(void)
2635 {
2636 	static_branch_disable(&ftrace_exports_enabled);
2637 }
2638 
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641 	struct trace_export *export;
2642 
2643 	preempt_disable_notrace();
2644 
2645 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2646 	while (export) {
2647 		trace_process_export(export, event);
2648 		export = rcu_dereference_raw_notrace(export->next);
2649 	}
2650 
2651 	preempt_enable_notrace();
2652 }
2653 
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657 	rcu_assign_pointer(export->next, *list);
2658 	/*
2659 	 * We are entering export into the list but another
2660 	 * CPU might be walking that list. We need to make sure
2661 	 * the export->next pointer is valid before another CPU sees
2662 	 * the export pointer included into the list.
2663 	 */
2664 	rcu_assign_pointer(*list, export);
2665 }
2666 
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670 	struct trace_export **p;
2671 
2672 	for (p = list; *p != NULL; p = &(*p)->next)
2673 		if (*p == export)
2674 			break;
2675 
2676 	if (*p != export)
2677 		return -1;
2678 
2679 	rcu_assign_pointer(*p, (*p)->next);
2680 
2681 	return 0;
2682 }
2683 
2684 static inline void
2685 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 	if (*list == NULL)
2688 		ftrace_exports_enable();
2689 
2690 	add_trace_export(list, export);
2691 }
2692 
2693 static inline int
2694 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2695 {
2696 	int ret;
2697 
2698 	ret = rm_trace_export(list, export);
2699 	if (*list == NULL)
2700 		ftrace_exports_disable();
2701 
2702 	return ret;
2703 }
2704 
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707 	if (WARN_ON_ONCE(!export->write))
2708 		return -1;
2709 
2710 	mutex_lock(&ftrace_export_lock);
2711 
2712 	add_ftrace_export(&ftrace_exports_list, export);
2713 
2714 	mutex_unlock(&ftrace_export_lock);
2715 
2716 	return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719 
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722 	int ret;
2723 
2724 	mutex_lock(&ftrace_export_lock);
2725 
2726 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2727 
2728 	mutex_unlock(&ftrace_export_lock);
2729 
2730 	return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
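
/*
 * Illustrative sketch: a minimal trace_export consumer.  The ->write()
 * signature mirrors the call made in trace_process_export() above; where
 * the data goes (here only a byte count via pr_info) is an assumption for
 * the example.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	pr_info("example export: %u byte trace entry\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}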
2733 
2734 void
2735 trace_function(struct trace_array *tr,
2736 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737 	       int pc)
2738 {
2739 	struct trace_event_call *call = &event_function;
2740 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741 	struct ring_buffer_event *event;
2742 	struct ftrace_entry *entry;
2743 
2744 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745 					    flags, pc);
2746 	if (!event)
2747 		return;
2748 	entry	= ring_buffer_event_data(event);
2749 	entry->ip			= ip;
2750 	entry->parent_ip		= parent_ip;
2751 
2752 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2753 		if (static_branch_unlikely(&ftrace_exports_enabled))
2754 			ftrace_exports(event);
2755 		__buffer_unlock_commit(buffer, event);
2756 	}
2757 }
2758 
2759 #ifdef CONFIG_STACKTRACE
2760 
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING	4
2763 
2764 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765 
2766 struct ftrace_stack {
2767 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769 
2770 
2771 struct ftrace_stacks {
2772 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2773 };
2774 
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777 
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779 				 unsigned long flags,
2780 				 int skip, int pc, struct pt_regs *regs)
2781 {
2782 	struct trace_event_call *call = &event_kernel_stack;
2783 	struct ring_buffer_event *event;
2784 	unsigned int size, nr_entries;
2785 	struct ftrace_stack *fstack;
2786 	struct stack_entry *entry;
2787 	int stackidx;
2788 
2789 	/*
2790 	 * Add one, for this function and the call to stack_trace_save().
2791 	 * If regs is set, then these functions will not be in the way.
2792 	 */
2793 #ifndef CONFIG_UNWINDER_ORC
2794 	if (!regs)
2795 		skip++;
2796 #endif
2797 
2798 	/*
2799 	 * Since events can happen in NMIs there's no safe way to
2800 	 * use the per cpu ftrace_stacks. We reserve a nesting slot and if
2801 	 * an interrupt or NMI comes in, it will simply use the next slot,
2802 	 * up to FTRACE_KSTACK_NESTING levels deep.
2803 	 */
2804 	preempt_disable_notrace();
2805 
2806 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807 
2808 	/* This should never happen. If it does, yell once and skip */
2809 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2810 		goto out;
2811 
2812 	/*
2813 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814 	 * interrupt will either see the value pre increment or post
2815 	 * increment. If the interrupt happens pre increment it will have
2816 	 * restored the counter when it returns.  We just need a barrier to
2817 	 * keep gcc from moving things around.
2818 	 */
2819 	barrier();
2820 
2821 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822 	size = ARRAY_SIZE(fstack->calls);
2823 
2824 	if (regs) {
2825 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826 						   size, skip);
2827 	} else {
2828 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2829 	}
2830 
2831 	size = nr_entries * sizeof(unsigned long);
2832 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833 					    sizeof(*entry) + size, flags, pc);
2834 	if (!event)
2835 		goto out;
2836 	entry = ring_buffer_event_data(event);
2837 
2838 	memcpy(&entry->caller, fstack->calls, size);
2839 	entry->size = nr_entries;
2840 
2841 	if (!call_filter_check_discard(call, entry, buffer, event))
2842 		__buffer_unlock_commit(buffer, event);
2843 
2844  out:
2845 	/* Again, don't let gcc optimize things here */
2846 	barrier();
2847 	__this_cpu_dec(ftrace_stack_reserve);
2848 	preempt_enable_notrace();
2849 
2850 }
2851 
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853 				      struct ring_buffer *buffer,
2854 				      unsigned long flags,
2855 				      int skip, int pc, struct pt_regs *regs)
2856 {
2857 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858 		return;
2859 
2860 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862 
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864 		   int pc)
2865 {
2866 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867 
2868 	if (rcu_is_watching()) {
2869 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870 		return;
2871 	}
2872 
2873 	/*
2874 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875 	 * but if the above rcu_is_watching() failed, then the NMI
2876 	 * triggered someplace critical, and rcu_irq_enter() should
2877 	 * not be called from NMI.
2878 	 */
2879 	if (unlikely(in_nmi()))
2880 		return;
2881 
2882 	rcu_irq_enter_irqson();
2883 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884 	rcu_irq_exit_irqson();
2885 }
2886 
2887 /**
2888  * trace_dump_stack - record a stack back trace in the trace buffer
2889  * @skip: Number of functions to skip (helper handlers)
2890  */
2891 void trace_dump_stack(int skip)
2892 {
2893 	unsigned long flags;
2894 
2895 	if (tracing_disabled || tracing_selftest_running)
2896 		return;
2897 
2898 	local_save_flags(flags);
2899 
2900 #ifndef CONFIG_UNWINDER_ORC
2901 	/* Skip 1 to skip this function. */
2902 	skip++;
2903 #endif
2904 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905 			     flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
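
/*
 * Illustrative sketch: trace_dump_stack() is handy for recording how an
 * unexpected code path was reached without spamming the console.  A skip
 * of 0 keeps every caller above this (hypothetical) helper.
 */
static void example_report_unexpected(void)
{
	trace_dump_stack(0);
	trace_printk("unexpected path hit\n");
}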
2908 
2909 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911 
2912 static void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915 	struct trace_event_call *call = &event_user_stack;
2916 	struct ring_buffer_event *event;
2917 	struct userstack_entry *entry;
2918 
2919 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2920 		return;
2921 
2922 	/*
2923 	 * NMIs cannot handle page faults, even with fixups.
2924 	 * Saving the user stack can (and often does) fault.
2925 	 */
2926 	if (unlikely(in_nmi()))
2927 		return;
2928 
2929 	/*
2930 	 * prevent recursion, since the user stack tracing may
2931 	 * trigger other kernel events.
2932 	 */
2933 	preempt_disable();
2934 	if (__this_cpu_read(user_stack_count))
2935 		goto out;
2936 
2937 	__this_cpu_inc(user_stack_count);
2938 
2939 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2940 					    sizeof(*entry), flags, pc);
2941 	if (!event)
2942 		goto out_drop_count;
2943 	entry	= ring_buffer_event_data(event);
2944 
2945 	entry->tgid		= current->tgid;
2946 	memset(&entry->caller, 0, sizeof(entry->caller));
2947 
2948 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2949 	if (!call_filter_check_discard(call, entry, buffer, event))
2950 		__buffer_unlock_commit(buffer, event);
2951 
2952  out_drop_count:
2953 	__this_cpu_dec(user_stack_count);
2954  out:
2955 	preempt_enable();
2956 }
2957 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2958 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2959 				   unsigned long flags, int pc)
2960 {
2961 }
2962 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963 
2964 #endif /* CONFIG_STACKTRACE */
2965 
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968 	int nesting;
2969 	char buffer[4][TRACE_BUF_SIZE];
2970 };
2971 
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973 
2974 /*
2975  * This allows for lockless recording.  If we're nested too deeply, then
2976  * this returns NULL.
2977  */
2978 static char *get_trace_buf(void)
2979 {
2980 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981 
2982 	if (!buffer || buffer->nesting >= 4)
2983 		return NULL;
2984 
2985 	buffer->nesting++;
2986 
2987 	/* Interrupts must see nesting incremented before we use the buffer */
2988 	barrier();
2989 	return &buffer->buffer[buffer->nesting][0];
2990 }
2991 
2992 static void put_trace_buf(void)
2993 {
2994 	/* Don't let the decrement of nesting leak before this */
2995 	barrier();
2996 	this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
2998 
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001 	struct trace_buffer_struct *buffers;
3002 
3003 	buffers = alloc_percpu(struct trace_buffer_struct);
3004 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005 		return -ENOMEM;
3006 
3007 	trace_percpu_buffer = buffers;
3008 	return 0;
3009 }
3010 
3011 static int buffers_allocated;
3012 
3013 void trace_printk_init_buffers(void)
3014 {
3015 	if (buffers_allocated)
3016 		return;
3017 
3018 	if (alloc_percpu_trace_buffer())
3019 		return;
3020 
3021 	/* trace_printk() is for debug use only. Don't use it in production. */
3022 
3023 	pr_warn("\n");
3024 	pr_warn("**********************************************************\n");
3025 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3026 	pr_warn("**                                                      **\n");
3027 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3028 	pr_warn("**                                                      **\n");
3029 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3030 	pr_warn("** unsafe for production use.                           **\n");
3031 	pr_warn("**                                                      **\n");
3032 	pr_warn("** If you see this message and you are not debugging    **\n");
3033 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3034 	pr_warn("**                                                      **\n");
3035 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3036 	pr_warn("**********************************************************\n");
3037 
3038 	/* Expand the buffers to set size */
3039 	tracing_update_buffers();
3040 
3041 	buffers_allocated = 1;
3042 
3043 	/*
3044 	 * trace_printk_init_buffers() can be called by modules.
3045 	 * If that happens, then we need to start cmdline recording
3046 	 * directly here. If global_trace.trace_buffer.buffer is already
3047 	 * allocated here, then this was called by module code.
3048 	 */
3049 	if (global_trace.trace_buffer.buffer)
3050 		tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053 
3054 void trace_printk_start_comm(void)
3055 {
3056 	/* Start tracing comms if trace printk is set */
3057 	if (!buffers_allocated)
3058 		return;
3059 	tracing_start_cmdline_record();
3060 }
3061 
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064 	if (!buffers_allocated)
3065 		return;
3066 
3067 	if (enabled)
3068 		tracing_start_cmdline_record();
3069 	else
3070 		tracing_stop_cmdline_record();
3071 }
3072 
3073 /**
3074  * trace_vbprintk - write binary msg to tracing buffer
3075  *
3076  */
3077 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3078 {
3079 	struct trace_event_call *call = &event_bprint;
3080 	struct ring_buffer_event *event;
3081 	struct ring_buffer *buffer;
3082 	struct trace_array *tr = &global_trace;
3083 	struct bprint_entry *entry;
3084 	unsigned long flags;
3085 	char *tbuffer;
3086 	int len = 0, size, pc;
3087 
3088 	if (unlikely(tracing_selftest_running || tracing_disabled))
3089 		return 0;
3090 
3091 	/* Don't pollute graph traces with trace_vprintk internals */
3092 	pause_graph_tracing();
3093 
3094 	pc = preempt_count();
3095 	preempt_disable_notrace();
3096 
3097 	tbuffer = get_trace_buf();
3098 	if (!tbuffer) {
3099 		len = 0;
3100 		goto out_nobuffer;
3101 	}
3102 
3103 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3104 
3105 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3106 		goto out;
3107 
3108 	local_save_flags(flags);
3109 	size = sizeof(*entry) + sizeof(u32) * len;
3110 	buffer = tr->trace_buffer.buffer;
3111 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3112 					    flags, pc);
3113 	if (!event)
3114 		goto out;
3115 	entry = ring_buffer_event_data(event);
3116 	entry->ip			= ip;
3117 	entry->fmt			= fmt;
3118 
3119 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3120 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3121 		__buffer_unlock_commit(buffer, event);
3122 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3123 	}
3124 
3125 out:
3126 	put_trace_buf();
3127 
3128 out_nobuffer:
3129 	preempt_enable_notrace();
3130 	unpause_graph_tracing();
3131 
3132 	return len;
3133 }
3134 EXPORT_SYMBOL_GPL(trace_vbprintk);
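
/*
 * Illustrative sketch: trace_vbprintk() is the backend that the
 * trace_printk() macro reaches for formats with arguments, so debug code
 * simply uses the macro.  The helper name is hypothetical.
 */
static void example_debug_value(int value)
{
	trace_printk("example value: %d\n", value);
}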
3135 
3136 __printf(3, 0)
3137 static int
3138 __trace_array_vprintk(struct ring_buffer *buffer,
3139 		      unsigned long ip, const char *fmt, va_list args)
3140 {
3141 	struct trace_event_call *call = &event_print;
3142 	struct ring_buffer_event *event;
3143 	int len = 0, size, pc;
3144 	struct print_entry *entry;
3145 	unsigned long flags;
3146 	char *tbuffer;
3147 
3148 	if (tracing_disabled || tracing_selftest_running)
3149 		return 0;
3150 
3151 	/* Don't pollute graph traces with trace_vprintk internals */
3152 	pause_graph_tracing();
3153 
3154 	pc = preempt_count();
3155 	preempt_disable_notrace();
3156 
3157 
3158 	tbuffer = get_trace_buf();
3159 	if (!tbuffer) {
3160 		len = 0;
3161 		goto out_nobuffer;
3162 	}
3163 
3164 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3165 
3166 	local_save_flags(flags);
3167 	size = sizeof(*entry) + len + 1;
3168 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3169 					    flags, pc);
3170 	if (!event)
3171 		goto out;
3172 	entry = ring_buffer_event_data(event);
3173 	entry->ip = ip;
3174 
3175 	memcpy(&entry->buf, tbuffer, len + 1);
3176 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3177 		__buffer_unlock_commit(buffer, event);
3178 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3179 	}
3180 
3181 out:
3182 	put_trace_buf();
3183 
3184 out_nobuffer:
3185 	preempt_enable_notrace();
3186 	unpause_graph_tracing();
3187 
3188 	return len;
3189 }
3190 
3191 __printf(3, 0)
3192 int trace_array_vprintk(struct trace_array *tr,
3193 			unsigned long ip, const char *fmt, va_list args)
3194 {
3195 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3196 }
3197 
3198 __printf(3, 0)
3199 int trace_array_printk(struct trace_array *tr,
3200 		       unsigned long ip, const char *fmt, ...)
3201 {
3202 	int ret;
3203 	va_list ap;
3204 
3205 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3206 		return 0;
3207 
3208 	va_start(ap, fmt);
3209 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3210 	va_end(ap);
3211 	return ret;
3212 }
3213 EXPORT_SYMBOL_GPL(trace_array_printk);
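
/*
 * Illustrative sketch: writing into a specific trace instance rather than
 * the global buffer.  How the caller obtained @tr is outside this sketch;
 * _THIS_IP_ records the call site.  The helper name is hypothetical.
 */
static void example_instance_log(struct trace_array *tr, int err)
{
	trace_array_printk(tr, _THIS_IP_,
			   "example operation failed: %d\n", err);
}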
3214 
3215 __printf(3, 4)
3216 int trace_array_printk_buf(struct ring_buffer *buffer,
3217 			   unsigned long ip, const char *fmt, ...)
3218 {
3219 	int ret;
3220 	va_list ap;
3221 
3222 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3223 		return 0;
3224 
3225 	va_start(ap, fmt);
3226 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3227 	va_end(ap);
3228 	return ret;
3229 }
3230 
3231 __printf(2, 0)
3232 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3233 {
3234 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3235 }
3236 EXPORT_SYMBOL_GPL(trace_vprintk);
3237 
3238 static void trace_iterator_increment(struct trace_iterator *iter)
3239 {
3240 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3241 
3242 	iter->idx++;
3243 	if (buf_iter)
3244 		ring_buffer_read(buf_iter, NULL);
3245 }
3246 
3247 static struct trace_entry *
3248 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3249 		unsigned long *lost_events)
3250 {
3251 	struct ring_buffer_event *event;
3252 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3253 
3254 	if (buf_iter)
3255 		event = ring_buffer_iter_peek(buf_iter, ts);
3256 	else
3257 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3258 					 lost_events);
3259 
3260 	if (event) {
3261 		iter->ent_size = ring_buffer_event_length(event);
3262 		return ring_buffer_event_data(event);
3263 	}
3264 	iter->ent_size = 0;
3265 	return NULL;
3266 }
3267 
3268 static struct trace_entry *
3269 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3270 		  unsigned long *missing_events, u64 *ent_ts)
3271 {
3272 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3273 	struct trace_entry *ent, *next = NULL;
3274 	unsigned long lost_events = 0, next_lost = 0;
3275 	int cpu_file = iter->cpu_file;
3276 	u64 next_ts = 0, ts;
3277 	int next_cpu = -1;
3278 	int next_size = 0;
3279 	int cpu;
3280 
3281 	/*
3282 	 * If we are in a per_cpu trace file, don't bother iterating over
3283 	 * all CPUs; just peek at the requested CPU directly.
3284 	 */
3285 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3286 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3287 			return NULL;
3288 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3289 		if (ent_cpu)
3290 			*ent_cpu = cpu_file;
3291 
3292 		return ent;
3293 	}
3294 
3295 	for_each_tracing_cpu(cpu) {
3296 
3297 		if (ring_buffer_empty_cpu(buffer, cpu))
3298 			continue;
3299 
3300 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3301 
3302 		/*
3303 		 * Pick the entry with the smallest timestamp:
3304 		 */
3305 		if (ent && (!next || ts < next_ts)) {
3306 			next = ent;
3307 			next_cpu = cpu;
3308 			next_ts = ts;
3309 			next_lost = lost_events;
3310 			next_size = iter->ent_size;
3311 		}
3312 	}
3313 
3314 	iter->ent_size = next_size;
3315 
3316 	if (ent_cpu)
3317 		*ent_cpu = next_cpu;
3318 
3319 	if (ent_ts)
3320 		*ent_ts = next_ts;
3321 
3322 	if (missing_events)
3323 		*missing_events = next_lost;
3324 
3325 	return next;
3326 }
3327 
3328 /* Find the next real entry, without updating the iterator itself */
3329 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3330 					  int *ent_cpu, u64 *ent_ts)
3331 {
3332 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3333 }
3334 
3335 /* Find the next real entry, and increment the iterator to the next entry */
3336 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3337 {
3338 	iter->ent = __find_next_entry(iter, &iter->cpu,
3339 				      &iter->lost_events, &iter->ts);
3340 
3341 	if (iter->ent)
3342 		trace_iterator_increment(iter);
3343 
3344 	return iter->ent ? iter : NULL;
3345 }
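
/*
 * Illustrative sketch: the read-side loop built on
 * trace_find_next_entry_inc().  It assumes the caller has already set up
 * the iterator and holds trace_event_read_lock(), as the seq_file code
 * below does.  The helper name is hypothetical.
 */
static void example_walk_entries(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* iter->ent, iter->cpu and iter->ts describe one event */
		print_trace_line(iter);
	}
}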
3346 
3347 static void trace_consume(struct trace_iterator *iter)
3348 {
3349 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3350 			    &iter->lost_events);
3351 }
3352 
3353 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3354 {
3355 	struct trace_iterator *iter = m->private;
3356 	int i = (int)*pos;
3357 	void *ent;
3358 
3359 	WARN_ON_ONCE(iter->leftover);
3360 
3361 	(*pos)++;
3362 
3363 	/* can't go backwards */
3364 	if (iter->idx > i)
3365 		return NULL;
3366 
3367 	if (iter->idx < 0)
3368 		ent = trace_find_next_entry_inc(iter);
3369 	else
3370 		ent = iter;
3371 
3372 	while (ent && iter->idx < i)
3373 		ent = trace_find_next_entry_inc(iter);
3374 
3375 	iter->pos = *pos;
3376 
3377 	return ent;
3378 }
3379 
3380 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3381 {
3382 	struct ring_buffer_event *event;
3383 	struct ring_buffer_iter *buf_iter;
3384 	unsigned long entries = 0;
3385 	u64 ts;
3386 
3387 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3388 
3389 	buf_iter = trace_buffer_iter(iter, cpu);
3390 	if (!buf_iter)
3391 		return;
3392 
3393 	ring_buffer_iter_reset(buf_iter);
3394 
3395 	/*
3396 	 * We could have the case with the max latency tracers
3397 	 * that a reset never took place on a cpu. This is evident
3398 	 * by the timestamp being before the start of the buffer.
3399 	 */
3400 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3401 		if (ts >= iter->trace_buffer->time_start)
3402 			break;
3403 		entries++;
3404 		ring_buffer_read(buf_iter, NULL);
3405 	}
3406 
3407 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3408 }
3409 
3410 /*
3411  * The current tracer is copied to avoid taking a global lock
3412  * all around.
3413  */
3414 static void *s_start(struct seq_file *m, loff_t *pos)
3415 {
3416 	struct trace_iterator *iter = m->private;
3417 	struct trace_array *tr = iter->tr;
3418 	int cpu_file = iter->cpu_file;
3419 	void *p = NULL;
3420 	loff_t l = 0;
3421 	int cpu;
3422 
3423 	/*
3424 	 * copy the tracer to avoid using a global lock all around.
3425 	 * iter->trace is a copy of current_trace; the pointer to the
3426 	 * name may be used instead of a strcmp(), as iter->trace->name
3427 	 * will point to the same string as current_trace->name.
3428 	 */
3429 	mutex_lock(&trace_types_lock);
3430 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3431 		*iter->trace = *tr->current_trace;
3432 	mutex_unlock(&trace_types_lock);
3433 
3434 #ifdef CONFIG_TRACER_MAX_TRACE
3435 	if (iter->snapshot && iter->trace->use_max_tr)
3436 		return ERR_PTR(-EBUSY);
3437 #endif
3438 
3439 	if (!iter->snapshot)
3440 		atomic_inc(&trace_record_taskinfo_disabled);
3441 
3442 	if (*pos != iter->pos) {
3443 		iter->ent = NULL;
3444 		iter->cpu = 0;
3445 		iter->idx = -1;
3446 
3447 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3448 			for_each_tracing_cpu(cpu)
3449 				tracing_iter_reset(iter, cpu);
3450 		} else
3451 			tracing_iter_reset(iter, cpu_file);
3452 
3453 		iter->leftover = 0;
3454 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3455 			;
3456 
3457 	} else {
3458 		/*
3459 		 * If we overflowed the seq_file before, then we want
3460 		 * to just reuse the trace_seq buffer again.
3461 		 */
3462 		if (iter->leftover)
3463 			p = iter;
3464 		else {
3465 			l = *pos - 1;
3466 			p = s_next(m, p, &l);
3467 		}
3468 	}
3469 
3470 	trace_event_read_lock();
3471 	trace_access_lock(cpu_file);
3472 	return p;
3473 }
3474 
3475 static void s_stop(struct seq_file *m, void *p)
3476 {
3477 	struct trace_iterator *iter = m->private;
3478 
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 	if (iter->snapshot && iter->trace->use_max_tr)
3481 		return;
3482 #endif
3483 
3484 	if (!iter->snapshot)
3485 		atomic_dec(&trace_record_taskinfo_disabled);
3486 
3487 	trace_access_unlock(iter->cpu_file);
3488 	trace_event_read_unlock();
3489 }
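
/*
 * Illustrative sketch: s_start()/s_next()/s_stop() plug into a
 * struct seq_operations together with s_show(), which is defined further
 * down in this file.  The shape is the standard seq_file pattern:
 *
 *	static const struct seq_operations example_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 *
 * The "example" name is hypothetical; the real table is tracer_seq_ops.
 */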
3490 
3491 static void
3492 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3493 		      unsigned long *entries, int cpu)
3494 {
3495 	unsigned long count;
3496 
3497 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3498 	/*
3499 	 * If this buffer has skipped entries, then we hold all
3500 	 * entries for the trace and we need to ignore the
3501 	 * ones before the time stamp.
3502 	 */
3503 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3504 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3505 		/* total is the same as the entries */
3506 		*total = count;
3507 	} else
3508 		*total = count +
3509 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3510 	*entries = count;
3511 }
3512 
3513 static void
3514 get_total_entries(struct trace_buffer *buf,
3515 		  unsigned long *total, unsigned long *entries)
3516 {
3517 	unsigned long t, e;
3518 	int cpu;
3519 
3520 	*total = 0;
3521 	*entries = 0;
3522 
3523 	for_each_tracing_cpu(cpu) {
3524 		get_total_entries_cpu(buf, &t, &e, cpu);
3525 		*total += t;
3526 		*entries += e;
3527 	}
3528 }
3529 
3530 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3531 {
3532 	unsigned long total, entries;
3533 
3534 	if (!tr)
3535 		tr = &global_trace;
3536 
3537 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3538 
3539 	return entries;
3540 }
3541 
3542 unsigned long trace_total_entries(struct trace_array *tr)
3543 {
3544 	unsigned long total, entries;
3545 
3546 	if (!tr)
3547 		tr = &global_trace;
3548 
3549 	get_total_entries(&tr->trace_buffer, &total, &entries);
3550 
3551 	return entries;
3552 }
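
/*
 * Illustrative sketch: the entry-count helpers are meant for callers that
 * want a size estimate before printing (the kdb dump code is one such
 * user).  Passing NULL selects the global trace array.  The helper name
 * is hypothetical.
 */
static void example_report_usage(void)
{
	pr_info("global trace buffer: %lu entries (CPU0: %lu)\n",
		trace_total_entries(NULL),
		trace_total_entries_cpu(NULL, 0));
}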
3553 
3554 static void print_lat_help_header(struct seq_file *m)
3555 {
3556 	seq_puts(m, "#                  _------=> CPU#            \n"
3557 		    "#                 / _-----=> irqs-off        \n"
3558 		    "#                | / _----=> need-resched    \n"
3559 		    "#                || / _---=> hardirq/softirq \n"
3560 		    "#                ||| / _--=> preempt-depth   \n"
3561 		    "#                |||| /     delay            \n"
3562 		    "#  cmd     pid   ||||| time  |   caller      \n"
3563 		    "#     \\   /      |||||  \\    |   /         \n");
3564 }
3565 
3566 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3567 {
3568 	unsigned long total;
3569 	unsigned long entries;
3570 
3571 	get_total_entries(buf, &total, &entries);
3572 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3573 		   entries, total, num_online_cpus());
3574 	seq_puts(m, "#\n");
3575 }
3576 
3577 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3578 				   unsigned int flags)
3579 {
3580 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3581 
3582 	print_event_info(buf, m);
3583 
3584 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3585 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3586 }
3587 
3588 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3589 				       unsigned int flags)
3590 {
3591 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3592 	const char *space = "          ";
3593 	int prec = tgid ? 10 : 2;
3594 
3595 	print_event_info(buf, m);
3596 
3597 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3598 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3599 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3600 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3601 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3602 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3603 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3604 }
3605 
3606 void
3607 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3608 {
3609 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3610 	struct trace_buffer *buf = iter->trace_buffer;
3611 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3612 	struct tracer *type = iter->trace;
3613 	unsigned long entries;
3614 	unsigned long total;
3615 	const char *name = type->name;
3618 
3619 	get_total_entries(buf, &total, &entries);
3620 
3621 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3622 		   name, UTS_RELEASE);
3623 	seq_puts(m, "# -----------------------------------"
3624 		 "---------------------------------\n");
3625 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3626 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3627 		   nsecs_to_usecs(data->saved_latency),
3628 		   entries,
3629 		   total,
3630 		   buf->cpu,
3631 #if defined(CONFIG_PREEMPT_NONE)
3632 		   "server",
3633 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3634 		   "desktop",
3635 #elif defined(CONFIG_PREEMPT)
3636 		   "preempt",
3637 #else
3638 		   "unknown",
3639 #endif
3640 		   /* These are reserved for later use */
3641 		   0, 0, 0, 0);
3642 #ifdef CONFIG_SMP
3643 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3644 #else
3645 	seq_puts(m, ")\n");
3646 #endif
3647 	seq_puts(m, "#    -----------------\n");
3648 	seq_printf(m, "#    | task: %.16s-%d "
3649 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3650 		   data->comm, data->pid,
3651 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3652 		   data->policy, data->rt_priority);
3653 	seq_puts(m, "#    -----------------\n");
3654 
3655 	if (data->critical_start) {
3656 		seq_puts(m, "#  => started at: ");
3657 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3658 		trace_print_seq(m, &iter->seq);
3659 		seq_puts(m, "\n#  => ended at:   ");
3660 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3661 		trace_print_seq(m, &iter->seq);
3662 		seq_puts(m, "\n#\n");
3663 	}
3664 
3665 	seq_puts(m, "#\n");
3666 }
3667 
3668 static void test_cpu_buff_start(struct trace_iterator *iter)
3669 {
3670 	struct trace_seq *s = &iter->seq;
3671 	struct trace_array *tr = iter->tr;
3672 
3673 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3674 		return;
3675 
3676 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3677 		return;
3678 
3679 	if (cpumask_available(iter->started) &&
3680 	    cpumask_test_cpu(iter->cpu, iter->started))
3681 		return;
3682 
3683 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3684 		return;
3685 
3686 	if (cpumask_available(iter->started))
3687 		cpumask_set_cpu(iter->cpu, iter->started);
3688 
3689 	/* Don't print started cpu buffer for the first entry of the trace */
3690 	if (iter->idx > 1)
3691 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3692 				iter->cpu);
3693 }
3694 
3695 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3696 {
3697 	struct trace_array *tr = iter->tr;
3698 	struct trace_seq *s = &iter->seq;
3699 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3700 	struct trace_entry *entry;
3701 	struct trace_event *event;
3702 
3703 	entry = iter->ent;
3704 
3705 	test_cpu_buff_start(iter);
3706 
3707 	event = ftrace_find_event(entry->type);
3708 
3709 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3710 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3711 			trace_print_lat_context(iter);
3712 		else
3713 			trace_print_context(iter);
3714 	}
3715 
3716 	if (trace_seq_has_overflowed(s))
3717 		return TRACE_TYPE_PARTIAL_LINE;
3718 
3719 	if (event)
3720 		return event->funcs->trace(iter, sym_flags, event);
3721 
3722 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3723 
3724 	return trace_handle_return(s);
3725 }
3726 
3727 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3728 {
3729 	struct trace_array *tr = iter->tr;
3730 	struct trace_seq *s = &iter->seq;
3731 	struct trace_entry *entry;
3732 	struct trace_event *event;
3733 
3734 	entry = iter->ent;
3735 
3736 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3737 		trace_seq_printf(s, "%d %d %llu ",
3738 				 entry->pid, iter->cpu, iter->ts);
3739 
3740 	if (trace_seq_has_overflowed(s))
3741 		return TRACE_TYPE_PARTIAL_LINE;
3742 
3743 	event = ftrace_find_event(entry->type);
3744 	if (event)
3745 		return event->funcs->raw(iter, 0, event);
3746 
3747 	trace_seq_printf(s, "%d ?\n", entry->type);
3748 
3749 	return trace_handle_return(s);
3750 }
3751 
3752 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3753 {
3754 	struct trace_array *tr = iter->tr;
3755 	struct trace_seq *s = &iter->seq;
3756 	unsigned char newline = '\n';
3757 	struct trace_entry *entry;
3758 	struct trace_event *event;
3759 
3760 	entry = iter->ent;
3761 
3762 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3763 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3764 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3765 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3766 		if (trace_seq_has_overflowed(s))
3767 			return TRACE_TYPE_PARTIAL_LINE;
3768 	}
3769 
3770 	event = ftrace_find_event(entry->type);
3771 	if (event) {
3772 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3773 		if (ret != TRACE_TYPE_HANDLED)
3774 			return ret;
3775 	}
3776 
3777 	SEQ_PUT_FIELD(s, newline);
3778 
3779 	return trace_handle_return(s);
3780 }
3781 
3782 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3783 {
3784 	struct trace_array *tr = iter->tr;
3785 	struct trace_seq *s = &iter->seq;
3786 	struct trace_entry *entry;
3787 	struct trace_event *event;
3788 
3789 	entry = iter->ent;
3790 
3791 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3792 		SEQ_PUT_FIELD(s, entry->pid);
3793 		SEQ_PUT_FIELD(s, iter->cpu);
3794 		SEQ_PUT_FIELD(s, iter->ts);
3795 		if (trace_seq_has_overflowed(s))
3796 			return TRACE_TYPE_PARTIAL_LINE;
3797 	}
3798 
3799 	event = ftrace_find_event(entry->type);
3800 	return event ? event->funcs->binary(iter, 0, event) :
3801 		TRACE_TYPE_HANDLED;
3802 }
3803 
3804 int trace_empty(struct trace_iterator *iter)
3805 {
3806 	struct ring_buffer_iter *buf_iter;
3807 	int cpu;
3808 
3809 	/* If we are looking at one CPU buffer, only check that one */
3810 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3811 		cpu = iter->cpu_file;
3812 		buf_iter = trace_buffer_iter(iter, cpu);
3813 		if (buf_iter) {
3814 			if (!ring_buffer_iter_empty(buf_iter))
3815 				return 0;
3816 		} else {
3817 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3818 				return 0;
3819 		}
3820 		return 1;
3821 	}
3822 
3823 	for_each_tracing_cpu(cpu) {
3824 		buf_iter = trace_buffer_iter(iter, cpu);
3825 		if (buf_iter) {
3826 			if (!ring_buffer_iter_empty(buf_iter))
3827 				return 0;
3828 		} else {
3829 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3830 				return 0;
3831 		}
3832 	}
3833 
3834 	return 1;
3835 }
3836 
3837 /*  Called with trace_event_read_lock() held. */
3838 enum print_line_t print_trace_line(struct trace_iterator *iter)
3839 {
3840 	struct trace_array *tr = iter->tr;
3841 	unsigned long trace_flags = tr->trace_flags;
3842 	enum print_line_t ret;
3843 
3844 	if (iter->lost_events) {
3845 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3846 				 iter->cpu, iter->lost_events);
3847 		if (trace_seq_has_overflowed(&iter->seq))
3848 			return TRACE_TYPE_PARTIAL_LINE;
3849 	}
3850 
3851 	if (iter->trace && iter->trace->print_line) {
3852 		ret = iter->trace->print_line(iter);
3853 		if (ret != TRACE_TYPE_UNHANDLED)
3854 			return ret;
3855 	}
3856 
3857 	if (iter->ent->type == TRACE_BPUTS &&
3858 			trace_flags & TRACE_ITER_PRINTK &&
3859 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3860 		return trace_print_bputs_msg_only(iter);
3861 
3862 	if (iter->ent->type == TRACE_BPRINT &&
3863 			trace_flags & TRACE_ITER_PRINTK &&
3864 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3865 		return trace_print_bprintk_msg_only(iter);
3866 
3867 	if (iter->ent->type == TRACE_PRINT &&
3868 			trace_flags & TRACE_ITER_PRINTK &&
3869 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3870 		return trace_print_printk_msg_only(iter);
3871 
3872 	if (trace_flags & TRACE_ITER_BIN)
3873 		return print_bin_fmt(iter);
3874 
3875 	if (trace_flags & TRACE_ITER_HEX)
3876 		return print_hex_fmt(iter);
3877 
3878 	if (trace_flags & TRACE_ITER_RAW)
3879 		return print_raw_fmt(iter);
3880 
3881 	return print_trace_fmt(iter);
3882 }
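
/*
 * Format selection above, in order of precedence: a tracer's own
 * ->print_line() hook wins; then the msg-only shortcuts for
 * TRACE_BPUTS/TRACE_BPRINT/TRACE_PRINT entries when both PRINTK options
 * are set; then the bin, hex and raw trace options; and finally the
 * default formatted output of print_trace_fmt().
 */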
3883 
3884 void trace_latency_header(struct seq_file *m)
3885 {
3886 	struct trace_iterator *iter = m->private;
3887 	struct trace_array *tr = iter->tr;
3888 
3889 	/* print nothing if the buffers are empty */
3890 	if (trace_empty(iter))
3891 		return;
3892 
3893 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3894 		print_trace_header(m, iter);
3895 
3896 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3897 		print_lat_help_header(m);
3898 }
3899 
3900 void trace_default_header(struct seq_file *m)
3901 {
3902 	struct trace_iterator *iter = m->private;
3903 	struct trace_array *tr = iter->tr;
3904 	unsigned long trace_flags = tr->trace_flags;
3905 
3906 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3907 		return;
3908 
3909 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3910 		/* print nothing if the buffers are empty */
3911 		if (trace_empty(iter))
3912 			return;
3913 		print_trace_header(m, iter);
3914 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3915 			print_lat_help_header(m);
3916 	} else {
3917 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3918 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3919 				print_func_help_header_irq(iter->trace_buffer,
3920 							   m, trace_flags);
3921 			else
3922 				print_func_help_header(iter->trace_buffer, m,
3923 						       trace_flags);
3924 		}
3925 	}
3926 }
3927 
3928 static void test_ftrace_alive(struct seq_file *m)
3929 {
3930 	if (!ftrace_is_dead())
3931 		return;
3932 	seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3933 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3934 }
3935 
3936 #ifdef CONFIG_TRACER_MAX_TRACE
3937 static void show_snapshot_main_help(struct seq_file *m)
3938 {
3939 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3940 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3941 		    "#                      Takes a snapshot of the main buffer.\n"
3942 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3943 		    "#                      (Doesn't have to be '2'; it works with any number\n"
3944 		    "#                       that is not a '0' or '1')\n");
3945 }
3946 
3947 static void show_snapshot_percpu_help(struct seq_file *m)
3948 {
3949 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3950 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3951 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3952 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3953 #else
3954 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3955 		    "#                     Must use main snapshot file to allocate.\n");
3956 #endif
3957 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3958 		    "#                      (Doesn't have to be '2'; it works with any number\n"
3959 		    "#                       that is not a '0' or '1')\n");
3960 }
3961 
3962 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3963 {
3964 	if (iter->tr->allocated_snapshot)
3965 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3966 	else
3967 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3968 
3969 	seq_puts(m, "# Snapshot commands:\n");
3970 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3971 		show_snapshot_main_help(m);
3972 	else
3973 		show_snapshot_percpu_help(m);
3974 }
3975 #else
3976 /* Should never be called */
3977 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3978 #endif
3979 
3980 static int s_show(struct seq_file *m, void *v)
3981 {
3982 	struct trace_iterator *iter = v;
3983 	int ret;
3984 
3985 	if (iter->ent == NULL) {
3986 		if (iter->tr) {
3987 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3988 			seq_puts(m, "#\n");
3989 			test_ftrace_alive(m);
3990 		}
3991 		if (iter->snapshot && trace_empty(iter))
3992 			print_snapshot_help(m, iter);
3993 		else if (iter->trace && iter->trace->print_header)
3994 			iter->trace->print_header(m);
3995 		else
3996 			trace_default_header(m);
3997 
3998 	} else if (iter->leftover) {
3999 		/*
4000 		 * If we filled the seq_file buffer earlier, we
4001 		 * want to just show it now.
4002 		 */
4003 		ret = trace_print_seq(m, &iter->seq);
4004 
4005 		/* ret should this time be zero, but you never know */
4006 		iter->leftover = ret;
4007 
4008 	} else {
4009 		print_trace_line(iter);
4010 		ret = trace_print_seq(m, &iter->seq);
4011 		/*
4012 		 * If we overflow the seq_file buffer, then it will
4013 		 * ask us for this data again at start up.
4014 		 * Use that instead.
4015 		 *  ret is 0 if seq_file write succeeded.
4016 		 *        -1 otherwise.
4017 		 */
4018 		iter->leftover = ret;
4019 	}
4020 
4021 	return 0;
4022 }
4023 
4024 /*
4025  * Should be used after trace_array_get(); trace_types_lock
4026  * ensures that i_cdev was already initialized.
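 *
 * (trace_create_cpu_file(), defined later in this file, appears to store
 *  cpu + 1 in i_cdev, so a NULL i_cdev denotes the top-level "all CPUs"
 *  file and tracing_get_cpu() recovers the cpu number by subtracting one.)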
4027  */
4028 static inline int tracing_get_cpu(struct inode *inode)
4029 {
4030 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4031 		return (long)inode->i_cdev - 1;
4032 	return RING_BUFFER_ALL_CPUS;
4033 }
4034 
4035 static const struct seq_operations tracer_seq_ops = {
4036 	.start		= s_start,
4037 	.next		= s_next,
4038 	.stop		= s_stop,
4039 	.show		= s_show,
4040 };
4041 
4042 static struct trace_iterator *
4043 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4044 {
4045 	struct trace_array *tr = inode->i_private;
4046 	struct trace_iterator *iter;
4047 	int cpu;
4048 
4049 	if (tracing_disabled)
4050 		return ERR_PTR(-ENODEV);
4051 
4052 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4053 	if (!iter)
4054 		return ERR_PTR(-ENOMEM);
4055 
4056 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4057 				    GFP_KERNEL);
4058 	if (!iter->buffer_iter)
4059 		goto release;
4060 
4061 	/*
4062 	 * We make a copy of the current tracer to avoid concurrent
4063 	 * changes on it while we are reading.
4064 	 */
4065 	mutex_lock(&trace_types_lock);
4066 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4067 	if (!iter->trace)
4068 		goto fail;
4069 
4070 	*iter->trace = *tr->current_trace;
4071 
4072 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4073 		goto fail;
4074 
4075 	iter->tr = tr;
4076 
4077 #ifdef CONFIG_TRACER_MAX_TRACE
4078 	/* Currently only the top directory has a snapshot */
4079 	if (tr->current_trace->print_max || snapshot)
4080 		iter->trace_buffer = &tr->max_buffer;
4081 	else
4082 #endif
4083 		iter->trace_buffer = &tr->trace_buffer;
4084 	iter->snapshot = snapshot;
4085 	iter->pos = -1;
4086 	iter->cpu_file = tracing_get_cpu(inode);
4087 	mutex_init(&iter->mutex);
4088 
4089 	/* Notify the tracer early; before we stop tracing. */
4090 	if (iter->trace && iter->trace->open)
4091 		iter->trace->open(iter);
4092 
4093 	/* Annotate start of buffers if we had overruns */
4094 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4095 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4096 
4097 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4098 	if (trace_clocks[tr->clock_id].in_ns)
4099 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4100 
4101 	/* stop the trace while dumping if we are not opening "snapshot" */
4102 	if (!iter->snapshot)
4103 		tracing_stop_tr(tr);
4104 
4105 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4106 		for_each_tracing_cpu(cpu) {
4107 			iter->buffer_iter[cpu] =
4108 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4109 							 cpu, GFP_KERNEL);
4110 		}
4111 		ring_buffer_read_prepare_sync();
4112 		for_each_tracing_cpu(cpu) {
4113 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4114 			tracing_iter_reset(iter, cpu);
4115 		}
4116 	} else {
4117 		cpu = iter->cpu_file;
4118 		iter->buffer_iter[cpu] =
4119 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4120 						 cpu, GFP_KERNEL);
4121 		ring_buffer_read_prepare_sync();
4122 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4123 		tracing_iter_reset(iter, cpu);
4124 	}
4125 
4126 	mutex_unlock(&trace_types_lock);
4127 
4128 	return iter;
4129 
4130  fail:
4131 	mutex_unlock(&trace_types_lock);
4132 	kfree(iter->trace);
4133 	kfree(iter->buffer_iter);
4134 release:
4135 	seq_release_private(inode, file);
4136 	return ERR_PTR(-ENOMEM);
4137 }
4138 
4139 int tracing_open_generic(struct inode *inode, struct file *filp)
4140 {
4141 	if (tracing_disabled)
4142 		return -ENODEV;
4143 
4144 	filp->private_data = inode->i_private;
4145 	return 0;
4146 }
4147 
4148 bool tracing_is_disabled(void)
4149 {
4150 	return tracing_disabled ? true : false;
4151 }
4152 
4153 /*
4154  * Open and update trace_array ref count.
4155  * Must have the current trace_array passed to it.
4156  */
4157 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4158 {
4159 	struct trace_array *tr = inode->i_private;
4160 
4161 	if (tracing_disabled)
4162 		return -ENODEV;
4163 
4164 	if (trace_array_get(tr) < 0)
4165 		return -ENODEV;
4166 
4167 	filp->private_data = inode->i_private;
4168 
4169 	return 0;
4170 }
4171 
4172 static int tracing_release(struct inode *inode, struct file *file)
4173 {
4174 	struct trace_array *tr = inode->i_private;
4175 	struct seq_file *m = file->private_data;
4176 	struct trace_iterator *iter;
4177 	int cpu;
4178 
4179 	if (!(file->f_mode & FMODE_READ)) {
4180 		trace_array_put(tr);
4181 		return 0;
4182 	}
4183 
4184 	/* Writes do not use seq_file */
4185 	iter = m->private;
4186 	mutex_lock(&trace_types_lock);
4187 
4188 	for_each_tracing_cpu(cpu) {
4189 		if (iter->buffer_iter[cpu])
4190 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4191 	}
4192 
4193 	if (iter->trace && iter->trace->close)
4194 		iter->trace->close(iter);
4195 
4196 	if (!iter->snapshot)
4197 		/* reenable tracing if it was previously enabled */
4198 		tracing_start_tr(tr);
4199 
4200 	__trace_array_put(tr);
4201 
4202 	mutex_unlock(&trace_types_lock);
4203 
4204 	mutex_destroy(&iter->mutex);
4205 	free_cpumask_var(iter->started);
4206 	kfree(iter->trace);
4207 	kfree(iter->buffer_iter);
4208 	seq_release_private(inode, file);
4209 
4210 	return 0;
4211 }
4212 
4213 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4214 {
4215 	struct trace_array *tr = inode->i_private;
4216 
4217 	trace_array_put(tr);
4218 	return 0;
4219 }
4220 
4221 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4222 {
4223 	struct trace_array *tr = inode->i_private;
4224 
4225 	trace_array_put(tr);
4226 
4227 	return single_release(inode, file);
4228 }
4229 
4230 static int tracing_open(struct inode *inode, struct file *file)
4231 {
4232 	struct trace_array *tr = inode->i_private;
4233 	struct trace_iterator *iter;
4234 	int ret = 0;
4235 
4236 	if (trace_array_get(tr) < 0)
4237 		return -ENODEV;
4238 
4239 	/* If this file was open for write, then erase contents */
4240 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4241 		int cpu = tracing_get_cpu(inode);
4242 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4243 
4244 #ifdef CONFIG_TRACER_MAX_TRACE
4245 		if (tr->current_trace->print_max)
4246 			trace_buf = &tr->max_buffer;
4247 #endif
4248 
4249 		if (cpu == RING_BUFFER_ALL_CPUS)
4250 			tracing_reset_online_cpus(trace_buf);
4251 		else
4252 			tracing_reset(trace_buf, cpu);
4253 	}
4254 
4255 	if (file->f_mode & FMODE_READ) {
4256 		iter = __tracing_open(inode, file, false);
4257 		if (IS_ERR(iter))
4258 			ret = PTR_ERR(iter);
4259 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4260 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4261 	}
4262 
4263 	if (ret < 0)
4264 		trace_array_put(tr);
4265 
4266 	return ret;
4267 }
4268 
4269 /*
4270  * Some tracers are not suitable for instance buffers.
4271  * A tracer is always available for the global array (toplevel)
4272  * or if it explicitly states that it is.
4273  */
4274 static bool
4275 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4276 {
4277 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4278 }
4279 
4280 /* Find the next tracer that this trace array may use */
4281 static struct tracer *
4282 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4283 {
4284 	while (t && !trace_ok_for_array(t, tr))
4285 		t = t->next;
4286 
4287 	return t;
4288 }
4289 
4290 static void *
4291 t_next(struct seq_file *m, void *v, loff_t *pos)
4292 {
4293 	struct trace_array *tr = m->private;
4294 	struct tracer *t = v;
4295 
4296 	(*pos)++;
4297 
4298 	if (t)
4299 		t = get_tracer_for_array(tr, t->next);
4300 
4301 	return t;
4302 }
4303 
4304 static void *t_start(struct seq_file *m, loff_t *pos)
4305 {
4306 	struct trace_array *tr = m->private;
4307 	struct tracer *t;
4308 	loff_t l = 0;
4309 
4310 	mutex_lock(&trace_types_lock);
4311 
4312 	t = get_tracer_for_array(tr, trace_types);
4313 	for (; t && l < *pos; t = t_next(m, t, &l))
4314 			;
4315 
4316 	return t;
4317 }
4318 
4319 static void t_stop(struct seq_file *m, void *p)
4320 {
4321 	mutex_unlock(&trace_types_lock);
4322 }
4323 
4324 static int t_show(struct seq_file *m, void *v)
4325 {
4326 	struct tracer *t = v;
4327 
4328 	if (!t)
4329 		return 0;
4330 
4331 	seq_puts(m, t->name);
4332 	if (t->next)
4333 		seq_putc(m, ' ');
4334 	else
4335 		seq_putc(m, '\n');
4336 
4337 	return 0;
4338 }
4339 
4340 static const struct seq_operations show_traces_seq_ops = {
4341 	.start		= t_start,
4342 	.next		= t_next,
4343 	.stop		= t_stop,
4344 	.show		= t_show,
4345 };
4346 
4347 static int show_traces_open(struct inode *inode, struct file *file)
4348 {
4349 	struct trace_array *tr = inode->i_private;
4350 	struct seq_file *m;
4351 	int ret;
4352 
4353 	if (tracing_disabled)
4354 		return -ENODEV;
4355 
4356 	ret = seq_open(file, &show_traces_seq_ops);
4357 	if (ret)
4358 		return ret;
4359 
4360 	m = file->private_data;
4361 	m->private = tr;
4362 
4363 	return 0;
4364 }
4365 
4366 static ssize_t
4367 tracing_write_stub(struct file *filp, const char __user *ubuf,
4368 		   size_t count, loff_t *ppos)
4369 {
4370 	return count;
4371 }
4372 
4373 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4374 {
4375 	int ret;
4376 
4377 	if (file->f_mode & FMODE_READ)
4378 		ret = seq_lseek(file, offset, whence);
4379 	else
4380 		file->f_pos = ret = 0;
4381 
4382 	return ret;
4383 }
4384 
4385 static const struct file_operations tracing_fops = {
4386 	.open		= tracing_open,
4387 	.read		= seq_read,
4388 	.write		= tracing_write_stub,
4389 	.llseek		= tracing_lseek,
4390 	.release	= tracing_release,
4391 };
4392 
4393 static const struct file_operations show_traces_fops = {
4394 	.open		= show_traces_open,
4395 	.read		= seq_read,
4396 	.release	= seq_release,
4397 	.llseek		= seq_lseek,
4398 };
4399 
4400 static ssize_t
4401 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4402 		     size_t count, loff_t *ppos)
4403 {
4404 	struct trace_array *tr = file_inode(filp)->i_private;
4405 	char *mask_str;
4406 	int len;
4407 
4408 	len = snprintf(NULL, 0, "%*pb\n",
4409 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4410 	mask_str = kmalloc(len, GFP_KERNEL);
4411 	if (!mask_str)
4412 		return -ENOMEM;
4413 
4414 	len = snprintf(mask_str, len, "%*pb\n",
4415 		       cpumask_pr_args(tr->tracing_cpumask));
4416 	if (len >= count) {
4417 		count = -EINVAL;
4418 		goto out_err;
4419 	}
4420 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4421 
4422 out_err:
4423 	kfree(mask_str);
4424 
4425 	return count;
4426 }
4427 
4428 static ssize_t
4429 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4430 		      size_t count, loff_t *ppos)
4431 {
4432 	struct trace_array *tr = file_inode(filp)->i_private;
4433 	cpumask_var_t tracing_cpumask_new;
4434 	int err, cpu;
4435 
4436 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4437 		return -ENOMEM;
4438 
4439 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4440 	if (err)
4441 		goto err_unlock;
4442 
4443 	local_irq_disable();
4444 	arch_spin_lock(&tr->max_lock);
4445 	for_each_tracing_cpu(cpu) {
4446 		/*
4447 		 * Increase/decrease the disabled counter if we are
4448 		 * about to flip a bit in the cpumask:
4449 		 */
4450 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4451 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4452 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4453 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4454 		}
4455 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4456 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4457 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4458 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4459 		}
4460 	}
4461 	arch_spin_unlock(&tr->max_lock);
4462 	local_irq_enable();
4463 
4464 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4465 	free_cpumask_var(tracing_cpumask_new);
4466 
4467 	return count;
4468 
4469 err_unlock:
4470 	free_cpumask_var(tracing_cpumask_new);
4471 
4472 	return err;
4473 }
4474 
4475 static const struct file_operations tracing_cpumask_fops = {
4476 	.open		= tracing_open_generic_tr,
4477 	.read		= tracing_cpumask_read,
4478 	.write		= tracing_cpumask_write,
4479 	.release	= tracing_release_generic_tr,
4480 	.llseek		= generic_file_llseek,
4481 };
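
/*
 * The handlers above back the tracing_cpumask file.  Reads print the mask
 * with the "%*pb" cpumask format; writes are parsed by cpumask_parse_user(),
 * which takes a hex mask string.  Illustrative use (output width depends on
 * nr_cpu_ids):
 *
 *	# echo 3 > tracing_cpumask	(trace only CPUs 0 and 1)
 *	# cat tracing_cpumask
 *	00000003
 */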
4482 
4483 static int tracing_trace_options_show(struct seq_file *m, void *v)
4484 {
4485 	struct tracer_opt *trace_opts;
4486 	struct trace_array *tr = m->private;
4487 	u32 tracer_flags;
4488 	int i;
4489 
4490 	mutex_lock(&trace_types_lock);
4491 	tracer_flags = tr->current_trace->flags->val;
4492 	trace_opts = tr->current_trace->flags->opts;
4493 
4494 	for (i = 0; trace_options[i]; i++) {
4495 		if (tr->trace_flags & (1 << i))
4496 			seq_printf(m, "%s\n", trace_options[i]);
4497 		else
4498 			seq_printf(m, "no%s\n", trace_options[i]);
4499 	}
4500 
4501 	for (i = 0; trace_opts[i].name; i++) {
4502 		if (tracer_flags & trace_opts[i].bit)
4503 			seq_printf(m, "%s\n", trace_opts[i].name);
4504 		else
4505 			seq_printf(m, "no%s\n", trace_opts[i].name);
4506 	}
4507 	mutex_unlock(&trace_types_lock);
4508 
4509 	return 0;
4510 }
4511 
4512 static int __set_tracer_option(struct trace_array *tr,
4513 			       struct tracer_flags *tracer_flags,
4514 			       struct tracer_opt *opts, int neg)
4515 {
4516 	struct tracer *trace = tracer_flags->trace;
4517 	int ret;
4518 
4519 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4520 	if (ret)
4521 		return ret;
4522 
4523 	if (neg)
4524 		tracer_flags->val &= ~opts->bit;
4525 	else
4526 		tracer_flags->val |= opts->bit;
4527 	return 0;
4528 }
4529 
4530 /* Try to assign a tracer specific option */
4531 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4532 {
4533 	struct tracer *trace = tr->current_trace;
4534 	struct tracer_flags *tracer_flags = trace->flags;
4535 	struct tracer_opt *opts = NULL;
4536 	int i;
4537 
4538 	for (i = 0; tracer_flags->opts[i].name; i++) {
4539 		opts = &tracer_flags->opts[i];
4540 
4541 		if (strcmp(cmp, opts->name) == 0)
4542 			return __set_tracer_option(tr, trace->flags, opts, neg);
4543 	}
4544 
4545 	return -EINVAL;
4546 }
4547 
4548 /* Some tracers require overwrite to stay enabled */
4549 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4550 {
4551 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4552 		return -1;
4553 
4554 	return 0;
4555 }
4556 
4557 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4558 {
4559 	/* do nothing if the flag already matches the requested state */
4560 	if (!!(tr->trace_flags & mask) == !!enabled)
4561 		return 0;
4562 
4563 	/* Give the tracer a chance to approve the change */
4564 	if (tr->current_trace->flag_changed)
4565 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4566 			return -EINVAL;
4567 
4568 	if (enabled)
4569 		tr->trace_flags |= mask;
4570 	else
4571 		tr->trace_flags &= ~mask;
4572 
4573 	if (mask == TRACE_ITER_RECORD_CMD)
4574 		trace_event_enable_cmd_record(enabled);
4575 
4576 	if (mask == TRACE_ITER_RECORD_TGID) {
4577 		if (!tgid_map)
4578 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4579 					   sizeof(*tgid_map),
4580 					   GFP_KERNEL);
4581 		if (!tgid_map) {
4582 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4583 			return -ENOMEM;
4584 		}
4585 
4586 		trace_event_enable_tgid_record(enabled);
4587 	}
4588 
4589 	if (mask == TRACE_ITER_EVENT_FORK)
4590 		trace_event_follow_fork(tr, enabled);
4591 
4592 	if (mask == TRACE_ITER_FUNC_FORK)
4593 		ftrace_pid_follow_fork(tr, enabled);
4594 
4595 	if (mask == TRACE_ITER_OVERWRITE) {
4596 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4597 #ifdef CONFIG_TRACER_MAX_TRACE
4598 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4599 #endif
4600 	}
4601 
4602 	if (mask == TRACE_ITER_PRINTK) {
4603 		trace_printk_start_stop_comm(enabled);
4604 		trace_printk_control(enabled);
4605 	}
4606 
4607 	return 0;
4608 }
4609 
4610 static int trace_set_options(struct trace_array *tr, char *option)
4611 {
4612 	char *cmp;
4613 	int neg = 0;
4614 	int ret;
4615 	size_t orig_len = strlen(option);
4616 	int len;
4617 
4618 	cmp = strstrip(option);
4619 
4620 	len = str_has_prefix(cmp, "no");
4621 	if (len)
4622 		neg = 1;
4623 
4624 	cmp += len;
4625 
4626 	mutex_lock(&trace_types_lock);
4627 
4628 	ret = match_string(trace_options, -1, cmp);
4629 	/* If no option could be set, test the specific tracer options */
4630 	if (ret < 0)
4631 		ret = set_tracer_option(tr, cmp, neg);
4632 	else
4633 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4634 
4635 	mutex_unlock(&trace_types_lock);
4636 
4637 	/*
4638 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4639 	 * turn it back into a space.
4640 	 */
4641 	if (orig_len > strlen(option))
4642 		option[strlen(option)] = ' ';
4643 
4644 	return ret;
4645 }
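
/*
 * trace_set_options() is the common option-string parser: a leading "no"
 * negates, names found in trace_options[] toggle global flags through
 * set_tracer_flag(), and unknown names fall through to the current
 * tracer's private options via set_tracer_option().  So, illustratively,
 * writing "nofoo" through the trace_options file clears option "foo",
 * whether "foo" is a global or a tracer-specific option name.
 */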
4646 
4647 static void __init apply_trace_boot_options(void)
4648 {
4649 	char *buf = trace_boot_options_buf;
4650 	char *option;
4651 
4652 	while (true) {
4653 		option = strsep(&buf, ",");
4654 
4655 		if (!option)
4656 			break;
4657 
4658 		if (*option)
4659 			trace_set_options(&global_trace, option);
4660 
4661 		/* Put back the comma to allow this to be called again */
4662 		if (buf)
4663 			*(buf - 1) = ',';
4664 	}
4665 }
4666 
4667 static ssize_t
4668 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4669 			size_t cnt, loff_t *ppos)
4670 {
4671 	struct seq_file *m = filp->private_data;
4672 	struct trace_array *tr = m->private;
4673 	char buf[64];
4674 	int ret;
4675 
4676 	if (cnt >= sizeof(buf))
4677 		return -EINVAL;
4678 
4679 	if (copy_from_user(buf, ubuf, cnt))
4680 		return -EFAULT;
4681 
4682 	buf[cnt] = 0;
4683 
4684 	ret = trace_set_options(tr, buf);
4685 	if (ret < 0)
4686 		return ret;
4687 
4688 	*ppos += cnt;
4689 
4690 	return cnt;
4691 }
4692 
4693 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4694 {
4695 	struct trace_array *tr = inode->i_private;
4696 	int ret;
4697 
4698 	if (tracing_disabled)
4699 		return -ENODEV;
4700 
4701 	if (trace_array_get(tr) < 0)
4702 		return -ENODEV;
4703 
4704 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4705 	if (ret < 0)
4706 		trace_array_put(tr);
4707 
4708 	return ret;
4709 }
4710 
4711 static const struct file_operations tracing_iter_fops = {
4712 	.open		= tracing_trace_options_open,
4713 	.read		= seq_read,
4714 	.llseek		= seq_lseek,
4715 	.release	= tracing_single_release_tr,
4716 	.write		= tracing_trace_options_write,
4717 };
4718 
4719 static const char readme_msg[] =
4720 	"tracing mini-HOWTO:\n\n"
4721 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4722 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4723 	" Important files:\n"
4724 	"  trace\t\t\t- The static contents of the buffer\n"
4725 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4726 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4727 	"  current_tracer\t- function and latency tracers\n"
4728 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4729 	"  error_log\t- error log for failed commands (that support it)\n"
4730 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4731 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4732 	"  trace_clock\t\t- change the clock used to order events\n"
4733 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4734 	"      global:   Synced across CPUs but slows tracing down.\n"
4735 	"     counter:   Not a clock, but just an increment\n"
4736 	"      uptime:   Jiffy counter from time of boot\n"
4737 	"        perf:   Same clock that perf events use\n"
4738 #ifdef CONFIG_X86_64
4739 	"     x86-tsc:   TSC cycle counter\n"
4740 #endif
4741 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4742 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4743 	"    absolute:   Absolute (standalone) timestamp\n"
4744 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4745 	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4746 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4747 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4748 	"\t\t\t  Remove sub-buffer with rmdir\n"
4749 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4750 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4751 	"\t\t\t  option name\n"
4752 	"  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
4753 #ifdef CONFIG_DYNAMIC_FTRACE
4754 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4755 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4756 	"\t\t\t  functions\n"
4757 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4758 	"\t     modules: Can select a group via module\n"
4759 	"\t      Format: :mod:<module-name>\n"
4760 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4761 	"\t    triggers: a command to perform when function is hit\n"
4762 	"\t      Format: <function>:<trigger>[:count]\n"
4763 	"\t     trigger: traceon, traceoff\n"
4764 	"\t\t      enable_event:<system>:<event>\n"
4765 	"\t\t      disable_event:<system>:<event>\n"
4766 #ifdef CONFIG_STACKTRACE
4767 	"\t\t      stacktrace\n"
4768 #endif
4769 #ifdef CONFIG_TRACER_SNAPSHOT
4770 	"\t\t      snapshot\n"
4771 #endif
4772 	"\t\t      dump\n"
4773 	"\t\t      cpudump\n"
4774 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4775 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4776 	"\t     The first one will disable tracing every time do_fault is hit\n"
4777 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4778 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4779 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4780 	"\t       the counter will not decrement. It only decrements when the\n"
4781 	"\t       trigger did work\n"
4782 	"\t     To remove trigger without count:\n"
4783 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4784 	"\t     To remove trigger with a count:\n"
4785 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4786 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4787 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4788 	"\t    modules: Can select a group via module command :mod:\n"
4789 	"\t    Does not accept triggers\n"
4790 #endif /* CONFIG_DYNAMIC_FTRACE */
4791 #ifdef CONFIG_FUNCTION_TRACER
4792 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4793 	"\t\t    (function)\n"
4794 #endif
4795 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4796 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4797 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4798 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4799 #endif
4800 #ifdef CONFIG_TRACER_SNAPSHOT
4801 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4802 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4803 	"\t\t\t  information\n"
4804 #endif
4805 #ifdef CONFIG_STACK_TRACER
4806 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4807 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4808 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4809 	"\t\t\t  new trace)\n"
4810 #ifdef CONFIG_DYNAMIC_FTRACE
4811 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4812 	"\t\t\t  traces\n"
4813 #endif
4814 #endif /* CONFIG_STACK_TRACER */
4815 #ifdef CONFIG_DYNAMIC_EVENTS
4816 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4817 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4818 #endif
4819 #ifdef CONFIG_KPROBE_EVENTS
4820 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4821 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4822 #endif
4823 #ifdef CONFIG_UPROBE_EVENTS
4824 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4825 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4826 #endif
4827 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4828 	"\t  accepts: event-definitions (one definition per line)\n"
4829 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4830 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4831 #ifdef CONFIG_HIST_TRIGGERS
4832 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4833 #endif
4834 	"\t           -:[<group>/]<event>\n"
4835 #ifdef CONFIG_KPROBE_EVENTS
4836 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4837 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4838 #endif
4839 #ifdef CONFIG_UPROBE_EVENTS
4840 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4841 #endif
4842 	"\t     args: <name>=fetcharg[:type]\n"
4843 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4844 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4845 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4846 #else
4847 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4848 #endif
4849 	"\t           +|-[u]<offset>(<fetcharg>)\n"
4850 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4851 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4852 	"\t           <type>\\[<array-size>\\]\n"
4853 #ifdef CONFIG_HIST_TRIGGERS
4854 	"\t    field: <stype> <name>;\n"
4855 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4856 	"\t           [unsigned] char/int/long\n"
4857 #endif
4858 #endif
4859 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4860 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4861 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4862 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4863 	"\t\t\t  events\n"
4864 	"      filter\t\t- If set, only events passing filter are traced\n"
4865 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4866 	"\t\t\t  <event>:\n"
4867 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4868 	"      filter\t\t- If set, only events passing filter are traced\n"
4869 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4870 	"\t    Format: <trigger>[:count][if <filter>]\n"
4871 	"\t   trigger: traceon, traceoff\n"
4872 	"\t            enable_event:<system>:<event>\n"
4873 	"\t            disable_event:<system>:<event>\n"
4874 #ifdef CONFIG_HIST_TRIGGERS
4875 	"\t            enable_hist:<system>:<event>\n"
4876 	"\t            disable_hist:<system>:<event>\n"
4877 #endif
4878 #ifdef CONFIG_STACKTRACE
4879 	"\t\t    stacktrace\n"
4880 #endif
4881 #ifdef CONFIG_TRACER_SNAPSHOT
4882 	"\t\t    snapshot\n"
4883 #endif
4884 #ifdef CONFIG_HIST_TRIGGERS
4885 	"\t\t    hist (see below)\n"
4886 #endif
4887 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4888 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4889 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4890 	"\t                  events/block/block_unplug/trigger\n"
4891 	"\t   The first disables tracing every time block_unplug is hit.\n"
4892 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4893 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4894 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4895 	"\t   Like function triggers, the counter is only decremented if it\n"
4896 	"\t    enabled or disabled tracing.\n"
4897 	"\t   To remove a trigger without a count:\n"
4898 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4899 	"\t   To remove a trigger with a count:\n"
4900 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4901 	"\t   Filters can be ignored when removing a trigger.\n"
4902 #ifdef CONFIG_HIST_TRIGGERS
4903 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4904 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4905 	"\t            [:values=<field1[,field2,...]>]\n"
4906 	"\t            [:sort=<field1[,field2,...]>]\n"
4907 	"\t            [:size=#entries]\n"
4908 	"\t            [:pause][:continue][:clear]\n"
4909 	"\t            [:name=histname1]\n"
4910 	"\t            [:<handler>.<action>]\n"
4911 	"\t            [if <filter>]\n\n"
4912 	"\t    When a matching event is hit, an entry is added to a hash\n"
4913 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4914 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4915 	"\t    correspond to fields in the event's format description.  Keys\n"
4916 	"\t    can be any field, or the special string 'stacktrace'.\n"
4917 	"\t    Compound keys consisting of up to two fields can be specified\n"
4918 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4919 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4920 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4921 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4922 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4923 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4924 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4925 	"\t    its histogram data will be shared with other triggers of the\n"
4926 	"\t    same name, and trigger hits will update this common data.\n\n"
4927 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4928 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4929 	"\t    triggers attached to an event, there will be a table for each\n"
4930 	"\t    trigger in the output.  The table displayed for a named\n"
4931 	"\t    trigger will be the same as any other instance having the\n"
4932 	"\t    same name.  The default format used to display a given field\n"
4933 	"\t    can be modified by appending any of the following modifiers\n"
4934 	"\t    to the field name, as applicable:\n\n"
4935 	"\t            .hex        display a number as a hex value\n"
4936 	"\t            .sym        display an address as a symbol\n"
4937 	"\t            .sym-offset display an address as a symbol and offset\n"
4938 	"\t            .execname   display a common_pid as a program name\n"
4939 	"\t            .syscall    display a syscall id as a syscall name\n"
4940 	"\t            .log2       display log2 value rather than raw number\n"
4941 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4942 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4943 	"\t    trigger or to start a hist trigger but not log any events\n"
4944 	"\t    until told to do so.  'continue' can be used to start or\n"
4945 	"\t    restart a paused hist trigger.\n\n"
4946 	"\t    The 'clear' parameter will clear the contents of a running\n"
4947 	"\t    hist trigger and leave its current paused/active state\n"
4948 	"\t    unchanged.\n\n"
4949 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4950 	"\t    have one event conditionally start and stop another event's\n"
4951 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4952 	"\t    the enable_event and disable_event triggers.\n\n"
4953 	"\t    Hist trigger handlers and actions are executed whenever\n"
4954 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4955 	"\t        <handler>.<action>\n\n"
4956 	"\t    The available handlers are:\n\n"
4957 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4958 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4959 	"\t        onchange(var)            - invoke action if var changes\n\n"
4960 	"\t    The available actions are:\n\n"
4961 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4962 	"\t        save(field,...)                      - save current event fields\n"
4963 #ifdef CONFIG_TRACER_SNAPSHOT
4964 	"\t        snapshot()                           - snapshot the trace buffer\n"
4965 #endif
4966 #endif
4967 ;
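
/*
 * An illustrative hist-trigger invocation matching the format documented
 * above (event name and field are examples only, assuming such an event
 * exists and CONFIG_HIST_TRIGGERS is set):
 *
 *	# echo 'hist:keys=common_pid:sort=hitcount' > \
 *		events/sched/sched_switch/trigger
 *	# cat events/sched/sched_switch/hist
 */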
4968 
4969 static ssize_t
4970 tracing_readme_read(struct file *filp, char __user *ubuf,
4971 		       size_t cnt, loff_t *ppos)
4972 {
4973 	return simple_read_from_buffer(ubuf, cnt, ppos,
4974 					readme_msg, strlen(readme_msg));
4975 }
4976 
4977 static const struct file_operations tracing_readme_fops = {
4978 	.open		= tracing_open_generic,
4979 	.read		= tracing_readme_read,
4980 	.llseek		= generic_file_llseek,
4981 };
4982 
4983 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4984 {
4985 	int *ptr = v;
4986 
4987 	if (*pos || m->count)
4988 		ptr++;
4989 
4990 	(*pos)++;
4991 
4992 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4993 		if (trace_find_tgid(*ptr))
4994 			return ptr;
4995 	}
4996 
4997 	return NULL;
4998 }
4999 
5000 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5001 {
5002 	void *v;
5003 	loff_t l = 0;
5004 
5005 	if (!tgid_map)
5006 		return NULL;
5007 
5008 	v = &tgid_map[0];
5009 	while (l <= *pos) {
5010 		v = saved_tgids_next(m, v, &l);
5011 		if (!v)
5012 			return NULL;
5013 	}
5014 
5015 	return v;
5016 }
5017 
5018 static void saved_tgids_stop(struct seq_file *m, void *v)
5019 {
5020 }
5021 
5022 static int saved_tgids_show(struct seq_file *m, void *v)
5023 {
5024 	int pid = (int *)v - tgid_map;
5025 
5026 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5027 	return 0;
5028 }
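
/*
 * Each record is shown as "<pid> <tgid>", one per line, so a read of the
 * file served by these seq ops (the saved_tgids file created elsewhere in
 * this code) looks roughly like:
 *
 *	1024 1024
 *	1025 1024
 */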
5029 
5030 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5031 	.start		= saved_tgids_start,
5032 	.stop		= saved_tgids_stop,
5033 	.next		= saved_tgids_next,
5034 	.show		= saved_tgids_show,
5035 };
5036 
5037 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5038 {
5039 	if (tracing_disabled)
5040 		return -ENODEV;
5041 
5042 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5043 }
5044 
5045 
5046 static const struct file_operations tracing_saved_tgids_fops = {
5047 	.open		= tracing_saved_tgids_open,
5048 	.read		= seq_read,
5049 	.llseek		= seq_lseek,
5050 	.release	= seq_release,
5051 };
5052 
5053 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5054 {
5055 	unsigned int *ptr = v;
5056 
5057 	if (*pos || m->count)
5058 		ptr++;
5059 
5060 	(*pos)++;
5061 
5062 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5063 	     ptr++) {
5064 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5065 			continue;
5066 
5067 		return ptr;
5068 	}
5069 
5070 	return NULL;
5071 }
5072 
5073 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5074 {
5075 	void *v;
5076 	loff_t l = 0;
5077 
5078 	preempt_disable();
5079 	arch_spin_lock(&trace_cmdline_lock);
5080 
5081 	v = &savedcmd->map_cmdline_to_pid[0];
5082 	while (l <= *pos) {
5083 		v = saved_cmdlines_next(m, v, &l);
5084 		if (!v)
5085 			return NULL;
5086 	}
5087 
5088 	return v;
5089 }
5090 
5091 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5092 {
5093 	arch_spin_unlock(&trace_cmdline_lock);
5094 	preempt_enable();
5095 }
5096 
5097 static int saved_cmdlines_show(struct seq_file *m, void *v)
5098 {
5099 	char buf[TASK_COMM_LEN];
5100 	unsigned int *pid = v;
5101 
5102 	__trace_find_cmdline(*pid, buf);
5103 	seq_printf(m, "%d %s\n", *pid, buf);
5104 	return 0;
5105 }
5106 
5107 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5108 	.start		= saved_cmdlines_start,
5109 	.next		= saved_cmdlines_next,
5110 	.stop		= saved_cmdlines_stop,
5111 	.show		= saved_cmdlines_show,
5112 };
5113 
5114 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5115 {
5116 	if (tracing_disabled)
5117 		return -ENODEV;
5118 
5119 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5120 }
5121 
5122 static const struct file_operations tracing_saved_cmdlines_fops = {
5123 	.open		= tracing_saved_cmdlines_open,
5124 	.read		= seq_read,
5125 	.llseek		= seq_lseek,
5126 	.release	= seq_release,
5127 };
5128 
5129 static ssize_t
5130 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5131 				 size_t cnt, loff_t *ppos)
5132 {
5133 	char buf[64];
5134 	int r;
5135 
5136 	arch_spin_lock(&trace_cmdline_lock);
5137 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5138 	arch_spin_unlock(&trace_cmdline_lock);
5139 
5140 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5141 }
5142 
5143 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5144 {
5145 	kfree(s->saved_cmdlines);
5146 	kfree(s->map_cmdline_to_pid);
5147 	kfree(s);
5148 }
5149 
5150 static int tracing_resize_saved_cmdlines(unsigned int val)
5151 {
5152 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5153 
5154 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5155 	if (!s)
5156 		return -ENOMEM;
5157 
5158 	if (allocate_cmdlines_buffer(val, s) < 0) {
5159 		kfree(s);
5160 		return -ENOMEM;
5161 	}
5162 
5163 	arch_spin_lock(&trace_cmdline_lock);
5164 	savedcmd_temp = savedcmd;
5165 	savedcmd = s;
5166 	arch_spin_unlock(&trace_cmdline_lock);
5167 	free_saved_cmdlines_buffer(savedcmd_temp);
5168 
5169 	return 0;
5170 }
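
/*
 * Note the ordering above: the replacement buffer is allocated up front,
 * the savedcmd pointer is swapped while holding trace_cmdline_lock, and
 * the old buffer is freed only after the lock is dropped, so no
 * allocation or free happens inside the spinlocked section.
 */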
5171 
5172 static ssize_t
5173 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5174 				  size_t cnt, loff_t *ppos)
5175 {
5176 	unsigned long val;
5177 	int ret;
5178 
5179 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5180 	if (ret)
5181 		return ret;
5182 
5183 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5184 	if (!val || val > PID_MAX_DEFAULT)
5185 		return -EINVAL;
5186 
5187 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5188 	if (ret < 0)
5189 		return ret;
5190 
5191 	*ppos += cnt;
5192 
5193 	return cnt;
5194 }
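
/*
 * This is the write side of the saved_cmdlines_size file described in the
 * readme above; e.g. (illustrative) "echo 4096 > saved_cmdlines_size"
 * resizes the comm cache to 4096 entries.  Per the check above, the value
 * must be between 1 and PID_MAX_DEFAULT.
 */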
5195 
5196 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5197 	.open		= tracing_open_generic,
5198 	.read		= tracing_saved_cmdlines_size_read,
5199 	.write		= tracing_saved_cmdlines_size_write,
5200 };
5201 
5202 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5203 static union trace_eval_map_item *
5204 update_eval_map(union trace_eval_map_item *ptr)
5205 {
5206 	if (!ptr->map.eval_string) {
5207 		if (ptr->tail.next) {
5208 			ptr = ptr->tail.next;
5209 			/* Set ptr to the next real item (skip head) */
5210 			ptr++;
5211 		} else
5212 			return NULL;
5213 	}
5214 	return ptr;
5215 }
5216 
5217 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5218 {
5219 	union trace_eval_map_item *ptr = v;
5220 
5221 	/*
5222 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5223 	 * This really should never happen.
5224 	 */
5225 	ptr = update_eval_map(ptr);
5226 	if (WARN_ON_ONCE(!ptr))
5227 		return NULL;
5228 
5229 	ptr++;
5230 
5231 	(*pos)++;
5232 
5233 	ptr = update_eval_map(ptr);
5234 
5235 	return ptr;
5236 }
5237 
5238 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5239 {
5240 	union trace_eval_map_item *v;
5241 	loff_t l = 0;
5242 
5243 	mutex_lock(&trace_eval_mutex);
5244 
5245 	v = trace_eval_maps;
5246 	if (v)
5247 		v++;
5248 
5249 	while (v && l < *pos) {
5250 		v = eval_map_next(m, v, &l);
5251 	}
5252 
5253 	return v;
5254 }
5255 
5256 static void eval_map_stop(struct seq_file *m, void *v)
5257 {
5258 	mutex_unlock(&trace_eval_mutex);
5259 }
5260 
5261 static int eval_map_show(struct seq_file *m, void *v)
5262 {
5263 	union trace_eval_map_item *ptr = v;
5264 
5265 	seq_printf(m, "%s %ld (%s)\n",
5266 		   ptr->map.eval_string, ptr->map.eval_value,
5267 		   ptr->map.system);
5268 
5269 	return 0;
5270 }
5271 
5272 static const struct seq_operations tracing_eval_map_seq_ops = {
5273 	.start		= eval_map_start,
5274 	.next		= eval_map_next,
5275 	.stop		= eval_map_stop,
5276 	.show		= eval_map_show,
5277 };
5278 
5279 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5280 {
5281 	if (tracing_disabled)
5282 		return -ENODEV;
5283 
5284 	return seq_open(filp, &tracing_eval_map_seq_ops);
5285 }
5286 
5287 static const struct file_operations tracing_eval_map_fops = {
5288 	.open		= tracing_eval_map_open,
5289 	.read		= seq_read,
5290 	.llseek		= seq_lseek,
5291 	.release	= seq_release,
5292 };
5293 
5294 static inline union trace_eval_map_item *
5295 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5296 {
5297 	/* Return tail of array given the head */
5298 	return ptr + ptr->head.length + 1;
5299 }
5300 
5301 static void
5302 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5303 			   int len)
5304 {
5305 	struct trace_eval_map **stop;
5306 	struct trace_eval_map **map;
5307 	union trace_eval_map_item *map_array;
5308 	union trace_eval_map_item *ptr;
5309 
5310 	stop = start + len;
5311 
5312 	/*
5313 	 * The trace_eval_maps contains the map plus a head and tail item,
5314 	 * where the head holds the module and length of array, and the
5315 	 * tail holds a pointer to the next list.
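	 *
	 * For a module contributing N maps, the array built below is laid
	 * out as one union trace_eval_map_item per slot, roughly:
	 *
	 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
	 *
	 * (the tail slot is zeroed here and only gets a ->tail.next pointer
	 *  once a later module chains its own array on)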
5316 	 */
5317 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5318 	if (!map_array) {
5319 		pr_warn("Unable to allocate trace eval mapping\n");
5320 		return;
5321 	}
5322 
5323 	mutex_lock(&trace_eval_mutex);
5324 
5325 	if (!trace_eval_maps)
5326 		trace_eval_maps = map_array;
5327 	else {
5328 		ptr = trace_eval_maps;
5329 		for (;;) {
5330 			ptr = trace_eval_jmp_to_tail(ptr);
5331 			if (!ptr->tail.next)
5332 				break;
5333 			ptr = ptr->tail.next;
5335 		}
5336 		ptr->tail.next = map_array;
5337 	}
5338 	map_array->head.mod = mod;
5339 	map_array->head.length = len;
5340 	map_array++;
5341 
5342 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5343 		map_array->map = **map;
5344 		map_array++;
5345 	}
5346 	memset(map_array, 0, sizeof(*map_array));
5347 
5348 	mutex_unlock(&trace_eval_mutex);
5349 }
5350 
5351 static void trace_create_eval_file(struct dentry *d_tracer)
5352 {
5353 	trace_create_file("eval_map", 0444, d_tracer,
5354 			  NULL, &tracing_eval_map_fops);
5355 }
5356 
5357 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5358 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5359 static inline void trace_insert_eval_map_file(struct module *mod,
5360 			      struct trace_eval_map **start, int len) { }
5361 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5362 
5363 static void trace_insert_eval_map(struct module *mod,
5364 				  struct trace_eval_map **start, int len)
5365 {
5366 	struct trace_eval_map **map;
5367 
5368 	if (len <= 0)
5369 		return;
5370 
5371 	map = start;
5372 
5373 	trace_event_eval_update(map, len);
5374 
5375 	trace_insert_eval_map_file(mod, start, len);
5376 }
5377 
5378 static ssize_t
5379 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5380 		       size_t cnt, loff_t *ppos)
5381 {
5382 	struct trace_array *tr = filp->private_data;
5383 	char buf[MAX_TRACER_SIZE+2];
5384 	int r;
5385 
5386 	mutex_lock(&trace_types_lock);
5387 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5388 	mutex_unlock(&trace_types_lock);
5389 
5390 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5391 }
5392 
5393 int tracer_init(struct tracer *t, struct trace_array *tr)
5394 {
5395 	tracing_reset_online_cpus(&tr->trace_buffer);
5396 	return t->init(tr);
5397 }
5398 
5399 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5400 {
5401 	int cpu;
5402 
5403 	for_each_tracing_cpu(cpu)
5404 		per_cpu_ptr(buf->data, cpu)->entries = val;
5405 }
5406 
5407 #ifdef CONFIG_TRACER_MAX_TRACE
5408 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5409 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5410 					struct trace_buffer *size_buf, int cpu_id)
5411 {
5412 	int cpu, ret = 0;
5413 
5414 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5415 		for_each_tracing_cpu(cpu) {
5416 			ret = ring_buffer_resize(trace_buf->buffer,
5417 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5418 			if (ret < 0)
5419 				break;
5420 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5421 				per_cpu_ptr(size_buf->data, cpu)->entries;
5422 		}
5423 	} else {
5424 		ret = ring_buffer_resize(trace_buf->buffer,
5425 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5426 		if (ret == 0)
5427 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5428 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5429 	}
5430 
5431 	return ret;
5432 }
5433 #endif /* CONFIG_TRACER_MAX_TRACE */
5434 
5435 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5436 					unsigned long size, int cpu)
5437 {
5438 	int ret;
5439 
5440 	/*
5441 	 * If kernel or user changes the size of the ring buffer
5442 	 * we use the size that was given, and we can forget about
5443 	 * expanding it later.
5444 	 */
5445 	ring_buffer_expanded = true;
5446 
5447 	/* May be called before buffers are initialized */
5448 	if (!tr->trace_buffer.buffer)
5449 		return 0;
5450 
5451 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5452 	if (ret < 0)
5453 		return ret;
5454 
5455 #ifdef CONFIG_TRACER_MAX_TRACE
5456 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5457 	    !tr->current_trace->use_max_tr)
5458 		goto out;
5459 
5460 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5461 	if (ret < 0) {
5462 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5463 						     &tr->trace_buffer, cpu);
5464 		if (r < 0) {
5465 			/*
5466 			 * AARGH! We are left with different
5467 			 * size max buffer!!!!
5468 			 * The max buffer is our "snapshot" buffer.
5469 			 * When a tracer needs a snapshot (one of the
5470 			 * latency tracers), it swaps the max buffer
5471 			 * with the saved snapshot. We succeeded in
5472 			 * updating the size of the main buffer, but failed to
5473 			 * update the size of the max buffer. But when we tried
5474 			 * to reset the main buffer to the original size, we
5475 			 * failed there too. This is very unlikely to
5476 			 * happen, but if it does, warn and kill all
5477 			 * tracing.
5478 			 */
5479 			WARN_ON(1);
5480 			tracing_disabled = 1;
5481 		}
5482 		return ret;
5483 	}
5484 
5485 	if (cpu == RING_BUFFER_ALL_CPUS)
5486 		set_buffer_entries(&tr->max_buffer, size);
5487 	else
5488 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5489 
5490  out:
5491 #endif /* CONFIG_TRACER_MAX_TRACE */
5492 
5493 	if (cpu == RING_BUFFER_ALL_CPUS)
5494 		set_buffer_entries(&tr->trace_buffer, size);
5495 	else
5496 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5497 
5498 	return ret;
5499 }
5500 
5501 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5502 					  unsigned long size, int cpu_id)
5503 {
5504 	int ret = size;
5505 
5506 	mutex_lock(&trace_types_lock);
5507 
5508 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5509 		/* make sure this cpu is enabled in the mask */
5510 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5511 			ret = -EINVAL;
5512 			goto out;
5513 		}
5514 	}
5515 
5516 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5517 	if (ret < 0)
5518 		ret = -ENOMEM;
5519 
5520 out:
5521 	mutex_unlock(&trace_types_lock);
5522 
5523 	return ret;
5524 }
5525 
5526 
5527 /**
5528  * tracing_update_buffers - used by tracing facility to expand ring buffers
5529  *
5530  * To save memory when tracing is never used on a system that has it
5531  * configured in, the ring buffers are initially set to a minimum size.
5532  * Once a user starts to use the tracing facility, they need to grow
5533  * to their default size.
5534  *
5535  * This function is to be called when a tracer is about to be used.
5536  */
5537 int tracing_update_buffers(void)
5538 {
5539 	int ret = 0;
5540 
5541 	mutex_lock(&trace_types_lock);
5542 	if (!ring_buffer_expanded)
5543 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5544 						RING_BUFFER_ALL_CPUS);
5545 	mutex_unlock(&trace_types_lock);
5546 
5547 	return ret;
5548 }
5549 
5550 struct trace_option_dentry;
5551 
5552 static void
5553 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5554 
5555 /*
5556  * Used to clear out the tracer before deletion of an instance.
5557  * Must have trace_types_lock held.
5558  */
5559 static void tracing_set_nop(struct trace_array *tr)
5560 {
5561 	if (tr->current_trace == &nop_trace)
5562 		return;
5563 
5564 	tr->current_trace->enabled--;
5565 
5566 	if (tr->current_trace->reset)
5567 		tr->current_trace->reset(tr);
5568 
5569 	tr->current_trace = &nop_trace;
5570 }
5571 
5572 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5573 {
5574 	/* Only enable if the directory has been created already. */
5575 	if (!tr->dir)
5576 		return;
5577 
5578 	create_trace_option_files(tr, t);
5579 }
5580 
5581 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5582 {
5583 	struct tracer *t;
5584 #ifdef CONFIG_TRACER_MAX_TRACE
5585 	bool had_max_tr;
5586 #endif
5587 	int ret = 0;
5588 
5589 	mutex_lock(&trace_types_lock);
5590 
5591 	if (!ring_buffer_expanded) {
5592 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5593 						RING_BUFFER_ALL_CPUS);
5594 		if (ret < 0)
5595 			goto out;
5596 		ret = 0;
5597 	}
5598 
5599 	for (t = trace_types; t; t = t->next) {
5600 		if (strcmp(t->name, buf) == 0)
5601 			break;
5602 	}
5603 	if (!t) {
5604 		ret = -EINVAL;
5605 		goto out;
5606 	}
5607 	if (t == tr->current_trace)
5608 		goto out;
5609 
5610 #ifdef CONFIG_TRACER_SNAPSHOT
5611 	if (t->use_max_tr) {
5612 		arch_spin_lock(&tr->max_lock);
5613 		if (tr->cond_snapshot)
5614 			ret = -EBUSY;
5615 		arch_spin_unlock(&tr->max_lock);
5616 		if (ret)
5617 			goto out;
5618 	}
5619 #endif
5620 	/* Some tracers won't work on kernel command line */
5621 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5622 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5623 			t->name);
5624 		goto out;
5625 	}
5626 
5627 	/* Some tracers are only allowed for the top level buffer */
5628 	if (!trace_ok_for_array(t, tr)) {
5629 		ret = -EINVAL;
5630 		goto out;
5631 	}
5632 
5633 	/* If trace pipe files are being read, we can't change the tracer */
5634 	if (tr->current_trace->ref) {
5635 		ret = -EBUSY;
5636 		goto out;
5637 	}
5638 
5639 	trace_branch_disable();
5640 
5641 	tr->current_trace->enabled--;
5642 
5643 	if (tr->current_trace->reset)
5644 		tr->current_trace->reset(tr);
5645 
5646 	/* Current trace needs to be nop_trace before synchronize_rcu */
5647 	tr->current_trace = &nop_trace;
5648 
5649 #ifdef CONFIG_TRACER_MAX_TRACE
5650 	had_max_tr = tr->allocated_snapshot;
5651 
5652 	if (had_max_tr && !t->use_max_tr) {
5653 		/*
5654 		 * We need to make sure that the update_max_tr sees that
5655 		 * current_trace changed to nop_trace to keep it from
5656 		 * swapping the buffers after we resize it.
5657 		 * The update_max_tr is called from interrupts disabled
5658 		 * so a synchronize_rcu() is sufficient.
5659 		 */
5660 		synchronize_rcu();
5661 		free_snapshot(tr);
5662 	}
5663 #endif
5664 
5665 #ifdef CONFIG_TRACER_MAX_TRACE
5666 	if (t->use_max_tr && !had_max_tr) {
5667 		ret = tracing_alloc_snapshot_instance(tr);
5668 		if (ret < 0)
5669 			goto out;
5670 	}
5671 #endif
5672 
5673 	if (t->init) {
5674 		ret = tracer_init(t, tr);
5675 		if (ret)
5676 			goto out;
5677 	}
5678 
5679 	tr->current_trace = t;
5680 	tr->current_trace->enabled++;
5681 	trace_branch_enable(tr);
5682  out:
5683 	mutex_unlock(&trace_types_lock);
5684 
5685 	return ret;
5686 }
5687 
5688 static ssize_t
5689 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5690 			size_t cnt, loff_t *ppos)
5691 {
5692 	struct trace_array *tr = filp->private_data;
5693 	char buf[MAX_TRACER_SIZE+1];
5694 	int i;
5695 	size_t ret;
5696 	int err;
5697 
5698 	ret = cnt;
5699 
5700 	if (cnt > MAX_TRACER_SIZE)
5701 		cnt = MAX_TRACER_SIZE;
5702 
5703 	if (copy_from_user(buf, ubuf, cnt))
5704 		return -EFAULT;
5705 
5706 	buf[cnt] = 0;
5707 
5708 	/* strip ending whitespace. */
5709 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5710 		buf[i] = 0;
5711 
5712 	err = tracing_set_tracer(tr, buf);
5713 	if (err)
5714 		return err;
5715 
5716 	*ppos += ret;
5717 
5718 	return ret;
5719 }
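
/*
 * From user space this handler is reached through the current_tracer
 * file, e.g. (sketch):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *
 * Trailing whitespace added by echo is stripped above before the name
 * is handed to tracing_set_tracer().
 */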
5720 
5721 static ssize_t
5722 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5723 		   size_t cnt, loff_t *ppos)
5724 {
5725 	char buf[64];
5726 	int r;
5727 
5728 	r = snprintf(buf, sizeof(buf), "%ld\n",
5729 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5730 	if (r > sizeof(buf))
5731 		r = sizeof(buf);
5732 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5733 }
5734 
5735 static ssize_t
5736 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5737 		    size_t cnt, loff_t *ppos)
5738 {
5739 	unsigned long val;
5740 	int ret;
5741 
5742 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5743 	if (ret)
5744 		return ret;
5745 
5746 	*ptr = val * 1000;
5747 
5748 	return cnt;
5749 }
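
/*
 * Note the units: the value written is taken in microseconds and stored
 * in nanoseconds (val * 1000), mirroring the nsecs_to_usecs() conversion
 * on the read side above. For example (sketch):
 *
 *	echo 100 > tracing_thresh	# stores 100000 ns (100 usecs)
 */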
5750 
5751 static ssize_t
5752 tracing_thresh_read(struct file *filp, char __user *ubuf,
5753 		    size_t cnt, loff_t *ppos)
5754 {
5755 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5756 }
5757 
5758 static ssize_t
5759 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5760 		     size_t cnt, loff_t *ppos)
5761 {
5762 	struct trace_array *tr = filp->private_data;
5763 	int ret;
5764 
5765 	mutex_lock(&trace_types_lock);
5766 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5767 	if (ret < 0)
5768 		goto out;
5769 
5770 	if (tr->current_trace->update_thresh) {
5771 		ret = tr->current_trace->update_thresh(tr);
5772 		if (ret < 0)
5773 			goto out;
5774 	}
5775 
5776 	ret = cnt;
5777 out:
5778 	mutex_unlock(&trace_types_lock);
5779 
5780 	return ret;
5781 }
5782 
5783 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5784 
5785 static ssize_t
5786 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5787 		     size_t cnt, loff_t *ppos)
5788 {
5789 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5790 }
5791 
5792 static ssize_t
5793 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5794 		      size_t cnt, loff_t *ppos)
5795 {
5796 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5797 }
5798 
5799 #endif
5800 
5801 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5802 {
5803 	struct trace_array *tr = inode->i_private;
5804 	struct trace_iterator *iter;
5805 	int ret = 0;
5806 
5807 	if (tracing_disabled)
5808 		return -ENODEV;
5809 
5810 	if (trace_array_get(tr) < 0)
5811 		return -ENODEV;
5812 
5813 	mutex_lock(&trace_types_lock);
5814 
5815 	/* create a buffer to store the information to pass to userspace */
5816 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5817 	if (!iter) {
5818 		ret = -ENOMEM;
5819 		__trace_array_put(tr);
5820 		goto out;
5821 	}
5822 
5823 	trace_seq_init(&iter->seq);
5824 	iter->trace = tr->current_trace;
5825 
5826 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5827 		ret = -ENOMEM;
5828 		goto fail;
5829 	}
5830 
5831 	/* trace pipe does not show start of buffer */
5832 	cpumask_setall(iter->started);
5833 
5834 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5835 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5836 
5837 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5838 	if (trace_clocks[tr->clock_id].in_ns)
5839 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5840 
5841 	iter->tr = tr;
5842 	iter->trace_buffer = &tr->trace_buffer;
5843 	iter->cpu_file = tracing_get_cpu(inode);
5844 	mutex_init(&iter->mutex);
5845 	filp->private_data = iter;
5846 
5847 	if (iter->trace->pipe_open)
5848 		iter->trace->pipe_open(iter);
5849 
5850 	nonseekable_open(inode, filp);
5851 
5852 	tr->current_trace->ref++;
5853 out:
5854 	mutex_unlock(&trace_types_lock);
5855 	return ret;
5856 
5857 fail:
5858 	kfree(iter);
5859 	__trace_array_put(tr);
5860 	mutex_unlock(&trace_types_lock);
5861 	return ret;
5862 }
5863 
5864 static int tracing_release_pipe(struct inode *inode, struct file *file)
5865 {
5866 	struct trace_iterator *iter = file->private_data;
5867 	struct trace_array *tr = inode->i_private;
5868 
5869 	mutex_lock(&trace_types_lock);
5870 
5871 	tr->current_trace->ref--;
5872 
5873 	if (iter->trace->pipe_close)
5874 		iter->trace->pipe_close(iter);
5875 
5876 	mutex_unlock(&trace_types_lock);
5877 
5878 	free_cpumask_var(iter->started);
5879 	mutex_destroy(&iter->mutex);
5880 	kfree(iter);
5881 
5882 	trace_array_put(tr);
5883 
5884 	return 0;
5885 }
5886 
5887 static __poll_t
5888 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5889 {
5890 	struct trace_array *tr = iter->tr;
5891 
5892 	/* Iterators are static; they should be either filled or empty */
5893 	if (trace_buffer_iter(iter, iter->cpu_file))
5894 		return EPOLLIN | EPOLLRDNORM;
5895 
5896 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5897 		/*
5898 		 * Always select as readable when in blocking mode
5899 		 */
5900 		return EPOLLIN | EPOLLRDNORM;
5901 	else
5902 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5903 					     filp, poll_table);
5904 }
5905 
5906 static __poll_t
5907 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5908 {
5909 	struct trace_iterator *iter = filp->private_data;
5910 
5911 	return trace_poll(iter, filp, poll_table);
5912 }
5913 
5914 /* Must be called with iter->mutex held. */
5915 static int tracing_wait_pipe(struct file *filp)
5916 {
5917 	struct trace_iterator *iter = filp->private_data;
5918 	int ret;
5919 
5920 	while (trace_empty(iter)) {
5921 
5922 		if ((filp->f_flags & O_NONBLOCK)) {
5923 			return -EAGAIN;
5924 		}
5925 
5926 		/*
5927 		 * We block until we read something and tracing is disabled.
5928 		 * We still block if tracing is disabled, but we have never
5929 		 * read anything. This allows a user to cat this file, and
5930 		 * then enable tracing. But after we have read something,
5931 		 * we give an EOF when tracing is again disabled.
5932 		 *
5933 		 * iter->pos will be 0 if we haven't read anything.
5934 		 */
5935 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5936 			break;
5937 
5938 		mutex_unlock(&iter->mutex);
5939 
5940 		ret = wait_on_pipe(iter, 0);
5941 
5942 		mutex_lock(&iter->mutex);
5943 
5944 		if (ret)
5945 			return ret;
5946 	}
5947 
5948 	return 1;
5949 }
5950 
5951 /*
5952  * Consumer reader.
5953  */
5954 static ssize_t
5955 tracing_read_pipe(struct file *filp, char __user *ubuf,
5956 		  size_t cnt, loff_t *ppos)
5957 {
5958 	struct trace_iterator *iter = filp->private_data;
5959 	ssize_t sret;
5960 
5961 	/*
5962 	 * Avoid more than one consumer on a single file descriptor
5963 	 * This is just a matter of traces coherency, the ring buffer itself
5964 	 * is protected.
5965 	 */
5966 	mutex_lock(&iter->mutex);
5967 
5968 	/* return any leftover data */
5969 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5970 	if (sret != -EBUSY)
5971 		goto out;
5972 
5973 	trace_seq_init(&iter->seq);
5974 
5975 	if (iter->trace->read) {
5976 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5977 		if (sret)
5978 			goto out;
5979 	}
5980 
5981 waitagain:
5982 	sret = tracing_wait_pipe(filp);
5983 	if (sret <= 0)
5984 		goto out;
5985 
5986 	/* stop when tracing is finished */
5987 	if (trace_empty(iter)) {
5988 		sret = 0;
5989 		goto out;
5990 	}
5991 
5992 	if (cnt >= PAGE_SIZE)
5993 		cnt = PAGE_SIZE - 1;
5994 
5995 	/* reset all but tr, trace, and overruns */
5996 	memset(&iter->seq, 0,
5997 	       sizeof(struct trace_iterator) -
5998 	       offsetof(struct trace_iterator, seq));
5999 	cpumask_clear(iter->started);
6000 	iter->pos = -1;
6001 
6002 	trace_event_read_lock();
6003 	trace_access_lock(iter->cpu_file);
6004 	while (trace_find_next_entry_inc(iter) != NULL) {
6005 		enum print_line_t ret;
6006 		int save_len = iter->seq.seq.len;
6007 
6008 		ret = print_trace_line(iter);
6009 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6010 			/* don't print partial lines */
6011 			iter->seq.seq.len = save_len;
6012 			break;
6013 		}
6014 		if (ret != TRACE_TYPE_NO_CONSUME)
6015 			trace_consume(iter);
6016 
6017 		if (trace_seq_used(&iter->seq) >= cnt)
6018 			break;
6019 
6020 		/*
6021 		 * Setting the full flag means we reached the trace_seq buffer
6022 		 * size and we should have left via the partial-line condition
6023 		 * above. One of the trace_seq_* functions is not used properly.
6024 		 */
6025 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6026 			  iter->ent->type);
6027 	}
6028 	trace_access_unlock(iter->cpu_file);
6029 	trace_event_read_unlock();
6030 
6031 	/* Now copy what we have to the user */
6032 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6033 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6034 		trace_seq_init(&iter->seq);
6035 
6036 	/*
6037 	 * If there was nothing to send to user, in spite of consuming trace
6038 	 * entries, go back to wait for more entries.
6039 	 */
6040 	if (sret == -EBUSY)
6041 		goto waitagain;
6042 
6043 out:
6044 	mutex_unlock(&iter->mutex);
6045 
6046 	return sret;
6047 }
6048 
6049 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6050 				     unsigned int idx)
6051 {
6052 	__free_page(spd->pages[idx]);
6053 }
6054 
6055 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6056 	.confirm		= generic_pipe_buf_confirm,
6057 	.release		= generic_pipe_buf_release,
6058 	.steal			= generic_pipe_buf_steal,
6059 	.get			= generic_pipe_buf_get,
6060 };
6061 
6062 static size_t
6063 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6064 {
6065 	size_t count;
6066 	int save_len;
6067 	int ret;
6068 
6069 	/* Seq buffer is page-sized, exactly what we need. */
6070 	for (;;) {
6071 		save_len = iter->seq.seq.len;
6072 		ret = print_trace_line(iter);
6073 
6074 		if (trace_seq_has_overflowed(&iter->seq)) {
6075 			iter->seq.seq.len = save_len;
6076 			break;
6077 		}
6078 
6079 		/*
6080 		 * This should not be hit, because it should only
6081 		 * be set if the iter->seq overflowed. But check it
6082 		 * anyway to be safe.
6083 		 */
6084 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6085 			iter->seq.seq.len = save_len;
6086 			break;
6087 		}
6088 
6089 		count = trace_seq_used(&iter->seq) - save_len;
6090 		if (rem < count) {
6091 			rem = 0;
6092 			iter->seq.seq.len = save_len;
6093 			break;
6094 		}
6095 
6096 		if (ret != TRACE_TYPE_NO_CONSUME)
6097 			trace_consume(iter);
6098 		rem -= count;
6099 		if (!trace_find_next_entry_inc(iter))	{
6100 			rem = 0;
6101 			iter->ent = NULL;
6102 			break;
6103 		}
6104 	}
6105 
6106 	return rem;
6107 }
6108 
6109 static ssize_t tracing_splice_read_pipe(struct file *filp,
6110 					loff_t *ppos,
6111 					struct pipe_inode_info *pipe,
6112 					size_t len,
6113 					unsigned int flags)
6114 {
6115 	struct page *pages_def[PIPE_DEF_BUFFERS];
6116 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6117 	struct trace_iterator *iter = filp->private_data;
6118 	struct splice_pipe_desc spd = {
6119 		.pages		= pages_def,
6120 		.partial	= partial_def,
6121 		.nr_pages	= 0, /* This gets updated below. */
6122 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6123 		.ops		= &tracing_pipe_buf_ops,
6124 		.spd_release	= tracing_spd_release_pipe,
6125 	};
6126 	ssize_t ret;
6127 	size_t rem;
6128 	unsigned int i;
6129 
6130 	if (splice_grow_spd(pipe, &spd))
6131 		return -ENOMEM;
6132 
6133 	mutex_lock(&iter->mutex);
6134 
6135 	if (iter->trace->splice_read) {
6136 		ret = iter->trace->splice_read(iter, filp,
6137 					       ppos, pipe, len, flags);
6138 		if (ret)
6139 			goto out_err;
6140 	}
6141 
6142 	ret = tracing_wait_pipe(filp);
6143 	if (ret <= 0)
6144 		goto out_err;
6145 
6146 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6147 		ret = -EFAULT;
6148 		goto out_err;
6149 	}
6150 
6151 	trace_event_read_lock();
6152 	trace_access_lock(iter->cpu_file);
6153 
6154 	/* Fill as many pages as possible. */
6155 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6156 		spd.pages[i] = alloc_page(GFP_KERNEL);
6157 		if (!spd.pages[i])
6158 			break;
6159 
6160 		rem = tracing_fill_pipe_page(rem, iter);
6161 
6162 		/* Copy the data into the page, so we can start over. */
6163 		ret = trace_seq_to_buffer(&iter->seq,
6164 					  page_address(spd.pages[i]),
6165 					  trace_seq_used(&iter->seq));
6166 		if (ret < 0) {
6167 			__free_page(spd.pages[i]);
6168 			break;
6169 		}
6170 		spd.partial[i].offset = 0;
6171 		spd.partial[i].len = trace_seq_used(&iter->seq);
6172 
6173 		trace_seq_init(&iter->seq);
6174 	}
6175 
6176 	trace_access_unlock(iter->cpu_file);
6177 	trace_event_read_unlock();
6178 	mutex_unlock(&iter->mutex);
6179 
6180 	spd.nr_pages = i;
6181 
6182 	if (i)
6183 		ret = splice_to_pipe(pipe, &spd);
6184 	else
6185 		ret = 0;
6186 out:
6187 	splice_shrink_spd(&spd);
6188 	return ret;
6189 
6190 out_err:
6191 	mutex_unlock(&iter->mutex);
6192 	goto out;
6193 }
6194 
6195 static ssize_t
6196 tracing_entries_read(struct file *filp, char __user *ubuf,
6197 		     size_t cnt, loff_t *ppos)
6198 {
6199 	struct inode *inode = file_inode(filp);
6200 	struct trace_array *tr = inode->i_private;
6201 	int cpu = tracing_get_cpu(inode);
6202 	char buf[64];
6203 	int r = 0;
6204 	ssize_t ret;
6205 
6206 	mutex_lock(&trace_types_lock);
6207 
6208 	if (cpu == RING_BUFFER_ALL_CPUS) {
6209 		int cpu, buf_size_same;
6210 		unsigned long size;
6211 
6212 		size = 0;
6213 		buf_size_same = 1;
6214 		/* check if all cpu sizes are same */
6215 		for_each_tracing_cpu(cpu) {
6216 			/* fill in the size from first enabled cpu */
6217 			if (size == 0)
6218 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6219 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6220 				buf_size_same = 0;
6221 				break;
6222 			}
6223 		}
6224 
6225 		if (buf_size_same) {
6226 			if (!ring_buffer_expanded)
6227 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6228 					    size >> 10,
6229 					    trace_buf_size >> 10);
6230 			else
6231 				r = sprintf(buf, "%lu\n", size >> 10);
6232 		} else
6233 			r = sprintf(buf, "X\n");
6234 	} else
6235 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6236 
6237 	mutex_unlock(&trace_types_lock);
6238 
6239 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6240 	return ret;
6241 }
6242 
6243 static ssize_t
6244 tracing_entries_write(struct file *filp, const char __user *ubuf,
6245 		      size_t cnt, loff_t *ppos)
6246 {
6247 	struct inode *inode = file_inode(filp);
6248 	struct trace_array *tr = inode->i_private;
6249 	unsigned long val;
6250 	int ret;
6251 
6252 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6253 	if (ret)
6254 		return ret;
6255 
6256 	/* must have at least 1 entry */
6257 	if (!val)
6258 		return -EINVAL;
6259 
6260 	/* value is in KB */
6261 	val <<= 10;
6262 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6263 	if (ret < 0)
6264 		return ret;
6265 
6266 	*ppos += cnt;
6267 
6268 	return cnt;
6269 }
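
/*
 * A usage sketch: this handler backs the buffer_size_kb file, so
 * resizing the buffers of all CPUs to 4 MB looks like
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * while writing to per_cpu/cpuN/buffer_size_kb resizes only that CPU.
 */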
6270 
6271 static ssize_t
6272 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6273 				size_t cnt, loff_t *ppos)
6274 {
6275 	struct trace_array *tr = filp->private_data;
6276 	char buf[64];
6277 	int r, cpu;
6278 	unsigned long size = 0, expanded_size = 0;
6279 
6280 	mutex_lock(&trace_types_lock);
6281 	for_each_tracing_cpu(cpu) {
6282 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6283 		if (!ring_buffer_expanded)
6284 			expanded_size += trace_buf_size >> 10;
6285 	}
6286 	if (ring_buffer_expanded)
6287 		r = sprintf(buf, "%lu\n", size);
6288 	else
6289 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6290 	mutex_unlock(&trace_types_lock);
6291 
6292 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6293 }
6294 
6295 static ssize_t
6296 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6297 			  size_t cnt, loff_t *ppos)
6298 {
6299 	/*
6300 	 * There is no need to read what the user has written; this function
6301 	 * exists just to make sure that there is no error when "echo" is used.
6302 	 */
6303 
6304 	*ppos += cnt;
6305 
6306 	return cnt;
6307 }
6308 
6309 static int
6310 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6311 {
6312 	struct trace_array *tr = inode->i_private;
6313 
6314 	/* disable tracing ? */
6315 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6316 		tracer_tracing_off(tr);
6317 	/* resize the ring buffer to 0 */
6318 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6319 
6320 	trace_array_put(tr);
6321 
6322 	return 0;
6323 }
6324 
6325 static ssize_t
6326 tracing_mark_write(struct file *filp, const char __user *ubuf,
6327 					size_t cnt, loff_t *fpos)
6328 {
6329 	struct trace_array *tr = filp->private_data;
6330 	struct ring_buffer_event *event;
6331 	enum event_trigger_type tt = ETT_NONE;
6332 	struct ring_buffer *buffer;
6333 	struct print_entry *entry;
6334 	unsigned long irq_flags;
6335 	ssize_t written;
6336 	int size;
6337 	int len;
6338 
6339 /* Used in tracing_mark_raw_write() as well */
6340 #define FAULTED_STR "<faulted>"
6341 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6342 
6343 	if (tracing_disabled)
6344 		return -EINVAL;
6345 
6346 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6347 		return -EINVAL;
6348 
6349 	if (cnt > TRACE_BUF_SIZE)
6350 		cnt = TRACE_BUF_SIZE;
6351 
6352 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6353 
6354 	local_save_flags(irq_flags);
6355 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6356 
6357 	/* If less than "<faulted>", then make sure we can still add that */
6358 	if (cnt < FAULTED_SIZE)
6359 		size += FAULTED_SIZE - cnt;
6360 
6361 	buffer = tr->trace_buffer.buffer;
6362 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6363 					    irq_flags, preempt_count());
6364 	if (unlikely(!event))
6365 		/* Ring buffer disabled, return as if not open for write */
6366 		return -EBADF;
6367 
6368 	entry = ring_buffer_event_data(event);
6369 	entry->ip = _THIS_IP_;
6370 
6371 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6372 	if (len) {
6373 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6374 		cnt = FAULTED_SIZE;
6375 		written = -EFAULT;
6376 	} else
6377 		written = cnt;
6378 	len = cnt;
6379 
6380 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6381 		/* do not add \n before testing triggers, but add \0 */
6382 		entry->buf[cnt] = '\0';
6383 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6384 	}
6385 
6386 	if (entry->buf[cnt - 1] != '\n') {
6387 		entry->buf[cnt] = '\n';
6388 		entry->buf[cnt + 1] = '\0';
6389 	} else
6390 		entry->buf[cnt] = '\0';
6391 
6392 	__buffer_unlock_commit(buffer, event);
6393 
6394 	if (tt)
6395 		event_triggers_post_call(tr->trace_marker_file, tt);
6396 
6397 	if (written > 0)
6398 		*fpos += written;
6399 
6400 	return written;
6401 }
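
/*
 * A usage sketch: this is the write handler of the trace_marker file,
 * so user space can annotate a trace with
 *
 *	echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 *
 * which shows up in the trace as a print event. Writes larger than
 * TRACE_BUF_SIZE are truncated, and a faulting copy records the
 * "<faulted>" string instead of the user data.
 */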
6402 
6403 /* Limit it for now to 3K (including tag) */
6404 #define RAW_DATA_MAX_SIZE (1024*3)
6405 
6406 static ssize_t
6407 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6408 					size_t cnt, loff_t *fpos)
6409 {
6410 	struct trace_array *tr = filp->private_data;
6411 	struct ring_buffer_event *event;
6412 	struct ring_buffer *buffer;
6413 	struct raw_data_entry *entry;
6414 	unsigned long irq_flags;
6415 	ssize_t written;
6416 	int size;
6417 	int len;
6418 
6419 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6420 
6421 	if (tracing_disabled)
6422 		return -EINVAL;
6423 
6424 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6425 		return -EINVAL;
6426 
6427 	/* The marker must at least have a tag id */
6428 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6429 		return -EINVAL;
6430 
6431 	if (cnt > TRACE_BUF_SIZE)
6432 		cnt = TRACE_BUF_SIZE;
6433 
6434 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6435 
6436 	local_save_flags(irq_flags);
6437 	size = sizeof(*entry) + cnt;
6438 	if (cnt < FAULT_SIZE_ID)
6439 		size += FAULT_SIZE_ID - cnt;
6440 
6441 	buffer = tr->trace_buffer.buffer;
6442 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6443 					    irq_flags, preempt_count());
6444 	if (!event)
6445 		/* Ring buffer disabled, return as if not open for write */
6446 		return -EBADF;
6447 
6448 	entry = ring_buffer_event_data(event);
6449 
6450 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6451 	if (len) {
6452 		entry->id = -1;
6453 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6454 		written = -EFAULT;
6455 	} else
6456 		written = cnt;
6457 
6458 	__buffer_unlock_commit(buffer, event);
6459 
6460 	if (written > 0)
6461 		*fpos += written;
6462 
6463 	return written;
6464 }
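
/*
 * A usage sketch (illustrative): this is the write handler of the
 * trace_marker_raw file. The payload is binary; the first sizeof(int)
 * bytes are the tag id and the rest (up to RAW_DATA_MAX_SIZE) is passed
 * through unformatted:
 *
 *	unsigned int rec[2] = { 42, value };	(rec[0] is the tag id)
 *	write(fd, rec, sizeof(rec));
 */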
6465 
6466 static int tracing_clock_show(struct seq_file *m, void *v)
6467 {
6468 	struct trace_array *tr = m->private;
6469 	int i;
6470 
6471 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6472 		seq_printf(m,
6473 			"%s%s%s%s", i ? " " : "",
6474 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6475 			i == tr->clock_id ? "]" : "");
6476 	seq_putc(m, '\n');
6477 
6478 	return 0;
6479 }
6480 
6481 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6482 {
6483 	int i;
6484 
6485 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6486 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6487 			break;
6488 	}
6489 	if (i == ARRAY_SIZE(trace_clocks))
6490 		return -EINVAL;
6491 
6492 	mutex_lock(&trace_types_lock);
6493 
6494 	tr->clock_id = i;
6495 
6496 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6497 
6498 	/*
6499 	 * New clock may not be consistent with the previous clock.
6500 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6501 	 */
6502 	tracing_reset_online_cpus(&tr->trace_buffer);
6503 
6504 #ifdef CONFIG_TRACER_MAX_TRACE
6505 	if (tr->max_buffer.buffer)
6506 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6507 	tracing_reset_online_cpus(&tr->max_buffer);
6508 #endif
6509 
6510 	mutex_unlock(&trace_types_lock);
6511 
6512 	return 0;
6513 }
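
/*
 * A minimal in-kernel usage sketch: switching an instance to one of the
 * clocks listed in trace_clocks, e.g.
 *
 *	ret = tracing_set_clock(tr, "global");
 *
 * Note that the buffers are reset, as timestamps taken with different
 * clocks cannot be meaningfully compared.
 */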
6514 
6515 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6516 				   size_t cnt, loff_t *fpos)
6517 {
6518 	struct seq_file *m = filp->private_data;
6519 	struct trace_array *tr = m->private;
6520 	char buf[64];
6521 	const char *clockstr;
6522 	int ret;
6523 
6524 	if (cnt >= sizeof(buf))
6525 		return -EINVAL;
6526 
6527 	if (copy_from_user(buf, ubuf, cnt))
6528 		return -EFAULT;
6529 
6530 	buf[cnt] = 0;
6531 
6532 	clockstr = strstrip(buf);
6533 
6534 	ret = tracing_set_clock(tr, clockstr);
6535 	if (ret)
6536 		return ret;
6537 
6538 	*fpos += cnt;
6539 
6540 	return cnt;
6541 }
6542 
6543 static int tracing_clock_open(struct inode *inode, struct file *file)
6544 {
6545 	struct trace_array *tr = inode->i_private;
6546 	int ret;
6547 
6548 	if (tracing_disabled)
6549 		return -ENODEV;
6550 
6551 	if (trace_array_get(tr))
6552 		return -ENODEV;
6553 
6554 	ret = single_open(file, tracing_clock_show, inode->i_private);
6555 	if (ret < 0)
6556 		trace_array_put(tr);
6557 
6558 	return ret;
6559 }
6560 
6561 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6562 {
6563 	struct trace_array *tr = m->private;
6564 
6565 	mutex_lock(&trace_types_lock);
6566 
6567 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6568 		seq_puts(m, "delta [absolute]\n");
6569 	else
6570 		seq_puts(m, "[delta] absolute\n");
6571 
6572 	mutex_unlock(&trace_types_lock);
6573 
6574 	return 0;
6575 }
6576 
6577 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6578 {
6579 	struct trace_array *tr = inode->i_private;
6580 	int ret;
6581 
6582 	if (tracing_disabled)
6583 		return -ENODEV;
6584 
6585 	if (trace_array_get(tr))
6586 		return -ENODEV;
6587 
6588 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6589 	if (ret < 0)
6590 		trace_array_put(tr);
6591 
6592 	return ret;
6593 }
6594 
6595 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6596 {
6597 	int ret = 0;
6598 
6599 	mutex_lock(&trace_types_lock);
6600 
6601 	if (abs && tr->time_stamp_abs_ref++)
6602 		goto out;
6603 
6604 	if (!abs) {
6605 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6606 			ret = -EINVAL;
6607 			goto out;
6608 		}
6609 
6610 		if (--tr->time_stamp_abs_ref)
6611 			goto out;
6612 	}
6613 
6614 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6615 
6616 #ifdef CONFIG_TRACER_MAX_TRACE
6617 	if (tr->max_buffer.buffer)
6618 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6619 #endif
6620  out:
6621 	mutex_unlock(&trace_types_lock);
6622 
6623 	return ret;
6624 }
6625 
6626 struct ftrace_buffer_info {
6627 	struct trace_iterator	iter;
6628 	void			*spare;
6629 	unsigned int		spare_cpu;
6630 	unsigned int		read;
6631 };
6632 
6633 #ifdef CONFIG_TRACER_SNAPSHOT
6634 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6635 {
6636 	struct trace_array *tr = inode->i_private;
6637 	struct trace_iterator *iter;
6638 	struct seq_file *m;
6639 	int ret = 0;
6640 
6641 	if (trace_array_get(tr) < 0)
6642 		return -ENODEV;
6643 
6644 	if (file->f_mode & FMODE_READ) {
6645 		iter = __tracing_open(inode, file, true);
6646 		if (IS_ERR(iter))
6647 			ret = PTR_ERR(iter);
6648 	} else {
6649 		/* Writes still need the seq_file to hold the private data */
6650 		ret = -ENOMEM;
6651 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6652 		if (!m)
6653 			goto out;
6654 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6655 		if (!iter) {
6656 			kfree(m);
6657 			goto out;
6658 		}
6659 		ret = 0;
6660 
6661 		iter->tr = tr;
6662 		iter->trace_buffer = &tr->max_buffer;
6663 		iter->cpu_file = tracing_get_cpu(inode);
6664 		m->private = iter;
6665 		file->private_data = m;
6666 	}
6667 out:
6668 	if (ret < 0)
6669 		trace_array_put(tr);
6670 
6671 	return ret;
6672 }
6673 
6674 static ssize_t
6675 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6676 		       loff_t *ppos)
6677 {
6678 	struct seq_file *m = filp->private_data;
6679 	struct trace_iterator *iter = m->private;
6680 	struct trace_array *tr = iter->tr;
6681 	unsigned long val;
6682 	int ret;
6683 
6684 	ret = tracing_update_buffers();
6685 	if (ret < 0)
6686 		return ret;
6687 
6688 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6689 	if (ret)
6690 		return ret;
6691 
6692 	mutex_lock(&trace_types_lock);
6693 
6694 	if (tr->current_trace->use_max_tr) {
6695 		ret = -EBUSY;
6696 		goto out;
6697 	}
6698 
6699 	arch_spin_lock(&tr->max_lock);
6700 	if (tr->cond_snapshot)
6701 		ret = -EBUSY;
6702 	arch_spin_unlock(&tr->max_lock);
6703 	if (ret)
6704 		goto out;
6705 
6706 	switch (val) {
6707 	case 0:
6708 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6709 			ret = -EINVAL;
6710 			break;
6711 		}
6712 		if (tr->allocated_snapshot)
6713 			free_snapshot(tr);
6714 		break;
6715 	case 1:
6716 /* Only allow per-cpu swap if the ring buffer supports it */
6717 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6718 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6719 			ret = -EINVAL;
6720 			break;
6721 		}
6722 #endif
6723 		if (tr->allocated_snapshot)
6724 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6725 					&tr->trace_buffer, iter->cpu_file);
6726 		else
6727 			ret = tracing_alloc_snapshot_instance(tr);
6728 		if (ret < 0)
6729 			break;
6730 		local_irq_disable();
6731 		/* Now, we're going to swap */
6732 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6733 			update_max_tr(tr, current, smp_processor_id(), NULL);
6734 		else
6735 			update_max_tr_single(tr, current, iter->cpu_file);
6736 		local_irq_enable();
6737 		break;
6738 	default:
6739 		if (tr->allocated_snapshot) {
6740 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6741 				tracing_reset_online_cpus(&tr->max_buffer);
6742 			else
6743 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6744 		}
6745 		break;
6746 	}
6747 
6748 	if (ret >= 0) {
6749 		*ppos += cnt;
6750 		ret = cnt;
6751 	}
6752 out:
6753 	mutex_unlock(&trace_types_lock);
6754 	return ret;
6755 }
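
/*
 * The write handler above implements the snapshot file semantics
 * (sketch):
 *
 *	echo 0 > snapshot	frees the snapshot buffer (all-CPUs file only)
 *	echo 1 > snapshot	allocates it if needed and takes a snapshot
 *	echo 2 > snapshot	clears the snapshot contents
 *
 * Any value other than 0 or 1 only clears the snapshot, as the default
 * case above shows.
 */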
6756 
6757 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6758 {
6759 	struct seq_file *m = file->private_data;
6760 	int ret;
6761 
6762 	ret = tracing_release(inode, file);
6763 
6764 	if (file->f_mode & FMODE_READ)
6765 		return ret;
6766 
6767 	/* If write only, the seq_file is just a stub */
6768 	if (m)
6769 		kfree(m->private);
6770 	kfree(m);
6771 
6772 	return 0;
6773 }
6774 
6775 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6776 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6777 				    size_t count, loff_t *ppos);
6778 static int tracing_buffers_release(struct inode *inode, struct file *file);
6779 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6780 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6781 
6782 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6783 {
6784 	struct ftrace_buffer_info *info;
6785 	int ret;
6786 
6787 	ret = tracing_buffers_open(inode, filp);
6788 	if (ret < 0)
6789 		return ret;
6790 
6791 	info = filp->private_data;
6792 
6793 	if (info->iter.trace->use_max_tr) {
6794 		tracing_buffers_release(inode, filp);
6795 		return -EBUSY;
6796 	}
6797 
6798 	info->iter.snapshot = true;
6799 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6800 
6801 	return ret;
6802 }
6803 
6804 #endif /* CONFIG_TRACER_SNAPSHOT */
6805 
6806 
6807 static const struct file_operations tracing_thresh_fops = {
6808 	.open		= tracing_open_generic,
6809 	.read		= tracing_thresh_read,
6810 	.write		= tracing_thresh_write,
6811 	.llseek		= generic_file_llseek,
6812 };
6813 
6814 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6815 static const struct file_operations tracing_max_lat_fops = {
6816 	.open		= tracing_open_generic,
6817 	.read		= tracing_max_lat_read,
6818 	.write		= tracing_max_lat_write,
6819 	.llseek		= generic_file_llseek,
6820 };
6821 #endif
6822 
6823 static const struct file_operations set_tracer_fops = {
6824 	.open		= tracing_open_generic,
6825 	.read		= tracing_set_trace_read,
6826 	.write		= tracing_set_trace_write,
6827 	.llseek		= generic_file_llseek,
6828 };
6829 
6830 static const struct file_operations tracing_pipe_fops = {
6831 	.open		= tracing_open_pipe,
6832 	.poll		= tracing_poll_pipe,
6833 	.read		= tracing_read_pipe,
6834 	.splice_read	= tracing_splice_read_pipe,
6835 	.release	= tracing_release_pipe,
6836 	.llseek		= no_llseek,
6837 };
6838 
6839 static const struct file_operations tracing_entries_fops = {
6840 	.open		= tracing_open_generic_tr,
6841 	.read		= tracing_entries_read,
6842 	.write		= tracing_entries_write,
6843 	.llseek		= generic_file_llseek,
6844 	.release	= tracing_release_generic_tr,
6845 };
6846 
6847 static const struct file_operations tracing_total_entries_fops = {
6848 	.open		= tracing_open_generic_tr,
6849 	.read		= tracing_total_entries_read,
6850 	.llseek		= generic_file_llseek,
6851 	.release	= tracing_release_generic_tr,
6852 };
6853 
6854 static const struct file_operations tracing_free_buffer_fops = {
6855 	.open		= tracing_open_generic_tr,
6856 	.write		= tracing_free_buffer_write,
6857 	.release	= tracing_free_buffer_release,
6858 };
6859 
6860 static const struct file_operations tracing_mark_fops = {
6861 	.open		= tracing_open_generic_tr,
6862 	.write		= tracing_mark_write,
6863 	.llseek		= generic_file_llseek,
6864 	.release	= tracing_release_generic_tr,
6865 };
6866 
6867 static const struct file_operations tracing_mark_raw_fops = {
6868 	.open		= tracing_open_generic_tr,
6869 	.write		= tracing_mark_raw_write,
6870 	.llseek		= generic_file_llseek,
6871 	.release	= tracing_release_generic_tr,
6872 };
6873 
6874 static const struct file_operations trace_clock_fops = {
6875 	.open		= tracing_clock_open,
6876 	.read		= seq_read,
6877 	.llseek		= seq_lseek,
6878 	.release	= tracing_single_release_tr,
6879 	.write		= tracing_clock_write,
6880 };
6881 
6882 static const struct file_operations trace_time_stamp_mode_fops = {
6883 	.open		= tracing_time_stamp_mode_open,
6884 	.read		= seq_read,
6885 	.llseek		= seq_lseek,
6886 	.release	= tracing_single_release_tr,
6887 };
6888 
6889 #ifdef CONFIG_TRACER_SNAPSHOT
6890 static const struct file_operations snapshot_fops = {
6891 	.open		= tracing_snapshot_open,
6892 	.read		= seq_read,
6893 	.write		= tracing_snapshot_write,
6894 	.llseek		= tracing_lseek,
6895 	.release	= tracing_snapshot_release,
6896 };
6897 
6898 static const struct file_operations snapshot_raw_fops = {
6899 	.open		= snapshot_raw_open,
6900 	.read		= tracing_buffers_read,
6901 	.release	= tracing_buffers_release,
6902 	.splice_read	= tracing_buffers_splice_read,
6903 	.llseek		= no_llseek,
6904 };
6905 
6906 #endif /* CONFIG_TRACER_SNAPSHOT */
6907 
6908 #define TRACING_LOG_ERRS_MAX	8
6909 #define TRACING_LOG_LOC_MAX	128
6910 
6911 #define CMD_PREFIX "  Command: "
6912 
6913 struct err_info {
6914 	const char	**errs;	/* ptr to loc-specific array of err strings */
6915 	u8		type;	/* index into errs -> specific err string */
6916 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
6917 	u64		ts;
6918 };
6919 
6920 struct tracing_log_err {
6921 	struct list_head	list;
6922 	struct err_info		info;
6923 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
6924 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6925 };
6926 
6927 static DEFINE_MUTEX(tracing_err_log_lock);
6928 
6929 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6930 {
6931 	struct tracing_log_err *err;
6932 
6933 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6934 		err = kzalloc(sizeof(*err), GFP_KERNEL);
6935 		if (!err)
6936 			err = ERR_PTR(-ENOMEM);
6937 		tr->n_err_log_entries++;
6938 
6939 		return err;
6940 	}
6941 
6942 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6943 	list_del(&err->list);
6944 
6945 	return err;
6946 }
6947 
6948 /**
6949  * err_pos - find the position of a string within a command for error careting
6950  * @cmd: The tracing command that caused the error
6951  * @str: The string to position the caret at within @cmd
6952  *
6953  * Finds the position of the first occurrence of @str within @cmd.  The
6954  * return value can be passed to tracing_log_err() for caret placement
6955  * within @cmd.
6956  *
6957  * Returns the index within @cmd of the first occurrence of @str or 0
6958  * if @str was not found.
6959  */
6960 unsigned int err_pos(char *cmd, const char *str)
6961 {
6962 	char *found;
6963 
6964 	if (WARN_ON(!strlen(cmd)))
6965 		return 0;
6966 
6967 	found = strstr(cmd, str);
6968 	if (found)
6969 		return found - cmd;
6970 
6971 	return 0;
6972 }
6973 
6974 /**
6975  * tracing_log_err - write an error to the tracing error log
6976  * @tr: The associated trace array for the error (NULL for top level array)
6977  * @loc: A string describing where the error occurred
6978  * @cmd: The tracing command that caused the error
6979  * @errs: The array of loc-specific static error strings
6980  * @type: The index into errs[], which produces the specific static err string
6981  * @pos: The position the caret should be placed in the cmd
6982  *
6983  * Writes an error into tracing/error_log of the form:
6984  *
6985  * <loc>: error: <text>
6986  *   Command: <cmd>
6987  *              ^
6988  *
6989  * tracing/error_log is a small log file containing the last
6990  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
6991  * unless there has been a tracing error, and the error log can be
6992  * cleared and have its memory freed by writing the empty string in
6993  * truncation mode to it, i.e. echo > tracing/error_log.
6994  *
6995  * NOTE: the @errs array along with the @type param are used to
6996  * produce a static error string - this string is not copied and saved
6997  * when the error is logged - only a pointer to it is saved.  See
6998  * existing callers for examples of how static strings are typically
6999  * defined for use with tracing_log_err().
7000  */
7001 void tracing_log_err(struct trace_array *tr,
7002 		     const char *loc, const char *cmd,
7003 		     const char **errs, u8 type, u8 pos)
7004 {
7005 	struct tracing_log_err *err;
7006 
7007 	if (!tr)
7008 		tr = &global_trace;
7009 
7010 	mutex_lock(&tracing_err_log_lock);
7011 	err = get_tracing_log_err(tr);
7012 	if (PTR_ERR(err) == -ENOMEM) {
7013 		mutex_unlock(&tracing_err_log_lock);
7014 		return;
7015 	}
7016 
7017 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7018 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7019 
7020 	err->info.errs = errs;
7021 	err->info.type = type;
7022 	err->info.pos = pos;
7023 	err->info.ts = local_clock();
7024 
7025 	list_add_tail(&err->list, &tr->err_log);
7026 	mutex_unlock(&tracing_err_log_lock);
7027 }
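
/*
 * A minimal usage sketch (the names are illustrative, not taken from a
 * real caller): a command parser reporting a bad token might do
 *
 *	static const char *parse_errs[] = { "Unknown keyword" };
 *
 *	tracing_log_err(tr, "my_parser", cmd, parse_errs, 0,
 *			err_pos(cmd, bad_token));
 *
 * which places the caret under the first occurrence of bad_token in the
 * logged command line.
 */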
7028 
7029 static void clear_tracing_err_log(struct trace_array *tr)
7030 {
7031 	struct tracing_log_err *err, *next;
7032 
7033 	mutex_lock(&tracing_err_log_lock);
7034 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7035 		list_del(&err->list);
7036 		kfree(err);
7037 	}
7038 
7039 	tr->n_err_log_entries = 0;
7040 	mutex_unlock(&tracing_err_log_lock);
7041 }
7042 
7043 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7044 {
7045 	struct trace_array *tr = m->private;
7046 
7047 	mutex_lock(&tracing_err_log_lock);
7048 
7049 	return seq_list_start(&tr->err_log, *pos);
7050 }
7051 
7052 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7053 {
7054 	struct trace_array *tr = m->private;
7055 
7056 	return seq_list_next(v, &tr->err_log, pos);
7057 }
7058 
7059 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7060 {
7061 	mutex_unlock(&tracing_err_log_lock);
7062 }
7063 
7064 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7065 {
7066 	u8 i;
7067 
7068 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7069 		seq_putc(m, ' ');
7070 	for (i = 0; i < pos; i++)
7071 		seq_putc(m, ' ');
7072 	seq_puts(m, "^\n");
7073 }
7074 
7075 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7076 {
7077 	struct tracing_log_err *err = v;
7078 
7079 	if (err) {
7080 		const char *err_text = err->info.errs[err->info.type];
7081 		u64 sec = err->info.ts;
7082 		u32 nsec;
7083 
7084 		nsec = do_div(sec, NSEC_PER_SEC);
7085 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7086 			   err->loc, err_text);
7087 		seq_printf(m, "%s", err->cmd);
7088 		tracing_err_log_show_pos(m, err->info.pos);
7089 	}
7090 
7091 	return 0;
7092 }
7093 
7094 static const struct seq_operations tracing_err_log_seq_ops = {
7095 	.start  = tracing_err_log_seq_start,
7096 	.next   = tracing_err_log_seq_next,
7097 	.stop   = tracing_err_log_seq_stop,
7098 	.show   = tracing_err_log_seq_show
7099 };
7100 
7101 static int tracing_err_log_open(struct inode *inode, struct file *file)
7102 {
7103 	struct trace_array *tr = inode->i_private;
7104 	int ret = 0;
7105 
7106 	if (trace_array_get(tr) < 0)
7107 		return -ENODEV;
7108 
7109 	/* If this file was opened for write, then erase contents */
7110 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7111 		clear_tracing_err_log(tr);
7112 
7113 	if (file->f_mode & FMODE_READ) {
7114 		ret = seq_open(file, &tracing_err_log_seq_ops);
7115 		if (!ret) {
7116 			struct seq_file *m = file->private_data;
7117 			m->private = tr;
7118 		} else {
7119 			trace_array_put(tr);
7120 		}
7121 	}
7122 	return ret;
7123 }
7124 
7125 static ssize_t tracing_err_log_write(struct file *file,
7126 				     const char __user *buffer,
7127 				     size_t count, loff_t *ppos)
7128 {
7129 	return count;
7130 }
7131 
7132 static int tracing_err_log_release(struct inode *inode, struct file *file)
7133 {
7134 	struct trace_array *tr = inode->i_private;
7135 
7136 	trace_array_put(tr);
7137 
7138 	if (file->f_mode & FMODE_READ)
7139 		seq_release(inode, file);
7140 
7141 	return 0;
7142 }
7143 
7144 static const struct file_operations tracing_err_log_fops = {
7145 	.open           = tracing_err_log_open,
7146 	.write		= tracing_err_log_write,
7147 	.read           = seq_read,
7148 	.llseek         = seq_lseek,
7149 	.release        = tracing_err_log_release,
7150 };
7151 
7152 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7153 {
7154 	struct trace_array *tr = inode->i_private;
7155 	struct ftrace_buffer_info *info;
7156 	int ret;
7157 
7158 	if (tracing_disabled)
7159 		return -ENODEV;
7160 
7161 	if (trace_array_get(tr) < 0)
7162 		return -ENODEV;
7163 
7164 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7165 	if (!info) {
7166 		trace_array_put(tr);
7167 		return -ENOMEM;
7168 	}
7169 
7170 	mutex_lock(&trace_types_lock);
7171 
7172 	info->iter.tr		= tr;
7173 	info->iter.cpu_file	= tracing_get_cpu(inode);
7174 	info->iter.trace	= tr->current_trace;
7175 	info->iter.trace_buffer = &tr->trace_buffer;
7176 	info->spare		= NULL;
7177 	/* Force reading ring buffer for first read */
7178 	info->read		= (unsigned int)-1;
7179 
7180 	filp->private_data = info;
7181 
7182 	tr->current_trace->ref++;
7183 
7184 	mutex_unlock(&trace_types_lock);
7185 
7186 	ret = nonseekable_open(inode, filp);
7187 	if (ret < 0)
7188 		trace_array_put(tr);
7189 
7190 	return ret;
7191 }
7192 
7193 static __poll_t
7194 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7195 {
7196 	struct ftrace_buffer_info *info = filp->private_data;
7197 	struct trace_iterator *iter = &info->iter;
7198 
7199 	return trace_poll(iter, filp, poll_table);
7200 }
7201 
7202 static ssize_t
7203 tracing_buffers_read(struct file *filp, char __user *ubuf,
7204 		     size_t count, loff_t *ppos)
7205 {
7206 	struct ftrace_buffer_info *info = filp->private_data;
7207 	struct trace_iterator *iter = &info->iter;
7208 	ssize_t ret = 0;
7209 	ssize_t size;
7210 
7211 	if (!count)
7212 		return 0;
7213 
7214 #ifdef CONFIG_TRACER_MAX_TRACE
7215 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7216 		return -EBUSY;
7217 #endif
7218 
7219 	if (!info->spare) {
7220 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7221 							  iter->cpu_file);
7222 		if (IS_ERR(info->spare)) {
7223 			ret = PTR_ERR(info->spare);
7224 			info->spare = NULL;
7225 		} else {
7226 			info->spare_cpu = iter->cpu_file;
7227 		}
7228 	}
7229 	if (!info->spare)
7230 		return ret;
7231 
7232 	/* Do we have previous read data to read? */
7233 	if (info->read < PAGE_SIZE)
7234 		goto read;
7235 
7236  again:
7237 	trace_access_lock(iter->cpu_file);
7238 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7239 				    &info->spare,
7240 				    count,
7241 				    iter->cpu_file, 0);
7242 	trace_access_unlock(iter->cpu_file);
7243 
7244 	if (ret < 0) {
7245 		if (trace_empty(iter)) {
7246 			if ((filp->f_flags & O_NONBLOCK))
7247 				return -EAGAIN;
7248 
7249 			ret = wait_on_pipe(iter, 0);
7250 			if (ret)
7251 				return ret;
7252 
7253 			goto again;
7254 		}
7255 		return 0;
7256 	}
7257 
7258 	info->read = 0;
7259  read:
7260 	size = PAGE_SIZE - info->read;
7261 	if (size > count)
7262 		size = count;
7263 
7264 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7265 	if (ret == size)
7266 		return -EFAULT;
7267 
7268 	size -= ret;
7269 
7270 	*ppos += size;
7271 	info->read += size;
7272 
7273 	return size;
7274 }
7275 
7276 static int tracing_buffers_release(struct inode *inode, struct file *file)
7277 {
7278 	struct ftrace_buffer_info *info = file->private_data;
7279 	struct trace_iterator *iter = &info->iter;
7280 
7281 	mutex_lock(&trace_types_lock);
7282 
7283 	iter->tr->current_trace->ref--;
7284 
7285 	__trace_array_put(iter->tr);
7286 
7287 	if (info->spare)
7288 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7289 					   info->spare_cpu, info->spare);
7290 	kfree(info);
7291 
7292 	mutex_unlock(&trace_types_lock);
7293 
7294 	return 0;
7295 }
7296 
7297 struct buffer_ref {
7298 	struct ring_buffer	*buffer;
7299 	void			*page;
7300 	int			cpu;
7301 	refcount_t		refcount;
7302 };
7303 
7304 static void buffer_ref_release(struct buffer_ref *ref)
7305 {
7306 	if (!refcount_dec_and_test(&ref->refcount))
7307 		return;
7308 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7309 	kfree(ref);
7310 }
7311 
7312 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7313 				    struct pipe_buffer *buf)
7314 {
7315 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7316 
7317 	buffer_ref_release(ref);
7318 	buf->private = 0;
7319 }
7320 
7321 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7322 				struct pipe_buffer *buf)
7323 {
7324 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7325 
7326 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7327 		return false;
7328 
7329 	refcount_inc(&ref->refcount);
7330 	return true;
7331 }
7332 
7333 /* Pipe buffer operations for a buffer. */
7334 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7335 	.confirm		= generic_pipe_buf_confirm,
7336 	.release		= buffer_pipe_buf_release,
7337 	.steal			= generic_pipe_buf_nosteal,
7338 	.get			= buffer_pipe_buf_get,
7339 };
7340 
7341 /*
7342  * Callback from splice_to_pipe(), if we need to release some pages
7343  * at the end of the spd in case we error'ed out in filling the pipe.
7344  */
7345 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7346 {
7347 	struct buffer_ref *ref =
7348 		(struct buffer_ref *)spd->partial[i].private;
7349 
7350 	buffer_ref_release(ref);
7351 	spd->partial[i].private = 0;
7352 }
7353 
7354 static ssize_t
7355 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7356 			    struct pipe_inode_info *pipe, size_t len,
7357 			    unsigned int flags)
7358 {
7359 	struct ftrace_buffer_info *info = file->private_data;
7360 	struct trace_iterator *iter = &info->iter;
7361 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7362 	struct page *pages_def[PIPE_DEF_BUFFERS];
7363 	struct splice_pipe_desc spd = {
7364 		.pages		= pages_def,
7365 		.partial	= partial_def,
7366 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7367 		.ops		= &buffer_pipe_buf_ops,
7368 		.spd_release	= buffer_spd_release,
7369 	};
7370 	struct buffer_ref *ref;
7371 	int entries, i;
7372 	ssize_t ret = 0;
7373 
7374 #ifdef CONFIG_TRACER_MAX_TRACE
7375 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7376 		return -EBUSY;
7377 #endif
7378 
7379 	if (*ppos & (PAGE_SIZE - 1))
7380 		return -EINVAL;
7381 
7382 	if (len & (PAGE_SIZE - 1)) {
7383 		if (len < PAGE_SIZE)
7384 			return -EINVAL;
7385 		len &= PAGE_MASK;
7386 	}
7387 
7388 	if (splice_grow_spd(pipe, &spd))
7389 		return -ENOMEM;
7390 
7391  again:
7392 	trace_access_lock(iter->cpu_file);
7393 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7394 
7395 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7396 		struct page *page;
7397 		int r;
7398 
7399 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7400 		if (!ref) {
7401 			ret = -ENOMEM;
7402 			break;
7403 		}
7404 
7405 		refcount_set(&ref->refcount, 1);
7406 		ref->buffer = iter->trace_buffer->buffer;
7407 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7408 		if (IS_ERR(ref->page)) {
7409 			ret = PTR_ERR(ref->page);
7410 			ref->page = NULL;
7411 			kfree(ref);
7412 			break;
7413 		}
7414 		ref->cpu = iter->cpu_file;
7415 
7416 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7417 					  len, iter->cpu_file, 1);
7418 		if (r < 0) {
7419 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7420 						   ref->page);
7421 			kfree(ref);
7422 			break;
7423 		}
7424 
7425 		page = virt_to_page(ref->page);
7426 
7427 		spd.pages[i] = page;
7428 		spd.partial[i].len = PAGE_SIZE;
7429 		spd.partial[i].offset = 0;
7430 		spd.partial[i].private = (unsigned long)ref;
7431 		spd.nr_pages++;
7432 		*ppos += PAGE_SIZE;
7433 
7434 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7435 	}
7436 
7437 	trace_access_unlock(iter->cpu_file);
7438 	spd.nr_pages = i;
7439 
7440 	/* did we read anything? */
7441 	if (!spd.nr_pages) {
7442 		if (ret)
7443 			goto out;
7444 
7445 		ret = -EAGAIN;
7446 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7447 			goto out;
7448 
7449 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7450 		if (ret)
7451 			goto out;
7452 
7453 		goto again;
7454 	}
7455 
7456 	ret = splice_to_pipe(pipe, &spd);
7457 out:
7458 	splice_shrink_spd(&spd);
7459 
7460 	return ret;
7461 }
7462 
7463 static const struct file_operations tracing_buffers_fops = {
7464 	.open		= tracing_buffers_open,
7465 	.read		= tracing_buffers_read,
7466 	.poll		= tracing_buffers_poll,
7467 	.release	= tracing_buffers_release,
7468 	.splice_read	= tracing_buffers_splice_read,
7469 	.llseek		= no_llseek,
7470 };
7471 
7472 static ssize_t
7473 tracing_stats_read(struct file *filp, char __user *ubuf,
7474 		   size_t count, loff_t *ppos)
7475 {
7476 	struct inode *inode = file_inode(filp);
7477 	struct trace_array *tr = inode->i_private;
7478 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7479 	int cpu = tracing_get_cpu(inode);
7480 	struct trace_seq *s;
7481 	unsigned long cnt;
7482 	unsigned long long t;
7483 	unsigned long usec_rem;
7484 
7485 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7486 	if (!s)
7487 		return -ENOMEM;
7488 
7489 	trace_seq_init(s);
7490 
7491 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7492 	trace_seq_printf(s, "entries: %ld\n", cnt);
7493 
7494 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7495 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7496 
7497 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7498 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7499 
7500 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7501 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7502 
7503 	if (trace_clocks[tr->clock_id].in_ns) {
7504 		/* local or global for trace_clock */
7505 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7506 		usec_rem = do_div(t, USEC_PER_SEC);
7507 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7508 								t, usec_rem);
7509 
7510 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7511 		usec_rem = do_div(t, USEC_PER_SEC);
7512 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7513 	} else {
7514 		/* counter or tsc mode for trace_clock */
7515 		trace_seq_printf(s, "oldest event ts: %llu\n",
7516 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7517 
7518 		trace_seq_printf(s, "now ts: %llu\n",
7519 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7520 	}
7521 
7522 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7523 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7524 
7525 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7526 	trace_seq_printf(s, "read events: %ld\n", cnt);
7527 
7528 	count = simple_read_from_buffer(ubuf, count, ppos,
7529 					s->buffer, trace_seq_used(s));
7530 
7531 	kfree(s);
7532 
7533 	return count;
7534 }
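
/*
 * Reading per_cpu/cpuN/stats therefore produces output along the lines
 * of (values are illustrative):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 65536
 *	oldest event ts:    12.345678
 *	now ts:    13.000000
 *	dropped events: 0
 *	read events: 128
 */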
7535 
7536 static const struct file_operations tracing_stats_fops = {
7537 	.open		= tracing_open_generic_tr,
7538 	.read		= tracing_stats_read,
7539 	.llseek		= generic_file_llseek,
7540 	.release	= tracing_release_generic_tr,
7541 };
7542 
7543 #ifdef CONFIG_DYNAMIC_FTRACE
7544 
7545 static ssize_t
7546 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7547 		  size_t cnt, loff_t *ppos)
7548 {
7549 	unsigned long *p = filp->private_data;
7550 	char buf[64]; /* Not too big for a shallow stack */
7551 	int r;
7552 
7553 	r = scnprintf(buf, 63, "%ld", *p);
7554 	buf[r++] = '\n';
7555 
7556 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7557 }
7558 
7559 static const struct file_operations tracing_dyn_info_fops = {
7560 	.open		= tracing_open_generic,
7561 	.read		= tracing_read_dyn_info,
7562 	.llseek		= generic_file_llseek,
7563 };
7564 #endif /* CONFIG_DYNAMIC_FTRACE */
7565 
7566 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7567 static void
7568 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7569 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7570 		void *data)
7571 {
7572 	tracing_snapshot_instance(tr);
7573 }
7574 
7575 static void
7576 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7577 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7578 		      void *data)
7579 {
7580 	struct ftrace_func_mapper *mapper = data;
7581 	long *count = NULL;
7582 
7583 	if (mapper)
7584 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7585 
7586 	if (count) {
7587 
7588 		if (*count <= 0)
7589 			return;
7590 
7591 		(*count)--;
7592 	}
7593 
7594 	tracing_snapshot_instance(tr);
7595 }
7596 
7597 static int
7598 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7599 		      struct ftrace_probe_ops *ops, void *data)
7600 {
7601 	struct ftrace_func_mapper *mapper = data;
7602 	long *count = NULL;
7603 
7604 	seq_printf(m, "%ps:", (void *)ip);
7605 
7606 	seq_puts(m, "snapshot");
7607 
7608 	if (mapper)
7609 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7610 
7611 	if (count)
7612 		seq_printf(m, ":count=%ld\n", *count);
7613 	else
7614 		seq_puts(m, ":unlimited\n");
7615 
7616 	return 0;
7617 }
7618 
7619 static int
7620 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7621 		     unsigned long ip, void *init_data, void **data)
7622 {
7623 	struct ftrace_func_mapper *mapper = *data;
7624 
7625 	if (!mapper) {
7626 		mapper = allocate_ftrace_func_mapper();
7627 		if (!mapper)
7628 			return -ENOMEM;
7629 		*data = mapper;
7630 	}
7631 
7632 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7633 }
7634 
7635 static void
7636 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7637 		     unsigned long ip, void *data)
7638 {
7639 	struct ftrace_func_mapper *mapper = data;
7640 
7641 	if (!ip) {
7642 		if (!mapper)
7643 			return;
7644 		free_ftrace_func_mapper(mapper, NULL);
7645 		return;
7646 	}
7647 
7648 	ftrace_func_mapper_remove_ip(mapper, ip);
7649 }
7650 
7651 static struct ftrace_probe_ops snapshot_probe_ops = {
7652 	.func			= ftrace_snapshot,
7653 	.print			= ftrace_snapshot_print,
7654 };
7655 
7656 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7657 	.func			= ftrace_count_snapshot,
7658 	.print			= ftrace_snapshot_print,
7659 	.init			= ftrace_snapshot_init,
7660 	.free			= ftrace_snapshot_free,
7661 };
7662 
7663 static int
7664 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7665 			       char *glob, char *cmd, char *param, int enable)
7666 {
7667 	struct ftrace_probe_ops *ops;
7668 	void *count = (void *)-1;
7669 	char *number;
7670 	int ret;
7671 
7672 	if (!tr)
7673 		return -ENODEV;
7674 
7675 	/* hash funcs only work with set_ftrace_filter */
7676 	if (!enable)
7677 		return -EINVAL;
7678 
7679 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7680 
7681 	if (glob[0] == '!')
7682 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7683 
7684 	if (!param)
7685 		goto out_reg;
7686 
7687 	number = strsep(&param, ":");
7688 
7689 	if (!strlen(number))
7690 		goto out_reg;
7691 
7692 	/*
7693 	 * We use the callback data field (which is a pointer)
7694 	 * as our counter.
7695 	 */
7696 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7697 	if (ret)
7698 		return ret;
7699 
7700  out_reg:
7701 	ret = tracing_alloc_snapshot_instance(tr);
7702 	if (ret < 0)
7703 		goto out;
7704 
7705 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7706 
7707  out:
7708 	return ret < 0 ? ret : 0;
7709 }
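
/*
 * Usage sketch for the callback above (paths assume tracefs mounted at
 * /sys/kernel/tracing; the traced function is only an example):
 *
 *   # take a snapshot each time kfree() is hit, at most 5 times
 *   echo 'kfree:snapshot:5' > set_ftrace_filter
 *
 *   # unlimited form, and removing the probe again
 *   echo 'kfree:snapshot' > set_ftrace_filter
 *   echo '!kfree:snapshot' > set_ftrace_filter
 *
 * The optional ":5" is what arrives here as @param and, via kstrtoul(),
 * becomes the count carried in the probe's data pointer.
 */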
7710 
7711 static struct ftrace_func_command ftrace_snapshot_cmd = {
7712 	.name			= "snapshot",
7713 	.func			= ftrace_trace_snapshot_callback,
7714 };
7715 
7716 static __init int register_snapshot_cmd(void)
7717 {
7718 	return register_ftrace_command(&ftrace_snapshot_cmd);
7719 }
7720 #else
7721 static inline __init int register_snapshot_cmd(void) { return 0; }
7722 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7723 
7724 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7725 {
7726 	if (WARN_ON(!tr->dir))
7727 		return ERR_PTR(-ENODEV);
7728 
7729 	/* Top directory uses NULL as the parent */
7730 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7731 		return NULL;
7732 
7733 	/* All sub buffers have a descriptor */
7734 	return tr->dir;
7735 }
7736 
7737 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7738 {
7739 	struct dentry *d_tracer;
7740 
7741 	if (tr->percpu_dir)
7742 		return tr->percpu_dir;
7743 
7744 	d_tracer = tracing_get_dentry(tr);
7745 	if (IS_ERR(d_tracer))
7746 		return NULL;
7747 
7748 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7749 
7750 	WARN_ONCE(!tr->percpu_dir,
7751 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7752 
7753 	return tr->percpu_dir;
7754 }
7755 
7756 static struct dentry *
7757 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7758 		      void *data, long cpu, const struct file_operations *fops)
7759 {
7760 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7761 
7762 	if (ret) /* See tracing_get_cpu() */
7763 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7764 	return ret;
7765 }
7766 
7767 static void
7768 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7769 {
7770 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7771 	struct dentry *d_cpu;
7772 	char cpu_dir[30]; /* 30 characters should be more than enough */
7773 
7774 	if (!d_percpu)
7775 		return;
7776 
7777 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7778 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7779 	if (!d_cpu) {
7780 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7781 		return;
7782 	}
7783 
7784 	/* per cpu trace_pipe */
7785 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7786 				tr, cpu, &tracing_pipe_fops);
7787 
7788 	/* per cpu trace */
7789 	trace_create_cpu_file("trace", 0644, d_cpu,
7790 				tr, cpu, &tracing_fops);
7791 
7792 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7793 				tr, cpu, &tracing_buffers_fops);
7794 
7795 	trace_create_cpu_file("stats", 0444, d_cpu,
7796 				tr, cpu, &tracing_stats_fops);
7797 
7798 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7799 				tr, cpu, &tracing_entries_fops);
7800 
7801 #ifdef CONFIG_TRACER_SNAPSHOT
7802 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7803 				tr, cpu, &snapshot_fops);
7804 
7805 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7806 				tr, cpu, &snapshot_raw_fops);
7807 #endif
7808 }
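
/*
 * Directory layout this creates for each CPU handed to it, relative to
 * the instance directory (snapshot files only with CONFIG_TRACER_SNAPSHOT):
 *
 *   per_cpu/cpu<N>/trace
 *   per_cpu/cpu<N>/trace_pipe
 *   per_cpu/cpu<N>/trace_pipe_raw
 *   per_cpu/cpu<N>/stats
 *   per_cpu/cpu<N>/buffer_size_kb
 *   per_cpu/cpu<N>/snapshot
 *   per_cpu/cpu<N>/snapshot_raw
 */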
7809 
7810 #ifdef CONFIG_FTRACE_SELFTEST
7811 /* Let selftest have access to static functions in this file */
7812 #include "trace_selftest.c"
7813 #endif
7814 
7815 static ssize_t
7816 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7817 			loff_t *ppos)
7818 {
7819 	struct trace_option_dentry *topt = filp->private_data;
7820 	char *buf;
7821 
7822 	if (topt->flags->val & topt->opt->bit)
7823 		buf = "1\n";
7824 	else
7825 		buf = "0\n";
7826 
7827 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7828 }
7829 
7830 static ssize_t
7831 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7832 			 loff_t *ppos)
7833 {
7834 	struct trace_option_dentry *topt = filp->private_data;
7835 	unsigned long val;
7836 	int ret;
7837 
7838 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7839 	if (ret)
7840 		return ret;
7841 
7842 	if (val != 0 && val != 1)
7843 		return -EINVAL;
7844 
7845 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7846 		mutex_lock(&trace_types_lock);
7847 		ret = __set_tracer_option(topt->tr, topt->flags,
7848 					  topt->opt, !val);
7849 		mutex_unlock(&trace_types_lock);
7850 		if (ret)
7851 			return ret;
7852 	}
7853 
7854 	*ppos += cnt;
7855 
7856 	return cnt;
7857 }
7858 
7859 
7860 static const struct file_operations trace_options_fops = {
7861 	.open = tracing_open_generic,
7862 	.read = trace_options_read,
7863 	.write = trace_options_write,
7864 	.llseek	= generic_file_llseek,
7865 };
7866 
7867 /*
7868  * In order to pass in both the trace_array descriptor as well as the index
7869  * to the flag that the trace option file represents, the trace_array
7870  * has a character array of trace_flags_index[], which holds the index
7871  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7872  * The address of this character array is passed to the flag option file
7873  * read/write callbacks.
7874  *
7875  * In order to extract both the index and the trace_array descriptor,
7876  * get_tr_index() uses the following algorithm.
7877  *
7878  *   idx = *ptr;
7879  *
7880  * Because each element of the index array holds its own position
7881  * (index[1] == 1), dereferencing the pointer yields the flag's index.
7882  *
7883  * Subtracting that index from the pointer then gets us back to the
7884  * start of the array:
7885  *
7886  *   ptr - idx == &index[0]
7887  *
7888  * and a simple container_of() on that address recovers the enclosing
7889  * trace_array descriptor.
7890  */
7891 static void get_tr_index(void *data, struct trace_array **ptr,
7892 			 unsigned int *pindex)
7893 {
7894 	*pindex = *(unsigned char *)data;
7895 
7896 	*ptr = container_of(data - *pindex, struct trace_array,
7897 			    trace_flags_index);
7898 }
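
/*
 * Minimal illustration of the scheme described above (not used anywhere;
 * the helper name is hypothetical): given the private_data pointer handed
 * to the core flag file callbacks, this is all it takes to test the bit.
 */
static __maybe_unused bool example_core_flag_is_set(void *data)
{
	struct trace_array *tr;
	unsigned int index;

	get_tr_index(data, &tr, &index);

	return tr->trace_flags & (1 << index);
}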
7899 
7900 static ssize_t
7901 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7902 			loff_t *ppos)
7903 {
7904 	void *tr_index = filp->private_data;
7905 	struct trace_array *tr;
7906 	unsigned int index;
7907 	char *buf;
7908 
7909 	get_tr_index(tr_index, &tr, &index);
7910 
7911 	if (tr->trace_flags & (1 << index))
7912 		buf = "1\n";
7913 	else
7914 		buf = "0\n";
7915 
7916 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7917 }
7918 
7919 static ssize_t
7920 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7921 			 loff_t *ppos)
7922 {
7923 	void *tr_index = filp->private_data;
7924 	struct trace_array *tr;
7925 	unsigned int index;
7926 	unsigned long val;
7927 	int ret;
7928 
7929 	get_tr_index(tr_index, &tr, &index);
7930 
7931 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7932 	if (ret)
7933 		return ret;
7934 
7935 	if (val != 0 && val != 1)
7936 		return -EINVAL;
7937 
7938 	mutex_lock(&trace_types_lock);
7939 	ret = set_tracer_flag(tr, 1 << index, val);
7940 	mutex_unlock(&trace_types_lock);
7941 
7942 	if (ret < 0)
7943 		return ret;
7944 
7945 	*ppos += cnt;
7946 
7947 	return cnt;
7948 }
7949 
7950 static const struct file_operations trace_options_core_fops = {
7951 	.open = tracing_open_generic,
7952 	.read = trace_options_core_read,
7953 	.write = trace_options_core_write,
7954 	.llseek = generic_file_llseek,
7955 };
7956 
7957 struct dentry *trace_create_file(const char *name,
7958 				 umode_t mode,
7959 				 struct dentry *parent,
7960 				 void *data,
7961 				 const struct file_operations *fops)
7962 {
7963 	struct dentry *ret;
7964 
7965 	ret = tracefs_create_file(name, mode, parent, data, fops);
7966 	if (!ret)
7967 		pr_warn("Could not create tracefs '%s' entry\n", name);
7968 
7969 	return ret;
7970 }
7971 
7972 
7973 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7974 {
7975 	struct dentry *d_tracer;
7976 
7977 	if (tr->options)
7978 		return tr->options;
7979 
7980 	d_tracer = tracing_get_dentry(tr);
7981 	if (IS_ERR(d_tracer))
7982 		return NULL;
7983 
7984 	tr->options = tracefs_create_dir("options", d_tracer);
7985 	if (!tr->options) {
7986 		pr_warn("Could not create tracefs directory 'options'\n");
7987 		return NULL;
7988 	}
7989 
7990 	return tr->options;
7991 }
7992 
7993 static void
7994 create_trace_option_file(struct trace_array *tr,
7995 			 struct trace_option_dentry *topt,
7996 			 struct tracer_flags *flags,
7997 			 struct tracer_opt *opt)
7998 {
7999 	struct dentry *t_options;
8000 
8001 	t_options = trace_options_init_dentry(tr);
8002 	if (!t_options)
8003 		return;
8004 
8005 	topt->flags = flags;
8006 	topt->opt = opt;
8007 	topt->tr = tr;
8008 
8009 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8010 				    &trace_options_fops);
8011 
8012 }
8013 
8014 static void
8015 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8016 {
8017 	struct trace_option_dentry *topts;
8018 	struct trace_options *tr_topts;
8019 	struct tracer_flags *flags;
8020 	struct tracer_opt *opts;
8021 	int cnt;
8022 	int i;
8023 
8024 	if (!tracer)
8025 		return;
8026 
8027 	flags = tracer->flags;
8028 
8029 	if (!flags || !flags->opts)
8030 		return;
8031 
8032 	/*
8033 	 * If this is an instance, only create flags for tracers
8034 	 * the instance may have.
8035 	 */
8036 	if (!trace_ok_for_array(tracer, tr))
8037 		return;
8038 
8039 	for (i = 0; i < tr->nr_topts; i++) {
8040 		/* Make sure there are no duplicate flags. */
8041 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8042 			return;
8043 	}
8044 
8045 	opts = flags->opts;
8046 
8047 	for (cnt = 0; opts[cnt].name; cnt++)
8048 		;
8049 
8050 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8051 	if (!topts)
8052 		return;
8053 
8054 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8055 			    GFP_KERNEL);
8056 	if (!tr_topts) {
8057 		kfree(topts);
8058 		return;
8059 	}
8060 
8061 	tr->topts = tr_topts;
8062 	tr->topts[tr->nr_topts].tracer = tracer;
8063 	tr->topts[tr->nr_topts].topts = topts;
8064 	tr->nr_topts++;
8065 
8066 	for (cnt = 0; opts[cnt].name; cnt++) {
8067 		create_trace_option_file(tr, &topts[cnt], flags,
8068 					 &opts[cnt]);
8069 		WARN_ONCE(topts[cnt].entry == NULL,
8070 			  "Failed to create trace option: %s",
8071 			  opts[cnt].name);
8072 	}
8073 }
8074 
8075 static struct dentry *
8076 create_trace_option_core_file(struct trace_array *tr,
8077 			      const char *option, long index)
8078 {
8079 	struct dentry *t_options;
8080 
8081 	t_options = trace_options_init_dentry(tr);
8082 	if (!t_options)
8083 		return NULL;
8084 
8085 	return trace_create_file(option, 0644, t_options,
8086 				 (void *)&tr->trace_flags_index[index],
8087 				 &trace_options_core_fops);
8088 }
8089 
8090 static void create_trace_options_dir(struct trace_array *tr)
8091 {
8092 	struct dentry *t_options;
8093 	bool top_level = tr == &global_trace;
8094 	int i;
8095 
8096 	t_options = trace_options_init_dentry(tr);
8097 	if (!t_options)
8098 		return;
8099 
8100 	for (i = 0; trace_options[i]; i++) {
8101 		if (top_level ||
8102 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8103 			create_trace_option_core_file(tr, trace_options[i], i);
8104 	}
8105 }
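
/*
 * Net effect of the helpers above: each bit of tr->trace_flags (and each
 * tracer-specific option) becomes a file under options/ that accepts "0"
 * or "1", e.g. (path assumes the usual tracefs mount point):
 *
 *   echo 1 > /sys/kernel/tracing/options/print-parent
 *
 * Instances get their own options/ directory as well, minus the flags in
 * TOP_LEVEL_TRACE_FLAGS.
 */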
8106 
8107 static ssize_t
8108 rb_simple_read(struct file *filp, char __user *ubuf,
8109 	       size_t cnt, loff_t *ppos)
8110 {
8111 	struct trace_array *tr = filp->private_data;
8112 	char buf[64];
8113 	int r;
8114 
8115 	r = tracer_tracing_is_on(tr);
8116 	r = sprintf(buf, "%d\n", r);
8117 
8118 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8119 }
8120 
8121 static ssize_t
8122 rb_simple_write(struct file *filp, const char __user *ubuf,
8123 		size_t cnt, loff_t *ppos)
8124 {
8125 	struct trace_array *tr = filp->private_data;
8126 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8127 	unsigned long val;
8128 	int ret;
8129 
8130 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8131 	if (ret)
8132 		return ret;
8133 
8134 	if (buffer) {
8135 		mutex_lock(&trace_types_lock);
8136 		if (!!val == tracer_tracing_is_on(tr)) {
8137 			val = 0; /* do nothing */
8138 		} else if (val) {
8139 			tracer_tracing_on(tr);
8140 			if (tr->current_trace->start)
8141 				tr->current_trace->start(tr);
8142 		} else {
8143 			tracer_tracing_off(tr);
8144 			if (tr->current_trace->stop)
8145 				tr->current_trace->stop(tr);
8146 		}
8147 		mutex_unlock(&trace_types_lock);
8148 	}
8149 
8150 	(*ppos)++;
8151 
8152 	return cnt;
8153 }
8154 
8155 static const struct file_operations rb_simple_fops = {
8156 	.open		= tracing_open_generic_tr,
8157 	.read		= rb_simple_read,
8158 	.write		= rb_simple_write,
8159 	.release	= tracing_release_generic_tr,
8160 	.llseek		= default_llseek,
8161 };
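
/*
 * rb_simple_fops backs the "tracing_on" file created by
 * init_tracer_tracefs() below.  Writing "0"/"1" there stops/restarts
 * recording (and calls the tracer's stop/start hooks) without tearing
 * anything else down, e.g. (path assumed):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */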
8162 
8163 static ssize_t
8164 buffer_percent_read(struct file *filp, char __user *ubuf,
8165 		    size_t cnt, loff_t *ppos)
8166 {
8167 	struct trace_array *tr = filp->private_data;
8168 	char buf[64];
8169 	int r;
8170 
8171 	r = tr->buffer_percent;
8172 	r = sprintf(buf, "%d\n", r);
8173 
8174 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8175 }
8176 
8177 static ssize_t
8178 buffer_percent_write(struct file *filp, const char __user *ubuf,
8179 		     size_t cnt, loff_t *ppos)
8180 {
8181 	struct trace_array *tr = filp->private_data;
8182 	unsigned long val;
8183 	int ret;
8184 
8185 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8186 	if (ret)
8187 		return ret;
8188 
8189 	if (val > 100)
8190 		return -EINVAL;
8191 
8192 	if (!val)
8193 		val = 1;
8194 
8195 	tr->buffer_percent = val;
8196 
8197 	(*ppos)++;
8198 
8199 	return cnt;
8200 }
8201 
8202 static const struct file_operations buffer_percent_fops = {
8203 	.open		= tracing_open_generic_tr,
8204 	.read		= buffer_percent_read,
8205 	.write		= buffer_percent_write,
8206 	.release	= tracing_release_generic_tr,
8207 	.llseek		= default_llseek,
8208 };
8209 
8210 static struct dentry *trace_instance_dir;
8211 
8212 static void
8213 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8214 
8215 static int
8216 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8217 {
8218 	enum ring_buffer_flags rb_flags;
8219 
8220 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8221 
8222 	buf->tr = tr;
8223 
8224 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8225 	if (!buf->buffer)
8226 		return -ENOMEM;
8227 
8228 	buf->data = alloc_percpu(struct trace_array_cpu);
8229 	if (!buf->data) {
8230 		ring_buffer_free(buf->buffer);
8231 		buf->buffer = NULL;
8232 		return -ENOMEM;
8233 	}
8234 
8235 	/* Allocate the first page for all buffers */
8236 	set_buffer_entries(&tr->trace_buffer,
8237 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8238 
8239 	return 0;
8240 }
8241 
8242 static int allocate_trace_buffers(struct trace_array *tr, int size)
8243 {
8244 	int ret;
8245 
8246 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8247 	if (ret)
8248 		return ret;
8249 
8250 #ifdef CONFIG_TRACER_MAX_TRACE
8251 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8252 				    allocate_snapshot ? size : 1);
8253 	if (WARN_ON(ret)) {
8254 		ring_buffer_free(tr->trace_buffer.buffer);
8255 		tr->trace_buffer.buffer = NULL;
8256 		free_percpu(tr->trace_buffer.data);
8257 		tr->trace_buffer.data = NULL;
8258 		return -ENOMEM;
8259 	}
8260 	tr->allocated_snapshot = allocate_snapshot;
8261 
8262 	/*
8263 	 * Only the top level trace array gets its snapshot allocated
8264 	 * from the kernel command line.
8265 	 */
8266 	allocate_snapshot = false;
8267 #endif
8268 	return 0;
8269 }
8270 
8271 static void free_trace_buffer(struct trace_buffer *buf)
8272 {
8273 	if (buf->buffer) {
8274 		ring_buffer_free(buf->buffer);
8275 		buf->buffer = NULL;
8276 		free_percpu(buf->data);
8277 		buf->data = NULL;
8278 	}
8279 }
8280 
8281 static void free_trace_buffers(struct trace_array *tr)
8282 {
8283 	if (!tr)
8284 		return;
8285 
8286 	free_trace_buffer(&tr->trace_buffer);
8287 
8288 #ifdef CONFIG_TRACER_MAX_TRACE
8289 	free_trace_buffer(&tr->max_buffer);
8290 #endif
8291 }
8292 
8293 static void init_trace_flags_index(struct trace_array *tr)
8294 {
8295 	int i;
8296 
8297 	/* Used by the trace options files */
8298 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8299 		tr->trace_flags_index[i] = i;
8300 }
8301 
8302 static void __update_tracer_options(struct trace_array *tr)
8303 {
8304 	struct tracer *t;
8305 
8306 	for (t = trace_types; t; t = t->next)
8307 		add_tracer_options(tr, t);
8308 }
8309 
8310 static void update_tracer_options(struct trace_array *tr)
8311 {
8312 	mutex_lock(&trace_types_lock);
8313 	__update_tracer_options(tr);
8314 	mutex_unlock(&trace_types_lock);
8315 }
8316 
8317 struct trace_array *trace_array_create(const char *name)
8318 {
8319 	struct trace_array *tr;
8320 	int ret;
8321 
8322 	mutex_lock(&event_mutex);
8323 	mutex_lock(&trace_types_lock);
8324 
8325 	ret = -EEXIST;
8326 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8327 		if (tr->name && strcmp(tr->name, name) == 0)
8328 			goto out_unlock;
8329 	}
8330 
8331 	ret = -ENOMEM;
8332 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8333 	if (!tr)
8334 		goto out_unlock;
8335 
8336 	tr->name = kstrdup(name, GFP_KERNEL);
8337 	if (!tr->name)
8338 		goto out_free_tr;
8339 
8340 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8341 		goto out_free_tr;
8342 
8343 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8344 
8345 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8346 
8347 	raw_spin_lock_init(&tr->start_lock);
8348 
8349 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8350 
8351 	tr->current_trace = &nop_trace;
8352 
8353 	INIT_LIST_HEAD(&tr->systems);
8354 	INIT_LIST_HEAD(&tr->events);
8355 	INIT_LIST_HEAD(&tr->hist_vars);
8356 	INIT_LIST_HEAD(&tr->err_log);
8357 
8358 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8359 		goto out_free_tr;
8360 
8361 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8362 	if (!tr->dir)
8363 		goto out_free_tr;
8364 
8365 	ret = event_trace_add_tracer(tr->dir, tr);
8366 	if (ret) {
8367 		tracefs_remove_recursive(tr->dir);
8368 		goto out_free_tr;
8369 	}
8370 
8371 	ftrace_init_trace_array(tr);
8372 
8373 	init_tracer_tracefs(tr, tr->dir);
8374 	init_trace_flags_index(tr);
8375 	__update_tracer_options(tr);
8376 
8377 	list_add(&tr->list, &ftrace_trace_arrays);
8378 
8379 	mutex_unlock(&trace_types_lock);
8380 	mutex_unlock(&event_mutex);
8381 
8382 	return tr;
8383 
8384  out_free_tr:
8385 	free_trace_buffers(tr);
8386 	free_cpumask_var(tr->tracing_cpumask);
8387 	kfree(tr->name);
8388 	kfree(tr);
8389 
8390  out_unlock:
8391 	mutex_unlock(&trace_types_lock);
8392 	mutex_unlock(&event_mutex);
8393 
8394 	return ERR_PTR(ret);
8395 }
8396 EXPORT_SYMBOL_GPL(trace_array_create);
8397 
8398 static int instance_mkdir(const char *name)
8399 {
8400 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8401 }
8402 
8403 static int __remove_instance(struct trace_array *tr)
8404 {
8405 	int i;
8406 
8407 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8408 		return -EBUSY;
8409 
8410 	list_del(&tr->list);
8411 
8412 	/* Disable all the flags that were enabled coming in */
8413 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8414 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8415 			set_tracer_flag(tr, 1 << i, 0);
8416 	}
8417 
8418 	tracing_set_nop(tr);
8419 	clear_ftrace_function_probes(tr);
8420 	event_trace_del_tracer(tr);
8421 	ftrace_clear_pids(tr);
8422 	ftrace_destroy_function_files(tr);
8423 	tracefs_remove_recursive(tr->dir);
8424 	free_trace_buffers(tr);
8425 
8426 	for (i = 0; i < tr->nr_topts; i++) {
8427 		kfree(tr->topts[i].topts);
8428 	}
8429 	kfree(tr->topts);
8430 
8431 	free_cpumask_var(tr->tracing_cpumask);
8432 	kfree(tr->name);
8433 	kfree(tr);
8434 	tr = NULL;
8435 
8436 	return 0;
8437 }
8438 
8439 int trace_array_destroy(struct trace_array *tr)
8440 {
8441 	int ret;
8442 
8443 	if (!tr)
8444 		return -EINVAL;
8445 
8446 	mutex_lock(&event_mutex);
8447 	mutex_lock(&trace_types_lock);
8448 
8449 	ret = __remove_instance(tr);
8450 
8451 	mutex_unlock(&trace_types_lock);
8452 	mutex_unlock(&event_mutex);
8453 
8454 	return ret;
8455 }
8456 EXPORT_SYMBOL_GPL(trace_array_destroy);
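
/*
 * Minimal in-kernel usage sketch for the two exported calls above
 * (hypothetical code, not part of this file): create a named instance,
 * use it, then tear it down.  Error handling mirrors instance_mkdir().
 */
static __maybe_unused int example_instance_lifecycle(void)
{
	struct trace_array *tr;

	tr = trace_array_create("example");	/* instance name is made up */
	if (IS_ERR(tr))
		return PTR_ERR(tr);

	/* ... trace events directed at this instance would land in tr ... */

	return trace_array_destroy(tr);
}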
8457 
8458 static int instance_rmdir(const char *name)
8459 {
8460 	struct trace_array *tr;
8461 	int ret;
8462 
8463 	mutex_lock(&event_mutex);
8464 	mutex_lock(&trace_types_lock);
8465 
8466 	ret = -ENODEV;
8467 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8468 		if (tr->name && strcmp(tr->name, name) == 0) {
8469 			ret = __remove_instance(tr);
8470 			break;
8471 		}
8472 	}
8473 
8474 	mutex_unlock(&trace_types_lock);
8475 	mutex_unlock(&event_mutex);
8476 
8477 	return ret;
8478 }
8479 
8480 static __init void create_trace_instances(struct dentry *d_tracer)
8481 {
8482 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8483 							 instance_mkdir,
8484 							 instance_rmdir);
8485 	if (WARN_ON(!trace_instance_dir))
8486 		return;
8487 }
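
/*
 * From userspace the same callbacks are reached through the directory
 * registered above (path assumes the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 */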
8488 
8489 static void
8490 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8491 {
8492 	struct trace_event_file *file;
8493 	int cpu;
8494 
8495 	trace_create_file("available_tracers", 0444, d_tracer,
8496 			tr, &show_traces_fops);
8497 
8498 	trace_create_file("current_tracer", 0644, d_tracer,
8499 			tr, &set_tracer_fops);
8500 
8501 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8502 			  tr, &tracing_cpumask_fops);
8503 
8504 	trace_create_file("trace_options", 0644, d_tracer,
8505 			  tr, &tracing_iter_fops);
8506 
8507 	trace_create_file("trace", 0644, d_tracer,
8508 			  tr, &tracing_fops);
8509 
8510 	trace_create_file("trace_pipe", 0444, d_tracer,
8511 			  tr, &tracing_pipe_fops);
8512 
8513 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8514 			  tr, &tracing_entries_fops);
8515 
8516 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8517 			  tr, &tracing_total_entries_fops);
8518 
8519 	trace_create_file("free_buffer", 0200, d_tracer,
8520 			  tr, &tracing_free_buffer_fops);
8521 
8522 	trace_create_file("trace_marker", 0220, d_tracer,
8523 			  tr, &tracing_mark_fops);
8524 
8525 	file = __find_event_file(tr, "ftrace", "print");
8526 	if (file && file->dir)
8527 		trace_create_file("trigger", 0644, file->dir, file,
8528 				  &event_trigger_fops);
8529 	tr->trace_marker_file = file;
8530 
8531 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8532 			  tr, &tracing_mark_raw_fops);
8533 
8534 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8535 			  &trace_clock_fops);
8536 
8537 	trace_create_file("tracing_on", 0644, d_tracer,
8538 			  tr, &rb_simple_fops);
8539 
8540 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8541 			  &trace_time_stamp_mode_fops);
8542 
8543 	tr->buffer_percent = 50;
8544 
8545 	trace_create_file("buffer_percent", 0444, d_tracer,
8546 			tr, &buffer_percent_fops);
8547 
8548 	create_trace_options_dir(tr);
8549 
8550 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8551 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8552 			&tr->max_latency, &tracing_max_lat_fops);
8553 #endif
8554 
8555 	if (ftrace_create_function_files(tr, d_tracer))
8556 		WARN(1, "Could not allocate function filter files");
8557 
8558 #ifdef CONFIG_TRACER_SNAPSHOT
8559 	trace_create_file("snapshot", 0644, d_tracer,
8560 			  tr, &snapshot_fops);
8561 #endif
8562 
8563 	trace_create_file("error_log", 0644, d_tracer,
8564 			  tr, &tracing_err_log_fops);
8565 
8566 	for_each_tracing_cpu(cpu)
8567 		tracing_init_tracefs_percpu(tr, cpu);
8568 
8569 	ftrace_init_tracefs(tr, d_tracer);
8570 }
8571 
8572 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8573 {
8574 	struct vfsmount *mnt;
8575 	struct file_system_type *type;
8576 
8577 	/*
8578 	 * To maintain backward compatibility for tools that mount
8579 	 * debugfs to get to the tracing facility, tracefs is automatically
8580 	 * mounted to the debugfs/tracing directory.
8581 	 */
8582 	type = get_fs_type("tracefs");
8583 	if (!type)
8584 		return NULL;
8585 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8586 	put_filesystem(type);
8587 	if (IS_ERR(mnt))
8588 		return NULL;
8589 	mntget(mnt);
8590 
8591 	return mnt;
8592 }
8593 
8594 /**
8595  * tracing_init_dentry - initialize top level trace array
8596  *
8597  * This is called when creating files or directories in the tracing
8598  * directory. It is called via fs_initcall() by the boot-up code and is
8599  * expected to return the dentry of the top level tracing directory.
8600  */
8601 struct dentry *tracing_init_dentry(void)
8602 {
8603 	struct trace_array *tr = &global_trace;
8604 
8605 	/* The top level trace array uses NULL as parent */
8606 	if (tr->dir)
8607 		return NULL;
8608 
8609 	if (WARN_ON(!tracefs_initialized()) ||
8610 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8611 		 WARN_ON(!debugfs_initialized())))
8612 		return ERR_PTR(-ENODEV);
8613 
8614 	/*
8615 	 * As there may still be users that expect the tracing
8616 	 * files to exist in debugfs/tracing, we must automount
8617 	 * the tracefs file system there, so older tools still
8618 	 * work with the newer kernel.
8619 	 */
8620 	tr->dir = debugfs_create_automount("tracing", NULL,
8621 					   trace_automount, NULL);
8622 
8623 	return NULL;
8624 }
8625 
8626 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8627 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8628 
8629 static void __init trace_eval_init(void)
8630 {
8631 	int len;
8632 
8633 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8634 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8635 }
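
/*
 * The maps walked here are normally emitted by TRACE_DEFINE_ENUM() (and
 * TRACE_DEFINE_SIZEOF()) in event header files, e.g.:
 *
 *   TRACE_DEFINE_ENUM(MY_ENUM_VALUE);   (MY_ENUM_VALUE is a placeholder)
 *
 * which lets symbols used in an event's print fmt be resolved to their
 * numeric values.
 */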
8636 
8637 #ifdef CONFIG_MODULES
8638 static void trace_module_add_evals(struct module *mod)
8639 {
8640 	if (!mod->num_trace_evals)
8641 		return;
8642 
8643 	/*
8644 	 * Modules with bad taint do not have events created, do
8645 	 * not bother with enums either.
8646 	 */
8647 	if (trace_module_has_bad_taint(mod))
8648 		return;
8649 
8650 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8651 }
8652 
8653 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8654 static void trace_module_remove_evals(struct module *mod)
8655 {
8656 	union trace_eval_map_item *map;
8657 	union trace_eval_map_item **last = &trace_eval_maps;
8658 
8659 	if (!mod->num_trace_evals)
8660 		return;
8661 
8662 	mutex_lock(&trace_eval_mutex);
8663 
8664 	map = trace_eval_maps;
8665 
8666 	while (map) {
8667 		if (map->head.mod == mod)
8668 			break;
8669 		map = trace_eval_jmp_to_tail(map);
8670 		last = &map->tail.next;
8671 		map = map->tail.next;
8672 	}
8673 	if (!map)
8674 		goto out;
8675 
8676 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8677 	kfree(map);
8678  out:
8679 	mutex_unlock(&trace_eval_mutex);
8680 }
8681 #else
8682 static inline void trace_module_remove_evals(struct module *mod) { }
8683 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8684 
8685 static int trace_module_notify(struct notifier_block *self,
8686 			       unsigned long val, void *data)
8687 {
8688 	struct module *mod = data;
8689 
8690 	switch (val) {
8691 	case MODULE_STATE_COMING:
8692 		trace_module_add_evals(mod);
8693 		break;
8694 	case MODULE_STATE_GOING:
8695 		trace_module_remove_evals(mod);
8696 		break;
8697 	}
8698 
8699 	return 0;
8700 }
8701 
8702 static struct notifier_block trace_module_nb = {
8703 	.notifier_call = trace_module_notify,
8704 	.priority = 0,
8705 };
8706 #endif /* CONFIG_MODULES */
8707 
8708 static __init int tracer_init_tracefs(void)
8709 {
8710 	struct dentry *d_tracer;
8711 
8712 	trace_access_lock_init();
8713 
8714 	d_tracer = tracing_init_dentry();
8715 	if (IS_ERR(d_tracer))
8716 		return 0;
8717 
8718 	event_trace_init();
8719 
8720 	init_tracer_tracefs(&global_trace, d_tracer);
8721 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8722 
8723 	trace_create_file("tracing_thresh", 0644, d_tracer,
8724 			&global_trace, &tracing_thresh_fops);
8725 
8726 	trace_create_file("README", 0444, d_tracer,
8727 			NULL, &tracing_readme_fops);
8728 
8729 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8730 			NULL, &tracing_saved_cmdlines_fops);
8731 
8732 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8733 			  NULL, &tracing_saved_cmdlines_size_fops);
8734 
8735 	trace_create_file("saved_tgids", 0444, d_tracer,
8736 			NULL, &tracing_saved_tgids_fops);
8737 
8738 	trace_eval_init();
8739 
8740 	trace_create_eval_file(d_tracer);
8741 
8742 #ifdef CONFIG_MODULES
8743 	register_module_notifier(&trace_module_nb);
8744 #endif
8745 
8746 #ifdef CONFIG_DYNAMIC_FTRACE
8747 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8748 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8749 #endif
8750 
8751 	create_trace_instances(d_tracer);
8752 
8753 	update_tracer_options(&global_trace);
8754 
8755 	return 0;
8756 }
8757 
8758 static int trace_panic_handler(struct notifier_block *this,
8759 			       unsigned long event, void *unused)
8760 {
8761 	if (ftrace_dump_on_oops)
8762 		ftrace_dump(ftrace_dump_on_oops);
8763 	return NOTIFY_OK;
8764 }
8765 
8766 static struct notifier_block trace_panic_notifier = {
8767 	.notifier_call  = trace_panic_handler,
8768 	.next           = NULL,
8769 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8770 };
8771 
8772 static int trace_die_handler(struct notifier_block *self,
8773 			     unsigned long val,
8774 			     void *data)
8775 {
8776 	switch (val) {
8777 	case DIE_OOPS:
8778 		if (ftrace_dump_on_oops)
8779 			ftrace_dump(ftrace_dump_on_oops);
8780 		break;
8781 	default:
8782 		break;
8783 	}
8784 	return NOTIFY_OK;
8785 }
8786 
8787 static struct notifier_block trace_die_notifier = {
8788 	.notifier_call = trace_die_handler,
8789 	.priority = 200
8790 };
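
/*
 * Both notifiers above only act when ftrace_dump_on_oops is set, e.g. via
 * the "ftrace_dump_on_oops" boot parameter or the kernel.ftrace_dump_on_oops
 * sysctl (exact knobs depend on the kernel configuration).
 */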
8791 
8792 /*
8793  * printk is limited to a maximum of 1024 characters; we really don't
8794  * need it that big. Nothing should be printing 1000 characters anyway.
8795  */
8796 #define TRACE_MAX_PRINT		1000
8797 
8798 /*
8799  * Define here KERN_TRACE so that we have one place to modify
8800  * it if we decide to change what log level the ftrace dump
8801  * should be at.
8802  */
8803 #define KERN_TRACE		KERN_EMERG
8804 
8805 void
8806 trace_printk_seq(struct trace_seq *s)
8807 {
8808 	/* Probably should print a warning here. */
8809 	if (s->seq.len >= TRACE_MAX_PRINT)
8810 		s->seq.len = TRACE_MAX_PRINT;
8811 
8812 	/*
8813 	 * More paranoid code. Although the buffer size is set to
8814 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8815 	 * an extra layer of protection.
8816 	 */
8817 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8818 		s->seq.len = s->seq.size - 1;
8819 
8820 	/* should be zero-terminated, but we are paranoid. */
8821 	s->buffer[s->seq.len] = 0;
8822 
8823 	printk(KERN_TRACE "%s", s->buffer);
8824 
8825 	trace_seq_init(s);
8826 }
8827 
8828 void trace_init_global_iter(struct trace_iterator *iter)
8829 {
8830 	iter->tr = &global_trace;
8831 	iter->trace = iter->tr->current_trace;
8832 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8833 	iter->trace_buffer = &global_trace.trace_buffer;
8834 
8835 	if (iter->trace && iter->trace->open)
8836 		iter->trace->open(iter);
8837 
8838 	/* Annotate start of buffers if we had overruns */
8839 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8840 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8841 
8842 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8843 	if (trace_clocks[iter->tr->clock_id].in_ns)
8844 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8845 }
8846 
8847 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8848 {
8849 	/* use static because iter can be a bit big for the stack */
8850 	static struct trace_iterator iter;
8851 	static atomic_t dump_running;
8852 	struct trace_array *tr = &global_trace;
8853 	unsigned int old_userobj;
8854 	unsigned long flags;
8855 	int cnt = 0, cpu;
8856 
8857 	/* Only allow one dump user at a time. */
8858 	if (atomic_inc_return(&dump_running) != 1) {
8859 		atomic_dec(&dump_running);
8860 		return;
8861 	}
8862 
8863 	/*
8864 	 * Always turn off tracing when we dump.
8865 	 * We don't need to show trace output of what happens
8866 	 * between multiple crashes.
8867 	 *
8868 	 * If the user does a sysrq-z, then they can re-enable
8869 	 * tracing with echo 1 > tracing_on.
8870 	 */
8871 	tracing_off();
8872 
8873 	local_irq_save(flags);
8874 	printk_nmi_direct_enter();
8875 
8876 	/* Simulate the iterator */
8877 	trace_init_global_iter(&iter);
8878 
8879 	for_each_tracing_cpu(cpu) {
8880 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8881 	}
8882 
8883 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8884 
8885 	/* don't look at user memory in panic mode */
8886 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8887 
8888 	switch (oops_dump_mode) {
8889 	case DUMP_ALL:
8890 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8891 		break;
8892 	case DUMP_ORIG:
8893 		iter.cpu_file = raw_smp_processor_id();
8894 		break;
8895 	case DUMP_NONE:
8896 		goto out_enable;
8897 	default:
8898 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8899 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8900 	}
8901 
8902 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8903 
8904 	/* Did function tracer already get disabled? */
8905 	if (ftrace_is_dead()) {
8906 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8907 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8908 	}
8909 
8910 	/*
8911 	 * We need to stop all tracing on all CPUs to read
8912 	 * the next buffer. This is a bit expensive, but is
8913 	 * not done often. We fill in all that we can read,
8914 	 * and then release the locks again.
8915 	 */
8916 
8917 	while (!trace_empty(&iter)) {
8918 
8919 		if (!cnt)
8920 			printk(KERN_TRACE "---------------------------------\n");
8921 
8922 		cnt++;
8923 
8924 		trace_iterator_reset(&iter);
8925 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8926 
8927 		if (trace_find_next_entry_inc(&iter) != NULL) {
8928 			int ret;
8929 
8930 			ret = print_trace_line(&iter);
8931 			if (ret != TRACE_TYPE_NO_CONSUME)
8932 				trace_consume(&iter);
8933 		}
8934 		touch_nmi_watchdog();
8935 
8936 		trace_printk_seq(&iter.seq);
8937 	}
8938 
8939 	if (!cnt)
8940 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8941 	else
8942 		printk(KERN_TRACE "---------------------------------\n");
8943 
8944  out_enable:
8945 	tr->trace_flags |= old_userobj;
8946 
8947 	for_each_tracing_cpu(cpu) {
8948 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8949 	}
8950 	atomic_dec(&dump_running);
8951 	printk_nmi_direct_exit();
8952 	local_irq_restore(flags);
8953 }
8954 EXPORT_SYMBOL_GPL(ftrace_dump);
8955 
8956 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8957 {
8958 	char **argv;
8959 	int argc, ret;
8960 
8961 	argc = 0;
8962 	ret = 0;
8963 	argv = argv_split(GFP_KERNEL, buf, &argc);
8964 	if (!argv)
8965 		return -ENOMEM;
8966 
8967 	if (argc)
8968 		ret = createfn(argc, argv);
8969 
8970 	argv_free(argv);
8971 
8972 	return ret;
8973 }
8974 
8975 #define WRITE_BUFSIZE  4096
8976 
8977 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8978 				size_t count, loff_t *ppos,
8979 				int (*createfn)(int, char **))
8980 {
8981 	char *kbuf, *buf, *tmp;
8982 	int ret = 0;
8983 	size_t done = 0;
8984 	size_t size;
8985 
8986 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8987 	if (!kbuf)
8988 		return -ENOMEM;
8989 
8990 	while (done < count) {
8991 		size = count - done;
8992 
8993 		if (size >= WRITE_BUFSIZE)
8994 			size = WRITE_BUFSIZE - 1;
8995 
8996 		if (copy_from_user(kbuf, buffer + done, size)) {
8997 			ret = -EFAULT;
8998 			goto out;
8999 		}
9000 		kbuf[size] = '\0';
9001 		buf = kbuf;
9002 		do {
9003 			tmp = strchr(buf, '\n');
9004 			if (tmp) {
9005 				*tmp = '\0';
9006 				size = tmp - buf + 1;
9007 			} else {
9008 				size = strlen(buf);
9009 				if (done + size < count) {
9010 					if (buf != kbuf)
9011 						break;
9012 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9013 					pr_warn("Line length is too long: Should be less than %d\n",
9014 						WRITE_BUFSIZE - 2);
9015 					ret = -EINVAL;
9016 					goto out;
9017 				}
9018 			}
9019 			done += size;
9020 
9021 			/* Remove comments */
9022 			tmp = strchr(buf, '#');
9023 
9024 			if (tmp)
9025 				*tmp = '\0';
9026 
9027 			ret = trace_run_command(buf, createfn);
9028 			if (ret)
9029 				goto out;
9030 			buf += size;
9031 
9032 		} while (done < count);
9033 	}
9034 	ret = done;
9035 
9036 out:
9037 	kfree(kbuf);
9038 
9039 	return ret;
9040 }
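
/*
 * Sketch of a createfn callback as consumed by the two helpers above
 * (hypothetical, for illustration only): trace_parse_run_command() strips
 * comments, splits the input into lines, and hands each line here already
 * split into argv[] words.
 */
static __maybe_unused int example_createfn(int argc, char **argv)
{
	if (argc < 1)
		return -EINVAL;

	pr_info("example cmd '%s' with %d argument(s)\n", argv[0], argc - 1);

	return 0;
}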
9041 
9042 __init static int tracer_alloc_buffers(void)
9043 {
9044 	int ring_buf_size;
9045 	int ret = -ENOMEM;
9046 
9047 	/*
9048 	 * Make sure we don't accidentally add more trace options
9049 	 * than we have bits for.
9050 	 */
9051 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9052 
9053 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9054 		goto out;
9055 
9056 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9057 		goto out_free_buffer_mask;
9058 
9059 	/* Only allocate trace_printk buffers if a trace_printk exists */
9060 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9061 		/* Must be called before global_trace.buffer is allocated */
9062 		trace_printk_init_buffers();
9063 
9064 	/* To save memory, keep the ring buffer size to its minimum */
9065 	if (ring_buffer_expanded)
9066 		ring_buf_size = trace_buf_size;
9067 	else
9068 		ring_buf_size = 1;
9069 
9070 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9071 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9072 
9073 	raw_spin_lock_init(&global_trace.start_lock);
9074 
9075 	/*
9076 	 * The prepare callback allocates some memory for the ring buffer. We
9077 	 * don't free the buffer when the CPU goes down. If we were to free
9078 	 * the buffer, then the user would lose any trace that was in the
9079 	 * buffer. The memory will be removed once the "instance" is removed.
9080 	 */
9081 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9082 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9083 				      NULL);
9084 	if (ret < 0)
9085 		goto out_free_cpumask;
9086 	/* Used for event triggers */
9087 	ret = -ENOMEM;
9088 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9089 	if (!temp_buffer)
9090 		goto out_rm_hp_state;
9091 
9092 	if (trace_create_savedcmd() < 0)
9093 		goto out_free_temp_buffer;
9094 
9095 	/* TODO: make the number of buffers hot pluggable with CPUs */
9096 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9097 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9098 		WARN_ON(1);
9099 		goto out_free_savedcmd;
9100 	}
9101 
9102 	if (global_trace.buffer_disabled)
9103 		tracing_off();
9104 
9105 	if (trace_boot_clock) {
9106 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9107 		if (ret < 0)
9108 			pr_warn("Trace clock %s not defined, going back to default\n",
9109 				trace_boot_clock);
9110 	}
9111 
9112 	/*
9113 	 * register_tracer() might reference current_trace, so it
9114 	 * needs to be set before we register anything. This is
9115 	 * just a bootstrap of current_trace anyway.
9116 	 */
9117 	global_trace.current_trace = &nop_trace;
9118 
9119 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9120 
9121 	ftrace_init_global_array_ops(&global_trace);
9122 
9123 	init_trace_flags_index(&global_trace);
9124 
9125 	register_tracer(&nop_trace);
9126 
9127 	/* Function tracing may start here (via kernel command line) */
9128 	init_function_trace();
9129 
9130 	/* All seems OK, enable tracing */
9131 	tracing_disabled = 0;
9132 
9133 	atomic_notifier_chain_register(&panic_notifier_list,
9134 				       &trace_panic_notifier);
9135 
9136 	register_die_notifier(&trace_die_notifier);
9137 
9138 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9139 
9140 	INIT_LIST_HEAD(&global_trace.systems);
9141 	INIT_LIST_HEAD(&global_trace.events);
9142 	INIT_LIST_HEAD(&global_trace.hist_vars);
9143 	INIT_LIST_HEAD(&global_trace.err_log);
9144 	list_add(&global_trace.list, &ftrace_trace_arrays);
9145 
9146 	apply_trace_boot_options();
9147 
9148 	register_snapshot_cmd();
9149 
9150 	return 0;
9151 
9152 out_free_savedcmd:
9153 	free_saved_cmdlines_buffer(savedcmd);
9154 out_free_temp_buffer:
9155 	ring_buffer_free(temp_buffer);
9156 out_rm_hp_state:
9157 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9158 out_free_cpumask:
9159 	free_cpumask_var(global_trace.tracing_cpumask);
9160 out_free_buffer_mask:
9161 	free_cpumask_var(tracing_buffer_mask);
9162 out:
9163 	return ret;
9164 }
9165 
9166 void __init early_trace_init(void)
9167 {
9168 	if (tracepoint_printk) {
9169 		tracepoint_print_iter =
9170 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9171 		if (WARN_ON(!tracepoint_print_iter))
9172 			tracepoint_printk = 0;
9173 		else
9174 			static_key_enable(&tracepoint_printk_key.key);
9175 	}
9176 	tracer_alloc_buffers();
9177 }
9178 
9179 void __init trace_init(void)
9180 {
9181 	trace_event_init();
9182 }
9183 
9184 __init static int clear_boot_tracer(void)
9185 {
9186 	/*
9187 	 * The default bootup tracer name is kept in an init section
9188 	 * that is freed after boot. This function runs at late_initcall
9189 	 * time; if the boot tracer was never found by then, clear it out
9190 	 * to prevent a later registration from accessing memory that is
9191 	 * about to be freed.
9192 	 */
9193 	if (!default_bootup_tracer)
9194 		return 0;
9195 
9196 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9197 	       default_bootup_tracer);
9198 	default_bootup_tracer = NULL;
9199 
9200 	return 0;
9201 }
9202 
9203 fs_initcall(tracer_init_tracefs);
9204 late_initcall_sync(clear_boot_tracer);
9205 
9206 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9207 __init static int tracing_set_default_clock(void)
9208 {
9209 	/* sched_clock_stable() is determined in late_initcall */
9210 	if (!trace_boot_clock && !sched_clock_stable()) {
9211 		printk(KERN_WARNING
9212 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9213 		       "If you want to keep using the local clock, then add:\n"
9214 		       "  \"trace_clock=local\"\n"
9215 		       "on the kernel command line\n");
9216 		tracing_set_clock(&global_trace, "global");
9217 	}
9218 
9219 	return 0;
9220 }
9221 late_initcall_sync(tracing_set_default_clock);
9222 #endif
9223