xref: /openbmc/linux/kernel/trace/trace.c (revision 95777591)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest peeks into the ring buffer to count the entries
60  * inserted during the selftest, although concurrent insertions
61  * into the ring buffer, such as trace_printk(), could occur at
62  * the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 and is set back to zero only when the
97  * initialization of the tracer succeeds. That is the only place
98  * that clears it.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
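
/*
 * For illustration, a sketch of walking one saved block laid out as
 * described above (a sketch only, assuming the trace_eval_map fields
 * eval_string and eval_value; the real walkers live in the eval_map
 * file code):
 *
 *	trace_eval_maps --> [ head    ]  .head.length = N, .head.mod = module
 *	                    [ map 0   ]  N struct trace_eval_map entries
 *	                    [ ...     ]
 *	                    [ map N-1 ]
 *	                    [ tail    ]  .tail.next = next saved block or NULL
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long i, len = ptr->head.length;
 *
 *		for (i = 0; i < len; i++)
 *			pr_info("%s %lu\n", ptr[i + 1].map.eval_string,
 *				ptr[i + 1].map.eval_value);
 *		ptr = ptr[len + 1].tail.next;   (the element after the last map)
 *	}
 */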
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188 		return 1;
189 	}
190 
191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
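
/*
 * Example usage, matching the handler and the comment further above:
 *
 *	ftrace_dump_on_oops				(boot: dump all CPU buffers)
 *	ftrace_dump_on_oops=orig_cpu			(boot: dump only the oopsing CPU)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(run time)
 */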
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
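
/*
 * The +500 rounds to the nearest microsecond before the divide, e.g.
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */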
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork, and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid is already +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
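
/*
 * A minimal sketch of how the pid list helpers above plug into a
 * seq_file; the real wrappers live with each user (the event and
 * function pid filters), and get_pid_list() here is a hypothetical
 * helper that finds the trace_pid_list behind the file:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(get_pid_list(m), pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(get_pid_list(m), v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */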
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always create a new array. The write is an all-or-nothing
494 	 * operation: a new array is built from the pids the user writes,
495 	 * and if the operation fails, the current list is left
496 	 * unmodified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
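
/*
 * From user space this is reached through the pid filter files (the
 * event and function pid filters are among the callers); a rough
 * sketch of the effect:
 *
 *	echo 42 1017 2391 >> set_event_pid	(add these pids)
 *	echo > set_event_pid			(whitespace only: clear the list)
 */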
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been enabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number (16384 entries).
613  * If a dump on oops happens, it is much appreciated not to have
614  * to wait for all that output. Anyway, this is configurable at
615  * both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level
633  * protection. The validity of the events (returned by ring_buffer_peek()
634  * and friends) is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes
637  * to consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
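
/*
 * Typical reader pairing for the primitives above (sketch):
 *
 *	trace_access_lock(iter->cpu_file);
 *	... consume events, e.g. via ring_buffer_consume() ...
 *	trace_access_unlock(iter->cpu_file);
 *
 * With cpu == RING_BUFFER_ALL_CPUS the whole buffer is locked exclusively;
 * otherwise (on SMP) only the per-cpu mutex plus the read side of
 * all_cpu_access_lock is taken.
 */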
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races where it gets disabled while we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
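
/*
 * Callers normally use the trace_puts() macro instead of calling this
 * directly, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * For string literals the macro may route to __trace_bputs() below, which
 * records only the pointer instead of copying the text.
 */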
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
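
/*
 * A sketch of the intended use, as described in the comment above
 * (snapshot_ready and hit_the_condition are hypothetical placeholders):
 *
 *	if (tracing_alloc_snapshot() == 0)
 *		snapshot_ready = true;		(may sleep; do this early)
 *	...
 *	if (snapshot_ready && hit_the_condition)
 *		tracing_snapshot();		(swap live and snapshot buffers)
 */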
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer; instead, we resize it because
975 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want to preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = tracing_alloc_snapshot_instance(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff), that just want to
1055 	 * know if the ring buffer has been disabled, but it can handle
1056 	 * races where it gets disabled while we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show the real state of the ring buffer
1087  * @tr: the trace array to check if its ring buffer is enabled
1088  *
1089  * Shows the real state of the ring buffer, i.e. whether it is enabled or not.
1090  */
1091 bool tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
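
/*
 * Example boot parameters accepted by the handler above (memparse allows
 * K/M/G suffixes); the value is the requested size, in bytes, of each
 * per-cpu buffer:
 *
 *	trace_buf_size=1048576
 *	trace_buf_size=4m
 */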
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
1172 
1173 bool trace_clock_in_ns(struct trace_array *tr)
1174 {
1175 	if (trace_clocks[tr->clock_id].in_ns)
1176 		return true;
1177 
1178 	return false;
1179 }
1180 
1181 /*
1182  * trace_parser_get_init - gets the buffer for trace parser
1183  */
1184 int trace_parser_get_init(struct trace_parser *parser, int size)
1185 {
1186 	memset(parser, 0, sizeof(*parser));
1187 
1188 	parser->buffer = kmalloc(size, GFP_KERNEL);
1189 	if (!parser->buffer)
1190 		return 1;
1191 
1192 	parser->size = size;
1193 	return 0;
1194 }
1195 
1196 /*
1197  * trace_parser_put - frees the buffer for trace parser
1198  */
1199 void trace_parser_put(struct trace_parser *parser)
1200 {
1201 	kfree(parser->buffer);
1202 	parser->buffer = NULL;
1203 }
1204 
1205 /*
1206  * trace_get_user - reads the user input string separated by space
1207  * (matched by isspace(ch))
1208  *
1209  * For each string found the 'struct trace_parser' is updated,
1210  * and the function returns.
1211  *
1212  * Returns number of bytes read.
1213  *
1214  * See kernel/trace/trace.h for 'struct trace_parser' details.
1215  */
1216 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217 	size_t cnt, loff_t *ppos)
1218 {
1219 	char ch;
1220 	size_t read = 0;
1221 	ssize_t ret;
1222 
1223 	if (!*ppos)
1224 		trace_parser_clear(parser);
1225 
1226 	ret = get_user(ch, ubuf++);
1227 	if (ret)
1228 		goto out;
1229 
1230 	read++;
1231 	cnt--;
1232 
1233 	/*
1234 	 * The parser is not finished with the last write,
1235 	 * continue reading the user input without skipping spaces.
1236 	 */
1237 	if (!parser->cont) {
1238 		/* skip white space */
1239 		while (cnt && isspace(ch)) {
1240 			ret = get_user(ch, ubuf++);
1241 			if (ret)
1242 				goto out;
1243 			read++;
1244 			cnt--;
1245 		}
1246 
1247 		parser->idx = 0;
1248 
1249 		/* only spaces were written */
1250 		if (isspace(ch) || !ch) {
1251 			*ppos += read;
1252 			ret = read;
1253 			goto out;
1254 		}
1255 	}
1256 
1257 	/* read the non-space input */
1258 	while (cnt && !isspace(ch) && ch) {
1259 		if (parser->idx < parser->size - 1)
1260 			parser->buffer[parser->idx++] = ch;
1261 		else {
1262 			ret = -EINVAL;
1263 			goto out;
1264 		}
1265 		ret = get_user(ch, ubuf++);
1266 		if (ret)
1267 			goto out;
1268 		read++;
1269 		cnt--;
1270 	}
1271 
1272 	/* We either got finished input or we have to wait for another call. */
1273 	if (isspace(ch) || !ch) {
1274 		parser->buffer[parser->idx] = 0;
1275 		parser->cont = false;
1276 	} else if (parser->idx < parser->size - 1) {
1277 		parser->cont = true;
1278 		parser->buffer[parser->idx++] = ch;
1279 		/* Make sure the parsed string always terminates with '\0'. */
1280 		parser->buffer[parser->idx] = 0;
1281 	} else {
1282 		ret = -EINVAL;
1283 		goto out;
1284 	}
1285 
1286 	*ppos += read;
1287 	ret = read;
1288 
1289 out:
1290 	return ret;
1291 }
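
/*
 * A condensed sketch of the usual calling pattern (trace_pid_write()
 * above is a complete example); declarations of ubuf, cnt, pos and ret
 * are omitted:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, MAX_TRACER_SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... use the NUL terminated token in parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */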
1292 
1293 /* TODO add a seq_buf_to_buffer() */
1294 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295 {
1296 	int len;
1297 
1298 	if (trace_seq_used(s) <= s->seq.readpos)
1299 		return -EBUSY;
1300 
1301 	len = trace_seq_used(s) - s->seq.readpos;
1302 	if (cnt > len)
1303 		cnt = len;
1304 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305 
1306 	s->seq.readpos += cnt;
1307 	return cnt;
1308 }
1309 
1310 unsigned long __read_mostly	tracing_thresh;
1311 
1312 #ifdef CONFIG_TRACER_MAX_TRACE
1313 /*
1314  * Copy the new maximum trace into the separate maximum-trace
1315  * structure. (this way the maximum trace is permanently saved,
1316  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317  */
1318 static void
1319 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320 {
1321 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1322 	struct trace_buffer *max_buf = &tr->max_buffer;
1323 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325 
1326 	max_buf->cpu = cpu;
1327 	max_buf->time_start = data->preempt_timestamp;
1328 
1329 	max_data->saved_latency = tr->max_latency;
1330 	max_data->critical_start = data->critical_start;
1331 	max_data->critical_end = data->critical_end;
1332 
1333 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334 	max_data->pid = tsk->pid;
1335 	/*
1336 	 * If tsk == current, then use current_uid(), as that does not use
1337 	 * RCU. The irq tracer can be called out of RCU scope.
1338 	 */
1339 	if (tsk == current)
1340 		max_data->uid = current_uid();
1341 	else
1342 		max_data->uid = task_uid(tsk);
1343 
1344 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345 	max_data->policy = tsk->policy;
1346 	max_data->rt_priority = tsk->rt_priority;
1347 
1348 	/* record this task's comm */
1349 	tracing_record_cmdline(tsk);
1350 }
1351 
1352 /**
1353  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354  * @tr: tracer
1355  * @tsk: the task with the latency
1356  * @cpu: The cpu that initiated the trace.
1357  *
1358  * Flip the buffers between the @tr and the max_tr and record information
1359  * about which task was the cause of this latency.
1360  */
1361 void
1362 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363 {
1364 	if (tr->stop_count)
1365 		return;
1366 
1367 	WARN_ON_ONCE(!irqs_disabled());
1368 
1369 	if (!tr->allocated_snapshot) {
1370 		/* Only the nop tracer should hit this when disabling */
1371 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372 		return;
1373 	}
1374 
1375 	arch_spin_lock(&tr->max_lock);
1376 
1377 	/* Inherit the recordable setting from trace_buffer */
1378 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379 		ring_buffer_record_on(tr->max_buffer.buffer);
1380 	else
1381 		ring_buffer_record_off(tr->max_buffer.buffer);
1382 
1383 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384 
1385 	__update_max_tr(tr, tsk, cpu);
1386 	arch_spin_unlock(&tr->max_lock);
1387 }
1388 
1389 /**
1390  * update_max_tr_single - only copy one trace over, and reset the rest
1391  * @tr: tracer
1392  * @tsk: task with the latency
1393  * @cpu: the cpu of the buffer to copy.
1394  *
1395  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396  */
1397 void
1398 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399 {
1400 	int ret;
1401 
1402 	if (tr->stop_count)
1403 		return;
1404 
1405 	WARN_ON_ONCE(!irqs_disabled());
1406 	if (!tr->allocated_snapshot) {
1407 		/* Only the nop tracer should hit this when disabling */
1408 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409 		return;
1410 	}
1411 
1412 	arch_spin_lock(&tr->max_lock);
1413 
1414 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415 
1416 	if (ret == -EBUSY) {
1417 		/*
1418 		 * We failed to swap the buffer due to a commit taking
1419 		 * place on this CPU. We fail to record, but we reset
1420 		 * the max trace buffer (no one writes directly to it)
1421 		 * and flag that it failed.
1422 		 */
1423 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424 			"Failed to swap buffers due to commit in progress\n");
1425 	}
1426 
1427 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428 
1429 	__update_max_tr(tr, tsk, cpu);
1430 	arch_spin_unlock(&tr->max_lock);
1431 }
1432 #endif /* CONFIG_TRACER_MAX_TRACE */
1433 
1434 static int wait_on_pipe(struct trace_iterator *iter, int full)
1435 {
1436 	/* Iterators are static, they should be filled or empty */
1437 	if (trace_buffer_iter(iter, iter->cpu_file))
1438 		return 0;
1439 
1440 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441 				full);
1442 }
1443 
1444 #ifdef CONFIG_FTRACE_STARTUP_TEST
1445 static bool selftests_can_run;
1446 
1447 struct trace_selftests {
1448 	struct list_head		list;
1449 	struct tracer			*type;
1450 };
1451 
1452 static LIST_HEAD(postponed_selftests);
1453 
1454 static int save_selftest(struct tracer *type)
1455 {
1456 	struct trace_selftests *selftest;
1457 
1458 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459 	if (!selftest)
1460 		return -ENOMEM;
1461 
1462 	selftest->type = type;
1463 	list_add(&selftest->list, &postponed_selftests);
1464 	return 0;
1465 }
1466 
1467 static int run_tracer_selftest(struct tracer *type)
1468 {
1469 	struct trace_array *tr = &global_trace;
1470 	struct tracer *saved_tracer = tr->current_trace;
1471 	int ret;
1472 
1473 	if (!type->selftest || tracing_selftest_disabled)
1474 		return 0;
1475 
1476 	/*
1477 	 * If a tracer registers early in boot up (before scheduling is
1478 	 * initialized and such), then do not run its selftests yet.
1479 	 * Instead, run it a little later in the boot process.
1480 	 */
1481 	if (!selftests_can_run)
1482 		return save_selftest(type);
1483 
1484 	/*
1485 	 * Run a selftest on this tracer.
1486 	 * Here we reset the trace buffer, and set the current
1487 	 * tracer to be this tracer. The tracer can then run some
1488 	 * internal tracing to verify that everything is in order.
1489 	 * If we fail, we do not register this tracer.
1490 	 */
1491 	tracing_reset_online_cpus(&tr->trace_buffer);
1492 
1493 	tr->current_trace = type;
1494 
1495 #ifdef CONFIG_TRACER_MAX_TRACE
1496 	if (type->use_max_tr) {
1497 		/* If we expanded the buffers, make sure the max is expanded too */
1498 		if (ring_buffer_expanded)
1499 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500 					   RING_BUFFER_ALL_CPUS);
1501 		tr->allocated_snapshot = true;
1502 	}
1503 #endif
1504 
1505 	/* the test is responsible for initializing and enabling */
1506 	pr_info("Testing tracer %s: ", type->name);
1507 	ret = type->selftest(type, tr);
1508 	/* the test is responsible for resetting too */
1509 	tr->current_trace = saved_tracer;
1510 	if (ret) {
1511 		printk(KERN_CONT "FAILED!\n");
1512 		/* Add the warning after printing 'FAILED' */
1513 		WARN_ON(1);
1514 		return -1;
1515 	}
1516 	/* Only reset on passing, to avoid touching corrupted buffers */
1517 	tracing_reset_online_cpus(&tr->trace_buffer);
1518 
1519 #ifdef CONFIG_TRACER_MAX_TRACE
1520 	if (type->use_max_tr) {
1521 		tr->allocated_snapshot = false;
1522 
1523 		/* Shrink the max buffer again */
1524 		if (ring_buffer_expanded)
1525 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1526 					   RING_BUFFER_ALL_CPUS);
1527 	}
1528 #endif
1529 
1530 	printk(KERN_CONT "PASSED\n");
1531 	return 0;
1532 }
1533 
1534 static __init int init_trace_selftests(void)
1535 {
1536 	struct trace_selftests *p, *n;
1537 	struct tracer *t, **last;
1538 	int ret;
1539 
1540 	selftests_can_run = true;
1541 
1542 	mutex_lock(&trace_types_lock);
1543 
1544 	if (list_empty(&postponed_selftests))
1545 		goto out;
1546 
1547 	pr_info("Running postponed tracer tests:\n");
1548 
1549 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550 		ret = run_tracer_selftest(p->type);
1551 		/* If the test fails, then warn and remove from available_tracers */
1552 		if (ret < 0) {
1553 			WARN(1, "tracer: %s failed selftest, disabling\n",
1554 			     p->type->name);
1555 			last = &trace_types;
1556 			for (t = trace_types; t; t = t->next) {
1557 				if (t == p->type) {
1558 					*last = t->next;
1559 					break;
1560 				}
1561 				last = &t->next;
1562 			}
1563 		}
1564 		list_del(&p->list);
1565 		kfree(p);
1566 	}
1567 
1568  out:
1569 	mutex_unlock(&trace_types_lock);
1570 
1571 	return 0;
1572 }
1573 core_initcall(init_trace_selftests);
1574 #else
1575 static inline int run_tracer_selftest(struct tracer *type)
1576 {
1577 	return 0;
1578 }
1579 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1580 
1581 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582 
1583 static void __init apply_trace_boot_options(void);
1584 
1585 /**
1586  * register_tracer - register a tracer with the ftrace system.
1587  * @type: the plugin for the tracer
1588  *
1589  * Register a new plugin tracer.
1590  */
1591 int __init register_tracer(struct tracer *type)
1592 {
1593 	struct tracer *t;
1594 	int ret = 0;
1595 
1596 	if (!type->name) {
1597 		pr_info("Tracer must have a name\n");
1598 		return -1;
1599 	}
1600 
1601 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603 		return -1;
1604 	}
1605 
1606 	mutex_lock(&trace_types_lock);
1607 
1608 	tracing_selftest_running = true;
1609 
1610 	for (t = trace_types; t; t = t->next) {
1611 		if (strcmp(type->name, t->name) == 0) {
1612 			/* already found */
1613 			pr_info("Tracer %s already registered\n",
1614 				type->name);
1615 			ret = -1;
1616 			goto out;
1617 		}
1618 	}
1619 
1620 	if (!type->set_flag)
1621 		type->set_flag = &dummy_set_flag;
1622 	if (!type->flags) {
1623 		/* allocate a dummy tracer_flags */
1624 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625 		if (!type->flags) {
1626 			ret = -ENOMEM;
1627 			goto out;
1628 		}
1629 		type->flags->val = 0;
1630 		type->flags->opts = dummy_tracer_opt;
1631 	} else
1632 		if (!type->flags->opts)
1633 			type->flags->opts = dummy_tracer_opt;
1634 
1635 	/* store the tracer for __set_tracer_option */
1636 	type->flags->trace = type;
1637 
1638 	ret = run_tracer_selftest(type);
1639 	if (ret < 0)
1640 		goto out;
1641 
1642 	type->next = trace_types;
1643 	trace_types = type;
1644 	add_tracer_options(&global_trace, type);
1645 
1646  out:
1647 	tracing_selftest_running = false;
1648 	mutex_unlock(&trace_types_lock);
1649 
1650 	if (ret || !default_bootup_tracer)
1651 		goto out_unlock;
1652 
1653 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654 		goto out_unlock;
1655 
1656 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657 	/* Do we want this tracer to start on bootup? */
1658 	tracing_set_tracer(&global_trace, type->name);
1659 	default_bootup_tracer = NULL;
1660 
1661 	apply_trace_boot_options();
1662 
1663 	/* disable other selftests, since this will break them. */
1664 	tracing_selftest_disabled = true;
1665 #ifdef CONFIG_FTRACE_STARTUP_TEST
1666 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667 	       type->name);
1668 #endif
1669 
1670  out_unlock:
1671 	return ret;
1672 }
1673 
1674 void tracing_reset(struct trace_buffer *buf, int cpu)
1675 {
1676 	struct ring_buffer *buffer = buf->buffer;
1677 
1678 	if (!buffer)
1679 		return;
1680 
1681 	ring_buffer_record_disable(buffer);
1682 
1683 	/* Make sure all commits have finished */
1684 	synchronize_rcu();
1685 	ring_buffer_reset_cpu(buffer, cpu);
1686 
1687 	ring_buffer_record_enable(buffer);
1688 }
1689 
1690 void tracing_reset_online_cpus(struct trace_buffer *buf)
1691 {
1692 	struct ring_buffer *buffer = buf->buffer;
1693 	int cpu;
1694 
1695 	if (!buffer)
1696 		return;
1697 
1698 	ring_buffer_record_disable(buffer);
1699 
1700 	/* Make sure all commits have finished */
1701 	synchronize_rcu();
1702 
1703 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704 
1705 	for_each_online_cpu(cpu)
1706 		ring_buffer_reset_cpu(buffer, cpu);
1707 
1708 	ring_buffer_record_enable(buffer);
1709 }
1710 
1711 /* Must have trace_types_lock held */
1712 void tracing_reset_all_online_cpus(void)
1713 {
1714 	struct trace_array *tr;
1715 
1716 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717 		if (!tr->clear_trace)
1718 			continue;
1719 		tr->clear_trace = false;
1720 		tracing_reset_online_cpus(&tr->trace_buffer);
1721 #ifdef CONFIG_TRACER_MAX_TRACE
1722 		tracing_reset_online_cpus(&tr->max_buffer);
1723 #endif
1724 	}
1725 }
1726 
1727 static int *tgid_map;
1728 
1729 #define SAVED_CMDLINES_DEFAULT 128
1730 #define NO_CMDLINE_MAP UINT_MAX
1731 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732 struct saved_cmdlines_buffer {
1733 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734 	unsigned *map_cmdline_to_pid;
1735 	unsigned cmdline_num;
1736 	int cmdline_idx;
1737 	char *saved_cmdlines;
1738 };
1739 static struct saved_cmdlines_buffer *savedcmd;
1740 
1741 /* temporarily disable recording */
1742 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743 
1744 static inline char *get_saved_cmdlines(int idx)
1745 {
1746 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747 }
1748 
1749 static inline void set_cmdline(int idx, const char *cmdline)
1750 {
1751 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752 }
1753 
1754 static int allocate_cmdlines_buffer(unsigned int val,
1755 				    struct saved_cmdlines_buffer *s)
1756 {
1757 	s->map_cmdline_to_pid = kmalloc_array(val,
1758 					      sizeof(*s->map_cmdline_to_pid),
1759 					      GFP_KERNEL);
1760 	if (!s->map_cmdline_to_pid)
1761 		return -ENOMEM;
1762 
1763 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764 	if (!s->saved_cmdlines) {
1765 		kfree(s->map_cmdline_to_pid);
1766 		return -ENOMEM;
1767 	}
1768 
1769 	s->cmdline_idx = 0;
1770 	s->cmdline_num = val;
1771 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772 	       sizeof(s->map_pid_to_cmdline));
1773 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774 	       val * sizeof(*s->map_cmdline_to_pid));
1775 
1776 	return 0;
1777 }
1778 
1779 static int trace_create_savedcmd(void)
1780 {
1781 	int ret;
1782 
1783 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784 	if (!savedcmd)
1785 		return -ENOMEM;
1786 
1787 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788 	if (ret < 0) {
1789 		kfree(savedcmd);
1790 		savedcmd = NULL;
1791 		return -ENOMEM;
1792 	}
1793 
1794 	return 0;
1795 }
1796 
1797 int is_tracing_stopped(void)
1798 {
1799 	return global_trace.stop_count;
1800 }
1801 
1802 /**
1803  * tracing_start - quick start of the tracer
1804  *
1805  * If tracing is enabled but was stopped by tracing_stop,
1806  * this will start the tracer back up.
1807  */
1808 void tracing_start(void)
1809 {
1810 	struct ring_buffer *buffer;
1811 	unsigned long flags;
1812 
1813 	if (tracing_disabled)
1814 		return;
1815 
1816 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817 	if (--global_trace.stop_count) {
1818 		if (global_trace.stop_count < 0) {
1819 			/* Someone screwed up their debugging */
1820 			WARN_ON_ONCE(1);
1821 			global_trace.stop_count = 0;
1822 		}
1823 		goto out;
1824 	}
1825 
1826 	/* Prevent the buffers from switching */
1827 	arch_spin_lock(&global_trace.max_lock);
1828 
1829 	buffer = global_trace.trace_buffer.buffer;
1830 	if (buffer)
1831 		ring_buffer_record_enable(buffer);
1832 
1833 #ifdef CONFIG_TRACER_MAX_TRACE
1834 	buffer = global_trace.max_buffer.buffer;
1835 	if (buffer)
1836 		ring_buffer_record_enable(buffer);
1837 #endif
1838 
1839 	arch_spin_unlock(&global_trace.max_lock);
1840 
1841  out:
1842 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843 }
1844 
1845 static void tracing_start_tr(struct trace_array *tr)
1846 {
1847 	struct ring_buffer *buffer;
1848 	unsigned long flags;
1849 
1850 	if (tracing_disabled)
1851 		return;
1852 
1853 	/* If global, we need to also start the max tracer */
1854 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855 		return tracing_start();
1856 
1857 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1858 
1859 	if (--tr->stop_count) {
1860 		if (tr->stop_count < 0) {
1861 			/* Someone screwed up their debugging */
1862 			WARN_ON_ONCE(1);
1863 			tr->stop_count = 0;
1864 		}
1865 		goto out;
1866 	}
1867 
1868 	buffer = tr->trace_buffer.buffer;
1869 	if (buffer)
1870 		ring_buffer_record_enable(buffer);
1871 
1872  out:
1873 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874 }
1875 
1876 /**
1877  * tracing_stop - quick stop of the tracer
1878  *
1879  * Lightweight way to stop tracing. Use in conjunction with
1880  * tracing_start.
1881  */
1882 void tracing_stop(void)
1883 {
1884 	struct ring_buffer *buffer;
1885 	unsigned long flags;
1886 
1887 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888 	if (global_trace.stop_count++)
1889 		goto out;
1890 
1891 	/* Prevent the buffers from switching */
1892 	arch_spin_lock(&global_trace.max_lock);
1893 
1894 	buffer = global_trace.trace_buffer.buffer;
1895 	if (buffer)
1896 		ring_buffer_record_disable(buffer);
1897 
1898 #ifdef CONFIG_TRACER_MAX_TRACE
1899 	buffer = global_trace.max_buffer.buffer;
1900 	if (buffer)
1901 		ring_buffer_record_disable(buffer);
1902 #endif
1903 
1904 	arch_spin_unlock(&global_trace.max_lock);
1905 
1906  out:
1907 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908 }
1909 
1910 static void tracing_stop_tr(struct trace_array *tr)
1911 {
1912 	struct ring_buffer *buffer;
1913 	unsigned long flags;
1914 
1915 	/* If global, we need to also stop the max tracer */
1916 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917 		return tracing_stop();
1918 
1919 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1920 	if (tr->stop_count++)
1921 		goto out;
1922 
1923 	buffer = tr->trace_buffer.buffer;
1924 	if (buffer)
1925 		ring_buffer_record_disable(buffer);
1926 
1927  out:
1928 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929 }
1930 
1931 static int trace_save_cmdline(struct task_struct *tsk)
1932 {
1933 	unsigned pid, idx;
1934 
1935 	/* treat recording of idle task as a success */
1936 	if (!tsk->pid)
1937 		return 1;
1938 
1939 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940 		return 0;
1941 
1942 	/*
1943 	 * It's not the end of the world if we don't get
1944 	 * the lock, but we also don't want to spin
1945 	 * nor do we want to disable interrupts,
1946 	 * so if we miss here, then better luck next time.
1947 	 */
1948 	if (!arch_spin_trylock(&trace_cmdline_lock))
1949 		return 0;
1950 
1951 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952 	if (idx == NO_CMDLINE_MAP) {
1953 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954 
1955 		/*
1956 		 * Check whether the cmdline buffer at idx has a pid
1957 		 * mapped. We are going to overwrite that entry so we
1958 		 * need to clear the map_pid_to_cmdline. Otherwise we
1959 		 * would read the new comm for the old pid.
1960 		 */
1961 		pid = savedcmd->map_cmdline_to_pid[idx];
1962 		if (pid != NO_CMDLINE_MAP)
1963 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964 
1965 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967 
1968 		savedcmd->cmdline_idx = idx;
1969 	}
1970 
1971 	set_cmdline(idx, tsk->comm);
1972 
1973 	arch_spin_unlock(&trace_cmdline_lock);
1974 
1975 	return 1;
1976 }
1977 
1978 static void __trace_find_cmdline(int pid, char comm[])
1979 {
1980 	unsigned map;
1981 
1982 	if (!pid) {
1983 		strcpy(comm, "<idle>");
1984 		return;
1985 	}
1986 
1987 	if (WARN_ON_ONCE(pid < 0)) {
1988 		strcpy(comm, "<XXX>");
1989 		return;
1990 	}
1991 
1992 	if (pid > PID_MAX_DEFAULT) {
1993 		strcpy(comm, "<...>");
1994 		return;
1995 	}
1996 
1997 	map = savedcmd->map_pid_to_cmdline[pid];
1998 	if (map != NO_CMDLINE_MAP)
1999 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000 	else
2001 		strcpy(comm, "<...>");
2002 }
2003 
2004 void trace_find_cmdline(int pid, char comm[])
2005 {
2006 	preempt_disable();
2007 	arch_spin_lock(&trace_cmdline_lock);
2008 
2009 	__trace_find_cmdline(pid, comm);
2010 
2011 	arch_spin_unlock(&trace_cmdline_lock);
2012 	preempt_enable();
2013 }
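/*
 * Illustrative sketch (editorial): output code typically resolves a pid to a
 * comm with this helper, along the lines of:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 *
 * The caller must supply a buffer of at least TASK_COMM_LEN bytes; the copy
 * above is bounded by that size.
 */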
2014 
2015 int trace_find_tgid(int pid)
2016 {
2017 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018 		return 0;
2019 
2020 	return tgid_map[pid];
2021 }
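/*
 * Illustrative sketch (editorial): a TGID column can be filled in with this
 * helper, treating a return of 0 as "unknown":
 *
 *	int tgid = trace_find_tgid(entry->pid);
 *
 *	if (tgid)
 *		trace_seq_printf(s, "(%7d) ", tgid);
 *	else
 *		trace_seq_puts(s, "(-------) ");
 */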
2022 
2023 static int trace_save_tgid(struct task_struct *tsk)
2024 {
2025 	/* treat recording of idle task as a success */
2026 	if (!tsk->pid)
2027 		return 1;
2028 
2029 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030 		return 0;
2031 
2032 	tgid_map[tsk->pid] = tsk->tgid;
2033 	return 1;
2034 }
2035 
2036 static bool tracing_record_taskinfo_skip(int flags)
2037 {
2038 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039 		return true;
2040 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041 		return true;
2042 	if (!__this_cpu_read(trace_taskinfo_save))
2043 		return true;
2044 	return false;
2045 }
2046 
2047 /**
2048  * tracing_record_taskinfo - record the task info of a task
2049  *
 * @task:  task to record
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
2053  */
2054 void tracing_record_taskinfo(struct task_struct *task, int flags)
2055 {
2056 	bool done;
2057 
2058 	if (tracing_record_taskinfo_skip(flags))
2059 		return;
2060 
2061 	/*
2062 	 * Record as much task information as possible. If some fail, continue
2063 	 * to try to record the others.
2064 	 */
2065 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067 
2068 	/* If recording any information failed, retry again soon. */
2069 	if (!done)
2070 		return;
2071 
2072 	__this_cpu_write(trace_taskinfo_save, false);
2073 }
2074 
2075 /**
2076  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077  *
 * @prev:  previous task during sched_switch
 * @next:  next task during sched_switch
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
2082  */
2083 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084 					  struct task_struct *next, int flags)
2085 {
2086 	bool done;
2087 
2088 	if (tracing_record_taskinfo_skip(flags))
2089 		return;
2090 
2091 	/*
2092 	 * Record as much task information as possible. If some fail, continue
2093 	 * to try to record the others.
2094 	 */
2095 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099 
2100 	/* If recording any information failed, retry again soon. */
2101 	if (!done)
2102 		return;
2103 
2104 	__this_cpu_write(trace_taskinfo_save, false);
2105 }
2106 
2107 /* Helpers to record a specific task information */
2108 void tracing_record_cmdline(struct task_struct *task)
2109 {
2110 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111 }
2112 
2113 void tracing_record_tgid(struct task_struct *task)
2114 {
2115 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116 }
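/*
 * Illustrative sketch (editorial): the scheduler probes feed these helpers so
 * that pids seen in the buffer can later be mapped back to comms and tgids,
 * roughly:
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *					     TRACE_RECORD_CMDLINE |
 *					     TRACE_RECORD_TGID);
 */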
2117 
2118 /*
2119  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121  * simplifies those functions and keeps them in sync.
2122  */
2123 enum print_line_t trace_handle_return(struct trace_seq *s)
2124 {
2125 	return trace_seq_has_overflowed(s) ?
2126 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_handle_return);
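/*
 * Illustrative sketch (editorial): an event's print callback can use
 * trace_handle_return() instead of open-coding the overflow check
 * (trace_foo_print is a hypothetical name):
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: ...\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */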
2129 
2130 void
2131 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132 			     int pc)
2133 {
2134 	struct task_struct *tsk = current;
2135 
2136 	entry->preempt_count		= pc & 0xff;
2137 	entry->pid			= (tsk) ? tsk->pid : 0;
2138 	entry->flags =
2139 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141 #else
2142 		TRACE_FLAG_IRQS_NOSUPPORT |
2143 #endif
2144 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149 }
2150 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
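/*
 * Illustrative sketch (editorial): a caller building a typed entry by hand
 * stamps the common header like this (the typed entry layouts embed a
 * struct trace_entry named "ent"):
 *
 *	unsigned long irq_flags;
 *
 *	local_save_flags(irq_flags);
 *	tracing_generic_entry_update(&entry->ent, irq_flags, preempt_count());
 *
 * The reserve path below does the same through trace_event_setup().
 */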
2151 
2152 struct ring_buffer_event *
2153 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154 			  int type,
2155 			  unsigned long len,
2156 			  unsigned long flags, int pc)
2157 {
2158 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159 }
2160 
2161 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163 static int trace_buffered_event_ref;
2164 
2165 /**
2166  * trace_buffered_event_enable - enable buffering events
2167  *
2168  * When events are being filtered, it is quicker to use a temporary
2169  * buffer to write the event data into if there's a likely chance
 * that it will not be committed. Discarding an event from the ring
 * buffer is not as fast as committing it, and is much slower than
 * copying the data and doing a single commit.
2173  *
 * When an event is to be filtered, per cpu buffers are allocated to
 * write the event data into. If the event is then filtered and discarded,
 * it is simply dropped; otherwise the entire data is committed to the
 * ring buffer in one shot.
2178  */
2179 void trace_buffered_event_enable(void)
2180 {
2181 	struct ring_buffer_event *event;
2182 	struct page *page;
2183 	int cpu;
2184 
2185 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186 
2187 	if (trace_buffered_event_ref++)
2188 		return;
2189 
2190 	for_each_tracing_cpu(cpu) {
2191 		page = alloc_pages_node(cpu_to_node(cpu),
2192 					GFP_KERNEL | __GFP_NORETRY, 0);
2193 		if (!page)
2194 			goto failed;
2195 
2196 		event = page_address(page);
2197 		memset(event, 0, sizeof(*event));
2198 
2199 		per_cpu(trace_buffered_event, cpu) = event;
2200 
2201 		preempt_disable();
2202 		if (cpu == smp_processor_id() &&
2203 		    this_cpu_read(trace_buffered_event) !=
2204 		    per_cpu(trace_buffered_event, cpu))
2205 			WARN_ON_ONCE(1);
2206 		preempt_enable();
2207 	}
2208 
2209 	return;
2210  failed:
2211 	trace_buffered_event_disable();
2212 }
2213 
2214 static void enable_trace_buffered_event(void *data)
2215 {
2216 	/* Probably not needed, but do it anyway */
2217 	smp_rmb();
2218 	this_cpu_dec(trace_buffered_event_cnt);
2219 }
2220 
2221 static void disable_trace_buffered_event(void *data)
2222 {
2223 	this_cpu_inc(trace_buffered_event_cnt);
2224 }
2225 
2226 /**
2227  * trace_buffered_event_disable - disable buffering events
2228  *
2229  * When a filter is removed, it is faster to not use the buffered
2230  * events, and to commit directly into the ring buffer. Free up
2231  * the temp buffers when there are no more users. This requires
2232  * special synchronization with current events.
2233  */
2234 void trace_buffered_event_disable(void)
2235 {
2236 	int cpu;
2237 
2238 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239 
2240 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241 		return;
2242 
2243 	if (--trace_buffered_event_ref)
2244 		return;
2245 
2246 	preempt_disable();
2247 	/* For each CPU, set the buffer as used. */
2248 	smp_call_function_many(tracing_buffer_mask,
2249 			       disable_trace_buffered_event, NULL, 1);
2250 	preempt_enable();
2251 
2252 	/* Wait for all current users to finish */
2253 	synchronize_rcu();
2254 
2255 	for_each_tracing_cpu(cpu) {
2256 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257 		per_cpu(trace_buffered_event, cpu) = NULL;
2258 	}
2259 	/*
2260 	 * Make sure trace_buffered_event is NULL before clearing
2261 	 * trace_buffered_event_cnt.
2262 	 */
2263 	smp_wmb();
2264 
2265 	preempt_disable();
2266 	/* Do the work on each cpu */
2267 	smp_call_function_many(tracing_buffer_mask,
2268 			       enable_trace_buffered_event, NULL, 1);
2269 	preempt_enable();
2270 }
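/*
 * Illustrative sketch (editorial): callers pair the enable/disable above under
 * event_mutex, roughly as the event filter code does:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and later call trace_buffered_event_disable(), again under event_mutex, when
 * the filter is removed.  The reference count keeps nested users balanced.
 */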
2271 
2272 static struct ring_buffer *temp_buffer;
2273 
2274 struct ring_buffer_event *
2275 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276 			  struct trace_event_file *trace_file,
2277 			  int type, unsigned long len,
2278 			  unsigned long flags, int pc)
2279 {
2280 	struct ring_buffer_event *entry;
2281 	int val;
2282 
2283 	*current_rb = trace_file->tr->trace_buffer.buffer;
2284 
2285 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287 	    (entry = this_cpu_read(trace_buffered_event))) {
2288 		/* Try to use the per cpu buffer first */
2289 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2290 		if (val == 1) {
2291 			trace_event_setup(entry, type, flags, pc);
2292 			entry->array[0] = len;
2293 			return entry;
2294 		}
2295 		this_cpu_dec(trace_buffered_event_cnt);
2296 	}
2297 
2298 	entry = __trace_buffer_lock_reserve(*current_rb,
2299 					    type, len, flags, pc);
2300 	/*
2301 	 * If tracing is off, but we have triggers enabled
2302 	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursion
	 * safe and will not be recorded anywhere.
2305 	 */
2306 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307 		*current_rb = temp_buffer;
2308 		entry = __trace_buffer_lock_reserve(*current_rb,
2309 						    type, len, flags, pc);
2310 	}
2311 	return entry;
2312 }
2313 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314 
2315 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316 static DEFINE_MUTEX(tracepoint_printk_mutex);
2317 
2318 static void output_printk(struct trace_event_buffer *fbuffer)
2319 {
2320 	struct trace_event_call *event_call;
2321 	struct trace_event *event;
2322 	unsigned long flags;
2323 	struct trace_iterator *iter = tracepoint_print_iter;
2324 
2325 	/* We should never get here if iter is NULL */
2326 	if (WARN_ON_ONCE(!iter))
2327 		return;
2328 
2329 	event_call = fbuffer->trace_file->event_call;
2330 	if (!event_call || !event_call->event.funcs ||
2331 	    !event_call->event.funcs->trace)
2332 		return;
2333 
2334 	event = &fbuffer->trace_file->event_call->event;
2335 
2336 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337 	trace_seq_init(&iter->seq);
2338 	iter->ent = fbuffer->entry;
2339 	event_call->event.funcs->trace(iter, 0, event);
2340 	trace_seq_putc(&iter->seq, 0);
2341 	printk("%s", iter->seq.buffer);
2342 
2343 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344 }
2345 
2346 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347 			     void __user *buffer, size_t *lenp,
2348 			     loff_t *ppos)
2349 {
2350 	int save_tracepoint_printk;
2351 	int ret;
2352 
2353 	mutex_lock(&tracepoint_printk_mutex);
2354 	save_tracepoint_printk = tracepoint_printk;
2355 
2356 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357 
2358 	/*
2359 	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_print_iter is not allocated
2361 	 */
2362 	if (!tracepoint_print_iter)
2363 		tracepoint_printk = 0;
2364 
2365 	if (save_tracepoint_printk == tracepoint_printk)
2366 		goto out;
2367 
2368 	if (tracepoint_printk)
2369 		static_key_enable(&tracepoint_printk_key.key);
2370 	else
2371 		static_key_disable(&tracepoint_printk_key.key);
2372 
2373  out:
2374 	mutex_unlock(&tracepoint_printk_mutex);
2375 
2376 	return ret;
2377 }
2378 
2379 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380 {
2381 	if (static_key_false(&tracepoint_printk_key.key))
2382 		output_printk(fbuffer);
2383 
2384 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385 				    fbuffer->event, fbuffer->entry,
2386 				    fbuffer->flags, fbuffer->pc);
2387 }
2388 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389 
2390 /*
2391  * Skip 3:
2392  *
2393  *   trace_buffer_unlock_commit_regs()
2394  *   trace_event_buffer_commit()
2395  *   trace_event_raw_event_xxx()
2396  */
2397 # define STACK_SKIP 3
2398 
2399 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400 				     struct ring_buffer *buffer,
2401 				     struct ring_buffer_event *event,
2402 				     unsigned long flags, int pc,
2403 				     struct pt_regs *regs)
2404 {
2405 	__buffer_unlock_commit(buffer, event);
2406 
2407 	/*
2408 	 * If regs is not set, then skip the necessary functions.
2409 	 * Note, we can still get here via blktrace, wakeup tracer
2410 	 * and mmiotrace, but that's ok if they lose a function or
2411 	 * two. They are not that meaningful.
2412 	 */
2413 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414 	ftrace_trace_userstack(buffer, flags, pc);
2415 }
2416 
2417 /*
2418  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2419  */
2420 void
2421 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422 				   struct ring_buffer_event *event)
2423 {
2424 	__buffer_unlock_commit(buffer, event);
2425 }
2426 
2427 static void
2428 trace_process_export(struct trace_export *export,
2429 	       struct ring_buffer_event *event)
2430 {
2431 	struct trace_entry *entry;
2432 	unsigned int size = 0;
2433 
2434 	entry = ring_buffer_event_data(event);
2435 	size = ring_buffer_event_length(event);
2436 	export->write(export, entry, size);
2437 }
2438 
2439 static DEFINE_MUTEX(ftrace_export_lock);
2440 
2441 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442 
2443 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444 
2445 static inline void ftrace_exports_enable(void)
2446 {
2447 	static_branch_enable(&ftrace_exports_enabled);
2448 }
2449 
2450 static inline void ftrace_exports_disable(void)
2451 {
2452 	static_branch_disable(&ftrace_exports_enabled);
2453 }
2454 
2455 static void ftrace_exports(struct ring_buffer_event *event)
2456 {
2457 	struct trace_export *export;
2458 
2459 	preempt_disable_notrace();
2460 
2461 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462 	while (export) {
2463 		trace_process_export(export, event);
2464 		export = rcu_dereference_raw_notrace(export->next);
2465 	}
2466 
2467 	preempt_enable_notrace();
2468 }
2469 
2470 static inline void
2471 add_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473 	rcu_assign_pointer(export->next, *list);
2474 	/*
	 * We are inserting export into the list, but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer inserted into the list.
2479 	 */
2480 	rcu_assign_pointer(*list, export);
2481 }
2482 
2483 static inline int
2484 rm_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486 	struct trace_export **p;
2487 
2488 	for (p = list; *p != NULL; p = &(*p)->next)
2489 		if (*p == export)
2490 			break;
2491 
2492 	if (*p != export)
2493 		return -1;
2494 
2495 	rcu_assign_pointer(*p, (*p)->next);
2496 
2497 	return 0;
2498 }
2499 
2500 static inline void
2501 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502 {
2503 	if (*list == NULL)
2504 		ftrace_exports_enable();
2505 
2506 	add_trace_export(list, export);
2507 }
2508 
2509 static inline int
2510 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511 {
2512 	int ret;
2513 
2514 	ret = rm_trace_export(list, export);
2515 	if (*list == NULL)
2516 		ftrace_exports_disable();
2517 
2518 	return ret;
2519 }
2520 
2521 int register_ftrace_export(struct trace_export *export)
2522 {
2523 	if (WARN_ON_ONCE(!export->write))
2524 		return -1;
2525 
2526 	mutex_lock(&ftrace_export_lock);
2527 
2528 	add_ftrace_export(&ftrace_exports_list, export);
2529 
2530 	mutex_unlock(&ftrace_export_lock);
2531 
2532 	return 0;
2533 }
2534 EXPORT_SYMBOL_GPL(register_ftrace_export);
2535 
2536 int unregister_ftrace_export(struct trace_export *export)
2537 {
2538 	int ret;
2539 
2540 	mutex_lock(&ftrace_export_lock);
2541 
2542 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2543 
2544 	mutex_unlock(&ftrace_export_lock);
2545 
2546 	return ret;
2547 }
2548 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
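/*
 * Illustrative sketch (editorial): a module can mirror function trace entries
 * to another transport by registering a trace_export.  Assuming the ->write()
 * prototype from <linux/trace.h> in this tree, and with "my_export*" as
 * hypothetical names:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward 'size' bytes of the raw entry somewhere
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */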
2549 
2550 void
2551 trace_function(struct trace_array *tr,
2552 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553 	       int pc)
2554 {
2555 	struct trace_event_call *call = &event_function;
2556 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557 	struct ring_buffer_event *event;
2558 	struct ftrace_entry *entry;
2559 
2560 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561 					    flags, pc);
2562 	if (!event)
2563 		return;
2564 	entry	= ring_buffer_event_data(event);
2565 	entry->ip			= ip;
2566 	entry->parent_ip		= parent_ip;
2567 
2568 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2569 		if (static_branch_unlikely(&ftrace_exports_enabled))
2570 			ftrace_exports(event);
2571 		__buffer_unlock_commit(buffer, event);
2572 	}
2573 }
2574 
2575 #ifdef CONFIG_STACKTRACE
2576 
2577 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578 struct ftrace_stack {
2579 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2580 };
2581 
2582 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584 
2585 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586 				 unsigned long flags,
2587 				 int skip, int pc, struct pt_regs *regs)
2588 {
2589 	struct trace_event_call *call = &event_kernel_stack;
2590 	struct ring_buffer_event *event;
2591 	struct stack_entry *entry;
2592 	struct stack_trace trace;
2593 	int use_stack;
2594 	int size = FTRACE_STACK_ENTRIES;
2595 
2596 	trace.nr_entries	= 0;
2597 	trace.skip		= skip;
2598 
2599 	/*
	 * Add one, for this function and the call to save_stack_trace().
2601 	 * If regs is set, then these functions will not be in the way.
2602 	 */
2603 #ifndef CONFIG_UNWINDER_ORC
2604 	if (!regs)
2605 		trace.skip++;
2606 #endif
2607 
2608 	/*
2609 	 * Since events can happen in NMIs there's no safe way to
2610 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to save its stack directly
	 * into the event (limited to FTRACE_STACK_ENTRIES entries).
2613 	 */
2614 	preempt_disable_notrace();
2615 
2616 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617 	/*
2618 	 * We don't need any atomic variables, just a barrier.
2619 	 * If an interrupt comes in, we don't care, because it would
2620 	 * have exited and put the counter back to what we want.
2621 	 * We just need a barrier to keep gcc from moving things
2622 	 * around.
2623 	 */
2624 	barrier();
2625 	if (use_stack == 1) {
2626 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2627 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2628 
2629 		if (regs)
2630 			save_stack_trace_regs(regs, &trace);
2631 		else
2632 			save_stack_trace(&trace);
2633 
2634 		if (trace.nr_entries > size)
2635 			size = trace.nr_entries;
2636 	} else
2637 		/* From now on, use_stack is a boolean */
2638 		use_stack = 0;
2639 
2640 	size *= sizeof(unsigned long);
2641 
2642 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643 					    sizeof(*entry) + size, flags, pc);
2644 	if (!event)
2645 		goto out;
2646 	entry = ring_buffer_event_data(event);
2647 
2648 	memset(&entry->caller, 0, size);
2649 
2650 	if (use_stack)
2651 		memcpy(&entry->caller, trace.entries,
2652 		       trace.nr_entries * sizeof(unsigned long));
2653 	else {
2654 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2655 		trace.entries		= entry->caller;
2656 		if (regs)
2657 			save_stack_trace_regs(regs, &trace);
2658 		else
2659 			save_stack_trace(&trace);
2660 	}
2661 
2662 	entry->size = trace.nr_entries;
2663 
2664 	if (!call_filter_check_discard(call, entry, buffer, event))
2665 		__buffer_unlock_commit(buffer, event);
2666 
2667  out:
2668 	/* Again, don't let gcc optimize things here */
2669 	barrier();
2670 	__this_cpu_dec(ftrace_stack_reserve);
2671 	preempt_enable_notrace();
2672 
2673 }
2674 
2675 static inline void ftrace_trace_stack(struct trace_array *tr,
2676 				      struct ring_buffer *buffer,
2677 				      unsigned long flags,
2678 				      int skip, int pc, struct pt_regs *regs)
2679 {
2680 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681 		return;
2682 
2683 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684 }
2685 
2686 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687 		   int pc)
2688 {
2689 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690 
2691 	if (rcu_is_watching()) {
2692 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693 		return;
2694 	}
2695 
2696 	/*
2697 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698 	 * but if the above rcu_is_watching() failed, then the NMI
2699 	 * triggered someplace critical, and rcu_irq_enter() should
2700 	 * not be called from NMI.
2701 	 */
2702 	if (unlikely(in_nmi()))
2703 		return;
2704 
2705 	rcu_irq_enter_irqson();
2706 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707 	rcu_irq_exit_irqson();
2708 }
2709 
2710 /**
2711  * trace_dump_stack - record a stack back trace in the trace buffer
2712  * @skip: Number of functions to skip (helper handlers)
2713  */
2714 void trace_dump_stack(int skip)
2715 {
2716 	unsigned long flags;
2717 
2718 	if (tracing_disabled || tracing_selftest_running)
2719 		return;
2720 
2721 	local_save_flags(flags);
2722 
2723 #ifndef CONFIG_UNWINDER_ORC
2724 	/* Skip 1 to skip this function. */
2725 	skip++;
2726 #endif
2727 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728 			     flags, skip, preempt_count(), NULL);
2729 }
2730 EXPORT_SYMBOL_GPL(trace_dump_stack);
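/*
 * Illustrative sketch (editorial): any kernel code can record a backtrace into
 * the trace buffer instead of the console with:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero skip hides that many callers, which helps when the call is buried
 * inside a helper function.
 */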
2731 
2732 static DEFINE_PER_CPU(int, user_stack_count);
2733 
2734 void
2735 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2736 {
2737 	struct trace_event_call *call = &event_user_stack;
2738 	struct ring_buffer_event *event;
2739 	struct userstack_entry *entry;
2740 	struct stack_trace trace;
2741 
2742 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2743 		return;
2744 
2745 	/*
	 * NMIs can not handle page faults, even with fixups.
	 * Saving the user stack can (and often does) fault.
2748 	 */
2749 	if (unlikely(in_nmi()))
2750 		return;
2751 
2752 	/*
2753 	 * prevent recursion, since the user stack tracing may
2754 	 * trigger other kernel events.
2755 	 */
2756 	preempt_disable();
2757 	if (__this_cpu_read(user_stack_count))
2758 		goto out;
2759 
2760 	__this_cpu_inc(user_stack_count);
2761 
2762 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2763 					    sizeof(*entry), flags, pc);
2764 	if (!event)
2765 		goto out_drop_count;
2766 	entry	= ring_buffer_event_data(event);
2767 
2768 	entry->tgid		= current->tgid;
2769 	memset(&entry->caller, 0, sizeof(entry->caller));
2770 
2771 	trace.nr_entries	= 0;
2772 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2773 	trace.skip		= 0;
2774 	trace.entries		= entry->caller;
2775 
2776 	save_stack_trace_user(&trace);
2777 	if (!call_filter_check_discard(call, entry, buffer, event))
2778 		__buffer_unlock_commit(buffer, event);
2779 
2780  out_drop_count:
2781 	__this_cpu_dec(user_stack_count);
2782  out:
2783 	preempt_enable();
2784 }
2785 
2786 #ifdef UNUSED
2787 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2788 {
2789 	ftrace_trace_userstack(tr, flags, preempt_count());
2790 }
2791 #endif /* UNUSED */
2792 
2793 #endif /* CONFIG_STACKTRACE */
2794 
2795 /* created for use with alloc_percpu */
2796 struct trace_buffer_struct {
2797 	int nesting;
2798 	char buffer[4][TRACE_BUF_SIZE];
2799 };
2800 
2801 static struct trace_buffer_struct *trace_percpu_buffer;
2802 
2803 /*
 * This allows for lockless recording.  If we're nested too deeply, then
2805  * this returns NULL.
2806  */
2807 static char *get_trace_buf(void)
2808 {
2809 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2810 
2811 	if (!buffer || buffer->nesting >= 4)
2812 		return NULL;
2813 
2814 	buffer->nesting++;
2815 
2816 	/* Interrupts must see nesting incremented before we use the buffer */
2817 	barrier();
2818 	return &buffer->buffer[buffer->nesting][0];
2819 }
2820 
2821 static void put_trace_buf(void)
2822 {
2823 	/* Don't let the decrement of nesting leak before this */
2824 	barrier();
2825 	this_cpu_dec(trace_percpu_buffer->nesting);
2826 }
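/*
 * Illustrative sketch (editorial): users bracket the per-cpu buffer with
 * preemption disabled, as trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format up to TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */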
2827 
2828 static int alloc_percpu_trace_buffer(void)
2829 {
2830 	struct trace_buffer_struct *buffers;
2831 
2832 	buffers = alloc_percpu(struct trace_buffer_struct);
2833 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2834 		return -ENOMEM;
2835 
2836 	trace_percpu_buffer = buffers;
2837 	return 0;
2838 }
2839 
2840 static int buffers_allocated;
2841 
2842 void trace_printk_init_buffers(void)
2843 {
2844 	if (buffers_allocated)
2845 		return;
2846 
2847 	if (alloc_percpu_trace_buffer())
2848 		return;
2849 
2850 	/* trace_printk() is for debug use only. Don't use it in production. */
2851 
2852 	pr_warn("\n");
2853 	pr_warn("**********************************************************\n");
2854 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2857 	pr_warn("**                                                      **\n");
2858 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2859 	pr_warn("** unsafe for production use.                           **\n");
2860 	pr_warn("**                                                      **\n");
2861 	pr_warn("** If you see this message and you are not debugging    **\n");
2862 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2863 	pr_warn("**                                                      **\n");
2864 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2865 	pr_warn("**********************************************************\n");
2866 
2867 	/* Expand the buffers to set size */
2868 	tracing_update_buffers();
2869 
2870 	buffers_allocated = 1;
2871 
2872 	/*
2873 	 * trace_printk_init_buffers() can be called by modules.
2874 	 * If that happens, then we need to start cmdline recording
2875 	 * directly here. If the global_trace.buffer is already
2876 	 * allocated here, then this was called by module code.
2877 	 */
2878 	if (global_trace.trace_buffer.buffer)
2879 		tracing_start_cmdline_record();
2880 }
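/*
 * Illustrative sketch (editorial): the buffers above back the trace_printk()
 * debugging macro, e.g.:
 *
 *	trace_printk("reached %s with x=%d\n", __func__, x);
 *
 * which lands in the ring buffer via trace_vbprintk()/trace_vprintk() below
 * rather than on the console.
 */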
2881 
2882 void trace_printk_start_comm(void)
2883 {
2884 	/* Start tracing comms if trace printk is set */
2885 	if (!buffers_allocated)
2886 		return;
2887 	tracing_start_cmdline_record();
2888 }
2889 
2890 static void trace_printk_start_stop_comm(int enabled)
2891 {
2892 	if (!buffers_allocated)
2893 		return;
2894 
2895 	if (enabled)
2896 		tracing_start_cmdline_record();
2897 	else
2898 		tracing_stop_cmdline_record();
2899 }
2900 
2901 /**
2902  * trace_vbprintk - write binary msg to tracing buffer
 * @ip:   address of the caller (recorded as the event's instruction pointer)
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
2904  */
2905 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2906 {
2907 	struct trace_event_call *call = &event_bprint;
2908 	struct ring_buffer_event *event;
2909 	struct ring_buffer *buffer;
2910 	struct trace_array *tr = &global_trace;
2911 	struct bprint_entry *entry;
2912 	unsigned long flags;
2913 	char *tbuffer;
2914 	int len = 0, size, pc;
2915 
2916 	if (unlikely(tracing_selftest_running || tracing_disabled))
2917 		return 0;
2918 
2919 	/* Don't pollute graph traces with trace_vprintk internals */
2920 	pause_graph_tracing();
2921 
2922 	pc = preempt_count();
2923 	preempt_disable_notrace();
2924 
2925 	tbuffer = get_trace_buf();
2926 	if (!tbuffer) {
2927 		len = 0;
2928 		goto out_nobuffer;
2929 	}
2930 
2931 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2932 
2933 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2934 		goto out;
2935 
2936 	local_save_flags(flags);
2937 	size = sizeof(*entry) + sizeof(u32) * len;
2938 	buffer = tr->trace_buffer.buffer;
2939 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2940 					    flags, pc);
2941 	if (!event)
2942 		goto out;
2943 	entry = ring_buffer_event_data(event);
2944 	entry->ip			= ip;
2945 	entry->fmt			= fmt;
2946 
2947 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2948 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2949 		__buffer_unlock_commit(buffer, event);
2950 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2951 	}
2952 
2953 out:
2954 	put_trace_buf();
2955 
2956 out_nobuffer:
2957 	preempt_enable_notrace();
2958 	unpause_graph_tracing();
2959 
2960 	return len;
2961 }
2962 EXPORT_SYMBOL_GPL(trace_vbprintk);
2963 
2964 __printf(3, 0)
2965 static int
2966 __trace_array_vprintk(struct ring_buffer *buffer,
2967 		      unsigned long ip, const char *fmt, va_list args)
2968 {
2969 	struct trace_event_call *call = &event_print;
2970 	struct ring_buffer_event *event;
2971 	int len = 0, size, pc;
2972 	struct print_entry *entry;
2973 	unsigned long flags;
2974 	char *tbuffer;
2975 
2976 	if (tracing_disabled || tracing_selftest_running)
2977 		return 0;
2978 
2979 	/* Don't pollute graph traces with trace_vprintk internals */
2980 	pause_graph_tracing();
2981 
2982 	pc = preempt_count();
2983 	preempt_disable_notrace();
2984 
2985 
2986 	tbuffer = get_trace_buf();
2987 	if (!tbuffer) {
2988 		len = 0;
2989 		goto out_nobuffer;
2990 	}
2991 
2992 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2993 
2994 	local_save_flags(flags);
2995 	size = sizeof(*entry) + len + 1;
2996 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2997 					    flags, pc);
2998 	if (!event)
2999 		goto out;
3000 	entry = ring_buffer_event_data(event);
3001 	entry->ip = ip;
3002 
3003 	memcpy(&entry->buf, tbuffer, len + 1);
3004 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3005 		__buffer_unlock_commit(buffer, event);
3006 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3007 	}
3008 
3009 out:
3010 	put_trace_buf();
3011 
3012 out_nobuffer:
3013 	preempt_enable_notrace();
3014 	unpause_graph_tracing();
3015 
3016 	return len;
3017 }
3018 
3019 __printf(3, 0)
3020 int trace_array_vprintk(struct trace_array *tr,
3021 			unsigned long ip, const char *fmt, va_list args)
3022 {
3023 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3024 }
3025 
3026 __printf(3, 0)
3027 int trace_array_printk(struct trace_array *tr,
3028 		       unsigned long ip, const char *fmt, ...)
3029 {
3030 	int ret;
3031 	va_list ap;
3032 
3033 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3034 		return 0;
3035 
3036 	va_start(ap, fmt);
3037 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3038 	va_end(ap);
3039 	return ret;
3040 }
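/*
 * Illustrative sketch (editorial): code holding a trace_array for an instance
 * can write messages into that instance's buffer:
 *
 *	trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * Note that the TRACE_ITER_PRINTK check above looks at global_trace, so the
 * top-level "printk" trace option gates this for every instance.
 */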
3041 
3042 __printf(3, 4)
3043 int trace_array_printk_buf(struct ring_buffer *buffer,
3044 			   unsigned long ip, const char *fmt, ...)
3045 {
3046 	int ret;
3047 	va_list ap;
3048 
3049 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3050 		return 0;
3051 
3052 	va_start(ap, fmt);
3053 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3054 	va_end(ap);
3055 	return ret;
3056 }
3057 
3058 __printf(2, 0)
3059 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3060 {
3061 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3062 }
3063 EXPORT_SYMBOL_GPL(trace_vprintk);
3064 
3065 static void trace_iterator_increment(struct trace_iterator *iter)
3066 {
3067 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3068 
3069 	iter->idx++;
3070 	if (buf_iter)
3071 		ring_buffer_read(buf_iter, NULL);
3072 }
3073 
3074 static struct trace_entry *
3075 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3076 		unsigned long *lost_events)
3077 {
3078 	struct ring_buffer_event *event;
3079 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3080 
3081 	if (buf_iter)
3082 		event = ring_buffer_iter_peek(buf_iter, ts);
3083 	else
3084 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3085 					 lost_events);
3086 
3087 	if (event) {
3088 		iter->ent_size = ring_buffer_event_length(event);
3089 		return ring_buffer_event_data(event);
3090 	}
3091 	iter->ent_size = 0;
3092 	return NULL;
3093 }
3094 
3095 static struct trace_entry *
3096 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3097 		  unsigned long *missing_events, u64 *ent_ts)
3098 {
3099 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3100 	struct trace_entry *ent, *next = NULL;
3101 	unsigned long lost_events = 0, next_lost = 0;
3102 	int cpu_file = iter->cpu_file;
3103 	u64 next_ts = 0, ts;
3104 	int next_cpu = -1;
3105 	int next_size = 0;
3106 	int cpu;
3107 
3108 	/*
	 * If we are in a per_cpu trace file, don't bother iterating over
	 * all the CPUs; peek at the requested CPU directly.
3111 	 */
3112 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3113 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3114 			return NULL;
3115 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3116 		if (ent_cpu)
3117 			*ent_cpu = cpu_file;
3118 
3119 		return ent;
3120 	}
3121 
3122 	for_each_tracing_cpu(cpu) {
3123 
3124 		if (ring_buffer_empty_cpu(buffer, cpu))
3125 			continue;
3126 
3127 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3128 
3129 		/*
3130 		 * Pick the entry with the smallest timestamp:
3131 		 */
3132 		if (ent && (!next || ts < next_ts)) {
3133 			next = ent;
3134 			next_cpu = cpu;
3135 			next_ts = ts;
3136 			next_lost = lost_events;
3137 			next_size = iter->ent_size;
3138 		}
3139 	}
3140 
3141 	iter->ent_size = next_size;
3142 
3143 	if (ent_cpu)
3144 		*ent_cpu = next_cpu;
3145 
3146 	if (ent_ts)
3147 		*ent_ts = next_ts;
3148 
3149 	if (missing_events)
3150 		*missing_events = next_lost;
3151 
3152 	return next;
3153 }
3154 
3155 /* Find the next real entry, without updating the iterator itself */
3156 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3157 					  int *ent_cpu, u64 *ent_ts)
3158 {
3159 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3160 }
3161 
3162 /* Find the next real entry, and increment the iterator to the next entry */
3163 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3164 {
3165 	iter->ent = __find_next_entry(iter, &iter->cpu,
3166 				      &iter->lost_events, &iter->ts);
3167 
3168 	if (iter->ent)
3169 		trace_iterator_increment(iter);
3170 
3171 	return iter->ent ? iter : NULL;
3172 }
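/*
 * Illustrative sketch (editorial): readers walk the merged, time-ordered
 * stream with the iterator helpers, roughly:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * which is essentially what the seq_file path and ftrace_dump() do.
 */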
3173 
3174 static void trace_consume(struct trace_iterator *iter)
3175 {
3176 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3177 			    &iter->lost_events);
3178 }
3179 
3180 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3181 {
3182 	struct trace_iterator *iter = m->private;
3183 	int i = (int)*pos;
3184 	void *ent;
3185 
3186 	WARN_ON_ONCE(iter->leftover);
3187 
3188 	(*pos)++;
3189 
3190 	/* can't go backwards */
3191 	if (iter->idx > i)
3192 		return NULL;
3193 
3194 	if (iter->idx < 0)
3195 		ent = trace_find_next_entry_inc(iter);
3196 	else
3197 		ent = iter;
3198 
3199 	while (ent && iter->idx < i)
3200 		ent = trace_find_next_entry_inc(iter);
3201 
3202 	iter->pos = *pos;
3203 
3204 	return ent;
3205 }
3206 
3207 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3208 {
3209 	struct ring_buffer_event *event;
3210 	struct ring_buffer_iter *buf_iter;
3211 	unsigned long entries = 0;
3212 	u64 ts;
3213 
3214 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3215 
3216 	buf_iter = trace_buffer_iter(iter, cpu);
3217 	if (!buf_iter)
3218 		return;
3219 
3220 	ring_buffer_iter_reset(buf_iter);
3221 
3222 	/*
	 * With the max latency tracers, a reset may never have taken
	 * place on a cpu. This shows up as entries whose timestamp is
	 * before the start of the buffer.
3226 	 */
3227 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3228 		if (ts >= iter->trace_buffer->time_start)
3229 			break;
3230 		entries++;
3231 		ring_buffer_read(buf_iter, NULL);
3232 	}
3233 
3234 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3235 }
3236 
3237 /*
 * The current tracer is copied to avoid global locking
3239  * all around.
3240  */
3241 static void *s_start(struct seq_file *m, loff_t *pos)
3242 {
3243 	struct trace_iterator *iter = m->private;
3244 	struct trace_array *tr = iter->tr;
3245 	int cpu_file = iter->cpu_file;
3246 	void *p = NULL;
3247 	loff_t l = 0;
3248 	int cpu;
3249 
3250 	/*
	 * Copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace; the name pointer can
	 * be compared instead of using strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
3255 	 */
3256 	mutex_lock(&trace_types_lock);
3257 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3258 		*iter->trace = *tr->current_trace;
3259 	mutex_unlock(&trace_types_lock);
3260 
3261 #ifdef CONFIG_TRACER_MAX_TRACE
3262 	if (iter->snapshot && iter->trace->use_max_tr)
3263 		return ERR_PTR(-EBUSY);
3264 #endif
3265 
3266 	if (!iter->snapshot)
3267 		atomic_inc(&trace_record_taskinfo_disabled);
3268 
3269 	if (*pos != iter->pos) {
3270 		iter->ent = NULL;
3271 		iter->cpu = 0;
3272 		iter->idx = -1;
3273 
3274 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3275 			for_each_tracing_cpu(cpu)
3276 				tracing_iter_reset(iter, cpu);
3277 		} else
3278 			tracing_iter_reset(iter, cpu_file);
3279 
3280 		iter->leftover = 0;
3281 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3282 			;
3283 
3284 	} else {
3285 		/*
3286 		 * If we overflowed the seq_file before, then we want
3287 		 * to just reuse the trace_seq buffer again.
3288 		 */
3289 		if (iter->leftover)
3290 			p = iter;
3291 		else {
3292 			l = *pos - 1;
3293 			p = s_next(m, p, &l);
3294 		}
3295 	}
3296 
3297 	trace_event_read_lock();
3298 	trace_access_lock(cpu_file);
3299 	return p;
3300 }
3301 
3302 static void s_stop(struct seq_file *m, void *p)
3303 {
3304 	struct trace_iterator *iter = m->private;
3305 
3306 #ifdef CONFIG_TRACER_MAX_TRACE
3307 	if (iter->snapshot && iter->trace->use_max_tr)
3308 		return;
3309 #endif
3310 
3311 	if (!iter->snapshot)
3312 		atomic_dec(&trace_record_taskinfo_disabled);
3313 
3314 	trace_access_unlock(iter->cpu_file);
3315 	trace_event_read_unlock();
3316 }
3317 
3318 static void
3319 get_total_entries(struct trace_buffer *buf,
3320 		  unsigned long *total, unsigned long *entries)
3321 {
3322 	unsigned long count;
3323 	int cpu;
3324 
3325 	*total = 0;
3326 	*entries = 0;
3327 
3328 	for_each_tracing_cpu(cpu) {
3329 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3330 		/*
3331 		 * If this buffer has skipped entries, then we hold all
3332 		 * entries for the trace and we need to ignore the
3333 		 * ones before the time stamp.
3334 		 */
3335 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3336 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3337 			/* total is the same as the entries */
3338 			*total += count;
3339 		} else
3340 			*total += count +
3341 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3342 		*entries += count;
3343 	}
3344 }
3345 
3346 static void print_lat_help_header(struct seq_file *m)
3347 {
3348 	seq_puts(m, "#                  _------=> CPU#            \n"
3349 		    "#                 / _-----=> irqs-off        \n"
3350 		    "#                | / _----=> need-resched    \n"
3351 		    "#                || / _---=> hardirq/softirq \n"
3352 		    "#                ||| / _--=> preempt-depth   \n"
3353 		    "#                |||| /     delay            \n"
3354 		    "#  cmd     pid   ||||| time  |   caller      \n"
3355 		    "#     \\   /      |||||  \\    |   /         \n");
3356 }
3357 
3358 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3359 {
3360 	unsigned long total;
3361 	unsigned long entries;
3362 
3363 	get_total_entries(buf, &total, &entries);
3364 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3365 		   entries, total, num_online_cpus());
3366 	seq_puts(m, "#\n");
3367 }
3368 
3369 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3370 				   unsigned int flags)
3371 {
3372 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3373 
3374 	print_event_info(buf, m);
3375 
3376 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3377 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3378 }
3379 
3380 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3381 				       unsigned int flags)
3382 {
3383 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3384 	const char tgid_space[] = "          ";
3385 	const char space[] = "  ";
3386 
3387 	print_event_info(buf, m);
3388 
3389 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3390 		   tgid ? tgid_space : space);
3391 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3392 		   tgid ? tgid_space : space);
3393 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3394 		   tgid ? tgid_space : space);
3395 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3396 		   tgid ? tgid_space : space);
3397 	seq_printf(m, "#                          %s||| /     delay\n",
3398 		   tgid ? tgid_space : space);
3399 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3400 		   tgid ? "   TGID   " : space);
3401 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3402 		   tgid ? "     |    " : space);
3403 }
3404 
3405 void
3406 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3407 {
3408 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3409 	struct trace_buffer *buf = iter->trace_buffer;
3410 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3411 	struct tracer *type = iter->trace;
3412 	unsigned long entries;
3413 	unsigned long total;
3414 	const char *name = "preemption";
3415 
3416 	name = type->name;
3417 
3418 	get_total_entries(buf, &total, &entries);
3419 
3420 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3421 		   name, UTS_RELEASE);
3422 	seq_puts(m, "# -----------------------------------"
3423 		 "---------------------------------\n");
3424 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3425 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3426 		   nsecs_to_usecs(data->saved_latency),
3427 		   entries,
3428 		   total,
3429 		   buf->cpu,
3430 #if defined(CONFIG_PREEMPT_NONE)
3431 		   "server",
3432 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3433 		   "desktop",
3434 #elif defined(CONFIG_PREEMPT)
3435 		   "preempt",
3436 #else
3437 		   "unknown",
3438 #endif
3439 		   /* These are reserved for later use */
3440 		   0, 0, 0, 0);
3441 #ifdef CONFIG_SMP
3442 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3443 #else
3444 	seq_puts(m, ")\n");
3445 #endif
3446 	seq_puts(m, "#    -----------------\n");
3447 	seq_printf(m, "#    | task: %.16s-%d "
3448 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3449 		   data->comm, data->pid,
3450 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3451 		   data->policy, data->rt_priority);
3452 	seq_puts(m, "#    -----------------\n");
3453 
3454 	if (data->critical_start) {
3455 		seq_puts(m, "#  => started at: ");
3456 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3457 		trace_print_seq(m, &iter->seq);
3458 		seq_puts(m, "\n#  => ended at:   ");
3459 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3460 		trace_print_seq(m, &iter->seq);
3461 		seq_puts(m, "\n#\n");
3462 	}
3463 
3464 	seq_puts(m, "#\n");
3465 }
3466 
3467 static void test_cpu_buff_start(struct trace_iterator *iter)
3468 {
3469 	struct trace_seq *s = &iter->seq;
3470 	struct trace_array *tr = iter->tr;
3471 
3472 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3473 		return;
3474 
3475 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3476 		return;
3477 
3478 	if (cpumask_available(iter->started) &&
3479 	    cpumask_test_cpu(iter->cpu, iter->started))
3480 		return;
3481 
3482 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3483 		return;
3484 
3485 	if (cpumask_available(iter->started))
3486 		cpumask_set_cpu(iter->cpu, iter->started);
3487 
3488 	/* Don't print started cpu buffer for the first entry of the trace */
3489 	if (iter->idx > 1)
3490 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3491 				iter->cpu);
3492 }
3493 
3494 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3495 {
3496 	struct trace_array *tr = iter->tr;
3497 	struct trace_seq *s = &iter->seq;
3498 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3499 	struct trace_entry *entry;
3500 	struct trace_event *event;
3501 
3502 	entry = iter->ent;
3503 
3504 	test_cpu_buff_start(iter);
3505 
3506 	event = ftrace_find_event(entry->type);
3507 
3508 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3509 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3510 			trace_print_lat_context(iter);
3511 		else
3512 			trace_print_context(iter);
3513 	}
3514 
3515 	if (trace_seq_has_overflowed(s))
3516 		return TRACE_TYPE_PARTIAL_LINE;
3517 
3518 	if (event)
3519 		return event->funcs->trace(iter, sym_flags, event);
3520 
3521 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3522 
3523 	return trace_handle_return(s);
3524 }
3525 
3526 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3527 {
3528 	struct trace_array *tr = iter->tr;
3529 	struct trace_seq *s = &iter->seq;
3530 	struct trace_entry *entry;
3531 	struct trace_event *event;
3532 
3533 	entry = iter->ent;
3534 
3535 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3536 		trace_seq_printf(s, "%d %d %llu ",
3537 				 entry->pid, iter->cpu, iter->ts);
3538 
3539 	if (trace_seq_has_overflowed(s))
3540 		return TRACE_TYPE_PARTIAL_LINE;
3541 
3542 	event = ftrace_find_event(entry->type);
3543 	if (event)
3544 		return event->funcs->raw(iter, 0, event);
3545 
3546 	trace_seq_printf(s, "%d ?\n", entry->type);
3547 
3548 	return trace_handle_return(s);
3549 }
3550 
3551 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3552 {
3553 	struct trace_array *tr = iter->tr;
3554 	struct trace_seq *s = &iter->seq;
3555 	unsigned char newline = '\n';
3556 	struct trace_entry *entry;
3557 	struct trace_event *event;
3558 
3559 	entry = iter->ent;
3560 
3561 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3562 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3563 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3564 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3565 		if (trace_seq_has_overflowed(s))
3566 			return TRACE_TYPE_PARTIAL_LINE;
3567 	}
3568 
3569 	event = ftrace_find_event(entry->type);
3570 	if (event) {
3571 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3572 		if (ret != TRACE_TYPE_HANDLED)
3573 			return ret;
3574 	}
3575 
3576 	SEQ_PUT_FIELD(s, newline);
3577 
3578 	return trace_handle_return(s);
3579 }
3580 
3581 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3582 {
3583 	struct trace_array *tr = iter->tr;
3584 	struct trace_seq *s = &iter->seq;
3585 	struct trace_entry *entry;
3586 	struct trace_event *event;
3587 
3588 	entry = iter->ent;
3589 
3590 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3591 		SEQ_PUT_FIELD(s, entry->pid);
3592 		SEQ_PUT_FIELD(s, iter->cpu);
3593 		SEQ_PUT_FIELD(s, iter->ts);
3594 		if (trace_seq_has_overflowed(s))
3595 			return TRACE_TYPE_PARTIAL_LINE;
3596 	}
3597 
3598 	event = ftrace_find_event(entry->type);
3599 	return event ? event->funcs->binary(iter, 0, event) :
3600 		TRACE_TYPE_HANDLED;
3601 }
3602 
3603 int trace_empty(struct trace_iterator *iter)
3604 {
3605 	struct ring_buffer_iter *buf_iter;
3606 	int cpu;
3607 
3608 	/* If we are looking at one CPU buffer, only check that one */
3609 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3610 		cpu = iter->cpu_file;
3611 		buf_iter = trace_buffer_iter(iter, cpu);
3612 		if (buf_iter) {
3613 			if (!ring_buffer_iter_empty(buf_iter))
3614 				return 0;
3615 		} else {
3616 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617 				return 0;
3618 		}
3619 		return 1;
3620 	}
3621 
3622 	for_each_tracing_cpu(cpu) {
3623 		buf_iter = trace_buffer_iter(iter, cpu);
3624 		if (buf_iter) {
3625 			if (!ring_buffer_iter_empty(buf_iter))
3626 				return 0;
3627 		} else {
3628 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3629 				return 0;
3630 		}
3631 	}
3632 
3633 	return 1;
3634 }
3635 
3636 /*  Called with trace_event_read_lock() held. */
3637 enum print_line_t print_trace_line(struct trace_iterator *iter)
3638 {
3639 	struct trace_array *tr = iter->tr;
3640 	unsigned long trace_flags = tr->trace_flags;
3641 	enum print_line_t ret;
3642 
3643 	if (iter->lost_events) {
3644 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3645 				 iter->cpu, iter->lost_events);
3646 		if (trace_seq_has_overflowed(&iter->seq))
3647 			return TRACE_TYPE_PARTIAL_LINE;
3648 	}
3649 
3650 	if (iter->trace && iter->trace->print_line) {
3651 		ret = iter->trace->print_line(iter);
3652 		if (ret != TRACE_TYPE_UNHANDLED)
3653 			return ret;
3654 	}
3655 
3656 	if (iter->ent->type == TRACE_BPUTS &&
3657 			trace_flags & TRACE_ITER_PRINTK &&
3658 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659 		return trace_print_bputs_msg_only(iter);
3660 
3661 	if (iter->ent->type == TRACE_BPRINT &&
3662 			trace_flags & TRACE_ITER_PRINTK &&
3663 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3664 		return trace_print_bprintk_msg_only(iter);
3665 
3666 	if (iter->ent->type == TRACE_PRINT &&
3667 			trace_flags & TRACE_ITER_PRINTK &&
3668 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3669 		return trace_print_printk_msg_only(iter);
3670 
3671 	if (trace_flags & TRACE_ITER_BIN)
3672 		return print_bin_fmt(iter);
3673 
3674 	if (trace_flags & TRACE_ITER_HEX)
3675 		return print_hex_fmt(iter);
3676 
3677 	if (trace_flags & TRACE_ITER_RAW)
3678 		return print_raw_fmt(iter);
3679 
3680 	return print_trace_fmt(iter);
3681 }
3682 
3683 void trace_latency_header(struct seq_file *m)
3684 {
3685 	struct trace_iterator *iter = m->private;
3686 	struct trace_array *tr = iter->tr;
3687 
3688 	/* print nothing if the buffers are empty */
3689 	if (trace_empty(iter))
3690 		return;
3691 
3692 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3693 		print_trace_header(m, iter);
3694 
3695 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3696 		print_lat_help_header(m);
3697 }
3698 
3699 void trace_default_header(struct seq_file *m)
3700 {
3701 	struct trace_iterator *iter = m->private;
3702 	struct trace_array *tr = iter->tr;
3703 	unsigned long trace_flags = tr->trace_flags;
3704 
3705 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3706 		return;
3707 
3708 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3709 		/* print nothing if the buffers are empty */
3710 		if (trace_empty(iter))
3711 			return;
3712 		print_trace_header(m, iter);
3713 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3714 			print_lat_help_header(m);
3715 	} else {
3716 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3717 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3718 				print_func_help_header_irq(iter->trace_buffer,
3719 							   m, trace_flags);
3720 			else
3721 				print_func_help_header(iter->trace_buffer, m,
3722 						       trace_flags);
3723 		}
3724 	}
3725 }
3726 
3727 static void test_ftrace_alive(struct seq_file *m)
3728 {
3729 	if (!ftrace_is_dead())
3730 		return;
3731 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3732 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3733 }
3734 
3735 #ifdef CONFIG_TRACER_MAX_TRACE
3736 static void show_snapshot_main_help(struct seq_file *m)
3737 {
3738 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3739 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740 		    "#                      Takes a snapshot of the main buffer.\n"
3741 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2'; works with any number that\n"
3743 		    "#                       is not a '0' or '1')\n");
3744 }
3745 
3746 static void show_snapshot_percpu_help(struct seq_file *m)
3747 {
3748 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3749 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3750 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3751 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3752 #else
3753 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3754 		    "#                     Must use main snapshot file to allocate.\n");
3755 #endif
3756 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2'; works with any number that\n"
3758 		    "#                       is not a '0' or '1')\n");
3759 }
3760 
3761 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3762 {
3763 	if (iter->tr->allocated_snapshot)
3764 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3765 	else
3766 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3767 
3768 	seq_puts(m, "# Snapshot commands:\n");
3769 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3770 		show_snapshot_main_help(m);
3771 	else
3772 		show_snapshot_percpu_help(m);
3773 }
3774 #else
3775 /* Should never be called */
3776 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3777 #endif
3778 
3779 static int s_show(struct seq_file *m, void *v)
3780 {
3781 	struct trace_iterator *iter = v;
3782 	int ret;
3783 
3784 	if (iter->ent == NULL) {
3785 		if (iter->tr) {
3786 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3787 			seq_puts(m, "#\n");
3788 			test_ftrace_alive(m);
3789 		}
3790 		if (iter->snapshot && trace_empty(iter))
3791 			print_snapshot_help(m, iter);
3792 		else if (iter->trace && iter->trace->print_header)
3793 			iter->trace->print_header(m);
3794 		else
3795 			trace_default_header(m);
3796 
3797 	} else if (iter->leftover) {
3798 		/*
3799 		 * If we filled the seq_file buffer earlier, we
3800 		 * want to just show it now.
3801 		 */
3802 		ret = trace_print_seq(m, &iter->seq);
3803 
3804 		/* ret should this time be zero, but you never know */
3805 		iter->leftover = ret;
3806 
3807 	} else {
3808 		print_trace_line(iter);
3809 		ret = trace_print_seq(m, &iter->seq);
3810 		/*
3811 		 * If we overflow the seq_file buffer, then it will
3812 		 * ask us for this data again at start up.
3813 		 * Use that instead.
3814 		 *  ret is 0 if seq_file write succeeded.
3815 		 *        -1 otherwise.
3816 		 */
3817 		iter->leftover = ret;
3818 	}
3819 
3820 	return 0;
3821 }
3822 
3823 /*
3824  * Should be used after trace_array_get(), trace_types_lock
3825  * ensures that i_cdev was already initialized.
3826  */
3827 static inline int tracing_get_cpu(struct inode *inode)
3828 {
3829 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3830 		return (long)inode->i_cdev - 1;
3831 	return RING_BUFFER_ALL_CPUS;
3832 }
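
/*
 * For illustration: the encoding read back by tracing_get_cpu() above is set
 * up by trace_create_cpu_file() (not shown here), which is expected to store
 * cpu + 1 in i_cdev. A per_cpu/cpu2 file would thus yield 2, while top-level
 * files leave i_cdev NULL and are treated as RING_BUFFER_ALL_CPUS.
 */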
3833 
3834 static const struct seq_operations tracer_seq_ops = {
3835 	.start		= s_start,
3836 	.next		= s_next,
3837 	.stop		= s_stop,
3838 	.show		= s_show,
3839 };
3840 
3841 static struct trace_iterator *
3842 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3843 {
3844 	struct trace_array *tr = inode->i_private;
3845 	struct trace_iterator *iter;
3846 	int cpu;
3847 
3848 	if (tracing_disabled)
3849 		return ERR_PTR(-ENODEV);
3850 
3851 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3852 	if (!iter)
3853 		return ERR_PTR(-ENOMEM);
3854 
3855 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3856 				    GFP_KERNEL);
3857 	if (!iter->buffer_iter)
3858 		goto release;
3859 
3860 	/*
3861 	 * We make a copy of the current tracer to avoid concurrent
3862 	 * changes on it while we are reading.
3863 	 */
3864 	mutex_lock(&trace_types_lock);
3865 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3866 	if (!iter->trace)
3867 		goto fail;
3868 
3869 	*iter->trace = *tr->current_trace;
3870 
3871 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3872 		goto fail;
3873 
3874 	iter->tr = tr;
3875 
3876 #ifdef CONFIG_TRACER_MAX_TRACE
3877 	/* Currently only the top directory has a snapshot */
3878 	if (tr->current_trace->print_max || snapshot)
3879 		iter->trace_buffer = &tr->max_buffer;
3880 	else
3881 #endif
3882 		iter->trace_buffer = &tr->trace_buffer;
3883 	iter->snapshot = snapshot;
3884 	iter->pos = -1;
3885 	iter->cpu_file = tracing_get_cpu(inode);
3886 	mutex_init(&iter->mutex);
3887 
3888 	/* Notify the tracer early; before we stop tracing. */
3889 	if (iter->trace && iter->trace->open)
3890 		iter->trace->open(iter);
3891 
3892 	/* Annotate start of buffers if we had overruns */
3893 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3894 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3895 
3896 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3897 	if (trace_clocks[tr->clock_id].in_ns)
3898 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3899 
3900 	/* stop the trace while dumping if we are not opening "snapshot" */
3901 	if (!iter->snapshot)
3902 		tracing_stop_tr(tr);
3903 
3904 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3905 		for_each_tracing_cpu(cpu) {
3906 			iter->buffer_iter[cpu] =
3907 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908 		}
3909 		ring_buffer_read_prepare_sync();
3910 		for_each_tracing_cpu(cpu) {
3911 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3912 			tracing_iter_reset(iter, cpu);
3913 		}
3914 	} else {
3915 		cpu = iter->cpu_file;
3916 		iter->buffer_iter[cpu] =
3917 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3918 		ring_buffer_read_prepare_sync();
3919 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3920 		tracing_iter_reset(iter, cpu);
3921 	}
3922 
3923 	mutex_unlock(&trace_types_lock);
3924 
3925 	return iter;
3926 
3927  fail:
3928 	mutex_unlock(&trace_types_lock);
3929 	kfree(iter->trace);
3930 	kfree(iter->buffer_iter);
3931 release:
3932 	seq_release_private(inode, file);
3933 	return ERR_PTR(-ENOMEM);
3934 }
3935 
3936 int tracing_open_generic(struct inode *inode, struct file *filp)
3937 {
3938 	if (tracing_disabled)
3939 		return -ENODEV;
3940 
3941 	filp->private_data = inode->i_private;
3942 	return 0;
3943 }
3944 
3945 bool tracing_is_disabled(void)
3946 {
3947 	return tracing_disabled;
3948 }
3949 
3950 /*
3951  * Open and update trace_array ref count.
3952  * Must have the current trace_array passed to it.
3953  */
3954 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3955 {
3956 	struct trace_array *tr = inode->i_private;
3957 
3958 	if (tracing_disabled)
3959 		return -ENODEV;
3960 
3961 	if (trace_array_get(tr) < 0)
3962 		return -ENODEV;
3963 
3964 	filp->private_data = inode->i_private;
3965 
3966 	return 0;
3967 }
3968 
3969 static int tracing_release(struct inode *inode, struct file *file)
3970 {
3971 	struct trace_array *tr = inode->i_private;
3972 	struct seq_file *m = file->private_data;
3973 	struct trace_iterator *iter;
3974 	int cpu;
3975 
3976 	if (!(file->f_mode & FMODE_READ)) {
3977 		trace_array_put(tr);
3978 		return 0;
3979 	}
3980 
3981 	/* Writes do not use seq_file */
3982 	iter = m->private;
3983 	mutex_lock(&trace_types_lock);
3984 
3985 	for_each_tracing_cpu(cpu) {
3986 		if (iter->buffer_iter[cpu])
3987 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3988 	}
3989 
3990 	if (iter->trace && iter->trace->close)
3991 		iter->trace->close(iter);
3992 
3993 	if (!iter->snapshot)
3994 		/* reenable tracing if it was previously enabled */
3995 		tracing_start_tr(tr);
3996 
3997 	__trace_array_put(tr);
3998 
3999 	mutex_unlock(&trace_types_lock);
4000 
4001 	mutex_destroy(&iter->mutex);
4002 	free_cpumask_var(iter->started);
4003 	kfree(iter->trace);
4004 	kfree(iter->buffer_iter);
4005 	seq_release_private(inode, file);
4006 
4007 	return 0;
4008 }
4009 
4010 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4011 {
4012 	struct trace_array *tr = inode->i_private;
4013 
4014 	trace_array_put(tr);
4015 	return 0;
4016 }
4017 
4018 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4019 {
4020 	struct trace_array *tr = inode->i_private;
4021 
4022 	trace_array_put(tr);
4023 
4024 	return single_release(inode, file);
4025 }
4026 
4027 static int tracing_open(struct inode *inode, struct file *file)
4028 {
4029 	struct trace_array *tr = inode->i_private;
4030 	struct trace_iterator *iter;
4031 	int ret = 0;
4032 
4033 	if (trace_array_get(tr) < 0)
4034 		return -ENODEV;
4035 
4036 	/* If this file was open for write, then erase contents */
4037 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4038 		int cpu = tracing_get_cpu(inode);
4039 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4040 
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042 		if (tr->current_trace->print_max)
4043 			trace_buf = &tr->max_buffer;
4044 #endif
4045 
4046 		if (cpu == RING_BUFFER_ALL_CPUS)
4047 			tracing_reset_online_cpus(trace_buf);
4048 		else
4049 			tracing_reset(trace_buf, cpu);
4050 	}
4051 
4052 	if (file->f_mode & FMODE_READ) {
4053 		iter = __tracing_open(inode, file, false);
4054 		if (IS_ERR(iter))
4055 			ret = PTR_ERR(iter);
4056 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4057 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4058 	}
4059 
4060 	if (ret < 0)
4061 		trace_array_put(tr);
4062 
4063 	return ret;
4064 }
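
/*
 * The O_TRUNC handling in tracing_open() above is what makes "echo > trace"
 * clear the buffer, as documented in the readme text later in this file.
 * For example, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo > /sys/kernel/tracing/trace    # reset the trace buffer
 */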
4065 
4066 /*
4067  * Some tracers are not suitable for instance buffers.
4068  * A tracer is always available for the global array (toplevel)
4069  * or if it explicitly states that it is.
4070  */
4071 static bool
4072 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4073 {
4074 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4075 }
4076 
4077 /* Find the next tracer that this trace array may use */
4078 static struct tracer *
4079 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4080 {
4081 	while (t && !trace_ok_for_array(t, tr))
4082 		t = t->next;
4083 
4084 	return t;
4085 }
4086 
4087 static void *
4088 t_next(struct seq_file *m, void *v, loff_t *pos)
4089 {
4090 	struct trace_array *tr = m->private;
4091 	struct tracer *t = v;
4092 
4093 	(*pos)++;
4094 
4095 	if (t)
4096 		t = get_tracer_for_array(tr, t->next);
4097 
4098 	return t;
4099 }
4100 
4101 static void *t_start(struct seq_file *m, loff_t *pos)
4102 {
4103 	struct trace_array *tr = m->private;
4104 	struct tracer *t;
4105 	loff_t l = 0;
4106 
4107 	mutex_lock(&trace_types_lock);
4108 
4109 	t = get_tracer_for_array(tr, trace_types);
4110 	for (; t && l < *pos; t = t_next(m, t, &l))
4111 		;
4112 
4113 	return t;
4114 }
4115 
4116 static void t_stop(struct seq_file *m, void *p)
4117 {
4118 	mutex_unlock(&trace_types_lock);
4119 }
4120 
4121 static int t_show(struct seq_file *m, void *v)
4122 {
4123 	struct tracer *t = v;
4124 
4125 	if (!t)
4126 		return 0;
4127 
4128 	seq_puts(m, t->name);
4129 	if (t->next)
4130 		seq_putc(m, ' ');
4131 	else
4132 		seq_putc(m, '\n');
4133 
4134 	return 0;
4135 }
4136 
4137 static const struct seq_operations show_traces_seq_ops = {
4138 	.start		= t_start,
4139 	.next		= t_next,
4140 	.stop		= t_stop,
4141 	.show		= t_show,
4142 };
4143 
4144 static int show_traces_open(struct inode *inode, struct file *file)
4145 {
4146 	struct trace_array *tr = inode->i_private;
4147 	struct seq_file *m;
4148 	int ret;
4149 
4150 	if (tracing_disabled)
4151 		return -ENODEV;
4152 
4153 	ret = seq_open(file, &show_traces_seq_ops);
4154 	if (ret)
4155 		return ret;
4156 
4157 	m = file->private_data;
4158 	m->private = tr;
4159 
4160 	return 0;
4161 }
4162 
4163 static ssize_t
4164 tracing_write_stub(struct file *filp, const char __user *ubuf,
4165 		   size_t count, loff_t *ppos)
4166 {
4167 	return count;
4168 }
4169 
4170 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4171 {
4172 	int ret;
4173 
4174 	if (file->f_mode & FMODE_READ)
4175 		ret = seq_lseek(file, offset, whence);
4176 	else
4177 		file->f_pos = ret = 0;
4178 
4179 	return ret;
4180 }
4181 
4182 static const struct file_operations tracing_fops = {
4183 	.open		= tracing_open,
4184 	.read		= seq_read,
4185 	.write		= tracing_write_stub,
4186 	.llseek		= tracing_lseek,
4187 	.release	= tracing_release,
4188 };
4189 
4190 static const struct file_operations show_traces_fops = {
4191 	.open		= show_traces_open,
4192 	.read		= seq_read,
4193 	.release	= seq_release,
4194 	.llseek		= seq_lseek,
4195 };
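
/*
 * Reading the file backed by show_traces_fops (conventionally
 * available_tracers) yields a single space-separated line of tracer names;
 * depending on the kernel configuration this may look something like:
 *
 *   blk function_graph function nop
 */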
4196 
4197 static ssize_t
4198 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4199 		     size_t count, loff_t *ppos)
4200 {
4201 	struct trace_array *tr = file_inode(filp)->i_private;
4202 	char *mask_str;
4203 	int len;
4204 
4205 	len = snprintf(NULL, 0, "%*pb\n",
4206 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4207 	mask_str = kmalloc(len, GFP_KERNEL);
4208 	if (!mask_str)
4209 		return -ENOMEM;
4210 
4211 	len = snprintf(mask_str, len, "%*pb\n",
4212 		       cpumask_pr_args(tr->tracing_cpumask));
4213 	if (len >= count) {
4214 		count = -EINVAL;
4215 		goto out_err;
4216 	}
4217 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4218 
4219 out_err:
4220 	kfree(mask_str);
4221 
4222 	return count;
4223 }
4224 
4225 static ssize_t
4226 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4227 		      size_t count, loff_t *ppos)
4228 {
4229 	struct trace_array *tr = file_inode(filp)->i_private;
4230 	cpumask_var_t tracing_cpumask_new;
4231 	int err, cpu;
4232 
4233 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4234 		return -ENOMEM;
4235 
4236 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4237 	if (err)
4238 		goto err_unlock;
4239 
4240 	local_irq_disable();
4241 	arch_spin_lock(&tr->max_lock);
4242 	for_each_tracing_cpu(cpu) {
4243 		/*
4244 		 * Increase/decrease the disabled counter if we are
4245 		 * about to flip a bit in the cpumask:
4246 		 */
4247 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4248 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4249 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4250 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4251 		}
4252 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4253 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4254 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4255 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4256 		}
4257 	}
4258 	arch_spin_unlock(&tr->max_lock);
4259 	local_irq_enable();
4260 
4261 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4262 	free_cpumask_var(tracing_cpumask_new);
4263 
4264 	return count;
4265 
4266 err_unlock:
4267 	free_cpumask_var(tracing_cpumask_new);
4268 
4269 	return err;
4270 }
4271 
4272 static const struct file_operations tracing_cpumask_fops = {
4273 	.open		= tracing_open_generic_tr,
4274 	.read		= tracing_cpumask_read,
4275 	.write		= tracing_cpumask_write,
4276 	.release	= tracing_release_generic_tr,
4277 	.llseek		= generic_file_llseek,
4278 };
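
/*
 * Example (illustrative only): the mask is read and written as a hex CPU
 * mask ("%*pb" / cpumask_parse_user() above), so restricting tracing to
 * CPUs 0 and 1 looks like:
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask
 */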
4279 
4280 static int tracing_trace_options_show(struct seq_file *m, void *v)
4281 {
4282 	struct tracer_opt *trace_opts;
4283 	struct trace_array *tr = m->private;
4284 	u32 tracer_flags;
4285 	int i;
4286 
4287 	mutex_lock(&trace_types_lock);
4288 	tracer_flags = tr->current_trace->flags->val;
4289 	trace_opts = tr->current_trace->flags->opts;
4290 
4291 	for (i = 0; trace_options[i]; i++) {
4292 		if (tr->trace_flags & (1 << i))
4293 			seq_printf(m, "%s\n", trace_options[i]);
4294 		else
4295 			seq_printf(m, "no%s\n", trace_options[i]);
4296 	}
4297 
4298 	for (i = 0; trace_opts[i].name; i++) {
4299 		if (tracer_flags & trace_opts[i].bit)
4300 			seq_printf(m, "%s\n", trace_opts[i].name);
4301 		else
4302 			seq_printf(m, "no%s\n", trace_opts[i].name);
4303 	}
4304 	mutex_unlock(&trace_types_lock);
4305 
4306 	return 0;
4307 }
4308 
4309 static int __set_tracer_option(struct trace_array *tr,
4310 			       struct tracer_flags *tracer_flags,
4311 			       struct tracer_opt *opts, int neg)
4312 {
4313 	struct tracer *trace = tracer_flags->trace;
4314 	int ret;
4315 
4316 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4317 	if (ret)
4318 		return ret;
4319 
4320 	if (neg)
4321 		tracer_flags->val &= ~opts->bit;
4322 	else
4323 		tracer_flags->val |= opts->bit;
4324 	return 0;
4325 }
4326 
4327 /* Try to assign a tracer specific option */
4328 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4329 {
4330 	struct tracer *trace = tr->current_trace;
4331 	struct tracer_flags *tracer_flags = trace->flags;
4332 	struct tracer_opt *opts = NULL;
4333 	int i;
4334 
4335 	for (i = 0; tracer_flags->opts[i].name; i++) {
4336 		opts = &tracer_flags->opts[i];
4337 
4338 		if (strcmp(cmp, opts->name) == 0)
4339 			return __set_tracer_option(tr, trace->flags, opts, neg);
4340 	}
4341 
4342 	return -EINVAL;
4343 }
4344 
4345 /* Some tracers require overwrite to stay enabled */
4346 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4347 {
4348 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4349 		return -1;
4350 
4351 	return 0;
4352 }
4353 
4354 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4355 {
4356 	/* do nothing if the flag already matches the requested state */
4357 	if (!!(tr->trace_flags & mask) == !!enabled)
4358 		return 0;
4359 
4360 	/* Give the tracer a chance to approve the change */
4361 	if (tr->current_trace->flag_changed)
4362 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4363 			return -EINVAL;
4364 
4365 	if (enabled)
4366 		tr->trace_flags |= mask;
4367 	else
4368 		tr->trace_flags &= ~mask;
4369 
4370 	if (mask == TRACE_ITER_RECORD_CMD)
4371 		trace_event_enable_cmd_record(enabled);
4372 
4373 	if (mask == TRACE_ITER_RECORD_TGID) {
4374 		if (!tgid_map)
4375 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4376 					   sizeof(*tgid_map),
4377 					   GFP_KERNEL);
4378 		if (!tgid_map) {
4379 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4380 			return -ENOMEM;
4381 		}
4382 
4383 		trace_event_enable_tgid_record(enabled);
4384 	}
4385 
4386 	if (mask == TRACE_ITER_EVENT_FORK)
4387 		trace_event_follow_fork(tr, enabled);
4388 
4389 	if (mask == TRACE_ITER_FUNC_FORK)
4390 		ftrace_pid_follow_fork(tr, enabled);
4391 
4392 	if (mask == TRACE_ITER_OVERWRITE) {
4393 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4394 #ifdef CONFIG_TRACER_MAX_TRACE
4395 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4396 #endif
4397 	}
4398 
4399 	if (mask == TRACE_ITER_PRINTK) {
4400 		trace_printk_start_stop_comm(enabled);
4401 		trace_printk_control(enabled);
4402 	}
4403 
4404 	return 0;
4405 }
4406 
4407 static int trace_set_options(struct trace_array *tr, char *option)
4408 {
4409 	char *cmp;
4410 	int neg = 0;
4411 	int ret;
4412 	size_t orig_len = strlen(option);
4413 	int len;
4414 
4415 	cmp = strstrip(option);
4416 
4417 	len = str_has_prefix(cmp, "no");
4418 	if (len)
4419 		neg = 1;
4420 
4421 	cmp += len;
4422 
4423 	mutex_lock(&trace_types_lock);
4424 
4425 	ret = match_string(trace_options, -1, cmp);
4426 	/* If the option is not a core trace flag, try the tracer-specific options */
4427 	if (ret < 0)
4428 		ret = set_tracer_option(tr, cmp, neg);
4429 	else
4430 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4431 
4432 	mutex_unlock(&trace_types_lock);
4433 
4434 	/*
4435 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4436 	 * turn it back into a space.
4437 	 */
4438 	if (orig_len > strlen(option))
4439 		option[strlen(option)] = ' ';
4440 
4441 	return ret;
4442 }
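
/*
 * Illustration of the "no" prefix handling in trace_set_options() above
 * (the option names are examples; the real set comes from trace_options[]
 * and the current tracer's own flags):
 *
 *   echo noraw > trace_options            # clear a core trace flag
 *   echo funcgraph-proc > trace_options   # set a tracer-specific option
 */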
4443 
4444 static void __init apply_trace_boot_options(void)
4445 {
4446 	char *buf = trace_boot_options_buf;
4447 	char *option;
4448 
4449 	while (true) {
4450 		option = strsep(&buf, ",");
4451 
4452 		if (!option)
4453 			break;
4454 
4455 		if (*option)
4456 			trace_set_options(&global_trace, option);
4457 
4458 		/* Put back the comma to allow this to be called again */
4459 		if (buf)
4460 			*(buf - 1) = ',';
4461 	}
4462 }
4463 
4464 static ssize_t
4465 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4466 			size_t cnt, loff_t *ppos)
4467 {
4468 	struct seq_file *m = filp->private_data;
4469 	struct trace_array *tr = m->private;
4470 	char buf[64];
4471 	int ret;
4472 
4473 	if (cnt >= sizeof(buf))
4474 		return -EINVAL;
4475 
4476 	if (copy_from_user(buf, ubuf, cnt))
4477 		return -EFAULT;
4478 
4479 	buf[cnt] = 0;
4480 
4481 	ret = trace_set_options(tr, buf);
4482 	if (ret < 0)
4483 		return ret;
4484 
4485 	*ppos += cnt;
4486 
4487 	return cnt;
4488 }
4489 
4490 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4491 {
4492 	struct trace_array *tr = inode->i_private;
4493 	int ret;
4494 
4495 	if (tracing_disabled)
4496 		return -ENODEV;
4497 
4498 	if (trace_array_get(tr) < 0)
4499 		return -ENODEV;
4500 
4501 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4502 	if (ret < 0)
4503 		trace_array_put(tr);
4504 
4505 	return ret;
4506 }
4507 
4508 static const struct file_operations tracing_iter_fops = {
4509 	.open		= tracing_trace_options_open,
4510 	.read		= seq_read,
4511 	.llseek		= seq_lseek,
4512 	.release	= tracing_single_release_tr,
4513 	.write		= tracing_trace_options_write,
4514 };
4515 
4516 static const char readme_msg[] =
4517 	"tracing mini-HOWTO:\n\n"
4518 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4519 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4520 	" Important files:\n"
4521 	"  trace\t\t\t- The static contents of the buffer\n"
4522 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4523 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4524 	"  current_tracer\t- function and latency tracers\n"
4525 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4526 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4527 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4528 	"  trace_clock\t\t- change the clock used to order events\n"
4529 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4530 	"      global:   Synced across CPUs but slows tracing down.\n"
4531 	"     counter:   Not a clock, but just an increment\n"
4532 	"      uptime:   Jiffy counter from time of boot\n"
4533 	"        perf:   Same clock that perf events use\n"
4534 #ifdef CONFIG_X86_64
4535 	"     x86-tsc:   TSC cycle counter\n"
4536 #endif
4537 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4538 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4539 	"    absolute:   Absolute (standalone) timestamp\n"
4540 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4541 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4542 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4543 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4544 	"\t\t\t  Remove sub-buffer with rmdir\n"
4545 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4546 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4547 	"\t\t\t  option name\n"
4548 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4549 #ifdef CONFIG_DYNAMIC_FTRACE
4550 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4551 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4552 	"\t\t\t  functions\n"
4553 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4554 	"\t     modules: Can select a group via module\n"
4555 	"\t      Format: :mod:<module-name>\n"
4556 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4557 	"\t    triggers: a command to perform when function is hit\n"
4558 	"\t      Format: <function>:<trigger>[:count]\n"
4559 	"\t     trigger: traceon, traceoff\n"
4560 	"\t\t      enable_event:<system>:<event>\n"
4561 	"\t\t      disable_event:<system>:<event>\n"
4562 #ifdef CONFIG_STACKTRACE
4563 	"\t\t      stacktrace\n"
4564 #endif
4565 #ifdef CONFIG_TRACER_SNAPSHOT
4566 	"\t\t      snapshot\n"
4567 #endif
4568 	"\t\t      dump\n"
4569 	"\t\t      cpudump\n"
4570 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4571 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4572 	"\t     The first one will disable tracing every time do_fault is hit\n"
4573 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4574 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4575 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4576 	"\t       the counter will not decrement. It only decrements when the\n"
4577 	"\t       trigger did work\n"
4578 	"\t     To remove trigger without count:\n"
4579 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4580 	"\t     To remove trigger with a count:\n"
4581 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4582 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4583 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4584 	"\t    modules: Can select a group via module command :mod:\n"
4585 	"\t    Does not accept triggers\n"
4586 #endif /* CONFIG_DYNAMIC_FTRACE */
4587 #ifdef CONFIG_FUNCTION_TRACER
4588 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4589 	"\t\t    (function)\n"
4590 #endif
4591 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4592 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4593 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4594 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4595 #endif
4596 #ifdef CONFIG_TRACER_SNAPSHOT
4597 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4598 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4599 	"\t\t\t  information\n"
4600 #endif
4601 #ifdef CONFIG_STACK_TRACER
4602 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4603 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4604 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4605 	"\t\t\t  new trace)\n"
4606 #ifdef CONFIG_DYNAMIC_FTRACE
4607 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4608 	"\t\t\t  traces\n"
4609 #endif
4610 #endif /* CONFIG_STACK_TRACER */
4611 #ifdef CONFIG_DYNAMIC_EVENTS
4612 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4613 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4614 #endif
4615 #ifdef CONFIG_KPROBE_EVENTS
4616 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4617 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4618 #endif
4619 #ifdef CONFIG_UPROBE_EVENTS
4620 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4621 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4622 #endif
4623 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4624 	"\t  accepts: event-definitions (one definition per line)\n"
4625 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4626 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4627 #ifdef CONFIG_HIST_TRIGGERS
4628 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4629 #endif
4630 	"\t           -:[<group>/]<event>\n"
4631 #ifdef CONFIG_KPROBE_EVENTS
4632 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4633 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4634 #endif
4635 #ifdef CONFIG_UPROBE_EVENTS
4636 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4637 #endif
4638 	"\t     args: <name>=fetcharg[:type]\n"
4639 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4640 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4641 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4642 #else
4643 	"\t           $stack<index>, $stack, $retval, $comm\n"
4644 #endif
4645 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4646 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4647 	"\t           <type>\\[<array-size>\\]\n"
4648 #ifdef CONFIG_HIST_TRIGGERS
4649 	"\t    field: <stype> <name>;\n"
4650 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4651 	"\t           [unsigned] char/int/long\n"
4652 #endif
4653 #endif
4654 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4655 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4656 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4657 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4658 	"\t\t\t  events\n"
4659 	"      filter\t\t- If set, only events passing filter are traced\n"
4660 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4661 	"\t\t\t  <event>:\n"
4662 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4663 	"      filter\t\t- If set, only events passing filter are traced\n"
4664 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4665 	"\t    Format: <trigger>[:count][if <filter>]\n"
4666 	"\t   trigger: traceon, traceoff\n"
4667 	"\t            enable_event:<system>:<event>\n"
4668 	"\t            disable_event:<system>:<event>\n"
4669 #ifdef CONFIG_HIST_TRIGGERS
4670 	"\t            enable_hist:<system>:<event>\n"
4671 	"\t            disable_hist:<system>:<event>\n"
4672 #endif
4673 #ifdef CONFIG_STACKTRACE
4674 	"\t\t    stacktrace\n"
4675 #endif
4676 #ifdef CONFIG_TRACER_SNAPSHOT
4677 	"\t\t    snapshot\n"
4678 #endif
4679 #ifdef CONFIG_HIST_TRIGGERS
4680 	"\t\t    hist (see below)\n"
4681 #endif
4682 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4683 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4684 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4685 	"\t                  events/block/block_unplug/trigger\n"
4686 	"\t   The first disables tracing every time block_unplug is hit.\n"
4687 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4688 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4689 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4690 	"\t   Like function triggers, the counter is only decremented if it\n"
4691 	"\t    enabled or disabled tracing.\n"
4692 	"\t   To remove a trigger without a count:\n"
4693 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4694 	"\t   To remove a trigger with a count:\n"
4695 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4696 	"\t   Filters can be ignored when removing a trigger.\n"
4697 #ifdef CONFIG_HIST_TRIGGERS
4698 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4699 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4700 	"\t            [:values=<field1[,field2,...]>]\n"
4701 	"\t            [:sort=<field1[,field2,...]>]\n"
4702 	"\t            [:size=#entries]\n"
4703 	"\t            [:pause][:continue][:clear]\n"
4704 	"\t            [:name=histname1]\n"
4705 	"\t            [if <filter>]\n\n"
4706 	"\t    When a matching event is hit, an entry is added to a hash\n"
4707 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4708 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4709 	"\t    correspond to fields in the event's format description.  Keys\n"
4710 	"\t    can be any field, or the special string 'stacktrace'.\n"
4711 	"\t    Compound keys consisting of up to two fields can be specified\n"
4712 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4713 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4714 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4715 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4716 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4717 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4718 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4719 	"\t    its histogram data will be shared with other triggers of the\n"
4720 	"\t    same name, and trigger hits will update this common data.\n\n"
4721 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4722 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4723 	"\t    triggers attached to an event, there will be a table for each\n"
4724 	"\t    trigger in the output.  The table displayed for a named\n"
4725 	"\t    trigger will be the same as any other instance having the\n"
4726 	"\t    same name.  The default format used to display a given field\n"
4727 	"\t    can be modified by appending any of the following modifiers\n"
4728 	"\t    to the field name, as applicable:\n\n"
4729 	"\t            .hex        display a number as a hex value\n"
4730 	"\t            .sym        display an address as a symbol\n"
4731 	"\t            .sym-offset display an address as a symbol and offset\n"
4732 	"\t            .execname   display a common_pid as a program name\n"
4733 	"\t            .syscall    display a syscall id as a syscall name\n"
4734 	"\t            .log2       display log2 value rather than raw number\n"
4735 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4736 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4737 	"\t    trigger or to start a hist trigger but not log any events\n"
4738 	"\t    until told to do so.  'continue' can be used to start or\n"
4739 	"\t    restart a paused hist trigger.\n\n"
4740 	"\t    The 'clear' parameter will clear the contents of a running\n"
4741 	"\t    hist trigger and leave its current paused/active state\n"
4742 	"\t    unchanged.\n\n"
4743 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4744 	"\t    have one event conditionally start and stop another event's\n"
4745 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4746 	"\t    the enable_event and disable_event triggers.\n"
4747 #endif
4748 ;
4749 
4750 static ssize_t
4751 tracing_readme_read(struct file *filp, char __user *ubuf,
4752 		       size_t cnt, loff_t *ppos)
4753 {
4754 	return simple_read_from_buffer(ubuf, cnt, ppos,
4755 					readme_msg, strlen(readme_msg));
4756 }
4757 
4758 static const struct file_operations tracing_readme_fops = {
4759 	.open		= tracing_open_generic,
4760 	.read		= tracing_readme_read,
4761 	.llseek		= generic_file_llseek,
4762 };
4763 
4764 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4765 {
4766 	int *ptr = v;
4767 
4768 	if (*pos || m->count)
4769 		ptr++;
4770 
4771 	(*pos)++;
4772 
4773 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4774 		if (trace_find_tgid(*ptr))
4775 			return ptr;
4776 	}
4777 
4778 	return NULL;
4779 }
4780 
4781 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4782 {
4783 	void *v;
4784 	loff_t l = 0;
4785 
4786 	if (!tgid_map)
4787 		return NULL;
4788 
4789 	v = &tgid_map[0];
4790 	while (l <= *pos) {
4791 		v = saved_tgids_next(m, v, &l);
4792 		if (!v)
4793 			return NULL;
4794 	}
4795 
4796 	return v;
4797 }
4798 
4799 static void saved_tgids_stop(struct seq_file *m, void *v)
4800 {
4801 }
4802 
4803 static int saved_tgids_show(struct seq_file *m, void *v)
4804 {
4805 	int pid = (int *)v - tgid_map;
4806 
4807 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4808 	return 0;
4809 }
4810 
4811 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4812 	.start		= saved_tgids_start,
4813 	.stop		= saved_tgids_stop,
4814 	.next		= saved_tgids_next,
4815 	.show		= saved_tgids_show,
4816 };
4817 
4818 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4819 {
4820 	if (tracing_disabled)
4821 		return -ENODEV;
4822 
4823 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4824 }
4825 
4826 
4827 static const struct file_operations tracing_saved_tgids_fops = {
4828 	.open		= tracing_saved_tgids_open,
4829 	.read		= seq_read,
4830 	.llseek		= seq_lseek,
4831 	.release	= seq_release,
4832 };
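
/*
 * Each line of the saved_tgids file is "<pid> <tgid>" as emitted by
 * saved_tgids_show() above; e.g. a thread 1301 belonging to process 1300
 * would show up as "1301 1300" (values purely illustrative).
 */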
4833 
4834 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4835 {
4836 	unsigned int *ptr = v;
4837 
4838 	if (*pos || m->count)
4839 		ptr++;
4840 
4841 	(*pos)++;
4842 
4843 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4844 	     ptr++) {
4845 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4846 			continue;
4847 
4848 		return ptr;
4849 	}
4850 
4851 	return NULL;
4852 }
4853 
4854 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4855 {
4856 	void *v;
4857 	loff_t l = 0;
4858 
4859 	preempt_disable();
4860 	arch_spin_lock(&trace_cmdline_lock);
4861 
4862 	v = &savedcmd->map_cmdline_to_pid[0];
4863 	while (l <= *pos) {
4864 		v = saved_cmdlines_next(m, v, &l);
4865 		if (!v)
4866 			return NULL;
4867 	}
4868 
4869 	return v;
4870 }
4871 
4872 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4873 {
4874 	arch_spin_unlock(&trace_cmdline_lock);
4875 	preempt_enable();
4876 }
4877 
4878 static int saved_cmdlines_show(struct seq_file *m, void *v)
4879 {
4880 	char buf[TASK_COMM_LEN];
4881 	unsigned int *pid = v;
4882 
4883 	__trace_find_cmdline(*pid, buf);
4884 	seq_printf(m, "%d %s\n", *pid, buf);
4885 	return 0;
4886 }
4887 
4888 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4889 	.start		= saved_cmdlines_start,
4890 	.next		= saved_cmdlines_next,
4891 	.stop		= saved_cmdlines_stop,
4892 	.show		= saved_cmdlines_show,
4893 };
4894 
4895 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4896 {
4897 	if (tracing_disabled)
4898 		return -ENODEV;
4899 
4900 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4901 }
4902 
4903 static const struct file_operations tracing_saved_cmdlines_fops = {
4904 	.open		= tracing_saved_cmdlines_open,
4905 	.read		= seq_read,
4906 	.llseek		= seq_lseek,
4907 	.release	= seq_release,
4908 };
4909 
4910 static ssize_t
4911 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4912 				 size_t cnt, loff_t *ppos)
4913 {
4914 	char buf[64];
4915 	int r;
4916 
4917 	arch_spin_lock(&trace_cmdline_lock);
4918 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4919 	arch_spin_unlock(&trace_cmdline_lock);
4920 
4921 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4922 }
4923 
4924 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4925 {
4926 	kfree(s->saved_cmdlines);
4927 	kfree(s->map_cmdline_to_pid);
4928 	kfree(s);
4929 }
4930 
4931 static int tracing_resize_saved_cmdlines(unsigned int val)
4932 {
4933 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4934 
4935 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4936 	if (!s)
4937 		return -ENOMEM;
4938 
4939 	if (allocate_cmdlines_buffer(val, s) < 0) {
4940 		kfree(s);
4941 		return -ENOMEM;
4942 	}
4943 
4944 	arch_spin_lock(&trace_cmdline_lock);
4945 	savedcmd_temp = savedcmd;
4946 	savedcmd = s;
4947 	arch_spin_unlock(&trace_cmdline_lock);
4948 	free_saved_cmdlines_buffer(savedcmd_temp);
4949 
4950 	return 0;
4951 }
4952 
4953 static ssize_t
4954 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4955 				  size_t cnt, loff_t *ppos)
4956 {
4957 	unsigned long val;
4958 	int ret;
4959 
4960 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4961 	if (ret)
4962 		return ret;
4963 
4964 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4965 	if (!val || val > PID_MAX_DEFAULT)
4966 		return -EINVAL;
4967 
4968 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4969 	if (ret < 0)
4970 		return ret;
4971 
4972 	*ppos += cnt;
4973 
4974 	return cnt;
4975 }
4976 
4977 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4978 	.open		= tracing_open_generic,
4979 	.read		= tracing_saved_cmdlines_size_read,
4980 	.write		= tracing_saved_cmdlines_size_write,
4981 };
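
/*
 * Example (illustrative): growing the saved comm/pid cache to 1024 entries,
 * bounded by PID_MAX_DEFAULT as checked in the write handler above:
 *
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */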
4982 
4983 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4984 static union trace_eval_map_item *
4985 update_eval_map(union trace_eval_map_item *ptr)
4986 {
4987 	if (!ptr->map.eval_string) {
4988 		if (ptr->tail.next) {
4989 			ptr = ptr->tail.next;
4990 			/* Set ptr to the next real item (skip head) */
4991 			ptr++;
4992 		} else
4993 			return NULL;
4994 	}
4995 	return ptr;
4996 }
4997 
4998 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4999 {
5000 	union trace_eval_map_item *ptr = v;
5001 
5002 	/*
5003 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5004 	 * This really should never happen.
5005 	 */
5006 	ptr = update_eval_map(ptr);
5007 	if (WARN_ON_ONCE(!ptr))
5008 		return NULL;
5009 
5010 	ptr++;
5011 
5012 	(*pos)++;
5013 
5014 	ptr = update_eval_map(ptr);
5015 
5016 	return ptr;
5017 }
5018 
5019 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5020 {
5021 	union trace_eval_map_item *v;
5022 	loff_t l = 0;
5023 
5024 	mutex_lock(&trace_eval_mutex);
5025 
5026 	v = trace_eval_maps;
5027 	if (v)
5028 		v++;
5029 
5030 	while (v && l < *pos) {
5031 		v = eval_map_next(m, v, &l);
5032 	}
5033 
5034 	return v;
5035 }
5036 
5037 static void eval_map_stop(struct seq_file *m, void *v)
5038 {
5039 	mutex_unlock(&trace_eval_mutex);
5040 }
5041 
5042 static int eval_map_show(struct seq_file *m, void *v)
5043 {
5044 	union trace_eval_map_item *ptr = v;
5045 
5046 	seq_printf(m, "%s %ld (%s)\n",
5047 		   ptr->map.eval_string, ptr->map.eval_value,
5048 		   ptr->map.system);
5049 
5050 	return 0;
5051 }
5052 
5053 static const struct seq_operations tracing_eval_map_seq_ops = {
5054 	.start		= eval_map_start,
5055 	.next		= eval_map_next,
5056 	.stop		= eval_map_stop,
5057 	.show		= eval_map_show,
5058 };
5059 
5060 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5061 {
5062 	if (tracing_disabled)
5063 		return -ENODEV;
5064 
5065 	return seq_open(filp, &tracing_eval_map_seq_ops);
5066 }
5067 
5068 static const struct file_operations tracing_eval_map_fops = {
5069 	.open		= tracing_eval_map_open,
5070 	.read		= seq_read,
5071 	.llseek		= seq_lseek,
5072 	.release	= seq_release,
5073 };
5074 
5075 static inline union trace_eval_map_item *
5076 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5077 {
5078 	/* Return tail of array given the head */
5079 	return ptr + ptr->head.length + 1;
5080 }
5081 
5082 static void
5083 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5084 			   int len)
5085 {
5086 	struct trace_eval_map **stop;
5087 	struct trace_eval_map **map;
5088 	union trace_eval_map_item *map_array;
5089 	union trace_eval_map_item *ptr;
5090 
5091 	stop = start + len;
5092 
5093 	/*
5094 	 * The trace_eval_maps contains the map plus a head and tail item,
5095 	 * where the head holds the module and length of array, and the
5096 	 * tail holds a pointer to the next list.
5097 	 */
5098 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5099 	if (!map_array) {
5100 		pr_warn("Unable to allocate trace eval mapping\n");
5101 		return;
5102 	}
5103 
5104 	mutex_lock(&trace_eval_mutex);
5105 
5106 	if (!trace_eval_maps)
5107 		trace_eval_maps = map_array;
5108 	else {
5109 		ptr = trace_eval_maps;
5110 		for (;;) {
5111 			ptr = trace_eval_jmp_to_tail(ptr);
5112 			if (!ptr->tail.next)
5113 				break;
5114 			ptr = ptr->tail.next;
5115 
5116 		}
5117 		ptr->tail.next = map_array;
5118 	}
5119 	map_array->head.mod = mod;
5120 	map_array->head.length = len;
5121 	map_array++;
5122 
5123 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5124 		map_array->map = **map;
5125 		map_array++;
5126 	}
5127 	memset(map_array, 0, sizeof(*map_array));
5128 
5129 	mutex_unlock(&trace_eval_mutex);
5130 }
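
/*
 * Sketch of the layout built above for one module's array of LEN maps
 * (see the comment in trace_insert_eval_map_file()):
 *
 *   map_array[0]          head (mod, length = LEN)
 *   map_array[1..LEN]     map  (one trace_eval_map copied per entry)
 *   map_array[LEN + 1]    tail (zeroed here; ->tail.next is set when the
 *                               next module's array is chained on)
 */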
5131 
5132 static void trace_create_eval_file(struct dentry *d_tracer)
5133 {
5134 	trace_create_file("eval_map", 0444, d_tracer,
5135 			  NULL, &tracing_eval_map_fops);
5136 }
5137 
5138 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5139 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5140 static inline void trace_insert_eval_map_file(struct module *mod,
5141 			      struct trace_eval_map **start, int len) { }
5142 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5143 
5144 static void trace_insert_eval_map(struct module *mod,
5145 				  struct trace_eval_map **start, int len)
5146 {
5147 	struct trace_eval_map **map;
5148 
5149 	if (len <= 0)
5150 		return;
5151 
5152 	map = start;
5153 
5154 	trace_event_eval_update(map, len);
5155 
5156 	trace_insert_eval_map_file(mod, start, len);
5157 }
5158 
5159 static ssize_t
5160 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5161 		       size_t cnt, loff_t *ppos)
5162 {
5163 	struct trace_array *tr = filp->private_data;
5164 	char buf[MAX_TRACER_SIZE+2];
5165 	int r;
5166 
5167 	mutex_lock(&trace_types_lock);
5168 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5169 	mutex_unlock(&trace_types_lock);
5170 
5171 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5172 }
5173 
5174 int tracer_init(struct tracer *t, struct trace_array *tr)
5175 {
5176 	tracing_reset_online_cpus(&tr->trace_buffer);
5177 	return t->init(tr);
5178 }
5179 
5180 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5181 {
5182 	int cpu;
5183 
5184 	for_each_tracing_cpu(cpu)
5185 		per_cpu_ptr(buf->data, cpu)->entries = val;
5186 }
5187 
5188 #ifdef CONFIG_TRACER_MAX_TRACE
5189 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5190 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5191 					struct trace_buffer *size_buf, int cpu_id)
5192 {
5193 	int cpu, ret = 0;
5194 
5195 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5196 		for_each_tracing_cpu(cpu) {
5197 			ret = ring_buffer_resize(trace_buf->buffer,
5198 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5199 			if (ret < 0)
5200 				break;
5201 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5202 				per_cpu_ptr(size_buf->data, cpu)->entries;
5203 		}
5204 	} else {
5205 		ret = ring_buffer_resize(trace_buf->buffer,
5206 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5207 		if (ret == 0)
5208 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5209 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5210 	}
5211 
5212 	return ret;
5213 }
5214 #endif /* CONFIG_TRACER_MAX_TRACE */
5215 
5216 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5217 					unsigned long size, int cpu)
5218 {
5219 	int ret;
5220 
5221 	/*
5222 	 * If the kernel or user changes the size of the ring buffer,
5223 	 * we use the size that was given, and we can forget about
5224 	 * expanding it later.
5225 	 */
5226 	ring_buffer_expanded = true;
5227 
5228 	/* May be called before buffers are initialized */
5229 	if (!tr->trace_buffer.buffer)
5230 		return 0;
5231 
5232 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5233 	if (ret < 0)
5234 		return ret;
5235 
5236 #ifdef CONFIG_TRACER_MAX_TRACE
5237 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5238 	    !tr->current_trace->use_max_tr)
5239 		goto out;
5240 
5241 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5242 	if (ret < 0) {
5243 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5244 						     &tr->trace_buffer, cpu);
5245 		if (r < 0) {
5246 			/*
5247 			 * AARGH! We are left with different
5248 			 * size max buffer!!!!
5249 			 * The max buffer is our "snapshot" buffer.
5250 			 * When a tracer needs a snapshot (one of the
5251 			 * latency tracers), it swaps the max buffer
5252 			 * with the saved snap shot. We succeeded to
5253 			 * update the size of the main buffer, but failed to
5254 			 * update the size of the max buffer. But when we tried
5255 			 * to reset the main buffer to the original size, we
5256 			 * failed there too. This is very unlikely to
5257 			 * happen, but if it does, warn and kill all
5258 			 * tracing.
5259 			 */
5260 			WARN_ON(1);
5261 			tracing_disabled = 1;
5262 		}
5263 		return ret;
5264 	}
5265 
5266 	if (cpu == RING_BUFFER_ALL_CPUS)
5267 		set_buffer_entries(&tr->max_buffer, size);
5268 	else
5269 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5270 
5271  out:
5272 #endif /* CONFIG_TRACER_MAX_TRACE */
5273 
5274 	if (cpu == RING_BUFFER_ALL_CPUS)
5275 		set_buffer_entries(&tr->trace_buffer, size);
5276 	else
5277 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5278 
5279 	return ret;
5280 }
5281 
5282 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5283 					  unsigned long size, int cpu_id)
5284 {
5285 	int ret = size;
5286 
5287 	mutex_lock(&trace_types_lock);
5288 
5289 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5290 		/* make sure this cpu is enabled in the mask */
5291 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5292 			ret = -EINVAL;
5293 			goto out;
5294 		}
5295 	}
5296 
5297 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5298 	if (ret < 0)
5299 		ret = -ENOMEM;
5300 
5301 out:
5302 	mutex_unlock(&trace_types_lock);
5303 
5304 	return ret;
5305 }
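
/*
 * Illustration (the kilobyte-to-bytes conversion happens in the
 * buffer_size_kb write handler, not here): resizing from userspace,
 * assuming tracefs at /sys/kernel/tracing, looks like:
 *
 *   echo 4096 > buffer_size_kb                 # all CPUs
 *   echo 4096 > per_cpu/cpu1/buffer_size_kb    # just CPU 1
 */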
5306 
5307 
5308 /**
5309  * tracing_update_buffers - used by tracing facility to expand ring buffers
5310  *
5311  * To save memory when tracing is never used on a system that has it
5312  * configured in, the ring buffers are set to a minimum size. Once a
5313  * user starts to use the tracing facility, they need to grow to
5314  * their default size.
5315  *
5316  * This function is to be called when a tracer is about to be used.
5317  */
5318 int tracing_update_buffers(void)
5319 {
5320 	int ret = 0;
5321 
5322 	mutex_lock(&trace_types_lock);
5323 	if (!ring_buffer_expanded)
5324 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5325 						RING_BUFFER_ALL_CPUS);
5326 	mutex_unlock(&trace_types_lock);
5327 
5328 	return ret;
5329 }
5330 
5331 struct trace_option_dentry;
5332 
5333 static void
5334 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5335 
5336 /*
5337  * Used to clear out the tracer before deletion of an instance.
5338  * Must have trace_types_lock held.
5339  */
5340 static void tracing_set_nop(struct trace_array *tr)
5341 {
5342 	if (tr->current_trace == &nop_trace)
5343 		return;
5344 
5345 	tr->current_trace->enabled--;
5346 
5347 	if (tr->current_trace->reset)
5348 		tr->current_trace->reset(tr);
5349 
5350 	tr->current_trace = &nop_trace;
5351 }
5352 
5353 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5354 {
5355 	/* Only enable if the directory has been created already. */
5356 	if (!tr->dir)
5357 		return;
5358 
5359 	create_trace_option_files(tr, t);
5360 }
5361 
5362 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5363 {
5364 	struct tracer *t;
5365 #ifdef CONFIG_TRACER_MAX_TRACE
5366 	bool had_max_tr;
5367 #endif
5368 	int ret = 0;
5369 
5370 	mutex_lock(&trace_types_lock);
5371 
5372 	if (!ring_buffer_expanded) {
5373 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5374 						RING_BUFFER_ALL_CPUS);
5375 		if (ret < 0)
5376 			goto out;
5377 		ret = 0;
5378 	}
5379 
5380 	for (t = trace_types; t; t = t->next) {
5381 		if (strcmp(t->name, buf) == 0)
5382 			break;
5383 	}
5384 	if (!t) {
5385 		ret = -EINVAL;
5386 		goto out;
5387 	}
5388 	if (t == tr->current_trace)
5389 		goto out;
5390 
5391 	/* Some tracers won't work on kernel command line */
5392 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5393 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5394 			t->name);
5395 		goto out;
5396 	}
5397 
5398 	/* Some tracers are only allowed for the top level buffer */
5399 	if (!trace_ok_for_array(t, tr)) {
5400 		ret = -EINVAL;
5401 		goto out;
5402 	}
5403 
5404 	/* If trace pipe files are being read, we can't change the tracer */
5405 	if (tr->current_trace->ref) {
5406 		ret = -EBUSY;
5407 		goto out;
5408 	}
5409 
5410 	trace_branch_disable();
5411 
5412 	tr->current_trace->enabled--;
5413 
5414 	if (tr->current_trace->reset)
5415 		tr->current_trace->reset(tr);
5416 
5417 	/* Current trace needs to be nop_trace before synchronize_rcu */
5418 	tr->current_trace = &nop_trace;
5419 
5420 #ifdef CONFIG_TRACER_MAX_TRACE
5421 	had_max_tr = tr->allocated_snapshot;
5422 
5423 	if (had_max_tr && !t->use_max_tr) {
5424 		/*
5425 		 * We need to make sure that the update_max_tr sees that
5426 		 * current_trace changed to nop_trace to keep it from
5427 		 * swapping the buffers after we resize it.
5428 		 * update_max_tr() is called with interrupts disabled,
5429 		 * so a synchronize_rcu() is sufficient.
5430 		 */
5431 		synchronize_rcu();
5432 		free_snapshot(tr);
5433 	}
5434 #endif
5435 
5436 #ifdef CONFIG_TRACER_MAX_TRACE
5437 	if (t->use_max_tr && !had_max_tr) {
5438 		ret = tracing_alloc_snapshot_instance(tr);
5439 		if (ret < 0)
5440 			goto out;
5441 	}
5442 #endif
5443 
5444 	if (t->init) {
5445 		ret = tracer_init(t, tr);
5446 		if (ret)
5447 			goto out;
5448 	}
5449 
5450 	tr->current_trace = t;
5451 	tr->current_trace->enabled++;
5452 	trace_branch_enable(tr);
5453  out:
5454 	mutex_unlock(&trace_types_lock);
5455 
5456 	return ret;
5457 }
5458 
5459 static ssize_t
5460 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5461 			size_t cnt, loff_t *ppos)
5462 {
5463 	struct trace_array *tr = filp->private_data;
5464 	char buf[MAX_TRACER_SIZE+1];
5465 	int i;
5466 	size_t ret;
5467 	int err;
5468 
5469 	ret = cnt;
5470 
5471 	if (cnt > MAX_TRACER_SIZE)
5472 		cnt = MAX_TRACER_SIZE;
5473 
5474 	if (copy_from_user(buf, ubuf, cnt))
5475 		return -EFAULT;
5476 
5477 	buf[cnt] = 0;
5478 
5479 	/* strip trailing whitespace. */
5480 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5481 		buf[i] = 0;
5482 
5483 	err = tracing_set_tracer(tr, buf);
5484 	if (err)
5485 		return err;
5486 
5487 	*ppos += ret;
5488 
5489 	return ret;
5490 }
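
/*
 * Example (illustrative; which tracers exist depends on the kernel config):
 * switching tracers through the file backed by tracing_set_trace_write():
 *
 *   cat available_tracers          # names tracing_set_tracer() will accept
 *   echo function > current_tracer
 *   echo nop > current_tracer      # back to the no-op tracer
 */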
5491 
5492 static ssize_t
5493 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5494 		   size_t cnt, loff_t *ppos)
5495 {
5496 	char buf[64];
5497 	int r;
5498 
5499 	r = snprintf(buf, sizeof(buf), "%ld\n",
5500 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5501 	if (r > sizeof(buf))
5502 		r = sizeof(buf);
5503 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5504 }
5505 
5506 static ssize_t
5507 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5508 		    size_t cnt, loff_t *ppos)
5509 {
5510 	unsigned long val;
5511 	int ret;
5512 
5513 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5514 	if (ret)
5515 		return ret;
5516 
5517 	*ptr = val * 1000;
5518 
5519 	return cnt;
5520 }
5521 
5522 static ssize_t
5523 tracing_thresh_read(struct file *filp, char __user *ubuf,
5524 		    size_t cnt, loff_t *ppos)
5525 {
5526 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5527 }
5528 
5529 static ssize_t
5530 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5531 		     size_t cnt, loff_t *ppos)
5532 {
5533 	struct trace_array *tr = filp->private_data;
5534 	int ret;
5535 
5536 	mutex_lock(&trace_types_lock);
5537 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5538 	if (ret < 0)
5539 		goto out;
5540 
5541 	if (tr->current_trace->update_thresh) {
5542 		ret = tr->current_trace->update_thresh(tr);
5543 		if (ret < 0)
5544 			goto out;
5545 	}
5546 
5547 	ret = cnt;
5548 out:
5549 	mutex_unlock(&trace_types_lock);
5550 
5551 	return ret;
5552 }
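
/*
 * Note the unit handling above: tracing_thresh is kept in nanoseconds
 * internally, but the file is read and written in microseconds
 * (nsecs_to_usecs() on read, "* 1000" on write). Illustratively,
 * "echo 100 > tracing_thresh" sets a 100 usec threshold.
 */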
5553 
5554 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5555 
5556 static ssize_t
5557 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5558 		     size_t cnt, loff_t *ppos)
5559 {
5560 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5561 }
5562 
5563 static ssize_t
5564 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5565 		      size_t cnt, loff_t *ppos)
5566 {
5567 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5568 }
5569 
5570 #endif
5571 
5572 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5573 {
5574 	struct trace_array *tr = inode->i_private;
5575 	struct trace_iterator *iter;
5576 	int ret = 0;
5577 
5578 	if (tracing_disabled)
5579 		return -ENODEV;
5580 
5581 	if (trace_array_get(tr) < 0)
5582 		return -ENODEV;
5583 
5584 	mutex_lock(&trace_types_lock);
5585 
5586 	/* create a buffer to store the information to pass to userspace */
5587 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5588 	if (!iter) {
5589 		ret = -ENOMEM;
5590 		__trace_array_put(tr);
5591 		goto out;
5592 	}
5593 
5594 	trace_seq_init(&iter->seq);
5595 	iter->trace = tr->current_trace;
5596 
5597 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5598 		ret = -ENOMEM;
5599 		goto fail;
5600 	}
5601 
5602 	/* trace pipe does not show start of buffer */
5603 	cpumask_setall(iter->started);
5604 
5605 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5606 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5607 
5608 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5609 	if (trace_clocks[tr->clock_id].in_ns)
5610 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5611 
5612 	iter->tr = tr;
5613 	iter->trace_buffer = &tr->trace_buffer;
5614 	iter->cpu_file = tracing_get_cpu(inode);
5615 	mutex_init(&iter->mutex);
5616 	filp->private_data = iter;
5617 
5618 	if (iter->trace->pipe_open)
5619 		iter->trace->pipe_open(iter);
5620 
5621 	nonseekable_open(inode, filp);
5622 
5623 	tr->current_trace->ref++;
5624 out:
5625 	mutex_unlock(&trace_types_lock);
5626 	return ret;
5627 
5628 fail:
5629 	kfree(iter->trace);
5630 	kfree(iter);
5631 	__trace_array_put(tr);
5632 	mutex_unlock(&trace_types_lock);
5633 	return ret;
5634 }
5635 
5636 static int tracing_release_pipe(struct inode *inode, struct file *file)
5637 {
5638 	struct trace_iterator *iter = file->private_data;
5639 	struct trace_array *tr = inode->i_private;
5640 
5641 	mutex_lock(&trace_types_lock);
5642 
5643 	tr->current_trace->ref--;
5644 
5645 	if (iter->trace->pipe_close)
5646 		iter->trace->pipe_close(iter);
5647 
5648 	mutex_unlock(&trace_types_lock);
5649 
5650 	free_cpumask_var(iter->started);
5651 	mutex_destroy(&iter->mutex);
5652 	kfree(iter);
5653 
5654 	trace_array_put(tr);
5655 
5656 	return 0;
5657 }
5658 
5659 static __poll_t
5660 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5661 {
5662 	struct trace_array *tr = iter->tr;
5663 
5664 	/* Iterators are static, they should be filled or empty */
5665 	if (trace_buffer_iter(iter, iter->cpu_file))
5666 		return EPOLLIN | EPOLLRDNORM;
5667 
5668 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5669 		/*
5670 		 * Always select as readable when in blocking mode
5671 		 */
5672 		return EPOLLIN | EPOLLRDNORM;
5673 	else
5674 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5675 					     filp, poll_table);
5676 }
5677 
5678 static __poll_t
5679 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5680 {
5681 	struct trace_iterator *iter = filp->private_data;
5682 
5683 	return trace_poll(iter, filp, poll_table);
5684 }
5685 
5686 /* Must be called with iter->mutex held. */
5687 static int tracing_wait_pipe(struct file *filp)
5688 {
5689 	struct trace_iterator *iter = filp->private_data;
5690 	int ret;
5691 
5692 	while (trace_empty(iter)) {
5693 
5694 		if ((filp->f_flags & O_NONBLOCK)) {
5695 			return -EAGAIN;
5696 		}
5697 
5698 		/*
5699 		 * We block until we read something and tracing is disabled.
5700 		 * We still block if tracing is disabled, but we have never
5701 		 * read anything. This allows a user to cat this file, and
5702 		 * then enable tracing. But after we have read something,
5703 		 * we give an EOF when tracing is again disabled.
5704 		 *
5705 		 * iter->pos will be 0 if we haven't read anything.
5706 		 */
5707 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5708 			break;
5709 
5710 		mutex_unlock(&iter->mutex);
5711 
5712 		ret = wait_on_pipe(iter, 0);
5713 
5714 		mutex_lock(&iter->mutex);
5715 
5716 		if (ret)
5717 			return ret;
5718 	}
5719 
5720 	return 1;
5721 }
5722 
5723 /*
5724  * Consumer reader.
5725  */
5726 static ssize_t
5727 tracing_read_pipe(struct file *filp, char __user *ubuf,
5728 		  size_t cnt, loff_t *ppos)
5729 {
5730 	struct trace_iterator *iter = filp->private_data;
5731 	ssize_t sret;
5732 
5733 	/*
5734 	 * Avoid more than one consumer on a single file descriptor.
5735 	 * This is just a matter of trace coherency; the ring buffer itself
5736 	 * is protected.
5737 	 */
5738 	mutex_lock(&iter->mutex);
5739 
5740 	/* return any leftover data */
5741 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5742 	if (sret != -EBUSY)
5743 		goto out;
5744 
5745 	trace_seq_init(&iter->seq);
5746 
5747 	if (iter->trace->read) {
5748 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5749 		if (sret)
5750 			goto out;
5751 	}
5752 
5753 waitagain:
5754 	sret = tracing_wait_pipe(filp);
5755 	if (sret <= 0)
5756 		goto out;
5757 
5758 	/* stop when tracing is finished */
5759 	if (trace_empty(iter)) {
5760 		sret = 0;
5761 		goto out;
5762 	}
5763 
5764 	if (cnt >= PAGE_SIZE)
5765 		cnt = PAGE_SIZE - 1;
5766 
5767 	/* reset all but tr, trace, and overruns */
5768 	memset(&iter->seq, 0,
5769 	       sizeof(struct trace_iterator) -
5770 	       offsetof(struct trace_iterator, seq));
5771 	cpumask_clear(iter->started);
5772 	iter->pos = -1;
5773 
5774 	trace_event_read_lock();
5775 	trace_access_lock(iter->cpu_file);
5776 	while (trace_find_next_entry_inc(iter) != NULL) {
5777 		enum print_line_t ret;
5778 		int save_len = iter->seq.seq.len;
5779 
5780 		ret = print_trace_line(iter);
5781 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5782 			/* don't print partial lines */
5783 			iter->seq.seq.len = save_len;
5784 			break;
5785 		}
5786 		if (ret != TRACE_TYPE_NO_CONSUME)
5787 			trace_consume(iter);
5788 
5789 		if (trace_seq_used(&iter->seq) >= cnt)
5790 			break;
5791 
5792 		/*
5793 		 * Setting the full flag means we reached the trace_seq buffer
5794 		 * size and should have left via the partial-line condition above.
5795 		 * If we get here, one of the trace_seq_* functions is being misused.
5796 		 */
5797 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5798 			  iter->ent->type);
5799 	}
5800 	trace_access_unlock(iter->cpu_file);
5801 	trace_event_read_unlock();
5802 
5803 	/* Now copy what we have to the user */
5804 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5805 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5806 		trace_seq_init(&iter->seq);
5807 
5808 	/*
5809 	 * If there was nothing to send to user, in spite of consuming trace
5810 	 * entries, go back to wait for more entries.
5811 	 */
5812 	if (sret == -EBUSY)
5813 		goto waitagain;
5814 
5815 out:
5816 	mutex_unlock(&iter->mutex);
5817 
5818 	return sret;
5819 }
5820 
5821 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5822 				     unsigned int idx)
5823 {
5824 	__free_page(spd->pages[idx]);
5825 }
5826 
5827 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5828 	.can_merge		= 0,
5829 	.confirm		= generic_pipe_buf_confirm,
5830 	.release		= generic_pipe_buf_release,
5831 	.steal			= generic_pipe_buf_steal,
5832 	.get			= generic_pipe_buf_get,
5833 };
5834 
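/*
 * Format and consume trace entries into iter->seq until the page-sized
 * seq buffer is full, no more than "rem" bytes are wanted, or the buffer
 * runs out of entries.  Returns how many bytes the caller still wants.
 */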
5835 static size_t
5836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5837 {
5838 	size_t count;
5839 	int save_len;
5840 	int ret;
5841 
5842 	/* Seq buffer is page-sized, exactly what we need. */
5843 	for (;;) {
5844 		save_len = iter->seq.seq.len;
5845 		ret = print_trace_line(iter);
5846 
5847 		if (trace_seq_has_overflowed(&iter->seq)) {
5848 			iter->seq.seq.len = save_len;
5849 			break;
5850 		}
5851 
5852 		/*
5853 		 * This should not be hit: a partial line is only returned
5854 		 * when iter->seq has overflowed, which was handled above.
5855 		 * But check it anyway to be safe.
5856 		 */
5857 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5858 			iter->seq.seq.len = save_len;
5859 			break;
5860 		}
5861 
5862 		count = trace_seq_used(&iter->seq) - save_len;
5863 		if (rem < count) {
5864 			rem = 0;
5865 			iter->seq.seq.len = save_len;
5866 			break;
5867 		}
5868 
5869 		if (ret != TRACE_TYPE_NO_CONSUME)
5870 			trace_consume(iter);
5871 		rem -= count;
5872 		if (!trace_find_next_entry_inc(iter))	{
5873 			rem = 0;
5874 			iter->ent = NULL;
5875 			break;
5876 		}
5877 	}
5878 
5879 	return rem;
5880 }
5881 
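/*
 * splice() handler for trace_pipe: formatted (text) output is copied into
 * freshly allocated pages and fed to the pipe via splice_to_pipe().  The
 * raw, page-for-page variant is tracing_buffers_splice_read() below.
 */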
5882 static ssize_t tracing_splice_read_pipe(struct file *filp,
5883 					loff_t *ppos,
5884 					struct pipe_inode_info *pipe,
5885 					size_t len,
5886 					unsigned int flags)
5887 {
5888 	struct page *pages_def[PIPE_DEF_BUFFERS];
5889 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5890 	struct trace_iterator *iter = filp->private_data;
5891 	struct splice_pipe_desc spd = {
5892 		.pages		= pages_def,
5893 		.partial	= partial_def,
5894 		.nr_pages	= 0, /* This gets updated below. */
5895 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5896 		.ops		= &tracing_pipe_buf_ops,
5897 		.spd_release	= tracing_spd_release_pipe,
5898 	};
5899 	ssize_t ret;
5900 	size_t rem;
5901 	unsigned int i;
5902 
5903 	if (splice_grow_spd(pipe, &spd))
5904 		return -ENOMEM;
5905 
5906 	mutex_lock(&iter->mutex);
5907 
5908 	if (iter->trace->splice_read) {
5909 		ret = iter->trace->splice_read(iter, filp,
5910 					       ppos, pipe, len, flags);
5911 		if (ret)
5912 			goto out_err;
5913 	}
5914 
5915 	ret = tracing_wait_pipe(filp);
5916 	if (ret <= 0)
5917 		goto out_err;
5918 
5919 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5920 		ret = -EFAULT;
5921 		goto out_err;
5922 	}
5923 
5924 	trace_event_read_lock();
5925 	trace_access_lock(iter->cpu_file);
5926 
5927 	/* Fill as many pages as possible. */
5928 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5929 		spd.pages[i] = alloc_page(GFP_KERNEL);
5930 		if (!spd.pages[i])
5931 			break;
5932 
5933 		rem = tracing_fill_pipe_page(rem, iter);
5934 
5935 		/* Copy the data into the page, so we can start over. */
5936 		ret = trace_seq_to_buffer(&iter->seq,
5937 					  page_address(spd.pages[i]),
5938 					  trace_seq_used(&iter->seq));
5939 		if (ret < 0) {
5940 			__free_page(spd.pages[i]);
5941 			break;
5942 		}
5943 		spd.partial[i].offset = 0;
5944 		spd.partial[i].len = trace_seq_used(&iter->seq);
5945 
5946 		trace_seq_init(&iter->seq);
5947 	}
5948 
5949 	trace_access_unlock(iter->cpu_file);
5950 	trace_event_read_unlock();
5951 	mutex_unlock(&iter->mutex);
5952 
5953 	spd.nr_pages = i;
5954 
5955 	if (i)
5956 		ret = splice_to_pipe(pipe, &spd);
5957 	else
5958 		ret = 0;
5959 out:
5960 	splice_shrink_spd(&spd);
5961 	return ret;
5962 
5963 out_err:
5964 	mutex_unlock(&iter->mutex);
5965 	goto out;
5966 }
5967 
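/*
 * Read handler for buffer_size_kb: report the per-CPU ring buffer size in
 * KB.  For the all-CPUs file, a single value is printed when every CPU
 * buffer has the same size, and "X" when they differ.
 */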
5968 static ssize_t
5969 tracing_entries_read(struct file *filp, char __user *ubuf,
5970 		     size_t cnt, loff_t *ppos)
5971 {
5972 	struct inode *inode = file_inode(filp);
5973 	struct trace_array *tr = inode->i_private;
5974 	int cpu = tracing_get_cpu(inode);
5975 	char buf[64];
5976 	int r = 0;
5977 	ssize_t ret;
5978 
5979 	mutex_lock(&trace_types_lock);
5980 
5981 	if (cpu == RING_BUFFER_ALL_CPUS) {
5982 		int cpu, buf_size_same;
5983 		unsigned long size;
5984 
5985 		size = 0;
5986 		buf_size_same = 1;
5987 		/* check if all cpu sizes are the same */
5988 		for_each_tracing_cpu(cpu) {
5989 			/* fill in the size from the first enabled cpu */
5990 			if (size == 0)
5991 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5992 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5993 				buf_size_same = 0;
5994 				break;
5995 			}
5996 		}
5997 
5998 		if (buf_size_same) {
5999 			if (!ring_buffer_expanded)
6000 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6001 					    size >> 10,
6002 					    trace_buf_size >> 10);
6003 			else
6004 				r = sprintf(buf, "%lu\n", size >> 10);
6005 		} else
6006 			r = sprintf(buf, "X\n");
6007 	} else
6008 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6009 
6010 	mutex_unlock(&trace_types_lock);
6011 
6012 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6013 	return ret;
6014 }
6015 
6016 static ssize_t
6017 tracing_entries_write(struct file *filp, const char __user *ubuf,
6018 		      size_t cnt, loff_t *ppos)
6019 {
6020 	struct inode *inode = file_inode(filp);
6021 	struct trace_array *tr = inode->i_private;
6022 	unsigned long val;
6023 	int ret;
6024 
6025 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6026 	if (ret)
6027 		return ret;
6028 
6029 	/* must have at least 1 entry */
6030 	if (!val)
6031 		return -EINVAL;
6032 
6033 	/* value is in KB */
6034 	val <<= 10;
6035 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6036 	if (ret < 0)
6037 		return ret;
6038 
6039 	*ppos += cnt;
6040 
6041 	return cnt;
6042 }
6043 
6044 static ssize_t
6045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6046 				size_t cnt, loff_t *ppos)
6047 {
6048 	struct trace_array *tr = filp->private_data;
6049 	char buf[64];
6050 	int r, cpu;
6051 	unsigned long size = 0, expanded_size = 0;
6052 
6053 	mutex_lock(&trace_types_lock);
6054 	for_each_tracing_cpu(cpu) {
6055 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6056 		if (!ring_buffer_expanded)
6057 			expanded_size += trace_buf_size >> 10;
6058 	}
6059 	if (ring_buffer_expanded)
6060 		r = sprintf(buf, "%lu\n", size);
6061 	else
6062 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6063 	mutex_unlock(&trace_types_lock);
6064 
6065 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6066 }
6067 
6068 static ssize_t
6069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6070 			  size_t cnt, loff_t *ppos)
6071 {
6072 	/*
6073 	 * There is no need to read what the user has written; this function
6074 	 * only exists so that "echo" into the file does not return an error.
6075 	 */
6076 
6077 	*ppos += cnt;
6078 
6079 	return cnt;
6080 }
6081 
6082 static int
6083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6084 {
6085 	struct trace_array *tr = inode->i_private;
6086 
6087 	/* disable tracing? */
6088 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6089 		tracer_tracing_off(tr);
6090 	/* resize the ring buffer to 0 */
6091 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6092 
6093 	trace_array_put(tr);
6094 
6095 	return 0;
6096 }
6097 
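/*
 * Write handler for the trace_marker file: copy the user string into a
 * TRACE_PRINT event, terminate it with '\n' and '\0', and fire any
 * triggers attached to the trace_marker event file.
 */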
6098 static ssize_t
6099 tracing_mark_write(struct file *filp, const char __user *ubuf,
6100 					size_t cnt, loff_t *fpos)
6101 {
6102 	struct trace_array *tr = filp->private_data;
6103 	struct ring_buffer_event *event;
6104 	enum event_trigger_type tt = ETT_NONE;
6105 	struct ring_buffer *buffer;
6106 	struct print_entry *entry;
6107 	unsigned long irq_flags;
6108 	const char faulted[] = "<faulted>";
6109 	ssize_t written;
6110 	int size;
6111 	int len;
6112 
6113 /* Used in tracing_mark_raw_write() as well */
6114 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6115 
6116 	if (tracing_disabled)
6117 		return -EINVAL;
6118 
6119 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6120 		return -EINVAL;
6121 
6122 	if (cnt > TRACE_BUF_SIZE)
6123 		cnt = TRACE_BUF_SIZE;
6124 
6125 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6126 
6127 	local_save_flags(irq_flags);
6128 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6129 
6130 	/* If less than "<faulted>", then make sure we can still add that */
6131 	if (cnt < FAULTED_SIZE)
6132 		size += FAULTED_SIZE - cnt;
6133 
6134 	buffer = tr->trace_buffer.buffer;
6135 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6136 					    irq_flags, preempt_count());
6137 	if (unlikely(!event))
6138 		/* Ring buffer disabled, return as if not open for write */
6139 		return -EBADF;
6140 
6141 	entry = ring_buffer_event_data(event);
6142 	entry->ip = _THIS_IP_;
6143 
6144 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6145 	if (len) {
6146 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6147 		cnt = FAULTED_SIZE;
6148 		written = -EFAULT;
6149 	} else
6150 		written = cnt;
6151 	len = cnt;
6152 
6153 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6154 		/* do not add \n before testing triggers, but add \0 */
6155 		entry->buf[cnt] = '\0';
6156 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6157 	}
6158 
6159 	if (entry->buf[cnt - 1] != '\n') {
6160 		entry->buf[cnt] = '\n';
6161 		entry->buf[cnt + 1] = '\0';
6162 	} else
6163 		entry->buf[cnt] = '\0';
6164 
6165 	__buffer_unlock_commit(buffer, event);
6166 
6167 	if (tt)
6168 		event_triggers_post_call(tr->trace_marker_file, tt);
6169 
6170 	if (written > 0)
6171 		*fpos += written;
6172 
6173 	return written;
6174 }
6175 
6176 /* Limit it for now to 3K (including tag) */
6177 #define RAW_DATA_MAX_SIZE (1024*3)
6178 
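/*
 * Binary counterpart of the marker write (trace_marker_raw): the payload
 * must start with an unsigned int tag id and is recorded verbatim in a
 * TRACE_RAW_DATA event.
 */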
6179 static ssize_t
6180 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6181 					size_t cnt, loff_t *fpos)
6182 {
6183 	struct trace_array *tr = filp->private_data;
6184 	struct ring_buffer_event *event;
6185 	struct ring_buffer *buffer;
6186 	struct raw_data_entry *entry;
6187 	const char faulted[] = "<faulted>";
6188 	unsigned long irq_flags;
6189 	ssize_t written;
6190 	int size;
6191 	int len;
6192 
6193 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6194 
6195 	if (tracing_disabled)
6196 		return -EINVAL;
6197 
6198 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6199 		return -EINVAL;
6200 
6201 	/* The marker must at least have a tag id */
6202 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6203 		return -EINVAL;
6204 
6205 	if (cnt > TRACE_BUF_SIZE)
6206 		cnt = TRACE_BUF_SIZE;
6207 
6208 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6209 
6210 	local_save_flags(irq_flags);
6211 	size = sizeof(*entry) + cnt;
6212 	if (cnt < FAULT_SIZE_ID)
6213 		size += FAULT_SIZE_ID - cnt;
6214 
6215 	buffer = tr->trace_buffer.buffer;
6216 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6217 					    irq_flags, preempt_count());
6218 	if (!event)
6219 		/* Ring buffer disabled, return as if not open for write */
6220 		return -EBADF;
6221 
6222 	entry = ring_buffer_event_data(event);
6223 
6224 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6225 	if (len) {
6226 		entry->id = -1;
6227 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6228 		written = -EFAULT;
6229 	} else
6230 		written = cnt;
6231 
6232 	__buffer_unlock_commit(buffer, event);
6233 
6234 	if (written > 0)
6235 		*fpos += written;
6236 
6237 	return written;
6238 }
6239 
6240 static int tracing_clock_show(struct seq_file *m, void *v)
6241 {
6242 	struct trace_array *tr = m->private;
6243 	int i;
6244 
6245 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6246 		seq_printf(m,
6247 			"%s%s%s%s", i ? " " : "",
6248 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6249 			i == tr->clock_id ? "]" : "");
6250 	seq_putc(m, '\n');
6251 
6252 	return 0;
6253 }
6254 
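/*
 * Select the clock used to timestamp events.  The buffers are reset
 * afterwards, since timestamps taken with different clocks cannot be
 * compared to each other.
 */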
6255 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6256 {
6257 	int i;
6258 
6259 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6260 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6261 			break;
6262 	}
6263 	if (i == ARRAY_SIZE(trace_clocks))
6264 		return -EINVAL;
6265 
6266 	mutex_lock(&trace_types_lock);
6267 
6268 	tr->clock_id = i;
6269 
6270 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6271 
6272 	/*
6273 	 * New clock may not be consistent with the previous clock.
6274 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6275 	 */
6276 	tracing_reset_online_cpus(&tr->trace_buffer);
6277 
6278 #ifdef CONFIG_TRACER_MAX_TRACE
6279 	if (tr->max_buffer.buffer)
6280 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6281 	tracing_reset_online_cpus(&tr->max_buffer);
6282 #endif
6283 
6284 	mutex_unlock(&trace_types_lock);
6285 
6286 	return 0;
6287 }
6288 
6289 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6290 				   size_t cnt, loff_t *fpos)
6291 {
6292 	struct seq_file *m = filp->private_data;
6293 	struct trace_array *tr = m->private;
6294 	char buf[64];
6295 	const char *clockstr;
6296 	int ret;
6297 
6298 	if (cnt >= sizeof(buf))
6299 		return -EINVAL;
6300 
6301 	if (copy_from_user(buf, ubuf, cnt))
6302 		return -EFAULT;
6303 
6304 	buf[cnt] = 0;
6305 
6306 	clockstr = strstrip(buf);
6307 
6308 	ret = tracing_set_clock(tr, clockstr);
6309 	if (ret)
6310 		return ret;
6311 
6312 	*fpos += cnt;
6313 
6314 	return cnt;
6315 }
6316 
6317 static int tracing_clock_open(struct inode *inode, struct file *file)
6318 {
6319 	struct trace_array *tr = inode->i_private;
6320 	int ret;
6321 
6322 	if (tracing_disabled)
6323 		return -ENODEV;
6324 
6325 	if (trace_array_get(tr))
6326 		return -ENODEV;
6327 
6328 	ret = single_open(file, tracing_clock_show, inode->i_private);
6329 	if (ret < 0)
6330 		trace_array_put(tr);
6331 
6332 	return ret;
6333 }
6334 
6335 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6336 {
6337 	struct trace_array *tr = m->private;
6338 
6339 	mutex_lock(&trace_types_lock);
6340 
6341 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6342 		seq_puts(m, "delta [absolute]\n");
6343 	else
6344 		seq_puts(m, "[delta] absolute\n");
6345 
6346 	mutex_unlock(&trace_types_lock);
6347 
6348 	return 0;
6349 }
6350 
6351 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6352 {
6353 	struct trace_array *tr = inode->i_private;
6354 	int ret;
6355 
6356 	if (tracing_disabled)
6357 		return -ENODEV;
6358 
6359 	if (trace_array_get(tr))
6360 		return -ENODEV;
6361 
6362 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6363 	if (ret < 0)
6364 		trace_array_put(tr);
6365 
6366 	return ret;
6367 }
6368 
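/*
 * Switch this trace array between delta and absolute timestamps.  Enables
 * are reference counted, so the last user to drop its reference switches
 * the buffers back to delta timestamps.
 */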
6369 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6370 {
6371 	int ret = 0;
6372 
6373 	mutex_lock(&trace_types_lock);
6374 
6375 	if (abs && tr->time_stamp_abs_ref++)
6376 		goto out;
6377 
6378 	if (!abs) {
6379 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6380 			ret = -EINVAL;
6381 			goto out;
6382 		}
6383 
6384 		if (--tr->time_stamp_abs_ref)
6385 			goto out;
6386 	}
6387 
6388 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6389 
6390 #ifdef CONFIG_TRACER_MAX_TRACE
6391 	if (tr->max_buffer.buffer)
6392 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6393 #endif
6394  out:
6395 	mutex_unlock(&trace_types_lock);
6396 
6397 	return ret;
6398 }
6399 
6400 struct ftrace_buffer_info {
6401 	struct trace_iterator	iter;
6402 	void			*spare;
6403 	unsigned int		spare_cpu;
6404 	unsigned int		read;
6405 };
6406 
6407 #ifdef CONFIG_TRACER_SNAPSHOT
6408 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6409 {
6410 	struct trace_array *tr = inode->i_private;
6411 	struct trace_iterator *iter;
6412 	struct seq_file *m;
6413 	int ret = 0;
6414 
6415 	if (trace_array_get(tr) < 0)
6416 		return -ENODEV;
6417 
6418 	if (file->f_mode & FMODE_READ) {
6419 		iter = __tracing_open(inode, file, true);
6420 		if (IS_ERR(iter))
6421 			ret = PTR_ERR(iter);
6422 	} else {
6423 		/* Writes still need the seq_file to hold the private data */
6424 		ret = -ENOMEM;
6425 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6426 		if (!m)
6427 			goto out;
6428 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6429 		if (!iter) {
6430 			kfree(m);
6431 			goto out;
6432 		}
6433 		ret = 0;
6434 
6435 		iter->tr = tr;
6436 		iter->trace_buffer = &tr->max_buffer;
6437 		iter->cpu_file = tracing_get_cpu(inode);
6438 		m->private = iter;
6439 		file->private_data = m;
6440 	}
6441 out:
6442 	if (ret < 0)
6443 		trace_array_put(tr);
6444 
6445 	return ret;
6446 }
6447 
6448 static ssize_t
6449 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6450 		       loff_t *ppos)
6451 {
6452 	struct seq_file *m = filp->private_data;
6453 	struct trace_iterator *iter = m->private;
6454 	struct trace_array *tr = iter->tr;
6455 	unsigned long val;
6456 	int ret;
6457 
6458 	ret = tracing_update_buffers();
6459 	if (ret < 0)
6460 		return ret;
6461 
6462 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6463 	if (ret)
6464 		return ret;
6465 
6466 	mutex_lock(&trace_types_lock);
6467 
6468 	if (tr->current_trace->use_max_tr) {
6469 		ret = -EBUSY;
6470 		goto out;
6471 	}
6472 
6473 	switch (val) {
6474 	case 0:
6475 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6476 			ret = -EINVAL;
6477 			break;
6478 		}
6479 		if (tr->allocated_snapshot)
6480 			free_snapshot(tr);
6481 		break;
6482 	case 1:
6483 /* Only allow per-cpu swap if the ring buffer supports it */
6484 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6485 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6486 			ret = -EINVAL;
6487 			break;
6488 		}
6489 #endif
6490 		if (!tr->allocated_snapshot) {
6491 			ret = tracing_alloc_snapshot_instance(tr);
6492 			if (ret < 0)
6493 				break;
6494 		}
6495 		local_irq_disable();
6496 		/* Now, we're going to swap */
6497 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6498 			update_max_tr(tr, current, smp_processor_id());
6499 		else
6500 			update_max_tr_single(tr, current, iter->cpu_file);
6501 		local_irq_enable();
6502 		break;
6503 	default:
6504 		if (tr->allocated_snapshot) {
6505 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6506 				tracing_reset_online_cpus(&tr->max_buffer);
6507 			else
6508 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6509 		}
6510 		break;
6511 	}
6512 
6513 	if (ret >= 0) {
6514 		*ppos += cnt;
6515 		ret = cnt;
6516 	}
6517 out:
6518 	mutex_unlock(&trace_types_lock);
6519 	return ret;
6520 }
6521 
6522 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6523 {
6524 	struct seq_file *m = file->private_data;
6525 	int ret;
6526 
6527 	ret = tracing_release(inode, file);
6528 
6529 	if (file->f_mode & FMODE_READ)
6530 		return ret;
6531 
6532 	/* If write only, the seq_file is just a stub */
6533 	if (m)
6534 		kfree(m->private);
6535 	kfree(m);
6536 
6537 	return 0;
6538 }
6539 
6540 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6541 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6542 				    size_t count, loff_t *ppos);
6543 static int tracing_buffers_release(struct inode *inode, struct file *file);
6544 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6545 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6546 
6547 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6548 {
6549 	struct ftrace_buffer_info *info;
6550 	int ret;
6551 
6552 	ret = tracing_buffers_open(inode, filp);
6553 	if (ret < 0)
6554 		return ret;
6555 
6556 	info = filp->private_data;
6557 
6558 	if (info->iter.trace->use_max_tr) {
6559 		tracing_buffers_release(inode, filp);
6560 		return -EBUSY;
6561 	}
6562 
6563 	info->iter.snapshot = true;
6564 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6565 
6566 	return ret;
6567 }
6568 
6569 #endif /* CONFIG_TRACER_SNAPSHOT */
6570 
6571 
6572 static const struct file_operations tracing_thresh_fops = {
6573 	.open		= tracing_open_generic,
6574 	.read		= tracing_thresh_read,
6575 	.write		= tracing_thresh_write,
6576 	.llseek		= generic_file_llseek,
6577 };
6578 
6579 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6580 static const struct file_operations tracing_max_lat_fops = {
6581 	.open		= tracing_open_generic,
6582 	.read		= tracing_max_lat_read,
6583 	.write		= tracing_max_lat_write,
6584 	.llseek		= generic_file_llseek,
6585 };
6586 #endif
6587 
6588 static const struct file_operations set_tracer_fops = {
6589 	.open		= tracing_open_generic,
6590 	.read		= tracing_set_trace_read,
6591 	.write		= tracing_set_trace_write,
6592 	.llseek		= generic_file_llseek,
6593 };
6594 
6595 static const struct file_operations tracing_pipe_fops = {
6596 	.open		= tracing_open_pipe,
6597 	.poll		= tracing_poll_pipe,
6598 	.read		= tracing_read_pipe,
6599 	.splice_read	= tracing_splice_read_pipe,
6600 	.release	= tracing_release_pipe,
6601 	.llseek		= no_llseek,
6602 };
6603 
6604 static const struct file_operations tracing_entries_fops = {
6605 	.open		= tracing_open_generic_tr,
6606 	.read		= tracing_entries_read,
6607 	.write		= tracing_entries_write,
6608 	.llseek		= generic_file_llseek,
6609 	.release	= tracing_release_generic_tr,
6610 };
6611 
6612 static const struct file_operations tracing_total_entries_fops = {
6613 	.open		= tracing_open_generic_tr,
6614 	.read		= tracing_total_entries_read,
6615 	.llseek		= generic_file_llseek,
6616 	.release	= tracing_release_generic_tr,
6617 };
6618 
6619 static const struct file_operations tracing_free_buffer_fops = {
6620 	.open		= tracing_open_generic_tr,
6621 	.write		= tracing_free_buffer_write,
6622 	.release	= tracing_free_buffer_release,
6623 };
6624 
6625 static const struct file_operations tracing_mark_fops = {
6626 	.open		= tracing_open_generic_tr,
6627 	.write		= tracing_mark_write,
6628 	.llseek		= generic_file_llseek,
6629 	.release	= tracing_release_generic_tr,
6630 };
6631 
6632 static const struct file_operations tracing_mark_raw_fops = {
6633 	.open		= tracing_open_generic_tr,
6634 	.write		= tracing_mark_raw_write,
6635 	.llseek		= generic_file_llseek,
6636 	.release	= tracing_release_generic_tr,
6637 };
6638 
6639 static const struct file_operations trace_clock_fops = {
6640 	.open		= tracing_clock_open,
6641 	.read		= seq_read,
6642 	.llseek		= seq_lseek,
6643 	.release	= tracing_single_release_tr,
6644 	.write		= tracing_clock_write,
6645 };
6646 
6647 static const struct file_operations trace_time_stamp_mode_fops = {
6648 	.open		= tracing_time_stamp_mode_open,
6649 	.read		= seq_read,
6650 	.llseek		= seq_lseek,
6651 	.release	= tracing_single_release_tr,
6652 };
6653 
6654 #ifdef CONFIG_TRACER_SNAPSHOT
6655 static const struct file_operations snapshot_fops = {
6656 	.open		= tracing_snapshot_open,
6657 	.read		= seq_read,
6658 	.write		= tracing_snapshot_write,
6659 	.llseek		= tracing_lseek,
6660 	.release	= tracing_snapshot_release,
6661 };
6662 
6663 static const struct file_operations snapshot_raw_fops = {
6664 	.open		= snapshot_raw_open,
6665 	.read		= tracing_buffers_read,
6666 	.release	= tracing_buffers_release,
6667 	.splice_read	= tracing_buffers_splice_read,
6668 	.llseek		= no_llseek,
6669 };
6670 
6671 #endif /* CONFIG_TRACER_SNAPSHOT */
6672 
6673 static int tracing_buffers_open(struct inode *inode, struct file *filp)
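/*
 * Open handler for trace_pipe_raw: set up an ftrace_buffer_info that reads
 * whole ring-buffer pages without formatting them, and pin the current
 * tracer for as long as the file is open.
 */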
6674 {
6675 	struct trace_array *tr = inode->i_private;
6676 	struct ftrace_buffer_info *info;
6677 	int ret;
6678 
6679 	if (tracing_disabled)
6680 		return -ENODEV;
6681 
6682 	if (trace_array_get(tr) < 0)
6683 		return -ENODEV;
6684 
6685 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6686 	if (!info) {
6687 		trace_array_put(tr);
6688 		return -ENOMEM;
6689 	}
6690 
6691 	mutex_lock(&trace_types_lock);
6692 
6693 	info->iter.tr		= tr;
6694 	info->iter.cpu_file	= tracing_get_cpu(inode);
6695 	info->iter.trace	= tr->current_trace;
6696 	info->iter.trace_buffer = &tr->trace_buffer;
6697 	info->spare		= NULL;
6698 	/* Force reading ring buffer for first read */
6699 	info->read		= (unsigned int)-1;
6700 
6701 	filp->private_data = info;
6702 
6703 	tr->current_trace->ref++;
6704 
6705 	mutex_unlock(&trace_types_lock);
6706 
6707 	ret = nonseekable_open(inode, filp);
6708 	if (ret < 0)
6709 		trace_array_put(tr);
6710 
6711 	return ret;
6712 }
6713 
6714 static __poll_t
6715 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6716 {
6717 	struct ftrace_buffer_info *info = filp->private_data;
6718 	struct trace_iterator *iter = &info->iter;
6719 
6720 	return trace_poll(iter, filp, poll_table);
6721 }
6722 
6723 static ssize_t
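/*
 * Read raw ring-buffer data: a spare page is borrowed from the ring
 * buffer, filled by ring_buffer_read_page(), and copied to user space in
 * chunks; info->read tracks how much of the page has been consumed.
 */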
6724 tracing_buffers_read(struct file *filp, char __user *ubuf,
6725 		     size_t count, loff_t *ppos)
6726 {
6727 	struct ftrace_buffer_info *info = filp->private_data;
6728 	struct trace_iterator *iter = &info->iter;
6729 	ssize_t ret = 0;
6730 	ssize_t size;
6731 
6732 	if (!count)
6733 		return 0;
6734 
6735 #ifdef CONFIG_TRACER_MAX_TRACE
6736 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6737 		return -EBUSY;
6738 #endif
6739 
6740 	if (!info->spare) {
6741 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6742 							  iter->cpu_file);
6743 		if (IS_ERR(info->spare)) {
6744 			ret = PTR_ERR(info->spare);
6745 			info->spare = NULL;
6746 		} else {
6747 			info->spare_cpu = iter->cpu_file;
6748 		}
6749 	}
6750 	if (!info->spare)
6751 		return ret;
6752 
6753 	/* Do we have previous read data to read? */
6754 	if (info->read < PAGE_SIZE)
6755 		goto read;
6756 
6757  again:
6758 	trace_access_lock(iter->cpu_file);
6759 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6760 				    &info->spare,
6761 				    count,
6762 				    iter->cpu_file, 0);
6763 	trace_access_unlock(iter->cpu_file);
6764 
6765 	if (ret < 0) {
6766 		if (trace_empty(iter)) {
6767 			if ((filp->f_flags & O_NONBLOCK))
6768 				return -EAGAIN;
6769 
6770 			ret = wait_on_pipe(iter, 0);
6771 			if (ret)
6772 				return ret;
6773 
6774 			goto again;
6775 		}
6776 		return 0;
6777 	}
6778 
6779 	info->read = 0;
6780  read:
6781 	size = PAGE_SIZE - info->read;
6782 	if (size > count)
6783 		size = count;
6784 
6785 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6786 	if (ret == size)
6787 		return -EFAULT;
6788 
6789 	size -= ret;
6790 
6791 	*ppos += size;
6792 	info->read += size;
6793 
6794 	return size;
6795 }
6796 
6797 static int tracing_buffers_release(struct inode *inode, struct file *file)
6798 {
6799 	struct ftrace_buffer_info *info = file->private_data;
6800 	struct trace_iterator *iter = &info->iter;
6801 
6802 	mutex_lock(&trace_types_lock);
6803 
6804 	iter->tr->current_trace->ref--;
6805 
6806 	__trace_array_put(iter->tr);
6807 
6808 	if (info->spare)
6809 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6810 					   info->spare_cpu, info->spare);
6811 	kfree(info);
6812 
6813 	mutex_unlock(&trace_types_lock);
6814 
6815 	return 0;
6816 }
6817 
6818 struct buffer_ref {
6819 	struct ring_buffer	*buffer;
6820 	void			*page;
6821 	int			cpu;
6822 	int			ref;
6823 };
6824 
6825 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6826 				    struct pipe_buffer *buf)
6827 {
6828 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6829 
6830 	if (--ref->ref)
6831 		return;
6832 
6833 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6834 	kfree(ref);
6835 	buf->private = 0;
6836 }
6837 
6838 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6839 				struct pipe_buffer *buf)
6840 {
6841 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6842 
6843 	ref->ref++;
6844 }
6845 
6846 /* Pipe buffer operations for a buffer. */
6847 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6848 	.can_merge		= 0,
6849 	.confirm		= generic_pipe_buf_confirm,
6850 	.release		= buffer_pipe_buf_release,
6851 	.steal			= generic_pipe_buf_steal,
6852 	.get			= buffer_pipe_buf_get,
6853 };
6854 
6855 /*
6856  * Callback from splice_to_pipe(); releases the pages left at the end
6857  * of the spd if we errored out while filling the pipe.
6858  */
6859 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6860 {
6861 	struct buffer_ref *ref =
6862 		(struct buffer_ref *)spd->partial[i].private;
6863 
6864 	if (--ref->ref)
6865 		return;
6866 
6867 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6868 	kfree(ref);
6869 	spd->partial[i].private = 0;
6870 }
6871 
6872 static ssize_t
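/*
 * Zero-copy splice for trace_pipe_raw: whole ring-buffer pages are handed
 * to the pipe, each wrapped in a buffer_ref so the page is given back to
 * the ring buffer once the pipe buffer is released.
 */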
6873 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6874 			    struct pipe_inode_info *pipe, size_t len,
6875 			    unsigned int flags)
6876 {
6877 	struct ftrace_buffer_info *info = file->private_data;
6878 	struct trace_iterator *iter = &info->iter;
6879 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6880 	struct page *pages_def[PIPE_DEF_BUFFERS];
6881 	struct splice_pipe_desc spd = {
6882 		.pages		= pages_def,
6883 		.partial	= partial_def,
6884 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6885 		.ops		= &buffer_pipe_buf_ops,
6886 		.spd_release	= buffer_spd_release,
6887 	};
6888 	struct buffer_ref *ref;
6889 	int entries, i;
6890 	ssize_t ret = 0;
6891 
6892 #ifdef CONFIG_TRACER_MAX_TRACE
6893 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6894 		return -EBUSY;
6895 #endif
6896 
6897 	if (*ppos & (PAGE_SIZE - 1))
6898 		return -EINVAL;
6899 
6900 	if (len & (PAGE_SIZE - 1)) {
6901 		if (len < PAGE_SIZE)
6902 			return -EINVAL;
6903 		len &= PAGE_MASK;
6904 	}
6905 
6906 	if (splice_grow_spd(pipe, &spd))
6907 		return -ENOMEM;
6908 
6909  again:
6910 	trace_access_lock(iter->cpu_file);
6911 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6912 
6913 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6914 		struct page *page;
6915 		int r;
6916 
6917 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6918 		if (!ref) {
6919 			ret = -ENOMEM;
6920 			break;
6921 		}
6922 
6923 		ref->ref = 1;
6924 		ref->buffer = iter->trace_buffer->buffer;
6925 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6926 		if (IS_ERR(ref->page)) {
6927 			ret = PTR_ERR(ref->page);
6928 			ref->page = NULL;
6929 			kfree(ref);
6930 			break;
6931 		}
6932 		ref->cpu = iter->cpu_file;
6933 
6934 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6935 					  len, iter->cpu_file, 1);
6936 		if (r < 0) {
6937 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6938 						   ref->page);
6939 			kfree(ref);
6940 			break;
6941 		}
6942 
6943 		page = virt_to_page(ref->page);
6944 
6945 		spd.pages[i] = page;
6946 		spd.partial[i].len = PAGE_SIZE;
6947 		spd.partial[i].offset = 0;
6948 		spd.partial[i].private = (unsigned long)ref;
6949 		spd.nr_pages++;
6950 		*ppos += PAGE_SIZE;
6951 
6952 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6953 	}
6954 
6955 	trace_access_unlock(iter->cpu_file);
6956 	spd.nr_pages = i;
6957 
6958 	/* did we read anything? */
6959 	if (!spd.nr_pages) {
6960 		if (ret)
6961 			goto out;
6962 
6963 		ret = -EAGAIN;
6964 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6965 			goto out;
6966 
6967 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
6968 		if (ret)
6969 			goto out;
6970 
6971 		goto again;
6972 	}
6973 
6974 	ret = splice_to_pipe(pipe, &spd);
6975 out:
6976 	splice_shrink_spd(&spd);
6977 
6978 	return ret;
6979 }
6980 
6981 static const struct file_operations tracing_buffers_fops = {
6982 	.open		= tracing_buffers_open,
6983 	.read		= tracing_buffers_read,
6984 	.poll		= tracing_buffers_poll,
6985 	.release	= tracing_buffers_release,
6986 	.splice_read	= tracing_buffers_splice_read,
6987 	.llseek		= no_llseek,
6988 };
6989 
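/*
 * Read handler for the per-CPU "stats" file: print entry, overrun, byte,
 * dropped and read-event counts plus the oldest and current timestamps of
 * the buffer.
 */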
6990 static ssize_t
6991 tracing_stats_read(struct file *filp, char __user *ubuf,
6992 		   size_t count, loff_t *ppos)
6993 {
6994 	struct inode *inode = file_inode(filp);
6995 	struct trace_array *tr = inode->i_private;
6996 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6997 	int cpu = tracing_get_cpu(inode);
6998 	struct trace_seq *s;
6999 	unsigned long cnt;
7000 	unsigned long long t;
7001 	unsigned long usec_rem;
7002 
7003 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7004 	if (!s)
7005 		return -ENOMEM;
7006 
7007 	trace_seq_init(s);
7008 
7009 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7010 	trace_seq_printf(s, "entries: %ld\n", cnt);
7011 
7012 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7013 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7014 
7015 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7016 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7017 
7018 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7019 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7020 
7021 	if (trace_clocks[tr->clock_id].in_ns) {
7022 		/* local or global for trace_clock */
7023 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7024 		usec_rem = do_div(t, USEC_PER_SEC);
7025 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7026 								t, usec_rem);
7027 
7028 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7029 		usec_rem = do_div(t, USEC_PER_SEC);
7030 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7031 	} else {
7032 		/* counter or tsc mode for trace_clock */
7033 		trace_seq_printf(s, "oldest event ts: %llu\n",
7034 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7035 
7036 		trace_seq_printf(s, "now ts: %llu\n",
7037 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7038 	}
7039 
7040 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7041 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7042 
7043 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7044 	trace_seq_printf(s, "read events: %ld\n", cnt);
7045 
7046 	count = simple_read_from_buffer(ubuf, count, ppos,
7047 					s->buffer, trace_seq_used(s));
7048 
7049 	kfree(s);
7050 
7051 	return count;
7052 }
7053 
7054 static const struct file_operations tracing_stats_fops = {
7055 	.open		= tracing_open_generic_tr,
7056 	.read		= tracing_stats_read,
7057 	.llseek		= generic_file_llseek,
7058 	.release	= tracing_release_generic_tr,
7059 };
7060 
7061 #ifdef CONFIG_DYNAMIC_FTRACE
7062 
7063 static ssize_t
7064 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7065 		  size_t cnt, loff_t *ppos)
7066 {
7067 	unsigned long *p = filp->private_data;
7068 	char buf[64]; /* Not too big for a shallow stack */
7069 	int r;
7070 
7071 	r = scnprintf(buf, 63, "%ld", *p);
7072 	buf[r++] = '\n';
7073 
7074 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7075 }
7076 
7077 static const struct file_operations tracing_dyn_info_fops = {
7078 	.open		= tracing_open_generic,
7079 	.read		= tracing_read_dyn_info,
7080 	.llseek		= generic_file_llseek,
7081 };
7082 #endif /* CONFIG_DYNAMIC_FTRACE */
7083 
7084 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7085 static void
7086 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7087 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7088 		void *data)
7089 {
7090 	tracing_snapshot_instance(tr);
7091 }
7092 
7093 static void
7094 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7095 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7096 		      void *data)
7097 {
7098 	struct ftrace_func_mapper *mapper = data;
7099 	long *count = NULL;
7100 
7101 	if (mapper)
7102 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7103 
7104 	if (count) {
7105 
7106 		if (*count <= 0)
7107 			return;
7108 
7109 		(*count)--;
7110 	}
7111 
7112 	tracing_snapshot_instance(tr);
7113 }
7114 
7115 static int
7116 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7117 		      struct ftrace_probe_ops *ops, void *data)
7118 {
7119 	struct ftrace_func_mapper *mapper = data;
7120 	long *count = NULL;
7121 
7122 	seq_printf(m, "%ps:", (void *)ip);
7123 
7124 	seq_puts(m, "snapshot");
7125 
7126 	if (mapper)
7127 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7128 
7129 	if (count)
7130 		seq_printf(m, ":count=%ld\n", *count);
7131 	else
7132 		seq_puts(m, ":unlimited\n");
7133 
7134 	return 0;
7135 }
7136 
7137 static int
7138 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7139 		     unsigned long ip, void *init_data, void **data)
7140 {
7141 	struct ftrace_func_mapper *mapper = *data;
7142 
7143 	if (!mapper) {
7144 		mapper = allocate_ftrace_func_mapper();
7145 		if (!mapper)
7146 			return -ENOMEM;
7147 		*data = mapper;
7148 	}
7149 
7150 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7151 }
7152 
7153 static void
7154 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7155 		     unsigned long ip, void *data)
7156 {
7157 	struct ftrace_func_mapper *mapper = data;
7158 
7159 	if (!ip) {
7160 		if (!mapper)
7161 			return;
7162 		free_ftrace_func_mapper(mapper, NULL);
7163 		return;
7164 	}
7165 
7166 	ftrace_func_mapper_remove_ip(mapper, ip);
7167 }
7168 
7169 static struct ftrace_probe_ops snapshot_probe_ops = {
7170 	.func			= ftrace_snapshot,
7171 	.print			= ftrace_snapshot_print,
7172 };
7173 
7174 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7175 	.func			= ftrace_count_snapshot,
7176 	.print			= ftrace_snapshot_print,
7177 	.init			= ftrace_snapshot_init,
7178 	.free			= ftrace_snapshot_free,
7179 };
7180 
7181 static int
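/*
 * Handle the "snapshot" command written to set_ftrace_filter, i.e.
 * "<func>:snapshot" or "<func>:snapshot:<count>".  A leading '!'
 * unregisters the probe; otherwise the snapshot buffer is allocated and
 * the probe registered with an optional trigger count.
 */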
7182 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7183 			       char *glob, char *cmd, char *param, int enable)
7184 {
7185 	struct ftrace_probe_ops *ops;
7186 	void *count = (void *)-1;
7187 	char *number;
7188 	int ret;
7189 
7190 	if (!tr)
7191 		return -ENODEV;
7192 
7193 	/* hash funcs only work with set_ftrace_filter */
7194 	if (!enable)
7195 		return -EINVAL;
7196 
7197 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7198 
7199 	if (glob[0] == '!')
7200 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7201 
7202 	if (!param)
7203 		goto out_reg;
7204 
7205 	number = strsep(&param, ":");
7206 
7207 	if (!strlen(number))
7208 		goto out_reg;
7209 
7210 	/*
7211 	 * We use the callback data field (which is a pointer)
7212 	 * as our counter.
7213 	 */
7214 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7215 	if (ret)
7216 		return ret;
7217 
7218  out_reg:
7219 	ret = tracing_alloc_snapshot_instance(tr);
7220 	if (ret < 0)
7221 		goto out;
7222 
7223 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7224 
7225  out:
7226 	return ret < 0 ? ret : 0;
7227 }
7228 
7229 static struct ftrace_func_command ftrace_snapshot_cmd = {
7230 	.name			= "snapshot",
7231 	.func			= ftrace_trace_snapshot_callback,
7232 };
7233 
7234 static __init int register_snapshot_cmd(void)
7235 {
7236 	return register_ftrace_command(&ftrace_snapshot_cmd);
7237 }
7238 #else
7239 static inline __init int register_snapshot_cmd(void) { return 0; }
7240 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7241 
7242 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7243 {
7244 	if (WARN_ON(!tr->dir))
7245 		return ERR_PTR(-ENODEV);
7246 
7247 	/* Top directory uses NULL as the parent */
7248 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7249 		return NULL;
7250 
7251 	/* All sub buffers have a descriptor */
7252 	return tr->dir;
7253 }
7254 
7255 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7256 {
7257 	struct dentry *d_tracer;
7258 
7259 	if (tr->percpu_dir)
7260 		return tr->percpu_dir;
7261 
7262 	d_tracer = tracing_get_dentry(tr);
7263 	if (IS_ERR(d_tracer))
7264 		return NULL;
7265 
7266 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7267 
7268 	WARN_ONCE(!tr->percpu_dir,
7269 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7270 
7271 	return tr->percpu_dir;
7272 }
7273 
7274 static struct dentry *
7275 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7276 		      void *data, long cpu, const struct file_operations *fops)
7277 {
7278 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7279 
7280 	if (ret) /* See tracing_get_cpu() */
7281 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7282 	return ret;
7283 }
7284 
7285 static void
7286 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7287 {
7288 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7289 	struct dentry *d_cpu;
7290 	char cpu_dir[30]; /* 30 characters should be more than enough */
7291 
7292 	if (!d_percpu)
7293 		return;
7294 
7295 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7296 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7297 	if (!d_cpu) {
7298 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7299 		return;
7300 	}
7301 
7302 	/* per cpu trace_pipe */
7303 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7304 				tr, cpu, &tracing_pipe_fops);
7305 
7306 	/* per cpu trace */
7307 	trace_create_cpu_file("trace", 0644, d_cpu,
7308 				tr, cpu, &tracing_fops);
7309 
7310 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7311 				tr, cpu, &tracing_buffers_fops);
7312 
7313 	trace_create_cpu_file("stats", 0444, d_cpu,
7314 				tr, cpu, &tracing_stats_fops);
7315 
7316 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7317 				tr, cpu, &tracing_entries_fops);
7318 
7319 #ifdef CONFIG_TRACER_SNAPSHOT
7320 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7321 				tr, cpu, &snapshot_fops);
7322 
7323 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7324 				tr, cpu, &snapshot_raw_fops);
7325 #endif
7326 }
7327 
7328 #ifdef CONFIG_FTRACE_SELFTEST
7329 /* Let selftest have access to static functions in this file */
7330 #include "trace_selftest.c"
7331 #endif
7332 
7333 static ssize_t
7334 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7335 			loff_t *ppos)
7336 {
7337 	struct trace_option_dentry *topt = filp->private_data;
7338 	char *buf;
7339 
7340 	if (topt->flags->val & topt->opt->bit)
7341 		buf = "1\n";
7342 	else
7343 		buf = "0\n";
7344 
7345 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7346 }
7347 
7348 static ssize_t
7349 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7350 			 loff_t *ppos)
7351 {
7352 	struct trace_option_dentry *topt = filp->private_data;
7353 	unsigned long val;
7354 	int ret;
7355 
7356 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7357 	if (ret)
7358 		return ret;
7359 
7360 	if (val != 0 && val != 1)
7361 		return -EINVAL;
7362 
7363 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7364 		mutex_lock(&trace_types_lock);
7365 		ret = __set_tracer_option(topt->tr, topt->flags,
7366 					  topt->opt, !val);
7367 		mutex_unlock(&trace_types_lock);
7368 		if (ret)
7369 			return ret;
7370 	}
7371 
7372 	*ppos += cnt;
7373 
7374 	return cnt;
7375 }
7376 
7377 
7378 static const struct file_operations trace_options_fops = {
7379 	.open = tracing_open_generic,
7380 	.read = trace_options_read,
7381 	.write = trace_options_write,
7382 	.llseek	= generic_file_llseek,
7383 };
7384 
7385 /*
7386  * In order to pass in both the trace_array descriptor as well as the index
7387  * to the flag that the trace option file represents, the trace_array
7388  * has a character array of trace_flags_index[], which holds the index
7389  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7390  * The address of this character array is passed to the flag option file
7391  * read/write callbacks.
7392  *
7393  * In order to extract both the index and the trace_array descriptor,
7394  * get_tr_index() uses the following algorithm.
7395  *
7396  *   idx = *ptr;
7397  *
7398  * As the pointer itself contains the address of the index (remember
7399  * index[1] == 1).
7400  *
7401  * Then to get the trace_array descriptor, by subtracting that index
7402  * from the ptr, we get to the start of the index itself.
7403  *
7404  *   ptr - idx == &index[0]
7405  *
7406  * Then a simple container_of() from that pointer gets us to the
7407  * trace_array descriptor.
7408  */
7409 static void get_tr_index(void *data, struct trace_array **ptr,
7410 			 unsigned int *pindex)
7411 {
7412 	*pindex = *(unsigned char *)data;
7413 
7414 	*ptr = container_of(data - *pindex, struct trace_array,
7415 			    trace_flags_index);
7416 }
7417 
7418 static ssize_t
7419 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7420 			loff_t *ppos)
7421 {
7422 	void *tr_index = filp->private_data;
7423 	struct trace_array *tr;
7424 	unsigned int index;
7425 	char *buf;
7426 
7427 	get_tr_index(tr_index, &tr, &index);
7428 
7429 	if (tr->trace_flags & (1 << index))
7430 		buf = "1\n";
7431 	else
7432 		buf = "0\n";
7433 
7434 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7435 }
7436 
7437 static ssize_t
7438 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7439 			 loff_t *ppos)
7440 {
7441 	void *tr_index = filp->private_data;
7442 	struct trace_array *tr;
7443 	unsigned int index;
7444 	unsigned long val;
7445 	int ret;
7446 
7447 	get_tr_index(tr_index, &tr, &index);
7448 
7449 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7450 	if (ret)
7451 		return ret;
7452 
7453 	if (val != 0 && val != 1)
7454 		return -EINVAL;
7455 
7456 	mutex_lock(&trace_types_lock);
7457 	ret = set_tracer_flag(tr, 1 << index, val);
7458 	mutex_unlock(&trace_types_lock);
7459 
7460 	if (ret < 0)
7461 		return ret;
7462 
7463 	*ppos += cnt;
7464 
7465 	return cnt;
7466 }
7467 
7468 static const struct file_operations trace_options_core_fops = {
7469 	.open = tracing_open_generic,
7470 	.read = trace_options_core_read,
7471 	.write = trace_options_core_write,
7472 	.llseek = generic_file_llseek,
7473 };
7474 
7475 struct dentry *trace_create_file(const char *name,
7476 				 umode_t mode,
7477 				 struct dentry *parent,
7478 				 void *data,
7479 				 const struct file_operations *fops)
7480 {
7481 	struct dentry *ret;
7482 
7483 	ret = tracefs_create_file(name, mode, parent, data, fops);
7484 	if (!ret)
7485 		pr_warn("Could not create tracefs '%s' entry\n", name);
7486 
7487 	return ret;
7488 }
7489 
7490 
7491 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7492 {
7493 	struct dentry *d_tracer;
7494 
7495 	if (tr->options)
7496 		return tr->options;
7497 
7498 	d_tracer = tracing_get_dentry(tr);
7499 	if (IS_ERR(d_tracer))
7500 		return NULL;
7501 
7502 	tr->options = tracefs_create_dir("options", d_tracer);
7503 	if (!tr->options) {
7504 		pr_warn("Could not create tracefs directory 'options'\n");
7505 		return NULL;
7506 	}
7507 
7508 	return tr->options;
7509 }
7510 
7511 static void
7512 create_trace_option_file(struct trace_array *tr,
7513 			 struct trace_option_dentry *topt,
7514 			 struct tracer_flags *flags,
7515 			 struct tracer_opt *opt)
7516 {
7517 	struct dentry *t_options;
7518 
7519 	t_options = trace_options_init_dentry(tr);
7520 	if (!t_options)
7521 		return;
7522 
7523 	topt->flags = flags;
7524 	topt->opt = opt;
7525 	topt->tr = tr;
7526 
7527 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7528 				    &trace_options_fops);
7529 
7530 }
7531 
7532 static void
7533 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7534 {
7535 	struct trace_option_dentry *topts;
7536 	struct trace_options *tr_topts;
7537 	struct tracer_flags *flags;
7538 	struct tracer_opt *opts;
7539 	int cnt;
7540 	int i;
7541 
7542 	if (!tracer)
7543 		return;
7544 
7545 	flags = tracer->flags;
7546 
7547 	if (!flags || !flags->opts)
7548 		return;
7549 
7550 	/*
7551 	 * If this is an instance, only create flags for tracers
7552 	 * the instance may have.
7553 	 */
7554 	if (!trace_ok_for_array(tracer, tr))
7555 		return;
7556 
7557 	for (i = 0; i < tr->nr_topts; i++) {
7558 		/* Make sure there are no duplicate flags. */
7559 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7560 			return;
7561 	}
7562 
7563 	opts = flags->opts;
7564 
7565 	for (cnt = 0; opts[cnt].name; cnt++)
7566 		;
7567 
7568 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7569 	if (!topts)
7570 		return;
7571 
7572 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7573 			    GFP_KERNEL);
7574 	if (!tr_topts) {
7575 		kfree(topts);
7576 		return;
7577 	}
7578 
7579 	tr->topts = tr_topts;
7580 	tr->topts[tr->nr_topts].tracer = tracer;
7581 	tr->topts[tr->nr_topts].topts = topts;
7582 	tr->nr_topts++;
7583 
7584 	for (cnt = 0; opts[cnt].name; cnt++) {
7585 		create_trace_option_file(tr, &topts[cnt], flags,
7586 					 &opts[cnt]);
7587 		WARN_ONCE(topts[cnt].entry == NULL,
7588 			  "Failed to create trace option: %s",
7589 			  opts[cnt].name);
7590 	}
7591 }
7592 
7593 static struct dentry *
7594 create_trace_option_core_file(struct trace_array *tr,
7595 			      const char *option, long index)
7596 {
7597 	struct dentry *t_options;
7598 
7599 	t_options = trace_options_init_dentry(tr);
7600 	if (!t_options)
7601 		return NULL;
7602 
7603 	return trace_create_file(option, 0644, t_options,
7604 				 (void *)&tr->trace_flags_index[index],
7605 				 &trace_options_core_fops);
7606 }
7607 
7608 static void create_trace_options_dir(struct trace_array *tr)
7609 {
7610 	struct dentry *t_options;
7611 	bool top_level = tr == &global_trace;
7612 	int i;
7613 
7614 	t_options = trace_options_init_dentry(tr);
7615 	if (!t_options)
7616 		return;
7617 
7618 	for (i = 0; trace_options[i]; i++) {
7619 		if (top_level ||
7620 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7621 			create_trace_option_core_file(tr, trace_options[i], i);
7622 	}
7623 }
7624 
7625 static ssize_t
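/*
 * Handlers for the tracing_on control: reading reports whether the ring
 * buffer is currently recording, and writing 0 or 1 stops or restarts
 * recording, calling the tracer's stop()/start() callbacks as needed.
 */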
7626 rb_simple_read(struct file *filp, char __user *ubuf,
7627 	       size_t cnt, loff_t *ppos)
7628 {
7629 	struct trace_array *tr = filp->private_data;
7630 	char buf[64];
7631 	int r;
7632 
7633 	r = tracer_tracing_is_on(tr);
7634 	r = sprintf(buf, "%d\n", r);
7635 
7636 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7637 }
7638 
7639 static ssize_t
7640 rb_simple_write(struct file *filp, const char __user *ubuf,
7641 		size_t cnt, loff_t *ppos)
7642 {
7643 	struct trace_array *tr = filp->private_data;
7644 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7645 	unsigned long val;
7646 	int ret;
7647 
7648 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7649 	if (ret)
7650 		return ret;
7651 
7652 	if (buffer) {
7653 		mutex_lock(&trace_types_lock);
7654 		if (!!val == tracer_tracing_is_on(tr)) {
7655 			val = 0; /* do nothing */
7656 		} else if (val) {
7657 			tracer_tracing_on(tr);
7658 			if (tr->current_trace->start)
7659 				tr->current_trace->start(tr);
7660 		} else {
7661 			tracer_tracing_off(tr);
7662 			if (tr->current_trace->stop)
7663 				tr->current_trace->stop(tr);
7664 		}
7665 		mutex_unlock(&trace_types_lock);
7666 	}
7667 
7668 	(*ppos)++;
7669 
7670 	return cnt;
7671 }
7672 
7673 static const struct file_operations rb_simple_fops = {
7674 	.open		= tracing_open_generic_tr,
7675 	.read		= rb_simple_read,
7676 	.write		= rb_simple_write,
7677 	.release	= tracing_release_generic_tr,
7678 	.llseek		= default_llseek,
7679 };
7680 
7681 static ssize_t
7682 buffer_percent_read(struct file *filp, char __user *ubuf,
7683 		    size_t cnt, loff_t *ppos)
7684 {
7685 	struct trace_array *tr = filp->private_data;
7686 	char buf[64];
7687 	int r;
7688 
7689 	r = tr->buffer_percent;
7690 	r = sprintf(buf, "%d\n", r);
7691 
7692 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7693 }
7694 
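/*
 * Set how full (in percent) the ring buffer must be before a reader
 * blocked in wait_on_pipe() is woken; see tracing_buffers_splice_read()
 * above.  A write of 0 is bumped to 1.
 */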
7695 static ssize_t
7696 buffer_percent_write(struct file *filp, const char __user *ubuf,
7697 		     size_t cnt, loff_t *ppos)
7698 {
7699 	struct trace_array *tr = filp->private_data;
7700 	unsigned long val;
7701 	int ret;
7702 
7703 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7704 	if (ret)
7705 		return ret;
7706 
7707 	if (val > 100)
7708 		return -EINVAL;
7709 
7710 	if (!val)
7711 		val = 1;
7712 
7713 	tr->buffer_percent = val;
7714 
7715 	(*ppos)++;
7716 
7717 	return cnt;
7718 }
7719 
7720 static const struct file_operations buffer_percent_fops = {
7721 	.open		= tracing_open_generic_tr,
7722 	.read		= buffer_percent_read,
7723 	.write		= buffer_percent_write,
7724 	.release	= tracing_release_generic_tr,
7725 	.llseek		= default_llseek,
7726 };
7727 
7728 struct dentry *trace_instance_dir;
7729 
7730 static void
7731 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7732 
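/*
 * Allocate the ring buffer and the per-CPU trace_array_cpu data for
 * one trace_buffer, honouring the "overwrite" trace flag. On failure
 * anything already allocated is freed and -ENOMEM is returned.
 */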
7733 static int
7734 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7735 {
7736 	enum ring_buffer_flags rb_flags;
7737 
7738 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7739 
7740 	buf->tr = tr;
7741 
7742 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7743 	if (!buf->buffer)
7744 		return -ENOMEM;
7745 
7746 	buf->data = alloc_percpu(struct trace_array_cpu);
7747 	if (!buf->data) {
7748 		ring_buffer_free(buf->buffer);
7749 		buf->buffer = NULL;
7750 		return -ENOMEM;
7751 	}
7752 
7753 	/* Allocate the first page for all buffers */
7754 	set_buffer_entries(&tr->trace_buffer,
7755 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7756 
7757 	return 0;
7758 }
7759 
7760 static int allocate_trace_buffers(struct trace_array *tr, int size)
7761 {
7762 	int ret;
7763 
7764 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7765 	if (ret)
7766 		return ret;
7767 
7768 #ifdef CONFIG_TRACER_MAX_TRACE
7769 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7770 				    allocate_snapshot ? size : 1);
7771 	if (WARN_ON(ret)) {
7772 		ring_buffer_free(tr->trace_buffer.buffer);
7773 		tr->trace_buffer.buffer = NULL;
7774 		free_percpu(tr->trace_buffer.data);
7775 		tr->trace_buffer.data = NULL;
7776 		return -ENOMEM;
7777 	}
7778 	tr->allocated_snapshot = allocate_snapshot;
7779 
7780 	/*
7781 	 * Only the top level trace array gets its snapshot allocated
7782 	 * from the kernel command line.
7783 	 */
7784 	allocate_snapshot = false;
7785 #endif
7786 	return 0;
7787 }
7788 
7789 static void free_trace_buffer(struct trace_buffer *buf)
7790 {
7791 	if (buf->buffer) {
7792 		ring_buffer_free(buf->buffer);
7793 		buf->buffer = NULL;
7794 		free_percpu(buf->data);
7795 		buf->data = NULL;
7796 	}
7797 }
7798 
7799 static void free_trace_buffers(struct trace_array *tr)
7800 {
7801 	if (!tr)
7802 		return;
7803 
7804 	free_trace_buffer(&tr->trace_buffer);
7805 
7806 #ifdef CONFIG_TRACER_MAX_TRACE
7807 	free_trace_buffer(&tr->max_buffer);
7808 #endif
7809 }
7810 
7811 static void init_trace_flags_index(struct trace_array *tr)
7812 {
7813 	int i;
7814 
7815 	/* Used by the trace options files */
7816 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7817 		tr->trace_flags_index[i] = i;
7818 }
7819 
7820 static void __update_tracer_options(struct trace_array *tr)
7821 {
7822 	struct tracer *t;
7823 
7824 	for (t = trace_types; t; t = t->next)
7825 		add_tracer_options(tr, t);
7826 }
7827 
7828 static void update_tracer_options(struct trace_array *tr)
7829 {
7830 	mutex_lock(&trace_types_lock);
7831 	__update_tracer_options(tr);
7832 	mutex_unlock(&trace_types_lock);
7833 }
7834 
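/*
 * Back end for "mkdir" in the instances directory: allocate a new
 * trace_array with its own buffers and tracefs files and add it to
 * ftrace_trace_arrays. Fails with -EEXIST if an instance with the
 * same name already exists.
 */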
7835 static int instance_mkdir(const char *name)
7836 {
7837 	struct trace_array *tr;
7838 	int ret;
7839 
7840 	mutex_lock(&event_mutex);
7841 	mutex_lock(&trace_types_lock);
7842 
7843 	ret = -EEXIST;
7844 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7845 		if (tr->name && strcmp(tr->name, name) == 0)
7846 			goto out_unlock;
7847 	}
7848 
7849 	ret = -ENOMEM;
7850 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7851 	if (!tr)
7852 		goto out_unlock;
7853 
7854 	tr->name = kstrdup(name, GFP_KERNEL);
7855 	if (!tr->name)
7856 		goto out_free_tr;
7857 
7858 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7859 		goto out_free_tr;
7860 
7861 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7862 
7863 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7864 
7865 	raw_spin_lock_init(&tr->start_lock);
7866 
7867 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7868 
7869 	tr->current_trace = &nop_trace;
7870 
7871 	INIT_LIST_HEAD(&tr->systems);
7872 	INIT_LIST_HEAD(&tr->events);
7873 	INIT_LIST_HEAD(&tr->hist_vars);
7874 
7875 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7876 		goto out_free_tr;
7877 
7878 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7879 	if (!tr->dir)
7880 		goto out_free_tr;
7881 
7882 	ret = event_trace_add_tracer(tr->dir, tr);
7883 	if (ret) {
7884 		tracefs_remove_recursive(tr->dir);
7885 		goto out_free_tr;
7886 	}
7887 
7888 	ftrace_init_trace_array(tr);
7889 
7890 	init_tracer_tracefs(tr, tr->dir);
7891 	init_trace_flags_index(tr);
7892 	__update_tracer_options(tr);
7893 
7894 	list_add(&tr->list, &ftrace_trace_arrays);
7895 
7896 	mutex_unlock(&trace_types_lock);
7897 	mutex_unlock(&event_mutex);
7898 
7899 	return 0;
7900 
7901  out_free_tr:
7902 	free_trace_buffers(tr);
7903 	free_cpumask_var(tr->tracing_cpumask);
7904 	kfree(tr->name);
7905 	kfree(tr);
7906 
7907  out_unlock:
7908 	mutex_unlock(&trace_types_lock);
7909 	mutex_unlock(&event_mutex);
7910 
7911 	return ret;
7912 
7913 }
7914 
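/*
 * Back end for "rmdir" in the instances directory: tear down the named
 * trace_array and free its buffers. Fails with -ENODEV if no such
 * instance exists and with -EBUSY while it is still referenced.
 */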
7915 static int instance_rmdir(const char *name)
7916 {
7917 	struct trace_array *tr;
7918 	int found = 0;
7919 	int ret;
7920 	int i;
7921 
7922 	mutex_lock(&event_mutex);
7923 	mutex_lock(&trace_types_lock);
7924 
7925 	ret = -ENODEV;
7926 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7927 		if (tr->name && strcmp(tr->name, name) == 0) {
7928 			found = 1;
7929 			break;
7930 		}
7931 	}
7932 	if (!found)
7933 		goto out_unlock;
7934 
7935 	ret = -EBUSY;
7936 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7937 		goto out_unlock;
7938 
7939 	list_del(&tr->list);
7940 
7941 	/* Disable all the flags that were enabled coming in */
7942 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7943 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7944 			set_tracer_flag(tr, 1 << i, 0);
7945 	}
7946 
7947 	tracing_set_nop(tr);
7948 	clear_ftrace_function_probes(tr);
7949 	event_trace_del_tracer(tr);
7950 	ftrace_clear_pids(tr);
7951 	ftrace_destroy_function_files(tr);
7952 	tracefs_remove_recursive(tr->dir);
7953 	free_trace_buffers(tr);
7954 
7955 	for (i = 0; i < tr->nr_topts; i++) {
7956 		kfree(tr->topts[i].topts);
7957 	}
7958 	kfree(tr->topts);
7959 
7960 	free_cpumask_var(tr->tracing_cpumask);
7961 	kfree(tr->name);
7962 	kfree(tr);
7963 
7964 	ret = 0;
7965 
7966  out_unlock:
7967 	mutex_unlock(&trace_types_lock);
7968 	mutex_unlock(&event_mutex);
7969 
7970 	return ret;
7971 }
7972 
7973 static __init void create_trace_instances(struct dentry *d_tracer)
7974 {
7975 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7976 							 instance_mkdir,
7977 							 instance_rmdir);
7978 	if (WARN_ON(!trace_instance_dir))
7979 		return;
7980 }
7981 
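/*
 * Create the standard set of control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, tracing_on, ...) for a trace array.
 * Used for both the top level tracing directory and each instance.
 */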
7982 static void
7983 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7984 {
7985 	struct trace_event_file *file;
7986 	int cpu;
7987 
7988 	trace_create_file("available_tracers", 0444, d_tracer,
7989 			tr, &show_traces_fops);
7990 
7991 	trace_create_file("current_tracer", 0644, d_tracer,
7992 			tr, &set_tracer_fops);
7993 
7994 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7995 			  tr, &tracing_cpumask_fops);
7996 
7997 	trace_create_file("trace_options", 0644, d_tracer,
7998 			  tr, &tracing_iter_fops);
7999 
8000 	trace_create_file("trace", 0644, d_tracer,
8001 			  tr, &tracing_fops);
8002 
8003 	trace_create_file("trace_pipe", 0444, d_tracer,
8004 			  tr, &tracing_pipe_fops);
8005 
8006 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8007 			  tr, &tracing_entries_fops);
8008 
8009 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8010 			  tr, &tracing_total_entries_fops);
8011 
8012 	trace_create_file("free_buffer", 0200, d_tracer,
8013 			  tr, &tracing_free_buffer_fops);
8014 
8015 	trace_create_file("trace_marker", 0220, d_tracer,
8016 			  tr, &tracing_mark_fops);
8017 
8018 	file = __find_event_file(tr, "ftrace", "print");
8019 	if (file && file->dir)
8020 		trace_create_file("trigger", 0644, file->dir, file,
8021 				  &event_trigger_fops);
8022 	tr->trace_marker_file = file;
8023 
8024 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8025 			  tr, &tracing_mark_raw_fops);
8026 
8027 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8028 			  &trace_clock_fops);
8029 
8030 	trace_create_file("tracing_on", 0644, d_tracer,
8031 			  tr, &rb_simple_fops);
8032 
8033 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8034 			  &trace_time_stamp_mode_fops);
8035 
8036 	tr->buffer_percent = 50;
8037 
8038 	trace_create_file("buffer_percent", 0444, d_tracer,
8039 			tr, &buffer_percent_fops);
8040 
8041 	create_trace_options_dir(tr);
8042 
8043 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8044 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8045 			&tr->max_latency, &tracing_max_lat_fops);
8046 #endif
8047 
8048 	if (ftrace_create_function_files(tr, d_tracer))
8049 		WARN(1, "Could not allocate function filter files");
8050 
8051 #ifdef CONFIG_TRACER_SNAPSHOT
8052 	trace_create_file("snapshot", 0644, d_tracer,
8053 			  tr, &snapshot_fops);
8054 #endif
8055 
8056 	for_each_tracing_cpu(cpu)
8057 		tracing_init_tracefs_percpu(tr, cpu);
8058 
8059 	ftrace_init_tracefs(tr, d_tracer);
8060 }
8061 
8062 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8063 {
8064 	struct vfsmount *mnt;
8065 	struct file_system_type *type;
8066 
8067 	/*
8068 	 * To maintain backward compatibility for tools that mount
8069 	 * debugfs to get to the tracing facility, tracefs is automatically
8070 	 * mounted to the debugfs/tracing directory.
8071 	 */
8072 	type = get_fs_type("tracefs");
8073 	if (!type)
8074 		return NULL;
8075 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8076 	put_filesystem(type);
8077 	if (IS_ERR(mnt))
8078 		return NULL;
8079 	mntget(mnt);
8080 
8081 	return mnt;
8082 }
8083 
8084 /**
8085  * tracing_init_dentry - initialize top level trace array
8086  *
8087  * This is called when creating files or directories in the tracing
8088  * directory. It is called via fs_initcall() by any of the boot up code
8089  * and expects to return the dentry of the top level tracing directory.
8090  */
8091 struct dentry *tracing_init_dentry(void)
8092 {
8093 	struct trace_array *tr = &global_trace;
8094 
8095 	/* The top level trace array uses NULL as parent */
8096 	if (tr->dir)
8097 		return NULL;
8098 
8099 	if (WARN_ON(!tracefs_initialized()) ||
8100 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8101 		 WARN_ON(!debugfs_initialized())))
8102 		return ERR_PTR(-ENODEV);
8103 
8104 	/*
8105 	 * As there may still be users that expect the tracing
8106 	 * files to exist in debugfs/tracing, we must automount
8107 	 * the tracefs file system there, so older tools still
8108 	 * work with the newer kernel.
8109 	 */
8110 	tr->dir = debugfs_create_automount("tracing", NULL,
8111 					   trace_automount, NULL);
8112 	if (!tr->dir) {
8113 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8114 		return ERR_PTR(-ENOMEM);
8115 	}
8116 
8117 	return NULL;
8118 }
8119 
8120 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8121 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8122 
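/*
 * Register the eval maps built into the kernel image itself, bounded
 * by the __start/__stop_ftrace_eval_maps section markers.
 */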
8123 static void __init trace_eval_init(void)
8124 {
8125 	int len;
8126 
8127 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8128 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8129 }
8130 
8131 #ifdef CONFIG_MODULES
8132 static void trace_module_add_evals(struct module *mod)
8133 {
8134 	if (!mod->num_trace_evals)
8135 		return;
8136 
8137 	/*
8138 	 * Modules with bad taint do not have events created, so do
8139 	 * not bother with their eval maps (enums) either.
8140 	 */
8141 	if (trace_module_has_bad_taint(mod))
8142 		return;
8143 
8144 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8145 }
8146 
8147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8148 static void trace_module_remove_evals(struct module *mod)
8149 {
8150 	union trace_eval_map_item *map;
8151 	union trace_eval_map_item **last = &trace_eval_maps;
8152 
8153 	if (!mod->num_trace_evals)
8154 		return;
8155 
8156 	mutex_lock(&trace_eval_mutex);
8157 
8158 	map = trace_eval_maps;
8159 
8160 	while (map) {
8161 		if (map->head.mod == mod)
8162 			break;
8163 		map = trace_eval_jmp_to_tail(map);
8164 		last = &map->tail.next;
8165 		map = map->tail.next;
8166 	}
8167 	if (!map)
8168 		goto out;
8169 
8170 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8171 	kfree(map);
8172  out:
8173 	mutex_unlock(&trace_eval_mutex);
8174 }
8175 #else
8176 static inline void trace_module_remove_evals(struct module *mod) { }
8177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8178 
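/*
 * Module notifier: add a module's eval maps when it is loaded and
 * remove them again when it is unloaded.
 */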
8179 static int trace_module_notify(struct notifier_block *self,
8180 			       unsigned long val, void *data)
8181 {
8182 	struct module *mod = data;
8183 
8184 	switch (val) {
8185 	case MODULE_STATE_COMING:
8186 		trace_module_add_evals(mod);
8187 		break;
8188 	case MODULE_STATE_GOING:
8189 		trace_module_remove_evals(mod);
8190 		break;
8191 	}
8192 
8193 	return 0;
8194 }
8195 
8196 static struct notifier_block trace_module_nb = {
8197 	.notifier_call = trace_module_notify,
8198 	.priority = 0,
8199 };
8200 #endif /* CONFIG_MODULES */
8201 
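/*
 * fs_initcall: once tracefs is available, populate the top level
 * tracing directory, create the "instances" directory and apply the
 * tracer options for the global trace array.
 */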
8202 static __init int tracer_init_tracefs(void)
8203 {
8204 	struct dentry *d_tracer;
8205 
8206 	trace_access_lock_init();
8207 
8208 	d_tracer = tracing_init_dentry();
8209 	if (IS_ERR(d_tracer))
8210 		return 0;
8211 
8212 	event_trace_init();
8213 
8214 	init_tracer_tracefs(&global_trace, d_tracer);
8215 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8216 
8217 	trace_create_file("tracing_thresh", 0644, d_tracer,
8218 			&global_trace, &tracing_thresh_fops);
8219 
8220 	trace_create_file("README", 0444, d_tracer,
8221 			NULL, &tracing_readme_fops);
8222 
8223 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8224 			NULL, &tracing_saved_cmdlines_fops);
8225 
8226 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8227 			  NULL, &tracing_saved_cmdlines_size_fops);
8228 
8229 	trace_create_file("saved_tgids", 0444, d_tracer,
8230 			NULL, &tracing_saved_tgids_fops);
8231 
8232 	trace_eval_init();
8233 
8234 	trace_create_eval_file(d_tracer);
8235 
8236 #ifdef CONFIG_MODULES
8237 	register_module_notifier(&trace_module_nb);
8238 #endif
8239 
8240 #ifdef CONFIG_DYNAMIC_FTRACE
8241 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8242 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8243 #endif
8244 
8245 	create_trace_instances(d_tracer);
8246 
8247 	update_tracer_options(&global_trace);
8248 
8249 	return 0;
8250 }
8251 
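/*
 * Panic and die notifiers: dump the ftrace ring buffers to the
 * console when ftrace_dump_on_oops is set.
 */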
8252 static int trace_panic_handler(struct notifier_block *this,
8253 			       unsigned long event, void *unused)
8254 {
8255 	if (ftrace_dump_on_oops)
8256 		ftrace_dump(ftrace_dump_on_oops);
8257 	return NOTIFY_OK;
8258 }
8259 
8260 static struct notifier_block trace_panic_notifier = {
8261 	.notifier_call  = trace_panic_handler,
8262 	.next           = NULL,
8263 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8264 };
8265 
8266 static int trace_die_handler(struct notifier_block *self,
8267 			     unsigned long val,
8268 			     void *data)
8269 {
8270 	switch (val) {
8271 	case DIE_OOPS:
8272 		if (ftrace_dump_on_oops)
8273 			ftrace_dump(ftrace_dump_on_oops);
8274 		break;
8275 	default:
8276 		break;
8277 	}
8278 	return NOTIFY_OK;
8279 }
8280 
8281 static struct notifier_block trace_die_notifier = {
8282 	.notifier_call = trace_die_handler,
8283 	.priority = 200
8284 };
8285 
8286 /*
8287  * printk is set to a max of 1024; we really don't need it that big.
8288  * Nothing should be printing 1000 characters anyway.
8289  */
8290 #define TRACE_MAX_PRINT		1000
8291 
8292 /*
8293  * Define here KERN_TRACE so that we have one place to modify
8294  * it if we decide to change what log level the ftrace dump
8295  * should be at.
8296  */
8297 #define KERN_TRACE		KERN_EMERG
8298 
8299 void
8300 trace_printk_seq(struct trace_seq *s)
8301 {
8302 	/* Probably should print a warning here. */
8303 	if (s->seq.len >= TRACE_MAX_PRINT)
8304 		s->seq.len = TRACE_MAX_PRINT;
8305 
8306 	/*
8307 	 * More paranoid code. Although the buffer size is set to
8308 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8309 	 * an extra layer of protection.
8310 	 */
8311 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8312 		s->seq.len = s->seq.size - 1;
8313 
8314 	/* should be NUL terminated, but we are paranoid. */
8315 	s->buffer[s->seq.len] = 0;
8316 
8317 	printk(KERN_TRACE "%s", s->buffer);
8318 
8319 	trace_seq_init(s);
8320 }
8321 
8322 void trace_init_global_iter(struct trace_iterator *iter)
8323 {
8324 	iter->tr = &global_trace;
8325 	iter->trace = iter->tr->current_trace;
8326 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8327 	iter->trace_buffer = &global_trace.trace_buffer;
8328 
8329 	if (iter->trace && iter->trace->open)
8330 		iter->trace->open(iter);
8331 
8332 	/* Annotate start of buffers if we had overruns */
8333 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8334 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8335 
8336 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8337 	if (trace_clocks[iter->tr->clock_id].in_ns)
8338 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8339 }
8340 
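/*
 * Dump the ring buffer contents to the console, typically from a
 * panic, an oops or sysrq-z. Tracing is turned off, per-CPU recording
 * is disabled while the buffers are drained, and only one dump may
 * run at a time.
 */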
8341 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8342 {
8343 	/* use static because iter can be a bit big for the stack */
8344 	static struct trace_iterator iter;
8345 	static atomic_t dump_running;
8346 	struct trace_array *tr = &global_trace;
8347 	unsigned int old_userobj;
8348 	unsigned long flags;
8349 	int cnt = 0, cpu;
8350 
8351 	/* Only allow one dump user at a time. */
8352 	if (atomic_inc_return(&dump_running) != 1) {
8353 		atomic_dec(&dump_running);
8354 		return;
8355 	}
8356 
8357 	/*
8358 	 * Always turn off tracing when we dump.
8359 	 * We don't need to show trace output of what happens
8360 	 * between multiple crashes.
8361 	 *
8362 	 * If the user does a sysrq-z, then they can re-enable
8363 	 * tracing with echo 1 > tracing_on.
8364 	 */
8365 	tracing_off();
8366 
8367 	local_irq_save(flags);
8368 	printk_nmi_direct_enter();
8369 
8370 	/* Simulate the iterator */
8371 	trace_init_global_iter(&iter);
8372 
8373 	for_each_tracing_cpu(cpu) {
8374 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8375 	}
8376 
8377 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8378 
8379 	/* don't look at user memory in panic mode */
8380 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8381 
8382 	switch (oops_dump_mode) {
8383 	case DUMP_ALL:
8384 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8385 		break;
8386 	case DUMP_ORIG:
8387 		iter.cpu_file = raw_smp_processor_id();
8388 		break;
8389 	case DUMP_NONE:
8390 		goto out_enable;
8391 	default:
8392 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8393 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8394 	}
8395 
8396 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8397 
8398 	/* Did function tracer already get disabled? */
8399 	if (ftrace_is_dead()) {
8400 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8401 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8402 	}
8403 
8404 	/*
8405 	 * We need to stop all tracing on all CPUs to read the
8406 	 * next buffer. This is a bit expensive, but is
8407 	 * not done often. We fill all that we can read,
8408 	 * and then release the locks again.
8409 	 */
8410 
8411 	while (!trace_empty(&iter)) {
8412 
8413 		if (!cnt)
8414 			printk(KERN_TRACE "---------------------------------\n");
8415 
8416 		cnt++;
8417 
8418 		/* reset all but tr, trace, and overruns */
8419 		memset(&iter.seq, 0,
8420 		       sizeof(struct trace_iterator) -
8421 		       offsetof(struct trace_iterator, seq));
8422 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8423 		iter.pos = -1;
8424 
8425 		if (trace_find_next_entry_inc(&iter) != NULL) {
8426 			int ret;
8427 
8428 			ret = print_trace_line(&iter);
8429 			if (ret != TRACE_TYPE_NO_CONSUME)
8430 				trace_consume(&iter);
8431 		}
8432 		touch_nmi_watchdog();
8433 
8434 		trace_printk_seq(&iter.seq);
8435 	}
8436 
8437 	if (!cnt)
8438 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8439 	else
8440 		printk(KERN_TRACE "---------------------------------\n");
8441 
8442  out_enable:
8443 	tr->trace_flags |= old_userobj;
8444 
8445 	for_each_tracing_cpu(cpu) {
8446 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8447 	}
8448 	atomic_dec(&dump_running);
8449 	printk_nmi_direct_exit();
8450 	local_irq_restore(flags);
8451 }
8452 EXPORT_SYMBOL_GPL(ftrace_dump);
8453 
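/*
 * Split @buf into an argv array and hand the result to @createfn.
 * Returns -ENOMEM if the split fails, otherwise createfn's return
 * value (or 0 for an empty line).
 */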
8454 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8455 {
8456 	char **argv;
8457 	int argc, ret;
8458 
8459 	argc = 0;
8460 	ret = 0;
8461 	argv = argv_split(GFP_KERNEL, buf, &argc);
8462 	if (!argv)
8463 		return -ENOMEM;
8464 
8465 	if (argc)
8466 		ret = createfn(argc, argv);
8467 
8468 	argv_free(argv);
8469 
8470 	return ret;
8471 }
8472 
8473 #define WRITE_BUFSIZE  4096
8474 
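/*
 * Copy user input in WRITE_BUFSIZE chunks, split it into lines, strip
 * '#' comments and feed each line to @createfn through
 * trace_run_command(). Over-long lines are rejected with -EINVAL.
 */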
8475 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8476 				size_t count, loff_t *ppos,
8477 				int (*createfn)(int, char **))
8478 {
8479 	char *kbuf, *buf, *tmp;
8480 	int ret = 0;
8481 	size_t done = 0;
8482 	size_t size;
8483 
8484 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8485 	if (!kbuf)
8486 		return -ENOMEM;
8487 
8488 	while (done < count) {
8489 		size = count - done;
8490 
8491 		if (size >= WRITE_BUFSIZE)
8492 			size = WRITE_BUFSIZE - 1;
8493 
8494 		if (copy_from_user(kbuf, buffer + done, size)) {
8495 			ret = -EFAULT;
8496 			goto out;
8497 		}
8498 		kbuf[size] = '\0';
8499 		buf = kbuf;
8500 		do {
8501 			tmp = strchr(buf, '\n');
8502 			if (tmp) {
8503 				*tmp = '\0';
8504 				size = tmp - buf + 1;
8505 			} else {
8506 				size = strlen(buf);
8507 				if (done + size < count) {
8508 					if (buf != kbuf)
8509 						break;
8510 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8511 					pr_warn("Line length is too long: Should be less than %d\n",
8512 						WRITE_BUFSIZE - 2);
8513 					ret = -EINVAL;
8514 					goto out;
8515 				}
8516 			}
8517 			done += size;
8518 
8519 			/* Remove comments */
8520 			tmp = strchr(buf, '#');
8521 
8522 			if (tmp)
8523 				*tmp = '\0';
8524 
8525 			ret = trace_run_command(buf, createfn);
8526 			if (ret)
8527 				goto out;
8528 			buf += size;
8529 
8530 		} while (done < count);
8531 	}
8532 	ret = done;
8533 
8534 out:
8535 	kfree(kbuf);
8536 
8537 	return ret;
8538 }
8539 
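/*
 * Early boot setup, called from early_trace_init(): allocate the
 * tracing cpumasks and the global ring buffers, register the nop
 * tracer and the panic/die notifiers, and add global_trace to the
 * list of trace arrays.
 */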
8540 __init static int tracer_alloc_buffers(void)
8541 {
8542 	int ring_buf_size;
8543 	int ret = -ENOMEM;
8544 
8545 	/*
8546 	 * Make sure we don't accidentally add more trace options
8547 	 * than we have bits for.
8548 	 */
8549 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8550 
8551 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8552 		goto out;
8553 
8554 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8555 		goto out_free_buffer_mask;
8556 
8557 	/* Only allocate trace_printk buffers if a trace_printk exists */
8558 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8559 		/* Must be called before global_trace.buffer is allocated */
8560 		trace_printk_init_buffers();
8561 
8562 	/* To save memory, keep the ring buffer size at its minimum */
8563 	if (ring_buffer_expanded)
8564 		ring_buf_size = trace_buf_size;
8565 	else
8566 		ring_buf_size = 1;
8567 
8568 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8569 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8570 
8571 	raw_spin_lock_init(&global_trace.start_lock);
8572 
8573 	/*
8574 	 * The prepare callback allocates some memory for the ring buffer. We
8575 	 * don't free the buffer if the CPU goes down. If we were to free
8576 	 * the buffer, then the user would lose any trace that was in the
8577 	 * buffer. The memory will be removed once the "instance" is removed.
8578 	 */
8579 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8580 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8581 				      NULL);
8582 	if (ret < 0)
8583 		goto out_free_cpumask;
8584 	/* Used for event triggers */
8585 	ret = -ENOMEM;
8586 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8587 	if (!temp_buffer)
8588 		goto out_rm_hp_state;
8589 
8590 	if (trace_create_savedcmd() < 0)
8591 		goto out_free_temp_buffer;
8592 
8593 	/* TODO: make the number of buffers hot pluggable with CPUS */
8594 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8595 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8596 		WARN_ON(1);
8597 		goto out_free_savedcmd;
8598 	}
8599 
8600 	if (global_trace.buffer_disabled)
8601 		tracing_off();
8602 
8603 	if (trace_boot_clock) {
8604 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8605 		if (ret < 0)
8606 			pr_warn("Trace clock %s not defined, going back to default\n",
8607 				trace_boot_clock);
8608 	}
8609 
8610 	/*
8611 	 * register_tracer() might reference current_trace, so it
8612 	 * needs to be set before we register anything. This is
8613 	 * just a bootstrap of current_trace anyway.
8614 	 */
8615 	global_trace.current_trace = &nop_trace;
8616 
8617 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8618 
8619 	ftrace_init_global_array_ops(&global_trace);
8620 
8621 	init_trace_flags_index(&global_trace);
8622 
8623 	register_tracer(&nop_trace);
8624 
8625 	/* Function tracing may start here (via kernel command line) */
8626 	init_function_trace();
8627 
8628 	/* All seems OK, enable tracing */
8629 	tracing_disabled = 0;
8630 
8631 	atomic_notifier_chain_register(&panic_notifier_list,
8632 				       &trace_panic_notifier);
8633 
8634 	register_die_notifier(&trace_die_notifier);
8635 
8636 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8637 
8638 	INIT_LIST_HEAD(&global_trace.systems);
8639 	INIT_LIST_HEAD(&global_trace.events);
8640 	INIT_LIST_HEAD(&global_trace.hist_vars);
8641 	list_add(&global_trace.list, &ftrace_trace_arrays);
8642 
8643 	apply_trace_boot_options();
8644 
8645 	register_snapshot_cmd();
8646 
8647 	return 0;
8648 
8649 out_free_savedcmd:
8650 	free_saved_cmdlines_buffer(savedcmd);
8651 out_free_temp_buffer:
8652 	ring_buffer_free(temp_buffer);
8653 out_rm_hp_state:
8654 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8655 out_free_cpumask:
8656 	free_cpumask_var(global_trace.tracing_cpumask);
8657 out_free_buffer_mask:
8658 	free_cpumask_var(tracing_buffer_mask);
8659 out:
8660 	return ret;
8661 }
8662 
8663 void __init early_trace_init(void)
8664 {
8665 	if (tracepoint_printk) {
8666 		tracepoint_print_iter =
8667 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8668 		if (WARN_ON(!tracepoint_print_iter))
8669 			tracepoint_printk = 0;
8670 		else
8671 			static_key_enable(&tracepoint_printk_key.key);
8672 	}
8673 	tracer_alloc_buffers();
8674 }
8675 
8676 void __init trace_init(void)
8677 {
8678 	trace_event_init();
8679 }
8680 
8681 __init static int clear_boot_tracer(void)
8682 {
8683 	/*
8684 	 * The default bootup tracer name points into an init section
8685 	 * that is freed after boot. This function is called as a late
8686 	 * initcall: if the boot tracer was never found, clear the pointer
8687 	 * to prevent a later registration from accessing the buffer that
8688 	 * is about to be freed.
8689 	 */
8690 	if (!default_bootup_tracer)
8691 		return 0;
8692 
8693 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8694 	       default_bootup_tracer);
8695 	default_bootup_tracer = NULL;
8696 
8697 	return 0;
8698 }
8699 
8700 fs_initcall(tracer_init_tracefs);
8701 late_initcall_sync(clear_boot_tracer);
8702 
8703 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8704 __init static int tracing_set_default_clock(void)
8705 {
8706 	/* sched_clock_stable() is determined in late_initcall */
8707 	if (!trace_boot_clock && !sched_clock_stable()) {
8708 		printk(KERN_WARNING
8709 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8710 		       "If you want to keep using the local clock, then add:\n"
8711 		       "  \"trace_clock=local\"\n"
8712 		       "on the kernel command line\n");
8713 		tracing_set_clock(&global_trace, "global");
8714 	}
8715 
8716 	return 0;
8717 }
8718 late_initcall_sync(tracing_set_default_clock);
8719 #endif
8720