xref: /openbmc/linux/kernel/trace/trace.c (revision 704cfd7f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will lurk into the ring-buffer to count the
60  * entries inserted during the selftest although some concurrent
61  * insertions into the ring-buffer such as trace_printk could occurred
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190 
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid already is +1 of the actual prevous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always recreate a new array. The write is an all or nothing
494 	 * operation. Always create a new array when adding new pids by
495 	 * the user. If the operation fails, then the current list is
496 	 * not modified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it will be much appreciated
614  * to not have to wait for all that output. Anyway this can be
615  * boot time and run time configurable.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * ring buffer serializes readers, but it is low level protection.
633  * The validity of the events (which returns by ring_buffer_peek() ..etc)
634  * are not protected by ring buffer.
635  *
636  * The content of events may become garbage if we allow other process consumes
637  * these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not reader page) in ring buffer, and this page will be rewrited
640  *      by events producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to system.
643  *
644  * These primitives allow multi process access to different cpu ring buffer
645  * concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multi read-only access are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races of where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer. instead, resize it because
975 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = tracing_alloc_snapshot_instance(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff), that just want to
1055 	 * know if the ring buffer has been disabled, but it can handle
1056 	 * races of where it gets disabled but we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 bool tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit postions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
1172 
1173 bool trace_clock_in_ns(struct trace_array *tr)
1174 {
1175 	if (trace_clocks[tr->clock_id].in_ns)
1176 		return true;
1177 
1178 	return false;
1179 }
1180 
1181 /*
1182  * trace_parser_get_init - gets the buffer for trace parser
1183  */
1184 int trace_parser_get_init(struct trace_parser *parser, int size)
1185 {
1186 	memset(parser, 0, sizeof(*parser));
1187 
1188 	parser->buffer = kmalloc(size, GFP_KERNEL);
1189 	if (!parser->buffer)
1190 		return 1;
1191 
1192 	parser->size = size;
1193 	return 0;
1194 }
1195 
1196 /*
1197  * trace_parser_put - frees the buffer for trace parser
1198  */
1199 void trace_parser_put(struct trace_parser *parser)
1200 {
1201 	kfree(parser->buffer);
1202 	parser->buffer = NULL;
1203 }
1204 
1205 /*
1206  * trace_get_user - reads the user input string separated by  space
1207  * (matched by isspace(ch))
1208  *
1209  * For each string found the 'struct trace_parser' is updated,
1210  * and the function returns.
1211  *
1212  * Returns number of bytes read.
1213  *
1214  * See kernel/trace/trace.h for 'struct trace_parser' details.
1215  */
1216 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217 	size_t cnt, loff_t *ppos)
1218 {
1219 	char ch;
1220 	size_t read = 0;
1221 	ssize_t ret;
1222 
1223 	if (!*ppos)
1224 		trace_parser_clear(parser);
1225 
1226 	ret = get_user(ch, ubuf++);
1227 	if (ret)
1228 		goto out;
1229 
1230 	read++;
1231 	cnt--;
1232 
1233 	/*
1234 	 * The parser is not finished with the last write,
1235 	 * continue reading the user input without skipping spaces.
1236 	 */
1237 	if (!parser->cont) {
1238 		/* skip white space */
1239 		while (cnt && isspace(ch)) {
1240 			ret = get_user(ch, ubuf++);
1241 			if (ret)
1242 				goto out;
1243 			read++;
1244 			cnt--;
1245 		}
1246 
1247 		parser->idx = 0;
1248 
1249 		/* only spaces were written */
1250 		if (isspace(ch) || !ch) {
1251 			*ppos += read;
1252 			ret = read;
1253 			goto out;
1254 		}
1255 	}
1256 
1257 	/* read the non-space input */
1258 	while (cnt && !isspace(ch) && ch) {
1259 		if (parser->idx < parser->size - 1)
1260 			parser->buffer[parser->idx++] = ch;
1261 		else {
1262 			ret = -EINVAL;
1263 			goto out;
1264 		}
1265 		ret = get_user(ch, ubuf++);
1266 		if (ret)
1267 			goto out;
1268 		read++;
1269 		cnt--;
1270 	}
1271 
1272 	/* We either got finished input or we have to wait for another call. */
1273 	if (isspace(ch) || !ch) {
1274 		parser->buffer[parser->idx] = 0;
1275 		parser->cont = false;
1276 	} else if (parser->idx < parser->size - 1) {
1277 		parser->cont = true;
1278 		parser->buffer[parser->idx++] = ch;
1279 		/* Make sure the parsed string always terminates with '\0'. */
1280 		parser->buffer[parser->idx] = 0;
1281 	} else {
1282 		ret = -EINVAL;
1283 		goto out;
1284 	}
1285 
1286 	*ppos += read;
1287 	ret = read;
1288 
1289 out:
1290 	return ret;
1291 }
1292 
1293 /* TODO add a seq_buf_to_buffer() */
1294 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295 {
1296 	int len;
1297 
1298 	if (trace_seq_used(s) <= s->seq.readpos)
1299 		return -EBUSY;
1300 
1301 	len = trace_seq_used(s) - s->seq.readpos;
1302 	if (cnt > len)
1303 		cnt = len;
1304 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305 
1306 	s->seq.readpos += cnt;
1307 	return cnt;
1308 }
1309 
1310 unsigned long __read_mostly	tracing_thresh;
1311 
1312 #ifdef CONFIG_TRACER_MAX_TRACE
1313 /*
1314  * Copy the new maximum trace into the separate maximum-trace
1315  * structure. (this way the maximum trace is permanently saved,
1316  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317  */
1318 static void
1319 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320 {
1321 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1322 	struct trace_buffer *max_buf = &tr->max_buffer;
1323 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325 
1326 	max_buf->cpu = cpu;
1327 	max_buf->time_start = data->preempt_timestamp;
1328 
1329 	max_data->saved_latency = tr->max_latency;
1330 	max_data->critical_start = data->critical_start;
1331 	max_data->critical_end = data->critical_end;
1332 
1333 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334 	max_data->pid = tsk->pid;
1335 	/*
1336 	 * If tsk == current, then use current_uid(), as that does not use
1337 	 * RCU. The irq tracer can be called out of RCU scope.
1338 	 */
1339 	if (tsk == current)
1340 		max_data->uid = current_uid();
1341 	else
1342 		max_data->uid = task_uid(tsk);
1343 
1344 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345 	max_data->policy = tsk->policy;
1346 	max_data->rt_priority = tsk->rt_priority;
1347 
1348 	/* record this tasks comm */
1349 	tracing_record_cmdline(tsk);
1350 }
1351 
1352 /**
1353  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354  * @tr: tracer
1355  * @tsk: the task with the latency
1356  * @cpu: The cpu that initiated the trace.
1357  *
1358  * Flip the buffers between the @tr and the max_tr and record information
1359  * about which task was the cause of this latency.
1360  */
1361 void
1362 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363 {
1364 	if (tr->stop_count)
1365 		return;
1366 
1367 	WARN_ON_ONCE(!irqs_disabled());
1368 
1369 	if (!tr->allocated_snapshot) {
1370 		/* Only the nop tracer should hit this when disabling */
1371 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372 		return;
1373 	}
1374 
1375 	arch_spin_lock(&tr->max_lock);
1376 
1377 	/* Inherit the recordable setting from trace_buffer */
1378 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379 		ring_buffer_record_on(tr->max_buffer.buffer);
1380 	else
1381 		ring_buffer_record_off(tr->max_buffer.buffer);
1382 
1383 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384 
1385 	__update_max_tr(tr, tsk, cpu);
1386 	arch_spin_unlock(&tr->max_lock);
1387 }
1388 
1389 /**
1390  * update_max_tr_single - only copy one trace over, and reset the rest
1391  * @tr - tracer
1392  * @tsk - task with the latency
1393  * @cpu - the cpu of the buffer to copy.
1394  *
1395  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396  */
1397 void
1398 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399 {
1400 	int ret;
1401 
1402 	if (tr->stop_count)
1403 		return;
1404 
1405 	WARN_ON_ONCE(!irqs_disabled());
1406 	if (!tr->allocated_snapshot) {
1407 		/* Only the nop tracer should hit this when disabling */
1408 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409 		return;
1410 	}
1411 
1412 	arch_spin_lock(&tr->max_lock);
1413 
1414 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415 
1416 	if (ret == -EBUSY) {
1417 		/*
1418 		 * We failed to swap the buffer due to a commit taking
1419 		 * place on this CPU. We fail to record, but we reset
1420 		 * the max trace buffer (no one writes directly to it)
1421 		 * and flag that it failed.
1422 		 */
1423 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424 			"Failed to swap buffers due to commit in progress\n");
1425 	}
1426 
1427 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428 
1429 	__update_max_tr(tr, tsk, cpu);
1430 	arch_spin_unlock(&tr->max_lock);
1431 }
1432 #endif /* CONFIG_TRACER_MAX_TRACE */
1433 
1434 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1435 {
1436 	/* Iterators are static, they should be filled or empty */
1437 	if (trace_buffer_iter(iter, iter->cpu_file))
1438 		return 0;
1439 
1440 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441 				full);
1442 }
1443 
1444 #ifdef CONFIG_FTRACE_STARTUP_TEST
1445 static bool selftests_can_run;
1446 
1447 struct trace_selftests {
1448 	struct list_head		list;
1449 	struct tracer			*type;
1450 };
1451 
1452 static LIST_HEAD(postponed_selftests);
1453 
1454 static int save_selftest(struct tracer *type)
1455 {
1456 	struct trace_selftests *selftest;
1457 
1458 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459 	if (!selftest)
1460 		return -ENOMEM;
1461 
1462 	selftest->type = type;
1463 	list_add(&selftest->list, &postponed_selftests);
1464 	return 0;
1465 }
1466 
1467 static int run_tracer_selftest(struct tracer *type)
1468 {
1469 	struct trace_array *tr = &global_trace;
1470 	struct tracer *saved_tracer = tr->current_trace;
1471 	int ret;
1472 
1473 	if (!type->selftest || tracing_selftest_disabled)
1474 		return 0;
1475 
1476 	/*
1477 	 * If a tracer registers early in boot up (before scheduling is
1478 	 * initialized and such), then do not run its selftests yet.
1479 	 * Instead, run it a little later in the boot process.
1480 	 */
1481 	if (!selftests_can_run)
1482 		return save_selftest(type);
1483 
1484 	/*
1485 	 * Run a selftest on this tracer.
1486 	 * Here we reset the trace buffer, and set the current
1487 	 * tracer to be this tracer. The tracer can then run some
1488 	 * internal tracing to verify that everything is in order.
1489 	 * If we fail, we do not register this tracer.
1490 	 */
1491 	tracing_reset_online_cpus(&tr->trace_buffer);
1492 
1493 	tr->current_trace = type;
1494 
1495 #ifdef CONFIG_TRACER_MAX_TRACE
1496 	if (type->use_max_tr) {
1497 		/* If we expanded the buffers, make sure the max is expanded too */
1498 		if (ring_buffer_expanded)
1499 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500 					   RING_BUFFER_ALL_CPUS);
1501 		tr->allocated_snapshot = true;
1502 	}
1503 #endif
1504 
1505 	/* the test is responsible for initializing and enabling */
1506 	pr_info("Testing tracer %s: ", type->name);
1507 	ret = type->selftest(type, tr);
1508 	/* the test is responsible for resetting too */
1509 	tr->current_trace = saved_tracer;
1510 	if (ret) {
1511 		printk(KERN_CONT "FAILED!\n");
1512 		/* Add the warning after printing 'FAILED' */
1513 		WARN_ON(1);
1514 		return -1;
1515 	}
1516 	/* Only reset on passing, to avoid touching corrupted buffers */
1517 	tracing_reset_online_cpus(&tr->trace_buffer);
1518 
1519 #ifdef CONFIG_TRACER_MAX_TRACE
1520 	if (type->use_max_tr) {
1521 		tr->allocated_snapshot = false;
1522 
1523 		/* Shrink the max buffer again */
1524 		if (ring_buffer_expanded)
1525 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1526 					   RING_BUFFER_ALL_CPUS);
1527 	}
1528 #endif
1529 
1530 	printk(KERN_CONT "PASSED\n");
1531 	return 0;
1532 }
1533 
1534 static __init int init_trace_selftests(void)
1535 {
1536 	struct trace_selftests *p, *n;
1537 	struct tracer *t, **last;
1538 	int ret;
1539 
1540 	selftests_can_run = true;
1541 
1542 	mutex_lock(&trace_types_lock);
1543 
1544 	if (list_empty(&postponed_selftests))
1545 		goto out;
1546 
1547 	pr_info("Running postponed tracer tests:\n");
1548 
1549 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550 		ret = run_tracer_selftest(p->type);
1551 		/* If the test fails, then warn and remove from available_tracers */
1552 		if (ret < 0) {
1553 			WARN(1, "tracer: %s failed selftest, disabling\n",
1554 			     p->type->name);
1555 			last = &trace_types;
1556 			for (t = trace_types; t; t = t->next) {
1557 				if (t == p->type) {
1558 					*last = t->next;
1559 					break;
1560 				}
1561 				last = &t->next;
1562 			}
1563 		}
1564 		list_del(&p->list);
1565 		kfree(p);
1566 	}
1567 
1568  out:
1569 	mutex_unlock(&trace_types_lock);
1570 
1571 	return 0;
1572 }
1573 core_initcall(init_trace_selftests);
1574 #else
1575 static inline int run_tracer_selftest(struct tracer *type)
1576 {
1577 	return 0;
1578 }
1579 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1580 
1581 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582 
1583 static void __init apply_trace_boot_options(void);
1584 
1585 /**
1586  * register_tracer - register a tracer with the ftrace system.
1587  * @type - the plugin for the tracer
1588  *
1589  * Register a new plugin tracer.
1590  */
1591 int __init register_tracer(struct tracer *type)
1592 {
1593 	struct tracer *t;
1594 	int ret = 0;
1595 
1596 	if (!type->name) {
1597 		pr_info("Tracer must have a name\n");
1598 		return -1;
1599 	}
1600 
1601 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603 		return -1;
1604 	}
1605 
1606 	mutex_lock(&trace_types_lock);
1607 
1608 	tracing_selftest_running = true;
1609 
1610 	for (t = trace_types; t; t = t->next) {
1611 		if (strcmp(type->name, t->name) == 0) {
1612 			/* already found */
1613 			pr_info("Tracer %s already registered\n",
1614 				type->name);
1615 			ret = -1;
1616 			goto out;
1617 		}
1618 	}
1619 
1620 	if (!type->set_flag)
1621 		type->set_flag = &dummy_set_flag;
1622 	if (!type->flags) {
1623 		/*allocate a dummy tracer_flags*/
1624 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625 		if (!type->flags) {
1626 			ret = -ENOMEM;
1627 			goto out;
1628 		}
1629 		type->flags->val = 0;
1630 		type->flags->opts = dummy_tracer_opt;
1631 	} else
1632 		if (!type->flags->opts)
1633 			type->flags->opts = dummy_tracer_opt;
1634 
1635 	/* store the tracer for __set_tracer_option */
1636 	type->flags->trace = type;
1637 
1638 	ret = run_tracer_selftest(type);
1639 	if (ret < 0)
1640 		goto out;
1641 
1642 	type->next = trace_types;
1643 	trace_types = type;
1644 	add_tracer_options(&global_trace, type);
1645 
1646  out:
1647 	tracing_selftest_running = false;
1648 	mutex_unlock(&trace_types_lock);
1649 
1650 	if (ret || !default_bootup_tracer)
1651 		goto out_unlock;
1652 
1653 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654 		goto out_unlock;
1655 
1656 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657 	/* Do we want this tracer to start on bootup? */
1658 	tracing_set_tracer(&global_trace, type->name);
1659 	default_bootup_tracer = NULL;
1660 
1661 	apply_trace_boot_options();
1662 
1663 	/* disable other selftests, since this will break it. */
1664 	tracing_selftest_disabled = true;
1665 #ifdef CONFIG_FTRACE_STARTUP_TEST
1666 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667 	       type->name);
1668 #endif
1669 
1670  out_unlock:
1671 	return ret;
1672 }
1673 
1674 void tracing_reset(struct trace_buffer *buf, int cpu)
1675 {
1676 	struct ring_buffer *buffer = buf->buffer;
1677 
1678 	if (!buffer)
1679 		return;
1680 
1681 	ring_buffer_record_disable(buffer);
1682 
1683 	/* Make sure all commits have finished */
1684 	synchronize_sched();
1685 	ring_buffer_reset_cpu(buffer, cpu);
1686 
1687 	ring_buffer_record_enable(buffer);
1688 }
1689 
1690 void tracing_reset_online_cpus(struct trace_buffer *buf)
1691 {
1692 	struct ring_buffer *buffer = buf->buffer;
1693 	int cpu;
1694 
1695 	if (!buffer)
1696 		return;
1697 
1698 	ring_buffer_record_disable(buffer);
1699 
1700 	/* Make sure all commits have finished */
1701 	synchronize_sched();
1702 
1703 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704 
1705 	for_each_online_cpu(cpu)
1706 		ring_buffer_reset_cpu(buffer, cpu);
1707 
1708 	ring_buffer_record_enable(buffer);
1709 }
1710 
1711 /* Must have trace_types_lock held */
1712 void tracing_reset_all_online_cpus(void)
1713 {
1714 	struct trace_array *tr;
1715 
1716 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717 		if (!tr->clear_trace)
1718 			continue;
1719 		tr->clear_trace = false;
1720 		tracing_reset_online_cpus(&tr->trace_buffer);
1721 #ifdef CONFIG_TRACER_MAX_TRACE
1722 		tracing_reset_online_cpus(&tr->max_buffer);
1723 #endif
1724 	}
1725 }
1726 
1727 static int *tgid_map;
1728 
1729 #define SAVED_CMDLINES_DEFAULT 128
1730 #define NO_CMDLINE_MAP UINT_MAX
1731 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732 struct saved_cmdlines_buffer {
1733 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734 	unsigned *map_cmdline_to_pid;
1735 	unsigned cmdline_num;
1736 	int cmdline_idx;
1737 	char *saved_cmdlines;
1738 };
1739 static struct saved_cmdlines_buffer *savedcmd;
1740 
1741 /* temporary disable recording */
1742 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743 
1744 static inline char *get_saved_cmdlines(int idx)
1745 {
1746 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747 }
1748 
1749 static inline void set_cmdline(int idx, const char *cmdline)
1750 {
1751 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752 }
1753 
1754 static int allocate_cmdlines_buffer(unsigned int val,
1755 				    struct saved_cmdlines_buffer *s)
1756 {
1757 	s->map_cmdline_to_pid = kmalloc_array(val,
1758 					      sizeof(*s->map_cmdline_to_pid),
1759 					      GFP_KERNEL);
1760 	if (!s->map_cmdline_to_pid)
1761 		return -ENOMEM;
1762 
1763 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764 	if (!s->saved_cmdlines) {
1765 		kfree(s->map_cmdline_to_pid);
1766 		return -ENOMEM;
1767 	}
1768 
1769 	s->cmdline_idx = 0;
1770 	s->cmdline_num = val;
1771 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772 	       sizeof(s->map_pid_to_cmdline));
1773 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774 	       val * sizeof(*s->map_cmdline_to_pid));
1775 
1776 	return 0;
1777 }
1778 
1779 static int trace_create_savedcmd(void)
1780 {
1781 	int ret;
1782 
1783 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784 	if (!savedcmd)
1785 		return -ENOMEM;
1786 
1787 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788 	if (ret < 0) {
1789 		kfree(savedcmd);
1790 		savedcmd = NULL;
1791 		return -ENOMEM;
1792 	}
1793 
1794 	return 0;
1795 }
1796 
1797 int is_tracing_stopped(void)
1798 {
1799 	return global_trace.stop_count;
1800 }
1801 
1802 /**
1803  * tracing_start - quick start of the tracer
1804  *
1805  * If tracing is enabled but was stopped by tracing_stop,
1806  * this will start the tracer back up.
1807  */
1808 void tracing_start(void)
1809 {
1810 	struct ring_buffer *buffer;
1811 	unsigned long flags;
1812 
1813 	if (tracing_disabled)
1814 		return;
1815 
1816 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817 	if (--global_trace.stop_count) {
1818 		if (global_trace.stop_count < 0) {
1819 			/* Someone screwed up their debugging */
1820 			WARN_ON_ONCE(1);
1821 			global_trace.stop_count = 0;
1822 		}
1823 		goto out;
1824 	}
1825 
1826 	/* Prevent the buffers from switching */
1827 	arch_spin_lock(&global_trace.max_lock);
1828 
1829 	buffer = global_trace.trace_buffer.buffer;
1830 	if (buffer)
1831 		ring_buffer_record_enable(buffer);
1832 
1833 #ifdef CONFIG_TRACER_MAX_TRACE
1834 	buffer = global_trace.max_buffer.buffer;
1835 	if (buffer)
1836 		ring_buffer_record_enable(buffer);
1837 #endif
1838 
1839 	arch_spin_unlock(&global_trace.max_lock);
1840 
1841  out:
1842 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843 }
1844 
1845 static void tracing_start_tr(struct trace_array *tr)
1846 {
1847 	struct ring_buffer *buffer;
1848 	unsigned long flags;
1849 
1850 	if (tracing_disabled)
1851 		return;
1852 
1853 	/* If global, we need to also start the max tracer */
1854 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855 		return tracing_start();
1856 
1857 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1858 
1859 	if (--tr->stop_count) {
1860 		if (tr->stop_count < 0) {
1861 			/* Someone screwed up their debugging */
1862 			WARN_ON_ONCE(1);
1863 			tr->stop_count = 0;
1864 		}
1865 		goto out;
1866 	}
1867 
1868 	buffer = tr->trace_buffer.buffer;
1869 	if (buffer)
1870 		ring_buffer_record_enable(buffer);
1871 
1872  out:
1873 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874 }
1875 
1876 /**
1877  * tracing_stop - quick stop of the tracer
1878  *
1879  * Light weight way to stop tracing. Use in conjunction with
1880  * tracing_start.
1881  */
1882 void tracing_stop(void)
1883 {
1884 	struct ring_buffer *buffer;
1885 	unsigned long flags;
1886 
1887 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888 	if (global_trace.stop_count++)
1889 		goto out;
1890 
1891 	/* Prevent the buffers from switching */
1892 	arch_spin_lock(&global_trace.max_lock);
1893 
1894 	buffer = global_trace.trace_buffer.buffer;
1895 	if (buffer)
1896 		ring_buffer_record_disable(buffer);
1897 
1898 #ifdef CONFIG_TRACER_MAX_TRACE
1899 	buffer = global_trace.max_buffer.buffer;
1900 	if (buffer)
1901 		ring_buffer_record_disable(buffer);
1902 #endif
1903 
1904 	arch_spin_unlock(&global_trace.max_lock);
1905 
1906  out:
1907 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908 }
1909 
1910 static void tracing_stop_tr(struct trace_array *tr)
1911 {
1912 	struct ring_buffer *buffer;
1913 	unsigned long flags;
1914 
1915 	/* If global, we need to also stop the max tracer */
1916 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917 		return tracing_stop();
1918 
1919 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1920 	if (tr->stop_count++)
1921 		goto out;
1922 
1923 	buffer = tr->trace_buffer.buffer;
1924 	if (buffer)
1925 		ring_buffer_record_disable(buffer);
1926 
1927  out:
1928 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929 }
1930 
1931 static int trace_save_cmdline(struct task_struct *tsk)
1932 {
1933 	unsigned pid, idx;
1934 
1935 	/* treat recording of idle task as a success */
1936 	if (!tsk->pid)
1937 		return 1;
1938 
1939 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940 		return 0;
1941 
1942 	/*
1943 	 * It's not the end of the world if we don't get
1944 	 * the lock, but we also don't want to spin
1945 	 * nor do we want to disable interrupts,
1946 	 * so if we miss here, then better luck next time.
1947 	 */
1948 	if (!arch_spin_trylock(&trace_cmdline_lock))
1949 		return 0;
1950 
1951 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952 	if (idx == NO_CMDLINE_MAP) {
1953 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954 
1955 		/*
1956 		 * Check whether the cmdline buffer at idx has a pid
1957 		 * mapped. We are going to overwrite that entry so we
1958 		 * need to clear the map_pid_to_cmdline. Otherwise we
1959 		 * would read the new comm for the old pid.
1960 		 */
1961 		pid = savedcmd->map_cmdline_to_pid[idx];
1962 		if (pid != NO_CMDLINE_MAP)
1963 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964 
1965 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967 
1968 		savedcmd->cmdline_idx = idx;
1969 	}
1970 
1971 	set_cmdline(idx, tsk->comm);
1972 
1973 	arch_spin_unlock(&trace_cmdline_lock);
1974 
1975 	return 1;
1976 }
1977 
1978 static void __trace_find_cmdline(int pid, char comm[])
1979 {
1980 	unsigned map;
1981 
1982 	if (!pid) {
1983 		strcpy(comm, "<idle>");
1984 		return;
1985 	}
1986 
1987 	if (WARN_ON_ONCE(pid < 0)) {
1988 		strcpy(comm, "<XXX>");
1989 		return;
1990 	}
1991 
1992 	if (pid > PID_MAX_DEFAULT) {
1993 		strcpy(comm, "<...>");
1994 		return;
1995 	}
1996 
1997 	map = savedcmd->map_pid_to_cmdline[pid];
1998 	if (map != NO_CMDLINE_MAP)
1999 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000 	else
2001 		strcpy(comm, "<...>");
2002 }
2003 
2004 void trace_find_cmdline(int pid, char comm[])
2005 {
2006 	preempt_disable();
2007 	arch_spin_lock(&trace_cmdline_lock);
2008 
2009 	__trace_find_cmdline(pid, comm);
2010 
2011 	arch_spin_unlock(&trace_cmdline_lock);
2012 	preempt_enable();
2013 }
2014 
2015 int trace_find_tgid(int pid)
2016 {
2017 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018 		return 0;
2019 
2020 	return tgid_map[pid];
2021 }
2022 
2023 static int trace_save_tgid(struct task_struct *tsk)
2024 {
2025 	/* treat recording of idle task as a success */
2026 	if (!tsk->pid)
2027 		return 1;
2028 
2029 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030 		return 0;
2031 
2032 	tgid_map[tsk->pid] = tsk->tgid;
2033 	return 1;
2034 }
2035 
2036 static bool tracing_record_taskinfo_skip(int flags)
2037 {
2038 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039 		return true;
2040 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041 		return true;
2042 	if (!__this_cpu_read(trace_taskinfo_save))
2043 		return true;
2044 	return false;
2045 }
2046 
2047 /**
2048  * tracing_record_taskinfo - record the task info of a task
2049  *
2050  * @task  - task to record
2051  * @flags - TRACE_RECORD_CMDLINE for recording comm
2052  *        - TRACE_RECORD_TGID for recording tgid
2053  */
2054 void tracing_record_taskinfo(struct task_struct *task, int flags)
2055 {
2056 	bool done;
2057 
2058 	if (tracing_record_taskinfo_skip(flags))
2059 		return;
2060 
2061 	/*
2062 	 * Record as much task information as possible. If some fail, continue
2063 	 * to try to record the others.
2064 	 */
2065 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067 
2068 	/* If recording any information failed, retry again soon. */
2069 	if (!done)
2070 		return;
2071 
2072 	__this_cpu_write(trace_taskinfo_save, false);
2073 }
2074 
2075 /**
2076  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077  *
2078  * @prev - previous task during sched_switch
2079  * @next - next task during sched_switch
2080  * @flags - TRACE_RECORD_CMDLINE for recording comm
2081  *          TRACE_RECORD_TGID for recording tgid
2082  */
2083 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084 					  struct task_struct *next, int flags)
2085 {
2086 	bool done;
2087 
2088 	if (tracing_record_taskinfo_skip(flags))
2089 		return;
2090 
2091 	/*
2092 	 * Record as much task information as possible. If some fail, continue
2093 	 * to try to record the others.
2094 	 */
2095 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099 
2100 	/* If recording any information failed, retry again soon. */
2101 	if (!done)
2102 		return;
2103 
2104 	__this_cpu_write(trace_taskinfo_save, false);
2105 }
2106 
2107 /* Helpers to record a specific task information */
2108 void tracing_record_cmdline(struct task_struct *task)
2109 {
2110 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111 }
2112 
2113 void tracing_record_tgid(struct task_struct *task)
2114 {
2115 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116 }
2117 
2118 /*
2119  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121  * simplifies those functions and keeps them in sync.
2122  */
2123 enum print_line_t trace_handle_return(struct trace_seq *s)
2124 {
2125 	return trace_seq_has_overflowed(s) ?
2126 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_handle_return);
2129 
2130 void
2131 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132 			     int pc)
2133 {
2134 	struct task_struct *tsk = current;
2135 
2136 	entry->preempt_count		= pc & 0xff;
2137 	entry->pid			= (tsk) ? tsk->pid : 0;
2138 	entry->flags =
2139 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141 #else
2142 		TRACE_FLAG_IRQS_NOSUPPORT |
2143 #endif
2144 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149 }
2150 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2151 
2152 struct ring_buffer_event *
2153 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154 			  int type,
2155 			  unsigned long len,
2156 			  unsigned long flags, int pc)
2157 {
2158 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159 }
2160 
2161 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163 static int trace_buffered_event_ref;
2164 
2165 /**
2166  * trace_buffered_event_enable - enable buffering events
2167  *
2168  * When events are being filtered, it is quicker to use a temporary
2169  * buffer to write the event data into if there's a likely chance
2170  * that it will not be committed. The discard of the ring buffer
2171  * is not as fast as committing, and is much slower than copying
2172  * a commit.
2173  *
2174  * When an event is to be filtered, allocate per cpu buffers to
2175  * write the event data into, and if the event is filtered and discarded
2176  * it is simply dropped, otherwise, the entire data is to be committed
2177  * in one shot.
2178  */
2179 void trace_buffered_event_enable(void)
2180 {
2181 	struct ring_buffer_event *event;
2182 	struct page *page;
2183 	int cpu;
2184 
2185 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186 
2187 	if (trace_buffered_event_ref++)
2188 		return;
2189 
2190 	for_each_tracing_cpu(cpu) {
2191 		page = alloc_pages_node(cpu_to_node(cpu),
2192 					GFP_KERNEL | __GFP_NORETRY, 0);
2193 		if (!page)
2194 			goto failed;
2195 
2196 		event = page_address(page);
2197 		memset(event, 0, sizeof(*event));
2198 
2199 		per_cpu(trace_buffered_event, cpu) = event;
2200 
2201 		preempt_disable();
2202 		if (cpu == smp_processor_id() &&
2203 		    this_cpu_read(trace_buffered_event) !=
2204 		    per_cpu(trace_buffered_event, cpu))
2205 			WARN_ON_ONCE(1);
2206 		preempt_enable();
2207 	}
2208 
2209 	return;
2210  failed:
2211 	trace_buffered_event_disable();
2212 }
2213 
2214 static void enable_trace_buffered_event(void *data)
2215 {
2216 	/* Probably not needed, but do it anyway */
2217 	smp_rmb();
2218 	this_cpu_dec(trace_buffered_event_cnt);
2219 }
2220 
2221 static void disable_trace_buffered_event(void *data)
2222 {
2223 	this_cpu_inc(trace_buffered_event_cnt);
2224 }
2225 
2226 /**
2227  * trace_buffered_event_disable - disable buffering events
2228  *
2229  * When a filter is removed, it is faster to not use the buffered
2230  * events, and to commit directly into the ring buffer. Free up
2231  * the temp buffers when there are no more users. This requires
2232  * special synchronization with current events.
2233  */
2234 void trace_buffered_event_disable(void)
2235 {
2236 	int cpu;
2237 
2238 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239 
2240 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241 		return;
2242 
2243 	if (--trace_buffered_event_ref)
2244 		return;
2245 
2246 	preempt_disable();
2247 	/* For each CPU, set the buffer as used. */
2248 	smp_call_function_many(tracing_buffer_mask,
2249 			       disable_trace_buffered_event, NULL, 1);
2250 	preempt_enable();
2251 
2252 	/* Wait for all current users to finish */
2253 	synchronize_sched();
2254 
2255 	for_each_tracing_cpu(cpu) {
2256 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257 		per_cpu(trace_buffered_event, cpu) = NULL;
2258 	}
2259 	/*
2260 	 * Make sure trace_buffered_event is NULL before clearing
2261 	 * trace_buffered_event_cnt.
2262 	 */
2263 	smp_wmb();
2264 
2265 	preempt_disable();
2266 	/* Do the work on each cpu */
2267 	smp_call_function_many(tracing_buffer_mask,
2268 			       enable_trace_buffered_event, NULL, 1);
2269 	preempt_enable();
2270 }
2271 
2272 static struct ring_buffer *temp_buffer;
2273 
2274 struct ring_buffer_event *
2275 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276 			  struct trace_event_file *trace_file,
2277 			  int type, unsigned long len,
2278 			  unsigned long flags, int pc)
2279 {
2280 	struct ring_buffer_event *entry;
2281 	int val;
2282 
2283 	*current_rb = trace_file->tr->trace_buffer.buffer;
2284 
2285 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287 	    (entry = this_cpu_read(trace_buffered_event))) {
2288 		/* Try to use the per cpu buffer first */
2289 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2290 		if (val == 1) {
2291 			trace_event_setup(entry, type, flags, pc);
2292 			entry->array[0] = len;
2293 			return entry;
2294 		}
2295 		this_cpu_dec(trace_buffered_event_cnt);
2296 	}
2297 
2298 	entry = __trace_buffer_lock_reserve(*current_rb,
2299 					    type, len, flags, pc);
2300 	/*
2301 	 * If tracing is off, but we have triggers enabled
2302 	 * we still need to look at the event data. Use the temp_buffer
2303 	 * to store the trace event for the tigger to use. It's recusive
2304 	 * safe and will not be recorded anywhere.
2305 	 */
2306 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307 		*current_rb = temp_buffer;
2308 		entry = __trace_buffer_lock_reserve(*current_rb,
2309 						    type, len, flags, pc);
2310 	}
2311 	return entry;
2312 }
2313 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314 
2315 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316 static DEFINE_MUTEX(tracepoint_printk_mutex);
2317 
2318 static void output_printk(struct trace_event_buffer *fbuffer)
2319 {
2320 	struct trace_event_call *event_call;
2321 	struct trace_event *event;
2322 	unsigned long flags;
2323 	struct trace_iterator *iter = tracepoint_print_iter;
2324 
2325 	/* We should never get here if iter is NULL */
2326 	if (WARN_ON_ONCE(!iter))
2327 		return;
2328 
2329 	event_call = fbuffer->trace_file->event_call;
2330 	if (!event_call || !event_call->event.funcs ||
2331 	    !event_call->event.funcs->trace)
2332 		return;
2333 
2334 	event = &fbuffer->trace_file->event_call->event;
2335 
2336 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337 	trace_seq_init(&iter->seq);
2338 	iter->ent = fbuffer->entry;
2339 	event_call->event.funcs->trace(iter, 0, event);
2340 	trace_seq_putc(&iter->seq, 0);
2341 	printk("%s", iter->seq.buffer);
2342 
2343 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344 }
2345 
2346 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347 			     void __user *buffer, size_t *lenp,
2348 			     loff_t *ppos)
2349 {
2350 	int save_tracepoint_printk;
2351 	int ret;
2352 
2353 	mutex_lock(&tracepoint_printk_mutex);
2354 	save_tracepoint_printk = tracepoint_printk;
2355 
2356 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357 
2358 	/*
2359 	 * This will force exiting early, as tracepoint_printk
2360 	 * is always zero when tracepoint_printk_iter is not allocated
2361 	 */
2362 	if (!tracepoint_print_iter)
2363 		tracepoint_printk = 0;
2364 
2365 	if (save_tracepoint_printk == tracepoint_printk)
2366 		goto out;
2367 
2368 	if (tracepoint_printk)
2369 		static_key_enable(&tracepoint_printk_key.key);
2370 	else
2371 		static_key_disable(&tracepoint_printk_key.key);
2372 
2373  out:
2374 	mutex_unlock(&tracepoint_printk_mutex);
2375 
2376 	return ret;
2377 }
2378 
2379 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380 {
2381 	if (static_key_false(&tracepoint_printk_key.key))
2382 		output_printk(fbuffer);
2383 
2384 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385 				    fbuffer->event, fbuffer->entry,
2386 				    fbuffer->flags, fbuffer->pc);
2387 }
2388 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389 
2390 /*
2391  * Skip 3:
2392  *
2393  *   trace_buffer_unlock_commit_regs()
2394  *   trace_event_buffer_commit()
2395  *   trace_event_raw_event_xxx()
2396  */
2397 # define STACK_SKIP 3
2398 
2399 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400 				     struct ring_buffer *buffer,
2401 				     struct ring_buffer_event *event,
2402 				     unsigned long flags, int pc,
2403 				     struct pt_regs *regs)
2404 {
2405 	__buffer_unlock_commit(buffer, event);
2406 
2407 	/*
2408 	 * If regs is not set, then skip the necessary functions.
2409 	 * Note, we can still get here via blktrace, wakeup tracer
2410 	 * and mmiotrace, but that's ok if they lose a function or
2411 	 * two. They are not that meaningful.
2412 	 */
2413 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414 	ftrace_trace_userstack(buffer, flags, pc);
2415 }
2416 
2417 /*
2418  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2419  */
2420 void
2421 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422 				   struct ring_buffer_event *event)
2423 {
2424 	__buffer_unlock_commit(buffer, event);
2425 }
2426 
2427 static void
2428 trace_process_export(struct trace_export *export,
2429 	       struct ring_buffer_event *event)
2430 {
2431 	struct trace_entry *entry;
2432 	unsigned int size = 0;
2433 
2434 	entry = ring_buffer_event_data(event);
2435 	size = ring_buffer_event_length(event);
2436 	export->write(export, entry, size);
2437 }
2438 
2439 static DEFINE_MUTEX(ftrace_export_lock);
2440 
2441 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442 
2443 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444 
2445 static inline void ftrace_exports_enable(void)
2446 {
2447 	static_branch_enable(&ftrace_exports_enabled);
2448 }
2449 
2450 static inline void ftrace_exports_disable(void)
2451 {
2452 	static_branch_disable(&ftrace_exports_enabled);
2453 }
2454 
2455 void ftrace_exports(struct ring_buffer_event *event)
2456 {
2457 	struct trace_export *export;
2458 
2459 	preempt_disable_notrace();
2460 
2461 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462 	while (export) {
2463 		trace_process_export(export, event);
2464 		export = rcu_dereference_raw_notrace(export->next);
2465 	}
2466 
2467 	preempt_enable_notrace();
2468 }
2469 
2470 static inline void
2471 add_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473 	rcu_assign_pointer(export->next, *list);
2474 	/*
2475 	 * We are entering export into the list but another
2476 	 * CPU might be walking that list. We need to make sure
2477 	 * the export->next pointer is valid before another CPU sees
2478 	 * the export pointer included into the list.
2479 	 */
2480 	rcu_assign_pointer(*list, export);
2481 }
2482 
2483 static inline int
2484 rm_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486 	struct trace_export **p;
2487 
2488 	for (p = list; *p != NULL; p = &(*p)->next)
2489 		if (*p == export)
2490 			break;
2491 
2492 	if (*p != export)
2493 		return -1;
2494 
2495 	rcu_assign_pointer(*p, (*p)->next);
2496 
2497 	return 0;
2498 }
2499 
2500 static inline void
2501 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502 {
2503 	if (*list == NULL)
2504 		ftrace_exports_enable();
2505 
2506 	add_trace_export(list, export);
2507 }
2508 
2509 static inline int
2510 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511 {
2512 	int ret;
2513 
2514 	ret = rm_trace_export(list, export);
2515 	if (*list == NULL)
2516 		ftrace_exports_disable();
2517 
2518 	return ret;
2519 }
2520 
2521 int register_ftrace_export(struct trace_export *export)
2522 {
2523 	if (WARN_ON_ONCE(!export->write))
2524 		return -1;
2525 
2526 	mutex_lock(&ftrace_export_lock);
2527 
2528 	add_ftrace_export(&ftrace_exports_list, export);
2529 
2530 	mutex_unlock(&ftrace_export_lock);
2531 
2532 	return 0;
2533 }
2534 EXPORT_SYMBOL_GPL(register_ftrace_export);
2535 
2536 int unregister_ftrace_export(struct trace_export *export)
2537 {
2538 	int ret;
2539 
2540 	mutex_lock(&ftrace_export_lock);
2541 
2542 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2543 
2544 	mutex_unlock(&ftrace_export_lock);
2545 
2546 	return ret;
2547 }
2548 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2549 
2550 void
2551 trace_function(struct trace_array *tr,
2552 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553 	       int pc)
2554 {
2555 	struct trace_event_call *call = &event_function;
2556 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557 	struct ring_buffer_event *event;
2558 	struct ftrace_entry *entry;
2559 
2560 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561 					    flags, pc);
2562 	if (!event)
2563 		return;
2564 	entry	= ring_buffer_event_data(event);
2565 	entry->ip			= ip;
2566 	entry->parent_ip		= parent_ip;
2567 
2568 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2569 		if (static_branch_unlikely(&ftrace_exports_enabled))
2570 			ftrace_exports(event);
2571 		__buffer_unlock_commit(buffer, event);
2572 	}
2573 }
2574 
2575 #ifdef CONFIG_STACKTRACE
2576 
2577 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578 struct ftrace_stack {
2579 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2580 };
2581 
2582 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584 
2585 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586 				 unsigned long flags,
2587 				 int skip, int pc, struct pt_regs *regs)
2588 {
2589 	struct trace_event_call *call = &event_kernel_stack;
2590 	struct ring_buffer_event *event;
2591 	struct stack_entry *entry;
2592 	struct stack_trace trace;
2593 	int use_stack;
2594 	int size = FTRACE_STACK_ENTRIES;
2595 
2596 	trace.nr_entries	= 0;
2597 	trace.skip		= skip;
2598 
2599 	/*
2600 	 * Add one, for this function and the call to save_stack_trace()
2601 	 * If regs is set, then these functions will not be in the way.
2602 	 */
2603 #ifndef CONFIG_UNWINDER_ORC
2604 	if (!regs)
2605 		trace.skip++;
2606 #endif
2607 
2608 	/*
2609 	 * Since events can happen in NMIs there's no safe way to
2610 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2611 	 * or NMI comes in, it will just have to use the default
2612 	 * FTRACE_STACK_SIZE.
2613 	 */
2614 	preempt_disable_notrace();
2615 
2616 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617 	/*
2618 	 * We don't need any atomic variables, just a barrier.
2619 	 * If an interrupt comes in, we don't care, because it would
2620 	 * have exited and put the counter back to what we want.
2621 	 * We just need a barrier to keep gcc from moving things
2622 	 * around.
2623 	 */
2624 	barrier();
2625 	if (use_stack == 1) {
2626 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2627 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2628 
2629 		if (regs)
2630 			save_stack_trace_regs(regs, &trace);
2631 		else
2632 			save_stack_trace(&trace);
2633 
2634 		if (trace.nr_entries > size)
2635 			size = trace.nr_entries;
2636 	} else
2637 		/* From now on, use_stack is a boolean */
2638 		use_stack = 0;
2639 
2640 	size *= sizeof(unsigned long);
2641 
2642 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643 					    sizeof(*entry) + size, flags, pc);
2644 	if (!event)
2645 		goto out;
2646 	entry = ring_buffer_event_data(event);
2647 
2648 	memset(&entry->caller, 0, size);
2649 
2650 	if (use_stack)
2651 		memcpy(&entry->caller, trace.entries,
2652 		       trace.nr_entries * sizeof(unsigned long));
2653 	else {
2654 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2655 		trace.entries		= entry->caller;
2656 		if (regs)
2657 			save_stack_trace_regs(regs, &trace);
2658 		else
2659 			save_stack_trace(&trace);
2660 	}
2661 
2662 	entry->size = trace.nr_entries;
2663 
2664 	if (!call_filter_check_discard(call, entry, buffer, event))
2665 		__buffer_unlock_commit(buffer, event);
2666 
2667  out:
2668 	/* Again, don't let gcc optimize things here */
2669 	barrier();
2670 	__this_cpu_dec(ftrace_stack_reserve);
2671 	preempt_enable_notrace();
2672 
2673 }
2674 
2675 static inline void ftrace_trace_stack(struct trace_array *tr,
2676 				      struct ring_buffer *buffer,
2677 				      unsigned long flags,
2678 				      int skip, int pc, struct pt_regs *regs)
2679 {
2680 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681 		return;
2682 
2683 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684 }
2685 
2686 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687 		   int pc)
2688 {
2689 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690 
2691 	if (rcu_is_watching()) {
2692 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693 		return;
2694 	}
2695 
2696 	/*
2697 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698 	 * but if the above rcu_is_watching() failed, then the NMI
2699 	 * triggered someplace critical, and rcu_irq_enter() should
2700 	 * not be called from NMI.
2701 	 */
2702 	if (unlikely(in_nmi()))
2703 		return;
2704 
2705 	rcu_irq_enter_irqson();
2706 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707 	rcu_irq_exit_irqson();
2708 }
2709 
2710 /**
2711  * trace_dump_stack - record a stack back trace in the trace buffer
2712  * @skip: Number of functions to skip (helper handlers)
2713  */
2714 void trace_dump_stack(int skip)
2715 {
2716 	unsigned long flags;
2717 
2718 	if (tracing_disabled || tracing_selftest_running)
2719 		return;
2720 
2721 	local_save_flags(flags);
2722 
2723 #ifndef CONFIG_UNWINDER_ORC
2724 	/* Skip 1 to skip this function. */
2725 	skip++;
2726 #endif
2727 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728 			     flags, skip, preempt_count(), NULL);
2729 }
2730 EXPORT_SYMBOL_GPL(trace_dump_stack);
2731 
2732 static DEFINE_PER_CPU(int, user_stack_count);
2733 
2734 void
2735 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2736 {
2737 	struct trace_event_call *call = &event_user_stack;
2738 	struct ring_buffer_event *event;
2739 	struct userstack_entry *entry;
2740 	struct stack_trace trace;
2741 
2742 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2743 		return;
2744 
2745 	/*
2746 	 * NMIs can not handle page faults, even with fix ups.
2747 	 * The save user stack can (and often does) fault.
2748 	 */
2749 	if (unlikely(in_nmi()))
2750 		return;
2751 
2752 	/*
2753 	 * prevent recursion, since the user stack tracing may
2754 	 * trigger other kernel events.
2755 	 */
2756 	preempt_disable();
2757 	if (__this_cpu_read(user_stack_count))
2758 		goto out;
2759 
2760 	__this_cpu_inc(user_stack_count);
2761 
2762 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2763 					    sizeof(*entry), flags, pc);
2764 	if (!event)
2765 		goto out_drop_count;
2766 	entry	= ring_buffer_event_data(event);
2767 
2768 	entry->tgid		= current->tgid;
2769 	memset(&entry->caller, 0, sizeof(entry->caller));
2770 
2771 	trace.nr_entries	= 0;
2772 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2773 	trace.skip		= 0;
2774 	trace.entries		= entry->caller;
2775 
2776 	save_stack_trace_user(&trace);
2777 	if (!call_filter_check_discard(call, entry, buffer, event))
2778 		__buffer_unlock_commit(buffer, event);
2779 
2780  out_drop_count:
2781 	__this_cpu_dec(user_stack_count);
2782  out:
2783 	preempt_enable();
2784 }
2785 
2786 #ifdef UNUSED
2787 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2788 {
2789 	ftrace_trace_userstack(tr, flags, preempt_count());
2790 }
2791 #endif /* UNUSED */
2792 
2793 #endif /* CONFIG_STACKTRACE */
2794 
2795 /* created for use with alloc_percpu */
2796 struct trace_buffer_struct {
2797 	int nesting;
2798 	char buffer[4][TRACE_BUF_SIZE];
2799 };
2800 
2801 static struct trace_buffer_struct *trace_percpu_buffer;
2802 
2803 /*
2804  * Thise allows for lockless recording.  If we're nested too deeply, then
2805  * this returns NULL.
2806  */
2807 static char *get_trace_buf(void)
2808 {
2809 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2810 
2811 	if (!buffer || buffer->nesting >= 4)
2812 		return NULL;
2813 
2814 	buffer->nesting++;
2815 
2816 	/* Interrupts must see nesting incremented before we use the buffer */
2817 	barrier();
2818 	return &buffer->buffer[buffer->nesting][0];
2819 }
2820 
2821 static void put_trace_buf(void)
2822 {
2823 	/* Don't let the decrement of nesting leak before this */
2824 	barrier();
2825 	this_cpu_dec(trace_percpu_buffer->nesting);
2826 }
2827 
2828 static int alloc_percpu_trace_buffer(void)
2829 {
2830 	struct trace_buffer_struct *buffers;
2831 
2832 	buffers = alloc_percpu(struct trace_buffer_struct);
2833 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2834 		return -ENOMEM;
2835 
2836 	trace_percpu_buffer = buffers;
2837 	return 0;
2838 }
2839 
2840 static int buffers_allocated;
2841 
2842 void trace_printk_init_buffers(void)
2843 {
2844 	if (buffers_allocated)
2845 		return;
2846 
2847 	if (alloc_percpu_trace_buffer())
2848 		return;
2849 
2850 	/* trace_printk() is for debug use only. Don't use it in production. */
2851 
2852 	pr_warn("\n");
2853 	pr_warn("**********************************************************\n");
2854 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2857 	pr_warn("**                                                      **\n");
2858 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2859 	pr_warn("** unsafe for production use.                           **\n");
2860 	pr_warn("**                                                      **\n");
2861 	pr_warn("** If you see this message and you are not debugging    **\n");
2862 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2863 	pr_warn("**                                                      **\n");
2864 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2865 	pr_warn("**********************************************************\n");
2866 
2867 	/* Expand the buffers to set size */
2868 	tracing_update_buffers();
2869 
2870 	buffers_allocated = 1;
2871 
2872 	/*
2873 	 * trace_printk_init_buffers() can be called by modules.
2874 	 * If that happens, then we need to start cmdline recording
2875 	 * directly here. If the global_trace.buffer is already
2876 	 * allocated here, then this was called by module code.
2877 	 */
2878 	if (global_trace.trace_buffer.buffer)
2879 		tracing_start_cmdline_record();
2880 }
2881 
2882 void trace_printk_start_comm(void)
2883 {
2884 	/* Start tracing comms if trace printk is set */
2885 	if (!buffers_allocated)
2886 		return;
2887 	tracing_start_cmdline_record();
2888 }
2889 
2890 static void trace_printk_start_stop_comm(int enabled)
2891 {
2892 	if (!buffers_allocated)
2893 		return;
2894 
2895 	if (enabled)
2896 		tracing_start_cmdline_record();
2897 	else
2898 		tracing_stop_cmdline_record();
2899 }
2900 
2901 /**
2902  * trace_vbprintk - write binary msg to tracing buffer
2903  *
2904  */
2905 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2906 {
2907 	struct trace_event_call *call = &event_bprint;
2908 	struct ring_buffer_event *event;
2909 	struct ring_buffer *buffer;
2910 	struct trace_array *tr = &global_trace;
2911 	struct bprint_entry *entry;
2912 	unsigned long flags;
2913 	char *tbuffer;
2914 	int len = 0, size, pc;
2915 
2916 	if (unlikely(tracing_selftest_running || tracing_disabled))
2917 		return 0;
2918 
2919 	/* Don't pollute graph traces with trace_vprintk internals */
2920 	pause_graph_tracing();
2921 
2922 	pc = preempt_count();
2923 	preempt_disable_notrace();
2924 
2925 	tbuffer = get_trace_buf();
2926 	if (!tbuffer) {
2927 		len = 0;
2928 		goto out_nobuffer;
2929 	}
2930 
2931 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2932 
2933 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2934 		goto out;
2935 
2936 	local_save_flags(flags);
2937 	size = sizeof(*entry) + sizeof(u32) * len;
2938 	buffer = tr->trace_buffer.buffer;
2939 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2940 					    flags, pc);
2941 	if (!event)
2942 		goto out;
2943 	entry = ring_buffer_event_data(event);
2944 	entry->ip			= ip;
2945 	entry->fmt			= fmt;
2946 
2947 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2948 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2949 		__buffer_unlock_commit(buffer, event);
2950 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2951 	}
2952 
2953 out:
2954 	put_trace_buf();
2955 
2956 out_nobuffer:
2957 	preempt_enable_notrace();
2958 	unpause_graph_tracing();
2959 
2960 	return len;
2961 }
2962 EXPORT_SYMBOL_GPL(trace_vbprintk);
2963 
2964 __printf(3, 0)
2965 static int
2966 __trace_array_vprintk(struct ring_buffer *buffer,
2967 		      unsigned long ip, const char *fmt, va_list args)
2968 {
2969 	struct trace_event_call *call = &event_print;
2970 	struct ring_buffer_event *event;
2971 	int len = 0, size, pc;
2972 	struct print_entry *entry;
2973 	unsigned long flags;
2974 	char *tbuffer;
2975 
2976 	if (tracing_disabled || tracing_selftest_running)
2977 		return 0;
2978 
2979 	/* Don't pollute graph traces with trace_vprintk internals */
2980 	pause_graph_tracing();
2981 
2982 	pc = preempt_count();
2983 	preempt_disable_notrace();
2984 
2985 
2986 	tbuffer = get_trace_buf();
2987 	if (!tbuffer) {
2988 		len = 0;
2989 		goto out_nobuffer;
2990 	}
2991 
2992 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2993 
2994 	local_save_flags(flags);
2995 	size = sizeof(*entry) + len + 1;
2996 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2997 					    flags, pc);
2998 	if (!event)
2999 		goto out;
3000 	entry = ring_buffer_event_data(event);
3001 	entry->ip = ip;
3002 
3003 	memcpy(&entry->buf, tbuffer, len + 1);
3004 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3005 		__buffer_unlock_commit(buffer, event);
3006 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3007 	}
3008 
3009 out:
3010 	put_trace_buf();
3011 
3012 out_nobuffer:
3013 	preempt_enable_notrace();
3014 	unpause_graph_tracing();
3015 
3016 	return len;
3017 }
3018 
3019 __printf(3, 0)
3020 int trace_array_vprintk(struct trace_array *tr,
3021 			unsigned long ip, const char *fmt, va_list args)
3022 {
3023 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3024 }
3025 
3026 __printf(3, 0)
3027 int trace_array_printk(struct trace_array *tr,
3028 		       unsigned long ip, const char *fmt, ...)
3029 {
3030 	int ret;
3031 	va_list ap;
3032 
3033 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3034 		return 0;
3035 
3036 	va_start(ap, fmt);
3037 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3038 	va_end(ap);
3039 	return ret;
3040 }
3041 
3042 __printf(3, 4)
3043 int trace_array_printk_buf(struct ring_buffer *buffer,
3044 			   unsigned long ip, const char *fmt, ...)
3045 {
3046 	int ret;
3047 	va_list ap;
3048 
3049 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3050 		return 0;
3051 
3052 	va_start(ap, fmt);
3053 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3054 	va_end(ap);
3055 	return ret;
3056 }
3057 
3058 __printf(2, 0)
3059 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3060 {
3061 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3062 }
3063 EXPORT_SYMBOL_GPL(trace_vprintk);
3064 
3065 static void trace_iterator_increment(struct trace_iterator *iter)
3066 {
3067 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3068 
3069 	iter->idx++;
3070 	if (buf_iter)
3071 		ring_buffer_read(buf_iter, NULL);
3072 }
3073 
3074 static struct trace_entry *
3075 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3076 		unsigned long *lost_events)
3077 {
3078 	struct ring_buffer_event *event;
3079 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3080 
3081 	if (buf_iter)
3082 		event = ring_buffer_iter_peek(buf_iter, ts);
3083 	else
3084 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3085 					 lost_events);
3086 
3087 	if (event) {
3088 		iter->ent_size = ring_buffer_event_length(event);
3089 		return ring_buffer_event_data(event);
3090 	}
3091 	iter->ent_size = 0;
3092 	return NULL;
3093 }
3094 
3095 static struct trace_entry *
3096 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3097 		  unsigned long *missing_events, u64 *ent_ts)
3098 {
3099 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3100 	struct trace_entry *ent, *next = NULL;
3101 	unsigned long lost_events = 0, next_lost = 0;
3102 	int cpu_file = iter->cpu_file;
3103 	u64 next_ts = 0, ts;
3104 	int next_cpu = -1;
3105 	int next_size = 0;
3106 	int cpu;
3107 
3108 	/*
3109 	 * If we are in a per_cpu trace file, don't bother by iterating over
3110 	 * all cpu and peek directly.
3111 	 */
3112 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3113 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3114 			return NULL;
3115 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3116 		if (ent_cpu)
3117 			*ent_cpu = cpu_file;
3118 
3119 		return ent;
3120 	}
3121 
3122 	for_each_tracing_cpu(cpu) {
3123 
3124 		if (ring_buffer_empty_cpu(buffer, cpu))
3125 			continue;
3126 
3127 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3128 
3129 		/*
3130 		 * Pick the entry with the smallest timestamp:
3131 		 */
3132 		if (ent && (!next || ts < next_ts)) {
3133 			next = ent;
3134 			next_cpu = cpu;
3135 			next_ts = ts;
3136 			next_lost = lost_events;
3137 			next_size = iter->ent_size;
3138 		}
3139 	}
3140 
3141 	iter->ent_size = next_size;
3142 
3143 	if (ent_cpu)
3144 		*ent_cpu = next_cpu;
3145 
3146 	if (ent_ts)
3147 		*ent_ts = next_ts;
3148 
3149 	if (missing_events)
3150 		*missing_events = next_lost;
3151 
3152 	return next;
3153 }
3154 
3155 /* Find the next real entry, without updating the iterator itself */
3156 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3157 					  int *ent_cpu, u64 *ent_ts)
3158 {
3159 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3160 }
3161 
3162 /* Find the next real entry, and increment the iterator to the next entry */
3163 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3164 {
3165 	iter->ent = __find_next_entry(iter, &iter->cpu,
3166 				      &iter->lost_events, &iter->ts);
3167 
3168 	if (iter->ent)
3169 		trace_iterator_increment(iter);
3170 
3171 	return iter->ent ? iter : NULL;
3172 }
3173 
3174 static void trace_consume(struct trace_iterator *iter)
3175 {
3176 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3177 			    &iter->lost_events);
3178 }
3179 
3180 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3181 {
3182 	struct trace_iterator *iter = m->private;
3183 	int i = (int)*pos;
3184 	void *ent;
3185 
3186 	WARN_ON_ONCE(iter->leftover);
3187 
3188 	(*pos)++;
3189 
3190 	/* can't go backwards */
3191 	if (iter->idx > i)
3192 		return NULL;
3193 
3194 	if (iter->idx < 0)
3195 		ent = trace_find_next_entry_inc(iter);
3196 	else
3197 		ent = iter;
3198 
3199 	while (ent && iter->idx < i)
3200 		ent = trace_find_next_entry_inc(iter);
3201 
3202 	iter->pos = *pos;
3203 
3204 	return ent;
3205 }
3206 
3207 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3208 {
3209 	struct ring_buffer_event *event;
3210 	struct ring_buffer_iter *buf_iter;
3211 	unsigned long entries = 0;
3212 	u64 ts;
3213 
3214 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3215 
3216 	buf_iter = trace_buffer_iter(iter, cpu);
3217 	if (!buf_iter)
3218 		return;
3219 
3220 	ring_buffer_iter_reset(buf_iter);
3221 
3222 	/*
3223 	 * We could have the case with the max latency tracers
3224 	 * that a reset never took place on a cpu. This is evident
3225 	 * by the timestamp being before the start of the buffer.
3226 	 */
3227 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3228 		if (ts >= iter->trace_buffer->time_start)
3229 			break;
3230 		entries++;
3231 		ring_buffer_read(buf_iter, NULL);
3232 	}
3233 
3234 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3235 }
3236 
3237 /*
3238  * The current tracer is copied to avoid a global locking
3239  * all around.
3240  */
3241 static void *s_start(struct seq_file *m, loff_t *pos)
3242 {
3243 	struct trace_iterator *iter = m->private;
3244 	struct trace_array *tr = iter->tr;
3245 	int cpu_file = iter->cpu_file;
3246 	void *p = NULL;
3247 	loff_t l = 0;
3248 	int cpu;
3249 
3250 	/*
3251 	 * copy the tracer to avoid using a global lock all around.
3252 	 * iter->trace is a copy of current_trace, the pointer to the
3253 	 * name may be used instead of a strcmp(), as iter->trace->name
3254 	 * will point to the same string as current_trace->name.
3255 	 */
3256 	mutex_lock(&trace_types_lock);
3257 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3258 		*iter->trace = *tr->current_trace;
3259 	mutex_unlock(&trace_types_lock);
3260 
3261 #ifdef CONFIG_TRACER_MAX_TRACE
3262 	if (iter->snapshot && iter->trace->use_max_tr)
3263 		return ERR_PTR(-EBUSY);
3264 #endif
3265 
3266 	if (!iter->snapshot)
3267 		atomic_inc(&trace_record_taskinfo_disabled);
3268 
3269 	if (*pos != iter->pos) {
3270 		iter->ent = NULL;
3271 		iter->cpu = 0;
3272 		iter->idx = -1;
3273 
3274 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3275 			for_each_tracing_cpu(cpu)
3276 				tracing_iter_reset(iter, cpu);
3277 		} else
3278 			tracing_iter_reset(iter, cpu_file);
3279 
3280 		iter->leftover = 0;
3281 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3282 			;
3283 
3284 	} else {
3285 		/*
3286 		 * If we overflowed the seq_file before, then we want
3287 		 * to just reuse the trace_seq buffer again.
3288 		 */
3289 		if (iter->leftover)
3290 			p = iter;
3291 		else {
3292 			l = *pos - 1;
3293 			p = s_next(m, p, &l);
3294 		}
3295 	}
3296 
3297 	trace_event_read_lock();
3298 	trace_access_lock(cpu_file);
3299 	return p;
3300 }
3301 
3302 static void s_stop(struct seq_file *m, void *p)
3303 {
3304 	struct trace_iterator *iter = m->private;
3305 
3306 #ifdef CONFIG_TRACER_MAX_TRACE
3307 	if (iter->snapshot && iter->trace->use_max_tr)
3308 		return;
3309 #endif
3310 
3311 	if (!iter->snapshot)
3312 		atomic_dec(&trace_record_taskinfo_disabled);
3313 
3314 	trace_access_unlock(iter->cpu_file);
3315 	trace_event_read_unlock();
3316 }
3317 
3318 static void
3319 get_total_entries(struct trace_buffer *buf,
3320 		  unsigned long *total, unsigned long *entries)
3321 {
3322 	unsigned long count;
3323 	int cpu;
3324 
3325 	*total = 0;
3326 	*entries = 0;
3327 
3328 	for_each_tracing_cpu(cpu) {
3329 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3330 		/*
3331 		 * If this buffer has skipped entries, then we hold all
3332 		 * entries for the trace and we need to ignore the
3333 		 * ones before the time stamp.
3334 		 */
3335 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3336 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3337 			/* total is the same as the entries */
3338 			*total += count;
3339 		} else
3340 			*total += count +
3341 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3342 		*entries += count;
3343 	}
3344 }
3345 
3346 static void print_lat_help_header(struct seq_file *m)
3347 {
3348 	seq_puts(m, "#                  _------=> CPU#            \n"
3349 		    "#                 / _-----=> irqs-off        \n"
3350 		    "#                | / _----=> need-resched    \n"
3351 		    "#                || / _---=> hardirq/softirq \n"
3352 		    "#                ||| / _--=> preempt-depth   \n"
3353 		    "#                |||| /     delay            \n"
3354 		    "#  cmd     pid   ||||| time  |   caller      \n"
3355 		    "#     \\   /      |||||  \\    |   /         \n");
3356 }
3357 
3358 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3359 {
3360 	unsigned long total;
3361 	unsigned long entries;
3362 
3363 	get_total_entries(buf, &total, &entries);
3364 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3365 		   entries, total, num_online_cpus());
3366 	seq_puts(m, "#\n");
3367 }
3368 
3369 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3370 				   unsigned int flags)
3371 {
3372 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3373 
3374 	print_event_info(buf, m);
3375 
3376 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3377 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3378 }
3379 
3380 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3381 				       unsigned int flags)
3382 {
3383 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3384 	const char tgid_space[] = "          ";
3385 	const char space[] = "  ";
3386 
3387 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3388 		   tgid ? tgid_space : space);
3389 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3390 		   tgid ? tgid_space : space);
3391 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3392 		   tgid ? tgid_space : space);
3393 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3394 		   tgid ? tgid_space : space);
3395 	seq_printf(m, "#                          %s||| /     delay\n",
3396 		   tgid ? tgid_space : space);
3397 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3398 		   tgid ? "   TGID   " : space);
3399 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3400 		   tgid ? "     |    " : space);
3401 }
3402 
3403 void
3404 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3405 {
3406 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3407 	struct trace_buffer *buf = iter->trace_buffer;
3408 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3409 	struct tracer *type = iter->trace;
3410 	unsigned long entries;
3411 	unsigned long total;
3412 	const char *name = "preemption";
3413 
3414 	name = type->name;
3415 
3416 	get_total_entries(buf, &total, &entries);
3417 
3418 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3419 		   name, UTS_RELEASE);
3420 	seq_puts(m, "# -----------------------------------"
3421 		 "---------------------------------\n");
3422 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3423 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3424 		   nsecs_to_usecs(data->saved_latency),
3425 		   entries,
3426 		   total,
3427 		   buf->cpu,
3428 #if defined(CONFIG_PREEMPT_NONE)
3429 		   "server",
3430 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3431 		   "desktop",
3432 #elif defined(CONFIG_PREEMPT)
3433 		   "preempt",
3434 #else
3435 		   "unknown",
3436 #endif
3437 		   /* These are reserved for later use */
3438 		   0, 0, 0, 0);
3439 #ifdef CONFIG_SMP
3440 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3441 #else
3442 	seq_puts(m, ")\n");
3443 #endif
3444 	seq_puts(m, "#    -----------------\n");
3445 	seq_printf(m, "#    | task: %.16s-%d "
3446 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3447 		   data->comm, data->pid,
3448 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3449 		   data->policy, data->rt_priority);
3450 	seq_puts(m, "#    -----------------\n");
3451 
3452 	if (data->critical_start) {
3453 		seq_puts(m, "#  => started at: ");
3454 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3455 		trace_print_seq(m, &iter->seq);
3456 		seq_puts(m, "\n#  => ended at:   ");
3457 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3458 		trace_print_seq(m, &iter->seq);
3459 		seq_puts(m, "\n#\n");
3460 	}
3461 
3462 	seq_puts(m, "#\n");
3463 }
3464 
3465 static void test_cpu_buff_start(struct trace_iterator *iter)
3466 {
3467 	struct trace_seq *s = &iter->seq;
3468 	struct trace_array *tr = iter->tr;
3469 
3470 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3471 		return;
3472 
3473 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3474 		return;
3475 
3476 	if (cpumask_available(iter->started) &&
3477 	    cpumask_test_cpu(iter->cpu, iter->started))
3478 		return;
3479 
3480 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3481 		return;
3482 
3483 	if (cpumask_available(iter->started))
3484 		cpumask_set_cpu(iter->cpu, iter->started);
3485 
3486 	/* Don't print started cpu buffer for the first entry of the trace */
3487 	if (iter->idx > 1)
3488 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3489 				iter->cpu);
3490 }
3491 
3492 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3493 {
3494 	struct trace_array *tr = iter->tr;
3495 	struct trace_seq *s = &iter->seq;
3496 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3497 	struct trace_entry *entry;
3498 	struct trace_event *event;
3499 
3500 	entry = iter->ent;
3501 
3502 	test_cpu_buff_start(iter);
3503 
3504 	event = ftrace_find_event(entry->type);
3505 
3506 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3507 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3508 			trace_print_lat_context(iter);
3509 		else
3510 			trace_print_context(iter);
3511 	}
3512 
3513 	if (trace_seq_has_overflowed(s))
3514 		return TRACE_TYPE_PARTIAL_LINE;
3515 
3516 	if (event)
3517 		return event->funcs->trace(iter, sym_flags, event);
3518 
3519 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3520 
3521 	return trace_handle_return(s);
3522 }
3523 
3524 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3525 {
3526 	struct trace_array *tr = iter->tr;
3527 	struct trace_seq *s = &iter->seq;
3528 	struct trace_entry *entry;
3529 	struct trace_event *event;
3530 
3531 	entry = iter->ent;
3532 
3533 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3534 		trace_seq_printf(s, "%d %d %llu ",
3535 				 entry->pid, iter->cpu, iter->ts);
3536 
3537 	if (trace_seq_has_overflowed(s))
3538 		return TRACE_TYPE_PARTIAL_LINE;
3539 
3540 	event = ftrace_find_event(entry->type);
3541 	if (event)
3542 		return event->funcs->raw(iter, 0, event);
3543 
3544 	trace_seq_printf(s, "%d ?\n", entry->type);
3545 
3546 	return trace_handle_return(s);
3547 }
3548 
3549 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3550 {
3551 	struct trace_array *tr = iter->tr;
3552 	struct trace_seq *s = &iter->seq;
3553 	unsigned char newline = '\n';
3554 	struct trace_entry *entry;
3555 	struct trace_event *event;
3556 
3557 	entry = iter->ent;
3558 
3559 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3560 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3561 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3562 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3563 		if (trace_seq_has_overflowed(s))
3564 			return TRACE_TYPE_PARTIAL_LINE;
3565 	}
3566 
3567 	event = ftrace_find_event(entry->type);
3568 	if (event) {
3569 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3570 		if (ret != TRACE_TYPE_HANDLED)
3571 			return ret;
3572 	}
3573 
3574 	SEQ_PUT_FIELD(s, newline);
3575 
3576 	return trace_handle_return(s);
3577 }
3578 
3579 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3580 {
3581 	struct trace_array *tr = iter->tr;
3582 	struct trace_seq *s = &iter->seq;
3583 	struct trace_entry *entry;
3584 	struct trace_event *event;
3585 
3586 	entry = iter->ent;
3587 
3588 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3589 		SEQ_PUT_FIELD(s, entry->pid);
3590 		SEQ_PUT_FIELD(s, iter->cpu);
3591 		SEQ_PUT_FIELD(s, iter->ts);
3592 		if (trace_seq_has_overflowed(s))
3593 			return TRACE_TYPE_PARTIAL_LINE;
3594 	}
3595 
3596 	event = ftrace_find_event(entry->type);
3597 	return event ? event->funcs->binary(iter, 0, event) :
3598 		TRACE_TYPE_HANDLED;
3599 }
3600 
3601 int trace_empty(struct trace_iterator *iter)
3602 {
3603 	struct ring_buffer_iter *buf_iter;
3604 	int cpu;
3605 
3606 	/* If we are looking at one CPU buffer, only check that one */
3607 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3608 		cpu = iter->cpu_file;
3609 		buf_iter = trace_buffer_iter(iter, cpu);
3610 		if (buf_iter) {
3611 			if (!ring_buffer_iter_empty(buf_iter))
3612 				return 0;
3613 		} else {
3614 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3615 				return 0;
3616 		}
3617 		return 1;
3618 	}
3619 
3620 	for_each_tracing_cpu(cpu) {
3621 		buf_iter = trace_buffer_iter(iter, cpu);
3622 		if (buf_iter) {
3623 			if (!ring_buffer_iter_empty(buf_iter))
3624 				return 0;
3625 		} else {
3626 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3627 				return 0;
3628 		}
3629 	}
3630 
3631 	return 1;
3632 }
3633 
3634 /*  Called with trace_event_read_lock() held. */
3635 enum print_line_t print_trace_line(struct trace_iterator *iter)
3636 {
3637 	struct trace_array *tr = iter->tr;
3638 	unsigned long trace_flags = tr->trace_flags;
3639 	enum print_line_t ret;
3640 
3641 	if (iter->lost_events) {
3642 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3643 				 iter->cpu, iter->lost_events);
3644 		if (trace_seq_has_overflowed(&iter->seq))
3645 			return TRACE_TYPE_PARTIAL_LINE;
3646 	}
3647 
3648 	if (iter->trace && iter->trace->print_line) {
3649 		ret = iter->trace->print_line(iter);
3650 		if (ret != TRACE_TYPE_UNHANDLED)
3651 			return ret;
3652 	}
3653 
3654 	if (iter->ent->type == TRACE_BPUTS &&
3655 			trace_flags & TRACE_ITER_PRINTK &&
3656 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3657 		return trace_print_bputs_msg_only(iter);
3658 
3659 	if (iter->ent->type == TRACE_BPRINT &&
3660 			trace_flags & TRACE_ITER_PRINTK &&
3661 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3662 		return trace_print_bprintk_msg_only(iter);
3663 
3664 	if (iter->ent->type == TRACE_PRINT &&
3665 			trace_flags & TRACE_ITER_PRINTK &&
3666 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3667 		return trace_print_printk_msg_only(iter);
3668 
3669 	if (trace_flags & TRACE_ITER_BIN)
3670 		return print_bin_fmt(iter);
3671 
3672 	if (trace_flags & TRACE_ITER_HEX)
3673 		return print_hex_fmt(iter);
3674 
3675 	if (trace_flags & TRACE_ITER_RAW)
3676 		return print_raw_fmt(iter);
3677 
3678 	return print_trace_fmt(iter);
3679 }
3680 
3681 void trace_latency_header(struct seq_file *m)
3682 {
3683 	struct trace_iterator *iter = m->private;
3684 	struct trace_array *tr = iter->tr;
3685 
3686 	/* print nothing if the buffers are empty */
3687 	if (trace_empty(iter))
3688 		return;
3689 
3690 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3691 		print_trace_header(m, iter);
3692 
3693 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3694 		print_lat_help_header(m);
3695 }
3696 
3697 void trace_default_header(struct seq_file *m)
3698 {
3699 	struct trace_iterator *iter = m->private;
3700 	struct trace_array *tr = iter->tr;
3701 	unsigned long trace_flags = tr->trace_flags;
3702 
3703 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3704 		return;
3705 
3706 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3707 		/* print nothing if the buffers are empty */
3708 		if (trace_empty(iter))
3709 			return;
3710 		print_trace_header(m, iter);
3711 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3712 			print_lat_help_header(m);
3713 	} else {
3714 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3715 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3716 				print_func_help_header_irq(iter->trace_buffer,
3717 							   m, trace_flags);
3718 			else
3719 				print_func_help_header(iter->trace_buffer, m,
3720 						       trace_flags);
3721 		}
3722 	}
3723 }
3724 
3725 static void test_ftrace_alive(struct seq_file *m)
3726 {
3727 	if (!ftrace_is_dead())
3728 		return;
3729 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3730 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3731 }
3732 
3733 #ifdef CONFIG_TRACER_MAX_TRACE
3734 static void show_snapshot_main_help(struct seq_file *m)
3735 {
3736 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3737 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3738 		    "#                      Takes a snapshot of the main buffer.\n"
3739 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3740 		    "#                      (Doesn't have to be '2' works with any number that\n"
3741 		    "#                       is not a '0' or '1')\n");
3742 }
3743 
3744 static void show_snapshot_percpu_help(struct seq_file *m)
3745 {
3746 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3747 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3748 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3749 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3750 #else
3751 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3752 		    "#                     Must use main snapshot file to allocate.\n");
3753 #endif
3754 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3755 		    "#                      (Doesn't have to be '2' works with any number that\n"
3756 		    "#                       is not a '0' or '1')\n");
3757 }
3758 
3759 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3760 {
3761 	if (iter->tr->allocated_snapshot)
3762 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3763 	else
3764 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3765 
3766 	seq_puts(m, "# Snapshot commands:\n");
3767 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3768 		show_snapshot_main_help(m);
3769 	else
3770 		show_snapshot_percpu_help(m);
3771 }
3772 #else
3773 /* Should never be called */
3774 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3775 #endif
3776 
3777 static int s_show(struct seq_file *m, void *v)
3778 {
3779 	struct trace_iterator *iter = v;
3780 	int ret;
3781 
3782 	if (iter->ent == NULL) {
3783 		if (iter->tr) {
3784 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3785 			seq_puts(m, "#\n");
3786 			test_ftrace_alive(m);
3787 		}
3788 		if (iter->snapshot && trace_empty(iter))
3789 			print_snapshot_help(m, iter);
3790 		else if (iter->trace && iter->trace->print_header)
3791 			iter->trace->print_header(m);
3792 		else
3793 			trace_default_header(m);
3794 
3795 	} else if (iter->leftover) {
3796 		/*
3797 		 * If we filled the seq_file buffer earlier, we
3798 		 * want to just show it now.
3799 		 */
3800 		ret = trace_print_seq(m, &iter->seq);
3801 
3802 		/* ret should this time be zero, but you never know */
3803 		iter->leftover = ret;
3804 
3805 	} else {
3806 		print_trace_line(iter);
3807 		ret = trace_print_seq(m, &iter->seq);
3808 		/*
3809 		 * If we overflow the seq_file buffer, then it will
3810 		 * ask us for this data again at start up.
3811 		 * Use that instead.
3812 		 *  ret is 0 if seq_file write succeeded.
3813 		 *        -1 otherwise.
3814 		 */
3815 		iter->leftover = ret;
3816 	}
3817 
3818 	return 0;
3819 }
3820 
3821 /*
3822  * Should be used after trace_array_get(), trace_types_lock
3823  * ensures that i_cdev was already initialized.
3824  */
3825 static inline int tracing_get_cpu(struct inode *inode)
3826 {
3827 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3828 		return (long)inode->i_cdev - 1;
3829 	return RING_BUFFER_ALL_CPUS;
3830 }
3831 
3832 static const struct seq_operations tracer_seq_ops = {
3833 	.start		= s_start,
3834 	.next		= s_next,
3835 	.stop		= s_stop,
3836 	.show		= s_show,
3837 };
3838 
3839 static struct trace_iterator *
3840 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3841 {
3842 	struct trace_array *tr = inode->i_private;
3843 	struct trace_iterator *iter;
3844 	int cpu;
3845 
3846 	if (tracing_disabled)
3847 		return ERR_PTR(-ENODEV);
3848 
3849 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3850 	if (!iter)
3851 		return ERR_PTR(-ENOMEM);
3852 
3853 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3854 				    GFP_KERNEL);
3855 	if (!iter->buffer_iter)
3856 		goto release;
3857 
3858 	/*
3859 	 * We make a copy of the current tracer to avoid concurrent
3860 	 * changes on it while we are reading.
3861 	 */
3862 	mutex_lock(&trace_types_lock);
3863 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3864 	if (!iter->trace)
3865 		goto fail;
3866 
3867 	*iter->trace = *tr->current_trace;
3868 
3869 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3870 		goto fail;
3871 
3872 	iter->tr = tr;
3873 
3874 #ifdef CONFIG_TRACER_MAX_TRACE
3875 	/* Currently only the top directory has a snapshot */
3876 	if (tr->current_trace->print_max || snapshot)
3877 		iter->trace_buffer = &tr->max_buffer;
3878 	else
3879 #endif
3880 		iter->trace_buffer = &tr->trace_buffer;
3881 	iter->snapshot = snapshot;
3882 	iter->pos = -1;
3883 	iter->cpu_file = tracing_get_cpu(inode);
3884 	mutex_init(&iter->mutex);
3885 
3886 	/* Notify the tracer early; before we stop tracing. */
3887 	if (iter->trace && iter->trace->open)
3888 		iter->trace->open(iter);
3889 
3890 	/* Annotate start of buffers if we had overruns */
3891 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3892 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3893 
3894 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3895 	if (trace_clocks[tr->clock_id].in_ns)
3896 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3897 
3898 	/* stop the trace while dumping if we are not opening "snapshot" */
3899 	if (!iter->snapshot)
3900 		tracing_stop_tr(tr);
3901 
3902 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3903 		for_each_tracing_cpu(cpu) {
3904 			iter->buffer_iter[cpu] =
3905 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3906 		}
3907 		ring_buffer_read_prepare_sync();
3908 		for_each_tracing_cpu(cpu) {
3909 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3910 			tracing_iter_reset(iter, cpu);
3911 		}
3912 	} else {
3913 		cpu = iter->cpu_file;
3914 		iter->buffer_iter[cpu] =
3915 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3916 		ring_buffer_read_prepare_sync();
3917 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3918 		tracing_iter_reset(iter, cpu);
3919 	}
3920 
3921 	mutex_unlock(&trace_types_lock);
3922 
3923 	return iter;
3924 
3925  fail:
3926 	mutex_unlock(&trace_types_lock);
3927 	kfree(iter->trace);
3928 	kfree(iter->buffer_iter);
3929 release:
3930 	seq_release_private(inode, file);
3931 	return ERR_PTR(-ENOMEM);
3932 }
3933 
3934 int tracing_open_generic(struct inode *inode, struct file *filp)
3935 {
3936 	if (tracing_disabled)
3937 		return -ENODEV;
3938 
3939 	filp->private_data = inode->i_private;
3940 	return 0;
3941 }
3942 
3943 bool tracing_is_disabled(void)
3944 {
3945 	return (tracing_disabled) ? true: false;
3946 }
3947 
3948 /*
3949  * Open and update trace_array ref count.
3950  * Must have the current trace_array passed to it.
3951  */
3952 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3953 {
3954 	struct trace_array *tr = inode->i_private;
3955 
3956 	if (tracing_disabled)
3957 		return -ENODEV;
3958 
3959 	if (trace_array_get(tr) < 0)
3960 		return -ENODEV;
3961 
3962 	filp->private_data = inode->i_private;
3963 
3964 	return 0;
3965 }
3966 
3967 static int tracing_release(struct inode *inode, struct file *file)
3968 {
3969 	struct trace_array *tr = inode->i_private;
3970 	struct seq_file *m = file->private_data;
3971 	struct trace_iterator *iter;
3972 	int cpu;
3973 
3974 	if (!(file->f_mode & FMODE_READ)) {
3975 		trace_array_put(tr);
3976 		return 0;
3977 	}
3978 
3979 	/* Writes do not use seq_file */
3980 	iter = m->private;
3981 	mutex_lock(&trace_types_lock);
3982 
3983 	for_each_tracing_cpu(cpu) {
3984 		if (iter->buffer_iter[cpu])
3985 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3986 	}
3987 
3988 	if (iter->trace && iter->trace->close)
3989 		iter->trace->close(iter);
3990 
3991 	if (!iter->snapshot)
3992 		/* reenable tracing if it was previously enabled */
3993 		tracing_start_tr(tr);
3994 
3995 	__trace_array_put(tr);
3996 
3997 	mutex_unlock(&trace_types_lock);
3998 
3999 	mutex_destroy(&iter->mutex);
4000 	free_cpumask_var(iter->started);
4001 	kfree(iter->trace);
4002 	kfree(iter->buffer_iter);
4003 	seq_release_private(inode, file);
4004 
4005 	return 0;
4006 }
4007 
4008 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4009 {
4010 	struct trace_array *tr = inode->i_private;
4011 
4012 	trace_array_put(tr);
4013 	return 0;
4014 }
4015 
4016 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4017 {
4018 	struct trace_array *tr = inode->i_private;
4019 
4020 	trace_array_put(tr);
4021 
4022 	return single_release(inode, file);
4023 }
4024 
4025 static int tracing_open(struct inode *inode, struct file *file)
4026 {
4027 	struct trace_array *tr = inode->i_private;
4028 	struct trace_iterator *iter;
4029 	int ret = 0;
4030 
4031 	if (trace_array_get(tr) < 0)
4032 		return -ENODEV;
4033 
4034 	/* If this file was open for write, then erase contents */
4035 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4036 		int cpu = tracing_get_cpu(inode);
4037 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4038 
4039 #ifdef CONFIG_TRACER_MAX_TRACE
4040 		if (tr->current_trace->print_max)
4041 			trace_buf = &tr->max_buffer;
4042 #endif
4043 
4044 		if (cpu == RING_BUFFER_ALL_CPUS)
4045 			tracing_reset_online_cpus(trace_buf);
4046 		else
4047 			tracing_reset(trace_buf, cpu);
4048 	}
4049 
4050 	if (file->f_mode & FMODE_READ) {
4051 		iter = __tracing_open(inode, file, false);
4052 		if (IS_ERR(iter))
4053 			ret = PTR_ERR(iter);
4054 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4055 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4056 	}
4057 
4058 	if (ret < 0)
4059 		trace_array_put(tr);
4060 
4061 	return ret;
4062 }
4063 
4064 /*
4065  * Some tracers are not suitable for instance buffers.
4066  * A tracer is always available for the global array (toplevel)
4067  * or if it explicitly states that it is.
4068  */
4069 static bool
4070 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4071 {
4072 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4073 }
4074 
4075 /* Find the next tracer that this trace array may use */
4076 static struct tracer *
4077 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4078 {
4079 	while (t && !trace_ok_for_array(t, tr))
4080 		t = t->next;
4081 
4082 	return t;
4083 }
4084 
4085 static void *
4086 t_next(struct seq_file *m, void *v, loff_t *pos)
4087 {
4088 	struct trace_array *tr = m->private;
4089 	struct tracer *t = v;
4090 
4091 	(*pos)++;
4092 
4093 	if (t)
4094 		t = get_tracer_for_array(tr, t->next);
4095 
4096 	return t;
4097 }
4098 
4099 static void *t_start(struct seq_file *m, loff_t *pos)
4100 {
4101 	struct trace_array *tr = m->private;
4102 	struct tracer *t;
4103 	loff_t l = 0;
4104 
4105 	mutex_lock(&trace_types_lock);
4106 
4107 	t = get_tracer_for_array(tr, trace_types);
4108 	for (; t && l < *pos; t = t_next(m, t, &l))
4109 			;
4110 
4111 	return t;
4112 }
4113 
4114 static void t_stop(struct seq_file *m, void *p)
4115 {
4116 	mutex_unlock(&trace_types_lock);
4117 }
4118 
4119 static int t_show(struct seq_file *m, void *v)
4120 {
4121 	struct tracer *t = v;
4122 
4123 	if (!t)
4124 		return 0;
4125 
4126 	seq_puts(m, t->name);
4127 	if (t->next)
4128 		seq_putc(m, ' ');
4129 	else
4130 		seq_putc(m, '\n');
4131 
4132 	return 0;
4133 }
4134 
4135 static const struct seq_operations show_traces_seq_ops = {
4136 	.start		= t_start,
4137 	.next		= t_next,
4138 	.stop		= t_stop,
4139 	.show		= t_show,
4140 };
4141 
4142 static int show_traces_open(struct inode *inode, struct file *file)
4143 {
4144 	struct trace_array *tr = inode->i_private;
4145 	struct seq_file *m;
4146 	int ret;
4147 
4148 	if (tracing_disabled)
4149 		return -ENODEV;
4150 
4151 	ret = seq_open(file, &show_traces_seq_ops);
4152 	if (ret)
4153 		return ret;
4154 
4155 	m = file->private_data;
4156 	m->private = tr;
4157 
4158 	return 0;
4159 }
4160 
4161 static ssize_t
4162 tracing_write_stub(struct file *filp, const char __user *ubuf,
4163 		   size_t count, loff_t *ppos)
4164 {
4165 	return count;
4166 }
4167 
4168 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4169 {
4170 	int ret;
4171 
4172 	if (file->f_mode & FMODE_READ)
4173 		ret = seq_lseek(file, offset, whence);
4174 	else
4175 		file->f_pos = ret = 0;
4176 
4177 	return ret;
4178 }
4179 
4180 static const struct file_operations tracing_fops = {
4181 	.open		= tracing_open,
4182 	.read		= seq_read,
4183 	.write		= tracing_write_stub,
4184 	.llseek		= tracing_lseek,
4185 	.release	= tracing_release,
4186 };
4187 
4188 static const struct file_operations show_traces_fops = {
4189 	.open		= show_traces_open,
4190 	.read		= seq_read,
4191 	.release	= seq_release,
4192 	.llseek		= seq_lseek,
4193 };
4194 
4195 static ssize_t
4196 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4197 		     size_t count, loff_t *ppos)
4198 {
4199 	struct trace_array *tr = file_inode(filp)->i_private;
4200 	char *mask_str;
4201 	int len;
4202 
4203 	len = snprintf(NULL, 0, "%*pb\n",
4204 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4205 	mask_str = kmalloc(len, GFP_KERNEL);
4206 	if (!mask_str)
4207 		return -ENOMEM;
4208 
4209 	len = snprintf(mask_str, len, "%*pb\n",
4210 		       cpumask_pr_args(tr->tracing_cpumask));
4211 	if (len >= count) {
4212 		count = -EINVAL;
4213 		goto out_err;
4214 	}
4215 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4216 
4217 out_err:
4218 	kfree(mask_str);
4219 
4220 	return count;
4221 }
4222 
4223 static ssize_t
4224 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4225 		      size_t count, loff_t *ppos)
4226 {
4227 	struct trace_array *tr = file_inode(filp)->i_private;
4228 	cpumask_var_t tracing_cpumask_new;
4229 	int err, cpu;
4230 
4231 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4232 		return -ENOMEM;
4233 
4234 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4235 	if (err)
4236 		goto err_unlock;
4237 
4238 	local_irq_disable();
4239 	arch_spin_lock(&tr->max_lock);
4240 	for_each_tracing_cpu(cpu) {
4241 		/*
4242 		 * Increase/decrease the disabled counter if we are
4243 		 * about to flip a bit in the cpumask:
4244 		 */
4245 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4246 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4247 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4248 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4249 		}
4250 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4251 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4252 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4253 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4254 		}
4255 	}
4256 	arch_spin_unlock(&tr->max_lock);
4257 	local_irq_enable();
4258 
4259 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4260 	free_cpumask_var(tracing_cpumask_new);
4261 
4262 	return count;
4263 
4264 err_unlock:
4265 	free_cpumask_var(tracing_cpumask_new);
4266 
4267 	return err;
4268 }
4269 
4270 static const struct file_operations tracing_cpumask_fops = {
4271 	.open		= tracing_open_generic_tr,
4272 	.read		= tracing_cpumask_read,
4273 	.write		= tracing_cpumask_write,
4274 	.release	= tracing_release_generic_tr,
4275 	.llseek		= generic_file_llseek,
4276 };
4277 
4278 static int tracing_trace_options_show(struct seq_file *m, void *v)
4279 {
4280 	struct tracer_opt *trace_opts;
4281 	struct trace_array *tr = m->private;
4282 	u32 tracer_flags;
4283 	int i;
4284 
4285 	mutex_lock(&trace_types_lock);
4286 	tracer_flags = tr->current_trace->flags->val;
4287 	trace_opts = tr->current_trace->flags->opts;
4288 
4289 	for (i = 0; trace_options[i]; i++) {
4290 		if (tr->trace_flags & (1 << i))
4291 			seq_printf(m, "%s\n", trace_options[i]);
4292 		else
4293 			seq_printf(m, "no%s\n", trace_options[i]);
4294 	}
4295 
4296 	for (i = 0; trace_opts[i].name; i++) {
4297 		if (tracer_flags & trace_opts[i].bit)
4298 			seq_printf(m, "%s\n", trace_opts[i].name);
4299 		else
4300 			seq_printf(m, "no%s\n", trace_opts[i].name);
4301 	}
4302 	mutex_unlock(&trace_types_lock);
4303 
4304 	return 0;
4305 }
4306 
4307 static int __set_tracer_option(struct trace_array *tr,
4308 			       struct tracer_flags *tracer_flags,
4309 			       struct tracer_opt *opts, int neg)
4310 {
4311 	struct tracer *trace = tracer_flags->trace;
4312 	int ret;
4313 
4314 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4315 	if (ret)
4316 		return ret;
4317 
4318 	if (neg)
4319 		tracer_flags->val &= ~opts->bit;
4320 	else
4321 		tracer_flags->val |= opts->bit;
4322 	return 0;
4323 }
4324 
4325 /* Try to assign a tracer specific option */
4326 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4327 {
4328 	struct tracer *trace = tr->current_trace;
4329 	struct tracer_flags *tracer_flags = trace->flags;
4330 	struct tracer_opt *opts = NULL;
4331 	int i;
4332 
4333 	for (i = 0; tracer_flags->opts[i].name; i++) {
4334 		opts = &tracer_flags->opts[i];
4335 
4336 		if (strcmp(cmp, opts->name) == 0)
4337 			return __set_tracer_option(tr, trace->flags, opts, neg);
4338 	}
4339 
4340 	return -EINVAL;
4341 }
4342 
4343 /* Some tracers require overwrite to stay enabled */
4344 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4345 {
4346 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4347 		return -1;
4348 
4349 	return 0;
4350 }
4351 
4352 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4353 {
4354 	/* do nothing if flag is already set */
4355 	if (!!(tr->trace_flags & mask) == !!enabled)
4356 		return 0;
4357 
4358 	/* Give the tracer a chance to approve the change */
4359 	if (tr->current_trace->flag_changed)
4360 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4361 			return -EINVAL;
4362 
4363 	if (enabled)
4364 		tr->trace_flags |= mask;
4365 	else
4366 		tr->trace_flags &= ~mask;
4367 
4368 	if (mask == TRACE_ITER_RECORD_CMD)
4369 		trace_event_enable_cmd_record(enabled);
4370 
4371 	if (mask == TRACE_ITER_RECORD_TGID) {
4372 		if (!tgid_map)
4373 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4374 					   sizeof(*tgid_map),
4375 					   GFP_KERNEL);
4376 		if (!tgid_map) {
4377 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4378 			return -ENOMEM;
4379 		}
4380 
4381 		trace_event_enable_tgid_record(enabled);
4382 	}
4383 
4384 	if (mask == TRACE_ITER_EVENT_FORK)
4385 		trace_event_follow_fork(tr, enabled);
4386 
4387 	if (mask == TRACE_ITER_FUNC_FORK)
4388 		ftrace_pid_follow_fork(tr, enabled);
4389 
4390 	if (mask == TRACE_ITER_OVERWRITE) {
4391 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4392 #ifdef CONFIG_TRACER_MAX_TRACE
4393 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4394 #endif
4395 	}
4396 
4397 	if (mask == TRACE_ITER_PRINTK) {
4398 		trace_printk_start_stop_comm(enabled);
4399 		trace_printk_control(enabled);
4400 	}
4401 
4402 	return 0;
4403 }
4404 
4405 static int trace_set_options(struct trace_array *tr, char *option)
4406 {
4407 	char *cmp;
4408 	int neg = 0;
4409 	int ret;
4410 	size_t orig_len = strlen(option);
4411 
4412 	cmp = strstrip(option);
4413 
4414 	if (strncmp(cmp, "no", 2) == 0) {
4415 		neg = 1;
4416 		cmp += 2;
4417 	}
4418 
4419 	mutex_lock(&trace_types_lock);
4420 
4421 	ret = match_string(trace_options, -1, cmp);
4422 	/* If no option could be set, test the specific tracer options */
4423 	if (ret < 0)
4424 		ret = set_tracer_option(tr, cmp, neg);
4425 	else
4426 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4427 
4428 	mutex_unlock(&trace_types_lock);
4429 
4430 	/*
4431 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4432 	 * turn it back into a space.
4433 	 */
4434 	if (orig_len > strlen(option))
4435 		option[strlen(option)] = ' ';
4436 
4437 	return ret;
4438 }
4439 
4440 static void __init apply_trace_boot_options(void)
4441 {
4442 	char *buf = trace_boot_options_buf;
4443 	char *option;
4444 
4445 	while (true) {
4446 		option = strsep(&buf, ",");
4447 
4448 		if (!option)
4449 			break;
4450 
4451 		if (*option)
4452 			trace_set_options(&global_trace, option);
4453 
4454 		/* Put back the comma to allow this to be called again */
4455 		if (buf)
4456 			*(buf - 1) = ',';
4457 	}
4458 }
4459 
4460 static ssize_t
4461 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4462 			size_t cnt, loff_t *ppos)
4463 {
4464 	struct seq_file *m = filp->private_data;
4465 	struct trace_array *tr = m->private;
4466 	char buf[64];
4467 	int ret;
4468 
4469 	if (cnt >= sizeof(buf))
4470 		return -EINVAL;
4471 
4472 	if (copy_from_user(buf, ubuf, cnt))
4473 		return -EFAULT;
4474 
4475 	buf[cnt] = 0;
4476 
4477 	ret = trace_set_options(tr, buf);
4478 	if (ret < 0)
4479 		return ret;
4480 
4481 	*ppos += cnt;
4482 
4483 	return cnt;
4484 }
4485 
4486 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4487 {
4488 	struct trace_array *tr = inode->i_private;
4489 	int ret;
4490 
4491 	if (tracing_disabled)
4492 		return -ENODEV;
4493 
4494 	if (trace_array_get(tr) < 0)
4495 		return -ENODEV;
4496 
4497 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4498 	if (ret < 0)
4499 		trace_array_put(tr);
4500 
4501 	return ret;
4502 }
4503 
4504 static const struct file_operations tracing_iter_fops = {
4505 	.open		= tracing_trace_options_open,
4506 	.read		= seq_read,
4507 	.llseek		= seq_lseek,
4508 	.release	= tracing_single_release_tr,
4509 	.write		= tracing_trace_options_write,
4510 };
4511 
4512 static const char readme_msg[] =
4513 	"tracing mini-HOWTO:\n\n"
4514 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4515 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4516 	" Important files:\n"
4517 	"  trace\t\t\t- The static contents of the buffer\n"
4518 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4519 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4520 	"  current_tracer\t- function and latency tracers\n"
4521 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4522 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4523 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4524 	"  trace_clock\t\t-change the clock used to order events\n"
4525 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4526 	"      global:   Synced across CPUs but slows tracing down.\n"
4527 	"     counter:   Not a clock, but just an increment\n"
4528 	"      uptime:   Jiffy counter from time of boot\n"
4529 	"        perf:   Same clock that perf events use\n"
4530 #ifdef CONFIG_X86_64
4531 	"     x86-tsc:   TSC cycle counter\n"
4532 #endif
4533 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4534 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4535 	"    absolute:   Absolute (standalone) timestamp\n"
4536 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4537 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4538 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4539 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4540 	"\t\t\t  Remove sub-buffer with rmdir\n"
4541 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4542 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4543 	"\t\t\t  option name\n"
4544 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4545 #ifdef CONFIG_DYNAMIC_FTRACE
4546 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4547 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4548 	"\t\t\t  functions\n"
4549 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4550 	"\t     modules: Can select a group via module\n"
4551 	"\t      Format: :mod:<module-name>\n"
4552 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4553 	"\t    triggers: a command to perform when function is hit\n"
4554 	"\t      Format: <function>:<trigger>[:count]\n"
4555 	"\t     trigger: traceon, traceoff\n"
4556 	"\t\t      enable_event:<system>:<event>\n"
4557 	"\t\t      disable_event:<system>:<event>\n"
4558 #ifdef CONFIG_STACKTRACE
4559 	"\t\t      stacktrace\n"
4560 #endif
4561 #ifdef CONFIG_TRACER_SNAPSHOT
4562 	"\t\t      snapshot\n"
4563 #endif
4564 	"\t\t      dump\n"
4565 	"\t\t      cpudump\n"
4566 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4567 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4568 	"\t     The first one will disable tracing every time do_fault is hit\n"
4569 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4570 	"\t       The first time do trap is hit and it disables tracing, the\n"
4571 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4572 	"\t       the counter will not decrement. It only decrements when the\n"
4573 	"\t       trigger did work\n"
4574 	"\t     To remove trigger without count:\n"
4575 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4576 	"\t     To remove trigger with a count:\n"
4577 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4578 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4579 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4580 	"\t    modules: Can select a group via module command :mod:\n"
4581 	"\t    Does not accept triggers\n"
4582 #endif /* CONFIG_DYNAMIC_FTRACE */
4583 #ifdef CONFIG_FUNCTION_TRACER
4584 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4585 	"\t\t    (function)\n"
4586 #endif
4587 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4588 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4589 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4590 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4591 #endif
4592 #ifdef CONFIG_TRACER_SNAPSHOT
4593 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4594 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4595 	"\t\t\t  information\n"
4596 #endif
4597 #ifdef CONFIG_STACK_TRACER
4598 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4599 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4600 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4601 	"\t\t\t  new trace)\n"
4602 #ifdef CONFIG_DYNAMIC_FTRACE
4603 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4604 	"\t\t\t  traces\n"
4605 #endif
4606 #endif /* CONFIG_STACK_TRACER */
4607 #ifdef CONFIG_KPROBE_EVENTS
4608 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4609 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4610 #endif
4611 #ifdef CONFIG_UPROBE_EVENTS
4612 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4613 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4614 #endif
4615 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4616 	"\t  accepts: event-definitions (one definition per line)\n"
4617 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4618 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4619 	"\t           -:[<group>/]<event>\n"
4620 #ifdef CONFIG_KPROBE_EVENTS
4621 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4622   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4623 #endif
4624 #ifdef CONFIG_UPROBE_EVENTS
4625   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4626 #endif
4627 	"\t     args: <name>=fetcharg[:type]\n"
4628 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4629 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4630 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4631 #else
4632 	"\t           $stack<index>, $stack, $retval, $comm\n"
4633 #endif
4634 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4635 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4636 	"\t           <type>\\[<array-size>\\]\n"
4637 #endif
4638 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4639 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4640 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4641 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4642 	"\t\t\t  events\n"
4643 	"      filter\t\t- If set, only events passing filter are traced\n"
4644 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4645 	"\t\t\t  <event>:\n"
4646 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4647 	"      filter\t\t- If set, only events passing filter are traced\n"
4648 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4649 	"\t    Format: <trigger>[:count][if <filter>]\n"
4650 	"\t   trigger: traceon, traceoff\n"
4651 	"\t            enable_event:<system>:<event>\n"
4652 	"\t            disable_event:<system>:<event>\n"
4653 #ifdef CONFIG_HIST_TRIGGERS
4654 	"\t            enable_hist:<system>:<event>\n"
4655 	"\t            disable_hist:<system>:<event>\n"
4656 #endif
4657 #ifdef CONFIG_STACKTRACE
4658 	"\t\t    stacktrace\n"
4659 #endif
4660 #ifdef CONFIG_TRACER_SNAPSHOT
4661 	"\t\t    snapshot\n"
4662 #endif
4663 #ifdef CONFIG_HIST_TRIGGERS
4664 	"\t\t    hist (see below)\n"
4665 #endif
4666 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4667 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4668 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4669 	"\t                  events/block/block_unplug/trigger\n"
4670 	"\t   The first disables tracing every time block_unplug is hit.\n"
4671 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4672 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4673 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4674 	"\t   Like function triggers, the counter is only decremented if it\n"
4675 	"\t    enabled or disabled tracing.\n"
4676 	"\t   To remove a trigger without a count:\n"
4677 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4678 	"\t   To remove a trigger with a count:\n"
4679 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4680 	"\t   Filters can be ignored when removing a trigger.\n"
4681 #ifdef CONFIG_HIST_TRIGGERS
4682 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4683 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4684 	"\t            [:values=<field1[,field2,...]>]\n"
4685 	"\t            [:sort=<field1[,field2,...]>]\n"
4686 	"\t            [:size=#entries]\n"
4687 	"\t            [:pause][:continue][:clear]\n"
4688 	"\t            [:name=histname1]\n"
4689 	"\t            [if <filter>]\n\n"
4690 	"\t    When a matching event is hit, an entry is added to a hash\n"
4691 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4692 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4693 	"\t    correspond to fields in the event's format description.  Keys\n"
4694 	"\t    can be any field, or the special string 'stacktrace'.\n"
4695 	"\t    Compound keys consisting of up to two fields can be specified\n"
4696 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4697 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4698 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4699 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4700 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4701 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4702 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4703 	"\t    its histogram data will be shared with other triggers of the\n"
4704 	"\t    same name, and trigger hits will update this common data.\n\n"
4705 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4706 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4707 	"\t    triggers attached to an event, there will be a table for each\n"
4708 	"\t    trigger in the output.  The table displayed for a named\n"
4709 	"\t    trigger will be the same as any other instance having the\n"
4710 	"\t    same name.  The default format used to display a given field\n"
4711 	"\t    can be modified by appending any of the following modifiers\n"
4712 	"\t    to the field name, as applicable:\n\n"
4713 	"\t            .hex        display a number as a hex value\n"
4714 	"\t            .sym        display an address as a symbol\n"
4715 	"\t            .sym-offset display an address as a symbol and offset\n"
4716 	"\t            .execname   display a common_pid as a program name\n"
4717 	"\t            .syscall    display a syscall id as a syscall name\n"
4718 	"\t            .log2       display log2 value rather than raw number\n"
4719 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4720 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4721 	"\t    trigger or to start a hist trigger but not log any events\n"
4722 	"\t    until told to do so.  'continue' can be used to start or\n"
4723 	"\t    restart a paused hist trigger.\n\n"
4724 	"\t    The 'clear' parameter will clear the contents of a running\n"
4725 	"\t    hist trigger and leave its current paused/active state\n"
4726 	"\t    unchanged.\n\n"
4727 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4728 	"\t    have one event conditionally start and stop another event's\n"
4729 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4730 	"\t    the enable_event and disable_event triggers.\n"
4731 #endif
4732 ;
4733 
4734 static ssize_t
4735 tracing_readme_read(struct file *filp, char __user *ubuf,
4736 		       size_t cnt, loff_t *ppos)
4737 {
4738 	return simple_read_from_buffer(ubuf, cnt, ppos,
4739 					readme_msg, strlen(readme_msg));
4740 }
4741 
4742 static const struct file_operations tracing_readme_fops = {
4743 	.open		= tracing_open_generic,
4744 	.read		= tracing_readme_read,
4745 	.llseek		= generic_file_llseek,
4746 };
4747 
4748 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4749 {
4750 	int *ptr = v;
4751 
4752 	if (*pos || m->count)
4753 		ptr++;
4754 
4755 	(*pos)++;
4756 
4757 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4758 		if (trace_find_tgid(*ptr))
4759 			return ptr;
4760 	}
4761 
4762 	return NULL;
4763 }
4764 
4765 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4766 {
4767 	void *v;
4768 	loff_t l = 0;
4769 
4770 	if (!tgid_map)
4771 		return NULL;
4772 
4773 	v = &tgid_map[0];
4774 	while (l <= *pos) {
4775 		v = saved_tgids_next(m, v, &l);
4776 		if (!v)
4777 			return NULL;
4778 	}
4779 
4780 	return v;
4781 }
4782 
4783 static void saved_tgids_stop(struct seq_file *m, void *v)
4784 {
4785 }
4786 
4787 static int saved_tgids_show(struct seq_file *m, void *v)
4788 {
4789 	int pid = (int *)v - tgid_map;
4790 
4791 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4792 	return 0;
4793 }
4794 
4795 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4796 	.start		= saved_tgids_start,
4797 	.stop		= saved_tgids_stop,
4798 	.next		= saved_tgids_next,
4799 	.show		= saved_tgids_show,
4800 };
4801 
4802 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4803 {
4804 	if (tracing_disabled)
4805 		return -ENODEV;
4806 
4807 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4808 }
4809 
4810 
4811 static const struct file_operations tracing_saved_tgids_fops = {
4812 	.open		= tracing_saved_tgids_open,
4813 	.read		= seq_read,
4814 	.llseek		= seq_lseek,
4815 	.release	= seq_release,
4816 };
4817 
4818 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4819 {
4820 	unsigned int *ptr = v;
4821 
4822 	if (*pos || m->count)
4823 		ptr++;
4824 
4825 	(*pos)++;
4826 
4827 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4828 	     ptr++) {
4829 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4830 			continue;
4831 
4832 		return ptr;
4833 	}
4834 
4835 	return NULL;
4836 }
4837 
4838 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4839 {
4840 	void *v;
4841 	loff_t l = 0;
4842 
4843 	preempt_disable();
4844 	arch_spin_lock(&trace_cmdline_lock);
4845 
4846 	v = &savedcmd->map_cmdline_to_pid[0];
4847 	while (l <= *pos) {
4848 		v = saved_cmdlines_next(m, v, &l);
4849 		if (!v)
4850 			return NULL;
4851 	}
4852 
4853 	return v;
4854 }
4855 
4856 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4857 {
4858 	arch_spin_unlock(&trace_cmdline_lock);
4859 	preempt_enable();
4860 }
4861 
4862 static int saved_cmdlines_show(struct seq_file *m, void *v)
4863 {
4864 	char buf[TASK_COMM_LEN];
4865 	unsigned int *pid = v;
4866 
4867 	__trace_find_cmdline(*pid, buf);
4868 	seq_printf(m, "%d %s\n", *pid, buf);
4869 	return 0;
4870 }
4871 
4872 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4873 	.start		= saved_cmdlines_start,
4874 	.next		= saved_cmdlines_next,
4875 	.stop		= saved_cmdlines_stop,
4876 	.show		= saved_cmdlines_show,
4877 };
4878 
4879 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4880 {
4881 	if (tracing_disabled)
4882 		return -ENODEV;
4883 
4884 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4885 }
4886 
4887 static const struct file_operations tracing_saved_cmdlines_fops = {
4888 	.open		= tracing_saved_cmdlines_open,
4889 	.read		= seq_read,
4890 	.llseek		= seq_lseek,
4891 	.release	= seq_release,
4892 };
4893 
4894 static ssize_t
4895 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4896 				 size_t cnt, loff_t *ppos)
4897 {
4898 	char buf[64];
4899 	int r;
4900 
4901 	arch_spin_lock(&trace_cmdline_lock);
4902 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4903 	arch_spin_unlock(&trace_cmdline_lock);
4904 
4905 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4906 }
4907 
4908 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4909 {
4910 	kfree(s->saved_cmdlines);
4911 	kfree(s->map_cmdline_to_pid);
4912 	kfree(s);
4913 }
4914 
4915 static int tracing_resize_saved_cmdlines(unsigned int val)
4916 {
4917 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4918 
4919 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4920 	if (!s)
4921 		return -ENOMEM;
4922 
4923 	if (allocate_cmdlines_buffer(val, s) < 0) {
4924 		kfree(s);
4925 		return -ENOMEM;
4926 	}
4927 
4928 	arch_spin_lock(&trace_cmdline_lock);
4929 	savedcmd_temp = savedcmd;
4930 	savedcmd = s;
4931 	arch_spin_unlock(&trace_cmdline_lock);
4932 	free_saved_cmdlines_buffer(savedcmd_temp);
4933 
4934 	return 0;
4935 }
4936 
4937 static ssize_t
4938 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4939 				  size_t cnt, loff_t *ppos)
4940 {
4941 	unsigned long val;
4942 	int ret;
4943 
4944 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4945 	if (ret)
4946 		return ret;
4947 
4948 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4949 	if (!val || val > PID_MAX_DEFAULT)
4950 		return -EINVAL;
4951 
4952 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4953 	if (ret < 0)
4954 		return ret;
4955 
4956 	*ppos += cnt;
4957 
4958 	return cnt;
4959 }
4960 
4961 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4962 	.open		= tracing_open_generic,
4963 	.read		= tracing_saved_cmdlines_size_read,
4964 	.write		= tracing_saved_cmdlines_size_write,
4965 };
4966 
4967 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4968 static union trace_eval_map_item *
4969 update_eval_map(union trace_eval_map_item *ptr)
4970 {
4971 	if (!ptr->map.eval_string) {
4972 		if (ptr->tail.next) {
4973 			ptr = ptr->tail.next;
4974 			/* Set ptr to the next real item (skip head) */
4975 			ptr++;
4976 		} else
4977 			return NULL;
4978 	}
4979 	return ptr;
4980 }
4981 
4982 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4983 {
4984 	union trace_eval_map_item *ptr = v;
4985 
4986 	/*
4987 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4988 	 * This really should never happen.
4989 	 */
4990 	ptr = update_eval_map(ptr);
4991 	if (WARN_ON_ONCE(!ptr))
4992 		return NULL;
4993 
4994 	ptr++;
4995 
4996 	(*pos)++;
4997 
4998 	ptr = update_eval_map(ptr);
4999 
5000 	return ptr;
5001 }
5002 
5003 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5004 {
5005 	union trace_eval_map_item *v;
5006 	loff_t l = 0;
5007 
5008 	mutex_lock(&trace_eval_mutex);
5009 
5010 	v = trace_eval_maps;
5011 	if (v)
5012 		v++;
5013 
5014 	while (v && l < *pos) {
5015 		v = eval_map_next(m, v, &l);
5016 	}
5017 
5018 	return v;
5019 }
5020 
5021 static void eval_map_stop(struct seq_file *m, void *v)
5022 {
5023 	mutex_unlock(&trace_eval_mutex);
5024 }
5025 
5026 static int eval_map_show(struct seq_file *m, void *v)
5027 {
5028 	union trace_eval_map_item *ptr = v;
5029 
5030 	seq_printf(m, "%s %ld (%s)\n",
5031 		   ptr->map.eval_string, ptr->map.eval_value,
5032 		   ptr->map.system);
5033 
5034 	return 0;
5035 }
5036 
5037 static const struct seq_operations tracing_eval_map_seq_ops = {
5038 	.start		= eval_map_start,
5039 	.next		= eval_map_next,
5040 	.stop		= eval_map_stop,
5041 	.show		= eval_map_show,
5042 };
5043 
5044 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5045 {
5046 	if (tracing_disabled)
5047 		return -ENODEV;
5048 
5049 	return seq_open(filp, &tracing_eval_map_seq_ops);
5050 }
5051 
5052 static const struct file_operations tracing_eval_map_fops = {
5053 	.open		= tracing_eval_map_open,
5054 	.read		= seq_read,
5055 	.llseek		= seq_lseek,
5056 	.release	= seq_release,
5057 };
5058 
5059 static inline union trace_eval_map_item *
5060 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5061 {
5062 	/* Return tail of array given the head */
5063 	return ptr + ptr->head.length + 1;
5064 }
5065 
5066 static void
5067 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5068 			   int len)
5069 {
5070 	struct trace_eval_map **stop;
5071 	struct trace_eval_map **map;
5072 	union trace_eval_map_item *map_array;
5073 	union trace_eval_map_item *ptr;
5074 
5075 	stop = start + len;
5076 
5077 	/*
5078 	 * The trace_eval_maps contains the map plus a head and tail item,
5079 	 * where the head holds the module and length of array, and the
5080 	 * tail holds a pointer to the next list.
5081 	 */
5082 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5083 	if (!map_array) {
5084 		pr_warn("Unable to allocate trace eval mapping\n");
5085 		return;
5086 	}
5087 
5088 	mutex_lock(&trace_eval_mutex);
5089 
5090 	if (!trace_eval_maps)
5091 		trace_eval_maps = map_array;
5092 	else {
5093 		ptr = trace_eval_maps;
5094 		for (;;) {
5095 			ptr = trace_eval_jmp_to_tail(ptr);
5096 			if (!ptr->tail.next)
5097 				break;
5098 			ptr = ptr->tail.next;
5099 
5100 		}
5101 		ptr->tail.next = map_array;
5102 	}
5103 	map_array->head.mod = mod;
5104 	map_array->head.length = len;
5105 	map_array++;
5106 
5107 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5108 		map_array->map = **map;
5109 		map_array++;
5110 	}
5111 	memset(map_array, 0, sizeof(*map_array));
5112 
5113 	mutex_unlock(&trace_eval_mutex);
5114 }
5115 
5116 static void trace_create_eval_file(struct dentry *d_tracer)
5117 {
5118 	trace_create_file("eval_map", 0444, d_tracer,
5119 			  NULL, &tracing_eval_map_fops);
5120 }
5121 
5122 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5123 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5124 static inline void trace_insert_eval_map_file(struct module *mod,
5125 			      struct trace_eval_map **start, int len) { }
5126 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5127 
5128 static void trace_insert_eval_map(struct module *mod,
5129 				  struct trace_eval_map **start, int len)
5130 {
5131 	struct trace_eval_map **map;
5132 
5133 	if (len <= 0)
5134 		return;
5135 
5136 	map = start;
5137 
5138 	trace_event_eval_update(map, len);
5139 
5140 	trace_insert_eval_map_file(mod, start, len);
5141 }
5142 
5143 static ssize_t
5144 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5145 		       size_t cnt, loff_t *ppos)
5146 {
5147 	struct trace_array *tr = filp->private_data;
5148 	char buf[MAX_TRACER_SIZE+2];
5149 	int r;
5150 
5151 	mutex_lock(&trace_types_lock);
5152 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5153 	mutex_unlock(&trace_types_lock);
5154 
5155 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5156 }
5157 
5158 int tracer_init(struct tracer *t, struct trace_array *tr)
5159 {
5160 	tracing_reset_online_cpus(&tr->trace_buffer);
5161 	return t->init(tr);
5162 }
5163 
5164 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5165 {
5166 	int cpu;
5167 
5168 	for_each_tracing_cpu(cpu)
5169 		per_cpu_ptr(buf->data, cpu)->entries = val;
5170 }
5171 
5172 #ifdef CONFIG_TRACER_MAX_TRACE
5173 /* resize @tr's buffer to the size of @size_tr's entries */
5174 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5175 					struct trace_buffer *size_buf, int cpu_id)
5176 {
5177 	int cpu, ret = 0;
5178 
5179 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5180 		for_each_tracing_cpu(cpu) {
5181 			ret = ring_buffer_resize(trace_buf->buffer,
5182 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5183 			if (ret < 0)
5184 				break;
5185 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5186 				per_cpu_ptr(size_buf->data, cpu)->entries;
5187 		}
5188 	} else {
5189 		ret = ring_buffer_resize(trace_buf->buffer,
5190 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5191 		if (ret == 0)
5192 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5193 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5194 	}
5195 
5196 	return ret;
5197 }
5198 #endif /* CONFIG_TRACER_MAX_TRACE */
5199 
5200 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5201 					unsigned long size, int cpu)
5202 {
5203 	int ret;
5204 
5205 	/*
5206 	 * If kernel or user changes the size of the ring buffer
5207 	 * we use the size that was given, and we can forget about
5208 	 * expanding it later.
5209 	 */
5210 	ring_buffer_expanded = true;
5211 
5212 	/* May be called before buffers are initialized */
5213 	if (!tr->trace_buffer.buffer)
5214 		return 0;
5215 
5216 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5217 	if (ret < 0)
5218 		return ret;
5219 
5220 #ifdef CONFIG_TRACER_MAX_TRACE
5221 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5222 	    !tr->current_trace->use_max_tr)
5223 		goto out;
5224 
5225 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5226 	if (ret < 0) {
5227 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5228 						     &tr->trace_buffer, cpu);
5229 		if (r < 0) {
5230 			/*
5231 			 * AARGH! We are left with different
5232 			 * size max buffer!!!!
5233 			 * The max buffer is our "snapshot" buffer.
5234 			 * When a tracer needs a snapshot (one of the
5235 			 * latency tracers), it swaps the max buffer
5236 			 * with the saved snap shot. We succeeded to
5237 			 * update the size of the main buffer, but failed to
5238 			 * update the size of the max buffer. But when we tried
5239 			 * to reset the main buffer to the original size, we
5240 			 * failed there too. This is very unlikely to
5241 			 * happen, but if it does, warn and kill all
5242 			 * tracing.
5243 			 */
5244 			WARN_ON(1);
5245 			tracing_disabled = 1;
5246 		}
5247 		return ret;
5248 	}
5249 
5250 	if (cpu == RING_BUFFER_ALL_CPUS)
5251 		set_buffer_entries(&tr->max_buffer, size);
5252 	else
5253 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5254 
5255  out:
5256 #endif /* CONFIG_TRACER_MAX_TRACE */
5257 
5258 	if (cpu == RING_BUFFER_ALL_CPUS)
5259 		set_buffer_entries(&tr->trace_buffer, size);
5260 	else
5261 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5262 
5263 	return ret;
5264 }
5265 
5266 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5267 					  unsigned long size, int cpu_id)
5268 {
5269 	int ret = size;
5270 
5271 	mutex_lock(&trace_types_lock);
5272 
5273 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5274 		/* make sure, this cpu is enabled in the mask */
5275 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5276 			ret = -EINVAL;
5277 			goto out;
5278 		}
5279 	}
5280 
5281 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5282 	if (ret < 0)
5283 		ret = -ENOMEM;
5284 
5285 out:
5286 	mutex_unlock(&trace_types_lock);
5287 
5288 	return ret;
5289 }
5290 
5291 
5292 /**
5293  * tracing_update_buffers - used by tracing facility to expand ring buffers
5294  *
5295  * To save on memory when the tracing is never used on a system with it
5296  * configured in. The ring buffers are set to a minimum size. But once
5297  * a user starts to use the tracing facility, then they need to grow
5298  * to their default size.
5299  *
5300  * This function is to be called when a tracer is about to be used.
5301  */
5302 int tracing_update_buffers(void)
5303 {
5304 	int ret = 0;
5305 
5306 	mutex_lock(&trace_types_lock);
5307 	if (!ring_buffer_expanded)
5308 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5309 						RING_BUFFER_ALL_CPUS);
5310 	mutex_unlock(&trace_types_lock);
5311 
5312 	return ret;
5313 }
5314 
5315 struct trace_option_dentry;
5316 
5317 static void
5318 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5319 
5320 /*
5321  * Used to clear out the tracer before deletion of an instance.
5322  * Must have trace_types_lock held.
5323  */
5324 static void tracing_set_nop(struct trace_array *tr)
5325 {
5326 	if (tr->current_trace == &nop_trace)
5327 		return;
5328 
5329 	tr->current_trace->enabled--;
5330 
5331 	if (tr->current_trace->reset)
5332 		tr->current_trace->reset(tr);
5333 
5334 	tr->current_trace = &nop_trace;
5335 }
5336 
5337 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5338 {
5339 	/* Only enable if the directory has been created already. */
5340 	if (!tr->dir)
5341 		return;
5342 
5343 	create_trace_option_files(tr, t);
5344 }
5345 
5346 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5347 {
5348 	struct tracer *t;
5349 #ifdef CONFIG_TRACER_MAX_TRACE
5350 	bool had_max_tr;
5351 #endif
5352 	int ret = 0;
5353 
5354 	mutex_lock(&trace_types_lock);
5355 
5356 	if (!ring_buffer_expanded) {
5357 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5358 						RING_BUFFER_ALL_CPUS);
5359 		if (ret < 0)
5360 			goto out;
5361 		ret = 0;
5362 	}
5363 
5364 	for (t = trace_types; t; t = t->next) {
5365 		if (strcmp(t->name, buf) == 0)
5366 			break;
5367 	}
5368 	if (!t) {
5369 		ret = -EINVAL;
5370 		goto out;
5371 	}
5372 	if (t == tr->current_trace)
5373 		goto out;
5374 
5375 	/* Some tracers won't work on kernel command line */
5376 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5377 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5378 			t->name);
5379 		goto out;
5380 	}
5381 
5382 	/* Some tracers are only allowed for the top level buffer */
5383 	if (!trace_ok_for_array(t, tr)) {
5384 		ret = -EINVAL;
5385 		goto out;
5386 	}
5387 
5388 	/* If trace pipe files are being read, we can't change the tracer */
5389 	if (tr->current_trace->ref) {
5390 		ret = -EBUSY;
5391 		goto out;
5392 	}
5393 
5394 	trace_branch_disable();
5395 
5396 	tr->current_trace->enabled--;
5397 
5398 	if (tr->current_trace->reset)
5399 		tr->current_trace->reset(tr);
5400 
5401 	/* Current trace needs to be nop_trace before synchronize_sched */
5402 	tr->current_trace = &nop_trace;
5403 
5404 #ifdef CONFIG_TRACER_MAX_TRACE
5405 	had_max_tr = tr->allocated_snapshot;
5406 
5407 	if (had_max_tr && !t->use_max_tr) {
5408 		/*
5409 		 * We need to make sure that the update_max_tr sees that
5410 		 * current_trace changed to nop_trace to keep it from
5411 		 * swapping the buffers after we resize it.
5412 		 * The update_max_tr is called from interrupts disabled
5413 		 * so a synchronized_sched() is sufficient.
5414 		 */
5415 		synchronize_sched();
5416 		free_snapshot(tr);
5417 	}
5418 #endif
5419 
5420 #ifdef CONFIG_TRACER_MAX_TRACE
5421 	if (t->use_max_tr && !had_max_tr) {
5422 		ret = tracing_alloc_snapshot_instance(tr);
5423 		if (ret < 0)
5424 			goto out;
5425 	}
5426 #endif
5427 
5428 	if (t->init) {
5429 		ret = tracer_init(t, tr);
5430 		if (ret)
5431 			goto out;
5432 	}
5433 
5434 	tr->current_trace = t;
5435 	tr->current_trace->enabled++;
5436 	trace_branch_enable(tr);
5437  out:
5438 	mutex_unlock(&trace_types_lock);
5439 
5440 	return ret;
5441 }
5442 
5443 static ssize_t
5444 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5445 			size_t cnt, loff_t *ppos)
5446 {
5447 	struct trace_array *tr = filp->private_data;
5448 	char buf[MAX_TRACER_SIZE+1];
5449 	int i;
5450 	size_t ret;
5451 	int err;
5452 
5453 	ret = cnt;
5454 
5455 	if (cnt > MAX_TRACER_SIZE)
5456 		cnt = MAX_TRACER_SIZE;
5457 
5458 	if (copy_from_user(buf, ubuf, cnt))
5459 		return -EFAULT;
5460 
5461 	buf[cnt] = 0;
5462 
5463 	/* strip ending whitespace. */
5464 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5465 		buf[i] = 0;
5466 
5467 	err = tracing_set_tracer(tr, buf);
5468 	if (err)
5469 		return err;
5470 
5471 	*ppos += ret;
5472 
5473 	return ret;
5474 }
5475 
5476 static ssize_t
5477 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5478 		   size_t cnt, loff_t *ppos)
5479 {
5480 	char buf[64];
5481 	int r;
5482 
5483 	r = snprintf(buf, sizeof(buf), "%ld\n",
5484 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5485 	if (r > sizeof(buf))
5486 		r = sizeof(buf);
5487 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5488 }
5489 
5490 static ssize_t
5491 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5492 		    size_t cnt, loff_t *ppos)
5493 {
5494 	unsigned long val;
5495 	int ret;
5496 
5497 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5498 	if (ret)
5499 		return ret;
5500 
5501 	*ptr = val * 1000;
5502 
5503 	return cnt;
5504 }
5505 
5506 static ssize_t
5507 tracing_thresh_read(struct file *filp, char __user *ubuf,
5508 		    size_t cnt, loff_t *ppos)
5509 {
5510 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5511 }
5512 
5513 static ssize_t
5514 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5515 		     size_t cnt, loff_t *ppos)
5516 {
5517 	struct trace_array *tr = filp->private_data;
5518 	int ret;
5519 
5520 	mutex_lock(&trace_types_lock);
5521 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5522 	if (ret < 0)
5523 		goto out;
5524 
5525 	if (tr->current_trace->update_thresh) {
5526 		ret = tr->current_trace->update_thresh(tr);
5527 		if (ret < 0)
5528 			goto out;
5529 	}
5530 
5531 	ret = cnt;
5532 out:
5533 	mutex_unlock(&trace_types_lock);
5534 
5535 	return ret;
5536 }
5537 
5538 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5539 
5540 static ssize_t
5541 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5542 		     size_t cnt, loff_t *ppos)
5543 {
5544 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5545 }
5546 
5547 static ssize_t
5548 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5549 		      size_t cnt, loff_t *ppos)
5550 {
5551 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5552 }
5553 
5554 #endif
5555 
5556 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5557 {
5558 	struct trace_array *tr = inode->i_private;
5559 	struct trace_iterator *iter;
5560 	int ret = 0;
5561 
5562 	if (tracing_disabled)
5563 		return -ENODEV;
5564 
5565 	if (trace_array_get(tr) < 0)
5566 		return -ENODEV;
5567 
5568 	mutex_lock(&trace_types_lock);
5569 
5570 	/* create a buffer to store the information to pass to userspace */
5571 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5572 	if (!iter) {
5573 		ret = -ENOMEM;
5574 		__trace_array_put(tr);
5575 		goto out;
5576 	}
5577 
5578 	trace_seq_init(&iter->seq);
5579 	iter->trace = tr->current_trace;
5580 
5581 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5582 		ret = -ENOMEM;
5583 		goto fail;
5584 	}
5585 
5586 	/* trace pipe does not show start of buffer */
5587 	cpumask_setall(iter->started);
5588 
5589 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5590 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5591 
5592 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5593 	if (trace_clocks[tr->clock_id].in_ns)
5594 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5595 
5596 	iter->tr = tr;
5597 	iter->trace_buffer = &tr->trace_buffer;
5598 	iter->cpu_file = tracing_get_cpu(inode);
5599 	mutex_init(&iter->mutex);
5600 	filp->private_data = iter;
5601 
5602 	if (iter->trace->pipe_open)
5603 		iter->trace->pipe_open(iter);
5604 
5605 	nonseekable_open(inode, filp);
5606 
5607 	tr->current_trace->ref++;
5608 out:
5609 	mutex_unlock(&trace_types_lock);
5610 	return ret;
5611 
5612 fail:
5613 	kfree(iter->trace);
5614 	kfree(iter);
5615 	__trace_array_put(tr);
5616 	mutex_unlock(&trace_types_lock);
5617 	return ret;
5618 }
5619 
5620 static int tracing_release_pipe(struct inode *inode, struct file *file)
5621 {
5622 	struct trace_iterator *iter = file->private_data;
5623 	struct trace_array *tr = inode->i_private;
5624 
5625 	mutex_lock(&trace_types_lock);
5626 
5627 	tr->current_trace->ref--;
5628 
5629 	if (iter->trace->pipe_close)
5630 		iter->trace->pipe_close(iter);
5631 
5632 	mutex_unlock(&trace_types_lock);
5633 
5634 	free_cpumask_var(iter->started);
5635 	mutex_destroy(&iter->mutex);
5636 	kfree(iter);
5637 
5638 	trace_array_put(tr);
5639 
5640 	return 0;
5641 }
5642 
5643 static __poll_t
5644 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5645 {
5646 	struct trace_array *tr = iter->tr;
5647 
5648 	/* Iterators are static, they should be filled or empty */
5649 	if (trace_buffer_iter(iter, iter->cpu_file))
5650 		return EPOLLIN | EPOLLRDNORM;
5651 
5652 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5653 		/*
5654 		 * Always select as readable when in blocking mode
5655 		 */
5656 		return EPOLLIN | EPOLLRDNORM;
5657 	else
5658 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5659 					     filp, poll_table);
5660 }
5661 
5662 static __poll_t
5663 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5664 {
5665 	struct trace_iterator *iter = filp->private_data;
5666 
5667 	return trace_poll(iter, filp, poll_table);
5668 }
5669 
5670 /* Must be called with iter->mutex held. */
5671 static int tracing_wait_pipe(struct file *filp)
5672 {
5673 	struct trace_iterator *iter = filp->private_data;
5674 	int ret;
5675 
5676 	while (trace_empty(iter)) {
5677 
5678 		if ((filp->f_flags & O_NONBLOCK)) {
5679 			return -EAGAIN;
5680 		}
5681 
5682 		/*
5683 		 * We block until we read something and tracing is disabled.
5684 		 * We still block if tracing is disabled, but we have never
5685 		 * read anything. This allows a user to cat this file, and
5686 		 * then enable tracing. But after we have read something,
5687 		 * we give an EOF when tracing is again disabled.
5688 		 *
5689 		 * iter->pos will be 0 if we haven't read anything.
5690 		 */
5691 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5692 			break;
5693 
5694 		mutex_unlock(&iter->mutex);
5695 
5696 		ret = wait_on_pipe(iter, false);
5697 
5698 		mutex_lock(&iter->mutex);
5699 
5700 		if (ret)
5701 			return ret;
5702 	}
5703 
5704 	return 1;
5705 }
5706 
5707 /*
5708  * Consumer reader.
5709  */
5710 static ssize_t
5711 tracing_read_pipe(struct file *filp, char __user *ubuf,
5712 		  size_t cnt, loff_t *ppos)
5713 {
5714 	struct trace_iterator *iter = filp->private_data;
5715 	ssize_t sret;
5716 
5717 	/*
5718 	 * Avoid more than one consumer on a single file descriptor
5719 	 * This is just a matter of traces coherency, the ring buffer itself
5720 	 * is protected.
5721 	 */
5722 	mutex_lock(&iter->mutex);
5723 
5724 	/* return any leftover data */
5725 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5726 	if (sret != -EBUSY)
5727 		goto out;
5728 
5729 	trace_seq_init(&iter->seq);
5730 
5731 	if (iter->trace->read) {
5732 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5733 		if (sret)
5734 			goto out;
5735 	}
5736 
5737 waitagain:
5738 	sret = tracing_wait_pipe(filp);
5739 	if (sret <= 0)
5740 		goto out;
5741 
5742 	/* stop when tracing is finished */
5743 	if (trace_empty(iter)) {
5744 		sret = 0;
5745 		goto out;
5746 	}
5747 
5748 	if (cnt >= PAGE_SIZE)
5749 		cnt = PAGE_SIZE - 1;
5750 
5751 	/* reset all but tr, trace, and overruns */
5752 	memset(&iter->seq, 0,
5753 	       sizeof(struct trace_iterator) -
5754 	       offsetof(struct trace_iterator, seq));
5755 	cpumask_clear(iter->started);
5756 	iter->pos = -1;
5757 
5758 	trace_event_read_lock();
5759 	trace_access_lock(iter->cpu_file);
5760 	while (trace_find_next_entry_inc(iter) != NULL) {
5761 		enum print_line_t ret;
5762 		int save_len = iter->seq.seq.len;
5763 
5764 		ret = print_trace_line(iter);
5765 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5766 			/* don't print partial lines */
5767 			iter->seq.seq.len = save_len;
5768 			break;
5769 		}
5770 		if (ret != TRACE_TYPE_NO_CONSUME)
5771 			trace_consume(iter);
5772 
5773 		if (trace_seq_used(&iter->seq) >= cnt)
5774 			break;
5775 
5776 		/*
5777 		 * Setting the full flag means we reached the trace_seq buffer
5778 		 * size and we should leave by partial output condition above.
5779 		 * One of the trace_seq_* functions is not used properly.
5780 		 */
5781 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5782 			  iter->ent->type);
5783 	}
5784 	trace_access_unlock(iter->cpu_file);
5785 	trace_event_read_unlock();
5786 
5787 	/* Now copy what we have to the user */
5788 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5789 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5790 		trace_seq_init(&iter->seq);
5791 
5792 	/*
5793 	 * If there was nothing to send to user, in spite of consuming trace
5794 	 * entries, go back to wait for more entries.
5795 	 */
5796 	if (sret == -EBUSY)
5797 		goto waitagain;
5798 
5799 out:
5800 	mutex_unlock(&iter->mutex);
5801 
5802 	return sret;
5803 }
5804 
5805 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5806 				     unsigned int idx)
5807 {
5808 	__free_page(spd->pages[idx]);
5809 }
5810 
5811 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5812 	.can_merge		= 0,
5813 	.confirm		= generic_pipe_buf_confirm,
5814 	.release		= generic_pipe_buf_release,
5815 	.steal			= generic_pipe_buf_steal,
5816 	.get			= generic_pipe_buf_get,
5817 };
5818 
5819 static size_t
5820 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5821 {
5822 	size_t count;
5823 	int save_len;
5824 	int ret;
5825 
5826 	/* Seq buffer is page-sized, exactly what we need. */
5827 	for (;;) {
5828 		save_len = iter->seq.seq.len;
5829 		ret = print_trace_line(iter);
5830 
5831 		if (trace_seq_has_overflowed(&iter->seq)) {
5832 			iter->seq.seq.len = save_len;
5833 			break;
5834 		}
5835 
5836 		/*
5837 		 * This should not be hit, because it should only
5838 		 * be set if the iter->seq overflowed. But check it
5839 		 * anyway to be safe.
5840 		 */
5841 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5842 			iter->seq.seq.len = save_len;
5843 			break;
5844 		}
5845 
5846 		count = trace_seq_used(&iter->seq) - save_len;
5847 		if (rem < count) {
5848 			rem = 0;
5849 			iter->seq.seq.len = save_len;
5850 			break;
5851 		}
5852 
5853 		if (ret != TRACE_TYPE_NO_CONSUME)
5854 			trace_consume(iter);
5855 		rem -= count;
5856 		if (!trace_find_next_entry_inc(iter))	{
5857 			rem = 0;
5858 			iter->ent = NULL;
5859 			break;
5860 		}
5861 	}
5862 
5863 	return rem;
5864 }
5865 
5866 static ssize_t tracing_splice_read_pipe(struct file *filp,
5867 					loff_t *ppos,
5868 					struct pipe_inode_info *pipe,
5869 					size_t len,
5870 					unsigned int flags)
5871 {
5872 	struct page *pages_def[PIPE_DEF_BUFFERS];
5873 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5874 	struct trace_iterator *iter = filp->private_data;
5875 	struct splice_pipe_desc spd = {
5876 		.pages		= pages_def,
5877 		.partial	= partial_def,
5878 		.nr_pages	= 0, /* This gets updated below. */
5879 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5880 		.ops		= &tracing_pipe_buf_ops,
5881 		.spd_release	= tracing_spd_release_pipe,
5882 	};
5883 	ssize_t ret;
5884 	size_t rem;
5885 	unsigned int i;
5886 
5887 	if (splice_grow_spd(pipe, &spd))
5888 		return -ENOMEM;
5889 
5890 	mutex_lock(&iter->mutex);
5891 
5892 	if (iter->trace->splice_read) {
5893 		ret = iter->trace->splice_read(iter, filp,
5894 					       ppos, pipe, len, flags);
5895 		if (ret)
5896 			goto out_err;
5897 	}
5898 
5899 	ret = tracing_wait_pipe(filp);
5900 	if (ret <= 0)
5901 		goto out_err;
5902 
5903 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5904 		ret = -EFAULT;
5905 		goto out_err;
5906 	}
5907 
5908 	trace_event_read_lock();
5909 	trace_access_lock(iter->cpu_file);
5910 
5911 	/* Fill as many pages as possible. */
5912 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5913 		spd.pages[i] = alloc_page(GFP_KERNEL);
5914 		if (!spd.pages[i])
5915 			break;
5916 
5917 		rem = tracing_fill_pipe_page(rem, iter);
5918 
5919 		/* Copy the data into the page, so we can start over. */
5920 		ret = trace_seq_to_buffer(&iter->seq,
5921 					  page_address(spd.pages[i]),
5922 					  trace_seq_used(&iter->seq));
5923 		if (ret < 0) {
5924 			__free_page(spd.pages[i]);
5925 			break;
5926 		}
5927 		spd.partial[i].offset = 0;
5928 		spd.partial[i].len = trace_seq_used(&iter->seq);
5929 
5930 		trace_seq_init(&iter->seq);
5931 	}
5932 
5933 	trace_access_unlock(iter->cpu_file);
5934 	trace_event_read_unlock();
5935 	mutex_unlock(&iter->mutex);
5936 
5937 	spd.nr_pages = i;
5938 
5939 	if (i)
5940 		ret = splice_to_pipe(pipe, &spd);
5941 	else
5942 		ret = 0;
5943 out:
5944 	splice_shrink_spd(&spd);
5945 	return ret;
5946 
5947 out_err:
5948 	mutex_unlock(&iter->mutex);
5949 	goto out;
5950 }
5951 
5952 static ssize_t
5953 tracing_entries_read(struct file *filp, char __user *ubuf,
5954 		     size_t cnt, loff_t *ppos)
5955 {
5956 	struct inode *inode = file_inode(filp);
5957 	struct trace_array *tr = inode->i_private;
5958 	int cpu = tracing_get_cpu(inode);
5959 	char buf[64];
5960 	int r = 0;
5961 	ssize_t ret;
5962 
5963 	mutex_lock(&trace_types_lock);
5964 
5965 	if (cpu == RING_BUFFER_ALL_CPUS) {
5966 		int cpu, buf_size_same;
5967 		unsigned long size;
5968 
5969 		size = 0;
5970 		buf_size_same = 1;
5971 		/* check if all cpu sizes are same */
5972 		for_each_tracing_cpu(cpu) {
5973 			/* fill in the size from first enabled cpu */
5974 			if (size == 0)
5975 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5976 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5977 				buf_size_same = 0;
5978 				break;
5979 			}
5980 		}
5981 
5982 		if (buf_size_same) {
5983 			if (!ring_buffer_expanded)
5984 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5985 					    size >> 10,
5986 					    trace_buf_size >> 10);
5987 			else
5988 				r = sprintf(buf, "%lu\n", size >> 10);
5989 		} else
5990 			r = sprintf(buf, "X\n");
5991 	} else
5992 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5993 
5994 	mutex_unlock(&trace_types_lock);
5995 
5996 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5997 	return ret;
5998 }
5999 
6000 static ssize_t
6001 tracing_entries_write(struct file *filp, const char __user *ubuf,
6002 		      size_t cnt, loff_t *ppos)
6003 {
6004 	struct inode *inode = file_inode(filp);
6005 	struct trace_array *tr = inode->i_private;
6006 	unsigned long val;
6007 	int ret;
6008 
6009 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6010 	if (ret)
6011 		return ret;
6012 
6013 	/* must have at least 1 entry */
6014 	if (!val)
6015 		return -EINVAL;
6016 
6017 	/* value is in KB */
6018 	val <<= 10;
6019 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6020 	if (ret < 0)
6021 		return ret;
6022 
6023 	*ppos += cnt;
6024 
6025 	return cnt;
6026 }
6027 
6028 static ssize_t
6029 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6030 				size_t cnt, loff_t *ppos)
6031 {
6032 	struct trace_array *tr = filp->private_data;
6033 	char buf[64];
6034 	int r, cpu;
6035 	unsigned long size = 0, expanded_size = 0;
6036 
6037 	mutex_lock(&trace_types_lock);
6038 	for_each_tracing_cpu(cpu) {
6039 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6040 		if (!ring_buffer_expanded)
6041 			expanded_size += trace_buf_size >> 10;
6042 	}
6043 	if (ring_buffer_expanded)
6044 		r = sprintf(buf, "%lu\n", size);
6045 	else
6046 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6047 	mutex_unlock(&trace_types_lock);
6048 
6049 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6050 }
6051 
6052 static ssize_t
6053 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6054 			  size_t cnt, loff_t *ppos)
6055 {
6056 	/*
6057 	 * There is no need to read what the user has written, this function
6058 	 * is just to make sure that there is no error when "echo" is used
6059 	 */
6060 
6061 	*ppos += cnt;
6062 
6063 	return cnt;
6064 }
6065 
6066 static int
6067 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6068 {
6069 	struct trace_array *tr = inode->i_private;
6070 
6071 	/* disable tracing ? */
6072 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6073 		tracer_tracing_off(tr);
6074 	/* resize the ring buffer to 0 */
6075 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6076 
6077 	trace_array_put(tr);
6078 
6079 	return 0;
6080 }
6081 
6082 static ssize_t
6083 tracing_mark_write(struct file *filp, const char __user *ubuf,
6084 					size_t cnt, loff_t *fpos)
6085 {
6086 	struct trace_array *tr = filp->private_data;
6087 	struct ring_buffer_event *event;
6088 	enum event_trigger_type tt = ETT_NONE;
6089 	struct ring_buffer *buffer;
6090 	struct print_entry *entry;
6091 	unsigned long irq_flags;
6092 	const char faulted[] = "<faulted>";
6093 	ssize_t written;
6094 	int size;
6095 	int len;
6096 
6097 /* Used in tracing_mark_raw_write() as well */
6098 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6099 
6100 	if (tracing_disabled)
6101 		return -EINVAL;
6102 
6103 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6104 		return -EINVAL;
6105 
6106 	if (cnt > TRACE_BUF_SIZE)
6107 		cnt = TRACE_BUF_SIZE;
6108 
6109 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6110 
6111 	local_save_flags(irq_flags);
6112 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6113 
6114 	/* If less than "<faulted>", then make sure we can still add that */
6115 	if (cnt < FAULTED_SIZE)
6116 		size += FAULTED_SIZE - cnt;
6117 
6118 	buffer = tr->trace_buffer.buffer;
6119 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6120 					    irq_flags, preempt_count());
6121 	if (unlikely(!event))
6122 		/* Ring buffer disabled, return as if not open for write */
6123 		return -EBADF;
6124 
6125 	entry = ring_buffer_event_data(event);
6126 	entry->ip = _THIS_IP_;
6127 
6128 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6129 	if (len) {
6130 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6131 		cnt = FAULTED_SIZE;
6132 		written = -EFAULT;
6133 	} else
6134 		written = cnt;
6135 	len = cnt;
6136 
6137 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6138 		/* do not add \n before testing triggers, but add \0 */
6139 		entry->buf[cnt] = '\0';
6140 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6141 	}
6142 
6143 	if (entry->buf[cnt - 1] != '\n') {
6144 		entry->buf[cnt] = '\n';
6145 		entry->buf[cnt + 1] = '\0';
6146 	} else
6147 		entry->buf[cnt] = '\0';
6148 
6149 	__buffer_unlock_commit(buffer, event);
6150 
6151 	if (tt)
6152 		event_triggers_post_call(tr->trace_marker_file, tt);
6153 
6154 	if (written > 0)
6155 		*fpos += written;
6156 
6157 	return written;
6158 }
6159 
6160 /* Limit it for now to 3K (including tag) */
6161 #define RAW_DATA_MAX_SIZE (1024*3)
6162 
6163 static ssize_t
6164 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6165 					size_t cnt, loff_t *fpos)
6166 {
6167 	struct trace_array *tr = filp->private_data;
6168 	struct ring_buffer_event *event;
6169 	struct ring_buffer *buffer;
6170 	struct raw_data_entry *entry;
6171 	const char faulted[] = "<faulted>";
6172 	unsigned long irq_flags;
6173 	ssize_t written;
6174 	int size;
6175 	int len;
6176 
6177 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6178 
6179 	if (tracing_disabled)
6180 		return -EINVAL;
6181 
6182 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6183 		return -EINVAL;
6184 
6185 	/* The marker must at least have a tag id */
6186 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6187 		return -EINVAL;
6188 
6189 	if (cnt > TRACE_BUF_SIZE)
6190 		cnt = TRACE_BUF_SIZE;
6191 
6192 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6193 
6194 	local_save_flags(irq_flags);
6195 	size = sizeof(*entry) + cnt;
6196 	if (cnt < FAULT_SIZE_ID)
6197 		size += FAULT_SIZE_ID - cnt;
6198 
6199 	buffer = tr->trace_buffer.buffer;
6200 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6201 					    irq_flags, preempt_count());
6202 	if (!event)
6203 		/* Ring buffer disabled, return as if not open for write */
6204 		return -EBADF;
6205 
6206 	entry = ring_buffer_event_data(event);
6207 
6208 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6209 	if (len) {
6210 		entry->id = -1;
6211 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6212 		written = -EFAULT;
6213 	} else
6214 		written = cnt;
6215 
6216 	__buffer_unlock_commit(buffer, event);
6217 
6218 	if (written > 0)
6219 		*fpos += written;
6220 
6221 	return written;
6222 }
6223 
6224 static int tracing_clock_show(struct seq_file *m, void *v)
6225 {
6226 	struct trace_array *tr = m->private;
6227 	int i;
6228 
6229 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6230 		seq_printf(m,
6231 			"%s%s%s%s", i ? " " : "",
6232 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6233 			i == tr->clock_id ? "]" : "");
6234 	seq_putc(m, '\n');
6235 
6236 	return 0;
6237 }
6238 
6239 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6240 {
6241 	int i;
6242 
6243 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6244 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6245 			break;
6246 	}
6247 	if (i == ARRAY_SIZE(trace_clocks))
6248 		return -EINVAL;
6249 
6250 	mutex_lock(&trace_types_lock);
6251 
6252 	tr->clock_id = i;
6253 
6254 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6255 
6256 	/*
6257 	 * New clock may not be consistent with the previous clock.
6258 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6259 	 */
6260 	tracing_reset_online_cpus(&tr->trace_buffer);
6261 
6262 #ifdef CONFIG_TRACER_MAX_TRACE
6263 	if (tr->max_buffer.buffer)
6264 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6265 	tracing_reset_online_cpus(&tr->max_buffer);
6266 #endif
6267 
6268 	mutex_unlock(&trace_types_lock);
6269 
6270 	return 0;
6271 }
6272 
6273 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6274 				   size_t cnt, loff_t *fpos)
6275 {
6276 	struct seq_file *m = filp->private_data;
6277 	struct trace_array *tr = m->private;
6278 	char buf[64];
6279 	const char *clockstr;
6280 	int ret;
6281 
6282 	if (cnt >= sizeof(buf))
6283 		return -EINVAL;
6284 
6285 	if (copy_from_user(buf, ubuf, cnt))
6286 		return -EFAULT;
6287 
6288 	buf[cnt] = 0;
6289 
6290 	clockstr = strstrip(buf);
6291 
6292 	ret = tracing_set_clock(tr, clockstr);
6293 	if (ret)
6294 		return ret;
6295 
6296 	*fpos += cnt;
6297 
6298 	return cnt;
6299 }
6300 
6301 static int tracing_clock_open(struct inode *inode, struct file *file)
6302 {
6303 	struct trace_array *tr = inode->i_private;
6304 	int ret;
6305 
6306 	if (tracing_disabled)
6307 		return -ENODEV;
6308 
6309 	if (trace_array_get(tr))
6310 		return -ENODEV;
6311 
6312 	ret = single_open(file, tracing_clock_show, inode->i_private);
6313 	if (ret < 0)
6314 		trace_array_put(tr);
6315 
6316 	return ret;
6317 }
6318 
6319 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6320 {
6321 	struct trace_array *tr = m->private;
6322 
6323 	mutex_lock(&trace_types_lock);
6324 
6325 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6326 		seq_puts(m, "delta [absolute]\n");
6327 	else
6328 		seq_puts(m, "[delta] absolute\n");
6329 
6330 	mutex_unlock(&trace_types_lock);
6331 
6332 	return 0;
6333 }
6334 
6335 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6336 {
6337 	struct trace_array *tr = inode->i_private;
6338 	int ret;
6339 
6340 	if (tracing_disabled)
6341 		return -ENODEV;
6342 
6343 	if (trace_array_get(tr))
6344 		return -ENODEV;
6345 
6346 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6347 	if (ret < 0)
6348 		trace_array_put(tr);
6349 
6350 	return ret;
6351 }
6352 
6353 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6354 {
6355 	int ret = 0;
6356 
6357 	mutex_lock(&trace_types_lock);
6358 
6359 	if (abs && tr->time_stamp_abs_ref++)
6360 		goto out;
6361 
6362 	if (!abs) {
6363 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6364 			ret = -EINVAL;
6365 			goto out;
6366 		}
6367 
6368 		if (--tr->time_stamp_abs_ref)
6369 			goto out;
6370 	}
6371 
6372 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6373 
6374 #ifdef CONFIG_TRACER_MAX_TRACE
6375 	if (tr->max_buffer.buffer)
6376 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6377 #endif
6378  out:
6379 	mutex_unlock(&trace_types_lock);
6380 
6381 	return ret;
6382 }
6383 
6384 struct ftrace_buffer_info {
6385 	struct trace_iterator	iter;
6386 	void			*spare;
6387 	unsigned int		spare_cpu;
6388 	unsigned int		read;
6389 };
6390 
6391 #ifdef CONFIG_TRACER_SNAPSHOT
6392 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6393 {
6394 	struct trace_array *tr = inode->i_private;
6395 	struct trace_iterator *iter;
6396 	struct seq_file *m;
6397 	int ret = 0;
6398 
6399 	if (trace_array_get(tr) < 0)
6400 		return -ENODEV;
6401 
6402 	if (file->f_mode & FMODE_READ) {
6403 		iter = __tracing_open(inode, file, true);
6404 		if (IS_ERR(iter))
6405 			ret = PTR_ERR(iter);
6406 	} else {
6407 		/* Writes still need the seq_file to hold the private data */
6408 		ret = -ENOMEM;
6409 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6410 		if (!m)
6411 			goto out;
6412 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6413 		if (!iter) {
6414 			kfree(m);
6415 			goto out;
6416 		}
6417 		ret = 0;
6418 
6419 		iter->tr = tr;
6420 		iter->trace_buffer = &tr->max_buffer;
6421 		iter->cpu_file = tracing_get_cpu(inode);
6422 		m->private = iter;
6423 		file->private_data = m;
6424 	}
6425 out:
6426 	if (ret < 0)
6427 		trace_array_put(tr);
6428 
6429 	return ret;
6430 }
6431 
6432 static ssize_t
6433 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6434 		       loff_t *ppos)
6435 {
6436 	struct seq_file *m = filp->private_data;
6437 	struct trace_iterator *iter = m->private;
6438 	struct trace_array *tr = iter->tr;
6439 	unsigned long val;
6440 	int ret;
6441 
6442 	ret = tracing_update_buffers();
6443 	if (ret < 0)
6444 		return ret;
6445 
6446 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6447 	if (ret)
6448 		return ret;
6449 
6450 	mutex_lock(&trace_types_lock);
6451 
6452 	if (tr->current_trace->use_max_tr) {
6453 		ret = -EBUSY;
6454 		goto out;
6455 	}
6456 
6457 	switch (val) {
6458 	case 0:
6459 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6460 			ret = -EINVAL;
6461 			break;
6462 		}
6463 		if (tr->allocated_snapshot)
6464 			free_snapshot(tr);
6465 		break;
6466 	case 1:
6467 /* Only allow per-cpu swap if the ring buffer supports it */
6468 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6469 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6470 			ret = -EINVAL;
6471 			break;
6472 		}
6473 #endif
6474 		if (!tr->allocated_snapshot) {
6475 			ret = tracing_alloc_snapshot_instance(tr);
6476 			if (ret < 0)
6477 				break;
6478 		}
6479 		local_irq_disable();
6480 		/* Now, we're going to swap */
6481 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6482 			update_max_tr(tr, current, smp_processor_id());
6483 		else
6484 			update_max_tr_single(tr, current, iter->cpu_file);
6485 		local_irq_enable();
6486 		break;
6487 	default:
6488 		if (tr->allocated_snapshot) {
6489 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6490 				tracing_reset_online_cpus(&tr->max_buffer);
6491 			else
6492 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6493 		}
6494 		break;
6495 	}
6496 
6497 	if (ret >= 0) {
6498 		*ppos += cnt;
6499 		ret = cnt;
6500 	}
6501 out:
6502 	mutex_unlock(&trace_types_lock);
6503 	return ret;
6504 }
6505 
6506 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6507 {
6508 	struct seq_file *m = file->private_data;
6509 	int ret;
6510 
6511 	ret = tracing_release(inode, file);
6512 
6513 	if (file->f_mode & FMODE_READ)
6514 		return ret;
6515 
6516 	/* If write only, the seq_file is just a stub */
6517 	if (m)
6518 		kfree(m->private);
6519 	kfree(m);
6520 
6521 	return 0;
6522 }
6523 
6524 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6525 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6526 				    size_t count, loff_t *ppos);
6527 static int tracing_buffers_release(struct inode *inode, struct file *file);
6528 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6529 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6530 
6531 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6532 {
6533 	struct ftrace_buffer_info *info;
6534 	int ret;
6535 
6536 	ret = tracing_buffers_open(inode, filp);
6537 	if (ret < 0)
6538 		return ret;
6539 
6540 	info = filp->private_data;
6541 
6542 	if (info->iter.trace->use_max_tr) {
6543 		tracing_buffers_release(inode, filp);
6544 		return -EBUSY;
6545 	}
6546 
6547 	info->iter.snapshot = true;
6548 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6549 
6550 	return ret;
6551 }
6552 
6553 #endif /* CONFIG_TRACER_SNAPSHOT */
6554 
6555 
6556 static const struct file_operations tracing_thresh_fops = {
6557 	.open		= tracing_open_generic,
6558 	.read		= tracing_thresh_read,
6559 	.write		= tracing_thresh_write,
6560 	.llseek		= generic_file_llseek,
6561 };
6562 
6563 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6564 static const struct file_operations tracing_max_lat_fops = {
6565 	.open		= tracing_open_generic,
6566 	.read		= tracing_max_lat_read,
6567 	.write		= tracing_max_lat_write,
6568 	.llseek		= generic_file_llseek,
6569 };
6570 #endif
6571 
6572 static const struct file_operations set_tracer_fops = {
6573 	.open		= tracing_open_generic,
6574 	.read		= tracing_set_trace_read,
6575 	.write		= tracing_set_trace_write,
6576 	.llseek		= generic_file_llseek,
6577 };
6578 
6579 static const struct file_operations tracing_pipe_fops = {
6580 	.open		= tracing_open_pipe,
6581 	.poll		= tracing_poll_pipe,
6582 	.read		= tracing_read_pipe,
6583 	.splice_read	= tracing_splice_read_pipe,
6584 	.release	= tracing_release_pipe,
6585 	.llseek		= no_llseek,
6586 };
6587 
6588 static const struct file_operations tracing_entries_fops = {
6589 	.open		= tracing_open_generic_tr,
6590 	.read		= tracing_entries_read,
6591 	.write		= tracing_entries_write,
6592 	.llseek		= generic_file_llseek,
6593 	.release	= tracing_release_generic_tr,
6594 };
6595 
6596 static const struct file_operations tracing_total_entries_fops = {
6597 	.open		= tracing_open_generic_tr,
6598 	.read		= tracing_total_entries_read,
6599 	.llseek		= generic_file_llseek,
6600 	.release	= tracing_release_generic_tr,
6601 };
6602 
6603 static const struct file_operations tracing_free_buffer_fops = {
6604 	.open		= tracing_open_generic_tr,
6605 	.write		= tracing_free_buffer_write,
6606 	.release	= tracing_free_buffer_release,
6607 };
6608 
6609 static const struct file_operations tracing_mark_fops = {
6610 	.open		= tracing_open_generic_tr,
6611 	.write		= tracing_mark_write,
6612 	.llseek		= generic_file_llseek,
6613 	.release	= tracing_release_generic_tr,
6614 };
6615 
6616 static const struct file_operations tracing_mark_raw_fops = {
6617 	.open		= tracing_open_generic_tr,
6618 	.write		= tracing_mark_raw_write,
6619 	.llseek		= generic_file_llseek,
6620 	.release	= tracing_release_generic_tr,
6621 };
6622 
6623 static const struct file_operations trace_clock_fops = {
6624 	.open		= tracing_clock_open,
6625 	.read		= seq_read,
6626 	.llseek		= seq_lseek,
6627 	.release	= tracing_single_release_tr,
6628 	.write		= tracing_clock_write,
6629 };
6630 
6631 static const struct file_operations trace_time_stamp_mode_fops = {
6632 	.open		= tracing_time_stamp_mode_open,
6633 	.read		= seq_read,
6634 	.llseek		= seq_lseek,
6635 	.release	= tracing_single_release_tr,
6636 };
6637 
6638 #ifdef CONFIG_TRACER_SNAPSHOT
6639 static const struct file_operations snapshot_fops = {
6640 	.open		= tracing_snapshot_open,
6641 	.read		= seq_read,
6642 	.write		= tracing_snapshot_write,
6643 	.llseek		= tracing_lseek,
6644 	.release	= tracing_snapshot_release,
6645 };
6646 
6647 static const struct file_operations snapshot_raw_fops = {
6648 	.open		= snapshot_raw_open,
6649 	.read		= tracing_buffers_read,
6650 	.release	= tracing_buffers_release,
6651 	.splice_read	= tracing_buffers_splice_read,
6652 	.llseek		= no_llseek,
6653 };
6654 
6655 #endif /* CONFIG_TRACER_SNAPSHOT */
6656 
6657 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6658 {
6659 	struct trace_array *tr = inode->i_private;
6660 	struct ftrace_buffer_info *info;
6661 	int ret;
6662 
6663 	if (tracing_disabled)
6664 		return -ENODEV;
6665 
6666 	if (trace_array_get(tr) < 0)
6667 		return -ENODEV;
6668 
6669 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6670 	if (!info) {
6671 		trace_array_put(tr);
6672 		return -ENOMEM;
6673 	}
6674 
6675 	mutex_lock(&trace_types_lock);
6676 
6677 	info->iter.tr		= tr;
6678 	info->iter.cpu_file	= tracing_get_cpu(inode);
6679 	info->iter.trace	= tr->current_trace;
6680 	info->iter.trace_buffer = &tr->trace_buffer;
6681 	info->spare		= NULL;
6682 	/* Force reading ring buffer for first read */
6683 	info->read		= (unsigned int)-1;
6684 
6685 	filp->private_data = info;
6686 
6687 	tr->current_trace->ref++;
6688 
6689 	mutex_unlock(&trace_types_lock);
6690 
6691 	ret = nonseekable_open(inode, filp);
6692 	if (ret < 0)
6693 		trace_array_put(tr);
6694 
6695 	return ret;
6696 }
6697 
6698 static __poll_t
6699 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6700 {
6701 	struct ftrace_buffer_info *info = filp->private_data;
6702 	struct trace_iterator *iter = &info->iter;
6703 
6704 	return trace_poll(iter, filp, poll_table);
6705 }
6706 
6707 static ssize_t
6708 tracing_buffers_read(struct file *filp, char __user *ubuf,
6709 		     size_t count, loff_t *ppos)
6710 {
6711 	struct ftrace_buffer_info *info = filp->private_data;
6712 	struct trace_iterator *iter = &info->iter;
6713 	ssize_t ret = 0;
6714 	ssize_t size;
6715 
6716 	if (!count)
6717 		return 0;
6718 
6719 #ifdef CONFIG_TRACER_MAX_TRACE
6720 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6721 		return -EBUSY;
6722 #endif
6723 
6724 	if (!info->spare) {
6725 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6726 							  iter->cpu_file);
6727 		if (IS_ERR(info->spare)) {
6728 			ret = PTR_ERR(info->spare);
6729 			info->spare = NULL;
6730 		} else {
6731 			info->spare_cpu = iter->cpu_file;
6732 		}
6733 	}
6734 	if (!info->spare)
6735 		return ret;
6736 
6737 	/* Do we have previous read data to read? */
6738 	if (info->read < PAGE_SIZE)
6739 		goto read;
6740 
6741  again:
6742 	trace_access_lock(iter->cpu_file);
6743 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6744 				    &info->spare,
6745 				    count,
6746 				    iter->cpu_file, 0);
6747 	trace_access_unlock(iter->cpu_file);
6748 
6749 	if (ret < 0) {
6750 		if (trace_empty(iter)) {
6751 			if ((filp->f_flags & O_NONBLOCK))
6752 				return -EAGAIN;
6753 
6754 			ret = wait_on_pipe(iter, false);
6755 			if (ret)
6756 				return ret;
6757 
6758 			goto again;
6759 		}
6760 		return 0;
6761 	}
6762 
6763 	info->read = 0;
6764  read:
6765 	size = PAGE_SIZE - info->read;
6766 	if (size > count)
6767 		size = count;
6768 
6769 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6770 	if (ret == size)
6771 		return -EFAULT;
6772 
6773 	size -= ret;
6774 
6775 	*ppos += size;
6776 	info->read += size;
6777 
6778 	return size;
6779 }
6780 
6781 static int tracing_buffers_release(struct inode *inode, struct file *file)
6782 {
6783 	struct ftrace_buffer_info *info = file->private_data;
6784 	struct trace_iterator *iter = &info->iter;
6785 
6786 	mutex_lock(&trace_types_lock);
6787 
6788 	iter->tr->current_trace->ref--;
6789 
6790 	__trace_array_put(iter->tr);
6791 
6792 	if (info->spare)
6793 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6794 					   info->spare_cpu, info->spare);
6795 	kfree(info);
6796 
6797 	mutex_unlock(&trace_types_lock);
6798 
6799 	return 0;
6800 }
6801 
6802 struct buffer_ref {
6803 	struct ring_buffer	*buffer;
6804 	void			*page;
6805 	int			cpu;
6806 	int			ref;
6807 };
6808 
6809 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6810 				    struct pipe_buffer *buf)
6811 {
6812 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6813 
6814 	if (--ref->ref)
6815 		return;
6816 
6817 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6818 	kfree(ref);
6819 	buf->private = 0;
6820 }
6821 
6822 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6823 				struct pipe_buffer *buf)
6824 {
6825 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6826 
6827 	ref->ref++;
6828 }
6829 
6830 /* Pipe buffer operations for a buffer. */
6831 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6832 	.can_merge		= 0,
6833 	.confirm		= generic_pipe_buf_confirm,
6834 	.release		= buffer_pipe_buf_release,
6835 	.steal			= generic_pipe_buf_steal,
6836 	.get			= buffer_pipe_buf_get,
6837 };
6838 
6839 /*
6840  * Callback from splice_to_pipe(), if we need to release some pages
6841  * at the end of the spd in case we error'ed out in filling the pipe.
6842  */
6843 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6844 {
6845 	struct buffer_ref *ref =
6846 		(struct buffer_ref *)spd->partial[i].private;
6847 
6848 	if (--ref->ref)
6849 		return;
6850 
6851 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6852 	kfree(ref);
6853 	spd->partial[i].private = 0;
6854 }
6855 
6856 static ssize_t
6857 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6858 			    struct pipe_inode_info *pipe, size_t len,
6859 			    unsigned int flags)
6860 {
6861 	struct ftrace_buffer_info *info = file->private_data;
6862 	struct trace_iterator *iter = &info->iter;
6863 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6864 	struct page *pages_def[PIPE_DEF_BUFFERS];
6865 	struct splice_pipe_desc spd = {
6866 		.pages		= pages_def,
6867 		.partial	= partial_def,
6868 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6869 		.ops		= &buffer_pipe_buf_ops,
6870 		.spd_release	= buffer_spd_release,
6871 	};
6872 	struct buffer_ref *ref;
6873 	int entries, i;
6874 	ssize_t ret = 0;
6875 
6876 #ifdef CONFIG_TRACER_MAX_TRACE
6877 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6878 		return -EBUSY;
6879 #endif
6880 
6881 	if (*ppos & (PAGE_SIZE - 1))
6882 		return -EINVAL;
6883 
6884 	if (len & (PAGE_SIZE - 1)) {
6885 		if (len < PAGE_SIZE)
6886 			return -EINVAL;
6887 		len &= PAGE_MASK;
6888 	}
6889 
6890 	if (splice_grow_spd(pipe, &spd))
6891 		return -ENOMEM;
6892 
6893  again:
6894 	trace_access_lock(iter->cpu_file);
6895 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6896 
6897 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6898 		struct page *page;
6899 		int r;
6900 
6901 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6902 		if (!ref) {
6903 			ret = -ENOMEM;
6904 			break;
6905 		}
6906 
6907 		ref->ref = 1;
6908 		ref->buffer = iter->trace_buffer->buffer;
6909 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6910 		if (IS_ERR(ref->page)) {
6911 			ret = PTR_ERR(ref->page);
6912 			ref->page = NULL;
6913 			kfree(ref);
6914 			break;
6915 		}
6916 		ref->cpu = iter->cpu_file;
6917 
6918 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6919 					  len, iter->cpu_file, 1);
6920 		if (r < 0) {
6921 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6922 						   ref->page);
6923 			kfree(ref);
6924 			break;
6925 		}
6926 
6927 		page = virt_to_page(ref->page);
6928 
6929 		spd.pages[i] = page;
6930 		spd.partial[i].len = PAGE_SIZE;
6931 		spd.partial[i].offset = 0;
6932 		spd.partial[i].private = (unsigned long)ref;
6933 		spd.nr_pages++;
6934 		*ppos += PAGE_SIZE;
6935 
6936 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6937 	}
6938 
6939 	trace_access_unlock(iter->cpu_file);
6940 	spd.nr_pages = i;
6941 
6942 	/* did we read anything? */
6943 	if (!spd.nr_pages) {
6944 		if (ret)
6945 			goto out;
6946 
6947 		ret = -EAGAIN;
6948 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6949 			goto out;
6950 
6951 		ret = wait_on_pipe(iter, true);
6952 		if (ret)
6953 			goto out;
6954 
6955 		goto again;
6956 	}
6957 
6958 	ret = splice_to_pipe(pipe, &spd);
6959 out:
6960 	splice_shrink_spd(&spd);
6961 
6962 	return ret;
6963 }
6964 
6965 static const struct file_operations tracing_buffers_fops = {
6966 	.open		= tracing_buffers_open,
6967 	.read		= tracing_buffers_read,
6968 	.poll		= tracing_buffers_poll,
6969 	.release	= tracing_buffers_release,
6970 	.splice_read	= tracing_buffers_splice_read,
6971 	.llseek		= no_llseek,
6972 };
6973 
6974 static ssize_t
6975 tracing_stats_read(struct file *filp, char __user *ubuf,
6976 		   size_t count, loff_t *ppos)
6977 {
6978 	struct inode *inode = file_inode(filp);
6979 	struct trace_array *tr = inode->i_private;
6980 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6981 	int cpu = tracing_get_cpu(inode);
6982 	struct trace_seq *s;
6983 	unsigned long cnt;
6984 	unsigned long long t;
6985 	unsigned long usec_rem;
6986 
6987 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6988 	if (!s)
6989 		return -ENOMEM;
6990 
6991 	trace_seq_init(s);
6992 
6993 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6994 	trace_seq_printf(s, "entries: %ld\n", cnt);
6995 
6996 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6997 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6998 
6999 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7000 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7001 
7002 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7003 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7004 
7005 	if (trace_clocks[tr->clock_id].in_ns) {
7006 		/* local or global for trace_clock */
7007 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7008 		usec_rem = do_div(t, USEC_PER_SEC);
7009 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7010 								t, usec_rem);
7011 
7012 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7013 		usec_rem = do_div(t, USEC_PER_SEC);
7014 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7015 	} else {
7016 		/* counter or tsc mode for trace_clock */
7017 		trace_seq_printf(s, "oldest event ts: %llu\n",
7018 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7019 
7020 		trace_seq_printf(s, "now ts: %llu\n",
7021 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7022 	}
7023 
7024 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7025 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7026 
7027 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7028 	trace_seq_printf(s, "read events: %ld\n", cnt);
7029 
7030 	count = simple_read_from_buffer(ubuf, count, ppos,
7031 					s->buffer, trace_seq_used(s));
7032 
7033 	kfree(s);
7034 
7035 	return count;
7036 }
7037 
7038 static const struct file_operations tracing_stats_fops = {
7039 	.open		= tracing_open_generic_tr,
7040 	.read		= tracing_stats_read,
7041 	.llseek		= generic_file_llseek,
7042 	.release	= tracing_release_generic_tr,
7043 };
7044 
7045 #ifdef CONFIG_DYNAMIC_FTRACE
7046 
7047 static ssize_t
7048 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7049 		  size_t cnt, loff_t *ppos)
7050 {
7051 	unsigned long *p = filp->private_data;
7052 	char buf[64]; /* Not too big for a shallow stack */
7053 	int r;
7054 
7055 	r = scnprintf(buf, 63, "%ld", *p);
7056 	buf[r++] = '\n';
7057 
7058 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7059 }
7060 
7061 static const struct file_operations tracing_dyn_info_fops = {
7062 	.open		= tracing_open_generic,
7063 	.read		= tracing_read_dyn_info,
7064 	.llseek		= generic_file_llseek,
7065 };
7066 #endif /* CONFIG_DYNAMIC_FTRACE */
7067 
7068 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7069 static void
7070 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7071 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7072 		void *data)
7073 {
7074 	tracing_snapshot_instance(tr);
7075 }
7076 
7077 static void
7078 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7079 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7080 		      void *data)
7081 {
7082 	struct ftrace_func_mapper *mapper = data;
7083 	long *count = NULL;
7084 
7085 	if (mapper)
7086 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7087 
7088 	if (count) {
7089 
7090 		if (*count <= 0)
7091 			return;
7092 
7093 		(*count)--;
7094 	}
7095 
7096 	tracing_snapshot_instance(tr);
7097 }
7098 
7099 static int
7100 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7101 		      struct ftrace_probe_ops *ops, void *data)
7102 {
7103 	struct ftrace_func_mapper *mapper = data;
7104 	long *count = NULL;
7105 
7106 	seq_printf(m, "%ps:", (void *)ip);
7107 
7108 	seq_puts(m, "snapshot");
7109 
7110 	if (mapper)
7111 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7112 
7113 	if (count)
7114 		seq_printf(m, ":count=%ld\n", *count);
7115 	else
7116 		seq_puts(m, ":unlimited\n");
7117 
7118 	return 0;
7119 }
7120 
7121 static int
7122 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7123 		     unsigned long ip, void *init_data, void **data)
7124 {
7125 	struct ftrace_func_mapper *mapper = *data;
7126 
7127 	if (!mapper) {
7128 		mapper = allocate_ftrace_func_mapper();
7129 		if (!mapper)
7130 			return -ENOMEM;
7131 		*data = mapper;
7132 	}
7133 
7134 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7135 }
7136 
7137 static void
7138 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7139 		     unsigned long ip, void *data)
7140 {
7141 	struct ftrace_func_mapper *mapper = data;
7142 
7143 	if (!ip) {
7144 		if (!mapper)
7145 			return;
7146 		free_ftrace_func_mapper(mapper, NULL);
7147 		return;
7148 	}
7149 
7150 	ftrace_func_mapper_remove_ip(mapper, ip);
7151 }
7152 
7153 static struct ftrace_probe_ops snapshot_probe_ops = {
7154 	.func			= ftrace_snapshot,
7155 	.print			= ftrace_snapshot_print,
7156 };
7157 
7158 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7159 	.func			= ftrace_count_snapshot,
7160 	.print			= ftrace_snapshot_print,
7161 	.init			= ftrace_snapshot_init,
7162 	.free			= ftrace_snapshot_free,
7163 };
7164 
7165 static int
7166 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7167 			       char *glob, char *cmd, char *param, int enable)
7168 {
7169 	struct ftrace_probe_ops *ops;
7170 	void *count = (void *)-1;
7171 	char *number;
7172 	int ret;
7173 
7174 	if (!tr)
7175 		return -ENODEV;
7176 
7177 	/* hash funcs only work with set_ftrace_filter */
7178 	if (!enable)
7179 		return -EINVAL;
7180 
7181 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7182 
7183 	if (glob[0] == '!')
7184 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7185 
7186 	if (!param)
7187 		goto out_reg;
7188 
7189 	number = strsep(&param, ":");
7190 
7191 	if (!strlen(number))
7192 		goto out_reg;
7193 
7194 	/*
7195 	 * We use the callback data field (which is a pointer)
7196 	 * as our counter.
7197 	 */
7198 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7199 	if (ret)
7200 		return ret;
7201 
7202  out_reg:
7203 	ret = tracing_alloc_snapshot_instance(tr);
7204 	if (ret < 0)
7205 		goto out;
7206 
7207 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7208 
7209  out:
7210 	return ret < 0 ? ret : 0;
7211 }
7212 
7213 static struct ftrace_func_command ftrace_snapshot_cmd = {
7214 	.name			= "snapshot",
7215 	.func			= ftrace_trace_snapshot_callback,
7216 };
7217 
7218 static __init int register_snapshot_cmd(void)
7219 {
7220 	return register_ftrace_command(&ftrace_snapshot_cmd);
7221 }
7222 #else
7223 static inline __init int register_snapshot_cmd(void) { return 0; }
7224 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7225 
7226 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7227 {
7228 	if (WARN_ON(!tr->dir))
7229 		return ERR_PTR(-ENODEV);
7230 
7231 	/* Top directory uses NULL as the parent */
7232 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7233 		return NULL;
7234 
7235 	/* All sub buffers have a descriptor */
7236 	return tr->dir;
7237 }
7238 
7239 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7240 {
7241 	struct dentry *d_tracer;
7242 
7243 	if (tr->percpu_dir)
7244 		return tr->percpu_dir;
7245 
7246 	d_tracer = tracing_get_dentry(tr);
7247 	if (IS_ERR(d_tracer))
7248 		return NULL;
7249 
7250 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7251 
7252 	WARN_ONCE(!tr->percpu_dir,
7253 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7254 
7255 	return tr->percpu_dir;
7256 }
7257 
7258 static struct dentry *
7259 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7260 		      void *data, long cpu, const struct file_operations *fops)
7261 {
7262 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7263 
7264 	if (ret) /* See tracing_get_cpu() */
7265 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7266 	return ret;
7267 }
7268 
7269 static void
7270 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7271 {
7272 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7273 	struct dentry *d_cpu;
7274 	char cpu_dir[30]; /* 30 characters should be more than enough */
7275 
7276 	if (!d_percpu)
7277 		return;
7278 
7279 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7280 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7281 	if (!d_cpu) {
7282 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7283 		return;
7284 	}
7285 
7286 	/* per cpu trace_pipe */
7287 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7288 				tr, cpu, &tracing_pipe_fops);
7289 
7290 	/* per cpu trace */
7291 	trace_create_cpu_file("trace", 0644, d_cpu,
7292 				tr, cpu, &tracing_fops);
7293 
7294 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7295 				tr, cpu, &tracing_buffers_fops);
7296 
7297 	trace_create_cpu_file("stats", 0444, d_cpu,
7298 				tr, cpu, &tracing_stats_fops);
7299 
7300 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7301 				tr, cpu, &tracing_entries_fops);
7302 
7303 #ifdef CONFIG_TRACER_SNAPSHOT
7304 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7305 				tr, cpu, &snapshot_fops);
7306 
7307 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7308 				tr, cpu, &snapshot_raw_fops);
7309 #endif
7310 }
7311 
7312 #ifdef CONFIG_FTRACE_SELFTEST
7313 /* Let selftest have access to static functions in this file */
7314 #include "trace_selftest.c"
7315 #endif
7316 
7317 static ssize_t
7318 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7319 			loff_t *ppos)
7320 {
7321 	struct trace_option_dentry *topt = filp->private_data;
7322 	char *buf;
7323 
7324 	if (topt->flags->val & topt->opt->bit)
7325 		buf = "1\n";
7326 	else
7327 		buf = "0\n";
7328 
7329 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7330 }
7331 
7332 static ssize_t
7333 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7334 			 loff_t *ppos)
7335 {
7336 	struct trace_option_dentry *topt = filp->private_data;
7337 	unsigned long val;
7338 	int ret;
7339 
7340 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7341 	if (ret)
7342 		return ret;
7343 
7344 	if (val != 0 && val != 1)
7345 		return -EINVAL;
7346 
7347 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7348 		mutex_lock(&trace_types_lock);
7349 		ret = __set_tracer_option(topt->tr, topt->flags,
7350 					  topt->opt, !val);
7351 		mutex_unlock(&trace_types_lock);
7352 		if (ret)
7353 			return ret;
7354 	}
7355 
7356 	*ppos += cnt;
7357 
7358 	return cnt;
7359 }
7360 
7361 
7362 static const struct file_operations trace_options_fops = {
7363 	.open = tracing_open_generic,
7364 	.read = trace_options_read,
7365 	.write = trace_options_write,
7366 	.llseek	= generic_file_llseek,
7367 };
7368 
7369 /*
7370  * In order to pass in both the trace_array descriptor as well as the index
7371  * to the flag that the trace option file represents, the trace_array
7372  * has a character array of trace_flags_index[], which holds the index
7373  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7374  * The address of this character array is passed to the flag option file
7375  * read/write callbacks.
7376  *
7377  * In order to extract both the index and the trace_array descriptor,
7378  * get_tr_index() uses the following algorithm.
7379  *
7380  *   idx = *ptr;
7381  *
7382  * As the pointer itself contains the address of the index (remember
7383  * index[1] == 1).
7384  *
7385  * Then to get the trace_array descriptor, by subtracting that index
7386  * from the ptr, we get to the start of the index itself.
7387  *
7388  *   ptr - idx == &index[0]
7389  *
7390  * Then a simple container_of() from that pointer gets us to the
7391  * trace_array descriptor.
7392  */
7393 static void get_tr_index(void *data, struct trace_array **ptr,
7394 			 unsigned int *pindex)
7395 {
7396 	*pindex = *(unsigned char *)data;
7397 
7398 	*ptr = container_of(data - *pindex, struct trace_array,
7399 			    trace_flags_index);
7400 }
7401 
7402 static ssize_t
7403 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7404 			loff_t *ppos)
7405 {
7406 	void *tr_index = filp->private_data;
7407 	struct trace_array *tr;
7408 	unsigned int index;
7409 	char *buf;
7410 
7411 	get_tr_index(tr_index, &tr, &index);
7412 
7413 	if (tr->trace_flags & (1 << index))
7414 		buf = "1\n";
7415 	else
7416 		buf = "0\n";
7417 
7418 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7419 }
7420 
7421 static ssize_t
7422 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7423 			 loff_t *ppos)
7424 {
7425 	void *tr_index = filp->private_data;
7426 	struct trace_array *tr;
7427 	unsigned int index;
7428 	unsigned long val;
7429 	int ret;
7430 
7431 	get_tr_index(tr_index, &tr, &index);
7432 
7433 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7434 	if (ret)
7435 		return ret;
7436 
7437 	if (val != 0 && val != 1)
7438 		return -EINVAL;
7439 
7440 	mutex_lock(&trace_types_lock);
7441 	ret = set_tracer_flag(tr, 1 << index, val);
7442 	mutex_unlock(&trace_types_lock);
7443 
7444 	if (ret < 0)
7445 		return ret;
7446 
7447 	*ppos += cnt;
7448 
7449 	return cnt;
7450 }
7451 
7452 static const struct file_operations trace_options_core_fops = {
7453 	.open = tracing_open_generic,
7454 	.read = trace_options_core_read,
7455 	.write = trace_options_core_write,
7456 	.llseek = generic_file_llseek,
7457 };
7458 
7459 struct dentry *trace_create_file(const char *name,
7460 				 umode_t mode,
7461 				 struct dentry *parent,
7462 				 void *data,
7463 				 const struct file_operations *fops)
7464 {
7465 	struct dentry *ret;
7466 
7467 	ret = tracefs_create_file(name, mode, parent, data, fops);
7468 	if (!ret)
7469 		pr_warn("Could not create tracefs '%s' entry\n", name);
7470 
7471 	return ret;
7472 }
7473 
7474 
7475 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7476 {
7477 	struct dentry *d_tracer;
7478 
7479 	if (tr->options)
7480 		return tr->options;
7481 
7482 	d_tracer = tracing_get_dentry(tr);
7483 	if (IS_ERR(d_tracer))
7484 		return NULL;
7485 
7486 	tr->options = tracefs_create_dir("options", d_tracer);
7487 	if (!tr->options) {
7488 		pr_warn("Could not create tracefs directory 'options'\n");
7489 		return NULL;
7490 	}
7491 
7492 	return tr->options;
7493 }
7494 
7495 static void
7496 create_trace_option_file(struct trace_array *tr,
7497 			 struct trace_option_dentry *topt,
7498 			 struct tracer_flags *flags,
7499 			 struct tracer_opt *opt)
7500 {
7501 	struct dentry *t_options;
7502 
7503 	t_options = trace_options_init_dentry(tr);
7504 	if (!t_options)
7505 		return;
7506 
7507 	topt->flags = flags;
7508 	topt->opt = opt;
7509 	topt->tr = tr;
7510 
7511 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7512 				    &trace_options_fops);
7513 
7514 }
7515 
7516 static void
7517 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7518 {
7519 	struct trace_option_dentry *topts;
7520 	struct trace_options *tr_topts;
7521 	struct tracer_flags *flags;
7522 	struct tracer_opt *opts;
7523 	int cnt;
7524 	int i;
7525 
7526 	if (!tracer)
7527 		return;
7528 
7529 	flags = tracer->flags;
7530 
7531 	if (!flags || !flags->opts)
7532 		return;
7533 
7534 	/*
7535 	 * If this is an instance, only create flags for tracers
7536 	 * the instance may have.
7537 	 */
7538 	if (!trace_ok_for_array(tracer, tr))
7539 		return;
7540 
7541 	for (i = 0; i < tr->nr_topts; i++) {
7542 		/* Make sure there's no duplicate flags. */
7543 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7544 			return;
7545 	}
7546 
7547 	opts = flags->opts;
7548 
7549 	for (cnt = 0; opts[cnt].name; cnt++)
7550 		;
7551 
7552 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7553 	if (!topts)
7554 		return;
7555 
7556 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7557 			    GFP_KERNEL);
7558 	if (!tr_topts) {
7559 		kfree(topts);
7560 		return;
7561 	}
7562 
7563 	tr->topts = tr_topts;
7564 	tr->topts[tr->nr_topts].tracer = tracer;
7565 	tr->topts[tr->nr_topts].topts = topts;
7566 	tr->nr_topts++;
7567 
7568 	for (cnt = 0; opts[cnt].name; cnt++) {
7569 		create_trace_option_file(tr, &topts[cnt], flags,
7570 					 &opts[cnt]);
7571 		WARN_ONCE(topts[cnt].entry == NULL,
7572 			  "Failed to create trace option: %s",
7573 			  opts[cnt].name);
7574 	}
7575 }
7576 
7577 static struct dentry *
7578 create_trace_option_core_file(struct trace_array *tr,
7579 			      const char *option, long index)
7580 {
7581 	struct dentry *t_options;
7582 
7583 	t_options = trace_options_init_dentry(tr);
7584 	if (!t_options)
7585 		return NULL;
7586 
7587 	return trace_create_file(option, 0644, t_options,
7588 				 (void *)&tr->trace_flags_index[index],
7589 				 &trace_options_core_fops);
7590 }
7591 
7592 static void create_trace_options_dir(struct trace_array *tr)
7593 {
7594 	struct dentry *t_options;
7595 	bool top_level = tr == &global_trace;
7596 	int i;
7597 
7598 	t_options = trace_options_init_dentry(tr);
7599 	if (!t_options)
7600 		return;
7601 
7602 	for (i = 0; trace_options[i]; i++) {
7603 		if (top_level ||
7604 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7605 			create_trace_option_core_file(tr, trace_options[i], i);
7606 	}
7607 }
7608 
7609 static ssize_t
7610 rb_simple_read(struct file *filp, char __user *ubuf,
7611 	       size_t cnt, loff_t *ppos)
7612 {
7613 	struct trace_array *tr = filp->private_data;
7614 	char buf[64];
7615 	int r;
7616 
7617 	r = tracer_tracing_is_on(tr);
7618 	r = sprintf(buf, "%d\n", r);
7619 
7620 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7621 }
7622 
7623 static ssize_t
7624 rb_simple_write(struct file *filp, const char __user *ubuf,
7625 		size_t cnt, loff_t *ppos)
7626 {
7627 	struct trace_array *tr = filp->private_data;
7628 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7629 	unsigned long val;
7630 	int ret;
7631 
7632 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7633 	if (ret)
7634 		return ret;
7635 
7636 	if (buffer) {
7637 		mutex_lock(&trace_types_lock);
7638 		if (!!val == tracer_tracing_is_on(tr)) {
7639 			val = 0; /* do nothing */
7640 		} else if (val) {
7641 			tracer_tracing_on(tr);
7642 			if (tr->current_trace->start)
7643 				tr->current_trace->start(tr);
7644 		} else {
7645 			tracer_tracing_off(tr);
7646 			if (tr->current_trace->stop)
7647 				tr->current_trace->stop(tr);
7648 		}
7649 		mutex_unlock(&trace_types_lock);
7650 	}
7651 
7652 	(*ppos)++;
7653 
7654 	return cnt;
7655 }
7656 
7657 static const struct file_operations rb_simple_fops = {
7658 	.open		= tracing_open_generic_tr,
7659 	.read		= rb_simple_read,
7660 	.write		= rb_simple_write,
7661 	.release	= tracing_release_generic_tr,
7662 	.llseek		= default_llseek,
7663 };
7664 
7665 struct dentry *trace_instance_dir;
7666 
7667 static void
7668 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7669 
7670 static int
7671 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7672 {
7673 	enum ring_buffer_flags rb_flags;
7674 
7675 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7676 
7677 	buf->tr = tr;
7678 
7679 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7680 	if (!buf->buffer)
7681 		return -ENOMEM;
7682 
7683 	buf->data = alloc_percpu(struct trace_array_cpu);
7684 	if (!buf->data) {
7685 		ring_buffer_free(buf->buffer);
7686 		buf->buffer = NULL;
7687 		return -ENOMEM;
7688 	}
7689 
7690 	/* Allocate the first page for all buffers */
7691 	set_buffer_entries(&tr->trace_buffer,
7692 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7693 
7694 	return 0;
7695 }
7696 
7697 static int allocate_trace_buffers(struct trace_array *tr, int size)
7698 {
7699 	int ret;
7700 
7701 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7702 	if (ret)
7703 		return ret;
7704 
7705 #ifdef CONFIG_TRACER_MAX_TRACE
7706 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7707 				    allocate_snapshot ? size : 1);
7708 	if (WARN_ON(ret)) {
7709 		ring_buffer_free(tr->trace_buffer.buffer);
7710 		tr->trace_buffer.buffer = NULL;
7711 		free_percpu(tr->trace_buffer.data);
7712 		tr->trace_buffer.data = NULL;
7713 		return -ENOMEM;
7714 	}
7715 	tr->allocated_snapshot = allocate_snapshot;
7716 
7717 	/*
7718 	 * Only the top level trace array gets its snapshot allocated
7719 	 * from the kernel command line.
7720 	 */
7721 	allocate_snapshot = false;
7722 #endif
7723 	return 0;
7724 }
7725 
7726 static void free_trace_buffer(struct trace_buffer *buf)
7727 {
7728 	if (buf->buffer) {
7729 		ring_buffer_free(buf->buffer);
7730 		buf->buffer = NULL;
7731 		free_percpu(buf->data);
7732 		buf->data = NULL;
7733 	}
7734 }
7735 
7736 static void free_trace_buffers(struct trace_array *tr)
7737 {
7738 	if (!tr)
7739 		return;
7740 
7741 	free_trace_buffer(&tr->trace_buffer);
7742 
7743 #ifdef CONFIG_TRACER_MAX_TRACE
7744 	free_trace_buffer(&tr->max_buffer);
7745 #endif
7746 }
7747 
7748 static void init_trace_flags_index(struct trace_array *tr)
7749 {
7750 	int i;
7751 
7752 	/* Used by the trace options files */
7753 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7754 		tr->trace_flags_index[i] = i;
7755 }
7756 
7757 static void __update_tracer_options(struct trace_array *tr)
7758 {
7759 	struct tracer *t;
7760 
7761 	for (t = trace_types; t; t = t->next)
7762 		add_tracer_options(tr, t);
7763 }
7764 
7765 static void update_tracer_options(struct trace_array *tr)
7766 {
7767 	mutex_lock(&trace_types_lock);
7768 	__update_tracer_options(tr);
7769 	mutex_unlock(&trace_types_lock);
7770 }
7771 
7772 static int instance_mkdir(const char *name)
7773 {
7774 	struct trace_array *tr;
7775 	int ret;
7776 
7777 	mutex_lock(&event_mutex);
7778 	mutex_lock(&trace_types_lock);
7779 
7780 	ret = -EEXIST;
7781 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7782 		if (tr->name && strcmp(tr->name, name) == 0)
7783 			goto out_unlock;
7784 	}
7785 
7786 	ret = -ENOMEM;
7787 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7788 	if (!tr)
7789 		goto out_unlock;
7790 
7791 	tr->name = kstrdup(name, GFP_KERNEL);
7792 	if (!tr->name)
7793 		goto out_free_tr;
7794 
7795 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7796 		goto out_free_tr;
7797 
7798 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7799 
7800 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7801 
7802 	raw_spin_lock_init(&tr->start_lock);
7803 
7804 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7805 
7806 	tr->current_trace = &nop_trace;
7807 
7808 	INIT_LIST_HEAD(&tr->systems);
7809 	INIT_LIST_HEAD(&tr->events);
7810 	INIT_LIST_HEAD(&tr->hist_vars);
7811 
7812 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7813 		goto out_free_tr;
7814 
7815 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7816 	if (!tr->dir)
7817 		goto out_free_tr;
7818 
7819 	ret = event_trace_add_tracer(tr->dir, tr);
7820 	if (ret) {
7821 		tracefs_remove_recursive(tr->dir);
7822 		goto out_free_tr;
7823 	}
7824 
7825 	ftrace_init_trace_array(tr);
7826 
7827 	init_tracer_tracefs(tr, tr->dir);
7828 	init_trace_flags_index(tr);
7829 	__update_tracer_options(tr);
7830 
7831 	list_add(&tr->list, &ftrace_trace_arrays);
7832 
7833 	mutex_unlock(&trace_types_lock);
7834 	mutex_unlock(&event_mutex);
7835 
7836 	return 0;
7837 
7838  out_free_tr:
7839 	free_trace_buffers(tr);
7840 	free_cpumask_var(tr->tracing_cpumask);
7841 	kfree(tr->name);
7842 	kfree(tr);
7843 
7844  out_unlock:
7845 	mutex_unlock(&trace_types_lock);
7846 	mutex_unlock(&event_mutex);
7847 
7848 	return ret;
7849 
7850 }
7851 
7852 static int instance_rmdir(const char *name)
7853 {
7854 	struct trace_array *tr;
7855 	int found = 0;
7856 	int ret;
7857 	int i;
7858 
7859 	mutex_lock(&event_mutex);
7860 	mutex_lock(&trace_types_lock);
7861 
7862 	ret = -ENODEV;
7863 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7864 		if (tr->name && strcmp(tr->name, name) == 0) {
7865 			found = 1;
7866 			break;
7867 		}
7868 	}
7869 	if (!found)
7870 		goto out_unlock;
7871 
7872 	ret = -EBUSY;
7873 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7874 		goto out_unlock;
7875 
7876 	list_del(&tr->list);
7877 
7878 	/* Disable all the flags that were enabled coming in */
7879 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7880 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7881 			set_tracer_flag(tr, 1 << i, 0);
7882 	}
7883 
7884 	tracing_set_nop(tr);
7885 	clear_ftrace_function_probes(tr);
7886 	event_trace_del_tracer(tr);
7887 	ftrace_clear_pids(tr);
7888 	ftrace_destroy_function_files(tr);
7889 	tracefs_remove_recursive(tr->dir);
7890 	free_trace_buffers(tr);
7891 
7892 	for (i = 0; i < tr->nr_topts; i++) {
7893 		kfree(tr->topts[i].topts);
7894 	}
7895 	kfree(tr->topts);
7896 
7897 	free_cpumask_var(tr->tracing_cpumask);
7898 	kfree(tr->name);
7899 	kfree(tr);
7900 
7901 	ret = 0;
7902 
7903  out_unlock:
7904 	mutex_unlock(&trace_types_lock);
7905 	mutex_unlock(&event_mutex);
7906 
7907 	return ret;
7908 }
7909 
7910 static __init void create_trace_instances(struct dentry *d_tracer)
7911 {
7912 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7913 							 instance_mkdir,
7914 							 instance_rmdir);
7915 	if (WARN_ON(!trace_instance_dir))
7916 		return;
7917 }
7918 
7919 static void
7920 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7921 {
7922 	struct trace_event_file *file;
7923 	int cpu;
7924 
7925 	trace_create_file("available_tracers", 0444, d_tracer,
7926 			tr, &show_traces_fops);
7927 
7928 	trace_create_file("current_tracer", 0644, d_tracer,
7929 			tr, &set_tracer_fops);
7930 
7931 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7932 			  tr, &tracing_cpumask_fops);
7933 
7934 	trace_create_file("trace_options", 0644, d_tracer,
7935 			  tr, &tracing_iter_fops);
7936 
7937 	trace_create_file("trace", 0644, d_tracer,
7938 			  tr, &tracing_fops);
7939 
7940 	trace_create_file("trace_pipe", 0444, d_tracer,
7941 			  tr, &tracing_pipe_fops);
7942 
7943 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7944 			  tr, &tracing_entries_fops);
7945 
7946 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7947 			  tr, &tracing_total_entries_fops);
7948 
7949 	trace_create_file("free_buffer", 0200, d_tracer,
7950 			  tr, &tracing_free_buffer_fops);
7951 
7952 	trace_create_file("trace_marker", 0220, d_tracer,
7953 			  tr, &tracing_mark_fops);
7954 
7955 	file = __find_event_file(tr, "ftrace", "print");
7956 	if (file && file->dir)
7957 		trace_create_file("trigger", 0644, file->dir, file,
7958 				  &event_trigger_fops);
7959 	tr->trace_marker_file = file;
7960 
7961 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7962 			  tr, &tracing_mark_raw_fops);
7963 
7964 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7965 			  &trace_clock_fops);
7966 
7967 	trace_create_file("tracing_on", 0644, d_tracer,
7968 			  tr, &rb_simple_fops);
7969 
7970 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7971 			  &trace_time_stamp_mode_fops);
7972 
7973 	create_trace_options_dir(tr);
7974 
7975 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7976 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7977 			&tr->max_latency, &tracing_max_lat_fops);
7978 #endif
7979 
7980 	if (ftrace_create_function_files(tr, d_tracer))
7981 		WARN(1, "Could not allocate function filter files");
7982 
7983 #ifdef CONFIG_TRACER_SNAPSHOT
7984 	trace_create_file("snapshot", 0644, d_tracer,
7985 			  tr, &snapshot_fops);
7986 #endif
7987 
7988 	for_each_tracing_cpu(cpu)
7989 		tracing_init_tracefs_percpu(tr, cpu);
7990 
7991 	ftrace_init_tracefs(tr, d_tracer);
7992 }
7993 
7994 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7995 {
7996 	struct vfsmount *mnt;
7997 	struct file_system_type *type;
7998 
7999 	/*
8000 	 * To maintain backward compatibility for tools that mount
8001 	 * debugfs to get to the tracing facility, tracefs is automatically
8002 	 * mounted to the debugfs/tracing directory.
8003 	 */
8004 	type = get_fs_type("tracefs");
8005 	if (!type)
8006 		return NULL;
8007 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8008 	put_filesystem(type);
8009 	if (IS_ERR(mnt))
8010 		return NULL;
8011 	mntget(mnt);
8012 
8013 	return mnt;
8014 }
8015 
8016 /**
8017  * tracing_init_dentry - initialize top level trace array
8018  *
8019  * This is called when creating files or directories in the tracing
8020  * directory. It is called via fs_initcall() by any of the boot up code
8021  * and expects to return the dentry of the top level tracing directory.
8022  */
8023 struct dentry *tracing_init_dentry(void)
8024 {
8025 	struct trace_array *tr = &global_trace;
8026 
8027 	/* The top level trace array uses  NULL as parent */
8028 	if (tr->dir)
8029 		return NULL;
8030 
8031 	if (WARN_ON(!tracefs_initialized()) ||
8032 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8033 		 WARN_ON(!debugfs_initialized())))
8034 		return ERR_PTR(-ENODEV);
8035 
8036 	/*
8037 	 * As there may still be users that expect the tracing
8038 	 * files to exist in debugfs/tracing, we must automount
8039 	 * the tracefs file system there, so older tools still
8040 	 * work with the newer kerenl.
8041 	 */
8042 	tr->dir = debugfs_create_automount("tracing", NULL,
8043 					   trace_automount, NULL);
8044 	if (!tr->dir) {
8045 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8046 		return ERR_PTR(-ENOMEM);
8047 	}
8048 
8049 	return NULL;
8050 }
8051 
8052 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8053 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8054 
8055 static void __init trace_eval_init(void)
8056 {
8057 	int len;
8058 
8059 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8060 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8061 }
8062 
8063 #ifdef CONFIG_MODULES
8064 static void trace_module_add_evals(struct module *mod)
8065 {
8066 	if (!mod->num_trace_evals)
8067 		return;
8068 
8069 	/*
8070 	 * Modules with bad taint do not have events created, do
8071 	 * not bother with enums either.
8072 	 */
8073 	if (trace_module_has_bad_taint(mod))
8074 		return;
8075 
8076 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8077 }
8078 
8079 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8080 static void trace_module_remove_evals(struct module *mod)
8081 {
8082 	union trace_eval_map_item *map;
8083 	union trace_eval_map_item **last = &trace_eval_maps;
8084 
8085 	if (!mod->num_trace_evals)
8086 		return;
8087 
8088 	mutex_lock(&trace_eval_mutex);
8089 
8090 	map = trace_eval_maps;
8091 
8092 	while (map) {
8093 		if (map->head.mod == mod)
8094 			break;
8095 		map = trace_eval_jmp_to_tail(map);
8096 		last = &map->tail.next;
8097 		map = map->tail.next;
8098 	}
8099 	if (!map)
8100 		goto out;
8101 
8102 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8103 	kfree(map);
8104  out:
8105 	mutex_unlock(&trace_eval_mutex);
8106 }
8107 #else
8108 static inline void trace_module_remove_evals(struct module *mod) { }
8109 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8110 
8111 static int trace_module_notify(struct notifier_block *self,
8112 			       unsigned long val, void *data)
8113 {
8114 	struct module *mod = data;
8115 
8116 	switch (val) {
8117 	case MODULE_STATE_COMING:
8118 		trace_module_add_evals(mod);
8119 		break;
8120 	case MODULE_STATE_GOING:
8121 		trace_module_remove_evals(mod);
8122 		break;
8123 	}
8124 
8125 	return 0;
8126 }
8127 
8128 static struct notifier_block trace_module_nb = {
8129 	.notifier_call = trace_module_notify,
8130 	.priority = 0,
8131 };
8132 #endif /* CONFIG_MODULES */
8133 
8134 static __init int tracer_init_tracefs(void)
8135 {
8136 	struct dentry *d_tracer;
8137 
8138 	trace_access_lock_init();
8139 
8140 	d_tracer = tracing_init_dentry();
8141 	if (IS_ERR(d_tracer))
8142 		return 0;
8143 
8144 	event_trace_init();
8145 
8146 	init_tracer_tracefs(&global_trace, d_tracer);
8147 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8148 
8149 	trace_create_file("tracing_thresh", 0644, d_tracer,
8150 			&global_trace, &tracing_thresh_fops);
8151 
8152 	trace_create_file("README", 0444, d_tracer,
8153 			NULL, &tracing_readme_fops);
8154 
8155 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8156 			NULL, &tracing_saved_cmdlines_fops);
8157 
8158 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8159 			  NULL, &tracing_saved_cmdlines_size_fops);
8160 
8161 	trace_create_file("saved_tgids", 0444, d_tracer,
8162 			NULL, &tracing_saved_tgids_fops);
8163 
8164 	trace_eval_init();
8165 
8166 	trace_create_eval_file(d_tracer);
8167 
8168 #ifdef CONFIG_MODULES
8169 	register_module_notifier(&trace_module_nb);
8170 #endif
8171 
8172 #ifdef CONFIG_DYNAMIC_FTRACE
8173 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8174 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8175 #endif
8176 
8177 	create_trace_instances(d_tracer);
8178 
8179 	update_tracer_options(&global_trace);
8180 
8181 	return 0;
8182 }
8183 
8184 static int trace_panic_handler(struct notifier_block *this,
8185 			       unsigned long event, void *unused)
8186 {
8187 	if (ftrace_dump_on_oops)
8188 		ftrace_dump(ftrace_dump_on_oops);
8189 	return NOTIFY_OK;
8190 }
8191 
8192 static struct notifier_block trace_panic_notifier = {
8193 	.notifier_call  = trace_panic_handler,
8194 	.next           = NULL,
8195 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8196 };
8197 
8198 static int trace_die_handler(struct notifier_block *self,
8199 			     unsigned long val,
8200 			     void *data)
8201 {
8202 	switch (val) {
8203 	case DIE_OOPS:
8204 		if (ftrace_dump_on_oops)
8205 			ftrace_dump(ftrace_dump_on_oops);
8206 		break;
8207 	default:
8208 		break;
8209 	}
8210 	return NOTIFY_OK;
8211 }
8212 
8213 static struct notifier_block trace_die_notifier = {
8214 	.notifier_call = trace_die_handler,
8215 	.priority = 200
8216 };
8217 
8218 /*
8219  * printk is set to max of 1024, we really don't need it that big.
8220  * Nothing should be printing 1000 characters anyway.
8221  */
8222 #define TRACE_MAX_PRINT		1000
8223 
8224 /*
8225  * Define here KERN_TRACE so that we have one place to modify
8226  * it if we decide to change what log level the ftrace dump
8227  * should be at.
8228  */
8229 #define KERN_TRACE		KERN_EMERG
8230 
8231 void
8232 trace_printk_seq(struct trace_seq *s)
8233 {
8234 	/* Probably should print a warning here. */
8235 	if (s->seq.len >= TRACE_MAX_PRINT)
8236 		s->seq.len = TRACE_MAX_PRINT;
8237 
8238 	/*
8239 	 * More paranoid code. Although the buffer size is set to
8240 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8241 	 * an extra layer of protection.
8242 	 */
8243 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8244 		s->seq.len = s->seq.size - 1;
8245 
8246 	/* should be zero ended, but we are paranoid. */
8247 	s->buffer[s->seq.len] = 0;
8248 
8249 	printk(KERN_TRACE "%s", s->buffer);
8250 
8251 	trace_seq_init(s);
8252 }
8253 
8254 void trace_init_global_iter(struct trace_iterator *iter)
8255 {
8256 	iter->tr = &global_trace;
8257 	iter->trace = iter->tr->current_trace;
8258 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8259 	iter->trace_buffer = &global_trace.trace_buffer;
8260 
8261 	if (iter->trace && iter->trace->open)
8262 		iter->trace->open(iter);
8263 
8264 	/* Annotate start of buffers if we had overruns */
8265 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8266 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8267 
8268 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8269 	if (trace_clocks[iter->tr->clock_id].in_ns)
8270 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8271 }
8272 
8273 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8274 {
8275 	/* use static because iter can be a bit big for the stack */
8276 	static struct trace_iterator iter;
8277 	static atomic_t dump_running;
8278 	struct trace_array *tr = &global_trace;
8279 	unsigned int old_userobj;
8280 	unsigned long flags;
8281 	int cnt = 0, cpu;
8282 
8283 	/* Only allow one dump user at a time. */
8284 	if (atomic_inc_return(&dump_running) != 1) {
8285 		atomic_dec(&dump_running);
8286 		return;
8287 	}
8288 
8289 	/*
8290 	 * Always turn off tracing when we dump.
8291 	 * We don't need to show trace output of what happens
8292 	 * between multiple crashes.
8293 	 *
8294 	 * If the user does a sysrq-z, then they can re-enable
8295 	 * tracing with echo 1 > tracing_on.
8296 	 */
8297 	tracing_off();
8298 
8299 	local_irq_save(flags);
8300 	printk_nmi_direct_enter();
8301 
8302 	/* Simulate the iterator */
8303 	trace_init_global_iter(&iter);
8304 
8305 	for_each_tracing_cpu(cpu) {
8306 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8307 	}
8308 
8309 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8310 
8311 	/* don't look at user memory in panic mode */
8312 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8313 
8314 	switch (oops_dump_mode) {
8315 	case DUMP_ALL:
8316 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8317 		break;
8318 	case DUMP_ORIG:
8319 		iter.cpu_file = raw_smp_processor_id();
8320 		break;
8321 	case DUMP_NONE:
8322 		goto out_enable;
8323 	default:
8324 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8325 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8326 	}
8327 
8328 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8329 
8330 	/* Did function tracer already get disabled? */
8331 	if (ftrace_is_dead()) {
8332 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8333 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8334 	}
8335 
8336 	/*
8337 	 * We need to stop all tracing on all CPUS to read the
8338 	 * the next buffer. This is a bit expensive, but is
8339 	 * not done often. We fill all what we can read,
8340 	 * and then release the locks again.
8341 	 */
8342 
8343 	while (!trace_empty(&iter)) {
8344 
8345 		if (!cnt)
8346 			printk(KERN_TRACE "---------------------------------\n");
8347 
8348 		cnt++;
8349 
8350 		/* reset all but tr, trace, and overruns */
8351 		memset(&iter.seq, 0,
8352 		       sizeof(struct trace_iterator) -
8353 		       offsetof(struct trace_iterator, seq));
8354 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8355 		iter.pos = -1;
8356 
8357 		if (trace_find_next_entry_inc(&iter) != NULL) {
8358 			int ret;
8359 
8360 			ret = print_trace_line(&iter);
8361 			if (ret != TRACE_TYPE_NO_CONSUME)
8362 				trace_consume(&iter);
8363 		}
8364 		touch_nmi_watchdog();
8365 
8366 		trace_printk_seq(&iter.seq);
8367 	}
8368 
8369 	if (!cnt)
8370 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8371 	else
8372 		printk(KERN_TRACE "---------------------------------\n");
8373 
8374  out_enable:
8375 	tr->trace_flags |= old_userobj;
8376 
8377 	for_each_tracing_cpu(cpu) {
8378 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8379 	}
8380 	atomic_dec(&dump_running);
8381 	printk_nmi_direct_exit();
8382 	local_irq_restore(flags);
8383 }
8384 EXPORT_SYMBOL_GPL(ftrace_dump);
8385 
8386 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8387 {
8388 	char **argv;
8389 	int argc, ret;
8390 
8391 	argc = 0;
8392 	ret = 0;
8393 	argv = argv_split(GFP_KERNEL, buf, &argc);
8394 	if (!argv)
8395 		return -ENOMEM;
8396 
8397 	if (argc)
8398 		ret = createfn(argc, argv);
8399 
8400 	argv_free(argv);
8401 
8402 	return ret;
8403 }
8404 
8405 #define WRITE_BUFSIZE  4096
8406 
8407 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8408 				size_t count, loff_t *ppos,
8409 				int (*createfn)(int, char **))
8410 {
8411 	char *kbuf, *buf, *tmp;
8412 	int ret = 0;
8413 	size_t done = 0;
8414 	size_t size;
8415 
8416 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8417 	if (!kbuf)
8418 		return -ENOMEM;
8419 
8420 	while (done < count) {
8421 		size = count - done;
8422 
8423 		if (size >= WRITE_BUFSIZE)
8424 			size = WRITE_BUFSIZE - 1;
8425 
8426 		if (copy_from_user(kbuf, buffer + done, size)) {
8427 			ret = -EFAULT;
8428 			goto out;
8429 		}
8430 		kbuf[size] = '\0';
8431 		buf = kbuf;
8432 		do {
8433 			tmp = strchr(buf, '\n');
8434 			if (tmp) {
8435 				*tmp = '\0';
8436 				size = tmp - buf + 1;
8437 			} else {
8438 				size = strlen(buf);
8439 				if (done + size < count) {
8440 					if (buf != kbuf)
8441 						break;
8442 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8443 					pr_warn("Line length is too long: Should be less than %d\n",
8444 						WRITE_BUFSIZE - 2);
8445 					ret = -EINVAL;
8446 					goto out;
8447 				}
8448 			}
8449 			done += size;
8450 
8451 			/* Remove comments */
8452 			tmp = strchr(buf, '#');
8453 
8454 			if (tmp)
8455 				*tmp = '\0';
8456 
8457 			ret = trace_run_command(buf, createfn);
8458 			if (ret)
8459 				goto out;
8460 			buf += size;
8461 
8462 		} while (done < count);
8463 	}
8464 	ret = done;
8465 
8466 out:
8467 	kfree(kbuf);
8468 
8469 	return ret;
8470 }
8471 
8472 __init static int tracer_alloc_buffers(void)
8473 {
8474 	int ring_buf_size;
8475 	int ret = -ENOMEM;
8476 
8477 	/*
8478 	 * Make sure we don't accidently add more trace options
8479 	 * than we have bits for.
8480 	 */
8481 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8482 
8483 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8484 		goto out;
8485 
8486 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8487 		goto out_free_buffer_mask;
8488 
8489 	/* Only allocate trace_printk buffers if a trace_printk exists */
8490 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8491 		/* Must be called before global_trace.buffer is allocated */
8492 		trace_printk_init_buffers();
8493 
8494 	/* To save memory, keep the ring buffer size to its minimum */
8495 	if (ring_buffer_expanded)
8496 		ring_buf_size = trace_buf_size;
8497 	else
8498 		ring_buf_size = 1;
8499 
8500 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8501 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8502 
8503 	raw_spin_lock_init(&global_trace.start_lock);
8504 
8505 	/*
8506 	 * The prepare callbacks allocates some memory for the ring buffer. We
8507 	 * don't free the buffer if the if the CPU goes down. If we were to free
8508 	 * the buffer, then the user would lose any trace that was in the
8509 	 * buffer. The memory will be removed once the "instance" is removed.
8510 	 */
8511 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8512 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8513 				      NULL);
8514 	if (ret < 0)
8515 		goto out_free_cpumask;
8516 	/* Used for event triggers */
8517 	ret = -ENOMEM;
8518 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8519 	if (!temp_buffer)
8520 		goto out_rm_hp_state;
8521 
8522 	if (trace_create_savedcmd() < 0)
8523 		goto out_free_temp_buffer;
8524 
8525 	/* TODO: make the number of buffers hot pluggable with CPUS */
8526 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8527 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8528 		WARN_ON(1);
8529 		goto out_free_savedcmd;
8530 	}
8531 
8532 	if (global_trace.buffer_disabled)
8533 		tracing_off();
8534 
8535 	if (trace_boot_clock) {
8536 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8537 		if (ret < 0)
8538 			pr_warn("Trace clock %s not defined, going back to default\n",
8539 				trace_boot_clock);
8540 	}
8541 
8542 	/*
8543 	 * register_tracer() might reference current_trace, so it
8544 	 * needs to be set before we register anything. This is
8545 	 * just a bootstrap of current_trace anyway.
8546 	 */
8547 	global_trace.current_trace = &nop_trace;
8548 
8549 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8550 
8551 	ftrace_init_global_array_ops(&global_trace);
8552 
8553 	init_trace_flags_index(&global_trace);
8554 
8555 	register_tracer(&nop_trace);
8556 
8557 	/* Function tracing may start here (via kernel command line) */
8558 	init_function_trace();
8559 
8560 	/* All seems OK, enable tracing */
8561 	tracing_disabled = 0;
8562 
8563 	atomic_notifier_chain_register(&panic_notifier_list,
8564 				       &trace_panic_notifier);
8565 
8566 	register_die_notifier(&trace_die_notifier);
8567 
8568 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8569 
8570 	INIT_LIST_HEAD(&global_trace.systems);
8571 	INIT_LIST_HEAD(&global_trace.events);
8572 	INIT_LIST_HEAD(&global_trace.hist_vars);
8573 	list_add(&global_trace.list, &ftrace_trace_arrays);
8574 
8575 	apply_trace_boot_options();
8576 
8577 	register_snapshot_cmd();
8578 
8579 	return 0;
8580 
8581 out_free_savedcmd:
8582 	free_saved_cmdlines_buffer(savedcmd);
8583 out_free_temp_buffer:
8584 	ring_buffer_free(temp_buffer);
8585 out_rm_hp_state:
8586 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8587 out_free_cpumask:
8588 	free_cpumask_var(global_trace.tracing_cpumask);
8589 out_free_buffer_mask:
8590 	free_cpumask_var(tracing_buffer_mask);
8591 out:
8592 	return ret;
8593 }
8594 
8595 void __init early_trace_init(void)
8596 {
8597 	if (tracepoint_printk) {
8598 		tracepoint_print_iter =
8599 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8600 		if (WARN_ON(!tracepoint_print_iter))
8601 			tracepoint_printk = 0;
8602 		else
8603 			static_key_enable(&tracepoint_printk_key.key);
8604 	}
8605 	tracer_alloc_buffers();
8606 }
8607 
8608 void __init trace_init(void)
8609 {
8610 	trace_event_init();
8611 }
8612 
8613 __init static int clear_boot_tracer(void)
8614 {
8615 	/*
8616 	 * The default tracer at boot buffer is an init section.
8617 	 * This function is called in lateinit. If we did not
8618 	 * find the boot tracer, then clear it out, to prevent
8619 	 * later registration from accessing the buffer that is
8620 	 * about to be freed.
8621 	 */
8622 	if (!default_bootup_tracer)
8623 		return 0;
8624 
8625 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8626 	       default_bootup_tracer);
8627 	default_bootup_tracer = NULL;
8628 
8629 	return 0;
8630 }
8631 
8632 fs_initcall(tracer_init_tracefs);
8633 late_initcall_sync(clear_boot_tracer);
8634 
8635 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8636 __init static int tracing_set_default_clock(void)
8637 {
8638 	/* sched_clock_stable() is determined in late_initcall */
8639 	if (!trace_boot_clock && !sched_clock_stable()) {
8640 		printk(KERN_WARNING
8641 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8642 		       "If you want to keep using the local clock, then add:\n"
8643 		       "  \"trace_clock=local\"\n"
8644 		       "on the kernel command line\n");
8645 		tracing_set_clock(&global_trace, "global");
8646 	}
8647 
8648 	return 0;
8649 }
8650 late_initcall_sync(tracing_set_default_clock);
8651 #endif
8652