xref: /openbmc/linux/kernel/trace/trace.c (revision fca3aa16)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring buffer to count the
59  * entries inserted during the selftest, although some concurrent
60  * insertions into the ring buffer, such as trace_printk(), could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is off by default, but you can enable it by either specifying
113  * "ftrace_dump_on_oops" on the kernel command line, or by setting
114  * /proc/sys/kernel/ftrace_dump_on_oops.
115  * Set it to 1 to dump the buffers of all CPUs.
116  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * from "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
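/*
 * Illustrative layout (a sketch, not from the original source) of one
 * saved array holding N eval maps, as described above:
 *
 *	item[0]       .head  - the count (head.length = N) and head.mod
 *	item[1..N]    .map   - the N saved trace_eval_map entries
 *	item[N+1]     .tail  - tail.next points to the next saved array,
 *	                       or is NULL if this is the last one
 */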
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187 		return 1;
188 	}
189 
190 	return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
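/*
 * Worked example (illustration only): ns2usecs() rounds to the nearest
 * microsecond, so ns2usecs(1499) == (1499 + 500) / 1000 == 1 and
 * ns2usecs(1500) == (1500 + 500) / 1000 == 2.
 */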
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * When adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid already is +1 of the actual previous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
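/*
 * Sketch of an assumed caller (not from this file): the helpers above
 * are meant to back the seq_file operations of a pid filter file,
 * roughly like:
 *
 *	static void *f_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *f_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations f_seq_ops = {
 *		.start	= f_start,
 *		.next	= f_next,
 *		.stop	= f_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * "my_pid_list", "f_start", "f_next" and "f_stop" are placeholder names;
 * real callers also take the locks that protect the list.
 */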
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always create a new array. The write is an all-or-nothing
493 	 * operation: a new array is built for the pids added by the
494 	 * user, so if the operation fails, the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
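/*
 * Sketch of an assumed caller (not from this file): a write handler
 * builds a brand new list with trace_pid_write() and only then publishes
 * it, so a failed write never corrupts the active filter:
 *
 *	struct trace_pid_list *new_list = NULL;
 *	ssize_t ret;
 *
 *	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	publish(new_list);
 *	if (old_list)
 *		trace_free_pid_list(old_list);
 *	return ret;
 *
 * "old_list" and "publish()" are placeholders; the real callers use RCU
 * and their own locking to swap in the new list and retire the old one.
 */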
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to the low value of 16384 so that,
612  * if a dump on oops happens, we do not have to wait for an
613  * excessive amount of output. Anyway this can be configured at
614  * both boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * Serialize the access to the ring buffer.
630  *
631  * The ring buffer serializes readers, but that is only low level protection.
632  * The validity of the events (which are returned by ring_buffer_peek() etc.)
633  * is not protected by the ring buffer.
634  *
635  * The content of events may become garbage if we allow other processes to
636  * consume these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not a reader page) in the ring buffer, and this page will be
639  *      rewritten by the event producer.
640  *   B) the page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to the system.
642  *
643  * These primitives allow multiple processes to access different cpu ring
644  * buffers concurrently.
645  *
646  * These primitives don't distinguish read-only from read-consume access.
647  * Multiple read-only accesses are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
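/*
 * Sketch (illustration only) of how a reader of a single cpu buffer is
 * expected to use the primitives above:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	... use the event while its page cannot be recycled ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively,
 * blocking every per-cpu reader at once.
 */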
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races of where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 static void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 static int alloc_snapshot(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer; instead, we resize it because
974 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
975 	 * we want to preserve it.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = alloc_snapshot(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
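/*
 * Usage sketch (derived from the comments above, not a real caller):
 * allocate the snapshot buffer once where sleeping is allowed, then
 * trigger the swap when the interesting condition fires:
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (hit_bad_condition)
 *		tracing_snapshot();
 *
 * "hit_bad_condition" is a placeholder. tracing_snapshot() itself does
 * not allocate, but it is ignored when called from NMI context.
 */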
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races of where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
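/*
 * Illustration (a sketch, not the real definition) of the C() trick:
 * if TRACE_FLAGS contained
 *
 *	C(PRINT_PARENT, "print-parent"),
 *	C(SYM_OFFSET, "sym-offset"),
 *
 * then "#define C(a, b) b" expands trace_options[] to
 * { "print-parent", "sym-offset", ..., NULL }, while a different
 * expansion of C() in trace.h produces the matching TRACE_ITER_* bits,
 * keeping the strings and the bit positions in the same order.
 */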
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_mono_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
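/*
 * Example (illustration only): a handler that loops on trace_get_user()
 * over the input "pid1 pid2\n" gets one token per call:
 *
 *   1st call: parser->buffer = "pid1", returns 5 (the token plus the
 *             trailing space that terminated it);
 *   2nd call: parser->buffer = "pid2", returns the remaining bytes.
 *
 * If a token is cut short because @cnt ran out, parser->cont is set so
 * the next call continues the same token instead of starting a new one.
 */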
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this tasks comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	struct ring_buffer *buf;
1364 
1365 	if (tr->stop_count)
1366 		return;
1367 
1368 	WARN_ON_ONCE(!irqs_disabled());
1369 
1370 	if (!tr->allocated_snapshot) {
1371 		/* Only the nop tracer should hit this when disabling */
1372 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373 		return;
1374 	}
1375 
1376 	arch_spin_lock(&tr->max_lock);
1377 
1378 	buf = tr->trace_buffer.buffer;
1379 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 	tr->max_buffer.buffer = buf;
1381 
1382 	__update_max_tr(tr, tsk, cpu);
1383 	arch_spin_unlock(&tr->max_lock);
1384 }
1385 
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr - tracer
1389  * @tsk - task with the latency
1390  * @cpu - the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 	int ret;
1398 
1399 	if (tr->stop_count)
1400 		return;
1401 
1402 	WARN_ON_ONCE(!irqs_disabled());
1403 	if (!tr->allocated_snapshot) {
1404 		/* Only the nop tracer should hit this when disabling */
1405 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 		return;
1407 	}
1408 
1409 	arch_spin_lock(&tr->max_lock);
1410 
1411 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412 
1413 	if (ret == -EBUSY) {
1414 		/*
1415 		 * We failed to swap the buffer due to a commit taking
1416 		 * place on this CPU. We fail to record, but we reset
1417 		 * the max trace buffer (no one writes directly to it)
1418 		 * and flag that it failed.
1419 		 */
1420 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 			"Failed to swap buffers due to commit in progress\n");
1422 	}
1423 
1424 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425 
1426 	__update_max_tr(tr, tsk, cpu);
1427 	arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 	/* Iterators are static, they should be filled or empty */
1434 	if (trace_buffer_iter(iter, iter->cpu_file))
1435 		return 0;
1436 
1437 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 				full);
1439 }
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445 	struct list_head		list;
1446 	struct tracer			*type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
1451 static int save_selftest(struct tracer *type)
1452 {
1453 	struct trace_selftests *selftest;
1454 
1455 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 	if (!selftest)
1457 		return -ENOMEM;
1458 
1459 	selftest->type = type;
1460 	list_add(&selftest->list, &postponed_selftests);
1461 	return 0;
1462 }
1463 
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	struct tracer *saved_tracer = tr->current_trace;
1468 	int ret;
1469 
1470 	if (!type->selftest || tracing_selftest_disabled)
1471 		return 0;
1472 
1473 	/*
1474 	 * If a tracer registers early in boot up (before scheduling is
1475 	 * initialized and such), then do not run its selftests yet.
1476 	 * Instead, run it a little later in the boot process.
1477 	 */
1478 	if (!selftests_can_run)
1479 		return save_selftest(type);
1480 
1481 	/*
1482 	 * Run a selftest on this tracer.
1483 	 * Here we reset the trace buffer, and set the current
1484 	 * tracer to be this tracer. The tracer can then run some
1485 	 * internal tracing to verify that everything is in order.
1486 	 * If we fail, we do not register this tracer.
1487 	 */
1488 	tracing_reset_online_cpus(&tr->trace_buffer);
1489 
1490 	tr->current_trace = type;
1491 
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 	if (type->use_max_tr) {
1494 		/* If we expanded the buffers, make sure the max is expanded too */
1495 		if (ring_buffer_expanded)
1496 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 					   RING_BUFFER_ALL_CPUS);
1498 		tr->allocated_snapshot = true;
1499 	}
1500 #endif
1501 
1502 	/* the test is responsible for initializing and enabling */
1503 	pr_info("Testing tracer %s: ", type->name);
1504 	ret = type->selftest(type, tr);
1505 	/* the test is responsible for resetting too */
1506 	tr->current_trace = saved_tracer;
1507 	if (ret) {
1508 		printk(KERN_CONT "FAILED!\n");
1509 		/* Add the warning after printing 'FAILED' */
1510 		WARN_ON(1);
1511 		return -1;
1512 	}
1513 	/* Only reset on passing, to avoid touching corrupted buffers */
1514 	tracing_reset_online_cpus(&tr->trace_buffer);
1515 
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 	if (type->use_max_tr) {
1518 		tr->allocated_snapshot = false;
1519 
1520 		/* Shrink the max buffer again */
1521 		if (ring_buffer_expanded)
1522 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 					   RING_BUFFER_ALL_CPUS);
1524 	}
1525 #endif
1526 
1527 	printk(KERN_CONT "PASSED\n");
1528 	return 0;
1529 }
1530 
1531 static __init int init_trace_selftests(void)
1532 {
1533 	struct trace_selftests *p, *n;
1534 	struct tracer *t, **last;
1535 	int ret;
1536 
1537 	selftests_can_run = true;
1538 
1539 	mutex_lock(&trace_types_lock);
1540 
1541 	if (list_empty(&postponed_selftests))
1542 		goto out;
1543 
1544 	pr_info("Running postponed tracer tests:\n");
1545 
1546 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547 		ret = run_tracer_selftest(p->type);
1548 		/* If the test fails, then warn and remove from available_tracers */
1549 		if (ret < 0) {
1550 			WARN(1, "tracer: %s failed selftest, disabling\n",
1551 			     p->type->name);
1552 			last = &trace_types;
1553 			for (t = trace_types; t; t = t->next) {
1554 				if (t == p->type) {
1555 					*last = t->next;
1556 					break;
1557 				}
1558 				last = &t->next;
1559 			}
1560 		}
1561 		list_del(&p->list);
1562 		kfree(p);
1563 	}
1564 
1565  out:
1566 	mutex_unlock(&trace_types_lock);
1567 
1568 	return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574 	return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577 
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579 
1580 static void __init apply_trace_boot_options(void);
1581 
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type - the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590 	struct tracer *t;
1591 	int ret = 0;
1592 
1593 	if (!type->name) {
1594 		pr_info("Tracer must have a name\n");
1595 		return -1;
1596 	}
1597 
1598 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600 		return -1;
1601 	}
1602 
1603 	mutex_lock(&trace_types_lock);
1604 
1605 	tracing_selftest_running = true;
1606 
1607 	for (t = trace_types; t; t = t->next) {
1608 		if (strcmp(type->name, t->name) == 0) {
1609 			/* already found */
1610 			pr_info("Tracer %s already registered\n",
1611 				type->name);
1612 			ret = -1;
1613 			goto out;
1614 		}
1615 	}
1616 
1617 	if (!type->set_flag)
1618 		type->set_flag = &dummy_set_flag;
1619 	if (!type->flags) {
1620 		/* allocate a dummy tracer_flags */
1621 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622 		if (!type->flags) {
1623 			ret = -ENOMEM;
1624 			goto out;
1625 		}
1626 		type->flags->val = 0;
1627 		type->flags->opts = dummy_tracer_opt;
1628 	} else
1629 		if (!type->flags->opts)
1630 			type->flags->opts = dummy_tracer_opt;
1631 
1632 	/* store the tracer for __set_tracer_option */
1633 	type->flags->trace = type;
1634 
1635 	ret = run_tracer_selftest(type);
1636 	if (ret < 0)
1637 		goto out;
1638 
1639 	type->next = trace_types;
1640 	trace_types = type;
1641 	add_tracer_options(&global_trace, type);
1642 
1643  out:
1644 	tracing_selftest_running = false;
1645 	mutex_unlock(&trace_types_lock);
1646 
1647 	if (ret || !default_bootup_tracer)
1648 		goto out_unlock;
1649 
1650 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651 		goto out_unlock;
1652 
1653 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654 	/* Do we want this tracer to start on bootup? */
1655 	tracing_set_tracer(&global_trace, type->name);
1656 	default_bootup_tracer = NULL;
1657 
1658 	apply_trace_boot_options();
1659 
1660 	/* disable other selftests, since this will break them. */
1661 	tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664 	       type->name);
1665 #endif
1666 
1667  out_unlock:
1668 	return ret;
1669 }
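/*
 * Minimal registration sketch (hypothetical tracer, for illustration):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * The "example*" names are placeholders; real tracers fill in additional
 * fields such as .selftest and .flags as needed.
 */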
1670 
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673 	struct ring_buffer *buffer = buf->buffer;
1674 
1675 	if (!buffer)
1676 		return;
1677 
1678 	ring_buffer_record_disable(buffer);
1679 
1680 	/* Make sure all commits have finished */
1681 	synchronize_sched();
1682 	ring_buffer_reset_cpu(buffer, cpu);
1683 
1684 	ring_buffer_record_enable(buffer);
1685 }
1686 
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689 	struct ring_buffer *buffer = buf->buffer;
1690 	int cpu;
1691 
1692 	if (!buffer)
1693 		return;
1694 
1695 	ring_buffer_record_disable(buffer);
1696 
1697 	/* Make sure all commits have finished */
1698 	synchronize_sched();
1699 
1700 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701 
1702 	for_each_online_cpu(cpu)
1703 		ring_buffer_reset_cpu(buffer, cpu);
1704 
1705 	ring_buffer_record_enable(buffer);
1706 }
1707 
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711 	struct trace_array *tr;
1712 
1713 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714 		if (!tr->clear_trace)
1715 			continue;
1716 		tr->clear_trace = false;
1717 		tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 		tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721 	}
1722 }
1723 
1724 static int *tgid_map;
1725 
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731 	unsigned *map_cmdline_to_pid;
1732 	unsigned cmdline_num;
1733 	int cmdline_idx;
1734 	char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737 
1738 /* temporarily disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740 
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745 
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750 
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752 				    struct saved_cmdlines_buffer *s)
1753 {
1754 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1755 					GFP_KERNEL);
1756 	if (!s->map_cmdline_to_pid)
1757 		return -ENOMEM;
1758 
1759 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1760 	if (!s->saved_cmdlines) {
1761 		kfree(s->map_cmdline_to_pid);
1762 		return -ENOMEM;
1763 	}
1764 
1765 	s->cmdline_idx = 0;
1766 	s->cmdline_num = val;
1767 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1768 	       sizeof(s->map_pid_to_cmdline));
1769 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1770 	       val * sizeof(*s->map_cmdline_to_pid));
1771 
1772 	return 0;
1773 }
1774 
1775 static int trace_create_savedcmd(void)
1776 {
1777 	int ret;
1778 
1779 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1780 	if (!savedcmd)
1781 		return -ENOMEM;
1782 
1783 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1784 	if (ret < 0) {
1785 		kfree(savedcmd);
1786 		savedcmd = NULL;
1787 		return -ENOMEM;
1788 	}
1789 
1790 	return 0;
1791 }
1792 
1793 int is_tracing_stopped(void)
1794 {
1795 	return global_trace.stop_count;
1796 }
1797 
1798 /**
1799  * tracing_start - quick start of the tracer
1800  *
1801  * If tracing is enabled but was stopped by tracing_stop,
1802  * this will start the tracer back up.
1803  */
1804 void tracing_start(void)
1805 {
1806 	struct ring_buffer *buffer;
1807 	unsigned long flags;
1808 
1809 	if (tracing_disabled)
1810 		return;
1811 
1812 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1813 	if (--global_trace.stop_count) {
1814 		if (global_trace.stop_count < 0) {
1815 			/* Someone screwed up their debugging */
1816 			WARN_ON_ONCE(1);
1817 			global_trace.stop_count = 0;
1818 		}
1819 		goto out;
1820 	}
1821 
1822 	/* Prevent the buffers from switching */
1823 	arch_spin_lock(&global_trace.max_lock);
1824 
1825 	buffer = global_trace.trace_buffer.buffer;
1826 	if (buffer)
1827 		ring_buffer_record_enable(buffer);
1828 
1829 #ifdef CONFIG_TRACER_MAX_TRACE
1830 	buffer = global_trace.max_buffer.buffer;
1831 	if (buffer)
1832 		ring_buffer_record_enable(buffer);
1833 #endif
1834 
1835 	arch_spin_unlock(&global_trace.max_lock);
1836 
1837  out:
1838 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1839 }
1840 
1841 static void tracing_start_tr(struct trace_array *tr)
1842 {
1843 	struct ring_buffer *buffer;
1844 	unsigned long flags;
1845 
1846 	if (tracing_disabled)
1847 		return;
1848 
1849 	/* If global, we need to also start the max tracer */
1850 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1851 		return tracing_start();
1852 
1853 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1854 
1855 	if (--tr->stop_count) {
1856 		if (tr->stop_count < 0) {
1857 			/* Someone screwed up their debugging */
1858 			WARN_ON_ONCE(1);
1859 			tr->stop_count = 0;
1860 		}
1861 		goto out;
1862 	}
1863 
1864 	buffer = tr->trace_buffer.buffer;
1865 	if (buffer)
1866 		ring_buffer_record_enable(buffer);
1867 
1868  out:
1869 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1870 }
1871 
1872 /**
1873  * tracing_stop - quick stop of the tracer
1874  *
1875  * Lightweight way to stop tracing. Use in conjunction with
1876  * tracing_start.
1877  */
1878 void tracing_stop(void)
1879 {
1880 	struct ring_buffer *buffer;
1881 	unsigned long flags;
1882 
1883 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1884 	if (global_trace.stop_count++)
1885 		goto out;
1886 
1887 	/* Prevent the buffers from switching */
1888 	arch_spin_lock(&global_trace.max_lock);
1889 
1890 	buffer = global_trace.trace_buffer.buffer;
1891 	if (buffer)
1892 		ring_buffer_record_disable(buffer);
1893 
1894 #ifdef CONFIG_TRACER_MAX_TRACE
1895 	buffer = global_trace.max_buffer.buffer;
1896 	if (buffer)
1897 		ring_buffer_record_disable(buffer);
1898 #endif
1899 
1900 	arch_spin_unlock(&global_trace.max_lock);
1901 
1902  out:
1903 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1904 }
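/*
 * Illustration: tracing_stop()/tracing_start() nest through stop_count:
 *
 *	tracing_stop();		stop_count 0 -> 1, recording disabled
 *	tracing_stop();		stop_count 1 -> 2
 *	tracing_start();	stop_count 2 -> 1, still disabled
 *	tracing_start();	stop_count 1 -> 0, recording enabled again
 */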
1905 
1906 static void tracing_stop_tr(struct trace_array *tr)
1907 {
1908 	struct ring_buffer *buffer;
1909 	unsigned long flags;
1910 
1911 	/* If global, we need to also stop the max tracer */
1912 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1913 		return tracing_stop();
1914 
1915 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1916 	if (tr->stop_count++)
1917 		goto out;
1918 
1919 	buffer = tr->trace_buffer.buffer;
1920 	if (buffer)
1921 		ring_buffer_record_disable(buffer);
1922 
1923  out:
1924 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1925 }
1926 
1927 static int trace_save_cmdline(struct task_struct *tsk)
1928 {
1929 	unsigned pid, idx;
1930 
1931 	/* treat recording of idle task as a success */
1932 	if (!tsk->pid)
1933 		return 1;
1934 
1935 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1936 		return 0;
1937 
1938 	/*
1939 	 * It's not the end of the world if we don't get
1940 	 * the lock, but we also don't want to spin
1941 	 * nor do we want to disable interrupts,
1942 	 * so if we miss here, then better luck next time.
1943 	 */
1944 	if (!arch_spin_trylock(&trace_cmdline_lock))
1945 		return 0;
1946 
1947 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1948 	if (idx == NO_CMDLINE_MAP) {
1949 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1950 
1951 		/*
1952 		 * Check whether the cmdline buffer at idx has a pid
1953 		 * mapped. We are going to overwrite that entry so we
1954 		 * need to clear the map_pid_to_cmdline. Otherwise we
1955 		 * would read the new comm for the old pid.
1956 		 */
1957 		pid = savedcmd->map_cmdline_to_pid[idx];
1958 		if (pid != NO_CMDLINE_MAP)
1959 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1960 
1961 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1962 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1963 
1964 		savedcmd->cmdline_idx = idx;
1965 	}
1966 
1967 	set_cmdline(idx, tsk->comm);
1968 
1969 	arch_spin_unlock(&trace_cmdline_lock);
1970 
1971 	return 1;
1972 }
1973 
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976 	unsigned map;
1977 
1978 	if (!pid) {
1979 		strcpy(comm, "<idle>");
1980 		return;
1981 	}
1982 
1983 	if (WARN_ON_ONCE(pid < 0)) {
1984 		strcpy(comm, "<XXX>");
1985 		return;
1986 	}
1987 
1988 	if (pid > PID_MAX_DEFAULT) {
1989 		strcpy(comm, "<...>");
1990 		return;
1991 	}
1992 
1993 	map = savedcmd->map_pid_to_cmdline[pid];
1994 	if (map != NO_CMDLINE_MAP)
1995 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1996 	else
1997 		strcpy(comm, "<...>");
1998 }
1999 
2000 void trace_find_cmdline(int pid, char comm[])
2001 {
2002 	preempt_disable();
2003 	arch_spin_lock(&trace_cmdline_lock);
2004 
2005 	__trace_find_cmdline(pid, comm);
2006 
2007 	arch_spin_unlock(&trace_cmdline_lock);
2008 	preempt_enable();
2009 }
2010 
2011 int trace_find_tgid(int pid)
2012 {
2013 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2014 		return 0;
2015 
2016 	return tgid_map[pid];
2017 }
2018 
2019 static int trace_save_tgid(struct task_struct *tsk)
2020 {
2021 	/* treat recording of idle task as a success */
2022 	if (!tsk->pid)
2023 		return 1;
2024 
2025 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2026 		return 0;
2027 
2028 	tgid_map[tsk->pid] = tsk->tgid;
2029 	return 1;
2030 }
2031 
2032 static bool tracing_record_taskinfo_skip(int flags)
2033 {
2034 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2035 		return true;
2036 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2037 		return true;
2038 	if (!__this_cpu_read(trace_taskinfo_save))
2039 		return true;
2040 	return false;
2041 }
2042 
2043 /**
2044  * tracing_record_taskinfo - record the task info of a task
2045  *
2046  * @task:  task to record
2047  * @flags: TRACE_RECORD_CMDLINE for recording comm
2048  *         TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo(struct task_struct *task, int flags)
2051 {
2052 	bool done;
2053 
2054 	if (tracing_record_taskinfo_skip(flags))
2055 		return;
2056 
2057 	/*
2058 	 * Record as much task information as possible. If some fail, continue
2059 	 * to try to record the others.
2060 	 */
2061 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2062 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2063 
2064 	/* If recording any information failed, retry soon. */
2065 	if (!done)
2066 		return;
2067 
2068 	__this_cpu_write(trace_taskinfo_save, false);
2069 }
2070 
2071 /**
2072  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2073  *
2074  * @prev:  previous task during sched_switch
2075  * @next:  next task during sched_switch
2076  * @flags: TRACE_RECORD_CMDLINE for recording comm
2077  *         TRACE_RECORD_TGID for recording tgid
2078  */
2079 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2080 					  struct task_struct *next, int flags)
2081 {
2082 	bool done;
2083 
2084 	if (tracing_record_taskinfo_skip(flags))
2085 		return;
2086 
2087 	/*
2088 	 * Record as much task information as possible. If some fail, continue
2089 	 * to try to record the others.
2090 	 */
2091 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2092 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2093 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2094 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2095 
2096 	/* If recording any information failed, retry soon. */
2097 	if (!done)
2098 		return;
2099 
2100 	__this_cpu_write(trace_taskinfo_save, false);
2101 }
2102 
2103 /* Helpers to record a specific task information */
2104 void tracing_record_cmdline(struct task_struct *task)
2105 {
2106 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2107 }
2108 
2109 void tracing_record_tgid(struct task_struct *task)
2110 {
2111 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2112 }
2113 
2114 /*
2115  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2116  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2117  * simplifies those functions and keeps them in sync.
2118  */
2119 enum print_line_t trace_handle_return(struct trace_seq *s)
2120 {
2121 	return trace_seq_has_overflowed(s) ?
2122 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2123 }
2124 EXPORT_SYMBOL_GPL(trace_handle_return);
2125 
2126 void
2127 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2128 			     int pc)
2129 {
2130 	struct task_struct *tsk = current;
2131 
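	/*
	 * Fill in the common fields: preempt count, pid, and the
	 * irq/softirq/NMI and resched state encoded from the saved
	 * flags and preempt count.
	 */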
2132 	entry->preempt_count		= pc & 0xff;
2133 	entry->pid			= (tsk) ? tsk->pid : 0;
2134 	entry->flags =
2135 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2136 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2137 #else
2138 		TRACE_FLAG_IRQS_NOSUPPORT |
2139 #endif
2140 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2141 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2142 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2143 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2144 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2145 }
2146 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2147 
2148 struct ring_buffer_event *
2149 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2150 			  int type,
2151 			  unsigned long len,
2152 			  unsigned long flags, int pc)
2153 {
2154 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2155 }
2156 
2157 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2158 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2159 static int trace_buffered_event_ref;
2160 
2161 /**
2162  * trace_buffered_event_enable - enable buffering events
2163  *
2164  * When events are being filtered, it is quicker to use a temporary
2165  * buffer to write the event data into if there's a likely chance
2166  * that it will not be committed. Discarding an event from the ring
2167  * buffer is not as fast as committing one, and is much slower than
2168  * copying the data into the buffer and committing it.
2169  *
2170  * When an event is to be filtered, allocate per-CPU buffers to
2171  * write the event data into. If the event is filtered and discarded,
2172  * it is simply dropped; otherwise, the entire data is committed
2173  * in one shot.
2174  */
2175 void trace_buffered_event_enable(void)
2176 {
2177 	struct ring_buffer_event *event;
2178 	struct page *page;
2179 	int cpu;
2180 
2181 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2182 
2183 	if (trace_buffered_event_ref++)
2184 		return;
2185 
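	/* Allocate one page per CPU to act as the temporary event buffer */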
2186 	for_each_tracing_cpu(cpu) {
2187 		page = alloc_pages_node(cpu_to_node(cpu),
2188 					GFP_KERNEL | __GFP_NORETRY, 0);
2189 		if (!page)
2190 			goto failed;
2191 
2192 		event = page_address(page);
2193 		memset(event, 0, sizeof(*event));
2194 
2195 		per_cpu(trace_buffered_event, cpu) = event;
2196 
2197 		preempt_disable();
2198 		if (cpu == smp_processor_id() &&
2199 		    this_cpu_read(trace_buffered_event) !=
2200 		    per_cpu(trace_buffered_event, cpu))
2201 			WARN_ON_ONCE(1);
2202 		preempt_enable();
2203 	}
2204 
2205 	return;
2206  failed:
2207 	trace_buffered_event_disable();
2208 }
2209 
2210 static void enable_trace_buffered_event(void *data)
2211 {
2212 	/* Probably not needed, but do it anyway */
2213 	smp_rmb();
2214 	this_cpu_dec(trace_buffered_event_cnt);
2215 }
2216 
2217 static void disable_trace_buffered_event(void *data)
2218 {
2219 	this_cpu_inc(trace_buffered_event_cnt);
2220 }
2221 
2222 /**
2223  * trace_buffered_event_disable - disable buffering events
2224  *
2225  * When a filter is removed, it is faster to not use the buffered
2226  * events, and to commit directly into the ring buffer. Free up
2227  * the temp buffers when there are no more users. This requires
2228  * special synchronization with current events.
2229  */
2230 void trace_buffered_event_disable(void)
2231 {
2232 	int cpu;
2233 
2234 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2235 
2236 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2237 		return;
2238 
2239 	if (--trace_buffered_event_ref)
2240 		return;
2241 
2242 	preempt_disable();
2243 	/* For each CPU, set the buffer as used. */
2244 	smp_call_function_many(tracing_buffer_mask,
2245 			       disable_trace_buffered_event, NULL, 1);
2246 	preempt_enable();
2247 
2248 	/* Wait for all current users to finish */
2249 	synchronize_sched();
2250 
2251 	for_each_tracing_cpu(cpu) {
2252 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2253 		per_cpu(trace_buffered_event, cpu) = NULL;
2254 	}
2255 	/*
2256 	 * Make sure trace_buffered_event is NULL before clearing
2257 	 * trace_buffered_event_cnt.
2258 	 */
2259 	smp_wmb();
2260 
2261 	preempt_disable();
2262 	/* Do the work on each cpu */
2263 	smp_call_function_many(tracing_buffer_mask,
2264 			       enable_trace_buffered_event, NULL, 1);
2265 	preempt_enable();
2266 }
2267 
2268 static struct ring_buffer *temp_buffer;
2269 
2270 struct ring_buffer_event *
2271 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2272 			  struct trace_event_file *trace_file,
2273 			  int type, unsigned long len,
2274 			  unsigned long flags, int pc)
2275 {
2276 	struct ring_buffer_event *entry;
2277 	int val;
2278 
2279 	*current_rb = trace_file->tr->trace_buffer.buffer;
2280 
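	/*
	 * The per-CPU buffered event can only be used if the ring buffer
	 * does not use absolute timestamps and the event is likely to be
	 * discarded (soft disabled or filtered).
	 */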
2281 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2282 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2283 	    (entry = this_cpu_read(trace_buffered_event))) {
2284 		/* Try to use the per cpu buffer first */
2285 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2286 		if (val == 1) {
2287 			trace_event_setup(entry, type, flags, pc);
2288 			entry->array[0] = len;
2289 			return entry;
2290 		}
2291 		this_cpu_dec(trace_buffered_event_cnt);
2292 	}
2293 
2294 	entry = __trace_buffer_lock_reserve(*current_rb,
2295 					    type, len, flags, pc);
2296 	/*
2297 	 * If tracing is off, but we have triggers enabled
2298 	 * we still need to look at the event data. Use the temp_buffer
2299 	 * to store the trace event for the trigger to use. It's recursion
2300 	 * safe and will not be recorded anywhere.
2301 	 */
2302 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2303 		*current_rb = temp_buffer;
2304 		entry = __trace_buffer_lock_reserve(*current_rb,
2305 						    type, len, flags, pc);
2306 	}
2307 	return entry;
2308 }
2309 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2310 
2311 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2312 static DEFINE_MUTEX(tracepoint_printk_mutex);
2313 
2314 static void output_printk(struct trace_event_buffer *fbuffer)
2315 {
2316 	struct trace_event_call *event_call;
2317 	struct trace_event *event;
2318 	unsigned long flags;
2319 	struct trace_iterator *iter = tracepoint_print_iter;
2320 
2321 	/* We should never get here if iter is NULL */
2322 	if (WARN_ON_ONCE(!iter))
2323 		return;
2324 
2325 	event_call = fbuffer->trace_file->event_call;
2326 	if (!event_call || !event_call->event.funcs ||
2327 	    !event_call->event.funcs->trace)
2328 		return;
2329 
2330 	event = &fbuffer->trace_file->event_call->event;
2331 
2332 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2333 	trace_seq_init(&iter->seq);
2334 	iter->ent = fbuffer->entry;
2335 	event_call->event.funcs->trace(iter, 0, event);
2336 	trace_seq_putc(&iter->seq, 0);
2337 	printk("%s", iter->seq.buffer);
2338 
2339 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2340 }
2341 
2342 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2343 			     void __user *buffer, size_t *lenp,
2344 			     loff_t *ppos)
2345 {
2346 	int save_tracepoint_printk;
2347 	int ret;
2348 
2349 	mutex_lock(&tracepoint_printk_mutex);
2350 	save_tracepoint_printk = tracepoint_printk;
2351 
2352 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2353 
2354 	/*
2355 	 * This will force exiting early, as tracepoint_printk
2356 	 * is always zero when tracepoint_print_iter is not allocated
2357 	 */
2358 	if (!tracepoint_print_iter)
2359 		tracepoint_printk = 0;
2360 
2361 	if (save_tracepoint_printk == tracepoint_printk)
2362 		goto out;
2363 
2364 	if (tracepoint_printk)
2365 		static_key_enable(&tracepoint_printk_key.key);
2366 	else
2367 		static_key_disable(&tracepoint_printk_key.key);
2368 
2369  out:
2370 	mutex_unlock(&tracepoint_printk_mutex);
2371 
2372 	return ret;
2373 }
2374 
2375 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2376 {
2377 	if (static_key_false(&tracepoint_printk_key.key))
2378 		output_printk(fbuffer);
2379 
2380 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2381 				    fbuffer->event, fbuffer->entry,
2382 				    fbuffer->flags, fbuffer->pc);
2383 }
2384 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2385 
2386 /*
2387  * Skip 3:
2388  *
2389  *   trace_buffer_unlock_commit_regs()
2390  *   trace_event_buffer_commit()
2391  *   trace_event_raw_event_xxx()
2392  */
2393 # define STACK_SKIP 3
2394 
2395 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2396 				     struct ring_buffer *buffer,
2397 				     struct ring_buffer_event *event,
2398 				     unsigned long flags, int pc,
2399 				     struct pt_regs *regs)
2400 {
2401 	__buffer_unlock_commit(buffer, event);
2402 
2403 	/*
2404 	 * If regs is not set, then skip the necessary functions.
2405 	 * Note, we can still get here via blktrace, wakeup tracer
2406 	 * and mmiotrace, but that's ok if they lose a function or
2407 	 * two. They are not that meaningful.
2408 	 */
2409 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2410 	ftrace_trace_userstack(buffer, flags, pc);
2411 }
2412 
2413 /*
2414  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2415  */
2416 void
2417 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2418 				   struct ring_buffer_event *event)
2419 {
2420 	__buffer_unlock_commit(buffer, event);
2421 }
2422 
2423 static void
2424 trace_process_export(struct trace_export *export,
2425 	       struct ring_buffer_event *event)
2426 {
2427 	struct trace_entry *entry;
2428 	unsigned int size = 0;
2429 
2430 	entry = ring_buffer_event_data(event);
2431 	size = ring_buffer_event_length(event);
2432 	export->write(export, entry, size);
2433 }
2434 
2435 static DEFINE_MUTEX(ftrace_export_lock);
2436 
2437 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2438 
2439 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2440 
2441 static inline void ftrace_exports_enable(void)
2442 {
2443 	static_branch_enable(&ftrace_exports_enabled);
2444 }
2445 
2446 static inline void ftrace_exports_disable(void)
2447 {
2448 	static_branch_disable(&ftrace_exports_enabled);
2449 }
2450 
2451 void ftrace_exports(struct ring_buffer_event *event)
2452 {
2453 	struct trace_export *export;
2454 
2455 	preempt_disable_notrace();
2456 
2457 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2458 	while (export) {
2459 		trace_process_export(export, event);
2460 		export = rcu_dereference_raw_notrace(export->next);
2461 	}
2462 
2463 	preempt_enable_notrace();
2464 }
2465 
2466 static inline void
2467 add_trace_export(struct trace_export **list, struct trace_export *export)
2468 {
2469 	rcu_assign_pointer(export->next, *list);
2470 	/*
2471 	 * We are inserting the export into the list, but another
2472 	 * CPU might be walking that list. We need to make sure
2473 	 * the export->next pointer is valid before another CPU sees
2474 	 * the export pointer included in the list.
2475 	 */
2476 	rcu_assign_pointer(*list, export);
2477 }
2478 
2479 static inline int
2480 rm_trace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482 	struct trace_export **p;
2483 
2484 	for (p = list; *p != NULL; p = &(*p)->next)
2485 		if (*p == export)
2486 			break;
2487 
2488 	if (*p != export)
2489 		return -1;
2490 
2491 	rcu_assign_pointer(*p, (*p)->next);
2492 
2493 	return 0;
2494 }
2495 
2496 static inline void
2497 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499 	if (*list == NULL)
2500 		ftrace_exports_enable();
2501 
2502 	add_trace_export(list, export);
2503 }
2504 
2505 static inline int
2506 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2507 {
2508 	int ret;
2509 
2510 	ret = rm_trace_export(list, export);
2511 	if (*list == NULL)
2512 		ftrace_exports_disable();
2513 
2514 	return ret;
2515 }
2516 
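/*
 * Example (sketch, not taken from this file): a subsystem can tap the
 * function trace stream by registering a trace_export whose ->write()
 * callback receives each raw trace entry and its size, e.g.:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size);
 *	static struct trace_export my_export = { .write = my_export_write };
 *	register_ftrace_export(&my_export);
 *
 * The names above are illustrative only; see struct trace_export in
 * <linux/trace.h> for the exact callback prototype.
 */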
2517 int register_ftrace_export(struct trace_export *export)
2518 {
2519 	if (WARN_ON_ONCE(!export->write))
2520 		return -1;
2521 
2522 	mutex_lock(&ftrace_export_lock);
2523 
2524 	add_ftrace_export(&ftrace_exports_list, export);
2525 
2526 	mutex_unlock(&ftrace_export_lock);
2527 
2528 	return 0;
2529 }
2530 EXPORT_SYMBOL_GPL(register_ftrace_export);
2531 
2532 int unregister_ftrace_export(struct trace_export *export)
2533 {
2534 	int ret;
2535 
2536 	mutex_lock(&ftrace_export_lock);
2537 
2538 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2539 
2540 	mutex_unlock(&ftrace_export_lock);
2541 
2542 	return ret;
2543 }
2544 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2545 
2546 void
2547 trace_function(struct trace_array *tr,
2548 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2549 	       int pc)
2550 {
2551 	struct trace_event_call *call = &event_function;
2552 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2553 	struct ring_buffer_event *event;
2554 	struct ftrace_entry *entry;
2555 
2556 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2557 					    flags, pc);
2558 	if (!event)
2559 		return;
2560 	entry	= ring_buffer_event_data(event);
2561 	entry->ip			= ip;
2562 	entry->parent_ip		= parent_ip;
2563 
2564 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2565 		if (static_branch_unlikely(&ftrace_exports_enabled))
2566 			ftrace_exports(event);
2567 		__buffer_unlock_commit(buffer, event);
2568 	}
2569 }
2570 
2571 #ifdef CONFIG_STACKTRACE
2572 
2573 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2574 struct ftrace_stack {
2575 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2576 };
2577 
2578 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2579 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2580 
2581 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2582 				 unsigned long flags,
2583 				 int skip, int pc, struct pt_regs *regs)
2584 {
2585 	struct trace_event_call *call = &event_kernel_stack;
2586 	struct ring_buffer_event *event;
2587 	struct stack_entry *entry;
2588 	struct stack_trace trace;
2589 	int use_stack;
2590 	int size = FTRACE_STACK_ENTRIES;
2591 
2592 	trace.nr_entries	= 0;
2593 	trace.skip		= skip;
2594 
2595 	/*
2596 	 * Add one, for this function and the call to save_stack_trace().
2597 	 * If regs is set, then these functions will not be in the way.
2598 	 */
2599 #ifndef CONFIG_UNWINDER_ORC
2600 	if (!regs)
2601 		trace.skip++;
2602 #endif
2603 
2604 	/*
2605 	 * Since events can happen in NMIs, there's no safe way to
2606 	 * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
2607 	 * or NMI comes in, it will just have to use the default
2608 	 * FTRACE_STACK_ENTRIES.
2609 	 */
2610 	preempt_disable_notrace();
2611 
2612 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2613 	/*
2614 	 * We don't need any atomic variables, just a barrier.
2615 	 * If an interrupt comes in, we don't care, because it would
2616 	 * have exited and put the counter back to what we want.
2617 	 * We just need a barrier to keep gcc from moving things
2618 	 * around.
2619 	 */
2620 	barrier();
2621 	if (use_stack == 1) {
2622 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2623 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2624 
2625 		if (regs)
2626 			save_stack_trace_regs(regs, &trace);
2627 		else
2628 			save_stack_trace(&trace);
2629 
2630 		if (trace.nr_entries > size)
2631 			size = trace.nr_entries;
2632 	} else
2633 		/* From now on, use_stack is a boolean */
2634 		use_stack = 0;
2635 
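	/* Convert size from a number of stack entries to bytes */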
2636 	size *= sizeof(unsigned long);
2637 
2638 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2639 					    sizeof(*entry) + size, flags, pc);
2640 	if (!event)
2641 		goto out;
2642 	entry = ring_buffer_event_data(event);
2643 
2644 	memset(&entry->caller, 0, size);
2645 
2646 	if (use_stack)
2647 		memcpy(&entry->caller, trace.entries,
2648 		       trace.nr_entries * sizeof(unsigned long));
2649 	else {
2650 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2651 		trace.entries		= entry->caller;
2652 		if (regs)
2653 			save_stack_trace_regs(regs, &trace);
2654 		else
2655 			save_stack_trace(&trace);
2656 	}
2657 
2658 	entry->size = trace.nr_entries;
2659 
2660 	if (!call_filter_check_discard(call, entry, buffer, event))
2661 		__buffer_unlock_commit(buffer, event);
2662 
2663  out:
2664 	/* Again, don't let gcc optimize things here */
2665 	barrier();
2666 	__this_cpu_dec(ftrace_stack_reserve);
2667 	preempt_enable_notrace();
2668 
2669 }
2670 
2671 static inline void ftrace_trace_stack(struct trace_array *tr,
2672 				      struct ring_buffer *buffer,
2673 				      unsigned long flags,
2674 				      int skip, int pc, struct pt_regs *regs)
2675 {
2676 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2677 		return;
2678 
2679 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2680 }
2681 
2682 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2683 		   int pc)
2684 {
2685 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2686 
2687 	if (rcu_is_watching()) {
2688 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2689 		return;
2690 	}
2691 
2692 	/*
2693 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2694 	 * but if the above rcu_is_watching() failed, then the NMI
2695 	 * triggered someplace critical, and rcu_irq_enter() should
2696 	 * not be called from NMI.
2697 	 */
2698 	if (unlikely(in_nmi()))
2699 		return;
2700 
2701 	rcu_irq_enter_irqson();
2702 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2703 	rcu_irq_exit_irqson();
2704 }
2705 
2706 /**
2707  * trace_dump_stack - record a stack back trace in the trace buffer
2708  * @skip: Number of functions to skip (helper handlers)
2709  */
2710 void trace_dump_stack(int skip)
2711 {
2712 	unsigned long flags;
2713 
2714 	if (tracing_disabled || tracing_selftest_running)
2715 		return;
2716 
2717 	local_save_flags(flags);
2718 
2719 #ifndef CONFIG_UNWINDER_ORC
2720 	/* Skip 1 to skip this function. */
2721 	skip++;
2722 #endif
2723 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2724 			     flags, skip, preempt_count(), NULL);
2725 }
2726 
2727 static DEFINE_PER_CPU(int, user_stack_count);
2728 
2729 void
2730 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2731 {
2732 	struct trace_event_call *call = &event_user_stack;
2733 	struct ring_buffer_event *event;
2734 	struct userstack_entry *entry;
2735 	struct stack_trace trace;
2736 
2737 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2738 		return;
2739 
2740 	/*
2741 	 * NMIs cannot handle page faults, even with fixups.
2742 	 * Saving the user stack can (and often does) fault.
2743 	 */
2744 	if (unlikely(in_nmi()))
2745 		return;
2746 
2747 	/*
2748 	 * prevent recursion, since the user stack tracing may
2749 	 * trigger other kernel events.
2750 	 */
2751 	preempt_disable();
2752 	if (__this_cpu_read(user_stack_count))
2753 		goto out;
2754 
2755 	__this_cpu_inc(user_stack_count);
2756 
2757 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2758 					    sizeof(*entry), flags, pc);
2759 	if (!event)
2760 		goto out_drop_count;
2761 	entry	= ring_buffer_event_data(event);
2762 
2763 	entry->tgid		= current->tgid;
2764 	memset(&entry->caller, 0, sizeof(entry->caller));
2765 
2766 	trace.nr_entries	= 0;
2767 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2768 	trace.skip		= 0;
2769 	trace.entries		= entry->caller;
2770 
2771 	save_stack_trace_user(&trace);
2772 	if (!call_filter_check_discard(call, entry, buffer, event))
2773 		__buffer_unlock_commit(buffer, event);
2774 
2775  out_drop_count:
2776 	__this_cpu_dec(user_stack_count);
2777  out:
2778 	preempt_enable();
2779 }
2780 
2781 #ifdef UNUSED
2782 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2783 {
2784 	ftrace_trace_userstack(tr, flags, preempt_count());
2785 }
2786 #endif /* UNUSED */
2787 
2788 #endif /* CONFIG_STACKTRACE */
2789 
2790 /* created for use with alloc_percpu */
2791 struct trace_buffer_struct {
2792 	int nesting;
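	/* one buffer per context level: task, softirq, hardirq, NMI */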
2793 	char buffer[4][TRACE_BUF_SIZE];
2794 };
2795 
2796 static struct trace_buffer_struct *trace_percpu_buffer;
2797 
2798 /*
2799  * This allows for lockless recording.  If we're nested too deeply, then
2800  * this returns NULL.
2801  */
2802 static char *get_trace_buf(void)
2803 {
2804 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2805 
2806 	if (!buffer || buffer->nesting >= 4)
2807 		return NULL;
2808 
2809 	buffer->nesting++;
2810 
2811 	/* Interrupts must see nesting incremented before we use the buffer */
2812 	barrier();
2813 	return &buffer->buffer[buffer->nesting][0];
2814 }
2815 
2816 static void put_trace_buf(void)
2817 {
2818 	/* Don't let the decrement of nesting leak before this */
2819 	barrier();
2820 	this_cpu_dec(trace_percpu_buffer->nesting);
2821 }
2822 
2823 static int alloc_percpu_trace_buffer(void)
2824 {
2825 	struct trace_buffer_struct *buffers;
2826 
2827 	buffers = alloc_percpu(struct trace_buffer_struct);
2828 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2829 		return -ENOMEM;
2830 
2831 	trace_percpu_buffer = buffers;
2832 	return 0;
2833 }
2834 
2835 static int buffers_allocated;
2836 
2837 void trace_printk_init_buffers(void)
2838 {
2839 	if (buffers_allocated)
2840 		return;
2841 
2842 	if (alloc_percpu_trace_buffer())
2843 		return;
2844 
2845 	/* trace_printk() is for debug use only. Don't use it in production. */
2846 
2847 	pr_warn("\n");
2848 	pr_warn("**********************************************************\n");
2849 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2850 	pr_warn("**                                                      **\n");
2851 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2852 	pr_warn("**                                                      **\n");
2853 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2854 	pr_warn("** unsafe for production use.                           **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("** If you see this message and you are not debugging    **\n");
2857 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2858 	pr_warn("**                                                      **\n");
2859 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2860 	pr_warn("**********************************************************\n");
2861 
2862 	/* Expand the buffers to set size */
2863 	tracing_update_buffers();
2864 
2865 	buffers_allocated = 1;
2866 
2867 	/*
2868 	 * trace_printk_init_buffers() can be called by modules.
2869 	 * If that happens, then we need to start cmdline recording
2870 	 * directly here. If the global_trace.buffer is already
2871 	 * allocated here, then this was called by module code.
2872 	 */
2873 	if (global_trace.trace_buffer.buffer)
2874 		tracing_start_cmdline_record();
2875 }
2876 
2877 void trace_printk_start_comm(void)
2878 {
2879 	/* Start tracing comms if trace printk is set */
2880 	if (!buffers_allocated)
2881 		return;
2882 	tracing_start_cmdline_record();
2883 }
2884 
2885 static void trace_printk_start_stop_comm(int enabled)
2886 {
2887 	if (!buffers_allocated)
2888 		return;
2889 
2890 	if (enabled)
2891 		tracing_start_cmdline_record();
2892 	else
2893 		tracing_stop_cmdline_record();
2894 }
2895 
2896 /**
2897  * trace_vbprintk - write binary msg to tracing buffer
2898  * @ip: The address of the caller
2899  * @fmt: The string format to write to the buffer
 * @args: Arguments for @fmt
 */
2900 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2901 {
2902 	struct trace_event_call *call = &event_bprint;
2903 	struct ring_buffer_event *event;
2904 	struct ring_buffer *buffer;
2905 	struct trace_array *tr = &global_trace;
2906 	struct bprint_entry *entry;
2907 	unsigned long flags;
2908 	char *tbuffer;
2909 	int len = 0, size, pc;
2910 
2911 	if (unlikely(tracing_selftest_running || tracing_disabled))
2912 		return 0;
2913 
2914 	/* Don't pollute graph traces with trace_vprintk internals */
2915 	pause_graph_tracing();
2916 
2917 	pc = preempt_count();
2918 	preempt_disable_notrace();
2919 
2920 	tbuffer = get_trace_buf();
2921 	if (!tbuffer) {
2922 		len = 0;
2923 		goto out_nobuffer;
2924 	}
2925 
2926 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2927 
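	/* len is the number of u32 words written by vbin_printf() */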
2928 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2929 		goto out;
2930 
2931 	local_save_flags(flags);
2932 	size = sizeof(*entry) + sizeof(u32) * len;
2933 	buffer = tr->trace_buffer.buffer;
2934 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2935 					    flags, pc);
2936 	if (!event)
2937 		goto out;
2938 	entry = ring_buffer_event_data(event);
2939 	entry->ip			= ip;
2940 	entry->fmt			= fmt;
2941 
2942 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2943 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2944 		__buffer_unlock_commit(buffer, event);
2945 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2946 	}
2947 
2948 out:
2949 	put_trace_buf();
2950 
2951 out_nobuffer:
2952 	preempt_enable_notrace();
2953 	unpause_graph_tracing();
2954 
2955 	return len;
2956 }
2957 EXPORT_SYMBOL_GPL(trace_vbprintk);
2958 
2959 static int
2960 __trace_array_vprintk(struct ring_buffer *buffer,
2961 		      unsigned long ip, const char *fmt, va_list args)
2962 {
2963 	struct trace_event_call *call = &event_print;
2964 	struct ring_buffer_event *event;
2965 	int len = 0, size, pc;
2966 	struct print_entry *entry;
2967 	unsigned long flags;
2968 	char *tbuffer;
2969 
2970 	if (tracing_disabled || tracing_selftest_running)
2971 		return 0;
2972 
2973 	/* Don't pollute graph traces with trace_vprintk internals */
2974 	pause_graph_tracing();
2975 
2976 	pc = preempt_count();
2977 	preempt_disable_notrace();
2978 
2980 	tbuffer = get_trace_buf();
2981 	if (!tbuffer) {
2982 		len = 0;
2983 		goto out_nobuffer;
2984 	}
2985 
2986 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2987 
2988 	local_save_flags(flags);
2989 	size = sizeof(*entry) + len + 1;
2990 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2991 					    flags, pc);
2992 	if (!event)
2993 		goto out;
2994 	entry = ring_buffer_event_data(event);
2995 	entry->ip = ip;
2996 
2997 	memcpy(&entry->buf, tbuffer, len + 1);
2998 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2999 		__buffer_unlock_commit(buffer, event);
3000 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3001 	}
3002 
3003 out:
3004 	put_trace_buf();
3005 
3006 out_nobuffer:
3007 	preempt_enable_notrace();
3008 	unpause_graph_tracing();
3009 
3010 	return len;
3011 }
3012 
3013 int trace_array_vprintk(struct trace_array *tr,
3014 			unsigned long ip, const char *fmt, va_list args)
3015 {
3016 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3017 }
3018 
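/*
 * trace_array_printk() writes a printk-style message into the given
 * trace_array's ring buffer. Like trace_printk(), it is only active
 * when the global "printk" trace option is set.
 */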
3019 int trace_array_printk(struct trace_array *tr,
3020 		       unsigned long ip, const char *fmt, ...)
3021 {
3022 	int ret;
3023 	va_list ap;
3024 
3025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3026 		return 0;
3027 
3028 	va_start(ap, fmt);
3029 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3030 	va_end(ap);
3031 	return ret;
3032 }
3033 
3034 int trace_array_printk_buf(struct ring_buffer *buffer,
3035 			   unsigned long ip, const char *fmt, ...)
3036 {
3037 	int ret;
3038 	va_list ap;
3039 
3040 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3041 		return 0;
3042 
3043 	va_start(ap, fmt);
3044 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3045 	va_end(ap);
3046 	return ret;
3047 }
3048 
3049 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3050 {
3051 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_vprintk);
3054 
3055 static void trace_iterator_increment(struct trace_iterator *iter)
3056 {
3057 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3058 
3059 	iter->idx++;
3060 	if (buf_iter)
3061 		ring_buffer_read(buf_iter, NULL);
3062 }
3063 
3064 static struct trace_entry *
3065 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3066 		unsigned long *lost_events)
3067 {
3068 	struct ring_buffer_event *event;
3069 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3070 
3071 	if (buf_iter)
3072 		event = ring_buffer_iter_peek(buf_iter, ts);
3073 	else
3074 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3075 					 lost_events);
3076 
3077 	if (event) {
3078 		iter->ent_size = ring_buffer_event_length(event);
3079 		return ring_buffer_event_data(event);
3080 	}
3081 	iter->ent_size = 0;
3082 	return NULL;
3083 }
3084 
3085 static struct trace_entry *
3086 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3087 		  unsigned long *missing_events, u64 *ent_ts)
3088 {
3089 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3090 	struct trace_entry *ent, *next = NULL;
3091 	unsigned long lost_events = 0, next_lost = 0;
3092 	int cpu_file = iter->cpu_file;
3093 	u64 next_ts = 0, ts;
3094 	int next_cpu = -1;
3095 	int next_size = 0;
3096 	int cpu;
3097 
3098 	/*
3099 	 * If we are in a per_cpu trace file, don't bother iterating over
3100 	 * all the cpus; peek directly at that one cpu.
3101 	 */
3102 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3103 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3104 			return NULL;
3105 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3106 		if (ent_cpu)
3107 			*ent_cpu = cpu_file;
3108 
3109 		return ent;
3110 	}
3111 
3112 	for_each_tracing_cpu(cpu) {
3113 
3114 		if (ring_buffer_empty_cpu(buffer, cpu))
3115 			continue;
3116 
3117 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3118 
3119 		/*
3120 		 * Pick the entry with the smallest timestamp:
3121 		 */
3122 		if (ent && (!next || ts < next_ts)) {
3123 			next = ent;
3124 			next_cpu = cpu;
3125 			next_ts = ts;
3126 			next_lost = lost_events;
3127 			next_size = iter->ent_size;
3128 		}
3129 	}
3130 
3131 	iter->ent_size = next_size;
3132 
3133 	if (ent_cpu)
3134 		*ent_cpu = next_cpu;
3135 
3136 	if (ent_ts)
3137 		*ent_ts = next_ts;
3138 
3139 	if (missing_events)
3140 		*missing_events = next_lost;
3141 
3142 	return next;
3143 }
3144 
3145 /* Find the next real entry, without updating the iterator itself */
3146 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3147 					  int *ent_cpu, u64 *ent_ts)
3148 {
3149 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3150 }
3151 
3152 /* Find the next real entry, and increment the iterator to the next entry */
3153 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3154 {
3155 	iter->ent = __find_next_entry(iter, &iter->cpu,
3156 				      &iter->lost_events, &iter->ts);
3157 
3158 	if (iter->ent)
3159 		trace_iterator_increment(iter);
3160 
3161 	return iter->ent ? iter : NULL;
3162 }
3163 
3164 static void trace_consume(struct trace_iterator *iter)
3165 {
3166 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3167 			    &iter->lost_events);
3168 }
3169 
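/*
 * seq_file ->next() callback: advance the trace iterator to the entry
 * at position *pos, returning NULL when there are no more entries.
 */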
3170 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3171 {
3172 	struct trace_iterator *iter = m->private;
3173 	int i = (int)*pos;
3174 	void *ent;
3175 
3176 	WARN_ON_ONCE(iter->leftover);
3177 
3178 	(*pos)++;
3179 
3180 	/* can't go backwards */
3181 	if (iter->idx > i)
3182 		return NULL;
3183 
3184 	if (iter->idx < 0)
3185 		ent = trace_find_next_entry_inc(iter);
3186 	else
3187 		ent = iter;
3188 
3189 	while (ent && iter->idx < i)
3190 		ent = trace_find_next_entry_inc(iter);
3191 
3192 	iter->pos = *pos;
3193 
3194 	return ent;
3195 }
3196 
3197 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3198 {
3199 	struct ring_buffer_event *event;
3200 	struct ring_buffer_iter *buf_iter;
3201 	unsigned long entries = 0;
3202 	u64 ts;
3203 
3204 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3205 
3206 	buf_iter = trace_buffer_iter(iter, cpu);
3207 	if (!buf_iter)
3208 		return;
3209 
3210 	ring_buffer_iter_reset(buf_iter);
3211 
3212 	/*
3213 	 * With the max latency tracers, a reset may never have taken
3214 	 * place on a cpu. This is evident when the timestamp is before
3215 	 * the start of the buffer.
3216 	 */
3217 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3218 		if (ts >= iter->trace_buffer->time_start)
3219 			break;
3220 		entries++;
3221 		ring_buffer_read(buf_iter, NULL);
3222 	}
3223 
3224 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3225 }
3226 
3227 /*
3228  * The current tracer is copied to avoid taking a global lock
3229  * all around.
3230  */
3231 static void *s_start(struct seq_file *m, loff_t *pos)
3232 {
3233 	struct trace_iterator *iter = m->private;
3234 	struct trace_array *tr = iter->tr;
3235 	int cpu_file = iter->cpu_file;
3236 	void *p = NULL;
3237 	loff_t l = 0;
3238 	int cpu;
3239 
3240 	/*
3241 	 * copy the tracer to avoid using a global lock all around.
3242 	 * iter->trace is a copy of current_trace, the pointer to the
3243 	 * name may be used instead of a strcmp(), as iter->trace->name
3244 	 * will point to the same string as current_trace->name.
3245 	 */
3246 	mutex_lock(&trace_types_lock);
3247 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3248 		*iter->trace = *tr->current_trace;
3249 	mutex_unlock(&trace_types_lock);
3250 
3251 #ifdef CONFIG_TRACER_MAX_TRACE
3252 	if (iter->snapshot && iter->trace->use_max_tr)
3253 		return ERR_PTR(-EBUSY);
3254 #endif
3255 
3256 	if (!iter->snapshot)
3257 		atomic_inc(&trace_record_taskinfo_disabled);
3258 
3259 	if (*pos != iter->pos) {
3260 		iter->ent = NULL;
3261 		iter->cpu = 0;
3262 		iter->idx = -1;
3263 
3264 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3265 			for_each_tracing_cpu(cpu)
3266 				tracing_iter_reset(iter, cpu);
3267 		} else
3268 			tracing_iter_reset(iter, cpu_file);
3269 
3270 		iter->leftover = 0;
3271 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3272 			;
3273 
3274 	} else {
3275 		/*
3276 		 * If we overflowed the seq_file before, then we want
3277 		 * to just reuse the trace_seq buffer again.
3278 		 */
3279 		if (iter->leftover)
3280 			p = iter;
3281 		else {
3282 			l = *pos - 1;
3283 			p = s_next(m, p, &l);
3284 		}
3285 	}
3286 
3287 	trace_event_read_lock();
3288 	trace_access_lock(cpu_file);
3289 	return p;
3290 }
3291 
3292 static void s_stop(struct seq_file *m, void *p)
3293 {
3294 	struct trace_iterator *iter = m->private;
3295 
3296 #ifdef CONFIG_TRACER_MAX_TRACE
3297 	if (iter->snapshot && iter->trace->use_max_tr)
3298 		return;
3299 #endif
3300 
3301 	if (!iter->snapshot)
3302 		atomic_dec(&trace_record_taskinfo_disabled);
3303 
3304 	trace_access_unlock(iter->cpu_file);
3305 	trace_event_read_unlock();
3306 }
3307 
3308 static void
3309 get_total_entries(struct trace_buffer *buf,
3310 		  unsigned long *total, unsigned long *entries)
3311 {
3312 	unsigned long count;
3313 	int cpu;
3314 
3315 	*total = 0;
3316 	*entries = 0;
3317 
3318 	for_each_tracing_cpu(cpu) {
3319 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3320 		/*
3321 		 * If this buffer has skipped entries, then we hold all
3322 		 * entries for the trace and we need to ignore the
3323 		 * ones before the time stamp.
3324 		 */
3325 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3326 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3327 			/* total is the same as the entries */
3328 			*total += count;
3329 		} else
3330 			*total += count +
3331 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3332 		*entries += count;
3333 	}
3334 }
3335 
3336 static void print_lat_help_header(struct seq_file *m)
3337 {
3338 	seq_puts(m, "#                  _------=> CPU#            \n"
3339 		    "#                 / _-----=> irqs-off        \n"
3340 		    "#                | / _----=> need-resched    \n"
3341 		    "#                || / _---=> hardirq/softirq \n"
3342 		    "#                ||| / _--=> preempt-depth   \n"
3343 		    "#                |||| /     delay            \n"
3344 		    "#  cmd     pid   ||||| time  |   caller      \n"
3345 		    "#     \\   /      |||||  \\    |   /         \n");
3346 }
3347 
3348 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3349 {
3350 	unsigned long total;
3351 	unsigned long entries;
3352 
3353 	get_total_entries(buf, &total, &entries);
3354 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3355 		   entries, total, num_online_cpus());
3356 	seq_puts(m, "#\n");
3357 }
3358 
3359 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3360 				   unsigned int flags)
3361 {
3362 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3363 
3364 	print_event_info(buf, m);
3365 
3366 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3367 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3368 }
3369 
3370 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3371 				       unsigned int flags)
3372 {
3373 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3374 	const char tgid_space[] = "          ";
3375 	const char space[] = "  ";
3376 
3377 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3378 		   tgid ? tgid_space : space);
3379 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3380 		   tgid ? tgid_space : space);
3381 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3382 		   tgid ? tgid_space : space);
3383 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3384 		   tgid ? tgid_space : space);
3385 	seq_printf(m, "#                          %s||| /     delay\n",
3386 		   tgid ? tgid_space : space);
3387 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3388 		   tgid ? "   TGID   " : space);
3389 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3390 		   tgid ? "     |    " : space);
3391 }
3392 
3393 void
3394 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3395 {
3396 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3397 	struct trace_buffer *buf = iter->trace_buffer;
3398 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3399 	struct tracer *type = iter->trace;
3400 	unsigned long entries;
3401 	unsigned long total;
3402 	const char *name = type->name;
3405 
3406 	get_total_entries(buf, &total, &entries);
3407 
3408 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3409 		   name, UTS_RELEASE);
3410 	seq_puts(m, "# -----------------------------------"
3411 		 "---------------------------------\n");
3412 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3413 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3414 		   nsecs_to_usecs(data->saved_latency),
3415 		   entries,
3416 		   total,
3417 		   buf->cpu,
3418 #if defined(CONFIG_PREEMPT_NONE)
3419 		   "server",
3420 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3421 		   "desktop",
3422 #elif defined(CONFIG_PREEMPT)
3423 		   "preempt",
3424 #else
3425 		   "unknown",
3426 #endif
3427 		   /* These are reserved for later use */
3428 		   0, 0, 0, 0);
3429 #ifdef CONFIG_SMP
3430 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3431 #else
3432 	seq_puts(m, ")\n");
3433 #endif
3434 	seq_puts(m, "#    -----------------\n");
3435 	seq_printf(m, "#    | task: %.16s-%d "
3436 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3437 		   data->comm, data->pid,
3438 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3439 		   data->policy, data->rt_priority);
3440 	seq_puts(m, "#    -----------------\n");
3441 
3442 	if (data->critical_start) {
3443 		seq_puts(m, "#  => started at: ");
3444 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3445 		trace_print_seq(m, &iter->seq);
3446 		seq_puts(m, "\n#  => ended at:   ");
3447 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3448 		trace_print_seq(m, &iter->seq);
3449 		seq_puts(m, "\n#\n");
3450 	}
3451 
3452 	seq_puts(m, "#\n");
3453 }
3454 
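/*
 * Print a one-time annotation the first time output from a CPU buffer
 * shows up, so that holes caused by buffer overruns are easier to spot.
 */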
3455 static void test_cpu_buff_start(struct trace_iterator *iter)
3456 {
3457 	struct trace_seq *s = &iter->seq;
3458 	struct trace_array *tr = iter->tr;
3459 
3460 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3461 		return;
3462 
3463 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3464 		return;
3465 
3466 	if (cpumask_available(iter->started) &&
3467 	    cpumask_test_cpu(iter->cpu, iter->started))
3468 		return;
3469 
3470 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3471 		return;
3472 
3473 	if (cpumask_available(iter->started))
3474 		cpumask_set_cpu(iter->cpu, iter->started);
3475 
3476 	/* Don't print started cpu buffer for the first entry of the trace */
3477 	if (iter->idx > 1)
3478 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3479 				iter->cpu);
3480 }
3481 
3482 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3483 {
3484 	struct trace_array *tr = iter->tr;
3485 	struct trace_seq *s = &iter->seq;
3486 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3487 	struct trace_entry *entry;
3488 	struct trace_event *event;
3489 
3490 	entry = iter->ent;
3491 
3492 	test_cpu_buff_start(iter);
3493 
3494 	event = ftrace_find_event(entry->type);
3495 
3496 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3497 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3498 			trace_print_lat_context(iter);
3499 		else
3500 			trace_print_context(iter);
3501 	}
3502 
3503 	if (trace_seq_has_overflowed(s))
3504 		return TRACE_TYPE_PARTIAL_LINE;
3505 
3506 	if (event)
3507 		return event->funcs->trace(iter, sym_flags, event);
3508 
3509 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3510 
3511 	return trace_handle_return(s);
3512 }
3513 
3514 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3515 {
3516 	struct trace_array *tr = iter->tr;
3517 	struct trace_seq *s = &iter->seq;
3518 	struct trace_entry *entry;
3519 	struct trace_event *event;
3520 
3521 	entry = iter->ent;
3522 
3523 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3524 		trace_seq_printf(s, "%d %d %llu ",
3525 				 entry->pid, iter->cpu, iter->ts);
3526 
3527 	if (trace_seq_has_overflowed(s))
3528 		return TRACE_TYPE_PARTIAL_LINE;
3529 
3530 	event = ftrace_find_event(entry->type);
3531 	if (event)
3532 		return event->funcs->raw(iter, 0, event);
3533 
3534 	trace_seq_printf(s, "%d ?\n", entry->type);
3535 
3536 	return trace_handle_return(s);
3537 }
3538 
3539 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3540 {
3541 	struct trace_array *tr = iter->tr;
3542 	struct trace_seq *s = &iter->seq;
3543 	unsigned char newline = '\n';
3544 	struct trace_entry *entry;
3545 	struct trace_event *event;
3546 
3547 	entry = iter->ent;
3548 
3549 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3550 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3551 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3552 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3553 		if (trace_seq_has_overflowed(s))
3554 			return TRACE_TYPE_PARTIAL_LINE;
3555 	}
3556 
3557 	event = ftrace_find_event(entry->type);
3558 	if (event) {
3559 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3560 		if (ret != TRACE_TYPE_HANDLED)
3561 			return ret;
3562 	}
3563 
3564 	SEQ_PUT_FIELD(s, newline);
3565 
3566 	return trace_handle_return(s);
3567 }
3568 
3569 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3570 {
3571 	struct trace_array *tr = iter->tr;
3572 	struct trace_seq *s = &iter->seq;
3573 	struct trace_entry *entry;
3574 	struct trace_event *event;
3575 
3576 	entry = iter->ent;
3577 
3578 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3579 		SEQ_PUT_FIELD(s, entry->pid);
3580 		SEQ_PUT_FIELD(s, iter->cpu);
3581 		SEQ_PUT_FIELD(s, iter->ts);
3582 		if (trace_seq_has_overflowed(s))
3583 			return TRACE_TYPE_PARTIAL_LINE;
3584 	}
3585 
3586 	event = ftrace_find_event(entry->type);
3587 	return event ? event->funcs->binary(iter, 0, event) :
3588 		TRACE_TYPE_HANDLED;
3589 }
3590 
3591 int trace_empty(struct trace_iterator *iter)
3592 {
3593 	struct ring_buffer_iter *buf_iter;
3594 	int cpu;
3595 
3596 	/* If we are looking at one CPU buffer, only check that one */
3597 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3598 		cpu = iter->cpu_file;
3599 		buf_iter = trace_buffer_iter(iter, cpu);
3600 		if (buf_iter) {
3601 			if (!ring_buffer_iter_empty(buf_iter))
3602 				return 0;
3603 		} else {
3604 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3605 				return 0;
3606 		}
3607 		return 1;
3608 	}
3609 
3610 	for_each_tracing_cpu(cpu) {
3611 		buf_iter = trace_buffer_iter(iter, cpu);
3612 		if (buf_iter) {
3613 			if (!ring_buffer_iter_empty(buf_iter))
3614 				return 0;
3615 		} else {
3616 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617 				return 0;
3618 		}
3619 	}
3620 
3621 	return 1;
3622 }
3623 
3624 /*  Called with trace_event_read_lock() held. */
3625 enum print_line_t print_trace_line(struct trace_iterator *iter)
3626 {
3627 	struct trace_array *tr = iter->tr;
3628 	unsigned long trace_flags = tr->trace_flags;
3629 	enum print_line_t ret;
3630 
3631 	if (iter->lost_events) {
3632 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3633 				 iter->cpu, iter->lost_events);
3634 		if (trace_seq_has_overflowed(&iter->seq))
3635 			return TRACE_TYPE_PARTIAL_LINE;
3636 	}
3637 
3638 	if (iter->trace && iter->trace->print_line) {
3639 		ret = iter->trace->print_line(iter);
3640 		if (ret != TRACE_TYPE_UNHANDLED)
3641 			return ret;
3642 	}
3643 
3644 	if (iter->ent->type == TRACE_BPUTS &&
3645 			trace_flags & TRACE_ITER_PRINTK &&
3646 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3647 		return trace_print_bputs_msg_only(iter);
3648 
3649 	if (iter->ent->type == TRACE_BPRINT &&
3650 			trace_flags & TRACE_ITER_PRINTK &&
3651 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3652 		return trace_print_bprintk_msg_only(iter);
3653 
3654 	if (iter->ent->type == TRACE_PRINT &&
3655 			trace_flags & TRACE_ITER_PRINTK &&
3656 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3657 		return trace_print_printk_msg_only(iter);
3658 
3659 	if (trace_flags & TRACE_ITER_BIN)
3660 		return print_bin_fmt(iter);
3661 
3662 	if (trace_flags & TRACE_ITER_HEX)
3663 		return print_hex_fmt(iter);
3664 
3665 	if (trace_flags & TRACE_ITER_RAW)
3666 		return print_raw_fmt(iter);
3667 
3668 	return print_trace_fmt(iter);
3669 }
3670 
3671 void trace_latency_header(struct seq_file *m)
3672 {
3673 	struct trace_iterator *iter = m->private;
3674 	struct trace_array *tr = iter->tr;
3675 
3676 	/* print nothing if the buffers are empty */
3677 	if (trace_empty(iter))
3678 		return;
3679 
3680 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3681 		print_trace_header(m, iter);
3682 
3683 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3684 		print_lat_help_header(m);
3685 }
3686 
3687 void trace_default_header(struct seq_file *m)
3688 {
3689 	struct trace_iterator *iter = m->private;
3690 	struct trace_array *tr = iter->tr;
3691 	unsigned long trace_flags = tr->trace_flags;
3692 
3693 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3694 		return;
3695 
3696 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3697 		/* print nothing if the buffers are empty */
3698 		if (trace_empty(iter))
3699 			return;
3700 		print_trace_header(m, iter);
3701 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3702 			print_lat_help_header(m);
3703 	} else {
3704 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3705 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3706 				print_func_help_header_irq(iter->trace_buffer,
3707 							   m, trace_flags);
3708 			else
3709 				print_func_help_header(iter->trace_buffer, m,
3710 						       trace_flags);
3711 		}
3712 	}
3713 }
3714 
3715 static void test_ftrace_alive(struct seq_file *m)
3716 {
3717 	if (!ftrace_is_dead())
3718 		return;
3719 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3720 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3721 }
3722 
3723 #ifdef CONFIG_TRACER_MAX_TRACE
3724 static void show_snapshot_main_help(struct seq_file *m)
3725 {
3726 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3727 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3728 		    "#                      Takes a snapshot of the main buffer.\n"
3729 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3730 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3731 		    "#                       is not a '0' or '1')\n");
3732 }
3733 
3734 static void show_snapshot_percpu_help(struct seq_file *m)
3735 {
3736 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3737 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3738 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3739 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3740 #else
3741 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3742 		    "#                     Must use main snapshot file to allocate.\n");
3743 #endif
3744 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3745 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3746 		    "#                       is not a '0' or '1')\n");
3747 }
3748 
3749 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3750 {
3751 	if (iter->tr->allocated_snapshot)
3752 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3753 	else
3754 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3755 
3756 	seq_puts(m, "# Snapshot commands:\n");
3757 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3758 		show_snapshot_main_help(m);
3759 	else
3760 		show_snapshot_percpu_help(m);
3761 }
3762 #else
3763 /* Should never be called */
3764 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3765 #endif
3766 
3767 static int s_show(struct seq_file *m, void *v)
3768 {
3769 	struct trace_iterator *iter = v;
3770 	int ret;
3771 
3772 	if (iter->ent == NULL) {
3773 		if (iter->tr) {
3774 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3775 			seq_puts(m, "#\n");
3776 			test_ftrace_alive(m);
3777 		}
3778 		if (iter->snapshot && trace_empty(iter))
3779 			print_snapshot_help(m, iter);
3780 		else if (iter->trace && iter->trace->print_header)
3781 			iter->trace->print_header(m);
3782 		else
3783 			trace_default_header(m);
3784 
3785 	} else if (iter->leftover) {
3786 		/*
3787 		 * If we filled the seq_file buffer earlier, we
3788 		 * want to just show it now.
3789 		 */
3790 		ret = trace_print_seq(m, &iter->seq);
3791 
3792 		/* ret should this time be zero, but you never know */
3793 		iter->leftover = ret;
3794 
3795 	} else {
3796 		print_trace_line(iter);
3797 		ret = trace_print_seq(m, &iter->seq);
3798 		/*
3799 		 * If we overflow the seq_file buffer, then it will
3800 		 * ask us for this data again at start up.
3801 		 * Use that instead.
3802 		 *  ret is 0 if seq_file write succeeded.
3803 		 *        -1 otherwise.
3804 		 */
3805 		iter->leftover = ret;
3806 	}
3807 
3808 	return 0;
3809 }
3810 
3811 /*
3812  * Should be used after trace_array_get(), trace_types_lock
3813  * ensures that i_cdev was already initialized.
3814  */
3815 static inline int tracing_get_cpu(struct inode *inode)
3816 {
3817 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3818 		return (long)inode->i_cdev - 1;
3819 	return RING_BUFFER_ALL_CPUS;
3820 }
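
/*
 * Example of the encoding above: trace_create_cpu_file() stores cpu + 1
 * in i_cdev, so per_cpu/cpu2/trace resolves to cpu 2 here, while the
 * top level files (i_cdev == NULL) resolve to RING_BUFFER_ALL_CPUS.
 */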
3821 
3822 static const struct seq_operations tracer_seq_ops = {
3823 	.start		= s_start,
3824 	.next		= s_next,
3825 	.stop		= s_stop,
3826 	.show		= s_show,
3827 };
3828 
3829 static struct trace_iterator *
3830 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3831 {
3832 	struct trace_array *tr = inode->i_private;
3833 	struct trace_iterator *iter;
3834 	int cpu;
3835 
3836 	if (tracing_disabled)
3837 		return ERR_PTR(-ENODEV);
3838 
3839 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3840 	if (!iter)
3841 		return ERR_PTR(-ENOMEM);
3842 
3843 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3844 				    GFP_KERNEL);
3845 	if (!iter->buffer_iter)
3846 		goto release;
3847 
3848 	/*
3849 	 * We make a copy of the current tracer to avoid concurrent
3850 	 * changes on it while we are reading.
3851 	 */
3852 	mutex_lock(&trace_types_lock);
3853 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3854 	if (!iter->trace)
3855 		goto fail;
3856 
3857 	*iter->trace = *tr->current_trace;
3858 
3859 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3860 		goto fail;
3861 
3862 	iter->tr = tr;
3863 
3864 #ifdef CONFIG_TRACER_MAX_TRACE
3865 	/* Currently only the top directory has a snapshot */
3866 	if (tr->current_trace->print_max || snapshot)
3867 		iter->trace_buffer = &tr->max_buffer;
3868 	else
3869 #endif
3870 		iter->trace_buffer = &tr->trace_buffer;
3871 	iter->snapshot = snapshot;
3872 	iter->pos = -1;
3873 	iter->cpu_file = tracing_get_cpu(inode);
3874 	mutex_init(&iter->mutex);
3875 
3876 	/* Notify the tracer early; before we stop tracing. */
3877 	if (iter->trace && iter->trace->open)
3878 		iter->trace->open(iter);
3879 
3880 	/* Annotate start of buffers if we had overruns */
3881 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3882 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3883 
3884 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3885 	if (trace_clocks[tr->clock_id].in_ns)
3886 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3887 
3888 	/* stop the trace while dumping if we are not opening "snapshot" */
3889 	if (!iter->snapshot)
3890 		tracing_stop_tr(tr);
3891 
3892 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3893 		for_each_tracing_cpu(cpu) {
3894 			iter->buffer_iter[cpu] =
3895 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3896 		}
3897 		ring_buffer_read_prepare_sync();
3898 		for_each_tracing_cpu(cpu) {
3899 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3900 			tracing_iter_reset(iter, cpu);
3901 		}
3902 	} else {
3903 		cpu = iter->cpu_file;
3904 		iter->buffer_iter[cpu] =
3905 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3906 		ring_buffer_read_prepare_sync();
3907 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3908 		tracing_iter_reset(iter, cpu);
3909 	}
3910 
3911 	mutex_unlock(&trace_types_lock);
3912 
3913 	return iter;
3914 
3915  fail:
3916 	mutex_unlock(&trace_types_lock);
3917 	kfree(iter->trace);
3918 	kfree(iter->buffer_iter);
3919 release:
3920 	seq_release_private(inode, file);
3921 	return ERR_PTR(-ENOMEM);
3922 }
3923 
3924 int tracing_open_generic(struct inode *inode, struct file *filp)
3925 {
3926 	if (tracing_disabled)
3927 		return -ENODEV;
3928 
3929 	filp->private_data = inode->i_private;
3930 	return 0;
3931 }
3932 
3933 bool tracing_is_disabled(void)
3934 {
3935 	return (tracing_disabled) ? true : false;
3936 }
3937 
3938 /*
3939  * Open and update trace_array ref count.
3940  * Must have the current trace_array passed to it.
3941  */
3942 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3943 {
3944 	struct trace_array *tr = inode->i_private;
3945 
3946 	if (tracing_disabled)
3947 		return -ENODEV;
3948 
3949 	if (trace_array_get(tr) < 0)
3950 		return -ENODEV;
3951 
3952 	filp->private_data = inode->i_private;
3953 
3954 	return 0;
3955 }
3956 
3957 static int tracing_release(struct inode *inode, struct file *file)
3958 {
3959 	struct trace_array *tr = inode->i_private;
3960 	struct seq_file *m = file->private_data;
3961 	struct trace_iterator *iter;
3962 	int cpu;
3963 
3964 	if (!(file->f_mode & FMODE_READ)) {
3965 		trace_array_put(tr);
3966 		return 0;
3967 	}
3968 
3969 	/* Writes do not use seq_file */
3970 	iter = m->private;
3971 	mutex_lock(&trace_types_lock);
3972 
3973 	for_each_tracing_cpu(cpu) {
3974 		if (iter->buffer_iter[cpu])
3975 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3976 	}
3977 
3978 	if (iter->trace && iter->trace->close)
3979 		iter->trace->close(iter);
3980 
3981 	if (!iter->snapshot)
3982 		/* reenable tracing if it was previously enabled */
3983 		tracing_start_tr(tr);
3984 
3985 	__trace_array_put(tr);
3986 
3987 	mutex_unlock(&trace_types_lock);
3988 
3989 	mutex_destroy(&iter->mutex);
3990 	free_cpumask_var(iter->started);
3991 	kfree(iter->trace);
3992 	kfree(iter->buffer_iter);
3993 	seq_release_private(inode, file);
3994 
3995 	return 0;
3996 }
3997 
3998 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3999 {
4000 	struct trace_array *tr = inode->i_private;
4001 
4002 	trace_array_put(tr);
4003 	return 0;
4004 }
4005 
4006 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4007 {
4008 	struct trace_array *tr = inode->i_private;
4009 
4010 	trace_array_put(tr);
4011 
4012 	return single_release(inode, file);
4013 }
4014 
4015 static int tracing_open(struct inode *inode, struct file *file)
4016 {
4017 	struct trace_array *tr = inode->i_private;
4018 	struct trace_iterator *iter;
4019 	int ret = 0;
4020 
4021 	if (trace_array_get(tr) < 0)
4022 		return -ENODEV;
4023 
4024 	/* If this file was open for write, then erase contents */
4025 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4026 		int cpu = tracing_get_cpu(inode);
4027 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4028 
4029 #ifdef CONFIG_TRACER_MAX_TRACE
4030 		if (tr->current_trace->print_max)
4031 			trace_buf = &tr->max_buffer;
4032 #endif
4033 
4034 		if (cpu == RING_BUFFER_ALL_CPUS)
4035 			tracing_reset_online_cpus(trace_buf);
4036 		else
4037 			tracing_reset(trace_buf, cpu);
4038 	}
4039 
4040 	if (file->f_mode & FMODE_READ) {
4041 		iter = __tracing_open(inode, file, false);
4042 		if (IS_ERR(iter))
4043 			ret = PTR_ERR(iter);
4044 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4045 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4046 	}
4047 
4048 	if (ret < 0)
4049 		trace_array_put(tr);
4050 
4051 	return ret;
4052 }
4053 
4054 /*
4055  * Some tracers are not suitable for instance buffers.
4056  * A tracer is always available for the global array (toplevel)
4057  * or if it explicitly states that it is.
4058  */
4059 static bool
4060 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4061 {
4062 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4063 }
4064 
4065 /* Find the next tracer that this trace array may use */
4066 static struct tracer *
4067 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4068 {
4069 	while (t && !trace_ok_for_array(t, tr))
4070 		t = t->next;
4071 
4072 	return t;
4073 }
4074 
4075 static void *
4076 t_next(struct seq_file *m, void *v, loff_t *pos)
4077 {
4078 	struct trace_array *tr = m->private;
4079 	struct tracer *t = v;
4080 
4081 	(*pos)++;
4082 
4083 	if (t)
4084 		t = get_tracer_for_array(tr, t->next);
4085 
4086 	return t;
4087 }
4088 
4089 static void *t_start(struct seq_file *m, loff_t *pos)
4090 {
4091 	struct trace_array *tr = m->private;
4092 	struct tracer *t;
4093 	loff_t l = 0;
4094 
4095 	mutex_lock(&trace_types_lock);
4096 
4097 	t = get_tracer_for_array(tr, trace_types);
4098 	for (; t && l < *pos; t = t_next(m, t, &l))
4099 			;
4100 
4101 	return t;
4102 }
4103 
4104 static void t_stop(struct seq_file *m, void *p)
4105 {
4106 	mutex_unlock(&trace_types_lock);
4107 }
4108 
4109 static int t_show(struct seq_file *m, void *v)
4110 {
4111 	struct tracer *t = v;
4112 
4113 	if (!t)
4114 		return 0;
4115 
4116 	seq_puts(m, t->name);
4117 	if (t->next)
4118 		seq_putc(m, ' ');
4119 	else
4120 		seq_putc(m, '\n');
4121 
4122 	return 0;
4123 }
4124 
4125 static const struct seq_operations show_traces_seq_ops = {
4126 	.start		= t_start,
4127 	.next		= t_next,
4128 	.stop		= t_stop,
4129 	.show		= t_show,
4130 };
4131 
4132 static int show_traces_open(struct inode *inode, struct file *file)
4133 {
4134 	struct trace_array *tr = inode->i_private;
4135 	struct seq_file *m;
4136 	int ret;
4137 
4138 	if (tracing_disabled)
4139 		return -ENODEV;
4140 
4141 	ret = seq_open(file, &show_traces_seq_ops);
4142 	if (ret)
4143 		return ret;
4144 
4145 	m = file->private_data;
4146 	m->private = tr;
4147 
4148 	return 0;
4149 }
4150 
4151 static ssize_t
4152 tracing_write_stub(struct file *filp, const char __user *ubuf,
4153 		   size_t count, loff_t *ppos)
4154 {
4155 	return count;
4156 }
4157 
4158 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4159 {
4160 	int ret;
4161 
4162 	if (file->f_mode & FMODE_READ)
4163 		ret = seq_lseek(file, offset, whence);
4164 	else
4165 		file->f_pos = ret = 0;
4166 
4167 	return ret;
4168 }
4169 
4170 static const struct file_operations tracing_fops = {
4171 	.open		= tracing_open,
4172 	.read		= seq_read,
4173 	.write		= tracing_write_stub,
4174 	.llseek		= tracing_lseek,
4175 	.release	= tracing_release,
4176 };
4177 
4178 static const struct file_operations show_traces_fops = {
4179 	.open		= show_traces_open,
4180 	.read		= seq_read,
4181 	.release	= seq_release,
4182 	.llseek		= seq_lseek,
4183 };
4184 
4185 static ssize_t
4186 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4187 		     size_t count, loff_t *ppos)
4188 {
4189 	struct trace_array *tr = file_inode(filp)->i_private;
4190 	char *mask_str;
4191 	int len;
4192 
4193 	len = snprintf(NULL, 0, "%*pb\n",
4194 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4195 	mask_str = kmalloc(len, GFP_KERNEL);
4196 	if (!mask_str)
4197 		return -ENOMEM;
4198 
4199 	len = snprintf(mask_str, len, "%*pb\n",
4200 		       cpumask_pr_args(tr->tracing_cpumask));
4201 	if (len >= count) {
4202 		count = -EINVAL;
4203 		goto out_err;
4204 	}
4205 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4206 
4207 out_err:
4208 	kfree(mask_str);
4209 
4210 	return count;
4211 }
4212 
4213 static ssize_t
4214 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4215 		      size_t count, loff_t *ppos)
4216 {
4217 	struct trace_array *tr = file_inode(filp)->i_private;
4218 	cpumask_var_t tracing_cpumask_new;
4219 	int err, cpu;
4220 
4221 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4222 		return -ENOMEM;
4223 
4224 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4225 	if (err)
4226 		goto err_unlock;
4227 
4228 	local_irq_disable();
4229 	arch_spin_lock(&tr->max_lock);
4230 	for_each_tracing_cpu(cpu) {
4231 		/*
4232 		 * Increase/decrease the disabled counter if we are
4233 		 * about to flip a bit in the cpumask:
4234 		 */
4235 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4236 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4237 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4238 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4239 		}
4240 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4241 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4242 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4243 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4244 		}
4245 	}
4246 	arch_spin_unlock(&tr->max_lock);
4247 	local_irq_enable();
4248 
4249 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4250 	free_cpumask_var(tracing_cpumask_new);
4251 
4252 	return count;
4253 
4254 err_unlock:
4255 	free_cpumask_var(tracing_cpumask_new);
4256 
4257 	return err;
4258 }
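
/*
 * Example (assuming a 4-CPU system): restrict tracing to CPUs 0 and 1:
 *
 *	# echo 3 > tracing_cpumask
 *
 * The value is a hex cpumask, as parsed by cpumask_parse_user() above.
 */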
4259 
4260 static const struct file_operations tracing_cpumask_fops = {
4261 	.open		= tracing_open_generic_tr,
4262 	.read		= tracing_cpumask_read,
4263 	.write		= tracing_cpumask_write,
4264 	.release	= tracing_release_generic_tr,
4265 	.llseek		= generic_file_llseek,
4266 };
4267 
4268 static int tracing_trace_options_show(struct seq_file *m, void *v)
4269 {
4270 	struct tracer_opt *trace_opts;
4271 	struct trace_array *tr = m->private;
4272 	u32 tracer_flags;
4273 	int i;
4274 
4275 	mutex_lock(&trace_types_lock);
4276 	tracer_flags = tr->current_trace->flags->val;
4277 	trace_opts = tr->current_trace->flags->opts;
4278 
4279 	for (i = 0; trace_options[i]; i++) {
4280 		if (tr->trace_flags & (1 << i))
4281 			seq_printf(m, "%s\n", trace_options[i]);
4282 		else
4283 			seq_printf(m, "no%s\n", trace_options[i]);
4284 	}
4285 
4286 	for (i = 0; trace_opts[i].name; i++) {
4287 		if (tracer_flags & trace_opts[i].bit)
4288 			seq_printf(m, "%s\n", trace_opts[i].name);
4289 		else
4290 			seq_printf(m, "no%s\n", trace_opts[i].name);
4291 	}
4292 	mutex_unlock(&trace_types_lock);
4293 
4294 	return 0;
4295 }
4296 
4297 static int __set_tracer_option(struct trace_array *tr,
4298 			       struct tracer_flags *tracer_flags,
4299 			       struct tracer_opt *opts, int neg)
4300 {
4301 	struct tracer *trace = tracer_flags->trace;
4302 	int ret;
4303 
4304 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4305 	if (ret)
4306 		return ret;
4307 
4308 	if (neg)
4309 		tracer_flags->val &= ~opts->bit;
4310 	else
4311 		tracer_flags->val |= opts->bit;
4312 	return 0;
4313 }
4314 
4315 /* Try to assign a tracer specific option */
4316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4317 {
4318 	struct tracer *trace = tr->current_trace;
4319 	struct tracer_flags *tracer_flags = trace->flags;
4320 	struct tracer_opt *opts = NULL;
4321 	int i;
4322 
4323 	for (i = 0; tracer_flags->opts[i].name; i++) {
4324 		opts = &tracer_flags->opts[i];
4325 
4326 		if (strcmp(cmp, opts->name) == 0)
4327 			return __set_tracer_option(tr, trace->flags, opts, neg);
4328 	}
4329 
4330 	return -EINVAL;
4331 }
4332 
4333 /* Some tracers require overwrite to stay enabled */
4334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4335 {
4336 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4337 		return -1;
4338 
4339 	return 0;
4340 }
4341 
4342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4343 {
4344 	/* do nothing if flag is already set */
4345 	if (!!(tr->trace_flags & mask) == !!enabled)
4346 		return 0;
4347 
4348 	/* Give the tracer a chance to approve the change */
4349 	if (tr->current_trace->flag_changed)
4350 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4351 			return -EINVAL;
4352 
4353 	if (enabled)
4354 		tr->trace_flags |= mask;
4355 	else
4356 		tr->trace_flags &= ~mask;
4357 
4358 	if (mask == TRACE_ITER_RECORD_CMD)
4359 		trace_event_enable_cmd_record(enabled);
4360 
4361 	if (mask == TRACE_ITER_RECORD_TGID) {
4362 		if (!tgid_map)
4363 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4364 					   GFP_KERNEL);
4365 		if (!tgid_map) {
4366 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4367 			return -ENOMEM;
4368 		}
4369 
4370 		trace_event_enable_tgid_record(enabled);
4371 	}
4372 
4373 	if (mask == TRACE_ITER_EVENT_FORK)
4374 		trace_event_follow_fork(tr, enabled);
4375 
4376 	if (mask == TRACE_ITER_FUNC_FORK)
4377 		ftrace_pid_follow_fork(tr, enabled);
4378 
4379 	if (mask == TRACE_ITER_OVERWRITE) {
4380 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4381 #ifdef CONFIG_TRACER_MAX_TRACE
4382 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4383 #endif
4384 	}
4385 
4386 	if (mask == TRACE_ITER_PRINTK) {
4387 		trace_printk_start_stop_comm(enabled);
4388 		trace_printk_control(enabled);
4389 	}
4390 
4391 	return 0;
4392 }
4393 
4394 static int trace_set_options(struct trace_array *tr, char *option)
4395 {
4396 	char *cmp;
4397 	int neg = 0;
4398 	int ret = -ENODEV;
4399 	int i;
4400 	size_t orig_len = strlen(option);
4401 
4402 	cmp = strstrip(option);
4403 
4404 	if (strncmp(cmp, "no", 2) == 0) {
4405 		neg = 1;
4406 		cmp += 2;
4407 	}
4408 
4409 	mutex_lock(&trace_types_lock);
4410 
4411 	for (i = 0; trace_options[i]; i++) {
4412 		if (strcmp(cmp, trace_options[i]) == 0) {
4413 			ret = set_tracer_flag(tr, 1 << i, !neg);
4414 			break;
4415 		}
4416 	}
4417 
4418 	/* If no option could be set, test the specific tracer options */
4419 	if (!trace_options[i])
4420 		ret = set_tracer_option(tr, cmp, neg);
4421 
4422 	mutex_unlock(&trace_types_lock);
4423 
4424 	/*
4425 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4426 	 * turn it back into a space.
4427 	 */
4428 	if (orig_len > strlen(option))
4429 		option[strlen(option)] = ' ';
4430 
4431 	return ret;
4432 }
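
/*
 * Example writes to the trace_options file handled here:
 *
 *	# echo sym-offset > trace_options
 *	# echo nosym-offset > trace_options
 *
 * A "no" prefix clears the flag. Names that are not generic trace options
 * fall through to the current tracer's private options.
 */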
4433 
4434 static void __init apply_trace_boot_options(void)
4435 {
4436 	char *buf = trace_boot_options_buf;
4437 	char *option;
4438 
4439 	while (true) {
4440 		option = strsep(&buf, ",");
4441 
4442 		if (!option)
4443 			break;
4444 
4445 		if (*option)
4446 			trace_set_options(&global_trace, option);
4447 
4448 		/* Put back the comma to allow this to be called again */
4449 		if (buf)
4450 			*(buf - 1) = ',';
4451 	}
4452 }
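
/*
 * Typically reached via the "trace_options=" kernel command line
 * parameter, e.g.:
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * Each comma-separated token is passed to trace_set_options() above.
 */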
4453 
4454 static ssize_t
4455 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4456 			size_t cnt, loff_t *ppos)
4457 {
4458 	struct seq_file *m = filp->private_data;
4459 	struct trace_array *tr = m->private;
4460 	char buf[64];
4461 	int ret;
4462 
4463 	if (cnt >= sizeof(buf))
4464 		return -EINVAL;
4465 
4466 	if (copy_from_user(buf, ubuf, cnt))
4467 		return -EFAULT;
4468 
4469 	buf[cnt] = 0;
4470 
4471 	ret = trace_set_options(tr, buf);
4472 	if (ret < 0)
4473 		return ret;
4474 
4475 	*ppos += cnt;
4476 
4477 	return cnt;
4478 }
4479 
4480 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4481 {
4482 	struct trace_array *tr = inode->i_private;
4483 	int ret;
4484 
4485 	if (tracing_disabled)
4486 		return -ENODEV;
4487 
4488 	if (trace_array_get(tr) < 0)
4489 		return -ENODEV;
4490 
4491 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4492 	if (ret < 0)
4493 		trace_array_put(tr);
4494 
4495 	return ret;
4496 }
4497 
4498 static const struct file_operations tracing_iter_fops = {
4499 	.open		= tracing_trace_options_open,
4500 	.read		= seq_read,
4501 	.llseek		= seq_lseek,
4502 	.release	= tracing_single_release_tr,
4503 	.write		= tracing_trace_options_write,
4504 };
4505 
4506 static const char readme_msg[] =
4507 	"tracing mini-HOWTO:\n\n"
4508 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4509 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4510 	" Important files:\n"
4511 	"  trace\t\t\t- The static contents of the buffer\n"
4512 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4513 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4514 	"  current_tracer\t- function and latency tracers\n"
4515 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4516 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4517 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4518 	"  trace_clock\t\t- change the clock used to order events\n"
4519 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4520 	"      global:   Synced across CPUs but slows tracing down.\n"
4521 	"     counter:   Not a clock, but just an increment\n"
4522 	"      uptime:   Jiffy counter from time of boot\n"
4523 	"        perf:   Same clock that perf events use\n"
4524 #ifdef CONFIG_X86_64
4525 	"     x86-tsc:   TSC cycle counter\n"
4526 #endif
4527 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4528 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4529 	"    absolute:   Absolute (standalone) timestamp\n"
4530 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4531 	"\n  trace_marker_raw\t\t- Writes into this file are written into the kernel buffer as binary data\n"
4532 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4533 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4534 	"\t\t\t  Remove sub-buffer with rmdir\n"
4535 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4536 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4537 	"\t\t\t  option name\n"
4538 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4539 #ifdef CONFIG_DYNAMIC_FTRACE
4540 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4541 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4542 	"\t\t\t  functions\n"
4543 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4544 	"\t     modules: Can select a group via module\n"
4545 	"\t      Format: :mod:<module-name>\n"
4546 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4547 	"\t    triggers: a command to perform when function is hit\n"
4548 	"\t      Format: <function>:<trigger>[:count]\n"
4549 	"\t     trigger: traceon, traceoff\n"
4550 	"\t\t      enable_event:<system>:<event>\n"
4551 	"\t\t      disable_event:<system>:<event>\n"
4552 #ifdef CONFIG_STACKTRACE
4553 	"\t\t      stacktrace\n"
4554 #endif
4555 #ifdef CONFIG_TRACER_SNAPSHOT
4556 	"\t\t      snapshot\n"
4557 #endif
4558 	"\t\t      dump\n"
4559 	"\t\t      cpudump\n"
4560 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4561 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4562 	"\t     The first one will disable tracing every time do_fault is hit\n"
4563 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4564 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4565 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4566 	"\t       the counter will not decrement. It only decrements when the\n"
4567 	"\t       trigger did work\n"
4568 	"\t     To remove trigger without count:\n"
4569 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4570 	"\t     To remove trigger with a count:\n"
4571 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4572 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4573 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4574 	"\t    modules: Can select a group via module command :mod:\n"
4575 	"\t    Does not accept triggers\n"
4576 #endif /* CONFIG_DYNAMIC_FTRACE */
4577 #ifdef CONFIG_FUNCTION_TRACER
4578 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4579 	"\t\t    (function)\n"
4580 #endif
4581 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4582 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4583 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4584 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4585 #endif
4586 #ifdef CONFIG_TRACER_SNAPSHOT
4587 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4588 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4589 	"\t\t\t  information\n"
4590 #endif
4591 #ifdef CONFIG_STACK_TRACER
4592 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4593 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4594 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4595 	"\t\t\t  new trace)\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4598 	"\t\t\t  traces\n"
4599 #endif
4600 #endif /* CONFIG_STACK_TRACER */
4601 #ifdef CONFIG_KPROBE_EVENTS
4602 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4603 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4604 #endif
4605 #ifdef CONFIG_UPROBE_EVENTS
4606 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4607 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4608 #endif
4609 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4610 	"\t  accepts: event-definitions (one definition per line)\n"
4611 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4612 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4613 	"\t           -:[<group>/]<event>\n"
4614 #ifdef CONFIG_KPROBE_EVENTS
4615 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4616 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4617 #endif
4618 #ifdef CONFIG_UPROBE_EVENTS
4619 	"\t    place: <path>:<offset>\n"
4620 #endif
4621 	"\t     args: <name>=fetcharg[:type]\n"
4622 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4623 	"\t           $stack<index>, $stack, $retval, $comm\n"
4624 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4625 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4626 #endif
4627 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4628 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4629 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4630 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4631 	"\t\t\t  events\n"
4632 	"      filter\t\t- If set, only events passing filter are traced\n"
4633 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4634 	"\t\t\t  <event>:\n"
4635 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4636 	"      filter\t\t- If set, only events passing filter are traced\n"
4637 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4638 	"\t    Format: <trigger>[:count][if <filter>]\n"
4639 	"\t   trigger: traceon, traceoff\n"
4640 	"\t            enable_event:<system>:<event>\n"
4641 	"\t            disable_event:<system>:<event>\n"
4642 #ifdef CONFIG_HIST_TRIGGERS
4643 	"\t            enable_hist:<system>:<event>\n"
4644 	"\t            disable_hist:<system>:<event>\n"
4645 #endif
4646 #ifdef CONFIG_STACKTRACE
4647 	"\t\t    stacktrace\n"
4648 #endif
4649 #ifdef CONFIG_TRACER_SNAPSHOT
4650 	"\t\t    snapshot\n"
4651 #endif
4652 #ifdef CONFIG_HIST_TRIGGERS
4653 	"\t\t    hist (see below)\n"
4654 #endif
4655 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4656 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4657 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4658 	"\t                  events/block/block_unplug/trigger\n"
4659 	"\t   The first disables tracing every time block_unplug is hit.\n"
4660 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4661 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4662 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4663 	"\t   Like function triggers, the counter is only decremented if it\n"
4664 	"\t    enabled or disabled tracing.\n"
4665 	"\t   To remove a trigger without a count:\n"
4666 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4667 	"\t   To remove a trigger with a count:\n"
4668 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4669 	"\t   Filters can be ignored when removing a trigger.\n"
4670 #ifdef CONFIG_HIST_TRIGGERS
4671 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4672 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4673 	"\t            [:values=<field1[,field2,...]>]\n"
4674 	"\t            [:sort=<field1[,field2,...]>]\n"
4675 	"\t            [:size=#entries]\n"
4676 	"\t            [:pause][:continue][:clear]\n"
4677 	"\t            [:name=histname1]\n"
4678 	"\t            [if <filter>]\n\n"
4679 	"\t    When a matching event is hit, an entry is added to a hash\n"
4680 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4681 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4682 	"\t    correspond to fields in the event's format description.  Keys\n"
4683 	"\t    can be any field, or the special string 'stacktrace'.\n"
4684 	"\t    Compound keys consisting of up to two fields can be specified\n"
4685 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4686 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4687 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4688 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4689 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4690 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4691 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4692 	"\t    its histogram data will be shared with other triggers of the\n"
4693 	"\t    same name, and trigger hits will update this common data.\n\n"
4694 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4695 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4696 	"\t    triggers attached to an event, there will be a table for each\n"
4697 	"\t    trigger in the output.  The table displayed for a named\n"
4698 	"\t    trigger will be the same as any other instance having the\n"
4699 	"\t    same name.  The default format used to display a given field\n"
4700 	"\t    can be modified by appending any of the following modifiers\n"
4701 	"\t    to the field name, as applicable:\n\n"
4702 	"\t            .hex        display a number as a hex value\n"
4703 	"\t            .sym        display an address as a symbol\n"
4704 	"\t            .sym-offset display an address as a symbol and offset\n"
4705 	"\t            .execname   display a common_pid as a program name\n"
4706 	"\t            .syscall    display a syscall id as a syscall name\n"
4707 	"\t            .log2       display log2 value rather than raw number\n"
4708 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4709 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4710 	"\t    trigger or to start a hist trigger but not log any events\n"
4711 	"\t    until told to do so.  'continue' can be used to start or\n"
4712 	"\t    restart a paused hist trigger.\n\n"
4713 	"\t    The 'clear' parameter will clear the contents of a running\n"
4714 	"\t    hist trigger and leave its current paused/active state\n"
4715 	"\t    unchanged.\n\n"
4716 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4717 	"\t    have one event conditionally start and stop another event's\n"
4718 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4719 	"\t    the enable_event and disable_event triggers.\n"
4720 #endif
4721 ;
4722 
4723 static ssize_t
4724 tracing_readme_read(struct file *filp, char __user *ubuf,
4725 		       size_t cnt, loff_t *ppos)
4726 {
4727 	return simple_read_from_buffer(ubuf, cnt, ppos,
4728 					readme_msg, strlen(readme_msg));
4729 }
4730 
4731 static const struct file_operations tracing_readme_fops = {
4732 	.open		= tracing_open_generic,
4733 	.read		= tracing_readme_read,
4734 	.llseek		= generic_file_llseek,
4735 };
4736 
4737 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4738 {
4739 	int *ptr = v;
4740 
4741 	if (*pos || m->count)
4742 		ptr++;
4743 
4744 	(*pos)++;
4745 
4746 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4747 		if (trace_find_tgid(*ptr))
4748 			return ptr;
4749 	}
4750 
4751 	return NULL;
4752 }
4753 
4754 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4755 {
4756 	void *v;
4757 	loff_t l = 0;
4758 
4759 	if (!tgid_map)
4760 		return NULL;
4761 
4762 	v = &tgid_map[0];
4763 	while (l <= *pos) {
4764 		v = saved_tgids_next(m, v, &l);
4765 		if (!v)
4766 			return NULL;
4767 	}
4768 
4769 	return v;
4770 }
4771 
4772 static void saved_tgids_stop(struct seq_file *m, void *v)
4773 {
4774 }
4775 
4776 static int saved_tgids_show(struct seq_file *m, void *v)
4777 {
4778 	int pid = (int *)v - tgid_map;
4779 
4780 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4781 	return 0;
4782 }
4783 
4784 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4785 	.start		= saved_tgids_start,
4786 	.stop		= saved_tgids_stop,
4787 	.next		= saved_tgids_next,
4788 	.show		= saved_tgids_show,
4789 };
4790 
4791 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4792 {
4793 	if (tracing_disabled)
4794 		return -ENODEV;
4795 
4796 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4797 }
4798 
4799 
4800 static const struct file_operations tracing_saved_tgids_fops = {
4801 	.open		= tracing_saved_tgids_open,
4802 	.read		= seq_read,
4803 	.llseek		= seq_lseek,
4804 	.release	= seq_release,
4805 };
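
/*
 * The saved_tgids file prints "<pid> <tgid>" pairs. It is only populated
 * once the record-tgid option has allocated tgid_map, e.g.:
 *
 *	# echo record-tgid > trace_options
 *	# cat saved_tgids
 */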
4806 
4807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4808 {
4809 	unsigned int *ptr = v;
4810 
4811 	if (*pos || m->count)
4812 		ptr++;
4813 
4814 	(*pos)++;
4815 
4816 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4817 	     ptr++) {
4818 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4819 			continue;
4820 
4821 		return ptr;
4822 	}
4823 
4824 	return NULL;
4825 }
4826 
4827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4828 {
4829 	void *v;
4830 	loff_t l = 0;
4831 
4832 	preempt_disable();
4833 	arch_spin_lock(&trace_cmdline_lock);
4834 
4835 	v = &savedcmd->map_cmdline_to_pid[0];
4836 	while (l <= *pos) {
4837 		v = saved_cmdlines_next(m, v, &l);
4838 		if (!v)
4839 			return NULL;
4840 	}
4841 
4842 	return v;
4843 }
4844 
4845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4846 {
4847 	arch_spin_unlock(&trace_cmdline_lock);
4848 	preempt_enable();
4849 }
4850 
4851 static int saved_cmdlines_show(struct seq_file *m, void *v)
4852 {
4853 	char buf[TASK_COMM_LEN];
4854 	unsigned int *pid = v;
4855 
4856 	__trace_find_cmdline(*pid, buf);
4857 	seq_printf(m, "%d %s\n", *pid, buf);
4858 	return 0;
4859 }
4860 
4861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4862 	.start		= saved_cmdlines_start,
4863 	.next		= saved_cmdlines_next,
4864 	.stop		= saved_cmdlines_stop,
4865 	.show		= saved_cmdlines_show,
4866 };
4867 
4868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4869 {
4870 	if (tracing_disabled)
4871 		return -ENODEV;
4872 
4873 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4874 }
4875 
4876 static const struct file_operations tracing_saved_cmdlines_fops = {
4877 	.open		= tracing_saved_cmdlines_open,
4878 	.read		= seq_read,
4879 	.llseek		= seq_lseek,
4880 	.release	= seq_release,
4881 };
4882 
4883 static ssize_t
4884 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4885 				 size_t cnt, loff_t *ppos)
4886 {
4887 	char buf[64];
4888 	int r;
4889 
4890 	arch_spin_lock(&trace_cmdline_lock);
4891 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4892 	arch_spin_unlock(&trace_cmdline_lock);
4893 
4894 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4895 }
4896 
4897 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4898 {
4899 	kfree(s->saved_cmdlines);
4900 	kfree(s->map_cmdline_to_pid);
4901 	kfree(s);
4902 }
4903 
4904 static int tracing_resize_saved_cmdlines(unsigned int val)
4905 {
4906 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4907 
4908 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4909 	if (!s)
4910 		return -ENOMEM;
4911 
4912 	if (allocate_cmdlines_buffer(val, s) < 0) {
4913 		kfree(s);
4914 		return -ENOMEM;
4915 	}
4916 
4917 	arch_spin_lock(&trace_cmdline_lock);
4918 	savedcmd_temp = savedcmd;
4919 	savedcmd = s;
4920 	arch_spin_unlock(&trace_cmdline_lock);
4921 	free_saved_cmdlines_buffer(savedcmd_temp);
4922 
4923 	return 0;
4924 }
4925 
4926 static ssize_t
4927 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4928 				  size_t cnt, loff_t *ppos)
4929 {
4930 	unsigned long val;
4931 	int ret;
4932 
4933 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4934 	if (ret)
4935 		return ret;
4936 
4937 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4938 	if (!val || val > PID_MAX_DEFAULT)
4939 		return -EINVAL;
4940 
4941 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4942 	if (ret < 0)
4943 		return ret;
4944 
4945 	*ppos += cnt;
4946 
4947 	return cnt;
4948 }
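
/*
 * Example: enlarge the saved comm-to-pid cache so more task names survive
 * in the trace output:
 *
 *	# echo 4096 > saved_cmdlines_size
 */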
4949 
4950 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4951 	.open		= tracing_open_generic,
4952 	.read		= tracing_saved_cmdlines_size_read,
4953 	.write		= tracing_saved_cmdlines_size_write,
4954 };
4955 
4956 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4957 static union trace_eval_map_item *
4958 update_eval_map(union trace_eval_map_item *ptr)
4959 {
4960 	if (!ptr->map.eval_string) {
4961 		if (ptr->tail.next) {
4962 			ptr = ptr->tail.next;
4963 			/* Set ptr to the next real item (skip head) */
4964 			ptr++;
4965 		} else
4966 			return NULL;
4967 	}
4968 	return ptr;
4969 }
4970 
4971 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4972 {
4973 	union trace_eval_map_item *ptr = v;
4974 
4975 	/*
4976 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4977 	 * This really should never happen.
4978 	 */
4979 	ptr = update_eval_map(ptr);
4980 	if (WARN_ON_ONCE(!ptr))
4981 		return NULL;
4982 
4983 	ptr++;
4984 
4985 	(*pos)++;
4986 
4987 	ptr = update_eval_map(ptr);
4988 
4989 	return ptr;
4990 }
4991 
4992 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4993 {
4994 	union trace_eval_map_item *v;
4995 	loff_t l = 0;
4996 
4997 	mutex_lock(&trace_eval_mutex);
4998 
4999 	v = trace_eval_maps;
5000 	if (v)
5001 		v++;
5002 
5003 	while (v && l < *pos) {
5004 		v = eval_map_next(m, v, &l);
5005 	}
5006 
5007 	return v;
5008 }
5009 
5010 static void eval_map_stop(struct seq_file *m, void *v)
5011 {
5012 	mutex_unlock(&trace_eval_mutex);
5013 }
5014 
5015 static int eval_map_show(struct seq_file *m, void *v)
5016 {
5017 	union trace_eval_map_item *ptr = v;
5018 
5019 	seq_printf(m, "%s %ld (%s)\n",
5020 		   ptr->map.eval_string, ptr->map.eval_value,
5021 		   ptr->map.system);
5022 
5023 	return 0;
5024 }
5025 
5026 static const struct seq_operations tracing_eval_map_seq_ops = {
5027 	.start		= eval_map_start,
5028 	.next		= eval_map_next,
5029 	.stop		= eval_map_stop,
5030 	.show		= eval_map_show,
5031 };
5032 
5033 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5034 {
5035 	if (tracing_disabled)
5036 		return -ENODEV;
5037 
5038 	return seq_open(filp, &tracing_eval_map_seq_ops);
5039 }
5040 
5041 static const struct file_operations tracing_eval_map_fops = {
5042 	.open		= tracing_eval_map_open,
5043 	.read		= seq_read,
5044 	.llseek		= seq_lseek,
5045 	.release	= seq_release,
5046 };
5047 
5048 static inline union trace_eval_map_item *
5049 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5050 {
5051 	/* Return tail of array given the head */
5052 	return ptr + ptr->head.length + 1;
5053 }
5054 
5055 static void
5056 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5057 			   int len)
5058 {
5059 	struct trace_eval_map **stop;
5060 	struct trace_eval_map **map;
5061 	union trace_eval_map_item *map_array;
5062 	union trace_eval_map_item *ptr;
5063 
5064 	stop = start + len;
5065 
5066 	/*
5067 	 * The trace_eval_maps contains the map plus a head and tail item,
5068 	 * where the head holds the module and length of array, and the
5069 	 * tail holds a pointer to the next list.
5070 	 */
5071 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5072 	if (!map_array) {
5073 		pr_warn("Unable to allocate trace eval mapping\n");
5074 		return;
5075 	}
5076 
5077 	mutex_lock(&trace_eval_mutex);
5078 
5079 	if (!trace_eval_maps)
5080 		trace_eval_maps = map_array;
5081 	else {
5082 		ptr = trace_eval_maps;
5083 		for (;;) {
5084 			ptr = trace_eval_jmp_to_tail(ptr);
5085 			if (!ptr->tail.next)
5086 				break;
5087 			ptr = ptr->tail.next;
5088 
5089 		}
5090 		ptr->tail.next = map_array;
5091 	}
5092 	map_array->head.mod = mod;
5093 	map_array->head.length = len;
5094 	map_array++;
5095 
5096 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5097 		map_array->map = **map;
5098 		map_array++;
5099 	}
5100 	memset(map_array, 0, sizeof(*map_array));
5101 
5102 	mutex_unlock(&trace_eval_mutex);
5103 }
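
/*
 * Resulting layout of one map_array allocation, as built above:
 *
 *	[ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ zeroed item ]
 *
 * The zeroed item doubles as the tail; its ->tail.next is later pointed
 * at the next module's array when that module is inserted.
 */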
5104 
5105 static void trace_create_eval_file(struct dentry *d_tracer)
5106 {
5107 	trace_create_file("eval_map", 0444, d_tracer,
5108 			  NULL, &tracing_eval_map_fops);
5109 }
5110 
5111 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5112 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5113 static inline void trace_insert_eval_map_file(struct module *mod,
5114 			      struct trace_eval_map **start, int len) { }
5115 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5116 
5117 static void trace_insert_eval_map(struct module *mod,
5118 				  struct trace_eval_map **start, int len)
5119 {
5120 	struct trace_eval_map **map;
5121 
5122 	if (len <= 0)
5123 		return;
5124 
5125 	map = start;
5126 
5127 	trace_event_eval_update(map, len);
5128 
5129 	trace_insert_eval_map_file(mod, start, len);
5130 }
5131 
5132 static ssize_t
5133 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5134 		       size_t cnt, loff_t *ppos)
5135 {
5136 	struct trace_array *tr = filp->private_data;
5137 	char buf[MAX_TRACER_SIZE+2];
5138 	int r;
5139 
5140 	mutex_lock(&trace_types_lock);
5141 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5142 	mutex_unlock(&trace_types_lock);
5143 
5144 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5145 }
5146 
5147 int tracer_init(struct tracer *t, struct trace_array *tr)
5148 {
5149 	tracing_reset_online_cpus(&tr->trace_buffer);
5150 	return t->init(tr);
5151 }
5152 
5153 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5154 {
5155 	int cpu;
5156 
5157 	for_each_tracing_cpu(cpu)
5158 		per_cpu_ptr(buf->data, cpu)->entries = val;
5159 }
5160 
5161 #ifdef CONFIG_TRACER_MAX_TRACE
5162 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5163 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5164 					struct trace_buffer *size_buf, int cpu_id)
5165 {
5166 	int cpu, ret = 0;
5167 
5168 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5169 		for_each_tracing_cpu(cpu) {
5170 			ret = ring_buffer_resize(trace_buf->buffer,
5171 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5172 			if (ret < 0)
5173 				break;
5174 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5175 				per_cpu_ptr(size_buf->data, cpu)->entries;
5176 		}
5177 	} else {
5178 		ret = ring_buffer_resize(trace_buf->buffer,
5179 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5180 		if (ret == 0)
5181 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5182 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5183 	}
5184 
5185 	return ret;
5186 }
5187 #endif /* CONFIG_TRACER_MAX_TRACE */
5188 
5189 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5190 					unsigned long size, int cpu)
5191 {
5192 	int ret;
5193 
5194 	/*
5195 	 * If kernel or user changes the size of the ring buffer
5196 	 * we use the size that was given, and we can forget about
5197 	 * expanding it later.
5198 	 */
5199 	ring_buffer_expanded = true;
5200 
5201 	/* May be called before buffers are initialized */
5202 	if (!tr->trace_buffer.buffer)
5203 		return 0;
5204 
5205 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5206 	if (ret < 0)
5207 		return ret;
5208 
5209 #ifdef CONFIG_TRACER_MAX_TRACE
5210 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5211 	    !tr->current_trace->use_max_tr)
5212 		goto out;
5213 
5214 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5215 	if (ret < 0) {
5216 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5217 						     &tr->trace_buffer, cpu);
5218 		if (r < 0) {
5219 			/*
5220 			 * AARGH! We are left with different
5221 			 * size max buffer!!!!
5222 			 * The max buffer is our "snapshot" buffer.
5223 			 * When a tracer needs a snapshot (one of the
5224 			 * latency tracers), it swaps the max buffer
5225 			 * with the saved snap shot. We succeeded to
5226 			 * update the size of the main buffer, but failed to
5227 			 * update the size of the max buffer. But when we tried
5228 			 * to reset the main buffer to the original size, we
5229 			 * failed there too. This is very unlikely to
5230 			 * happen, but if it does, warn and kill all
5231 			 * tracing.
5232 			 */
5233 			WARN_ON(1);
5234 			tracing_disabled = 1;
5235 		}
5236 		return ret;
5237 	}
5238 
5239 	if (cpu == RING_BUFFER_ALL_CPUS)
5240 		set_buffer_entries(&tr->max_buffer, size);
5241 	else
5242 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5243 
5244  out:
5245 #endif /* CONFIG_TRACER_MAX_TRACE */
5246 
5247 	if (cpu == RING_BUFFER_ALL_CPUS)
5248 		set_buffer_entries(&tr->trace_buffer, size);
5249 	else
5250 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5251 
5252 	return ret;
5253 }
5254 
5255 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5256 					  unsigned long size, int cpu_id)
5257 {
5258 	int ret = size;
5259 
5260 	mutex_lock(&trace_types_lock);
5261 
5262 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5263 		/* make sure, this cpu is enabled in the mask */
5264 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5265 			ret = -EINVAL;
5266 			goto out;
5267 		}
5268 	}
5269 
5270 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5271 	if (ret < 0)
5272 		ret = -ENOMEM;
5273 
5274 out:
5275 	mutex_unlock(&trace_types_lock);
5276 
5277 	return ret;
5278 }
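
/*
 * Reached from writes to buffer_size_kb, e.g.:
 *
 *	# echo 8192 > buffer_size_kb                (all CPUs)
 *	# echo 8192 > per_cpu/cpu1/buffer_size_kb   (just CPU 1)
 *
 * (The buffer_size_kb handler converts the value to bytes before
 *  calling in here.)
 */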
5279 
5280 
5281 /**
5282  * tracing_update_buffers - used by tracing facility to expand ring buffers
5283  *
5284  * To save memory when tracing is never used on a system that has it
5285  * configured in, the ring buffers are set to a minimum size. But once
5286  * a user starts to use the tracing facility, they need to grow to
5287  * their default size.
5288  *
5289  * This function is to be called when a tracer is about to be used.
5290  */
5291 int tracing_update_buffers(void)
5292 {
5293 	int ret = 0;
5294 
5295 	mutex_lock(&trace_types_lock);
5296 	if (!ring_buffer_expanded)
5297 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5298 						RING_BUFFER_ALL_CPUS);
5299 	mutex_unlock(&trace_types_lock);
5300 
5301 	return ret;
5302 }
5303 
5304 struct trace_option_dentry;
5305 
5306 static void
5307 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5308 
5309 /*
5310  * Used to clear out the tracer before deletion of an instance.
5311  * Must have trace_types_lock held.
5312  */
5313 static void tracing_set_nop(struct trace_array *tr)
5314 {
5315 	if (tr->current_trace == &nop_trace)
5316 		return;
5317 
5318 	tr->current_trace->enabled--;
5319 
5320 	if (tr->current_trace->reset)
5321 		tr->current_trace->reset(tr);
5322 
5323 	tr->current_trace = &nop_trace;
5324 }
5325 
5326 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5327 {
5328 	/* Only enable if the directory has been created already. */
5329 	if (!tr->dir)
5330 		return;
5331 
5332 	create_trace_option_files(tr, t);
5333 }
5334 
5335 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5336 {
5337 	struct tracer *t;
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339 	bool had_max_tr;
5340 #endif
5341 	int ret = 0;
5342 
5343 	mutex_lock(&trace_types_lock);
5344 
5345 	if (!ring_buffer_expanded) {
5346 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5347 						RING_BUFFER_ALL_CPUS);
5348 		if (ret < 0)
5349 			goto out;
5350 		ret = 0;
5351 	}
5352 
5353 	for (t = trace_types; t; t = t->next) {
5354 		if (strcmp(t->name, buf) == 0)
5355 			break;
5356 	}
5357 	if (!t) {
5358 		ret = -EINVAL;
5359 		goto out;
5360 	}
5361 	if (t == tr->current_trace)
5362 		goto out;
5363 
5364 	/* Some tracers won't work on kernel command line */
5365 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5366 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5367 			t->name);
5368 		goto out;
5369 	}
5370 
5371 	/* Some tracers are only allowed for the top level buffer */
5372 	if (!trace_ok_for_array(t, tr)) {
5373 		ret = -EINVAL;
5374 		goto out;
5375 	}
5376 
5377 	/* If trace pipe files are being read, we can't change the tracer */
5378 	if (tr->current_trace->ref) {
5379 		ret = -EBUSY;
5380 		goto out;
5381 	}
5382 
5383 	trace_branch_disable();
5384 
5385 	tr->current_trace->enabled--;
5386 
5387 	if (tr->current_trace->reset)
5388 		tr->current_trace->reset(tr);
5389 
5390 	/* Current trace needs to be nop_trace before synchronize_sched */
5391 	tr->current_trace = &nop_trace;
5392 
5393 #ifdef CONFIG_TRACER_MAX_TRACE
5394 	had_max_tr = tr->allocated_snapshot;
5395 
5396 	if (had_max_tr && !t->use_max_tr) {
5397 		/*
5398 		 * We need to make sure that the update_max_tr sees that
5399 		 * current_trace changed to nop_trace to keep it from
5400 		 * swapping the buffers after we resize it.
5401 		 * update_max_tr() is called with interrupts disabled,
5402 		 * so a synchronize_sched() is sufficient.
5403 		 */
5404 		synchronize_sched();
5405 		free_snapshot(tr);
5406 	}
5407 #endif
5408 
5409 #ifdef CONFIG_TRACER_MAX_TRACE
5410 	if (t->use_max_tr && !had_max_tr) {
5411 		ret = alloc_snapshot(tr);
5412 		if (ret < 0)
5413 			goto out;
5414 	}
5415 #endif
5416 
5417 	if (t->init) {
5418 		ret = tracer_init(t, tr);
5419 		if (ret)
5420 			goto out;
5421 	}
5422 
5423 	tr->current_trace = t;
5424 	tr->current_trace->enabled++;
5425 	trace_branch_enable(tr);
5426  out:
5427 	mutex_unlock(&trace_types_lock);
5428 
5429 	return ret;
5430 }
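
/*
 * Reached when a tracer name is written to current_tracer, e.g.:
 *
 *	# cat available_tracers
 *	# echo function_graph > current_tracer
 *	# echo nop > current_tracer
 *
 * Switching away from a tracer that uses the max buffer frees the
 * snapshot buffer; switching to one allocates it (see above).
 */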
5431 
5432 static ssize_t
5433 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5434 			size_t cnt, loff_t *ppos)
5435 {
5436 	struct trace_array *tr = filp->private_data;
5437 	char buf[MAX_TRACER_SIZE+1];
5438 	int i;
5439 	size_t ret;
5440 	int err;
5441 
5442 	ret = cnt;
5443 
5444 	if (cnt > MAX_TRACER_SIZE)
5445 		cnt = MAX_TRACER_SIZE;
5446 
5447 	if (copy_from_user(buf, ubuf, cnt))
5448 		return -EFAULT;
5449 
5450 	buf[cnt] = 0;
5451 
5452 	/* strip ending whitespace. */
5453 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5454 		buf[i] = 0;
5455 
5456 	err = tracing_set_tracer(tr, buf);
5457 	if (err)
5458 		return err;
5459 
5460 	*ppos += ret;
5461 
5462 	return ret;
5463 }
5464 
5465 static ssize_t
5466 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5467 		   size_t cnt, loff_t *ppos)
5468 {
5469 	char buf[64];
5470 	int r;
5471 
5472 	r = snprintf(buf, sizeof(buf), "%ld\n",
5473 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5474 	if (r > sizeof(buf))
5475 		r = sizeof(buf);
5476 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5477 }
5478 
5479 static ssize_t
5480 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5481 		    size_t cnt, loff_t *ppos)
5482 {
5483 	unsigned long val;
5484 	int ret;
5485 
5486 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5487 	if (ret)
5488 		return ret;
5489 
5490 	*ptr = val * 1000;
5491 
5492 	return cnt;
5493 }
5494 
5495 static ssize_t
5496 tracing_thresh_read(struct file *filp, char __user *ubuf,
5497 		    size_t cnt, loff_t *ppos)
5498 {
5499 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5500 }
5501 
5502 static ssize_t
5503 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5504 		     size_t cnt, loff_t *ppos)
5505 {
5506 	struct trace_array *tr = filp->private_data;
5507 	int ret;
5508 
5509 	mutex_lock(&trace_types_lock);
5510 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5511 	if (ret < 0)
5512 		goto out;
5513 
5514 	if (tr->current_trace->update_thresh) {
5515 		ret = tr->current_trace->update_thresh(tr);
5516 		if (ret < 0)
5517 			goto out;
5518 	}
5519 
5520 	ret = cnt;
5521 out:
5522 	mutex_unlock(&trace_types_lock);
5523 
5524 	return ret;
5525 }
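
/*
 * tracing_thresh is read and written in microseconds (the *_nsecs helpers
 * above convert to and from nanoseconds), e.g. only record latencies
 * longer than 100 usecs:
 *
 *	# echo 100 > tracing_thresh
 */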
5526 
5527 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5528 
5529 static ssize_t
5530 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5531 		     size_t cnt, loff_t *ppos)
5532 {
5533 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5534 }
5535 
5536 static ssize_t
5537 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5538 		      size_t cnt, loff_t *ppos)
5539 {
5540 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5541 }
5542 
5543 #endif
5544 
5545 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5546 {
5547 	struct trace_array *tr = inode->i_private;
5548 	struct trace_iterator *iter;
5549 	int ret = 0;
5550 
5551 	if (tracing_disabled)
5552 		return -ENODEV;
5553 
5554 	if (trace_array_get(tr) < 0)
5555 		return -ENODEV;
5556 
5557 	mutex_lock(&trace_types_lock);
5558 
5559 	/* create a buffer to store the information to pass to userspace */
5560 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5561 	if (!iter) {
5562 		ret = -ENOMEM;
5563 		__trace_array_put(tr);
5564 		goto out;
5565 	}
5566 
5567 	trace_seq_init(&iter->seq);
5568 	iter->trace = tr->current_trace;
5569 
5570 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5571 		ret = -ENOMEM;
5572 		goto fail;
5573 	}
5574 
5575 	/* trace pipe does not show start of buffer */
5576 	cpumask_setall(iter->started);
5577 
5578 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5579 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5580 
5581 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5582 	if (trace_clocks[tr->clock_id].in_ns)
5583 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5584 
5585 	iter->tr = tr;
5586 	iter->trace_buffer = &tr->trace_buffer;
5587 	iter->cpu_file = tracing_get_cpu(inode);
5588 	mutex_init(&iter->mutex);
5589 	filp->private_data = iter;
5590 
5591 	if (iter->trace->pipe_open)
5592 		iter->trace->pipe_open(iter);
5593 
5594 	nonseekable_open(inode, filp);
5595 
5596 	tr->current_trace->ref++;
5597 out:
5598 	mutex_unlock(&trace_types_lock);
5599 	return ret;
5600 
5601 fail:
5602 	kfree(iter->trace);
5603 	kfree(iter);
5604 	__trace_array_put(tr);
5605 	mutex_unlock(&trace_types_lock);
5606 	return ret;
5607 }
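
/*
 * trace_pipe is a consuming, blocking read of the live buffer, e.g.:
 *
 *	# cat trace_pipe > /tmp/out &
 *	# echo 1 > tracing_on
 *
 * Unlike "trace", opening trace_pipe does not stop tracing, but it does
 * pin the current tracer (current_trace->ref) until the pipe is released.
 */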
5608 
5609 static int tracing_release_pipe(struct inode *inode, struct file *file)
5610 {
5611 	struct trace_iterator *iter = file->private_data;
5612 	struct trace_array *tr = inode->i_private;
5613 
5614 	mutex_lock(&trace_types_lock);
5615 
5616 	tr->current_trace->ref--;
5617 
5618 	if (iter->trace->pipe_close)
5619 		iter->trace->pipe_close(iter);
5620 
5621 	mutex_unlock(&trace_types_lock);
5622 
5623 	free_cpumask_var(iter->started);
5624 	mutex_destroy(&iter->mutex);
5625 	kfree(iter);
5626 
5627 	trace_array_put(tr);
5628 
5629 	return 0;
5630 }
5631 
5632 static __poll_t
5633 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5634 {
5635 	struct trace_array *tr = iter->tr;
5636 
5637 	/* Iterators are static, they should be filled or empty */
5638 	if (trace_buffer_iter(iter, iter->cpu_file))
5639 		return EPOLLIN | EPOLLRDNORM;
5640 
5641 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5642 		/*
5643 		 * Always select as readable when in blocking mode
5644 		 */
5645 		return EPOLLIN | EPOLLRDNORM;
5646 	else
5647 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5648 					     filp, poll_table);
5649 }
5650 
5651 static __poll_t
5652 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5653 {
5654 	struct trace_iterator *iter = filp->private_data;
5655 
5656 	return trace_poll(iter, filp, poll_table);
5657 }
5658 
5659 /* Must be called with iter->mutex held. */
5660 static int tracing_wait_pipe(struct file *filp)
5661 {
5662 	struct trace_iterator *iter = filp->private_data;
5663 	int ret;
5664 
5665 	while (trace_empty(iter)) {
5666 
5667 		if ((filp->f_flags & O_NONBLOCK)) {
5668 			return -EAGAIN;
5669 		}
5670 
5671 		/*
5672 		 * We block until we read something and tracing is disabled.
5673 		 * We still block if tracing is disabled, but we have never
5674 		 * read anything. This allows a user to cat this file, and
5675 		 * then enable tracing. But after we have read something,
5676 		 * we give an EOF when tracing is again disabled.
5677 		 *
5678 		 * iter->pos will be 0 if we haven't read anything.
5679 		 */
5680 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5681 			break;
5682 
5683 		mutex_unlock(&iter->mutex);
5684 
5685 		ret = wait_on_pipe(iter, false);
5686 
5687 		mutex_lock(&iter->mutex);
5688 
5689 		if (ret)
5690 			return ret;
5691 	}
5692 
5693 	return 1;
5694 }
5695 
5696 /*
5697  * Consumer reader.
5698  */
5699 static ssize_t
5700 tracing_read_pipe(struct file *filp, char __user *ubuf,
5701 		  size_t cnt, loff_t *ppos)
5702 {
5703 	struct trace_iterator *iter = filp->private_data;
5704 	ssize_t sret;
5705 
5706 	/*
5707 	 * Avoid more than one consumer on a single file descriptor.
5708 	 * This is just a matter of trace coherency; the ring buffer itself
5709 	 * is protected.
5710 	 */
5711 	mutex_lock(&iter->mutex);
5712 
5713 	/* return any leftover data */
5714 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5715 	if (sret != -EBUSY)
5716 		goto out;
5717 
5718 	trace_seq_init(&iter->seq);
5719 
5720 	if (iter->trace->read) {
5721 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5722 		if (sret)
5723 			goto out;
5724 	}
5725 
5726 waitagain:
5727 	sret = tracing_wait_pipe(filp);
5728 	if (sret <= 0)
5729 		goto out;
5730 
5731 	/* stop when tracing is finished */
5732 	if (trace_empty(iter)) {
5733 		sret = 0;
5734 		goto out;
5735 	}
5736 
5737 	if (cnt >= PAGE_SIZE)
5738 		cnt = PAGE_SIZE - 1;
5739 
5740 	/* reset all but tr, trace, and overruns */
5741 	memset(&iter->seq, 0,
5742 	       sizeof(struct trace_iterator) -
5743 	       offsetof(struct trace_iterator, seq));
5744 	cpumask_clear(iter->started);
5745 	iter->pos = -1;
5746 
5747 	trace_event_read_lock();
5748 	trace_access_lock(iter->cpu_file);
5749 	while (trace_find_next_entry_inc(iter) != NULL) {
5750 		enum print_line_t ret;
5751 		int save_len = iter->seq.seq.len;
5752 
5753 		ret = print_trace_line(iter);
5754 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5755 			/* don't print partial lines */
5756 			iter->seq.seq.len = save_len;
5757 			break;
5758 		}
5759 		if (ret != TRACE_TYPE_NO_CONSUME)
5760 			trace_consume(iter);
5761 
5762 		if (trace_seq_used(&iter->seq) >= cnt)
5763 			break;
5764 
5765 		/*
5766 		 * Setting the full flag means we reached the trace_seq buffer
5767 		 * size and we should have left via the partial-output condition
5768 		 * above; one of the trace_seq_* functions is not being used properly.
5769 		 */
5770 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5771 			  iter->ent->type);
5772 	}
5773 	trace_access_unlock(iter->cpu_file);
5774 	trace_event_read_unlock();
5775 
5776 	/* Now copy what we have to the user */
5777 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5778 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5779 		trace_seq_init(&iter->seq);
5780 
5781 	/*
5782 	 * If there was nothing to send to user, in spite of consuming trace
5783 	 * entries, go back to wait for more entries.
5784 	 */
5785 	if (sret == -EBUSY)
5786 		goto waitagain;
5787 
5788 out:
5789 	mutex_unlock(&iter->mutex);
5790 
5791 	return sret;
5792 }
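
/*
 * Usage note: tracing_read_pipe() backs the "trace_pipe" files (see
 * tracing_pipe_fops and tracing_init_tracefs_percpu() below). Unlike
 * "trace", reads are destructive: each entry is consumed via
 * trace_consume(), and an empty buffer blocks the reader unless
 * O_NONBLOCK is set. A minimal way to stream events, assuming tracefs
 * is mounted at /sys/kernel/tracing:
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 */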
5793 
5794 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5795 				     unsigned int idx)
5796 {
5797 	__free_page(spd->pages[idx]);
5798 }
5799 
5800 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5801 	.can_merge		= 0,
5802 	.confirm		= generic_pipe_buf_confirm,
5803 	.release		= generic_pipe_buf_release,
5804 	.steal			= generic_pipe_buf_steal,
5805 	.get			= generic_pipe_buf_get,
5806 };
5807 
5808 static size_t
5809 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5810 {
5811 	size_t count;
5812 	int save_len;
5813 	int ret;
5814 
5815 	/* Seq buffer is page-sized, exactly what we need. */
5816 	for (;;) {
5817 		save_len = iter->seq.seq.len;
5818 		ret = print_trace_line(iter);
5819 
5820 		if (trace_seq_has_overflowed(&iter->seq)) {
5821 			iter->seq.seq.len = save_len;
5822 			break;
5823 		}
5824 
5825 		/*
5826 		 * This should not be hit, because it should only
5827 		 * be set if the iter->seq overflowed. But check it
5828 		 * anyway to be safe.
5829 		 */
5830 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5831 			iter->seq.seq.len = save_len;
5832 			break;
5833 		}
5834 
5835 		count = trace_seq_used(&iter->seq) - save_len;
5836 		if (rem < count) {
5837 			rem = 0;
5838 			iter->seq.seq.len = save_len;
5839 			break;
5840 		}
5841 
5842 		if (ret != TRACE_TYPE_NO_CONSUME)
5843 			trace_consume(iter);
5844 		rem -= count;
5845 		if (!trace_find_next_entry_inc(iter))	{
5846 			rem = 0;
5847 			iter->ent = NULL;
5848 			break;
5849 		}
5850 	}
5851 
5852 	return rem;
5853 }
5854 
5855 static ssize_t tracing_splice_read_pipe(struct file *filp,
5856 					loff_t *ppos,
5857 					struct pipe_inode_info *pipe,
5858 					size_t len,
5859 					unsigned int flags)
5860 {
5861 	struct page *pages_def[PIPE_DEF_BUFFERS];
5862 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5863 	struct trace_iterator *iter = filp->private_data;
5864 	struct splice_pipe_desc spd = {
5865 		.pages		= pages_def,
5866 		.partial	= partial_def,
5867 		.nr_pages	= 0, /* This gets updated below. */
5868 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5869 		.ops		= &tracing_pipe_buf_ops,
5870 		.spd_release	= tracing_spd_release_pipe,
5871 	};
5872 	ssize_t ret;
5873 	size_t rem;
5874 	unsigned int i;
5875 
5876 	if (splice_grow_spd(pipe, &spd))
5877 		return -ENOMEM;
5878 
5879 	mutex_lock(&iter->mutex);
5880 
5881 	if (iter->trace->splice_read) {
5882 		ret = iter->trace->splice_read(iter, filp,
5883 					       ppos, pipe, len, flags);
5884 		if (ret)
5885 			goto out_err;
5886 	}
5887 
5888 	ret = tracing_wait_pipe(filp);
5889 	if (ret <= 0)
5890 		goto out_err;
5891 
5892 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5893 		ret = -EFAULT;
5894 		goto out_err;
5895 	}
5896 
5897 	trace_event_read_lock();
5898 	trace_access_lock(iter->cpu_file);
5899 
5900 	/* Fill as many pages as possible. */
5901 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5902 		spd.pages[i] = alloc_page(GFP_KERNEL);
5903 		if (!spd.pages[i])
5904 			break;
5905 
5906 		rem = tracing_fill_pipe_page(rem, iter);
5907 
5908 		/* Copy the data into the page, so we can start over. */
5909 		ret = trace_seq_to_buffer(&iter->seq,
5910 					  page_address(spd.pages[i]),
5911 					  trace_seq_used(&iter->seq));
5912 		if (ret < 0) {
5913 			__free_page(spd.pages[i]);
5914 			break;
5915 		}
5916 		spd.partial[i].offset = 0;
5917 		spd.partial[i].len = trace_seq_used(&iter->seq);
5918 
5919 		trace_seq_init(&iter->seq);
5920 	}
5921 
5922 	trace_access_unlock(iter->cpu_file);
5923 	trace_event_read_unlock();
5924 	mutex_unlock(&iter->mutex);
5925 
5926 	spd.nr_pages = i;
5927 
5928 	if (i)
5929 		ret = splice_to_pipe(pipe, &spd);
5930 	else
5931 		ret = 0;
5932 out:
5933 	splice_shrink_spd(&spd);
5934 	return ret;
5935 
5936 out_err:
5937 	mutex_unlock(&iter->mutex);
5938 	goto out;
5939 }
5940 
5941 static ssize_t
5942 tracing_entries_read(struct file *filp, char __user *ubuf,
5943 		     size_t cnt, loff_t *ppos)
5944 {
5945 	struct inode *inode = file_inode(filp);
5946 	struct trace_array *tr = inode->i_private;
5947 	int cpu = tracing_get_cpu(inode);
5948 	char buf[64];
5949 	int r = 0;
5950 	ssize_t ret;
5951 
5952 	mutex_lock(&trace_types_lock);
5953 
5954 	if (cpu == RING_BUFFER_ALL_CPUS) {
5955 		int cpu, buf_size_same;
5956 		unsigned long size;
5957 
5958 		size = 0;
5959 		buf_size_same = 1;
5960 		/* check if all cpu sizes are same */
5961 		for_each_tracing_cpu(cpu) {
5962 			/* fill in the size from first enabled cpu */
5963 			if (size == 0)
5964 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5965 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5966 				buf_size_same = 0;
5967 				break;
5968 			}
5969 		}
5970 
5971 		if (buf_size_same) {
5972 			if (!ring_buffer_expanded)
5973 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5974 					    size >> 10,
5975 					    trace_buf_size >> 10);
5976 			else
5977 				r = sprintf(buf, "%lu\n", size >> 10);
5978 		} else
5979 			r = sprintf(buf, "X\n");
5980 	} else
5981 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5982 
5983 	mutex_unlock(&trace_types_lock);
5984 
5985 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5986 	return ret;
5987 }
5988 
5989 static ssize_t
5990 tracing_entries_write(struct file *filp, const char __user *ubuf,
5991 		      size_t cnt, loff_t *ppos)
5992 {
5993 	struct inode *inode = file_inode(filp);
5994 	struct trace_array *tr = inode->i_private;
5995 	unsigned long val;
5996 	int ret;
5997 
5998 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999 	if (ret)
6000 		return ret;
6001 
6002 	/* must have at least 1 entry */
6003 	if (!val)
6004 		return -EINVAL;
6005 
6006 	/* value is in KB */
6007 	val <<= 10;
6008 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6009 	if (ret < 0)
6010 		return ret;
6011 
6012 	*ppos += cnt;
6013 
6014 	return cnt;
6015 }
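
/*
 * Usage note: tracing_entries_read()/write() back the "buffer_size_kb"
 * files (tracing_entries_fops below). The value is the per-cpu ring
 * buffer size in kilobytes. For example, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every CPU's buffer to 4 MB, while writing to
 * per_cpu/cpuN/buffer_size_kb resizes only that CPU's buffer
 * (the CPU is picked up via tracing_get_cpu()).
 */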
6016 
6017 static ssize_t
6018 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6019 				size_t cnt, loff_t *ppos)
6020 {
6021 	struct trace_array *tr = filp->private_data;
6022 	char buf[64];
6023 	int r, cpu;
6024 	unsigned long size = 0, expanded_size = 0;
6025 
6026 	mutex_lock(&trace_types_lock);
6027 	for_each_tracing_cpu(cpu) {
6028 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6029 		if (!ring_buffer_expanded)
6030 			expanded_size += trace_buf_size >> 10;
6031 	}
6032 	if (ring_buffer_expanded)
6033 		r = sprintf(buf, "%lu\n", size);
6034 	else
6035 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6036 	mutex_unlock(&trace_types_lock);
6037 
6038 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6039 }
6040 
6041 static ssize_t
6042 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6043 			  size_t cnt, loff_t *ppos)
6044 {
6045 	/*
6046 	 * There is no need to read what the user has written; this function
6047 	 * exists only so that using "echo" on this file does not return an error.
6048 	 */
6049 
6050 	*ppos += cnt;
6051 
6052 	return cnt;
6053 }
6054 
6055 static int
6056 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6057 {
6058 	struct trace_array *tr = inode->i_private;
6059 
6060 	/* disable tracing ? */
6061 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6062 		tracer_tracing_off(tr);
6063 	/* resize the ring buffer to 0 */
6064 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6065 
6066 	trace_array_put(tr);
6067 
6068 	return 0;
6069 }
6070 
6071 static ssize_t
6072 tracing_mark_write(struct file *filp, const char __user *ubuf,
6073 					size_t cnt, loff_t *fpos)
6074 {
6075 	struct trace_array *tr = filp->private_data;
6076 	struct ring_buffer_event *event;
6077 	struct ring_buffer *buffer;
6078 	struct print_entry *entry;
6079 	unsigned long irq_flags;
6080 	const char faulted[] = "<faulted>";
6081 	ssize_t written;
6082 	int size;
6083 	int len;
6084 
6085 /* Used in tracing_mark_raw_write() as well */
6086 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6087 
6088 	if (tracing_disabled)
6089 		return -EINVAL;
6090 
6091 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6092 		return -EINVAL;
6093 
6094 	if (cnt > TRACE_BUF_SIZE)
6095 		cnt = TRACE_BUF_SIZE;
6096 
6097 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6098 
6099 	local_save_flags(irq_flags);
6100 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6101 
6102 	/* If less than "<faulted>", then make sure we can still add that */
6103 	if (cnt < FAULTED_SIZE)
6104 		size += FAULTED_SIZE - cnt;
6105 
6106 	buffer = tr->trace_buffer.buffer;
6107 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6108 					    irq_flags, preempt_count());
6109 	if (unlikely(!event))
6110 		/* Ring buffer disabled, return as if not open for write */
6111 		return -EBADF;
6112 
6113 	entry = ring_buffer_event_data(event);
6114 	entry->ip = _THIS_IP_;
6115 
6116 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6117 	if (len) {
6118 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6119 		cnt = FAULTED_SIZE;
6120 		written = -EFAULT;
6121 	} else
6122 		written = cnt;
6123 	len = cnt;
6124 
6125 	if (entry->buf[cnt - 1] != '\n') {
6126 		entry->buf[cnt] = '\n';
6127 		entry->buf[cnt + 1] = '\0';
6128 	} else
6129 		entry->buf[cnt] = '\0';
6130 
6131 	__buffer_unlock_commit(buffer, event);
6132 
6133 	if (written > 0)
6134 		*fpos += written;
6135 
6136 	return written;
6137 }
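
/*
 * Usage note: tracing_mark_write() backs the "trace_marker" file
 * (tracing_mark_fops below), which lets userspace inject an annotation
 * that shows up as a print entry in the trace output. For example,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo "frame start" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, and "<faulted>" is
 * recorded if the user buffer cannot be copied atomically.
 */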
6138 
6139 /* Limit it for now to 3K (including tag) */
6140 #define RAW_DATA_MAX_SIZE (1024*3)
6141 
6142 static ssize_t
6143 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6144 					size_t cnt, loff_t *fpos)
6145 {
6146 	struct trace_array *tr = filp->private_data;
6147 	struct ring_buffer_event *event;
6148 	struct ring_buffer *buffer;
6149 	struct raw_data_entry *entry;
6150 	const char faulted[] = "<faulted>";
6151 	unsigned long irq_flags;
6152 	ssize_t written;
6153 	int size;
6154 	int len;
6155 
6156 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6157 
6158 	if (tracing_disabled)
6159 		return -EINVAL;
6160 
6161 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6162 		return -EINVAL;
6163 
6164 	/* The marker must at least have a tag id */
6165 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6166 		return -EINVAL;
6167 
6168 	if (cnt > TRACE_BUF_SIZE)
6169 		cnt = TRACE_BUF_SIZE;
6170 
6171 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6172 
6173 	local_save_flags(irq_flags);
6174 	size = sizeof(*entry) + cnt;
6175 	if (cnt < FAULT_SIZE_ID)
6176 		size += FAULT_SIZE_ID - cnt;
6177 
6178 	buffer = tr->trace_buffer.buffer;
6179 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6180 					    irq_flags, preempt_count());
6181 	if (!event)
6182 		/* Ring buffer disabled, return as if not open for write */
6183 		return -EBADF;
6184 
6185 	entry = ring_buffer_event_data(event);
6186 
6187 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6188 	if (len) {
6189 		entry->id = -1;
6190 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6191 		written = -EFAULT;
6192 	} else
6193 		written = cnt;
6194 
6195 	__buffer_unlock_commit(buffer, event);
6196 
6197 	if (written > 0)
6198 		*fpos += written;
6199 
6200 	return written;
6201 }
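
/*
 * Usage note: tracing_mark_raw_write() backs the "trace_marker_raw"
 * file (tracing_mark_raw_fops below). The payload is binary: the first
 * sizeof(unsigned int) bytes are a tag id and the rest is opaque data,
 * capped at RAW_DATA_MAX_SIZE (3K including the tag). Writes shorter
 * than the tag id are rejected with -EINVAL.
 */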
6202 
6203 static int tracing_clock_show(struct seq_file *m, void *v)
6204 {
6205 	struct trace_array *tr = m->private;
6206 	int i;
6207 
6208 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6209 		seq_printf(m,
6210 			"%s%s%s%s", i ? " " : "",
6211 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6212 			i == tr->clock_id ? "]" : "");
6213 	seq_putc(m, '\n');
6214 
6215 	return 0;
6216 }
6217 
6218 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6219 {
6220 	int i;
6221 
6222 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6223 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6224 			break;
6225 	}
6226 	if (i == ARRAY_SIZE(trace_clocks))
6227 		return -EINVAL;
6228 
6229 	mutex_lock(&trace_types_lock);
6230 
6231 	tr->clock_id = i;
6232 
6233 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6234 
6235 	/*
6236 	 * New clock may not be consistent with the previous clock.
6237 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6238 	 */
6239 	tracing_reset_online_cpus(&tr->trace_buffer);
6240 
6241 #ifdef CONFIG_TRACER_MAX_TRACE
6242 	if (tr->max_buffer.buffer)
6243 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6244 	tracing_reset_online_cpus(&tr->max_buffer);
6245 #endif
6246 
6247 	mutex_unlock(&trace_types_lock);
6248 
6249 	return 0;
6250 }
6251 
6252 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6253 				   size_t cnt, loff_t *fpos)
6254 {
6255 	struct seq_file *m = filp->private_data;
6256 	struct trace_array *tr = m->private;
6257 	char buf[64];
6258 	const char *clockstr;
6259 	int ret;
6260 
6261 	if (cnt >= sizeof(buf))
6262 		return -EINVAL;
6263 
6264 	if (copy_from_user(buf, ubuf, cnt))
6265 		return -EFAULT;
6266 
6267 	buf[cnt] = 0;
6268 
6269 	clockstr = strstrip(buf);
6270 
6271 	ret = tracing_set_clock(tr, clockstr);
6272 	if (ret)
6273 		return ret;
6274 
6275 	*fpos += cnt;
6276 
6277 	return cnt;
6278 }
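
/*
 * Usage note: tracing_clock_show() and tracing_clock_write() back the
 * "trace_clock" file (trace_clock_fops below). Reading it lists the
 * available clocks with the current one in brackets; writing a clock
 * name (e.g. "local", "global" or "mono") switches the clock and resets
 * the buffers, since timestamps taken with different clocks cannot be
 * compared:
 *
 *   # echo mono > /sys/kernel/tracing/trace_clock
 */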
6279 
6280 static int tracing_clock_open(struct inode *inode, struct file *file)
6281 {
6282 	struct trace_array *tr = inode->i_private;
6283 	int ret;
6284 
6285 	if (tracing_disabled)
6286 		return -ENODEV;
6287 
6288 	if (trace_array_get(tr))
6289 		return -ENODEV;
6290 
6291 	ret = single_open(file, tracing_clock_show, inode->i_private);
6292 	if (ret < 0)
6293 		trace_array_put(tr);
6294 
6295 	return ret;
6296 }
6297 
6298 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6299 {
6300 	struct trace_array *tr = m->private;
6301 
6302 	mutex_lock(&trace_types_lock);
6303 
6304 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6305 		seq_puts(m, "delta [absolute]\n");
6306 	else
6307 		seq_puts(m, "[delta] absolute\n");
6308 
6309 	mutex_unlock(&trace_types_lock);
6310 
6311 	return 0;
6312 }
6313 
6314 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6315 {
6316 	struct trace_array *tr = inode->i_private;
6317 	int ret;
6318 
6319 	if (tracing_disabled)
6320 		return -ENODEV;
6321 
6322 	if (trace_array_get(tr))
6323 		return -ENODEV;
6324 
6325 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6326 	if (ret < 0)
6327 		trace_array_put(tr);
6328 
6329 	return ret;
6330 }
6331 
6332 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6333 {
6334 	int ret = 0;
6335 
6336 	mutex_lock(&trace_types_lock);
6337 
6338 	if (abs && tr->time_stamp_abs_ref++)
6339 		goto out;
6340 
6341 	if (!abs) {
6342 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6343 			ret = -EINVAL;
6344 			goto out;
6345 		}
6346 
6347 		if (--tr->time_stamp_abs_ref)
6348 			goto out;
6349 	}
6350 
6351 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6352 
6353 #ifdef CONFIG_TRACER_MAX_TRACE
6354 	if (tr->max_buffer.buffer)
6355 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6356 #endif
6357  out:
6358 	mutex_unlock(&trace_types_lock);
6359 
6360 	return ret;
6361 }
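
/*
 * Note: tracing_set_time_stamp_abs() is reference counted: several
 * users (e.g. hist triggers) may request absolute timestamps, and delta
 * mode is only restored when the last of them drops its reference. The
 * current mode is what tracing_time_stamp_mode_show() above reports.
 */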
6362 
6363 struct ftrace_buffer_info {
6364 	struct trace_iterator	iter;
6365 	void			*spare;
6366 	unsigned int		spare_cpu;
6367 	unsigned int		read;
6368 };
6369 
6370 #ifdef CONFIG_TRACER_SNAPSHOT
6371 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6372 {
6373 	struct trace_array *tr = inode->i_private;
6374 	struct trace_iterator *iter;
6375 	struct seq_file *m;
6376 	int ret = 0;
6377 
6378 	if (trace_array_get(tr) < 0)
6379 		return -ENODEV;
6380 
6381 	if (file->f_mode & FMODE_READ) {
6382 		iter = __tracing_open(inode, file, true);
6383 		if (IS_ERR(iter))
6384 			ret = PTR_ERR(iter);
6385 	} else {
6386 		/* Writes still need the seq_file to hold the private data */
6387 		ret = -ENOMEM;
6388 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6389 		if (!m)
6390 			goto out;
6391 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6392 		if (!iter) {
6393 			kfree(m);
6394 			goto out;
6395 		}
6396 		ret = 0;
6397 
6398 		iter->tr = tr;
6399 		iter->trace_buffer = &tr->max_buffer;
6400 		iter->cpu_file = tracing_get_cpu(inode);
6401 		m->private = iter;
6402 		file->private_data = m;
6403 	}
6404 out:
6405 	if (ret < 0)
6406 		trace_array_put(tr);
6407 
6408 	return ret;
6409 }
6410 
6411 static ssize_t
6412 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6413 		       loff_t *ppos)
6414 {
6415 	struct seq_file *m = filp->private_data;
6416 	struct trace_iterator *iter = m->private;
6417 	struct trace_array *tr = iter->tr;
6418 	unsigned long val;
6419 	int ret;
6420 
6421 	ret = tracing_update_buffers();
6422 	if (ret < 0)
6423 		return ret;
6424 
6425 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6426 	if (ret)
6427 		return ret;
6428 
6429 	mutex_lock(&trace_types_lock);
6430 
6431 	if (tr->current_trace->use_max_tr) {
6432 		ret = -EBUSY;
6433 		goto out;
6434 	}
6435 
6436 	switch (val) {
6437 	case 0:
6438 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6439 			ret = -EINVAL;
6440 			break;
6441 		}
6442 		if (tr->allocated_snapshot)
6443 			free_snapshot(tr);
6444 		break;
6445 	case 1:
6446 /* Only allow per-cpu swap if the ring buffer supports it */
6447 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6448 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6449 			ret = -EINVAL;
6450 			break;
6451 		}
6452 #endif
6453 		if (!tr->allocated_snapshot) {
6454 			ret = alloc_snapshot(tr);
6455 			if (ret < 0)
6456 				break;
6457 		}
6458 		local_irq_disable();
6459 		/* Now, we're going to swap */
6460 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6461 			update_max_tr(tr, current, smp_processor_id());
6462 		else
6463 			update_max_tr_single(tr, current, iter->cpu_file);
6464 		local_irq_enable();
6465 		break;
6466 	default:
6467 		if (tr->allocated_snapshot) {
6468 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6469 				tracing_reset_online_cpus(&tr->max_buffer);
6470 			else
6471 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6472 		}
6473 		break;
6474 	}
6475 
6476 	if (ret >= 0) {
6477 		*ppos += cnt;
6478 		ret = cnt;
6479 	}
6480 out:
6481 	mutex_unlock(&trace_types_lock);
6482 	return ret;
6483 }
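
/*
 * Summary of the values accepted by the "snapshot" file (snapshot_fops
 * below), as implemented by the switch above:
 *
 *   echo 0 > snapshot   - free the snapshot buffer (whole instance only,
 *                         not allowed on a per-cpu snapshot file)
 *   echo 1 > snapshot   - allocate the snapshot buffer if needed and
 *                         swap it with the live buffer
 *   echo 2 > snapshot   - (or any other value) clear the snapshot
 *                         contents without freeing the buffer
 */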
6484 
6485 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6486 {
6487 	struct seq_file *m = file->private_data;
6488 	int ret;
6489 
6490 	ret = tracing_release(inode, file);
6491 
6492 	if (file->f_mode & FMODE_READ)
6493 		return ret;
6494 
6495 	/* If write only, the seq_file is just a stub */
6496 	if (m)
6497 		kfree(m->private);
6498 	kfree(m);
6499 
6500 	return 0;
6501 }
6502 
6503 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6504 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6505 				    size_t count, loff_t *ppos);
6506 static int tracing_buffers_release(struct inode *inode, struct file *file);
6507 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6508 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6509 
6510 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6511 {
6512 	struct ftrace_buffer_info *info;
6513 	int ret;
6514 
6515 	ret = tracing_buffers_open(inode, filp);
6516 	if (ret < 0)
6517 		return ret;
6518 
6519 	info = filp->private_data;
6520 
6521 	if (info->iter.trace->use_max_tr) {
6522 		tracing_buffers_release(inode, filp);
6523 		return -EBUSY;
6524 	}
6525 
6526 	info->iter.snapshot = true;
6527 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6528 
6529 	return ret;
6530 }
6531 
6532 #endif /* CONFIG_TRACER_SNAPSHOT */
6533 
6534 
6535 static const struct file_operations tracing_thresh_fops = {
6536 	.open		= tracing_open_generic,
6537 	.read		= tracing_thresh_read,
6538 	.write		= tracing_thresh_write,
6539 	.llseek		= generic_file_llseek,
6540 };
6541 
6542 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6543 static const struct file_operations tracing_max_lat_fops = {
6544 	.open		= tracing_open_generic,
6545 	.read		= tracing_max_lat_read,
6546 	.write		= tracing_max_lat_write,
6547 	.llseek		= generic_file_llseek,
6548 };
6549 #endif
6550 
6551 static const struct file_operations set_tracer_fops = {
6552 	.open		= tracing_open_generic,
6553 	.read		= tracing_set_trace_read,
6554 	.write		= tracing_set_trace_write,
6555 	.llseek		= generic_file_llseek,
6556 };
6557 
6558 static const struct file_operations tracing_pipe_fops = {
6559 	.open		= tracing_open_pipe,
6560 	.poll		= tracing_poll_pipe,
6561 	.read		= tracing_read_pipe,
6562 	.splice_read	= tracing_splice_read_pipe,
6563 	.release	= tracing_release_pipe,
6564 	.llseek		= no_llseek,
6565 };
6566 
6567 static const struct file_operations tracing_entries_fops = {
6568 	.open		= tracing_open_generic_tr,
6569 	.read		= tracing_entries_read,
6570 	.write		= tracing_entries_write,
6571 	.llseek		= generic_file_llseek,
6572 	.release	= tracing_release_generic_tr,
6573 };
6574 
6575 static const struct file_operations tracing_total_entries_fops = {
6576 	.open		= tracing_open_generic_tr,
6577 	.read		= tracing_total_entries_read,
6578 	.llseek		= generic_file_llseek,
6579 	.release	= tracing_release_generic_tr,
6580 };
6581 
6582 static const struct file_operations tracing_free_buffer_fops = {
6583 	.open		= tracing_open_generic_tr,
6584 	.write		= tracing_free_buffer_write,
6585 	.release	= tracing_free_buffer_release,
6586 };
6587 
6588 static const struct file_operations tracing_mark_fops = {
6589 	.open		= tracing_open_generic_tr,
6590 	.write		= tracing_mark_write,
6591 	.llseek		= generic_file_llseek,
6592 	.release	= tracing_release_generic_tr,
6593 };
6594 
6595 static const struct file_operations tracing_mark_raw_fops = {
6596 	.open		= tracing_open_generic_tr,
6597 	.write		= tracing_mark_raw_write,
6598 	.llseek		= generic_file_llseek,
6599 	.release	= tracing_release_generic_tr,
6600 };
6601 
6602 static const struct file_operations trace_clock_fops = {
6603 	.open		= tracing_clock_open,
6604 	.read		= seq_read,
6605 	.llseek		= seq_lseek,
6606 	.release	= tracing_single_release_tr,
6607 	.write		= tracing_clock_write,
6608 };
6609 
6610 static const struct file_operations trace_time_stamp_mode_fops = {
6611 	.open		= tracing_time_stamp_mode_open,
6612 	.read		= seq_read,
6613 	.llseek		= seq_lseek,
6614 	.release	= tracing_single_release_tr,
6615 };
6616 
6617 #ifdef CONFIG_TRACER_SNAPSHOT
6618 static const struct file_operations snapshot_fops = {
6619 	.open		= tracing_snapshot_open,
6620 	.read		= seq_read,
6621 	.write		= tracing_snapshot_write,
6622 	.llseek		= tracing_lseek,
6623 	.release	= tracing_snapshot_release,
6624 };
6625 
6626 static const struct file_operations snapshot_raw_fops = {
6627 	.open		= snapshot_raw_open,
6628 	.read		= tracing_buffers_read,
6629 	.release	= tracing_buffers_release,
6630 	.splice_read	= tracing_buffers_splice_read,
6631 	.llseek		= no_llseek,
6632 };
6633 
6634 #endif /* CONFIG_TRACER_SNAPSHOT */
6635 
6636 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6637 {
6638 	struct trace_array *tr = inode->i_private;
6639 	struct ftrace_buffer_info *info;
6640 	int ret;
6641 
6642 	if (tracing_disabled)
6643 		return -ENODEV;
6644 
6645 	if (trace_array_get(tr) < 0)
6646 		return -ENODEV;
6647 
6648 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6649 	if (!info) {
6650 		trace_array_put(tr);
6651 		return -ENOMEM;
6652 	}
6653 
6654 	mutex_lock(&trace_types_lock);
6655 
6656 	info->iter.tr		= tr;
6657 	info->iter.cpu_file	= tracing_get_cpu(inode);
6658 	info->iter.trace	= tr->current_trace;
6659 	info->iter.trace_buffer = &tr->trace_buffer;
6660 	info->spare		= NULL;
6661 	/* Force reading ring buffer for first read */
6662 	info->read		= (unsigned int)-1;
6663 
6664 	filp->private_data = info;
6665 
6666 	tr->current_trace->ref++;
6667 
6668 	mutex_unlock(&trace_types_lock);
6669 
6670 	ret = nonseekable_open(inode, filp);
6671 	if (ret < 0)
6672 		trace_array_put(tr);
6673 
6674 	return ret;
6675 }
6676 
6677 static __poll_t
6678 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6679 {
6680 	struct ftrace_buffer_info *info = filp->private_data;
6681 	struct trace_iterator *iter = &info->iter;
6682 
6683 	return trace_poll(iter, filp, poll_table);
6684 }
6685 
6686 static ssize_t
6687 tracing_buffers_read(struct file *filp, char __user *ubuf,
6688 		     size_t count, loff_t *ppos)
6689 {
6690 	struct ftrace_buffer_info *info = filp->private_data;
6691 	struct trace_iterator *iter = &info->iter;
6692 	ssize_t ret = 0;
6693 	ssize_t size;
6694 
6695 	if (!count)
6696 		return 0;
6697 
6698 #ifdef CONFIG_TRACER_MAX_TRACE
6699 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6700 		return -EBUSY;
6701 #endif
6702 
6703 	if (!info->spare) {
6704 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6705 							  iter->cpu_file);
6706 		if (IS_ERR(info->spare)) {
6707 			ret = PTR_ERR(info->spare);
6708 			info->spare = NULL;
6709 		} else {
6710 			info->spare_cpu = iter->cpu_file;
6711 		}
6712 	}
6713 	if (!info->spare)
6714 		return ret;
6715 
6716 	/* Do we have previous read data to read? */
6717 	if (info->read < PAGE_SIZE)
6718 		goto read;
6719 
6720  again:
6721 	trace_access_lock(iter->cpu_file);
6722 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6723 				    &info->spare,
6724 				    count,
6725 				    iter->cpu_file, 0);
6726 	trace_access_unlock(iter->cpu_file);
6727 
6728 	if (ret < 0) {
6729 		if (trace_empty(iter)) {
6730 			if ((filp->f_flags & O_NONBLOCK))
6731 				return -EAGAIN;
6732 
6733 			ret = wait_on_pipe(iter, false);
6734 			if (ret)
6735 				return ret;
6736 
6737 			goto again;
6738 		}
6739 		return 0;
6740 	}
6741 
6742 	info->read = 0;
6743  read:
6744 	size = PAGE_SIZE - info->read;
6745 	if (size > count)
6746 		size = count;
6747 
6748 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6749 	if (ret == size)
6750 		return -EFAULT;
6751 
6752 	size -= ret;
6753 
6754 	*ppos += size;
6755 	info->read += size;
6756 
6757 	return size;
6758 }
6759 
6760 static int tracing_buffers_release(struct inode *inode, struct file *file)
6761 {
6762 	struct ftrace_buffer_info *info = file->private_data;
6763 	struct trace_iterator *iter = &info->iter;
6764 
6765 	mutex_lock(&trace_types_lock);
6766 
6767 	iter->tr->current_trace->ref--;
6768 
6769 	__trace_array_put(iter->tr);
6770 
6771 	if (info->spare)
6772 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6773 					   info->spare_cpu, info->spare);
6774 	kfree(info);
6775 
6776 	mutex_unlock(&trace_types_lock);
6777 
6778 	return 0;
6779 }
6780 
6781 struct buffer_ref {
6782 	struct ring_buffer	*buffer;
6783 	void			*page;
6784 	int			cpu;
6785 	int			ref;
6786 };
6787 
6788 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6789 				    struct pipe_buffer *buf)
6790 {
6791 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6792 
6793 	if (--ref->ref)
6794 		return;
6795 
6796 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6797 	kfree(ref);
6798 	buf->private = 0;
6799 }
6800 
6801 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6802 				struct pipe_buffer *buf)
6803 {
6804 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6805 
6806 	ref->ref++;
6807 }
6808 
6809 /* Pipe buffer operations for a buffer. */
6810 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6811 	.can_merge		= 0,
6812 	.confirm		= generic_pipe_buf_confirm,
6813 	.release		= buffer_pipe_buf_release,
6814 	.steal			= generic_pipe_buf_steal,
6815 	.get			= buffer_pipe_buf_get,
6816 };
6817 
6818 /*
6819  * Callback from splice_to_pipe(): release any pages left in the spd
6820  * in case we errored out while filling the pipe.
6821  */
6822 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6823 {
6824 	struct buffer_ref *ref =
6825 		(struct buffer_ref *)spd->partial[i].private;
6826 
6827 	if (--ref->ref)
6828 		return;
6829 
6830 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6831 	kfree(ref);
6832 	spd->partial[i].private = 0;
6833 }
6834 
6835 static ssize_t
6836 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6837 			    struct pipe_inode_info *pipe, size_t len,
6838 			    unsigned int flags)
6839 {
6840 	struct ftrace_buffer_info *info = file->private_data;
6841 	struct trace_iterator *iter = &info->iter;
6842 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6843 	struct page *pages_def[PIPE_DEF_BUFFERS];
6844 	struct splice_pipe_desc spd = {
6845 		.pages		= pages_def,
6846 		.partial	= partial_def,
6847 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6848 		.ops		= &buffer_pipe_buf_ops,
6849 		.spd_release	= buffer_spd_release,
6850 	};
6851 	struct buffer_ref *ref;
6852 	int entries, i;
6853 	ssize_t ret = 0;
6854 
6855 #ifdef CONFIG_TRACER_MAX_TRACE
6856 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6857 		return -EBUSY;
6858 #endif
6859 
6860 	if (*ppos & (PAGE_SIZE - 1))
6861 		return -EINVAL;
6862 
6863 	if (len & (PAGE_SIZE - 1)) {
6864 		if (len < PAGE_SIZE)
6865 			return -EINVAL;
6866 		len &= PAGE_MASK;
6867 	}
6868 
6869 	if (splice_grow_spd(pipe, &spd))
6870 		return -ENOMEM;
6871 
6872  again:
6873 	trace_access_lock(iter->cpu_file);
6874 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6875 
6876 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6877 		struct page *page;
6878 		int r;
6879 
6880 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6881 		if (!ref) {
6882 			ret = -ENOMEM;
6883 			break;
6884 		}
6885 
6886 		ref->ref = 1;
6887 		ref->buffer = iter->trace_buffer->buffer;
6888 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6889 		if (IS_ERR(ref->page)) {
6890 			ret = PTR_ERR(ref->page);
6891 			ref->page = NULL;
6892 			kfree(ref);
6893 			break;
6894 		}
6895 		ref->cpu = iter->cpu_file;
6896 
6897 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6898 					  len, iter->cpu_file, 1);
6899 		if (r < 0) {
6900 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6901 						   ref->page);
6902 			kfree(ref);
6903 			break;
6904 		}
6905 
6906 		page = virt_to_page(ref->page);
6907 
6908 		spd.pages[i] = page;
6909 		spd.partial[i].len = PAGE_SIZE;
6910 		spd.partial[i].offset = 0;
6911 		spd.partial[i].private = (unsigned long)ref;
6912 		spd.nr_pages++;
6913 		*ppos += PAGE_SIZE;
6914 
6915 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6916 	}
6917 
6918 	trace_access_unlock(iter->cpu_file);
6919 	spd.nr_pages = i;
6920 
6921 	/* did we read anything? */
6922 	if (!spd.nr_pages) {
6923 		if (ret)
6924 			goto out;
6925 
6926 		ret = -EAGAIN;
6927 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6928 			goto out;
6929 
6930 		ret = wait_on_pipe(iter, true);
6931 		if (ret)
6932 			goto out;
6933 
6934 		goto again;
6935 	}
6936 
6937 	ret = splice_to_pipe(pipe, &spd);
6938 out:
6939 	splice_shrink_spd(&spd);
6940 
6941 	return ret;
6942 }
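
/*
 * Note: tracing_buffers_splice_read() is the zero-copy path behind the
 * per-cpu "trace_pipe_raw" files (tracing_buffers_fops below). Whole
 * ring buffer pages are handed to the pipe as buffer_ref backed pipe
 * buffers and are only returned to the ring buffer once the last
 * reference is dropped in buffer_pipe_buf_release() or
 * buffer_spd_release().
 */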
6943 
6944 static const struct file_operations tracing_buffers_fops = {
6945 	.open		= tracing_buffers_open,
6946 	.read		= tracing_buffers_read,
6947 	.poll		= tracing_buffers_poll,
6948 	.release	= tracing_buffers_release,
6949 	.splice_read	= tracing_buffers_splice_read,
6950 	.llseek		= no_llseek,
6951 };
6952 
6953 static ssize_t
6954 tracing_stats_read(struct file *filp, char __user *ubuf,
6955 		   size_t count, loff_t *ppos)
6956 {
6957 	struct inode *inode = file_inode(filp);
6958 	struct trace_array *tr = inode->i_private;
6959 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6960 	int cpu = tracing_get_cpu(inode);
6961 	struct trace_seq *s;
6962 	unsigned long cnt;
6963 	unsigned long long t;
6964 	unsigned long usec_rem;
6965 
6966 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6967 	if (!s)
6968 		return -ENOMEM;
6969 
6970 	trace_seq_init(s);
6971 
6972 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6973 	trace_seq_printf(s, "entries: %ld\n", cnt);
6974 
6975 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6976 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6977 
6978 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6979 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6980 
6981 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6982 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6983 
6984 	if (trace_clocks[tr->clock_id].in_ns) {
6985 		/* local or global for trace_clock */
6986 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6987 		usec_rem = do_div(t, USEC_PER_SEC);
6988 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6989 								t, usec_rem);
6990 
6991 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6992 		usec_rem = do_div(t, USEC_PER_SEC);
6993 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6994 	} else {
6995 		/* counter or tsc mode for trace_clock */
6996 		trace_seq_printf(s, "oldest event ts: %llu\n",
6997 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6998 
6999 		trace_seq_printf(s, "now ts: %llu\n",
7000 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7001 	}
7002 
7003 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7004 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7005 
7006 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7007 	trace_seq_printf(s, "read events: %ld\n", cnt);
7008 
7009 	count = simple_read_from_buffer(ubuf, count, ppos,
7010 					s->buffer, trace_seq_used(s));
7011 
7012 	kfree(s);
7013 
7014 	return count;
7015 }
7016 
7017 static const struct file_operations tracing_stats_fops = {
7018 	.open		= tracing_open_generic_tr,
7019 	.read		= tracing_stats_read,
7020 	.llseek		= generic_file_llseek,
7021 	.release	= tracing_release_generic_tr,
7022 };
7023 
7024 #ifdef CONFIG_DYNAMIC_FTRACE
7025 
7026 static ssize_t
7027 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7028 		  size_t cnt, loff_t *ppos)
7029 {
7030 	unsigned long *p = filp->private_data;
7031 	char buf[64]; /* Not too big for a shallow stack */
7032 	int r;
7033 
7034 	r = scnprintf(buf, 63, "%ld", *p);
7035 	buf[r++] = '\n';
7036 
7037 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7038 }
7039 
7040 static const struct file_operations tracing_dyn_info_fops = {
7041 	.open		= tracing_open_generic,
7042 	.read		= tracing_read_dyn_info,
7043 	.llseek		= generic_file_llseek,
7044 };
7045 #endif /* CONFIG_DYNAMIC_FTRACE */
7046 
7047 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7048 static void
7049 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7050 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7051 		void *data)
7052 {
7053 	tracing_snapshot_instance(tr);
7054 }
7055 
7056 static void
7057 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7058 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7059 		      void *data)
7060 {
7061 	struct ftrace_func_mapper *mapper = data;
7062 	long *count = NULL;
7063 
7064 	if (mapper)
7065 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7066 
7067 	if (count) {
7068 
7069 		if (*count <= 0)
7070 			return;
7071 
7072 		(*count)--;
7073 	}
7074 
7075 	tracing_snapshot_instance(tr);
7076 }
7077 
7078 static int
7079 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7080 		      struct ftrace_probe_ops *ops, void *data)
7081 {
7082 	struct ftrace_func_mapper *mapper = data;
7083 	long *count = NULL;
7084 
7085 	seq_printf(m, "%ps:", (void *)ip);
7086 
7087 	seq_puts(m, "snapshot");
7088 
7089 	if (mapper)
7090 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7091 
7092 	if (count)
7093 		seq_printf(m, ":count=%ld\n", *count);
7094 	else
7095 		seq_puts(m, ":unlimited\n");
7096 
7097 	return 0;
7098 }
7099 
7100 static int
7101 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7102 		     unsigned long ip, void *init_data, void **data)
7103 {
7104 	struct ftrace_func_mapper *mapper = *data;
7105 
7106 	if (!mapper) {
7107 		mapper = allocate_ftrace_func_mapper();
7108 		if (!mapper)
7109 			return -ENOMEM;
7110 		*data = mapper;
7111 	}
7112 
7113 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7114 }
7115 
7116 static void
7117 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7118 		     unsigned long ip, void *data)
7119 {
7120 	struct ftrace_func_mapper *mapper = data;
7121 
7122 	if (!ip) {
7123 		if (!mapper)
7124 			return;
7125 		free_ftrace_func_mapper(mapper, NULL);
7126 		return;
7127 	}
7128 
7129 	ftrace_func_mapper_remove_ip(mapper, ip);
7130 }
7131 
7132 static struct ftrace_probe_ops snapshot_probe_ops = {
7133 	.func			= ftrace_snapshot,
7134 	.print			= ftrace_snapshot_print,
7135 };
7136 
7137 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7138 	.func			= ftrace_count_snapshot,
7139 	.print			= ftrace_snapshot_print,
7140 	.init			= ftrace_snapshot_init,
7141 	.free			= ftrace_snapshot_free,
7142 };
7143 
7144 static int
7145 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7146 			       char *glob, char *cmd, char *param, int enable)
7147 {
7148 	struct ftrace_probe_ops *ops;
7149 	void *count = (void *)-1;
7150 	char *number;
7151 	int ret;
7152 
7153 	if (!tr)
7154 		return -ENODEV;
7155 
7156 	/* hash funcs only work with set_ftrace_filter */
7157 	if (!enable)
7158 		return -EINVAL;
7159 
7160 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7161 
7162 	if (glob[0] == '!')
7163 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7164 
7165 	if (!param)
7166 		goto out_reg;
7167 
7168 	number = strsep(&param, ":");
7169 
7170 	if (!strlen(number))
7171 		goto out_reg;
7172 
7173 	/*
7174 	 * We use the callback data field (which is a pointer)
7175 	 * as our counter.
7176 	 */
7177 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7178 	if (ret)
7179 		return ret;
7180 
7181  out_reg:
7182 	ret = alloc_snapshot(tr);
7183 	if (ret < 0)
7184 		goto out;
7185 
7186 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7187 
7188  out:
7189 	return ret < 0 ? ret : 0;
7190 }
7191 
7192 static struct ftrace_func_command ftrace_snapshot_cmd = {
7193 	.name			= "snapshot",
7194 	.func			= ftrace_trace_snapshot_callback,
7195 };
7196 
7197 static __init int register_snapshot_cmd(void)
7198 {
7199 	return register_ftrace_command(&ftrace_snapshot_cmd);
7200 }
7201 #else
7202 static inline __init int register_snapshot_cmd(void) { return 0; }
7203 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7204 
7205 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7206 {
7207 	if (WARN_ON(!tr->dir))
7208 		return ERR_PTR(-ENODEV);
7209 
7210 	/* Top directory uses NULL as the parent */
7211 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7212 		return NULL;
7213 
7214 	/* All sub buffers have a descriptor */
7215 	return tr->dir;
7216 }
7217 
7218 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7219 {
7220 	struct dentry *d_tracer;
7221 
7222 	if (tr->percpu_dir)
7223 		return tr->percpu_dir;
7224 
7225 	d_tracer = tracing_get_dentry(tr);
7226 	if (IS_ERR(d_tracer))
7227 		return NULL;
7228 
7229 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7230 
7231 	WARN_ONCE(!tr->percpu_dir,
7232 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7233 
7234 	return tr->percpu_dir;
7235 }
7236 
7237 static struct dentry *
7238 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7239 		      void *data, long cpu, const struct file_operations *fops)
7240 {
7241 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7242 
7243 	if (ret) /* See tracing_get_cpu() */
7244 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7245 	return ret;
7246 }
7247 
7248 static void
7249 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7250 {
7251 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7252 	struct dentry *d_cpu;
7253 	char cpu_dir[30]; /* 30 characters should be more than enough */
7254 
7255 	if (!d_percpu)
7256 		return;
7257 
7258 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7259 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7260 	if (!d_cpu) {
7261 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7262 		return;
7263 	}
7264 
7265 	/* per cpu trace_pipe */
7266 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7267 				tr, cpu, &tracing_pipe_fops);
7268 
7269 	/* per cpu trace */
7270 	trace_create_cpu_file("trace", 0644, d_cpu,
7271 				tr, cpu, &tracing_fops);
7272 
7273 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7274 				tr, cpu, &tracing_buffers_fops);
7275 
7276 	trace_create_cpu_file("stats", 0444, d_cpu,
7277 				tr, cpu, &tracing_stats_fops);
7278 
7279 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7280 				tr, cpu, &tracing_entries_fops);
7281 
7282 #ifdef CONFIG_TRACER_SNAPSHOT
7283 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7284 				tr, cpu, &snapshot_fops);
7285 
7286 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7287 				tr, cpu, &snapshot_raw_fops);
7288 #endif
7289 }
7290 
7291 #ifdef CONFIG_FTRACE_SELFTEST
7292 /* Let selftest have access to static functions in this file */
7293 #include "trace_selftest.c"
7294 #endif
7295 
7296 static ssize_t
7297 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7298 			loff_t *ppos)
7299 {
7300 	struct trace_option_dentry *topt = filp->private_data;
7301 	char *buf;
7302 
7303 	if (topt->flags->val & topt->opt->bit)
7304 		buf = "1\n";
7305 	else
7306 		buf = "0\n";
7307 
7308 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7309 }
7310 
7311 static ssize_t
7312 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7313 			 loff_t *ppos)
7314 {
7315 	struct trace_option_dentry *topt = filp->private_data;
7316 	unsigned long val;
7317 	int ret;
7318 
7319 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7320 	if (ret)
7321 		return ret;
7322 
7323 	if (val != 0 && val != 1)
7324 		return -EINVAL;
7325 
7326 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7327 		mutex_lock(&trace_types_lock);
7328 		ret = __set_tracer_option(topt->tr, topt->flags,
7329 					  topt->opt, !val);
7330 		mutex_unlock(&trace_types_lock);
7331 		if (ret)
7332 			return ret;
7333 	}
7334 
7335 	*ppos += cnt;
7336 
7337 	return cnt;
7338 }
7339 
7340 
7341 static const struct file_operations trace_options_fops = {
7342 	.open = tracing_open_generic,
7343 	.read = trace_options_read,
7344 	.write = trace_options_write,
7345 	.llseek	= generic_file_llseek,
7346 };
7347 
7348 /*
7349  * In order to pass in both the trace_array descriptor as well as the index
7350  * to the flag that the trace option file represents, the trace_array
7351  * has a character array of trace_flags_index[], which holds the index
7352  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7353  * The address of this character array is passed to the flag option file
7354  * read/write callbacks.
7355  *
7356  * In order to extract both the index and the trace_array descriptor,
7357  * get_tr_index() uses the following algorithm.
7358  *
7359  *   idx = *ptr;
7360  *
7361  * The pointer itself is the address of one slot in that index array, and
7362  * the value stored there is that slot's own index (remember index[1] == 1).
7363  *
7364  * Then, to get the trace_array descriptor, we subtract that index from
7365  * the pointer, which gives the start of the index array:
7366  *
7367  *   ptr - idx == &index[0]
7368  *
7369  * Then a simple container_of() from that pointer gets us to the
7370  * trace_array descriptor.
7371  */
7372 static void get_tr_index(void *data, struct trace_array **ptr,
7373 			 unsigned int *pindex)
7374 {
7375 	*pindex = *(unsigned char *)data;
7376 
7377 	*ptr = container_of(data - *pindex, struct trace_array,
7378 			    trace_flags_index);
7379 }
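
/*
 * Worked example (with a hypothetical index value): if data points at
 * tr->trace_flags_index[5], then *data == 5, so data - 5 is
 * &tr->trace_flags_index[0], and container_of() on that address yields
 * the enclosing trace_array.
 */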
7380 
7381 static ssize_t
7382 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7383 			loff_t *ppos)
7384 {
7385 	void *tr_index = filp->private_data;
7386 	struct trace_array *tr;
7387 	unsigned int index;
7388 	char *buf;
7389 
7390 	get_tr_index(tr_index, &tr, &index);
7391 
7392 	if (tr->trace_flags & (1 << index))
7393 		buf = "1\n";
7394 	else
7395 		buf = "0\n";
7396 
7397 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7398 }
7399 
7400 static ssize_t
7401 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7402 			 loff_t *ppos)
7403 {
7404 	void *tr_index = filp->private_data;
7405 	struct trace_array *tr;
7406 	unsigned int index;
7407 	unsigned long val;
7408 	int ret;
7409 
7410 	get_tr_index(tr_index, &tr, &index);
7411 
7412 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7413 	if (ret)
7414 		return ret;
7415 
7416 	if (val != 0 && val != 1)
7417 		return -EINVAL;
7418 
7419 	mutex_lock(&trace_types_lock);
7420 	ret = set_tracer_flag(tr, 1 << index, val);
7421 	mutex_unlock(&trace_types_lock);
7422 
7423 	if (ret < 0)
7424 		return ret;
7425 
7426 	*ppos += cnt;
7427 
7428 	return cnt;
7429 }
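
/*
 * Usage note: each core trace flag gets a file under "options/" (see
 * create_trace_options_dir() below) whose read/write handlers are the
 * two functions above. For example, with an option name such as
 * "latency-format" and tracefs mounted at /sys/kernel/tracing:
 *
 *   # echo 1 > /sys/kernel/tracing/options/latency-format
 *
 * flips the corresponding bit in tr->trace_flags via set_tracer_flag().
 */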
7430 
7431 static const struct file_operations trace_options_core_fops = {
7432 	.open = tracing_open_generic,
7433 	.read = trace_options_core_read,
7434 	.write = trace_options_core_write,
7435 	.llseek = generic_file_llseek,
7436 };
7437 
7438 struct dentry *trace_create_file(const char *name,
7439 				 umode_t mode,
7440 				 struct dentry *parent,
7441 				 void *data,
7442 				 const struct file_operations *fops)
7443 {
7444 	struct dentry *ret;
7445 
7446 	ret = tracefs_create_file(name, mode, parent, data, fops);
7447 	if (!ret)
7448 		pr_warn("Could not create tracefs '%s' entry\n", name);
7449 
7450 	return ret;
7451 }
7452 
7453 
7454 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7455 {
7456 	struct dentry *d_tracer;
7457 
7458 	if (tr->options)
7459 		return tr->options;
7460 
7461 	d_tracer = tracing_get_dentry(tr);
7462 	if (IS_ERR(d_tracer))
7463 		return NULL;
7464 
7465 	tr->options = tracefs_create_dir("options", d_tracer);
7466 	if (!tr->options) {
7467 		pr_warn("Could not create tracefs directory 'options'\n");
7468 		return NULL;
7469 	}
7470 
7471 	return tr->options;
7472 }
7473 
7474 static void
7475 create_trace_option_file(struct trace_array *tr,
7476 			 struct trace_option_dentry *topt,
7477 			 struct tracer_flags *flags,
7478 			 struct tracer_opt *opt)
7479 {
7480 	struct dentry *t_options;
7481 
7482 	t_options = trace_options_init_dentry(tr);
7483 	if (!t_options)
7484 		return;
7485 
7486 	topt->flags = flags;
7487 	topt->opt = opt;
7488 	topt->tr = tr;
7489 
7490 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7491 				    &trace_options_fops);
7492 
7493 }
7494 
7495 static void
7496 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7497 {
7498 	struct trace_option_dentry *topts;
7499 	struct trace_options *tr_topts;
7500 	struct tracer_flags *flags;
7501 	struct tracer_opt *opts;
7502 	int cnt;
7503 	int i;
7504 
7505 	if (!tracer)
7506 		return;
7507 
7508 	flags = tracer->flags;
7509 
7510 	if (!flags || !flags->opts)
7511 		return;
7512 
7513 	/*
7514 	 * If this is an instance, only create flags for tracers
7515 	 * the instance may have.
7516 	 */
7517 	if (!trace_ok_for_array(tracer, tr))
7518 		return;
7519 
7520 	for (i = 0; i < tr->nr_topts; i++) {
7521 		/* Make sure there's no duplicate flags. */
7522 		/* Make sure there are no duplicate flags. */
7523 			return;
7524 	}
7525 
7526 	opts = flags->opts;
7527 
7528 	for (cnt = 0; opts[cnt].name; cnt++)
7529 		;
7530 
7531 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7532 	if (!topts)
7533 		return;
7534 
7535 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7536 			    GFP_KERNEL);
7537 	if (!tr_topts) {
7538 		kfree(topts);
7539 		return;
7540 	}
7541 
7542 	tr->topts = tr_topts;
7543 	tr->topts[tr->nr_topts].tracer = tracer;
7544 	tr->topts[tr->nr_topts].topts = topts;
7545 	tr->nr_topts++;
7546 
7547 	for (cnt = 0; opts[cnt].name; cnt++) {
7548 		create_trace_option_file(tr, &topts[cnt], flags,
7549 					 &opts[cnt]);
7550 		WARN_ONCE(topts[cnt].entry == NULL,
7551 			  "Failed to create trace option: %s",
7552 			  opts[cnt].name);
7553 	}
7554 }
7555 
7556 static struct dentry *
7557 create_trace_option_core_file(struct trace_array *tr,
7558 			      const char *option, long index)
7559 {
7560 	struct dentry *t_options;
7561 
7562 	t_options = trace_options_init_dentry(tr);
7563 	if (!t_options)
7564 		return NULL;
7565 
7566 	return trace_create_file(option, 0644, t_options,
7567 				 (void *)&tr->trace_flags_index[index],
7568 				 &trace_options_core_fops);
7569 }
7570 
7571 static void create_trace_options_dir(struct trace_array *tr)
7572 {
7573 	struct dentry *t_options;
7574 	bool top_level = tr == &global_trace;
7575 	int i;
7576 
7577 	t_options = trace_options_init_dentry(tr);
7578 	if (!t_options)
7579 		return;
7580 
7581 	for (i = 0; trace_options[i]; i++) {
7582 		if (top_level ||
7583 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7584 			create_trace_option_core_file(tr, trace_options[i], i);
7585 	}
7586 }
7587 
7588 static ssize_t
7589 rb_simple_read(struct file *filp, char __user *ubuf,
7590 	       size_t cnt, loff_t *ppos)
7591 {
7592 	struct trace_array *tr = filp->private_data;
7593 	char buf[64];
7594 	int r;
7595 
7596 	r = tracer_tracing_is_on(tr);
7597 	r = sprintf(buf, "%d\n", r);
7598 
7599 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7600 }
7601 
7602 static ssize_t
7603 rb_simple_write(struct file *filp, const char __user *ubuf,
7604 		size_t cnt, loff_t *ppos)
7605 {
7606 	struct trace_array *tr = filp->private_data;
7607 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7608 	unsigned long val;
7609 	int ret;
7610 
7611 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7612 	if (ret)
7613 		return ret;
7614 
7615 	if (buffer) {
7616 		mutex_lock(&trace_types_lock);
7617 		if (val) {
7618 			tracer_tracing_on(tr);
7619 			if (tr->current_trace->start)
7620 				tr->current_trace->start(tr);
7621 		} else {
7622 			tracer_tracing_off(tr);
7623 			if (tr->current_trace->stop)
7624 				tr->current_trace->stop(tr);
7625 		}
7626 		mutex_unlock(&trace_types_lock);
7627 	}
7628 
7629 	(*ppos)++;
7630 
7631 	return cnt;
7632 }
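
/*
 * Usage note: rb_simple_read()/write() back the "tracing_on" file
 * (rb_simple_fops below). Writing 0 stops recording into the ring
 * buffer and calls the tracer's ->stop() hook; writing a non-zero value
 * turns recording back on:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on
 */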
7633 
7634 static const struct file_operations rb_simple_fops = {
7635 	.open		= tracing_open_generic_tr,
7636 	.read		= rb_simple_read,
7637 	.write		= rb_simple_write,
7638 	.release	= tracing_release_generic_tr,
7639 	.llseek		= default_llseek,
7640 };
7641 
7642 struct dentry *trace_instance_dir;
7643 
7644 static void
7645 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7646 
7647 static int
7648 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7649 {
7650 	enum ring_buffer_flags rb_flags;
7651 
7652 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7653 
7654 	buf->tr = tr;
7655 
7656 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7657 	if (!buf->buffer)
7658 		return -ENOMEM;
7659 
7660 	buf->data = alloc_percpu(struct trace_array_cpu);
7661 	if (!buf->data) {
7662 		ring_buffer_free(buf->buffer);
7663 		buf->buffer = NULL;
7664 		return -ENOMEM;
7665 	}
7666 
7667 	/* Allocate the first page for all buffers */
7668 	set_buffer_entries(&tr->trace_buffer,
7669 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7670 
7671 	return 0;
7672 }
7673 
7674 static int allocate_trace_buffers(struct trace_array *tr, int size)
7675 {
7676 	int ret;
7677 
7678 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7679 	if (ret)
7680 		return ret;
7681 
7682 #ifdef CONFIG_TRACER_MAX_TRACE
7683 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7684 				    allocate_snapshot ? size : 1);
7685 	if (WARN_ON(ret)) {
7686 		ring_buffer_free(tr->trace_buffer.buffer);
7687 		tr->trace_buffer.buffer = NULL;
7688 		free_percpu(tr->trace_buffer.data);
7689 		tr->trace_buffer.data = NULL;
7690 		return -ENOMEM;
7691 	}
7692 	tr->allocated_snapshot = allocate_snapshot;
7693 
7694 	/*
7695 	 * Only the top level trace array gets its snapshot allocated
7696 	 * from the kernel command line.
7697 	 */
7698 	allocate_snapshot = false;
7699 #endif
7700 	return 0;
7701 }
7702 
7703 static void free_trace_buffer(struct trace_buffer *buf)
7704 {
7705 	if (buf->buffer) {
7706 		ring_buffer_free(buf->buffer);
7707 		buf->buffer = NULL;
7708 		free_percpu(buf->data);
7709 		buf->data = NULL;
7710 	}
7711 }
7712 
7713 static void free_trace_buffers(struct trace_array *tr)
7714 {
7715 	if (!tr)
7716 		return;
7717 
7718 	free_trace_buffer(&tr->trace_buffer);
7719 
7720 #ifdef CONFIG_TRACER_MAX_TRACE
7721 	free_trace_buffer(&tr->max_buffer);
7722 #endif
7723 }
7724 
7725 static void init_trace_flags_index(struct trace_array *tr)
7726 {
7727 	int i;
7728 
7729 	/* Used by the trace options files */
7730 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7731 		tr->trace_flags_index[i] = i;
7732 }
7733 
7734 static void __update_tracer_options(struct trace_array *tr)
7735 {
7736 	struct tracer *t;
7737 
7738 	for (t = trace_types; t; t = t->next)
7739 		add_tracer_options(tr, t);
7740 }
7741 
7742 static void update_tracer_options(struct trace_array *tr)
7743 {
7744 	mutex_lock(&trace_types_lock);
7745 	__update_tracer_options(tr);
7746 	mutex_unlock(&trace_types_lock);
7747 }
7748 
7749 static int instance_mkdir(const char *name)
7750 {
7751 	struct trace_array *tr;
7752 	int ret;
7753 
7754 	mutex_lock(&event_mutex);
7755 	mutex_lock(&trace_types_lock);
7756 
7757 	ret = -EEXIST;
7758 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7759 		if (tr->name && strcmp(tr->name, name) == 0)
7760 			goto out_unlock;
7761 	}
7762 
7763 	ret = -ENOMEM;
7764 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7765 	if (!tr)
7766 		goto out_unlock;
7767 
7768 	tr->name = kstrdup(name, GFP_KERNEL);
7769 	if (!tr->name)
7770 		goto out_free_tr;
7771 
7772 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7773 		goto out_free_tr;
7774 
7775 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7776 
7777 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7778 
7779 	raw_spin_lock_init(&tr->start_lock);
7780 
7781 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7782 
7783 	tr->current_trace = &nop_trace;
7784 
7785 	INIT_LIST_HEAD(&tr->systems);
7786 	INIT_LIST_HEAD(&tr->events);
7787 	INIT_LIST_HEAD(&tr->hist_vars);
7788 
7789 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7790 		goto out_free_tr;
7791 
7792 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7793 	if (!tr->dir)
7794 		goto out_free_tr;
7795 
7796 	ret = event_trace_add_tracer(tr->dir, tr);
7797 	if (ret) {
7798 		tracefs_remove_recursive(tr->dir);
7799 		goto out_free_tr;
7800 	}
7801 
7802 	ftrace_init_trace_array(tr);
7803 
7804 	init_tracer_tracefs(tr, tr->dir);
7805 	init_trace_flags_index(tr);
7806 	__update_tracer_options(tr);
7807 
7808 	list_add(&tr->list, &ftrace_trace_arrays);
7809 
7810 	mutex_unlock(&trace_types_lock);
7811 	mutex_unlock(&event_mutex);
7812 
7813 	return 0;
7814 
7815  out_free_tr:
7816 	free_trace_buffers(tr);
7817 	free_cpumask_var(tr->tracing_cpumask);
7818 	kfree(tr->name);
7819 	kfree(tr);
7820 
7821  out_unlock:
7822 	mutex_unlock(&trace_types_lock);
7823 	mutex_unlock(&event_mutex);
7824 
7825 	return ret;
7826 
7827 }
7828 
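/*
 * Called when an instance directory is removed.  Fails with -EBUSY
 * while the instance (or its current tracer) still has references;
 * otherwise it tears down the events, tracefs files and ring buffers
 * and frees the trace_array.
 */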
7829 static int instance_rmdir(const char *name)
7830 {
7831 	struct trace_array *tr;
7832 	int found = 0;
7833 	int ret;
7834 	int i;
7835 
7836 	mutex_lock(&event_mutex);
7837 	mutex_lock(&trace_types_lock);
7838 
7839 	ret = -ENODEV;
7840 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7841 		if (tr->name && strcmp(tr->name, name) == 0) {
7842 			found = 1;
7843 			break;
7844 		}
7845 	}
7846 	if (!found)
7847 		goto out_unlock;
7848 
7849 	ret = -EBUSY;
7850 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7851 		goto out_unlock;
7852 
7853 	list_del(&tr->list);
7854 
7855 	/* Disable all the flags that were enabled coming in */
7856 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7857 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7858 			set_tracer_flag(tr, 1 << i, 0);
7859 	}
7860 
7861 	tracing_set_nop(tr);
7862 	clear_ftrace_function_probes(tr);
7863 	event_trace_del_tracer(tr);
7864 	ftrace_clear_pids(tr);
7865 	ftrace_destroy_function_files(tr);
7866 	tracefs_remove_recursive(tr->dir);
7867 	free_trace_buffers(tr);
7868 
7869 	for (i = 0; i < tr->nr_topts; i++) {
7870 		kfree(tr->topts[i].topts);
7871 	}
7872 	kfree(tr->topts);
7873 
7874 	free_cpumask_var(tr->tracing_cpumask);
7875 	kfree(tr->name);
7876 	kfree(tr);
7877 
7878 	ret = 0;
7879 
7880  out_unlock:
7881 	mutex_unlock(&trace_types_lock);
7882 	mutex_unlock(&event_mutex);
7883 
7884 	return ret;
7885 }
7886 
7887 static __init void create_trace_instances(struct dentry *d_tracer)
7888 {
7889 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7890 							 instance_mkdir,
7891 							 instance_rmdir);
7892 	if (WARN_ON(!trace_instance_dir))
7893 		return;
7894 }
7895 
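/*
 * Create the standard set of control files (trace, trace_pipe,
 * tracing_on, buffer_size_kb, trace_marker, ...) for a trace array
 * under the given tracefs directory, plus the per-CPU directories.
 */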
7896 static void
7897 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7898 {
7899 	int cpu;
7900 
7901 	trace_create_file("available_tracers", 0444, d_tracer,
7902 			tr, &show_traces_fops);
7903 
7904 	trace_create_file("current_tracer", 0644, d_tracer,
7905 			tr, &set_tracer_fops);
7906 
7907 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7908 			  tr, &tracing_cpumask_fops);
7909 
7910 	trace_create_file("trace_options", 0644, d_tracer,
7911 			  tr, &tracing_iter_fops);
7912 
7913 	trace_create_file("trace", 0644, d_tracer,
7914 			  tr, &tracing_fops);
7915 
7916 	trace_create_file("trace_pipe", 0444, d_tracer,
7917 			  tr, &tracing_pipe_fops);
7918 
7919 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7920 			  tr, &tracing_entries_fops);
7921 
7922 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7923 			  tr, &tracing_total_entries_fops);
7924 
7925 	trace_create_file("free_buffer", 0200, d_tracer,
7926 			  tr, &tracing_free_buffer_fops);
7927 
7928 	trace_create_file("trace_marker", 0220, d_tracer,
7929 			  tr, &tracing_mark_fops);
7930 
7931 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7932 			  tr, &tracing_mark_raw_fops);
7933 
7934 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7935 			  &trace_clock_fops);
7936 
7937 	trace_create_file("tracing_on", 0644, d_tracer,
7938 			  tr, &rb_simple_fops);
7939 
7940 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7941 			  &trace_time_stamp_mode_fops);
7942 
7943 	create_trace_options_dir(tr);
7944 
7945 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7946 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7947 			&tr->max_latency, &tracing_max_lat_fops);
7948 #endif
7949 
7950 	if (ftrace_create_function_files(tr, d_tracer))
7951 		WARN(1, "Could not allocate function filter files");
7952 
7953 #ifdef CONFIG_TRACER_SNAPSHOT
7954 	trace_create_file("snapshot", 0644, d_tracer,
7955 			  tr, &snapshot_fops);
7956 #endif
7957 
7958 	for_each_tracing_cpu(cpu)
7959 		tracing_init_tracefs_percpu(tr, cpu);
7960 
7961 	ftrace_init_tracefs(tr, d_tracer);
7962 }
7963 
7964 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7965 {
7966 	struct vfsmount *mnt;
7967 	struct file_system_type *type;
7968 
7969 	/*
7970 	 * To maintain backward compatibility for tools that mount
7971 	 * debugfs to get to the tracing facility, tracefs is automatically
7972 	 * mounted to the debugfs/tracing directory.
7973 	 */
7974 	type = get_fs_type("tracefs");
7975 	if (!type)
7976 		return NULL;
7977 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7978 	put_filesystem(type);
7979 	if (IS_ERR(mnt))
7980 		return NULL;
7981 	mntget(mnt);
7982 
7983 	return mnt;
7984 }
7985 
7986 /**
7987  * tracing_init_dentry - initialize top level trace array
7988  *
7989  * This is called when creating files or directories in the tracing
7990  * directory. It is called via fs_initcall() by any of the boot up code
7991  * and expects to return the dentry of the top level tracing directory.
7992  */
7993 struct dentry *tracing_init_dentry(void)
7994 {
7995 	struct trace_array *tr = &global_trace;
7996 
7997 	/* The top level trace array uses NULL as parent */
7998 	if (tr->dir)
7999 		return NULL;
8000 
8001 	if (WARN_ON(!tracefs_initialized()) ||
8002 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8003 		 WARN_ON(!debugfs_initialized())))
8004 		return ERR_PTR(-ENODEV);
8005 
8006 	/*
8007 	 * As there may still be users that expect the tracing
8008 	 * files to exist in debugfs/tracing, we must automount
8009 	 * the tracefs file system there, so older tools still
8010 	 * work with the newer kernel.
8011 	 */
8012 	tr->dir = debugfs_create_automount("tracing", NULL,
8013 					   trace_automount, NULL);
8014 	if (!tr->dir) {
8015 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8016 		return ERR_PTR(-ENOMEM);
8017 	}
8018 
8019 	return NULL;
8020 }
8021 
8022 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8023 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8024 
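/*
 * Register the eval maps that are built into the kernel image,
 * delimited by the __start/__stop_ftrace_eval_maps linker symbols.
 */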
8025 static void __init trace_eval_init(void)
8026 {
8027 	int len;
8028 
8029 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8030 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8031 }
8032 
8033 #ifdef CONFIG_MODULES
8034 static void trace_module_add_evals(struct module *mod)
8035 {
8036 	if (!mod->num_trace_evals)
8037 		return;
8038 
8039 	/*
8040 	 * Modules with bad taint do not have events created, so do
8041 	 * not bother with their eval maps either.
8042 	 */
8043 	if (trace_module_has_bad_taint(mod))
8044 		return;
8045 
8046 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8047 }
8048 
8049 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8050 static void trace_module_remove_evals(struct module *mod)
8051 {
8052 	union trace_eval_map_item *map;
8053 	union trace_eval_map_item **last = &trace_eval_maps;
8054 
8055 	if (!mod->num_trace_evals)
8056 		return;
8057 
8058 	mutex_lock(&trace_eval_mutex);
8059 
8060 	map = trace_eval_maps;
8061 
8062 	while (map) {
8063 		if (map->head.mod == mod)
8064 			break;
8065 		map = trace_eval_jmp_to_tail(map);
8066 		last = &map->tail.next;
8067 		map = map->tail.next;
8068 	}
8069 	if (!map)
8070 		goto out;
8071 
8072 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8073 	kfree(map);
8074  out:
8075 	mutex_unlock(&trace_eval_mutex);
8076 }
8077 #else
8078 static inline void trace_module_remove_evals(struct module *mod) { }
8079 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8080 
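/*
 * Module notifier: add a module's eval maps when it is loaded and
 * (when CONFIG_TRACE_EVAL_MAP_FILE is set) remove them again when the
 * module is unloaded.
 */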
8081 static int trace_module_notify(struct notifier_block *self,
8082 			       unsigned long val, void *data)
8083 {
8084 	struct module *mod = data;
8085 
8086 	switch (val) {
8087 	case MODULE_STATE_COMING:
8088 		trace_module_add_evals(mod);
8089 		break;
8090 	case MODULE_STATE_GOING:
8091 		trace_module_remove_evals(mod);
8092 		break;
8093 	}
8094 
8095 	return 0;
8096 }
8097 
8098 static struct notifier_block trace_module_nb = {
8099 	.notifier_call = trace_module_notify,
8100 	.priority = 0,
8101 };
8102 #endif /* CONFIG_MODULES */
8103 
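/*
 * fs_initcall that populates the top level tracing directory with the
 * global trace files, the eval map file, the "instances" directory
 * and the module notifier used for eval maps.
 */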
8104 static __init int tracer_init_tracefs(void)
8105 {
8106 	struct dentry *d_tracer;
8107 
8108 	trace_access_lock_init();
8109 
8110 	d_tracer = tracing_init_dentry();
8111 	if (IS_ERR(d_tracer))
8112 		return 0;
8113 
8114 	init_tracer_tracefs(&global_trace, d_tracer);
8115 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8116 
8117 	trace_create_file("tracing_thresh", 0644, d_tracer,
8118 			&global_trace, &tracing_thresh_fops);
8119 
8120 	trace_create_file("README", 0444, d_tracer,
8121 			NULL, &tracing_readme_fops);
8122 
8123 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8124 			NULL, &tracing_saved_cmdlines_fops);
8125 
8126 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8127 			  NULL, &tracing_saved_cmdlines_size_fops);
8128 
8129 	trace_create_file("saved_tgids", 0444, d_tracer,
8130 			NULL, &tracing_saved_tgids_fops);
8131 
8132 	trace_eval_init();
8133 
8134 	trace_create_eval_file(d_tracer);
8135 
8136 #ifdef CONFIG_MODULES
8137 	register_module_notifier(&trace_module_nb);
8138 #endif
8139 
8140 #ifdef CONFIG_DYNAMIC_FTRACE
8141 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8142 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8143 #endif
8144 
8145 	create_trace_instances(d_tracer);
8146 
8147 	update_tracer_options(&global_trace);
8148 
8149 	return 0;
8150 }
8151 
8152 static int trace_panic_handler(struct notifier_block *this,
8153 			       unsigned long event, void *unused)
8154 {
8155 	if (ftrace_dump_on_oops)
8156 		ftrace_dump(ftrace_dump_on_oops);
8157 	return NOTIFY_OK;
8158 }
8159 
8160 static struct notifier_block trace_panic_notifier = {
8161 	.notifier_call  = trace_panic_handler,
8162 	.next           = NULL,
8163 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8164 };
8165 
8166 static int trace_die_handler(struct notifier_block *self,
8167 			     unsigned long val,
8168 			     void *data)
8169 {
8170 	switch (val) {
8171 	case DIE_OOPS:
8172 		if (ftrace_dump_on_oops)
8173 			ftrace_dump(ftrace_dump_on_oops);
8174 		break;
8175 	default:
8176 		break;
8177 	}
8178 	return NOTIFY_OK;
8179 }
8180 
8181 static struct notifier_block trace_die_notifier = {
8182 	.notifier_call = trace_die_handler,
8183 	.priority = 200
8184 };
8185 
8186 /*
8187  * printk is limited to a max of 1024 characters; we really don't need
8188  * it that big. Nothing should be printing 1000 characters anyway.
8189  */
8190 #define TRACE_MAX_PRINT		1000
8191 
8192 /*
8193  * Define here KERN_TRACE so that we have one place to modify
8194  * it if we decide to change what log level the ftrace dump
8195  * should be at.
8196  */
8197 #define KERN_TRACE		KERN_EMERG
8198 
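/*
 * Print the contents of a trace_seq to the console at KERN_TRACE
 * level, clamping it to TRACE_MAX_PRINT bytes, and reinitialize the
 * sequence for reuse.
 */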
8199 void
8200 trace_printk_seq(struct trace_seq *s)
8201 {
8202 	/* Probably should print a warning here. */
8203 	if (s->seq.len >= TRACE_MAX_PRINT)
8204 		s->seq.len = TRACE_MAX_PRINT;
8205 
8206 	/*
8207 	 * More paranoid code. Although the buffer size is set to
8208 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8209 	 * an extra layer of protection.
8210 	 */
8211 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8212 		s->seq.len = s->seq.size - 1;
8213 
8214 	/* should already be zero-terminated, but we are paranoid. */
8215 	s->buffer[s->seq.len] = 0;
8216 
8217 	printk(KERN_TRACE "%s", s->buffer);
8218 
8219 	trace_seq_init(s);
8220 }
8221 
8222 void trace_init_global_iter(struct trace_iterator *iter)
8223 {
8224 	iter->tr = &global_trace;
8225 	iter->trace = iter->tr->current_trace;
8226 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8227 	iter->trace_buffer = &global_trace.trace_buffer;
8228 
8229 	if (iter->trace && iter->trace->open)
8230 		iter->trace->open(iter);
8231 
8232 	/* Annotate start of buffers if we had overruns */
8233 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8234 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8235 
8236 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8237 	if (trace_clocks[iter->tr->clock_id].in_ns)
8238 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8239 }
8240 
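/*
 * Dump the contents of the ring buffer(s) to the console.  This is
 * used by the panic and die notifiers above (and by sysrq-z), so it
 * turns tracing off, prints every remaining entry and leaves tracing
 * off when it is done.
 */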
8241 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8242 {
8243 	/* use static because iter can be a bit big for the stack */
8244 	static struct trace_iterator iter;
8245 	static atomic_t dump_running;
8246 	struct trace_array *tr = &global_trace;
8247 	unsigned int old_userobj;
8248 	unsigned long flags;
8249 	int cnt = 0, cpu;
8250 
8251 	/* Only allow one dump user at a time. */
8252 	if (atomic_inc_return(&dump_running) != 1) {
8253 		atomic_dec(&dump_running);
8254 		return;
8255 	}
8256 
8257 	/*
8258 	 * Always turn off tracing when we dump.
8259 	 * We don't need to show trace output of what happens
8260 	 * between multiple crashes.
8261 	 *
8262 	 * If the user does a sysrq-z, then they can re-enable
8263 	 * tracing with echo 1 > tracing_on.
8264 	 */
8265 	tracing_off();
8266 
8267 	local_irq_save(flags);
8268 
8269 	/* Simulate the iterator */
8270 	trace_init_global_iter(&iter);
8271 
8272 	for_each_tracing_cpu(cpu) {
8273 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8274 	}
8275 
8276 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8277 
8278 	/* don't look at user memory in panic mode */
8279 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8280 
8281 	switch (oops_dump_mode) {
8282 	case DUMP_ALL:
8283 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8284 		break;
8285 	case DUMP_ORIG:
8286 		iter.cpu_file = raw_smp_processor_id();
8287 		break;
8288 	case DUMP_NONE:
8289 		goto out_enable;
8290 	default:
8291 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8292 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8293 	}
8294 
8295 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8296 
8297 	/* Did function tracer already get disabled? */
8298 	if (ftrace_is_dead()) {
8299 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8300 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8301 	}
8302 
8303 	/*
8304 	 * We need to stop all tracing on all CPUs to read
8305 	 * the next buffer. This is a bit expensive, but is
8306 	 * not done often. We fill all that we can read,
8307 	 * and then release the locks again.
8308 	 */
8309 
8310 	while (!trace_empty(&iter)) {
8311 
8312 		if (!cnt)
8313 			printk(KERN_TRACE "---------------------------------\n");
8314 
8315 		cnt++;
8316 
8317 		/* reset all but tr, trace, and overruns */
8318 		memset(&iter.seq, 0,
8319 		       sizeof(struct trace_iterator) -
8320 		       offsetof(struct trace_iterator, seq));
8321 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8322 		iter.pos = -1;
8323 
8324 		if (trace_find_next_entry_inc(&iter) != NULL) {
8325 			int ret;
8326 
8327 			ret = print_trace_line(&iter);
8328 			if (ret != TRACE_TYPE_NO_CONSUME)
8329 				trace_consume(&iter);
8330 		}
8331 		touch_nmi_watchdog();
8332 
8333 		trace_printk_seq(&iter.seq);
8334 	}
8335 
8336 	if (!cnt)
8337 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8338 	else
8339 		printk(KERN_TRACE "---------------------------------\n");
8340 
8341  out_enable:
8342 	tr->trace_flags |= old_userobj;
8343 
8344 	for_each_tracing_cpu(cpu) {
8345 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8346 	}
8347 	atomic_dec(&dump_running);
8348 	local_irq_restore(flags);
8349 }
8350 EXPORT_SYMBOL_GPL(ftrace_dump);
8351 
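/*
 * Split @buf into an argv array and, if it is not empty, pass it to
 * @createfn.  Helper for trace_parse_run_command() below.
 */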
8352 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8353 {
8354 	char **argv;
8355 	int argc, ret;
8356 
8357 	argc = 0;
8358 	ret = 0;
8359 	argv = argv_split(GFP_KERNEL, buf, &argc);
8360 	if (!argv)
8361 		return -ENOMEM;
8362 
8363 	if (argc)
8364 		ret = createfn(argc, argv);
8365 
8366 	argv_free(argv);
8367 
8368 	return ret;
8369 }
8370 
8371 #define WRITE_BUFSIZE  4096
8372 
8373 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
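/*
 * Copy command text from user space in WRITE_BUFSIZE chunks, split it
 * into newline separated commands, strip '#' comments and run each
 * command through @createfn.  Lines longer than WRITE_BUFSIZE - 2 are
 * rejected with -EINVAL.
 */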
8374 				size_t count, loff_t *ppos,
8375 				int (*createfn)(int, char **))
8376 {
8377 	char *kbuf, *buf, *tmp;
8378 	int ret = 0;
8379 	size_t done = 0;
8380 	size_t size;
8381 
8382 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8383 	if (!kbuf)
8384 		return -ENOMEM;
8385 
8386 	while (done < count) {
8387 		size = count - done;
8388 
8389 		if (size >= WRITE_BUFSIZE)
8390 			size = WRITE_BUFSIZE - 1;
8391 
8392 		if (copy_from_user(kbuf, buffer + done, size)) {
8393 			ret = -EFAULT;
8394 			goto out;
8395 		}
8396 		kbuf[size] = '\0';
8397 		buf = kbuf;
8398 		do {
8399 			tmp = strchr(buf, '\n');
8400 			if (tmp) {
8401 				*tmp = '\0';
8402 				size = tmp - buf + 1;
8403 			} else {
8404 				size = strlen(buf);
8405 				if (done + size < count) {
8406 					if (buf != kbuf)
8407 						break;
8408 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8409 					pr_warn("Line length is too long: Should be less than %d\n",
8410 						WRITE_BUFSIZE - 2);
8411 					ret = -EINVAL;
8412 					goto out;
8413 				}
8414 			}
8415 			done += size;
8416 
8417 			/* Remove comments */
8418 			tmp = strchr(buf, '#');
8419 
8420 			if (tmp)
8421 				*tmp = '\0';
8422 
8423 			ret = trace_run_command(buf, createfn);
8424 			if (ret)
8425 				goto out;
8426 			buf += size;
8427 
8428 		} while (done < count);
8429 	}
8430 	ret = done;
8431 
8432 out:
8433 	kfree(kbuf);
8434 
8435 	return ret;
8436 }
8437 
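/*
 * Boot-time setup of the global trace array: allocate the cpumasks,
 * the temporary buffer used by event triggers, the saved-cmdlines
 * buffer and the ring buffers, register the nop tracer, and hook up
 * the panic/die notifiers so the buffers can be dumped on a crash.
 */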
8438 __init static int tracer_alloc_buffers(void)
8439 {
8440 	int ring_buf_size;
8441 	int ret = -ENOMEM;
8442 
8443 	/*
8444 	 * Make sure we don't accidentally add more trace options
8445 	 * than we have bits for.
8446 	 */
8447 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8448 
8449 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8450 		goto out;
8451 
8452 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8453 		goto out_free_buffer_mask;
8454 
8455 	/* Only allocate trace_printk buffers if a trace_printk exists */
8456 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8457 		/* Must be called before global_trace.buffer is allocated */
8458 		trace_printk_init_buffers();
8459 
8460 	/* To save memory, keep the ring buffer size at its minimum */
8461 	if (ring_buffer_expanded)
8462 		ring_buf_size = trace_buf_size;
8463 	else
8464 		ring_buf_size = 1;
8465 
8466 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8467 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8468 
8469 	raw_spin_lock_init(&global_trace.start_lock);
8470 
8471 	/*
8472 	 * The prepare callback allocates some memory for the ring buffer. We
8473 	 * don't free the buffer if the CPU goes down. If we were to free
8474 	 * the buffer, then the user would lose any trace that was in the
8475 	 * buffer. The memory will be removed once the "instance" is removed.
8476 	 */
8477 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8478 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8479 				      NULL);
8480 	if (ret < 0)
8481 		goto out_free_cpumask;
8482 	/* Used for event triggers */
8483 	ret = -ENOMEM;
8484 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8485 	if (!temp_buffer)
8486 		goto out_rm_hp_state;
8487 
8488 	if (trace_create_savedcmd() < 0)
8489 		goto out_free_temp_buffer;
8490 
8491 	/* TODO: make the number of buffers hot pluggable with CPUS */
8492 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8493 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8494 		WARN_ON(1);
8495 		goto out_free_savedcmd;
8496 	}
8497 
8498 	if (global_trace.buffer_disabled)
8499 		tracing_off();
8500 
8501 	if (trace_boot_clock) {
8502 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8503 		if (ret < 0)
8504 			pr_warn("Trace clock %s not defined, going back to default\n",
8505 				trace_boot_clock);
8506 	}
8507 
8508 	/*
8509 	 * register_tracer() might reference current_trace, so it
8510 	 * needs to be set before we register anything. This is
8511 	 * just a bootstrap of current_trace anyway.
8512 	 */
8513 	global_trace.current_trace = &nop_trace;
8514 
8515 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8516 
8517 	ftrace_init_global_array_ops(&global_trace);
8518 
8519 	init_trace_flags_index(&global_trace);
8520 
8521 	register_tracer(&nop_trace);
8522 
8523 	/* Function tracing may start here (via kernel command line) */
8524 	init_function_trace();
8525 
8526 	/* All seems OK, enable tracing */
8527 	tracing_disabled = 0;
8528 
8529 	atomic_notifier_chain_register(&panic_notifier_list,
8530 				       &trace_panic_notifier);
8531 
8532 	register_die_notifier(&trace_die_notifier);
8533 
8534 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8535 
8536 	INIT_LIST_HEAD(&global_trace.systems);
8537 	INIT_LIST_HEAD(&global_trace.events);
8538 	INIT_LIST_HEAD(&global_trace.hist_vars);
8539 	list_add(&global_trace.list, &ftrace_trace_arrays);
8540 
8541 	apply_trace_boot_options();
8542 
8543 	register_snapshot_cmd();
8544 
8545 	return 0;
8546 
8547 out_free_savedcmd:
8548 	free_saved_cmdlines_buffer(savedcmd);
8549 out_free_temp_buffer:
8550 	ring_buffer_free(temp_buffer);
8551 out_rm_hp_state:
8552 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8553 out_free_cpumask:
8554 	free_cpumask_var(global_trace.tracing_cpumask);
8555 out_free_buffer_mask:
8556 	free_cpumask_var(tracing_buffer_mask);
8557 out:
8558 	return ret;
8559 }
8560 
8561 void __init early_trace_init(void)
8562 {
8563 	if (tracepoint_printk) {
8564 		tracepoint_print_iter =
8565 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8566 		if (WARN_ON(!tracepoint_print_iter))
8567 			tracepoint_printk = 0;
8568 		else
8569 			static_key_enable(&tracepoint_printk_key.key);
8570 	}
8571 	tracer_alloc_buffers();
8572 }
8573 
8574 void __init trace_init(void)
8575 {
8576 	trace_event_init();
8577 }
8578 
8579 __init static int clear_boot_tracer(void)
8580 {
8581 	/*
8582 	 * The default bootup tracer name points into a buffer that lives
8583 	 * in an init section. This function is called at late_initcall
8584 	 * time; if the boot tracer was not found by then, clear the
8585 	 * pointer to prevent later tracer registration from accessing
8586 	 * the buffer that is about to be freed.
8587 	 */
8588 	if (!default_bootup_tracer)
8589 		return 0;
8590 
8591 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8592 	       default_bootup_tracer);
8593 	default_bootup_tracer = NULL;
8594 
8595 	return 0;
8596 }
8597 
8598 fs_initcall(tracer_init_tracefs);
8599 late_initcall_sync(clear_boot_tracer);
8600 
8601 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8602 __init static int tracing_set_default_clock(void)
8603 {
8604 	/* sched_clock_stable() is determined in late_initcall */
8605 	if (!trace_boot_clock && !sched_clock_stable()) {
8606 		printk(KERN_WARNING
8607 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8608 		       "If you want to keep using the local clock, then add:\n"
8609 		       "  \"trace_clock=local\"\n"
8610 		       "on the kernel command line\n");
8611 		tracing_set_clock(&global_trace, "global");
8612 	}
8613 
8614 	return 0;
8615 }
8616 late_initcall_sync(tracing_set_default_clock);
8617 #endif
8618