xref: /openbmc/linux/kernel/trace/trace.c (revision dea54fba)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45 
46 #include "trace.h"
47 #include "trace_output.h"
48 
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54 
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer (such as trace_printk) could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63 
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68 
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73 
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 	{ }
77 };
78 
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 	return 0;
83 }
84 
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91 
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 and is set to zero when the initialization
95  * of the tracer succeeds. That is the only place that sets
96  * it back to zero.
97  */
98 static int tracing_disabled = 1;
99 
100 cpumask_var_t __read_mostly	tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 	struct module			*mod;
127 	unsigned long			length;
128 };
129 
130 union trace_eval_map_item;
131 
132 struct trace_eval_map_tail {
133 	/*
134 	 * "end" is first and points to NULL, as it must be different
135 	 * from "mod" or "eval_string"
136 	 */
137 	union trace_eval_map_item	*next;
138 	const char			*end;	/* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_eval_mutex);
142 
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151 	struct trace_eval_map		map;
152 	struct trace_eval_map_head	head;
153 	struct trace_eval_map_tail	tail;
154 };
155 
156 static union trace_eval_map_item *trace_eval_maps;
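
/*
 * Illustrative sketch (not part of the original file): given the layout
 * described above, a saved array can be walked roughly like this, where
 * "ptr" is a hypothetical cursor starting at trace_eval_maps:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *	unsigned long i;
 *
 *	for (i = 0; ptr && i < ptr->head.length; i++) {
 *		struct trace_eval_map *map = &ptr[i + 1].map;
 *		// map->eval_string / map->eval_value describe one enum
 *	}
 *	// ptr[ptr->head.length + 1].tail.next points to the next saved array
 */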
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE		100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 	default_bootup_tracer = bootup_tracer_buf;
171 	/* We are using ftrace early, expand it */
172 	ring_buffer_expanded = true;
173 	return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 	if (*str++ != '=' || !*str) {
180 		ftrace_dump_on_oops = DUMP_ALL;
181 		return 1;
182 	}
183 
184 	if (!strcmp("orig_cpu", str)) {
185 		ftrace_dump_on_oops = DUMP_ORIG;
186 		return 1;
187 	}
188 
189 	return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 		__disable_trace_on_warning = 1;
197 	return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 	allocate_snapshot = true;
204 	/* We also need the main ring buffer expanded */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 	return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 	trace_boot_clock = trace_boot_clock_buf;
227 	return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 		tracepoint_printk = 1;
235 	return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
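
/*
 * Illustrative example (not part of the original file): the boot parameters
 * handled above can be combined on the kernel command line, e.g.:
 *
 *	ftrace=function trace_options=stacktrace trace_clock=global
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning alloc_snapshot tp_printk
 *
 * The tracer name, option and clock shown are just plausible values; the
 * accepted strings come from the registered tracers, the trace options and
 * the trace_clocks[] table defined later in this file.
 */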
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 	nsec += 500;
242 	do_div(nsec, 1000);
243 	return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS						\
248 	(FUNCTION_DEFAULT_FLAGS |					\
249 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
250 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
251 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
252 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
256 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267 	.trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 	struct trace_array *tr;
275 	int ret = -ENODEV;
276 
277 	mutex_lock(&trace_types_lock);
278 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 		if (tr == this_tr) {
280 			tr->ref++;
281 			ret = 0;
282 			break;
283 		}
284 	}
285 	mutex_unlock(&trace_types_lock);
286 
287 	return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 	WARN_ON(!this_tr->ref);
293 	this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 	mutex_lock(&trace_types_lock);
299 	__trace_array_put(this_tr);
300 	mutex_unlock(&trace_types_lock);
301 }
302 
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 			      struct ring_buffer *buffer,
305 			      struct ring_buffer_event *event)
306 {
307 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 	    !filter_match_preds(call->filter, rec)) {
309 		__trace_event_discard_commit(buffer, event);
310 		return 1;
311 	}
312 
313 	return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 	vfree(pid_list->pids);
319 	kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 	/*
333 	 * If pid_max changed after filtered_pids was created, we
334 	 * by default ignore all pids greater than the previous pid_max.
335 	 */
336 	if (search_pid >= filtered_pids->pid_max)
337 		return false;
338 
339 	return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 	/*
355 	 * Return false, because if filtered_pids does not exist,
356 	 * all pids are good to trace.
357 	 */
358 	if (!filtered_pids)
359 		return false;
360 
361 	return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
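
/*
 * Illustrative sketch (not part of the original file): a tracing hook could
 * consult the filter like this; "my_pid_list" and "my_trace_hook" are
 * hypothetical names.
 *
 *	static struct trace_pid_list *my_pid_list;
 *
 *	static void my_trace_hook(struct task_struct *task)
 *	{
 *		if (trace_ignore_this_task(my_pid_list, task))
 *			return;		// filtered out, do not trace
 *		// ... record the event for this task ...
 *	}
 */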
363 
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 				  struct task_struct *self,
378 				  struct task_struct *task)
379 {
380 	if (!pid_list)
381 		return;
382 
383 	/* For forks, we only add if the forking task is listed */
384 	if (self) {
385 		if (!trace_find_filtered_pid(pid_list, self->pid))
386 			return;
387 	}
388 
389 	/* Sorry, but we don't support pid_max changing after setting */
390 	if (task->pid >= pid_list->pid_max)
391 		return;
392 
393 	/* "self" is set for forks, and NULL for exits */
394 	if (self)
395 		set_bit(task->pid, pid_list->pids);
396 	else
397 		clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (one more than the actual pid, so that zero can be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 	unsigned long pid = (unsigned long)v;
415 
416 	(*pos)++;
417 
418 	/* pid already is +1 of the actual previous bit */
419 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421 	/* Return pid + 1 to allow zero to be represented */
422 	if (pid < pid_list->pid_max)
423 		return (void *)(pid + 1);
424 
425 	return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 	unsigned long pid;
442 	loff_t l = 0;
443 
444 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 	if (pid >= pid_list->pid_max)
446 		return NULL;
447 
448 	/* Return pid + 1 so that zero can be the exit value */
449 	for (pid++; pid && l < *pos;
450 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 		;
452 	return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 	unsigned long pid = (unsigned long)v - 1;
466 
467 	seq_printf(m, "%lu\n", pid);
468 	return 0;
469 }
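
/*
 * Illustrative sketch (not part of the original file): the three helpers
 * above are meant to back seq_file operations roughly like this, where
 * "my_pid_list" and the p_*() names are hypothetical:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// hypothetical no-op stop callback
 *		.show	= trace_pid_show,
 *	};
 */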
470 
471 /* PID_BUF_SIZE + 1 = 128 should be much more than enough */
472 #define PID_BUF_SIZE		127
473 
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 		    struct trace_pid_list **new_pid_list,
476 		    const char __user *ubuf, size_t cnt)
477 {
478 	struct trace_pid_list *pid_list;
479 	struct trace_parser parser;
480 	unsigned long val;
481 	int nr_pids = 0;
482 	ssize_t read = 0;
483 	ssize_t ret = 0;
484 	loff_t pos;
485 	pid_t pid;
486 
487 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 		return -ENOMEM;
489 
490 	/*
491 	 * Always recreate a new array. The write is an all or nothing
492 	 * operation. Always create a new array when adding new pids by
493 	 * the user. If the operation fails, then the current list is
494 	 * not modified.
495 	 */
496 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 	if (!pid_list)
498 		return -ENOMEM;
499 
500 	pid_list->pid_max = READ_ONCE(pid_max);
501 
502 	/* Only truncating will shrink pid_max */
503 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 		pid_list->pid_max = filtered_pids->pid_max;
505 
506 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 	if (!pid_list->pids) {
508 		kfree(pid_list);
509 		return -ENOMEM;
510 	}
511 
512 	if (filtered_pids) {
513 		/* copy the current bits to the new max */
514 		for_each_set_bit(pid, filtered_pids->pids,
515 				 filtered_pids->pid_max) {
516 			set_bit(pid, pid_list->pids);
517 			nr_pids++;
518 		}
519 	}
520 
521 	while (cnt > 0) {
522 
523 		pos = 0;
524 
525 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 		if (ret < 0 || !trace_parser_loaded(&parser))
527 			break;
528 
529 		read += ret;
530 		ubuf += ret;
531 		cnt -= ret;
532 
533 		parser.buffer[parser.idx] = 0;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
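
/*
 * Illustrative sketch (not part of the original file): a write handler for a
 * pid-filter file could use trace_pid_write() roughly like this, where
 * "filtered_pids" is the currently installed list and "tr_pid_list" is a
 * hypothetical RCU-protected pointer to it:
 *
 *	struct trace_pid_list *new_list = NULL;
 *	ssize_t ret;
 *
 *	ret = trace_pid_write(filtered_pids, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr_pid_list, new_list);
 *	synchronize_sched();
 *	if (filtered_pids)
 *		trace_free_pid_list(filtered_pids);
 */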
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been enabled or disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so it can be used in fast paths such as
592  * by the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on(), which is a little
594  * slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. In any case, this is
615  * configurable at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize access to the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-CPU
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only from read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
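
/*
 * Illustrative sketch (not part of the original file): a reader consuming
 * events from one CPU buffer is expected to bracket the access like this,
 * where "iter" is a hypothetical trace_iterator:
 *
 *	trace_access_lock(iter->cpu_file);
 *	event = ring_buffer_consume(iter->trace_buffer->buffer,
 *				    iter->cpu_file, &ts, &lost_events);
 *	// ... copy the event data out while the lock is held ...
 *	trace_access_unlock(iter->cpu_file);
 */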
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races where it gets disabled while we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
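
/*
 * Illustrative example (not part of the original file): __trace_puts() is
 * normally reached through the trace_puts() macro, e.g. from a driver:
 *
 *	static void my_probe_step(void)		// hypothetical function
 *	{
 *		trace_puts("my_driver: reached probe checkpoint\n");
 *	}
 *
 * For build-time constant strings the macro may route to __trace_bputs()
 * below instead, which only records the string's address.
 */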
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into the trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string whose address is written to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, the snapshot cannot be used while the tracer is using it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer; instead, we resize it, because
975 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want to preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = alloc_snapshot(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
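
/*
 * Illustrative example (not part of the original file): code that wants a
 * snapshot on some rare condition could do roughly this, allocating the
 * spare buffer once from a context that may sleep:
 *
 *	// during init, where sleeping is allowed
 *	if (tracing_alloc_snapshot() < 0)
 *		pr_warn("snapshot buffer not available\n");
 *
 *	// later, in the (possibly atomic) path of interest
 *	if (unlikely(hit_rare_condition))	// hypothetical condition
 *		tracing_snapshot();
 */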
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff), that just want to
1055 	 * know if the ring buffer has been disabled, but it can handle
1056 	 * races where it gets disabled while we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show the real state of the ring buffer
1087  * @tr: the trace array whose ring buffer state is checked
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
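
/*
 * Illustrative example (not part of the original file): tracing_off() and
 * tracing_on() can bracket a region of interest so the buffers keep only
 * the events leading up to a problem:
 *
 *	if (suspicious_state_detected())	// hypothetical check
 *		tracing_off();			// freeze the buffers for a dump
 *	...
 *	tracing_on();				// resume recording later
 *
 * tracing_is_on() reports the current recording state of the global buffers.
 */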
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
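
/*
 * Illustrative example (not part of the original file): the clocks above are
 * selected at run time through the tracefs "trace_clock" file, e.g.:
 *
 *	# cat /sys/kernel/debug/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo mono > /sys/kernel/debug/tracing/trace_clock
 *
 * or at boot with the "trace_clock=" parameter handled earlier in this file.
 */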
1172 
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 	memset(parser, 0, sizeof(*parser));
1179 
1180 	parser->buffer = kmalloc(size, GFP_KERNEL);
1181 	if (!parser->buffer)
1182 		return 1;
1183 
1184 	parser->size = size;
1185 	return 0;
1186 }
1187 
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 	kfree(parser->buffer);
1194 	parser->buffer = NULL;
1195 }
1196 
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 	size_t cnt, loff_t *ppos)
1210 {
1211 	char ch;
1212 	size_t read = 0;
1213 	ssize_t ret;
1214 
1215 	if (!*ppos)
1216 		trace_parser_clear(parser);
1217 
1218 	ret = get_user(ch, ubuf++);
1219 	if (ret)
1220 		goto out;
1221 
1222 	read++;
1223 	cnt--;
1224 
1225 	/*
1226 	 * The parser is not finished with the last write,
1227 	 * continue reading the user input without skipping spaces.
1228 	 */
1229 	if (!parser->cont) {
1230 		/* skip white space */
1231 		while (cnt && isspace(ch)) {
1232 			ret = get_user(ch, ubuf++);
1233 			if (ret)
1234 				goto out;
1235 			read++;
1236 			cnt--;
1237 		}
1238 
1239 		/* only spaces were written */
1240 		if (isspace(ch)) {
1241 			*ppos += read;
1242 			ret = read;
1243 			goto out;
1244 		}
1245 
1246 		parser->idx = 0;
1247 	}
1248 
1249 	/* read the non-space input */
1250 	while (cnt && !isspace(ch)) {
1251 		if (parser->idx < parser->size - 1)
1252 			parser->buffer[parser->idx++] = ch;
1253 		else {
1254 			ret = -EINVAL;
1255 			goto out;
1256 		}
1257 		ret = get_user(ch, ubuf++);
1258 		if (ret)
1259 			goto out;
1260 		read++;
1261 		cnt--;
1262 	}
1263 
1264 	/* We either got finished input or we have to wait for another call. */
1265 	if (isspace(ch)) {
1266 		parser->buffer[parser->idx] = 0;
1267 		parser->cont = false;
1268 	} else if (parser->idx < parser->size - 1) {
1269 		parser->cont = true;
1270 		parser->buffer[parser->idx++] = ch;
1271 	} else {
1272 		ret = -EINVAL;
1273 		goto out;
1274 	}
1275 
1276 	*ppos += read;
1277 	ret = read;
1278 
1279 out:
1280 	return ret;
1281 }
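
/*
 * Illustrative sketch (not part of the original file): the usual pattern for
 * consuming a user buffer token by token with the parser, following
 * trace_pid_write() above ("MY_BUF_SIZE" is a hypothetical size):
 *
 *	struct trace_parser parser;
 *	loff_t pos;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		parser.buffer[parser.idx] = 0;
 *		// parser.buffer now holds one token; process it here
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */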
1282 
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 	int len;
1287 
1288 	if (trace_seq_used(s) <= s->seq.readpos)
1289 		return -EBUSY;
1290 
1291 	len = trace_seq_used(s) - s->seq.readpos;
1292 	if (cnt > len)
1293 		cnt = len;
1294 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295 
1296 	s->seq.readpos += cnt;
1297 	return cnt;
1298 }
1299 
1300 unsigned long __read_mostly	tracing_thresh;
1301 
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 	struct trace_buffer *max_buf = &tr->max_buffer;
1313 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315 
1316 	max_buf->cpu = cpu;
1317 	max_buf->time_start = data->preempt_timestamp;
1318 
1319 	max_data->saved_latency = tr->max_latency;
1320 	max_data->critical_start = data->critical_start;
1321 	max_data->critical_end = data->critical_end;
1322 
1323 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 	max_data->pid = tsk->pid;
1325 	/*
1326 	 * If tsk == current, then use current_uid(), as that does not use
1327 	 * RCU. The irq tracer can be called out of RCU scope.
1328 	 */
1329 	if (tsk == current)
1330 		max_data->uid = current_uid();
1331 	else
1332 		max_data->uid = task_uid(tsk);
1333 
1334 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 	max_data->policy = tsk->policy;
1336 	max_data->rt_priority = tsk->rt_priority;
1337 
1338 	/* record this task's comm */
1339 	tracing_record_cmdline(tsk);
1340 }
1341 
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 	struct ring_buffer *buf;
1355 
1356 	if (tr->stop_count)
1357 		return;
1358 
1359 	WARN_ON_ONCE(!irqs_disabled());
1360 
1361 	if (!tr->allocated_snapshot) {
1362 		/* Only the nop tracer should hit this when disabling */
1363 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 		return;
1365 	}
1366 
1367 	arch_spin_lock(&tr->max_lock);
1368 
1369 	buf = tr->trace_buffer.buffer;
1370 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 	tr->max_buffer.buffer = buf;
1372 
1373 	__update_max_tr(tr, tsk, cpu);
1374 	arch_spin_unlock(&tr->max_lock);
1375 }
1376 
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388 	int ret;
1389 
1390 	if (tr->stop_count)
1391 		return;
1392 
1393 	WARN_ON_ONCE(!irqs_disabled());
1394 	if (!tr->allocated_snapshot) {
1395 		/* Only the nop tracer should hit this when disabling */
1396 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397 		return;
1398 	}
1399 
1400 	arch_spin_lock(&tr->max_lock);
1401 
1402 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403 
1404 	if (ret == -EBUSY) {
1405 		/*
1406 		 * We failed to swap the buffer due to a commit taking
1407 		 * place on this CPU. We fail to record, but we reset
1408 		 * the max trace buffer (no one writes directly to it)
1409 		 * and flag that it failed.
1410 		 */
1411 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 			"Failed to swap buffers due to commit in progress\n");
1413 	}
1414 
1415 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416 
1417 	__update_max_tr(tr, tsk, cpu);
1418 	arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421 
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424 	/* Iterators are static, they should be filled or empty */
1425 	if (trace_buffer_iter(iter, iter->cpu_file))
1426 		return 0;
1427 
1428 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429 				full);
1430 }
1431 
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434 
1435 struct trace_selftests {
1436 	struct list_head		list;
1437 	struct tracer			*type;
1438 };
1439 
1440 static LIST_HEAD(postponed_selftests);
1441 
1442 static int save_selftest(struct tracer *type)
1443 {
1444 	struct trace_selftests *selftest;
1445 
1446 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447 	if (!selftest)
1448 		return -ENOMEM;
1449 
1450 	selftest->type = type;
1451 	list_add(&selftest->list, &postponed_selftests);
1452 	return 0;
1453 }
1454 
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457 	struct trace_array *tr = &global_trace;
1458 	struct tracer *saved_tracer = tr->current_trace;
1459 	int ret;
1460 
1461 	if (!type->selftest || tracing_selftest_disabled)
1462 		return 0;
1463 
1464 	/*
1465 	 * If a tracer registers early in boot up (before scheduling is
1466 	 * initialized and such), then do not run its selftests yet.
1467 	 * Instead, run it a little later in the boot process.
1468 	 */
1469 	if (!selftests_can_run)
1470 		return save_selftest(type);
1471 
1472 	/*
1473 	 * Run a selftest on this tracer.
1474 	 * Here we reset the trace buffer, and set the current
1475 	 * tracer to be this tracer. The tracer can then run some
1476 	 * internal tracing to verify that everything is in order.
1477 	 * If we fail, we do not register this tracer.
1478 	 */
1479 	tracing_reset_online_cpus(&tr->trace_buffer);
1480 
1481 	tr->current_trace = type;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 	if (type->use_max_tr) {
1485 		/* If we expanded the buffers, make sure the max is expanded too */
1486 		if (ring_buffer_expanded)
1487 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 					   RING_BUFFER_ALL_CPUS);
1489 		tr->allocated_snapshot = true;
1490 	}
1491 #endif
1492 
1493 	/* the test is responsible for initializing and enabling */
1494 	pr_info("Testing tracer %s: ", type->name);
1495 	ret = type->selftest(type, tr);
1496 	/* the test is responsible for resetting too */
1497 	tr->current_trace = saved_tracer;
1498 	if (ret) {
1499 		printk(KERN_CONT "FAILED!\n");
1500 		/* Add the warning after printing 'FAILED' */
1501 		WARN_ON(1);
1502 		return -1;
1503 	}
1504 	/* Only reset on passing, to avoid touching corrupted buffers */
1505 	tracing_reset_online_cpus(&tr->trace_buffer);
1506 
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 	if (type->use_max_tr) {
1509 		tr->allocated_snapshot = false;
1510 
1511 		/* Shrink the max buffer again */
1512 		if (ring_buffer_expanded)
1513 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 					   RING_BUFFER_ALL_CPUS);
1515 	}
1516 #endif
1517 
1518 	printk(KERN_CONT "PASSED\n");
1519 	return 0;
1520 }
1521 
1522 static __init int init_trace_selftests(void)
1523 {
1524 	struct trace_selftests *p, *n;
1525 	struct tracer *t, **last;
1526 	int ret;
1527 
1528 	selftests_can_run = true;
1529 
1530 	mutex_lock(&trace_types_lock);
1531 
1532 	if (list_empty(&postponed_selftests))
1533 		goto out;
1534 
1535 	pr_info("Running postponed tracer tests:\n");
1536 
1537 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 		ret = run_tracer_selftest(p->type);
1539 		/* If the test fails, then warn and remove from available_tracers */
1540 		if (ret < 0) {
1541 			WARN(1, "tracer: %s failed selftest, disabling\n",
1542 			     p->type->name);
1543 			last = &trace_types;
1544 			for (t = trace_types; t; t = t->next) {
1545 				if (t == p->type) {
1546 					*last = t->next;
1547 					break;
1548 				}
1549 				last = &t->next;
1550 			}
1551 		}
1552 		list_del(&p->list);
1553 		kfree(p);
1554 	}
1555 
1556  out:
1557 	mutex_unlock(&trace_types_lock);
1558 
1559 	return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565 	return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568 
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570 
1571 static void __init apply_trace_boot_options(void);
1572 
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581 	struct tracer *t;
1582 	int ret = 0;
1583 
1584 	if (!type->name) {
1585 		pr_info("Tracer must have a name\n");
1586 		return -1;
1587 	}
1588 
1589 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591 		return -1;
1592 	}
1593 
1594 	mutex_lock(&trace_types_lock);
1595 
1596 	tracing_selftest_running = true;
1597 
1598 	for (t = trace_types; t; t = t->next) {
1599 		if (strcmp(type->name, t->name) == 0) {
1600 			/* already found */
1601 			pr_info("Tracer %s already registered\n",
1602 				type->name);
1603 			ret = -1;
1604 			goto out;
1605 		}
1606 	}
1607 
1608 	if (!type->set_flag)
1609 		type->set_flag = &dummy_set_flag;
1610 	if (!type->flags) {
1611 		/* allocate a dummy tracer_flags */
1612 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613 		if (!type->flags) {
1614 			ret = -ENOMEM;
1615 			goto out;
1616 		}
1617 		type->flags->val = 0;
1618 		type->flags->opts = dummy_tracer_opt;
1619 	} else
1620 		if (!type->flags->opts)
1621 			type->flags->opts = dummy_tracer_opt;
1622 
1623 	/* store the tracer for __set_tracer_option */
1624 	type->flags->trace = type;
1625 
1626 	ret = run_tracer_selftest(type);
1627 	if (ret < 0)
1628 		goto out;
1629 
1630 	type->next = trace_types;
1631 	trace_types = type;
1632 	add_tracer_options(&global_trace, type);
1633 
1634  out:
1635 	tracing_selftest_running = false;
1636 	mutex_unlock(&trace_types_lock);
1637 
1638 	if (ret || !default_bootup_tracer)
1639 		goto out_unlock;
1640 
1641 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642 		goto out_unlock;
1643 
1644 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 	/* Do we want this tracer to start on bootup? */
1646 	tracing_set_tracer(&global_trace, type->name);
1647 	default_bootup_tracer = NULL;
1648 
1649 	apply_trace_boot_options();
1650 
1651 	/* disable other selftests, since this will break them. */
1652 	tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655 	       type->name);
1656 #endif
1657 
1658  out_unlock:
1659 	return ret;
1660 }
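
/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * registration, typically done from an initcall; all names here are
 * hypothetical.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */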
1661 
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664 	struct ring_buffer *buffer = buf->buffer;
1665 
1666 	if (!buffer)
1667 		return;
1668 
1669 	ring_buffer_record_disable(buffer);
1670 
1671 	/* Make sure all commits have finished */
1672 	synchronize_sched();
1673 	ring_buffer_reset_cpu(buffer, cpu);
1674 
1675 	ring_buffer_record_enable(buffer);
1676 }
1677 
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680 	struct ring_buffer *buffer = buf->buffer;
1681 	int cpu;
1682 
1683 	if (!buffer)
1684 		return;
1685 
1686 	ring_buffer_record_disable(buffer);
1687 
1688 	/* Make sure all commits have finished */
1689 	synchronize_sched();
1690 
1691 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692 
1693 	for_each_online_cpu(cpu)
1694 		ring_buffer_reset_cpu(buffer, cpu);
1695 
1696 	ring_buffer_record_enable(buffer);
1697 }
1698 
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702 	struct trace_array *tr;
1703 
1704 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 		tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 		tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709 	}
1710 }
1711 
1712 static int *tgid_map;
1713 
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719 	unsigned *map_cmdline_to_pid;
1720 	unsigned cmdline_num;
1721 	int cmdline_idx;
1722 	char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725 
1726 /* temporary disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728 
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733 
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
1738 
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740 				    struct saved_cmdlines_buffer *s)
1741 {
1742 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743 					GFP_KERNEL);
1744 	if (!s->map_cmdline_to_pid)
1745 		return -ENOMEM;
1746 
1747 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748 	if (!s->saved_cmdlines) {
1749 		kfree(s->map_cmdline_to_pid);
1750 		return -ENOMEM;
1751 	}
1752 
1753 	s->cmdline_idx = 0;
1754 	s->cmdline_num = val;
1755 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756 	       sizeof(s->map_pid_to_cmdline));
1757 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758 	       val * sizeof(*s->map_cmdline_to_pid));
1759 
1760 	return 0;
1761 }
1762 
1763 static int trace_create_savedcmd(void)
1764 {
1765 	int ret;
1766 
1767 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768 	if (!savedcmd)
1769 		return -ENOMEM;
1770 
1771 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772 	if (ret < 0) {
1773 		kfree(savedcmd);
1774 		savedcmd = NULL;
1775 		return -ENOMEM;
1776 	}
1777 
1778 	return 0;
1779 }
1780 
1781 int is_tracing_stopped(void)
1782 {
1783 	return global_trace.stop_count;
1784 }
1785 
1786 /**
1787  * tracing_start - quick start of the tracer
1788  *
1789  * If tracing is enabled but was stopped by tracing_stop,
1790  * this will start the tracer back up.
1791  */
1792 void tracing_start(void)
1793 {
1794 	struct ring_buffer *buffer;
1795 	unsigned long flags;
1796 
1797 	if (tracing_disabled)
1798 		return;
1799 
1800 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801 	if (--global_trace.stop_count) {
1802 		if (global_trace.stop_count < 0) {
1803 			/* Someone screwed up their debugging */
1804 			WARN_ON_ONCE(1);
1805 			global_trace.stop_count = 0;
1806 		}
1807 		goto out;
1808 	}
1809 
1810 	/* Prevent the buffers from switching */
1811 	arch_spin_lock(&global_trace.max_lock);
1812 
1813 	buffer = global_trace.trace_buffer.buffer;
1814 	if (buffer)
1815 		ring_buffer_record_enable(buffer);
1816 
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818 	buffer = global_trace.max_buffer.buffer;
1819 	if (buffer)
1820 		ring_buffer_record_enable(buffer);
1821 #endif
1822 
1823 	arch_spin_unlock(&global_trace.max_lock);
1824 
1825  out:
1826 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828 
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831 	struct ring_buffer *buffer;
1832 	unsigned long flags;
1833 
1834 	if (tracing_disabled)
1835 		return;
1836 
1837 	/* If global, we need to also start the max tracer */
1838 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839 		return tracing_start();
1840 
1841 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1842 
1843 	if (--tr->stop_count) {
1844 		if (tr->stop_count < 0) {
1845 			/* Someone screwed up their debugging */
1846 			WARN_ON_ONCE(1);
1847 			tr->stop_count = 0;
1848 		}
1849 		goto out;
1850 	}
1851 
1852 	buffer = tr->trace_buffer.buffer;
1853 	if (buffer)
1854 		ring_buffer_record_enable(buffer);
1855 
1856  out:
1857 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859 
1860 /**
1861  * tracing_stop - quick stop of the tracer
1862  *
1863  * Lightweight way to stop tracing. Use in conjunction with
1864  * tracing_start.
1865  */
1866 void tracing_stop(void)
1867 {
1868 	struct ring_buffer *buffer;
1869 	unsigned long flags;
1870 
1871 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872 	if (global_trace.stop_count++)
1873 		goto out;
1874 
1875 	/* Prevent the buffers from switching */
1876 	arch_spin_lock(&global_trace.max_lock);
1877 
1878 	buffer = global_trace.trace_buffer.buffer;
1879 	if (buffer)
1880 		ring_buffer_record_disable(buffer);
1881 
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883 	buffer = global_trace.max_buffer.buffer;
1884 	if (buffer)
1885 		ring_buffer_record_disable(buffer);
1886 #endif
1887 
1888 	arch_spin_unlock(&global_trace.max_lock);
1889 
1890  out:
1891 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
1893 
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896 	struct ring_buffer *buffer;
1897 	unsigned long flags;
1898 
1899 	/* If global, we need to also stop the max tracer */
1900 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901 		return tracing_stop();
1902 
1903 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1904 	if (tr->stop_count++)
1905 		goto out;
1906 
1907 	buffer = tr->trace_buffer.buffer;
1908 	if (buffer)
1909 		ring_buffer_record_disable(buffer);
1910 
1911  out:
1912 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914 
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917 	unsigned pid, idx;
1918 
1919 	/* treat recording of idle task as a success */
1920 	if (!tsk->pid)
1921 		return 1;
1922 
1923 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1924 		return 0;
1925 
1926 	/*
1927 	 * It's not the end of the world if we don't get
1928 	 * the lock, but we also don't want to spin
1929 	 * nor do we want to disable interrupts,
1930 	 * so if we miss here, then better luck next time.
1931 	 */
1932 	if (!arch_spin_trylock(&trace_cmdline_lock))
1933 		return 0;
1934 
1935 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1936 	if (idx == NO_CMDLINE_MAP) {
1937 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1938 
1939 		/*
1940 		 * Check whether the cmdline buffer at idx has a pid
1941 		 * mapped. We are going to overwrite that entry so we
1942 		 * need to clear the map_pid_to_cmdline. Otherwise we
1943 		 * would read the new comm for the old pid.
1944 		 */
1945 		pid = savedcmd->map_cmdline_to_pid[idx];
1946 		if (pid != NO_CMDLINE_MAP)
1947 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1948 
1949 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1950 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1951 
1952 		savedcmd->cmdline_idx = idx;
1953 	}
1954 
1955 	set_cmdline(idx, tsk->comm);
1956 
1957 	arch_spin_unlock(&trace_cmdline_lock);
1958 
1959 	return 1;
1960 }
1961 
1962 static void __trace_find_cmdline(int pid, char comm[])
1963 {
1964 	unsigned map;
1965 
1966 	if (!pid) {
1967 		strcpy(comm, "<idle>");
1968 		return;
1969 	}
1970 
1971 	if (WARN_ON_ONCE(pid < 0)) {
1972 		strcpy(comm, "<XXX>");
1973 		return;
1974 	}
1975 
1976 	if (pid > PID_MAX_DEFAULT) {
1977 		strcpy(comm, "<...>");
1978 		return;
1979 	}
1980 
1981 	map = savedcmd->map_pid_to_cmdline[pid];
1982 	if (map != NO_CMDLINE_MAP)
1983 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1984 	else
1985 		strcpy(comm, "<...>");
1986 }
1987 
1988 void trace_find_cmdline(int pid, char comm[])
1989 {
1990 	preempt_disable();
1991 	arch_spin_lock(&trace_cmdline_lock);
1992 
1993 	__trace_find_cmdline(pid, comm);
1994 
1995 	arch_spin_unlock(&trace_cmdline_lock);
1996 	preempt_enable();
1997 }
1998 
1999 int trace_find_tgid(int pid)
2000 {
2001 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2002 		return 0;
2003 
2004 	return tgid_map[pid];
2005 }
2006 
2007 static int trace_save_tgid(struct task_struct *tsk)
2008 {
2009 	/* treat recording of idle task as a success */
2010 	if (!tsk->pid)
2011 		return 1;
2012 
2013 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2014 		return 0;
2015 
2016 	tgid_map[tsk->pid] = tsk->tgid;
2017 	return 1;
2018 }
2019 
2020 static bool tracing_record_taskinfo_skip(int flags)
2021 {
2022 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2023 		return true;
2024 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2025 		return true;
2026 	if (!__this_cpu_read(trace_taskinfo_save))
2027 		return true;
2028 	return false;
2029 }
2030 
2031 /**
2032  * tracing_record_taskinfo - record the task info of a task
2033  *
2034  * @task:  The task to record
2035  * @flags: TRACE_RECORD_CMDLINE for recording comm
2036  *         TRACE_RECORD_TGID for recording tgid
2037  */
2038 void tracing_record_taskinfo(struct task_struct *task, int flags)
2039 {
2040 	bool done;
2041 
2042 	if (tracing_record_taskinfo_skip(flags))
2043 		return;
2044 
2045 	/*
2046 	 * Record as much task information as possible. If some fail, continue
2047 	 * to try to record the others.
2048 	 */
2049 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2050 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2051 
2052 	/* If recording any information failed, retry again soon. */
2053 	if (!done)
2054 		return;
2055 
2056 	__this_cpu_write(trace_taskinfo_save, false);
2057 }
2058 
2059 /**
2060  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2061  *
2062  * @prev:  previous task during sched_switch
2063  * @next:  next task during sched_switch
2064  * @flags: TRACE_RECORD_CMDLINE for recording comm
2065  *         TRACE_RECORD_TGID for recording tgid
2066  */
2067 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2068 					  struct task_struct *next, int flags)
2069 {
2070 	bool done;
2071 
2072 	if (tracing_record_taskinfo_skip(flags))
2073 		return;
2074 
2075 	/*
2076 	 * Record as much task information as possible. If some fail, continue
2077 	 * to try to record the others.
2078 	 */
2079 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2080 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2081 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2082 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2083 
2084 	/* If recording any information failed, retry again soon. */
2085 	if (!done)
2086 		return;
2087 
2088 	__this_cpu_write(trace_taskinfo_save, false);
2089 }
2090 
2091 /* Helpers to record a specific task information */
2092 void tracing_record_cmdline(struct task_struct *task)
2093 {
2094 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2095 }
2096 
2097 void tracing_record_tgid(struct task_struct *task)
2098 {
2099 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2100 }
2101 
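/*
 * Example (illustrative sketch; "pid", "comm" and "tgid" are local
 * variables of the caller): output code typically consumes the task info
 * saved above like this:
 *
 *	char comm[TASK_COMM_LEN];
 *	int tgid;
 *
 *	trace_find_cmdline(pid, comm);	(fills comm, "<...>" if unknown)
 *	tgid = trace_find_tgid(pid);	(returns 0 if never recorded)
 */
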
2102 /*
2103  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2104  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2105  * simplifies those functions and keeps them in sync.
2106  */
2107 enum print_line_t trace_handle_return(struct trace_seq *s)
2108 {
2109 	return trace_seq_has_overflowed(s) ?
2110 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2111 }
2112 EXPORT_SYMBOL_GPL(trace_handle_return);
2113 
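/*
 * Example (illustrative sketch; foo_trace_output is a hypothetical
 * trace_event output callback):
 *
 *	static enum print_line_t foo_trace_output(struct trace_iterator *iter,
 *						  int flags,
 *						  struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "foo: cpu=%d\n", iter->cpu);
 *		return trace_handle_return(s);
 *	}
 */
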
2114 void
2115 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2116 			     int pc)
2117 {
2118 	struct task_struct *tsk = current;
2119 
2120 	entry->preempt_count		= pc & 0xff;
2121 	entry->pid			= (tsk) ? tsk->pid : 0;
2122 	entry->flags =
2123 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2124 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2125 #else
2126 		TRACE_FLAG_IRQS_NOSUPPORT |
2127 #endif
2128 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2129 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2130 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2131 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2132 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2133 }
2134 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2135 
2136 struct ring_buffer_event *
2137 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2138 			  int type,
2139 			  unsigned long len,
2140 			  unsigned long flags, int pc)
2141 {
2142 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2143 }
2144 
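/*
 * Example (illustrative sketch of the reserve/fill/commit pattern this
 * wrapper supports; it mirrors trace_function() further below and assumes
 * buffer, flags, pc, ip and parent_ip are already set up by the caller):
 *
 *	struct ring_buffer_event *event;
 *	struct ftrace_entry *entry;
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	__buffer_unlock_commit(buffer, event);
 */
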
2145 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2146 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2147 static int trace_buffered_event_ref;
2148 
2149 /**
2150  * trace_buffered_event_enable - enable buffering events
2151  *
2152  * When events are being filtered, it is quicker to write the event
2153  * data into a temporary buffer if there is a likely chance that the
2154  * event will not be committed. Discarding an event from the ring
2155  * buffer is not as fast as committing one, and is much slower than
2156  * copying the data out of a temporary buffer and committing that.
2157  *
2158  * When an event is to be filtered, allocate per-CPU buffers to write
2159  * the event data into. If the event is filtered and discarded, it is
2160  * simply dropped; otherwise, the entire data is committed to the ring
2161  * buffer in one shot.
2162  */
2163 void trace_buffered_event_enable(void)
2164 {
2165 	struct ring_buffer_event *event;
2166 	struct page *page;
2167 	int cpu;
2168 
2169 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2170 
2171 	if (trace_buffered_event_ref++)
2172 		return;
2173 
2174 	for_each_tracing_cpu(cpu) {
2175 		page = alloc_pages_node(cpu_to_node(cpu),
2176 					GFP_KERNEL | __GFP_NORETRY, 0);
2177 		if (!page)
2178 			goto failed;
2179 
2180 		event = page_address(page);
2181 		memset(event, 0, sizeof(*event));
2182 
2183 		per_cpu(trace_buffered_event, cpu) = event;
2184 
2185 		preempt_disable();
2186 		if (cpu == smp_processor_id() &&
2187 		    this_cpu_read(trace_buffered_event) !=
2188 		    per_cpu(trace_buffered_event, cpu))
2189 			WARN_ON_ONCE(1);
2190 		preempt_enable();
2191 	}
2192 
2193 	return;
2194  failed:
2195 	trace_buffered_event_disable();
2196 }
2197 
2198 static void enable_trace_buffered_event(void *data)
2199 {
2200 	/* Probably not needed, but do it anyway */
2201 	smp_rmb();
2202 	this_cpu_dec(trace_buffered_event_cnt);
2203 }
2204 
2205 static void disable_trace_buffered_event(void *data)
2206 {
2207 	this_cpu_inc(trace_buffered_event_cnt);
2208 }
2209 
2210 /**
2211  * trace_buffered_event_disable - disable buffering events
2212  *
2213  * When a filter is removed, it is faster to not use the buffered
2214  * events, and to commit directly into the ring buffer. Free up
2215  * the temp buffers when there are no more users. This requires
2216  * special synchronization with current events.
2217  */
2218 void trace_buffered_event_disable(void)
2219 {
2220 	int cpu;
2221 
2222 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2223 
2224 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2225 		return;
2226 
2227 	if (--trace_buffered_event_ref)
2228 		return;
2229 
2230 	preempt_disable();
2231 	/* For each CPU, set the buffer as used. */
2232 	smp_call_function_many(tracing_buffer_mask,
2233 			       disable_trace_buffered_event, NULL, 1);
2234 	preempt_enable();
2235 
2236 	/* Wait for all current users to finish */
2237 	synchronize_sched();
2238 
2239 	for_each_tracing_cpu(cpu) {
2240 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2241 		per_cpu(trace_buffered_event, cpu) = NULL;
2242 	}
2243 	/*
2244 	 * Make sure trace_buffered_event is NULL before clearing
2245 	 * trace_buffered_event_cnt.
2246 	 */
2247 	smp_wmb();
2248 
2249 	preempt_disable();
2250 	/* Do the work on each cpu */
2251 	smp_call_function_many(tracing_buffer_mask,
2252 			       enable_trace_buffered_event, NULL, 1);
2253 	preempt_enable();
2254 }
2255 
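/*
 * Example (illustrative sketch of the expected calling convention, inferred
 * from the event_mutex assertions above; the filter update step is a
 * placeholder):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install or update the event filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */
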
2256 static struct ring_buffer *temp_buffer;
2257 
2258 struct ring_buffer_event *
2259 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2260 			  struct trace_event_file *trace_file,
2261 			  int type, unsigned long len,
2262 			  unsigned long flags, int pc)
2263 {
2264 	struct ring_buffer_event *entry;
2265 	int val;
2266 
2267 	*current_rb = trace_file->tr->trace_buffer.buffer;
2268 
2269 	if ((trace_file->flags &
2270 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2271 	    (entry = this_cpu_read(trace_buffered_event))) {
2272 		/* Try to use the per cpu buffer first */
2273 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2274 		if (val == 1) {
2275 			trace_event_setup(entry, type, flags, pc);
2276 			entry->array[0] = len;
2277 			return entry;
2278 		}
2279 		this_cpu_dec(trace_buffered_event_cnt);
2280 	}
2281 
2282 	entry = __trace_buffer_lock_reserve(*current_rb,
2283 					    type, len, flags, pc);
2284 	/*
2285 	 * If tracing is off, but we have triggers enabled
2286 	 * we still need to look at the event data. Use the temp_buffer
2287 	 * to store the trace event for the trigger to use. It's recursion
2288 	 * safe and will not be recorded anywhere.
2289 	 */
2290 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2291 		*current_rb = temp_buffer;
2292 		entry = __trace_buffer_lock_reserve(*current_rb,
2293 						    type, len, flags, pc);
2294 	}
2295 	return entry;
2296 }
2297 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2298 
2299 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2300 static DEFINE_MUTEX(tracepoint_printk_mutex);
2301 
2302 static void output_printk(struct trace_event_buffer *fbuffer)
2303 {
2304 	struct trace_event_call *event_call;
2305 	struct trace_event *event;
2306 	unsigned long flags;
2307 	struct trace_iterator *iter = tracepoint_print_iter;
2308 
2309 	/* We should never get here if iter is NULL */
2310 	if (WARN_ON_ONCE(!iter))
2311 		return;
2312 
2313 	event_call = fbuffer->trace_file->event_call;
2314 	if (!event_call || !event_call->event.funcs ||
2315 	    !event_call->event.funcs->trace)
2316 		return;
2317 
2318 	event = &fbuffer->trace_file->event_call->event;
2319 
2320 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2321 	trace_seq_init(&iter->seq);
2322 	iter->ent = fbuffer->entry;
2323 	event_call->event.funcs->trace(iter, 0, event);
2324 	trace_seq_putc(&iter->seq, 0);
2325 	printk("%s", iter->seq.buffer);
2326 
2327 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2328 }
2329 
2330 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2331 			     void __user *buffer, size_t *lenp,
2332 			     loff_t *ppos)
2333 {
2334 	int save_tracepoint_printk;
2335 	int ret;
2336 
2337 	mutex_lock(&tracepoint_printk_mutex);
2338 	save_tracepoint_printk = tracepoint_printk;
2339 
2340 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2341 
2342 	/*
2343 	 * This will force an early exit, as tracepoint_printk
2344 	 * is always zero when tracepoint_print_iter is not allocated.
2345 	 */
2346 	if (!tracepoint_print_iter)
2347 		tracepoint_printk = 0;
2348 
2349 	if (save_tracepoint_printk == tracepoint_printk)
2350 		goto out;
2351 
2352 	if (tracepoint_printk)
2353 		static_key_enable(&tracepoint_printk_key.key);
2354 	else
2355 		static_key_disable(&tracepoint_printk_key.key);
2356 
2357  out:
2358 	mutex_unlock(&tracepoint_printk_mutex);
2359 
2360 	return ret;
2361 }
2362 
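/*
 * Note: tracepoint_printk_sysctl() above is the proc handler behind the
 * kernel.tracepoint_printk sysctl, so the feature is normally toggled at
 * run time by writing 0 or 1 to /proc/sys/kernel/tracepoint_printk (it can
 * also be enabled at boot with the tp_printk command line option).
 */
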
2363 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2364 {
2365 	if (static_key_false(&tracepoint_printk_key.key))
2366 		output_printk(fbuffer);
2367 
2368 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2369 				    fbuffer->event, fbuffer->entry,
2370 				    fbuffer->flags, fbuffer->pc);
2371 }
2372 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2373 
2374 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2375 				     struct ring_buffer *buffer,
2376 				     struct ring_buffer_event *event,
2377 				     unsigned long flags, int pc,
2378 				     struct pt_regs *regs)
2379 {
2380 	__buffer_unlock_commit(buffer, event);
2381 
2382 	/*
2383 	 * If regs is not set, then skip the following callers:
2384 	 *   trace_buffer_unlock_commit_regs
2385 	 *   event_trigger_unlock_commit
2386 	 *   trace_event_buffer_commit
2387 	 *   trace_event_raw_event_sched_switch
2388 	 * Note, we can still get here via blktrace, wakeup tracer
2389 	 * and mmiotrace, but that's ok if they lose a function or
2390 	 * two. They are not that meaningful.
2391 	 */
2392 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2393 	ftrace_trace_userstack(buffer, flags, pc);
2394 }
2395 
2396 /*
2397  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2398  */
2399 void
2400 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2401 				   struct ring_buffer_event *event)
2402 {
2403 	__buffer_unlock_commit(buffer, event);
2404 }
2405 
2406 static void
2407 trace_process_export(struct trace_export *export,
2408 	       struct ring_buffer_event *event)
2409 {
2410 	struct trace_entry *entry;
2411 	unsigned int size = 0;
2412 
2413 	entry = ring_buffer_event_data(event);
2414 	size = ring_buffer_event_length(event);
2415 	export->write(entry, size);
2416 }
2417 
2418 static DEFINE_MUTEX(ftrace_export_lock);
2419 
2420 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2421 
2422 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2423 
2424 static inline void ftrace_exports_enable(void)
2425 {
2426 	static_branch_enable(&ftrace_exports_enabled);
2427 }
2428 
2429 static inline void ftrace_exports_disable(void)
2430 {
2431 	static_branch_disable(&ftrace_exports_enabled);
2432 }
2433 
2434 void ftrace_exports(struct ring_buffer_event *event)
2435 {
2436 	struct trace_export *export;
2437 
2438 	preempt_disable_notrace();
2439 
2440 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2441 	while (export) {
2442 		trace_process_export(export, event);
2443 		export = rcu_dereference_raw_notrace(export->next);
2444 	}
2445 
2446 	preempt_enable_notrace();
2447 }
2448 
2449 static inline void
2450 add_trace_export(struct trace_export **list, struct trace_export *export)
2451 {
2452 	rcu_assign_pointer(export->next, *list);
2453 	/*
2454 	 * We are adding the export to the list, but another
2455 	 * CPU might be walking that list. We need to make sure
2456 	 * the export->next pointer is valid before another CPU sees
2457 	 * the export pointer inserted into the list.
2458 	 */
2459 	rcu_assign_pointer(*list, export);
2460 }
2461 
2462 static inline int
2463 rm_trace_export(struct trace_export **list, struct trace_export *export)
2464 {
2465 	struct trace_export **p;
2466 
2467 	for (p = list; *p != NULL; p = &(*p)->next)
2468 		if (*p == export)
2469 			break;
2470 
2471 	if (*p != export)
2472 		return -1;
2473 
2474 	rcu_assign_pointer(*p, (*p)->next);
2475 
2476 	return 0;
2477 }
2478 
2479 static inline void
2480 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482 	if (*list == NULL)
2483 		ftrace_exports_enable();
2484 
2485 	add_trace_export(list, export);
2486 }
2487 
2488 static inline int
2489 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2490 {
2491 	int ret;
2492 
2493 	ret = rm_trace_export(list, export);
2494 	if (*list == NULL)
2495 		ftrace_exports_disable();
2496 
2497 	return ret;
2498 }
2499 
2500 int register_ftrace_export(struct trace_export *export)
2501 {
2502 	if (WARN_ON_ONCE(!export->write))
2503 		return -1;
2504 
2505 	mutex_lock(&ftrace_export_lock);
2506 
2507 	add_ftrace_export(&ftrace_exports_list, export);
2508 
2509 	mutex_unlock(&ftrace_export_lock);
2510 
2511 	return 0;
2512 }
2513 EXPORT_SYMBOL_GPL(register_ftrace_export);
2514 
2515 int unregister_ftrace_export(struct trace_export *export)
2516 {
2517 	int ret;
2518 
2519 	mutex_lock(&ftrace_export_lock);
2520 
2521 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2522 
2523 	mutex_unlock(&ftrace_export_lock);
2524 
2525 	return ret;
2526 }
2527 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2528 
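/*
 * Example (illustrative sketch of an export user; the write() callback
 * signature is inferred from trace_process_export() above, and my_write
 * and my_export are hypothetical names):
 *
 *	static void my_write(const void *buf, unsigned int len)
 *	{
 *		... push the raw trace entry to an external sink ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */
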
2529 void
2530 trace_function(struct trace_array *tr,
2531 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2532 	       int pc)
2533 {
2534 	struct trace_event_call *call = &event_function;
2535 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2536 	struct ring_buffer_event *event;
2537 	struct ftrace_entry *entry;
2538 
2539 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2540 					    flags, pc);
2541 	if (!event)
2542 		return;
2543 	entry	= ring_buffer_event_data(event);
2544 	entry->ip			= ip;
2545 	entry->parent_ip		= parent_ip;
2546 
2547 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2548 		if (static_branch_unlikely(&ftrace_exports_enabled))
2549 			ftrace_exports(event);
2550 		__buffer_unlock_commit(buffer, event);
2551 	}
2552 }
2553 
2554 #ifdef CONFIG_STACKTRACE
2555 
2556 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2557 struct ftrace_stack {
2558 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2559 };
2560 
2561 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2562 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2563 
2564 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2565 				 unsigned long flags,
2566 				 int skip, int pc, struct pt_regs *regs)
2567 {
2568 	struct trace_event_call *call = &event_kernel_stack;
2569 	struct ring_buffer_event *event;
2570 	struct stack_entry *entry;
2571 	struct stack_trace trace;
2572 	int use_stack;
2573 	int size = FTRACE_STACK_ENTRIES;
2574 
2575 	trace.nr_entries	= 0;
2576 	trace.skip		= skip;
2577 
2578 	/*
2579 	 * Add two, for this function and the call to save_stack_trace().
2580 	 * If regs is set, then these functions will not be in the way.
2581 	 */
2582 	if (!regs)
2583 		trace.skip += 2;
2584 
2585 	/*
2586 	 * Since events can happen in NMIs there's no safe way to
2587 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2588 	 * or NMI comes in, it will just have to use the default
2589 	 * FTRACE_STACK_ENTRIES sized caller array in the event itself.
2590 	 */
2591 	preempt_disable_notrace();
2592 
2593 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2594 	/*
2595 	 * We don't need any atomic variables, just a barrier.
2596 	 * If an interrupt comes in, we don't care, because it would
2597 	 * have exited and put the counter back to what we want.
2598 	 * We just need a barrier to keep gcc from moving things
2599 	 * around.
2600 	 */
2601 	barrier();
2602 	if (use_stack == 1) {
2603 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2604 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2605 
2606 		if (regs)
2607 			save_stack_trace_regs(regs, &trace);
2608 		else
2609 			save_stack_trace(&trace);
2610 
2611 		if (trace.nr_entries > size)
2612 			size = trace.nr_entries;
2613 	} else
2614 		/* From now on, use_stack is a boolean */
2615 		use_stack = 0;
2616 
2617 	size *= sizeof(unsigned long);
2618 
2619 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2620 					    sizeof(*entry) + size, flags, pc);
2621 	if (!event)
2622 		goto out;
2623 	entry = ring_buffer_event_data(event);
2624 
2625 	memset(&entry->caller, 0, size);
2626 
2627 	if (use_stack)
2628 		memcpy(&entry->caller, trace.entries,
2629 		       trace.nr_entries * sizeof(unsigned long));
2630 	else {
2631 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2632 		trace.entries		= entry->caller;
2633 		if (regs)
2634 			save_stack_trace_regs(regs, &trace);
2635 		else
2636 			save_stack_trace(&trace);
2637 	}
2638 
2639 	entry->size = trace.nr_entries;
2640 
2641 	if (!call_filter_check_discard(call, entry, buffer, event))
2642 		__buffer_unlock_commit(buffer, event);
2643 
2644  out:
2645 	/* Again, don't let gcc optimize things here */
2646 	barrier();
2647 	__this_cpu_dec(ftrace_stack_reserve);
2648 	preempt_enable_notrace();
2649 
2650 }
2651 
2652 static inline void ftrace_trace_stack(struct trace_array *tr,
2653 				      struct ring_buffer *buffer,
2654 				      unsigned long flags,
2655 				      int skip, int pc, struct pt_regs *regs)
2656 {
2657 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2658 		return;
2659 
2660 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2661 }
2662 
2663 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2664 		   int pc)
2665 {
2666 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2667 
2668 	if (rcu_is_watching()) {
2669 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2670 		return;
2671 	}
2672 
2673 	/*
2674 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2675 	 * but if the above rcu_is_watching() failed, then the NMI
2676 	 * triggered someplace critical, and rcu_irq_enter() should
2677 	 * not be called from NMI.
2678 	 */
2679 	if (unlikely(in_nmi()))
2680 		return;
2681 
2682 	/*
2683 	 * It is possible that a function is being traced in a
2684 	 * location that RCU is not watching. A call to
2685 	 * rcu_irq_enter() will make sure that it is, but there are
2686 	 * a few internal RCU functions that could be traced
2687 	 * where that won't work either. In those cases, we just
2688 	 * do nothing.
2689 	 */
2690 	if (unlikely(rcu_irq_enter_disabled()))
2691 		return;
2692 
2693 	rcu_irq_enter_irqson();
2694 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2695 	rcu_irq_exit_irqson();
2696 }
2697 
2698 /**
2699  * trace_dump_stack - record a stack back trace in the trace buffer
2700  * @skip: Number of functions to skip (helper handlers)
2701  */
2702 void trace_dump_stack(int skip)
2703 {
2704 	unsigned long flags;
2705 
2706 	if (tracing_disabled || tracing_selftest_running)
2707 		return;
2708 
2709 	local_save_flags(flags);
2710 
2711 	/*
2712 	 * Skip 3 more, which seems to get us to the caller of
2713 	 * this function.
2714 	 */
2715 	skip += 3;
2716 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2717 			     flags, skip, preempt_count(), NULL);
2718 }
2719 
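/*
 * Example (illustrative): a caller records its own stack trace into the
 * trace buffer, skipping no extra frames:
 *
 *	trace_dump_stack(0);
 */
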
2720 static DEFINE_PER_CPU(int, user_stack_count);
2721 
2722 void
2723 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2724 {
2725 	struct trace_event_call *call = &event_user_stack;
2726 	struct ring_buffer_event *event;
2727 	struct userstack_entry *entry;
2728 	struct stack_trace trace;
2729 
2730 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2731 		return;
2732 
2733 	/*
2734 	 * NMIs cannot handle page faults, even with fixups.
2735 	 * Saving the user stack can (and often does) fault.
2736 	 */
2737 	if (unlikely(in_nmi()))
2738 		return;
2739 
2740 	/*
2741 	 * Prevent recursion, since the user stack tracing may
2742 	 * trigger other kernel events.
2743 	 */
2744 	preempt_disable();
2745 	if (__this_cpu_read(user_stack_count))
2746 		goto out;
2747 
2748 	__this_cpu_inc(user_stack_count);
2749 
2750 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751 					    sizeof(*entry), flags, pc);
2752 	if (!event)
2753 		goto out_drop_count;
2754 	entry	= ring_buffer_event_data(event);
2755 
2756 	entry->tgid		= current->tgid;
2757 	memset(&entry->caller, 0, sizeof(entry->caller));
2758 
2759 	trace.nr_entries	= 0;
2760 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2761 	trace.skip		= 0;
2762 	trace.entries		= entry->caller;
2763 
2764 	save_stack_trace_user(&trace);
2765 	if (!call_filter_check_discard(call, entry, buffer, event))
2766 		__buffer_unlock_commit(buffer, event);
2767 
2768  out_drop_count:
2769 	__this_cpu_dec(user_stack_count);
2770  out:
2771 	preempt_enable();
2772 }
2773 
2774 #ifdef UNUSED
2775 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2776 {
2777 	ftrace_trace_userstack(tr, flags, preempt_count());
2778 }
2779 #endif /* UNUSED */
2780 
2781 #endif /* CONFIG_STACKTRACE */
2782 
2783 /* created for use with alloc_percpu */
2784 struct trace_buffer_struct {
2785 	int nesting;
2786 	char buffer[4][TRACE_BUF_SIZE];
2787 };
2788 
2789 static struct trace_buffer_struct *trace_percpu_buffer;
2790 
2791 /*
2792  * This allows for lockless recording.  If we're nested too deeply, then
2793  * this returns NULL.
2794  */
2795 static char *get_trace_buf(void)
2796 {
2797 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2798 
2799 	if (!buffer || buffer->nesting >= 4)
2800 		return NULL;
2801 
2802 	return &buffer->buffer[buffer->nesting++][0];
2803 }
2804 
2805 static void put_trace_buf(void)
2806 {
2807 	this_cpu_dec(trace_percpu_buffer->nesting);
2808 }
2809 
2810 static int alloc_percpu_trace_buffer(void)
2811 {
2812 	struct trace_buffer_struct *buffers;
2813 
2814 	buffers = alloc_percpu(struct trace_buffer_struct);
2815 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2816 		return -ENOMEM;
2817 
2818 	trace_percpu_buffer = buffers;
2819 	return 0;
2820 }
2821 
2822 static int buffers_allocated;
2823 
2824 void trace_printk_init_buffers(void)
2825 {
2826 	if (buffers_allocated)
2827 		return;
2828 
2829 	if (alloc_percpu_trace_buffer())
2830 		return;
2831 
2832 	/* trace_printk() is for debug use only. Don't use it in production. */
2833 
2834 	pr_warn("\n");
2835 	pr_warn("**********************************************************\n");
2836 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2837 	pr_warn("**                                                      **\n");
2838 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2839 	pr_warn("**                                                      **\n");
2840 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2841 	pr_warn("** unsafe for production use.                           **\n");
2842 	pr_warn("**                                                      **\n");
2843 	pr_warn("** If you see this message and you are not debugging    **\n");
2844 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2845 	pr_warn("**                                                      **\n");
2846 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2847 	pr_warn("**********************************************************\n");
2848 
2849 	/* Expand the buffers to set size */
2850 	tracing_update_buffers();
2851 
2852 	buffers_allocated = 1;
2853 
2854 	/*
2855 	 * trace_printk_init_buffers() can be called by modules.
2856 	 * If that happens, then we need to start cmdline recording
2857 	 * directly here. If the global_trace.buffer is already
2858 	 * allocated here, then this was called by module code.
2859 	 */
2860 	if (global_trace.trace_buffer.buffer)
2861 		tracing_start_cmdline_record();
2862 }
2863 
2864 void trace_printk_start_comm(void)
2865 {
2866 	/* Start tracing comms if trace printk is set */
2867 	if (!buffers_allocated)
2868 		return;
2869 	tracing_start_cmdline_record();
2870 }
2871 
2872 static void trace_printk_start_stop_comm(int enabled)
2873 {
2874 	if (!buffers_allocated)
2875 		return;
2876 
2877 	if (enabled)
2878 		tracing_start_cmdline_record();
2879 	else
2880 		tracing_stop_cmdline_record();
2881 }
2882 
2883 /**
2884  * trace_vbprintk - write a binary message to the tracing buffer
2885  *
2886  */
2887 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2888 {
2889 	struct trace_event_call *call = &event_bprint;
2890 	struct ring_buffer_event *event;
2891 	struct ring_buffer *buffer;
2892 	struct trace_array *tr = &global_trace;
2893 	struct bprint_entry *entry;
2894 	unsigned long flags;
2895 	char *tbuffer;
2896 	int len = 0, size, pc;
2897 
2898 	if (unlikely(tracing_selftest_running || tracing_disabled))
2899 		return 0;
2900 
2901 	/* Don't pollute graph traces with trace_vprintk internals */
2902 	pause_graph_tracing();
2903 
2904 	pc = preempt_count();
2905 	preempt_disable_notrace();
2906 
2907 	tbuffer = get_trace_buf();
2908 	if (!tbuffer) {
2909 		len = 0;
2910 		goto out_nobuffer;
2911 	}
2912 
2913 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2914 
2915 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2916 		goto out;
2917 
2918 	local_save_flags(flags);
2919 	size = sizeof(*entry) + sizeof(u32) * len;
2920 	buffer = tr->trace_buffer.buffer;
2921 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2922 					    flags, pc);
2923 	if (!event)
2924 		goto out;
2925 	entry = ring_buffer_event_data(event);
2926 	entry->ip			= ip;
2927 	entry->fmt			= fmt;
2928 
2929 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2930 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2931 		__buffer_unlock_commit(buffer, event);
2932 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2933 	}
2934 
2935 out:
2936 	put_trace_buf();
2937 
2938 out_nobuffer:
2939 	preempt_enable_notrace();
2940 	unpause_graph_tracing();
2941 
2942 	return len;
2943 }
2944 EXPORT_SYMBOL_GPL(trace_vbprintk);
2945 
2946 static int
2947 __trace_array_vprintk(struct ring_buffer *buffer,
2948 		      unsigned long ip, const char *fmt, va_list args)
2949 {
2950 	struct trace_event_call *call = &event_print;
2951 	struct ring_buffer_event *event;
2952 	int len = 0, size, pc;
2953 	struct print_entry *entry;
2954 	unsigned long flags;
2955 	char *tbuffer;
2956 
2957 	if (tracing_disabled || tracing_selftest_running)
2958 		return 0;
2959 
2960 	/* Don't pollute graph traces with trace_vprintk internals */
2961 	pause_graph_tracing();
2962 
2963 	pc = preempt_count();
2964 	preempt_disable_notrace();
2965 
2966 
2967 	tbuffer = get_trace_buf();
2968 	if (!tbuffer) {
2969 		len = 0;
2970 		goto out_nobuffer;
2971 	}
2972 
2973 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2974 
2975 	local_save_flags(flags);
2976 	size = sizeof(*entry) + len + 1;
2977 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2978 					    flags, pc);
2979 	if (!event)
2980 		goto out;
2981 	entry = ring_buffer_event_data(event);
2982 	entry->ip = ip;
2983 
2984 	memcpy(&entry->buf, tbuffer, len + 1);
2985 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2986 		__buffer_unlock_commit(buffer, event);
2987 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2988 	}
2989 
2990 out:
2991 	put_trace_buf();
2992 
2993 out_nobuffer:
2994 	preempt_enable_notrace();
2995 	unpause_graph_tracing();
2996 
2997 	return len;
2998 }
2999 
3000 int trace_array_vprintk(struct trace_array *tr,
3001 			unsigned long ip, const char *fmt, va_list args)
3002 {
3003 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3004 }
3005 
3006 int trace_array_printk(struct trace_array *tr,
3007 		       unsigned long ip, const char *fmt, ...)
3008 {
3009 	int ret;
3010 	va_list ap;
3011 
3012 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3013 		return 0;
3014 
3015 	va_start(ap, fmt);
3016 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3017 	va_end(ap);
3018 	return ret;
3019 }
3020 
3021 int trace_array_printk_buf(struct ring_buffer *buffer,
3022 			   unsigned long ip, const char *fmt, ...)
3023 {
3024 	int ret;
3025 	va_list ap;
3026 
3027 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3028 		return 0;
3029 
3030 	va_start(ap, fmt);
3031 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3032 	va_end(ap);
3033 	return ret;
3034 }
3035 
3036 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3037 {
3038 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3039 }
3040 EXPORT_SYMBOL_GPL(trace_vprintk);
3041 
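/*
 * Example (illustrative; "len" and "cpu" are local variables of the
 * caller): trace_vbprintk()/trace_vprintk() above are normally reached
 * through the trace_printk() macro:
 *
 *	trace_printk("queue len=%u on cpu %d\n", len, cpu);
 */
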
3042 static void trace_iterator_increment(struct trace_iterator *iter)
3043 {
3044 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3045 
3046 	iter->idx++;
3047 	if (buf_iter)
3048 		ring_buffer_read(buf_iter, NULL);
3049 }
3050 
3051 static struct trace_entry *
3052 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3053 		unsigned long *lost_events)
3054 {
3055 	struct ring_buffer_event *event;
3056 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3057 
3058 	if (buf_iter)
3059 		event = ring_buffer_iter_peek(buf_iter, ts);
3060 	else
3061 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3062 					 lost_events);
3063 
3064 	if (event) {
3065 		iter->ent_size = ring_buffer_event_length(event);
3066 		return ring_buffer_event_data(event);
3067 	}
3068 	iter->ent_size = 0;
3069 	return NULL;
3070 }
3071 
3072 static struct trace_entry *
3073 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3074 		  unsigned long *missing_events, u64 *ent_ts)
3075 {
3076 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3077 	struct trace_entry *ent, *next = NULL;
3078 	unsigned long lost_events = 0, next_lost = 0;
3079 	int cpu_file = iter->cpu_file;
3080 	u64 next_ts = 0, ts;
3081 	int next_cpu = -1;
3082 	int next_size = 0;
3083 	int cpu;
3084 
3085 	/*
3086 	 * If we are in a per_cpu trace file, don't bother iterating over
3087 	 * all CPUs; peek at that one CPU directly.
3088 	 */
3089 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3090 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3091 			return NULL;
3092 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3093 		if (ent_cpu)
3094 			*ent_cpu = cpu_file;
3095 
3096 		return ent;
3097 	}
3098 
3099 	for_each_tracing_cpu(cpu) {
3100 
3101 		if (ring_buffer_empty_cpu(buffer, cpu))
3102 			continue;
3103 
3104 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3105 
3106 		/*
3107 		 * Pick the entry with the smallest timestamp:
3108 		 */
3109 		if (ent && (!next || ts < next_ts)) {
3110 			next = ent;
3111 			next_cpu = cpu;
3112 			next_ts = ts;
3113 			next_lost = lost_events;
3114 			next_size = iter->ent_size;
3115 		}
3116 	}
3117 
3118 	iter->ent_size = next_size;
3119 
3120 	if (ent_cpu)
3121 		*ent_cpu = next_cpu;
3122 
3123 	if (ent_ts)
3124 		*ent_ts = next_ts;
3125 
3126 	if (missing_events)
3127 		*missing_events = next_lost;
3128 
3129 	return next;
3130 }
3131 
3132 /* Find the next real entry, without updating the iterator itself */
3133 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3134 					  int *ent_cpu, u64 *ent_ts)
3135 {
3136 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3137 }
3138 
3139 /* Find the next real entry, and increment the iterator to the next entry */
3140 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3141 {
3142 	iter->ent = __find_next_entry(iter, &iter->cpu,
3143 				      &iter->lost_events, &iter->ts);
3144 
3145 	if (iter->ent)
3146 		trace_iterator_increment(iter);
3147 
3148 	return iter->ent ? iter : NULL;
3149 }
3150 
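/*
 * Example (illustrative sketch of draining an iterator; it roughly mirrors
 * the loop used by ftrace_dump() later in this file, with iter and ret set
 * up by the caller):
 *
 *	while (trace_find_next_entry_inc(&iter)) {
 *		ret = print_trace_line(&iter);
 *		if (ret != TRACE_TYPE_NO_CONSUME)
 *			trace_consume(&iter);
 *	}
 */
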
3151 static void trace_consume(struct trace_iterator *iter)
3152 {
3153 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3154 			    &iter->lost_events);
3155 }
3156 
3157 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3158 {
3159 	struct trace_iterator *iter = m->private;
3160 	int i = (int)*pos;
3161 	void *ent;
3162 
3163 	WARN_ON_ONCE(iter->leftover);
3164 
3165 	(*pos)++;
3166 
3167 	/* can't go backwards */
3168 	if (iter->idx > i)
3169 		return NULL;
3170 
3171 	if (iter->idx < 0)
3172 		ent = trace_find_next_entry_inc(iter);
3173 	else
3174 		ent = iter;
3175 
3176 	while (ent && iter->idx < i)
3177 		ent = trace_find_next_entry_inc(iter);
3178 
3179 	iter->pos = *pos;
3180 
3181 	return ent;
3182 }
3183 
3184 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3185 {
3186 	struct ring_buffer_event *event;
3187 	struct ring_buffer_iter *buf_iter;
3188 	unsigned long entries = 0;
3189 	u64 ts;
3190 
3191 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3192 
3193 	buf_iter = trace_buffer_iter(iter, cpu);
3194 	if (!buf_iter)
3195 		return;
3196 
3197 	ring_buffer_iter_reset(buf_iter);
3198 
3199 	/*
3200 	 * With the max latency tracers, it is possible that a reset
3201 	 * never took place on a CPU. This is evident when the
3202 	 * timestamp is before the start of the buffer.
3203 	 */
3204 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3205 		if (ts >= iter->trace_buffer->time_start)
3206 			break;
3207 		entries++;
3208 		ring_buffer_read(buf_iter, NULL);
3209 	}
3210 
3211 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3212 }
3213 
3214 /*
3215  * The current tracer is copied to avoid taking a global lock
3216  * all around.
3217  */
3218 static void *s_start(struct seq_file *m, loff_t *pos)
3219 {
3220 	struct trace_iterator *iter = m->private;
3221 	struct trace_array *tr = iter->tr;
3222 	int cpu_file = iter->cpu_file;
3223 	void *p = NULL;
3224 	loff_t l = 0;
3225 	int cpu;
3226 
3227 	/*
3228 	 * Copy the tracer to avoid using a global lock all around.
3229 	 * iter->trace is a copy of current_trace; the pointer to the
3230 	 * name may be used instead of a strcmp(), as iter->trace->name
3231 	 * will point to the same string as current_trace->name.
3232 	 */
3233 	mutex_lock(&trace_types_lock);
3234 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3235 		*iter->trace = *tr->current_trace;
3236 	mutex_unlock(&trace_types_lock);
3237 
3238 #ifdef CONFIG_TRACER_MAX_TRACE
3239 	if (iter->snapshot && iter->trace->use_max_tr)
3240 		return ERR_PTR(-EBUSY);
3241 #endif
3242 
3243 	if (!iter->snapshot)
3244 		atomic_inc(&trace_record_taskinfo_disabled);
3245 
3246 	if (*pos != iter->pos) {
3247 		iter->ent = NULL;
3248 		iter->cpu = 0;
3249 		iter->idx = -1;
3250 
3251 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3252 			for_each_tracing_cpu(cpu)
3253 				tracing_iter_reset(iter, cpu);
3254 		} else
3255 			tracing_iter_reset(iter, cpu_file);
3256 
3257 		iter->leftover = 0;
3258 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3259 			;
3260 
3261 	} else {
3262 		/*
3263 		 * If we overflowed the seq_file before, then we want
3264 		 * to just reuse the trace_seq buffer again.
3265 		 */
3266 		if (iter->leftover)
3267 			p = iter;
3268 		else {
3269 			l = *pos - 1;
3270 			p = s_next(m, p, &l);
3271 		}
3272 	}
3273 
3274 	trace_event_read_lock();
3275 	trace_access_lock(cpu_file);
3276 	return p;
3277 }
3278 
3279 static void s_stop(struct seq_file *m, void *p)
3280 {
3281 	struct trace_iterator *iter = m->private;
3282 
3283 #ifdef CONFIG_TRACER_MAX_TRACE
3284 	if (iter->snapshot && iter->trace->use_max_tr)
3285 		return;
3286 #endif
3287 
3288 	if (!iter->snapshot)
3289 		atomic_dec(&trace_record_taskinfo_disabled);
3290 
3291 	trace_access_unlock(iter->cpu_file);
3292 	trace_event_read_unlock();
3293 }
3294 
3295 static void
3296 get_total_entries(struct trace_buffer *buf,
3297 		  unsigned long *total, unsigned long *entries)
3298 {
3299 	unsigned long count;
3300 	int cpu;
3301 
3302 	*total = 0;
3303 	*entries = 0;
3304 
3305 	for_each_tracing_cpu(cpu) {
3306 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3307 		/*
3308 		 * If this buffer has skipped entries, then we hold all
3309 		 * entries for the trace and we need to ignore the
3310 		 * ones before the time stamp.
3311 		 */
3312 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3313 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3314 			/* total is the same as the entries */
3315 			*total += count;
3316 		} else
3317 			*total += count +
3318 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3319 		*entries += count;
3320 	}
3321 }
3322 
3323 static void print_lat_help_header(struct seq_file *m)
3324 {
3325 	seq_puts(m, "#                  _------=> CPU#            \n"
3326 		    "#                 / _-----=> irqs-off        \n"
3327 		    "#                | / _----=> need-resched    \n"
3328 		    "#                || / _---=> hardirq/softirq \n"
3329 		    "#                ||| / _--=> preempt-depth   \n"
3330 		    "#                |||| /     delay            \n"
3331 		    "#  cmd     pid   ||||| time  |   caller      \n"
3332 		    "#     \\   /      |||||  \\    |   /         \n");
3333 }
3334 
3335 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3336 {
3337 	unsigned long total;
3338 	unsigned long entries;
3339 
3340 	get_total_entries(buf, &total, &entries);
3341 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3342 		   entries, total, num_online_cpus());
3343 	seq_puts(m, "#\n");
3344 }
3345 
3346 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3347 				   unsigned int flags)
3348 {
3349 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3350 
3351 	print_event_info(buf, m);
3352 
3353 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3354 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3355 }
3356 
3357 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3358 				       unsigned int flags)
3359 {
3360 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3361 	const char tgid_space[] = "          ";
3362 	const char space[] = "  ";
3363 
3364 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3365 		   tgid ? tgid_space : space);
3366 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3367 		   tgid ? tgid_space : space);
3368 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3369 		   tgid ? tgid_space : space);
3370 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3371 		   tgid ? tgid_space : space);
3372 	seq_printf(m, "#                          %s||| /     delay\n",
3373 		   tgid ? tgid_space : space);
3374 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3375 		   tgid ? "   TGID   " : space);
3376 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3377 		   tgid ? "     |    " : space);
3378 }
3379 
3380 void
3381 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3382 {
3383 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3384 	struct trace_buffer *buf = iter->trace_buffer;
3385 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3386 	struct tracer *type = iter->trace;
3387 	unsigned long entries;
3388 	unsigned long total;
3389 	const char *name = "preemption";
3390 
3391 	name = type->name;
3392 
3393 	get_total_entries(buf, &total, &entries);
3394 
3395 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3396 		   name, UTS_RELEASE);
3397 	seq_puts(m, "# -----------------------------------"
3398 		 "---------------------------------\n");
3399 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3400 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3401 		   nsecs_to_usecs(data->saved_latency),
3402 		   entries,
3403 		   total,
3404 		   buf->cpu,
3405 #if defined(CONFIG_PREEMPT_NONE)
3406 		   "server",
3407 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3408 		   "desktop",
3409 #elif defined(CONFIG_PREEMPT)
3410 		   "preempt",
3411 #else
3412 		   "unknown",
3413 #endif
3414 		   /* These are reserved for later use */
3415 		   0, 0, 0, 0);
3416 #ifdef CONFIG_SMP
3417 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3418 #else
3419 	seq_puts(m, ")\n");
3420 #endif
3421 	seq_puts(m, "#    -----------------\n");
3422 	seq_printf(m, "#    | task: %.16s-%d "
3423 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3424 		   data->comm, data->pid,
3425 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3426 		   data->policy, data->rt_priority);
3427 	seq_puts(m, "#    -----------------\n");
3428 
3429 	if (data->critical_start) {
3430 		seq_puts(m, "#  => started at: ");
3431 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3432 		trace_print_seq(m, &iter->seq);
3433 		seq_puts(m, "\n#  => ended at:   ");
3434 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3435 		trace_print_seq(m, &iter->seq);
3436 		seq_puts(m, "\n#\n");
3437 	}
3438 
3439 	seq_puts(m, "#\n");
3440 }
3441 
3442 static void test_cpu_buff_start(struct trace_iterator *iter)
3443 {
3444 	struct trace_seq *s = &iter->seq;
3445 	struct trace_array *tr = iter->tr;
3446 
3447 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3448 		return;
3449 
3450 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3451 		return;
3452 
3453 	if (cpumask_available(iter->started) &&
3454 	    cpumask_test_cpu(iter->cpu, iter->started))
3455 		return;
3456 
3457 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3458 		return;
3459 
3460 	if (cpumask_available(iter->started))
3461 		cpumask_set_cpu(iter->cpu, iter->started);
3462 
3463 	/* Don't print started cpu buffer for the first entry of the trace */
3464 	if (iter->idx > 1)
3465 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3466 				iter->cpu);
3467 }
3468 
3469 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3470 {
3471 	struct trace_array *tr = iter->tr;
3472 	struct trace_seq *s = &iter->seq;
3473 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3474 	struct trace_entry *entry;
3475 	struct trace_event *event;
3476 
3477 	entry = iter->ent;
3478 
3479 	test_cpu_buff_start(iter);
3480 
3481 	event = ftrace_find_event(entry->type);
3482 
3483 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3484 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3485 			trace_print_lat_context(iter);
3486 		else
3487 			trace_print_context(iter);
3488 	}
3489 
3490 	if (trace_seq_has_overflowed(s))
3491 		return TRACE_TYPE_PARTIAL_LINE;
3492 
3493 	if (event)
3494 		return event->funcs->trace(iter, sym_flags, event);
3495 
3496 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3497 
3498 	return trace_handle_return(s);
3499 }
3500 
3501 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3502 {
3503 	struct trace_array *tr = iter->tr;
3504 	struct trace_seq *s = &iter->seq;
3505 	struct trace_entry *entry;
3506 	struct trace_event *event;
3507 
3508 	entry = iter->ent;
3509 
3510 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3511 		trace_seq_printf(s, "%d %d %llu ",
3512 				 entry->pid, iter->cpu, iter->ts);
3513 
3514 	if (trace_seq_has_overflowed(s))
3515 		return TRACE_TYPE_PARTIAL_LINE;
3516 
3517 	event = ftrace_find_event(entry->type);
3518 	if (event)
3519 		return event->funcs->raw(iter, 0, event);
3520 
3521 	trace_seq_printf(s, "%d ?\n", entry->type);
3522 
3523 	return trace_handle_return(s);
3524 }
3525 
3526 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3527 {
3528 	struct trace_array *tr = iter->tr;
3529 	struct trace_seq *s = &iter->seq;
3530 	unsigned char newline = '\n';
3531 	struct trace_entry *entry;
3532 	struct trace_event *event;
3533 
3534 	entry = iter->ent;
3535 
3536 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3537 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3538 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3539 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3540 		if (trace_seq_has_overflowed(s))
3541 			return TRACE_TYPE_PARTIAL_LINE;
3542 	}
3543 
3544 	event = ftrace_find_event(entry->type);
3545 	if (event) {
3546 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3547 		if (ret != TRACE_TYPE_HANDLED)
3548 			return ret;
3549 	}
3550 
3551 	SEQ_PUT_FIELD(s, newline);
3552 
3553 	return trace_handle_return(s);
3554 }
3555 
3556 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3557 {
3558 	struct trace_array *tr = iter->tr;
3559 	struct trace_seq *s = &iter->seq;
3560 	struct trace_entry *entry;
3561 	struct trace_event *event;
3562 
3563 	entry = iter->ent;
3564 
3565 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3566 		SEQ_PUT_FIELD(s, entry->pid);
3567 		SEQ_PUT_FIELD(s, iter->cpu);
3568 		SEQ_PUT_FIELD(s, iter->ts);
3569 		if (trace_seq_has_overflowed(s))
3570 			return TRACE_TYPE_PARTIAL_LINE;
3571 	}
3572 
3573 	event = ftrace_find_event(entry->type);
3574 	return event ? event->funcs->binary(iter, 0, event) :
3575 		TRACE_TYPE_HANDLED;
3576 }
3577 
3578 int trace_empty(struct trace_iterator *iter)
3579 {
3580 	struct ring_buffer_iter *buf_iter;
3581 	int cpu;
3582 
3583 	/* If we are looking at one CPU buffer, only check that one */
3584 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3585 		cpu = iter->cpu_file;
3586 		buf_iter = trace_buffer_iter(iter, cpu);
3587 		if (buf_iter) {
3588 			if (!ring_buffer_iter_empty(buf_iter))
3589 				return 0;
3590 		} else {
3591 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3592 				return 0;
3593 		}
3594 		return 1;
3595 	}
3596 
3597 	for_each_tracing_cpu(cpu) {
3598 		buf_iter = trace_buffer_iter(iter, cpu);
3599 		if (buf_iter) {
3600 			if (!ring_buffer_iter_empty(buf_iter))
3601 				return 0;
3602 		} else {
3603 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3604 				return 0;
3605 		}
3606 	}
3607 
3608 	return 1;
3609 }
3610 
3611 /*  Called with trace_event_read_lock() held. */
3612 enum print_line_t print_trace_line(struct trace_iterator *iter)
3613 {
3614 	struct trace_array *tr = iter->tr;
3615 	unsigned long trace_flags = tr->trace_flags;
3616 	enum print_line_t ret;
3617 
3618 	if (iter->lost_events) {
3619 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3620 				 iter->cpu, iter->lost_events);
3621 		if (trace_seq_has_overflowed(&iter->seq))
3622 			return TRACE_TYPE_PARTIAL_LINE;
3623 	}
3624 
3625 	if (iter->trace && iter->trace->print_line) {
3626 		ret = iter->trace->print_line(iter);
3627 		if (ret != TRACE_TYPE_UNHANDLED)
3628 			return ret;
3629 	}
3630 
3631 	if (iter->ent->type == TRACE_BPUTS &&
3632 			trace_flags & TRACE_ITER_PRINTK &&
3633 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3634 		return trace_print_bputs_msg_only(iter);
3635 
3636 	if (iter->ent->type == TRACE_BPRINT &&
3637 			trace_flags & TRACE_ITER_PRINTK &&
3638 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3639 		return trace_print_bprintk_msg_only(iter);
3640 
3641 	if (iter->ent->type == TRACE_PRINT &&
3642 			trace_flags & TRACE_ITER_PRINTK &&
3643 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3644 		return trace_print_printk_msg_only(iter);
3645 
3646 	if (trace_flags & TRACE_ITER_BIN)
3647 		return print_bin_fmt(iter);
3648 
3649 	if (trace_flags & TRACE_ITER_HEX)
3650 		return print_hex_fmt(iter);
3651 
3652 	if (trace_flags & TRACE_ITER_RAW)
3653 		return print_raw_fmt(iter);
3654 
3655 	return print_trace_fmt(iter);
3656 }
3657 
3658 void trace_latency_header(struct seq_file *m)
3659 {
3660 	struct trace_iterator *iter = m->private;
3661 	struct trace_array *tr = iter->tr;
3662 
3663 	/* print nothing if the buffers are empty */
3664 	if (trace_empty(iter))
3665 		return;
3666 
3667 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3668 		print_trace_header(m, iter);
3669 
3670 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3671 		print_lat_help_header(m);
3672 }
3673 
3674 void trace_default_header(struct seq_file *m)
3675 {
3676 	struct trace_iterator *iter = m->private;
3677 	struct trace_array *tr = iter->tr;
3678 	unsigned long trace_flags = tr->trace_flags;
3679 
3680 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3681 		return;
3682 
3683 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3684 		/* print nothing if the buffers are empty */
3685 		if (trace_empty(iter))
3686 			return;
3687 		print_trace_header(m, iter);
3688 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3689 			print_lat_help_header(m);
3690 	} else {
3691 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3692 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3693 				print_func_help_header_irq(iter->trace_buffer,
3694 							   m, trace_flags);
3695 			else
3696 				print_func_help_header(iter->trace_buffer, m,
3697 						       trace_flags);
3698 		}
3699 	}
3700 }
3701 
3702 static void test_ftrace_alive(struct seq_file *m)
3703 {
3704 	if (!ftrace_is_dead())
3705 		return;
3706 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3707 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3708 }
3709 
3710 #ifdef CONFIG_TRACER_MAX_TRACE
3711 static void show_snapshot_main_help(struct seq_file *m)
3712 {
3713 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3714 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3715 		    "#                      Takes a snapshot of the main buffer.\n"
3716 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3717 		    "#                      (Doesn't have to be '2' works with any number that\n"
3718 		    "#                       is not a '0' or '1')\n");
3719 }
3720 
3721 static void show_snapshot_percpu_help(struct seq_file *m)
3722 {
3723 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3724 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3725 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3726 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3727 #else
3728 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3729 		    "#                     Must use main snapshot file to allocate.\n");
3730 #endif
3731 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3732 		    "#                      (Doesn't have to be '2' works with any number that\n"
3733 		    "#                       is not a '0' or '1')\n");
3734 }
3735 
3736 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3737 {
3738 	if (iter->tr->allocated_snapshot)
3739 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3740 	else
3741 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3742 
3743 	seq_puts(m, "# Snapshot commands:\n");
3744 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3745 		show_snapshot_main_help(m);
3746 	else
3747 		show_snapshot_percpu_help(m);
3748 }
3749 #else
3750 /* Should never be called */
3751 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3752 #endif
3753 
3754 static int s_show(struct seq_file *m, void *v)
3755 {
3756 	struct trace_iterator *iter = v;
3757 	int ret;
3758 
3759 	if (iter->ent == NULL) {
3760 		if (iter->tr) {
3761 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3762 			seq_puts(m, "#\n");
3763 			test_ftrace_alive(m);
3764 		}
3765 		if (iter->snapshot && trace_empty(iter))
3766 			print_snapshot_help(m, iter);
3767 		else if (iter->trace && iter->trace->print_header)
3768 			iter->trace->print_header(m);
3769 		else
3770 			trace_default_header(m);
3771 
3772 	} else if (iter->leftover) {
3773 		/*
3774 		 * If we filled the seq_file buffer earlier, we
3775 		 * want to just show it now.
3776 		 */
3777 		ret = trace_print_seq(m, &iter->seq);
3778 
3779 		/* ret should this time be zero, but you never know */
3780 		iter->leftover = ret;
3781 
3782 	} else {
3783 		print_trace_line(iter);
3784 		ret = trace_print_seq(m, &iter->seq);
3785 		/*
3786 		 * If we overflow the seq_file buffer, then it will
3787 		 * ask us for this data again at start up.
3788 		 * Use that instead.
3789 		 *  ret is 0 if seq_file write succeeded.
3790 		 *        -1 otherwise.
3791 		 */
3792 		iter->leftover = ret;
3793 	}
3794 
3795 	return 0;
3796 }
3797 
3798 /*
3799  * Should be used after trace_array_get(), trace_types_lock
3800  * ensures that i_cdev was already initialized.
3801  */
3802 static inline int tracing_get_cpu(struct inode *inode)
3803 {
3804 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3805 		return (long)inode->i_cdev - 1;
3806 	return RING_BUFFER_ALL_CPUS;
3807 }
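/*
 * The "- 1" above implies the encoding used by trace_create_cpu_file()
 * (later in this file): the per-CPU files stash "cpu + 1" in i_cdev so
 * that CPU 0 is distinguishable from a NULL i_cdev.  Rough sketch of the
 * two cases:
 *
 *   per_cpu/cpu2/trace : i_cdev == (void *)3  ->  returns 2
 *   trace              : i_cdev == NULL       ->  returns RING_BUFFER_ALL_CPUS
 */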
3808 
3809 static const struct seq_operations tracer_seq_ops = {
3810 	.start		= s_start,
3811 	.next		= s_next,
3812 	.stop		= s_stop,
3813 	.show		= s_show,
3814 };
3815 
3816 static struct trace_iterator *
3817 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3818 {
3819 	struct trace_array *tr = inode->i_private;
3820 	struct trace_iterator *iter;
3821 	int cpu;
3822 
3823 	if (tracing_disabled)
3824 		return ERR_PTR(-ENODEV);
3825 
3826 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3827 	if (!iter)
3828 		return ERR_PTR(-ENOMEM);
3829 
3830 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3831 				    GFP_KERNEL);
3832 	if (!iter->buffer_iter)
3833 		goto release;
3834 
3835 	/*
3836 	 * We make a copy of the current tracer to avoid concurrent
3837 	 * changes on it while we are reading.
3838 	 */
3839 	mutex_lock(&trace_types_lock);
3840 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3841 	if (!iter->trace)
3842 		goto fail;
3843 
3844 	*iter->trace = *tr->current_trace;
3845 
3846 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3847 		goto fail;
3848 
3849 	iter->tr = tr;
3850 
3851 #ifdef CONFIG_TRACER_MAX_TRACE
3852 	/* Currently only the top directory has a snapshot */
3853 	if (tr->current_trace->print_max || snapshot)
3854 		iter->trace_buffer = &tr->max_buffer;
3855 	else
3856 #endif
3857 		iter->trace_buffer = &tr->trace_buffer;
3858 	iter->snapshot = snapshot;
3859 	iter->pos = -1;
3860 	iter->cpu_file = tracing_get_cpu(inode);
3861 	mutex_init(&iter->mutex);
3862 
3863 	/* Notify the tracer early; before we stop tracing. */
3864 	if (iter->trace && iter->trace->open)
3865 		iter->trace->open(iter);
3866 
3867 	/* Annotate start of buffers if we had overruns */
3868 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3869 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3870 
3871 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3872 	if (trace_clocks[tr->clock_id].in_ns)
3873 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3874 
3875 	/* stop the trace while dumping if we are not opening "snapshot" */
3876 	if (!iter->snapshot)
3877 		tracing_stop_tr(tr);
3878 
3879 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3880 		for_each_tracing_cpu(cpu) {
3881 			iter->buffer_iter[cpu] =
3882 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3883 		}
3884 		ring_buffer_read_prepare_sync();
3885 		for_each_tracing_cpu(cpu) {
3886 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3887 			tracing_iter_reset(iter, cpu);
3888 		}
3889 	} else {
3890 		cpu = iter->cpu_file;
3891 		iter->buffer_iter[cpu] =
3892 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3893 		ring_buffer_read_prepare_sync();
3894 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3895 		tracing_iter_reset(iter, cpu);
3896 	}
3897 
3898 	mutex_unlock(&trace_types_lock);
3899 
3900 	return iter;
3901 
3902  fail:
3903 	mutex_unlock(&trace_types_lock);
3904 	kfree(iter->trace);
3905 	kfree(iter->buffer_iter);
3906 release:
3907 	seq_release_private(inode, file);
3908 	return ERR_PTR(-ENOMEM);
3909 }
3910 
3911 int tracing_open_generic(struct inode *inode, struct file *filp)
3912 {
3913 	if (tracing_disabled)
3914 		return -ENODEV;
3915 
3916 	filp->private_data = inode->i_private;
3917 	return 0;
3918 }
3919 
3920 bool tracing_is_disabled(void)
3921 {
3922 	return tracing_disabled ? true : false;
3923 }
3924 
3925 /*
3926  * Open and update trace_array ref count.
3927  * Must have the current trace_array passed to it.
3928  */
3929 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3930 {
3931 	struct trace_array *tr = inode->i_private;
3932 
3933 	if (tracing_disabled)
3934 		return -ENODEV;
3935 
3936 	if (trace_array_get(tr) < 0)
3937 		return -ENODEV;
3938 
3939 	filp->private_data = inode->i_private;
3940 
3941 	return 0;
3942 }
3943 
3944 static int tracing_release(struct inode *inode, struct file *file)
3945 {
3946 	struct trace_array *tr = inode->i_private;
3947 	struct seq_file *m = file->private_data;
3948 	struct trace_iterator *iter;
3949 	int cpu;
3950 
3951 	if (!(file->f_mode & FMODE_READ)) {
3952 		trace_array_put(tr);
3953 		return 0;
3954 	}
3955 
3956 	/* Writes do not use seq_file */
3957 	iter = m->private;
3958 	mutex_lock(&trace_types_lock);
3959 
3960 	for_each_tracing_cpu(cpu) {
3961 		if (iter->buffer_iter[cpu])
3962 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3963 	}
3964 
3965 	if (iter->trace && iter->trace->close)
3966 		iter->trace->close(iter);
3967 
3968 	if (!iter->snapshot)
3969 		/* reenable tracing if it was previously enabled */
3970 		tracing_start_tr(tr);
3971 
3972 	__trace_array_put(tr);
3973 
3974 	mutex_unlock(&trace_types_lock);
3975 
3976 	mutex_destroy(&iter->mutex);
3977 	free_cpumask_var(iter->started);
3978 	kfree(iter->trace);
3979 	kfree(iter->buffer_iter);
3980 	seq_release_private(inode, file);
3981 
3982 	return 0;
3983 }
3984 
3985 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3986 {
3987 	struct trace_array *tr = inode->i_private;
3988 
3989 	trace_array_put(tr);
3990 	return 0;
3991 }
3992 
3993 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3994 {
3995 	struct trace_array *tr = inode->i_private;
3996 
3997 	trace_array_put(tr);
3998 
3999 	return single_release(inode, file);
4000 }
4001 
4002 static int tracing_open(struct inode *inode, struct file *file)
4003 {
4004 	struct trace_array *tr = inode->i_private;
4005 	struct trace_iterator *iter;
4006 	int ret = 0;
4007 
4008 	if (trace_array_get(tr) < 0)
4009 		return -ENODEV;
4010 
4011 	/* If this file was open for write, then erase contents */
4012 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4013 		int cpu = tracing_get_cpu(inode);
4014 
4015 		if (cpu == RING_BUFFER_ALL_CPUS)
4016 			tracing_reset_online_cpus(&tr->trace_buffer);
4017 		else
4018 			tracing_reset(&tr->trace_buffer, cpu);
4019 	}
4020 
4021 	if (file->f_mode & FMODE_READ) {
4022 		iter = __tracing_open(inode, file, false);
4023 		if (IS_ERR(iter))
4024 			ret = PTR_ERR(iter);
4025 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4026 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4027 	}
4028 
4029 	if (ret < 0)
4030 		trace_array_put(tr);
4031 
4032 	return ret;
4033 }
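/*
 * Note the O_TRUNC handling above: opening "trace" for write with
 * truncation is what clears the buffer, so a plain shell redirection is
 * enough (path assumes the usual tracefs mount point):
 *
 *   # echo > /sys/kernel/tracing/trace               # clear all CPUs
 *   # echo > /sys/kernel/tracing/per_cpu/cpu0/trace  # clear only CPU 0
 */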
4034 
4035 /*
4036  * Some tracers are not suitable for instance buffers.
4037  * A tracer is always available for the global array (toplevel)
4038  * or if it explicitly states that it is.
4039  */
4040 static bool
4041 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4042 {
4043 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4044 }
4045 
4046 /* Find the next tracer that this trace array may use */
4047 static struct tracer *
4048 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4049 {
4050 	while (t && !trace_ok_for_array(t, tr))
4051 		t = t->next;
4052 
4053 	return t;
4054 }
4055 
4056 static void *
4057 t_next(struct seq_file *m, void *v, loff_t *pos)
4058 {
4059 	struct trace_array *tr = m->private;
4060 	struct tracer *t = v;
4061 
4062 	(*pos)++;
4063 
4064 	if (t)
4065 		t = get_tracer_for_array(tr, t->next);
4066 
4067 	return t;
4068 }
4069 
4070 static void *t_start(struct seq_file *m, loff_t *pos)
4071 {
4072 	struct trace_array *tr = m->private;
4073 	struct tracer *t;
4074 	loff_t l = 0;
4075 
4076 	mutex_lock(&trace_types_lock);
4077 
4078 	t = get_tracer_for_array(tr, trace_types);
4079 	for (; t && l < *pos; t = t_next(m, t, &l))
4080 		;
4081 
4082 	return t;
4083 }
4084 
4085 static void t_stop(struct seq_file *m, void *p)
4086 {
4087 	mutex_unlock(&trace_types_lock);
4088 }
4089 
4090 static int t_show(struct seq_file *m, void *v)
4091 {
4092 	struct tracer *t = v;
4093 
4094 	if (!t)
4095 		return 0;
4096 
4097 	seq_puts(m, t->name);
4098 	if (t->next)
4099 		seq_putc(m, ' ');
4100 	else
4101 		seq_putc(m, '\n');
4102 
4103 	return 0;
4104 }
4105 
4106 static const struct seq_operations show_traces_seq_ops = {
4107 	.start		= t_start,
4108 	.next		= t_next,
4109 	.stop		= t_stop,
4110 	.show		= t_show,
4111 };
4112 
4113 static int show_traces_open(struct inode *inode, struct file *file)
4114 {
4115 	struct trace_array *tr = inode->i_private;
4116 	struct seq_file *m;
4117 	int ret;
4118 
4119 	if (tracing_disabled)
4120 		return -ENODEV;
4121 
4122 	ret = seq_open(file, &show_traces_seq_ops);
4123 	if (ret)
4124 		return ret;
4125 
4126 	m = file->private_data;
4127 	m->private = tr;
4128 
4129 	return 0;
4130 }
4131 
4132 static ssize_t
4133 tracing_write_stub(struct file *filp, const char __user *ubuf,
4134 		   size_t count, loff_t *ppos)
4135 {
4136 	return count;
4137 }
4138 
4139 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4140 {
4141 	int ret;
4142 
4143 	if (file->f_mode & FMODE_READ)
4144 		ret = seq_lseek(file, offset, whence);
4145 	else
4146 		file->f_pos = ret = 0;
4147 
4148 	return ret;
4149 }
4150 
4151 static const struct file_operations tracing_fops = {
4152 	.open		= tracing_open,
4153 	.read		= seq_read,
4154 	.write		= tracing_write_stub,
4155 	.llseek		= tracing_lseek,
4156 	.release	= tracing_release,
4157 };
4158 
4159 static const struct file_operations show_traces_fops = {
4160 	.open		= show_traces_open,
4161 	.read		= seq_read,
4162 	.release	= seq_release,
4163 	.llseek		= seq_lseek,
4164 };
4165 
4166 /*
4167  * The tracer itself will not take this lock, but still we want
4168  * to provide a consistent cpumask to user-space:
4169  */
4170 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4171 
4172 /*
4173  * Temporary storage for the character representation of the
4174  * CPU bitmask (and one more byte for the newline):
4175  */
4176 static char mask_str[NR_CPUS + 1];
4177 
4178 static ssize_t
4179 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4180 		     size_t count, loff_t *ppos)
4181 {
4182 	struct trace_array *tr = file_inode(filp)->i_private;
4183 	int len;
4184 
4185 	mutex_lock(&tracing_cpumask_update_lock);
4186 
4187 	len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
4188 		       cpumask_pr_args(tr->tracing_cpumask));
4189 	if (len >= count) {
4190 		count = -EINVAL;
4191 		goto out_err;
4192 	}
4193 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4194 
4195 out_err:
4196 	mutex_unlock(&tracing_cpumask_update_lock);
4197 
4198 	return count;
4199 }
4200 
4201 static ssize_t
4202 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4203 		      size_t count, loff_t *ppos)
4204 {
4205 	struct trace_array *tr = file_inode(filp)->i_private;
4206 	cpumask_var_t tracing_cpumask_new;
4207 	int err, cpu;
4208 
4209 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4210 		return -ENOMEM;
4211 
4212 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4213 	if (err)
4214 		goto err_unlock;
4215 
4216 	mutex_lock(&tracing_cpumask_update_lock);
4217 
4218 	local_irq_disable();
4219 	arch_spin_lock(&tr->max_lock);
4220 	for_each_tracing_cpu(cpu) {
4221 		/*
4222 		 * Increase/decrease the disabled counter if we are
4223 		 * about to flip a bit in the cpumask:
4224 		 */
4225 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4226 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4227 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4228 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4229 		}
4230 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4231 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4232 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4233 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4234 		}
4235 	}
4236 	arch_spin_unlock(&tr->max_lock);
4237 	local_irq_enable();
4238 
4239 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4240 
4241 	mutex_unlock(&tracing_cpumask_update_lock);
4242 	free_cpumask_var(tracing_cpumask_new);
4243 
4244 	return count;
4245 
4246 err_unlock:
4247 	free_cpumask_var(tracing_cpumask_new);
4248 
4249 	return err;
4250 }
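/*
 * Example of driving tracing_cpumask_write() from the shell: the mask is
 * parsed by cpumask_parse_user() as a hex bitmask, e.g. (path assumes
 * the usual tracefs mount point):
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 *
 * The per-CPU "disabled" counters and the ring buffer record state are
 * flipped above only for CPUs whose bit actually changes.
 */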
4251 
4252 static const struct file_operations tracing_cpumask_fops = {
4253 	.open		= tracing_open_generic_tr,
4254 	.read		= tracing_cpumask_read,
4255 	.write		= tracing_cpumask_write,
4256 	.release	= tracing_release_generic_tr,
4257 	.llseek		= generic_file_llseek,
4258 };
4259 
4260 static int tracing_trace_options_show(struct seq_file *m, void *v)
4261 {
4262 	struct tracer_opt *trace_opts;
4263 	struct trace_array *tr = m->private;
4264 	u32 tracer_flags;
4265 	int i;
4266 
4267 	mutex_lock(&trace_types_lock);
4268 	tracer_flags = tr->current_trace->flags->val;
4269 	trace_opts = tr->current_trace->flags->opts;
4270 
4271 	for (i = 0; trace_options[i]; i++) {
4272 		if (tr->trace_flags & (1 << i))
4273 			seq_printf(m, "%s\n", trace_options[i]);
4274 		else
4275 			seq_printf(m, "no%s\n", trace_options[i]);
4276 	}
4277 
4278 	for (i = 0; trace_opts[i].name; i++) {
4279 		if (tracer_flags & trace_opts[i].bit)
4280 			seq_printf(m, "%s\n", trace_opts[i].name);
4281 		else
4282 			seq_printf(m, "no%s\n", trace_opts[i].name);
4283 	}
4284 	mutex_unlock(&trace_types_lock);
4285 
4286 	return 0;
4287 }
4288 
4289 static int __set_tracer_option(struct trace_array *tr,
4290 			       struct tracer_flags *tracer_flags,
4291 			       struct tracer_opt *opts, int neg)
4292 {
4293 	struct tracer *trace = tracer_flags->trace;
4294 	int ret;
4295 
4296 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4297 	if (ret)
4298 		return ret;
4299 
4300 	if (neg)
4301 		tracer_flags->val &= ~opts->bit;
4302 	else
4303 		tracer_flags->val |= opts->bit;
4304 	return 0;
4305 }
4306 
4307 /* Try to assign a tracer specific option */
4308 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4309 {
4310 	struct tracer *trace = tr->current_trace;
4311 	struct tracer_flags *tracer_flags = trace->flags;
4312 	struct tracer_opt *opts = NULL;
4313 	int i;
4314 
4315 	for (i = 0; tracer_flags->opts[i].name; i++) {
4316 		opts = &tracer_flags->opts[i];
4317 
4318 		if (strcmp(cmp, opts->name) == 0)
4319 			return __set_tracer_option(tr, trace->flags, opts, neg);
4320 	}
4321 
4322 	return -EINVAL;
4323 }
4324 
4325 /* Some tracers require overwrite to stay enabled */
4326 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4327 {
4328 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4329 		return -1;
4330 
4331 	return 0;
4332 }
4333 
4334 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4335 {
4336 	/* do nothing if flag is already set */
4337 	if (!!(tr->trace_flags & mask) == !!enabled)
4338 		return 0;
4339 
4340 	/* Give the tracer a chance to approve the change */
4341 	if (tr->current_trace->flag_changed)
4342 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4343 			return -EINVAL;
4344 
4345 	if (enabled)
4346 		tr->trace_flags |= mask;
4347 	else
4348 		tr->trace_flags &= ~mask;
4349 
4350 	if (mask == TRACE_ITER_RECORD_CMD)
4351 		trace_event_enable_cmd_record(enabled);
4352 
4353 	if (mask == TRACE_ITER_RECORD_TGID) {
4354 		if (!tgid_map)
4355 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4356 					   GFP_KERNEL);
4357 		if (!tgid_map) {
4358 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4359 			return -ENOMEM;
4360 		}
4361 
4362 		trace_event_enable_tgid_record(enabled);
4363 	}
4364 
4365 	if (mask == TRACE_ITER_EVENT_FORK)
4366 		trace_event_follow_fork(tr, enabled);
4367 
4368 	if (mask == TRACE_ITER_FUNC_FORK)
4369 		ftrace_pid_follow_fork(tr, enabled);
4370 
4371 	if (mask == TRACE_ITER_OVERWRITE) {
4372 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4373 #ifdef CONFIG_TRACER_MAX_TRACE
4374 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4375 #endif
4376 	}
4377 
4378 	if (mask == TRACE_ITER_PRINTK) {
4379 		trace_printk_start_stop_comm(enabled);
4380 		trace_printk_control(enabled);
4381 	}
4382 
4383 	return 0;
4384 }
4385 
4386 static int trace_set_options(struct trace_array *tr, char *option)
4387 {
4388 	char *cmp;
4389 	int neg = 0;
4390 	int ret = -ENODEV;
4391 	int i;
4392 	size_t orig_len = strlen(option);
4393 
4394 	cmp = strstrip(option);
4395 
4396 	if (strncmp(cmp, "no", 2) == 0) {
4397 		neg = 1;
4398 		cmp += 2;
4399 	}
4400 
4401 	mutex_lock(&trace_types_lock);
4402 
4403 	for (i = 0; trace_options[i]; i++) {
4404 		if (strcmp(cmp, trace_options[i]) == 0) {
4405 			ret = set_tracer_flag(tr, 1 << i, !neg);
4406 			break;
4407 		}
4408 	}
4409 
4410 	/* If no option could be set, test the specific tracer options */
4411 	if (!trace_options[i])
4412 		ret = set_tracer_option(tr, cmp, neg);
4413 
4414 	mutex_unlock(&trace_types_lock);
4415 
4416 	/*
4417 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4418 	 * turn it back into a space.
4419 	 */
4420 	if (orig_len > strlen(option))
4421 		option[strlen(option)] = ' ';
4422 
4423 	return ret;
4424 }
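/*
 * trace_set_options() is the backend for writes to the trace_options
 * file (see tracing_trace_options_write() below).  A minimal sketch of
 * its use from the shell ("overwrite" is one of the generic
 * trace_options[] flags; prefix "no" to clear it):
 *
 *   # echo overwrite   > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * Options not found in trace_options[] fall through to the current
 * tracer's own flags via set_tracer_option().
 */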
4425 
4426 static void __init apply_trace_boot_options(void)
4427 {
4428 	char *buf = trace_boot_options_buf;
4429 	char *option;
4430 
4431 	while (true) {
4432 		option = strsep(&buf, ",");
4433 
4434 		if (!option)
4435 			break;
4436 
4437 		if (*option)
4438 			trace_set_options(&global_trace, option);
4439 
4440 		/* Put back the comma to allow this to be called again */
4441 		if (buf)
4442 			*(buf - 1) = ',';
4443 	}
4444 }
4445 
4446 static ssize_t
4447 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4448 			size_t cnt, loff_t *ppos)
4449 {
4450 	struct seq_file *m = filp->private_data;
4451 	struct trace_array *tr = m->private;
4452 	char buf[64];
4453 	int ret;
4454 
4455 	if (cnt >= sizeof(buf))
4456 		return -EINVAL;
4457 
4458 	if (copy_from_user(buf, ubuf, cnt))
4459 		return -EFAULT;
4460 
4461 	buf[cnt] = 0;
4462 
4463 	ret = trace_set_options(tr, buf);
4464 	if (ret < 0)
4465 		return ret;
4466 
4467 	*ppos += cnt;
4468 
4469 	return cnt;
4470 }
4471 
4472 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4473 {
4474 	struct trace_array *tr = inode->i_private;
4475 	int ret;
4476 
4477 	if (tracing_disabled)
4478 		return -ENODEV;
4479 
4480 	if (trace_array_get(tr) < 0)
4481 		return -ENODEV;
4482 
4483 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4484 	if (ret < 0)
4485 		trace_array_put(tr);
4486 
4487 	return ret;
4488 }
4489 
4490 static const struct file_operations tracing_iter_fops = {
4491 	.open		= tracing_trace_options_open,
4492 	.read		= seq_read,
4493 	.llseek		= seq_lseek,
4494 	.release	= tracing_single_release_tr,
4495 	.write		= tracing_trace_options_write,
4496 };
4497 
4498 static const char readme_msg[] =
4499 	"tracing mini-HOWTO:\n\n"
4500 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4501 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4502 	" Important files:\n"
4503 	"  trace\t\t\t- The static contents of the buffer\n"
4504 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4505 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4506 	"  current_tracer\t- function and latency tracers\n"
4507 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4508 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4509 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4510 	"  trace_clock\t\t-change the clock used to order events\n"
4511 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4512 	"      global:   Synced across CPUs but slows tracing down.\n"
4513 	"     counter:   Not a clock, but just an increment\n"
4514 	"      uptime:   Jiffy counter from time of boot\n"
4515 	"        perf:   Same clock that perf events use\n"
4516 #ifdef CONFIG_X86_64
4517 	"     x86-tsc:   TSC cycle counter\n"
4518 #endif
4519 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4520 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4521 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4522 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4523 	"\t\t\t  Remove sub-buffer with rmdir\n"
4524 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4525 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4526 	"\t\t\t  option name\n"
4527 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4528 #ifdef CONFIG_DYNAMIC_FTRACE
4529 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4530 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4531 	"\t\t\t  functions\n"
4532 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4533 	"\t     modules: Can select a group via module\n"
4534 	"\t      Format: :mod:<module-name>\n"
4535 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4536 	"\t    triggers: a command to perform when function is hit\n"
4537 	"\t      Format: <function>:<trigger>[:count]\n"
4538 	"\t     trigger: traceon, traceoff\n"
4539 	"\t\t      enable_event:<system>:<event>\n"
4540 	"\t\t      disable_event:<system>:<event>\n"
4541 #ifdef CONFIG_STACKTRACE
4542 	"\t\t      stacktrace\n"
4543 #endif
4544 #ifdef CONFIG_TRACER_SNAPSHOT
4545 	"\t\t      snapshot\n"
4546 #endif
4547 	"\t\t      dump\n"
4548 	"\t\t      cpudump\n"
4549 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4550 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4551 	"\t     The first one will disable tracing every time do_fault is hit\n"
4552 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4553 	"\t       The first time do trap is hit and it disables tracing, the\n"
4554 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4555 	"\t       the counter will not decrement. It only decrements when the\n"
4556 	"\t       trigger did work\n"
4557 	"\t     To remove trigger without count:\n"
4558 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4559 	"\t     To remove trigger with a count:\n"
4560 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4561 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4562 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4563 	"\t    modules: Can select a group via module command :mod:\n"
4564 	"\t    Does not accept triggers\n"
4565 #endif /* CONFIG_DYNAMIC_FTRACE */
4566 #ifdef CONFIG_FUNCTION_TRACER
4567 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4568 	"\t\t    (function)\n"
4569 #endif
4570 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4571 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4572 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4573 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4574 #endif
4575 #ifdef CONFIG_TRACER_SNAPSHOT
4576 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4577 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4578 	"\t\t\t  information\n"
4579 #endif
4580 #ifdef CONFIG_STACK_TRACER
4581 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4582 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4583 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4584 	"\t\t\t  new trace)\n"
4585 #ifdef CONFIG_DYNAMIC_FTRACE
4586 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4587 	"\t\t\t  traces\n"
4588 #endif
4589 #endif /* CONFIG_STACK_TRACER */
4590 #ifdef CONFIG_KPROBE_EVENTS
4591 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4592 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4593 #endif
4594 #ifdef CONFIG_UPROBE_EVENTS
4595 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4596 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4597 #endif
4598 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4599 	"\t  accepts: event-definitions (one definition per line)\n"
4600 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4601 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4602 	"\t           -:[<group>/]<event>\n"
4603 #ifdef CONFIG_KPROBE_EVENTS
4604 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4605   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4606 #endif
4607 #ifdef CONFIG_UPROBE_EVENTS
4608 	"\t    place: <path>:<offset>\n"
4609 #endif
4610 	"\t     args: <name>=fetcharg[:type]\n"
4611 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4612 	"\t           $stack<index>, $stack, $retval, $comm\n"
4613 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4614 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4615 #endif
4616 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4617 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4618 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4619 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4620 	"\t\t\t  events\n"
4621 	"      filter\t\t- If set, only events passing filter are traced\n"
4622 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4623 	"\t\t\t  <event>:\n"
4624 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4625 	"      filter\t\t- If set, only events passing filter are traced\n"
4626 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4627 	"\t    Format: <trigger>[:count][if <filter>]\n"
4628 	"\t   trigger: traceon, traceoff\n"
4629 	"\t            enable_event:<system>:<event>\n"
4630 	"\t            disable_event:<system>:<event>\n"
4631 #ifdef CONFIG_HIST_TRIGGERS
4632 	"\t            enable_hist:<system>:<event>\n"
4633 	"\t            disable_hist:<system>:<event>\n"
4634 #endif
4635 #ifdef CONFIG_STACKTRACE
4636 	"\t\t    stacktrace\n"
4637 #endif
4638 #ifdef CONFIG_TRACER_SNAPSHOT
4639 	"\t\t    snapshot\n"
4640 #endif
4641 #ifdef CONFIG_HIST_TRIGGERS
4642 	"\t\t    hist (see below)\n"
4643 #endif
4644 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4645 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4646 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4647 	"\t                  events/block/block_unplug/trigger\n"
4648 	"\t   The first disables tracing every time block_unplug is hit.\n"
4649 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4650 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4651 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4652 	"\t   Like function triggers, the counter is only decremented if it\n"
4653 	"\t    enabled or disabled tracing.\n"
4654 	"\t   To remove a trigger without a count:\n"
4655 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4656 	"\t   To remove a trigger with a count:\n"
4657 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4658 	"\t   Filters can be ignored when removing a trigger.\n"
4659 #ifdef CONFIG_HIST_TRIGGERS
4660 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4661 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4662 	"\t            [:values=<field1[,field2,...]>]\n"
4663 	"\t            [:sort=<field1[,field2,...]>]\n"
4664 	"\t            [:size=#entries]\n"
4665 	"\t            [:pause][:continue][:clear]\n"
4666 	"\t            [:name=histname1]\n"
4667 	"\t            [if <filter>]\n\n"
4668 	"\t    When a matching event is hit, an entry is added to a hash\n"
4669 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4670 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4671 	"\t    correspond to fields in the event's format description.  Keys\n"
4672 	"\t    can be any field, or the special string 'stacktrace'.\n"
4673 	"\t    Compound keys consisting of up to two fields can be specified\n"
4674 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4675 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4676 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4677 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4678 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4679 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4680 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4681 	"\t    its histogram data will be shared with other triggers of the\n"
4682 	"\t    same name, and trigger hits will update this common data.\n\n"
4683 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4684 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4685 	"\t    triggers attached to an event, there will be a table for each\n"
4686 	"\t    trigger in the output.  The table displayed for a named\n"
4687 	"\t    trigger will be the same as any other instance having the\n"
4688 	"\t    same name.  The default format used to display a given field\n"
4689 	"\t    can be modified by appending any of the following modifiers\n"
4690 	"\t    to the field name, as applicable:\n\n"
4691 	"\t            .hex        display a number as a hex value\n"
4692 	"\t            .sym        display an address as a symbol\n"
4693 	"\t            .sym-offset display an address as a symbol and offset\n"
4694 	"\t            .execname   display a common_pid as a program name\n"
4695 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4696 	"\t            .log2       display log2 value rather than raw number\n\n"
4697 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4698 	"\t    trigger or to start a hist trigger but not log any events\n"
4699 	"\t    until told to do so.  'continue' can be used to start or\n"
4700 	"\t    restart a paused hist trigger.\n\n"
4701 	"\t    The 'clear' parameter will clear the contents of a running\n"
4702 	"\t    hist trigger and leave its current paused/active state\n"
4703 	"\t    unchanged.\n\n"
4704 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4705 	"\t    have one event conditionally start and stop another event's\n"
4706 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4707 	"\t    the enable_event and disable_event triggers.\n"
4708 #endif
4709 ;
4710 
4711 static ssize_t
4712 tracing_readme_read(struct file *filp, char __user *ubuf,
4713 		       size_t cnt, loff_t *ppos)
4714 {
4715 	return simple_read_from_buffer(ubuf, cnt, ppos,
4716 					readme_msg, strlen(readme_msg));
4717 }
4718 
4719 static const struct file_operations tracing_readme_fops = {
4720 	.open		= tracing_open_generic,
4721 	.read		= tracing_readme_read,
4722 	.llseek		= generic_file_llseek,
4723 };
4724 
4725 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4726 {
4727 	int *ptr = v;
4728 
4729 	if (*pos || m->count)
4730 		ptr++;
4731 
4732 	(*pos)++;
4733 
4734 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4735 		if (trace_find_tgid(*ptr))
4736 			return ptr;
4737 	}
4738 
4739 	return NULL;
4740 }
4741 
4742 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4743 {
4744 	void *v;
4745 	loff_t l = 0;
4746 
4747 	if (!tgid_map)
4748 		return NULL;
4749 
4750 	v = &tgid_map[0];
4751 	while (l <= *pos) {
4752 		v = saved_tgids_next(m, v, &l);
4753 		if (!v)
4754 			return NULL;
4755 	}
4756 
4757 	return v;
4758 }
4759 
4760 static void saved_tgids_stop(struct seq_file *m, void *v)
4761 {
4762 }
4763 
4764 static int saved_tgids_show(struct seq_file *m, void *v)
4765 {
4766 	int pid = (int *)v - tgid_map;
4767 
4768 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4769 	return 0;
4770 }
4771 
4772 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4773 	.start		= saved_tgids_start,
4774 	.stop		= saved_tgids_stop,
4775 	.next		= saved_tgids_next,
4776 	.show		= saved_tgids_show,
4777 };
4778 
4779 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4780 {
4781 	if (tracing_disabled)
4782 		return -ENODEV;
4783 
4784 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4785 }
4786 
4787 
4788 static const struct file_operations tracing_saved_tgids_fops = {
4789 	.open		= tracing_saved_tgids_open,
4790 	.read		= seq_read,
4791 	.llseek		= seq_lseek,
4792 	.release	= seq_release,
4793 };
4794 
4795 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4796 {
4797 	unsigned int *ptr = v;
4798 
4799 	if (*pos || m->count)
4800 		ptr++;
4801 
4802 	(*pos)++;
4803 
4804 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4805 	     ptr++) {
4806 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4807 			continue;
4808 
4809 		return ptr;
4810 	}
4811 
4812 	return NULL;
4813 }
4814 
4815 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4816 {
4817 	void *v;
4818 	loff_t l = 0;
4819 
4820 	preempt_disable();
4821 	arch_spin_lock(&trace_cmdline_lock);
4822 
4823 	v = &savedcmd->map_cmdline_to_pid[0];
4824 	while (l <= *pos) {
4825 		v = saved_cmdlines_next(m, v, &l);
4826 		if (!v)
4827 			return NULL;
4828 	}
4829 
4830 	return v;
4831 }
4832 
4833 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4834 {
4835 	arch_spin_unlock(&trace_cmdline_lock);
4836 	preempt_enable();
4837 }
4838 
4839 static int saved_cmdlines_show(struct seq_file *m, void *v)
4840 {
4841 	char buf[TASK_COMM_LEN];
4842 	unsigned int *pid = v;
4843 
4844 	__trace_find_cmdline(*pid, buf);
4845 	seq_printf(m, "%d %s\n", *pid, buf);
4846 	return 0;
4847 }
4848 
4849 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4850 	.start		= saved_cmdlines_start,
4851 	.next		= saved_cmdlines_next,
4852 	.stop		= saved_cmdlines_stop,
4853 	.show		= saved_cmdlines_show,
4854 };
4855 
4856 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4857 {
4858 	if (tracing_disabled)
4859 		return -ENODEV;
4860 
4861 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4862 }
4863 
4864 static const struct file_operations tracing_saved_cmdlines_fops = {
4865 	.open		= tracing_saved_cmdlines_open,
4866 	.read		= seq_read,
4867 	.llseek		= seq_lseek,
4868 	.release	= seq_release,
4869 };
4870 
4871 static ssize_t
4872 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4873 				 size_t cnt, loff_t *ppos)
4874 {
4875 	char buf[64];
4876 	int r;
4877 
4878 	arch_spin_lock(&trace_cmdline_lock);
4879 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4880 	arch_spin_unlock(&trace_cmdline_lock);
4881 
4882 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4883 }
4884 
4885 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4886 {
4887 	kfree(s->saved_cmdlines);
4888 	kfree(s->map_cmdline_to_pid);
4889 	kfree(s);
4890 }
4891 
4892 static int tracing_resize_saved_cmdlines(unsigned int val)
4893 {
4894 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4895 
4896 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4897 	if (!s)
4898 		return -ENOMEM;
4899 
4900 	if (allocate_cmdlines_buffer(val, s) < 0) {
4901 		kfree(s);
4902 		return -ENOMEM;
4903 	}
4904 
4905 	arch_spin_lock(&trace_cmdline_lock);
4906 	savedcmd_temp = savedcmd;
4907 	savedcmd = s;
4908 	arch_spin_unlock(&trace_cmdline_lock);
4909 	free_saved_cmdlines_buffer(savedcmd_temp);
4910 
4911 	return 0;
4912 }
4913 
4914 static ssize_t
4915 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4916 				  size_t cnt, loff_t *ppos)
4917 {
4918 	unsigned long val;
4919 	int ret;
4920 
4921 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4922 	if (ret)
4923 		return ret;
4924 
4925 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4926 	if (!val || val > PID_MAX_DEFAULT)
4927 		return -EINVAL;
4928 
4929 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4930 	if (ret < 0)
4931 		return ret;
4932 
4933 	*ppos += cnt;
4934 
4935 	return cnt;
4936 }
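/*
 * Example resize of the saved command-line cache (values outside
 * 1..PID_MAX_DEFAULT are rejected above), path assuming the usual
 * tracefs mount point:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */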
4937 
4938 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4939 	.open		= tracing_open_generic,
4940 	.read		= tracing_saved_cmdlines_size_read,
4941 	.write		= tracing_saved_cmdlines_size_write,
4942 };
4943 
4944 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4945 static union trace_eval_map_item *
4946 update_eval_map(union trace_eval_map_item *ptr)
4947 {
4948 	if (!ptr->map.eval_string) {
4949 		if (ptr->tail.next) {
4950 			ptr = ptr->tail.next;
4951 			/* Set ptr to the next real item (skip head) */
4952 			ptr++;
4953 		} else
4954 			return NULL;
4955 	}
4956 	return ptr;
4957 }
4958 
4959 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	union trace_eval_map_item *ptr = v;
4962 
4963 	/*
4964 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4965 	 * This really should never happen.
4966 	 */
4967 	ptr = update_eval_map(ptr);
4968 	if (WARN_ON_ONCE(!ptr))
4969 		return NULL;
4970 
4971 	ptr++;
4972 
4973 	(*pos)++;
4974 
4975 	ptr = update_eval_map(ptr);
4976 
4977 	return ptr;
4978 }
4979 
4980 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4981 {
4982 	union trace_eval_map_item *v;
4983 	loff_t l = 0;
4984 
4985 	mutex_lock(&trace_eval_mutex);
4986 
4987 	v = trace_eval_maps;
4988 	if (v)
4989 		v++;
4990 
4991 	while (v && l < *pos) {
4992 		v = eval_map_next(m, v, &l);
4993 	}
4994 
4995 	return v;
4996 }
4997 
4998 static void eval_map_stop(struct seq_file *m, void *v)
4999 {
5000 	mutex_unlock(&trace_eval_mutex);
5001 }
5002 
5003 static int eval_map_show(struct seq_file *m, void *v)
5004 {
5005 	union trace_eval_map_item *ptr = v;
5006 
5007 	seq_printf(m, "%s %ld (%s)\n",
5008 		   ptr->map.eval_string, ptr->map.eval_value,
5009 		   ptr->map.system);
5010 
5011 	return 0;
5012 }
5013 
5014 static const struct seq_operations tracing_eval_map_seq_ops = {
5015 	.start		= eval_map_start,
5016 	.next		= eval_map_next,
5017 	.stop		= eval_map_stop,
5018 	.show		= eval_map_show,
5019 };
5020 
5021 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5022 {
5023 	if (tracing_disabled)
5024 		return -ENODEV;
5025 
5026 	return seq_open(filp, &tracing_eval_map_seq_ops);
5027 }
5028 
5029 static const struct file_operations tracing_eval_map_fops = {
5030 	.open		= tracing_eval_map_open,
5031 	.read		= seq_read,
5032 	.llseek		= seq_lseek,
5033 	.release	= seq_release,
5034 };
5035 
5036 static inline union trace_eval_map_item *
5037 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5038 {
5039 	/* Return tail of array given the head */
5040 	return ptr + ptr->head.length + 1;
5041 }
5042 
5043 static void
5044 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5045 			   int len)
5046 {
5047 	struct trace_eval_map **stop;
5048 	struct trace_eval_map **map;
5049 	union trace_eval_map_item *map_array;
5050 	union trace_eval_map_item *ptr;
5051 
5052 	stop = start + len;
5053 
5054 	/*
5055 	 * The trace_eval_maps contains the map plus a head and tail item,
5056 	 * where the head holds the module and length of array, and the
5057 	 * tail holds a pointer to the next list.
5058 	 */
5059 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5060 	if (!map_array) {
5061 		pr_warn("Unable to allocate trace eval mapping\n");
5062 		return;
5063 	}
5064 
5065 	mutex_lock(&trace_eval_mutex);
5066 
5067 	if (!trace_eval_maps)
5068 		trace_eval_maps = map_array;
5069 	else {
5070 		ptr = trace_eval_maps;
5071 		for (;;) {
5072 			ptr = trace_eval_jmp_to_tail(ptr);
5073 			if (!ptr->tail.next)
5074 				break;
5075 			ptr = ptr->tail.next;
5076 
5077 		}
5078 		ptr->tail.next = map_array;
5079 	}
5080 	map_array->head.mod = mod;
5081 	map_array->head.length = len;
5082 	map_array++;
5083 
5084 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5085 		map_array->map = **map;
5086 		map_array++;
5087 	}
5088 	memset(map_array, 0, sizeof(*map_array));
5089 
5090 	mutex_unlock(&trace_eval_mutex);
5091 }
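/*
 * Layout of one map_array block built above, shown here for a module
 * with three eval maps (the head and tail are the extra two entries
 * accounted for in the kmalloc() size):
 *
 *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next -> ... ]
 *
 * Blocks are chained through tail.next, which is how eval_map_next()
 * and trace_eval_jmp_to_tail() walk the whole list.
 */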
5092 
5093 static void trace_create_eval_file(struct dentry *d_tracer)
5094 {
5095 	trace_create_file("eval_map", 0444, d_tracer,
5096 			  NULL, &tracing_eval_map_fops);
5097 }
5098 
5099 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5100 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5101 static inline void trace_insert_eval_map_file(struct module *mod,
5102 			      struct trace_eval_map **start, int len) { }
5103 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5104 
5105 static void trace_insert_eval_map(struct module *mod,
5106 				  struct trace_eval_map **start, int len)
5107 {
5108 	struct trace_eval_map **map;
5109 
5110 	if (len <= 0)
5111 		return;
5112 
5113 	map = start;
5114 
5115 	trace_event_eval_update(map, len);
5116 
5117 	trace_insert_eval_map_file(mod, start, len);
5118 }
5119 
5120 static ssize_t
5121 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5122 		       size_t cnt, loff_t *ppos)
5123 {
5124 	struct trace_array *tr = filp->private_data;
5125 	char buf[MAX_TRACER_SIZE+2];
5126 	int r;
5127 
5128 	mutex_lock(&trace_types_lock);
5129 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5130 	mutex_unlock(&trace_types_lock);
5131 
5132 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5133 }
5134 
5135 int tracer_init(struct tracer *t, struct trace_array *tr)
5136 {
5137 	tracing_reset_online_cpus(&tr->trace_buffer);
5138 	return t->init(tr);
5139 }
5140 
5141 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5142 {
5143 	int cpu;
5144 
5145 	for_each_tracing_cpu(cpu)
5146 		per_cpu_ptr(buf->data, cpu)->entries = val;
5147 }
5148 
5149 #ifdef CONFIG_TRACER_MAX_TRACE
5150 /* resize @tr's buffer to the size of @size_tr's entries */
5151 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5152 					struct trace_buffer *size_buf, int cpu_id)
5153 {
5154 	int cpu, ret = 0;
5155 
5156 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5157 		for_each_tracing_cpu(cpu) {
5158 			ret = ring_buffer_resize(trace_buf->buffer,
5159 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5160 			if (ret < 0)
5161 				break;
5162 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5163 				per_cpu_ptr(size_buf->data, cpu)->entries;
5164 		}
5165 	} else {
5166 		ret = ring_buffer_resize(trace_buf->buffer,
5167 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5168 		if (ret == 0)
5169 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5170 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5171 	}
5172 
5173 	return ret;
5174 }
5175 #endif /* CONFIG_TRACER_MAX_TRACE */
5176 
5177 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5178 					unsigned long size, int cpu)
5179 {
5180 	int ret;
5181 
5182 	/*
5183 	 * If kernel or user changes the size of the ring buffer
5184 	 * we use the size that was given, and we can forget about
5185 	 * expanding it later.
5186 	 */
5187 	ring_buffer_expanded = true;
5188 
5189 	/* May be called before buffers are initialized */
5190 	if (!tr->trace_buffer.buffer)
5191 		return 0;
5192 
5193 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5194 	if (ret < 0)
5195 		return ret;
5196 
5197 #ifdef CONFIG_TRACER_MAX_TRACE
5198 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5199 	    !tr->current_trace->use_max_tr)
5200 		goto out;
5201 
5202 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5203 	if (ret < 0) {
5204 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5205 						     &tr->trace_buffer, cpu);
5206 		if (r < 0) {
5207 			/*
5208 			 * AARGH! We are left with a max buffer of a
5209 			 * different size!
5210 			 * The max buffer is our "snapshot" buffer.
5211 			 * When a tracer needs a snapshot (one of the
5212 			 * latency tracers), it swaps the max buffer
5213 			 * with the saved snapshot. We succeeded in
5214 			 * updating the size of the main buffer, but failed to
5215 			 * update the size of the max buffer. But when we tried
5216 			 * to reset the main buffer to the original size, we
5217 			 * failed there too. This is very unlikely to
5218 			 * happen, but if it does, warn and kill all
5219 			 * tracing.
5220 			 */
5221 			WARN_ON(1);
5222 			tracing_disabled = 1;
5223 		}
5224 		return ret;
5225 	}
5226 
5227 	if (cpu == RING_BUFFER_ALL_CPUS)
5228 		set_buffer_entries(&tr->max_buffer, size);
5229 	else
5230 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5231 
5232  out:
5233 #endif /* CONFIG_TRACER_MAX_TRACE */
5234 
5235 	if (cpu == RING_BUFFER_ALL_CPUS)
5236 		set_buffer_entries(&tr->trace_buffer, size);
5237 	else
5238 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5239 
5240 	return ret;
5241 }
5242 
5243 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5244 					  unsigned long size, int cpu_id)
5245 {
5246 	int ret = size;
5247 
5248 	mutex_lock(&trace_types_lock);
5249 
5250 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5251 		/* make sure, this cpu is enabled in the mask */
5252 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5253 			ret = -EINVAL;
5254 			goto out;
5255 		}
5256 	}
5257 
5258 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5259 	if (ret < 0)
5260 		ret = -ENOMEM;
5261 
5262 out:
5263 	mutex_unlock(&trace_types_lock);
5264 
5265 	return ret;
5266 }
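/*
 * Sketch of how this is exercised from user space: writes to the
 * buffer_size_kb files (their file_operations appear later in this
 * file) end up here, with the per-CPU files passing a specific cpu_id
 * and the top-level file passing RING_BUFFER_ALL_CPUS.  For example,
 * assuming the usual tracefs mount point:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb             # all CPUs
 *   # echo 1408 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 */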
5267 
5268 
5269 /**
5270  * tracing_update_buffers - used by tracing facility to expand ring buffers
5271  *
5272  * To save memory on systems where tracing is configured in but never
5273  * used, the ring buffers are set to a minimum size. Once a user
5274  * starts to use the tracing facility, they need to grow to their
5275  * default size.
5276  *
5277  * This function is to be called when a tracer is about to be used.
5278  */
5279 int tracing_update_buffers(void)
5280 {
5281 	int ret = 0;
5282 
5283 	mutex_lock(&trace_types_lock);
5284 	if (!ring_buffer_expanded)
5285 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5286 						RING_BUFFER_ALL_CPUS);
5287 	mutex_unlock(&trace_types_lock);
5288 
5289 	return ret;
5290 }
5291 
5292 struct trace_option_dentry;
5293 
5294 static void
5295 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5296 
5297 /*
5298  * Used to clear out the tracer before deletion of an instance.
5299  * Must have trace_types_lock held.
5300  */
5301 static void tracing_set_nop(struct trace_array *tr)
5302 {
5303 	if (tr->current_trace == &nop_trace)
5304 		return;
5305 
5306 	tr->current_trace->enabled--;
5307 
5308 	if (tr->current_trace->reset)
5309 		tr->current_trace->reset(tr);
5310 
5311 	tr->current_trace = &nop_trace;
5312 }
5313 
5314 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5315 {
5316 	/* Only enable if the directory has been created already. */
5317 	if (!tr->dir)
5318 		return;
5319 
5320 	create_trace_option_files(tr, t);
5321 }
5322 
5323 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5324 {
5325 	struct tracer *t;
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327 	bool had_max_tr;
5328 #endif
5329 	int ret = 0;
5330 
5331 	mutex_lock(&trace_types_lock);
5332 
5333 	if (!ring_buffer_expanded) {
5334 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5335 						RING_BUFFER_ALL_CPUS);
5336 		if (ret < 0)
5337 			goto out;
5338 		ret = 0;
5339 	}
5340 
5341 	for (t = trace_types; t; t = t->next) {
5342 		if (strcmp(t->name, buf) == 0)
5343 			break;
5344 	}
5345 	if (!t) {
5346 		ret = -EINVAL;
5347 		goto out;
5348 	}
5349 	if (t == tr->current_trace)
5350 		goto out;
5351 
5352 	/* Some tracers are only allowed for the top level buffer */
5353 	if (!trace_ok_for_array(t, tr)) {
5354 		ret = -EINVAL;
5355 		goto out;
5356 	}
5357 
5358 	/* If trace pipe files are being read, we can't change the tracer */
5359 	if (tr->current_trace->ref) {
5360 		ret = -EBUSY;
5361 		goto out;
5362 	}
5363 
5364 	trace_branch_disable();
5365 
5366 	tr->current_trace->enabled--;
5367 
5368 	if (tr->current_trace->reset)
5369 		tr->current_trace->reset(tr);
5370 
5371 	/* Current trace needs to be nop_trace before synchronize_sched */
5372 	tr->current_trace = &nop_trace;
5373 
5374 #ifdef CONFIG_TRACER_MAX_TRACE
5375 	had_max_tr = tr->allocated_snapshot;
5376 
5377 	if (had_max_tr && !t->use_max_tr) {
5378 		/*
5379 		 * We need to make sure that the update_max_tr sees that
5380 		 * current_trace changed to nop_trace to keep it from
5381 		 * swapping the buffers after we resize it.
5382 		 * The update_max_tr is called from interrupts disabled
5383 		 * so a synchronized_sched() is sufficient.
5384 		 */
5385 		synchronize_sched();
5386 		free_snapshot(tr);
5387 	}
5388 #endif
5389 
5390 #ifdef CONFIG_TRACER_MAX_TRACE
5391 	if (t->use_max_tr && !had_max_tr) {
5392 		ret = alloc_snapshot(tr);
5393 		if (ret < 0)
5394 			goto out;
5395 	}
5396 #endif
5397 
5398 	if (t->init) {
5399 		ret = tracer_init(t, tr);
5400 		if (ret)
5401 			goto out;
5402 	}
5403 
5404 	tr->current_trace = t;
5405 	tr->current_trace->enabled++;
5406 	trace_branch_enable(tr);
5407  out:
5408 	mutex_unlock(&trace_types_lock);
5409 
5410 	return ret;
5411 }
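/*
 * Typical tracer switch from the shell (tracing_set_tracer() is the
 * backend for writes to current_tracer; "function" is only listed when
 * the function tracer is configured in):
 *
 *   # cat  /sys/kernel/tracing/available_tracers
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo nop      > /sys/kernel/tracing/current_tracer   # back to no tracer
 */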
5412 
5413 static ssize_t
5414 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5415 			size_t cnt, loff_t *ppos)
5416 {
5417 	struct trace_array *tr = filp->private_data;
5418 	char buf[MAX_TRACER_SIZE+1];
5419 	int i;
5420 	size_t ret;
5421 	int err;
5422 
5423 	ret = cnt;
5424 
5425 	if (cnt > MAX_TRACER_SIZE)
5426 		cnt = MAX_TRACER_SIZE;
5427 
5428 	if (copy_from_user(buf, ubuf, cnt))
5429 		return -EFAULT;
5430 
5431 	buf[cnt] = 0;
5432 
5433 	/* strip ending whitespace. */
5434 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5435 		buf[i] = 0;
5436 
5437 	err = tracing_set_tracer(tr, buf);
5438 	if (err)
5439 		return err;
5440 
5441 	*ppos += ret;
5442 
5443 	return ret;
5444 }
5445 
5446 static ssize_t
5447 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5448 		   size_t cnt, loff_t *ppos)
5449 {
5450 	char buf[64];
5451 	int r;
5452 
5453 	r = snprintf(buf, sizeof(buf), "%ld\n",
5454 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5455 	if (r > sizeof(buf))
5456 		r = sizeof(buf);
5457 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5458 }
5459 
5460 static ssize_t
5461 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5462 		    size_t cnt, loff_t *ppos)
5463 {
5464 	unsigned long val;
5465 	int ret;
5466 
5467 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5468 	if (ret)
5469 		return ret;
5470 
5471 	*ptr = val * 1000;
5472 
5473 	return cnt;
5474 }
5475 
5476 static ssize_t
5477 tracing_thresh_read(struct file *filp, char __user *ubuf,
5478 		    size_t cnt, loff_t *ppos)
5479 {
5480 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5481 }
5482 
5483 static ssize_t
5484 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5485 		     size_t cnt, loff_t *ppos)
5486 {
5487 	struct trace_array *tr = filp->private_data;
5488 	int ret;
5489 
5490 	mutex_lock(&trace_types_lock);
5491 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5492 	if (ret < 0)
5493 		goto out;
5494 
5495 	if (tr->current_trace->update_thresh) {
5496 		ret = tr->current_trace->update_thresh(tr);
5497 		if (ret < 0)
5498 			goto out;
5499 	}
5500 
5501 	ret = cnt;
5502 out:
5503 	mutex_unlock(&trace_types_lock);
5504 
5505 	return ret;
5506 }
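/*
 * Note the unit conversion: tracing_nsecs_write() takes the value in
 * microseconds and stores it in nanoseconds (val * 1000), while
 * tracing_nsecs_read() converts back for display.  For example:
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh   # threshold = 100 usecs
 */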
5507 
5508 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5509 
5510 static ssize_t
5511 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5512 		     size_t cnt, loff_t *ppos)
5513 {
5514 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5515 }
5516 
5517 static ssize_t
5518 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5519 		      size_t cnt, loff_t *ppos)
5520 {
5521 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5522 }
5523 
5524 #endif
5525 
5526 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5527 {
5528 	struct trace_array *tr = inode->i_private;
5529 	struct trace_iterator *iter;
5530 	int ret = 0;
5531 
5532 	if (tracing_disabled)
5533 		return -ENODEV;
5534 
5535 	if (trace_array_get(tr) < 0)
5536 		return -ENODEV;
5537 
5538 	mutex_lock(&trace_types_lock);
5539 
5540 	/* create a buffer to store the information to pass to userspace */
5541 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5542 	if (!iter) {
5543 		ret = -ENOMEM;
5544 		__trace_array_put(tr);
5545 		goto out;
5546 	}
5547 
5548 	trace_seq_init(&iter->seq);
5549 	iter->trace = tr->current_trace;
5550 
5551 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5552 		ret = -ENOMEM;
5553 		goto fail;
5554 	}
5555 
5556 	/* trace pipe does not show start of buffer */
5557 	cpumask_setall(iter->started);
5558 
5559 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5560 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5561 
5562 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5563 	if (trace_clocks[tr->clock_id].in_ns)
5564 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5565 
5566 	iter->tr = tr;
5567 	iter->trace_buffer = &tr->trace_buffer;
5568 	iter->cpu_file = tracing_get_cpu(inode);
5569 	mutex_init(&iter->mutex);
5570 	filp->private_data = iter;
5571 
5572 	if (iter->trace->pipe_open)
5573 		iter->trace->pipe_open(iter);
5574 
5575 	nonseekable_open(inode, filp);
5576 
5577 	tr->current_trace->ref++;
5578 out:
5579 	mutex_unlock(&trace_types_lock);
5580 	return ret;
5581 
5582 fail:
5583 	kfree(iter->trace);
5584 	kfree(iter);
5585 	__trace_array_put(tr);
5586 	mutex_unlock(&trace_types_lock);
5587 	return ret;
5588 }
5589 
5590 static int tracing_release_pipe(struct inode *inode, struct file *file)
5591 {
5592 	struct trace_iterator *iter = file->private_data;
5593 	struct trace_array *tr = inode->i_private;
5594 
5595 	mutex_lock(&trace_types_lock);
5596 
5597 	tr->current_trace->ref--;
5598 
5599 	if (iter->trace->pipe_close)
5600 		iter->trace->pipe_close(iter);
5601 
5602 	mutex_unlock(&trace_types_lock);
5603 
5604 	free_cpumask_var(iter->started);
5605 	mutex_destroy(&iter->mutex);
5606 	kfree(iter);
5607 
5608 	trace_array_put(tr);
5609 
5610 	return 0;
5611 }
5612 
5613 static unsigned int
5614 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5615 {
5616 	struct trace_array *tr = iter->tr;
5617 
5618 	/* Iterators are static, they should be filled or empty */
5619 	if (trace_buffer_iter(iter, iter->cpu_file))
5620 		return POLLIN | POLLRDNORM;
5621 
5622 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5623 		/*
5624 		 * Always select as readable when in blocking mode
5625 		 */
5626 		return POLLIN | POLLRDNORM;
5627 	else
5628 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5629 					     filp, poll_table);
5630 }
5631 
5632 static unsigned int
5633 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5634 {
5635 	struct trace_iterator *iter = filp->private_data;
5636 
5637 	return trace_poll(iter, filp, poll_table);
5638 }
5639 
5640 /* Must be called with iter->mutex held. */
5641 static int tracing_wait_pipe(struct file *filp)
5642 {
5643 	struct trace_iterator *iter = filp->private_data;
5644 	int ret;
5645 
5646 	while (trace_empty(iter)) {
5647 
5648 		if ((filp->f_flags & O_NONBLOCK)) {
5649 			return -EAGAIN;
5650 		}
5651 
5652 		/*
5653 		 * We keep blocking until we have read something and tracing
5654 		 * has been disabled. We still block if tracing is disabled
5655 		 * but we have never read anything. This allows a user to cat
5656 		 * this file, and then enable tracing. But after we have read
5657 		 * something, we give an EOF when tracing is again disabled.
5658 		 *
5659 		 * iter->pos will be 0 if we haven't read anything.
5660 		 */
5661 		if (!tracing_is_on() && iter->pos)
5662 			break;
5663 
5664 		mutex_unlock(&iter->mutex);
5665 
5666 		ret = wait_on_pipe(iter, false);
5667 
5668 		mutex_lock(&iter->mutex);
5669 
5670 		if (ret)
5671 			return ret;
5672 	}
5673 
5674 	return 1;
5675 }
5676 
5677 /*
5678  * Consumer reader.
5679  */
5680 static ssize_t
5681 tracing_read_pipe(struct file *filp, char __user *ubuf,
5682 		  size_t cnt, loff_t *ppos)
5683 {
5684 	struct trace_iterator *iter = filp->private_data;
5685 	ssize_t sret;
5686 
5687 	/*
5688 	 * Avoid more than one consumer on a single file descriptor.
5689 	 * This is just a matter of trace coherency; the ring buffer itself
5690 	 * is protected.
5691 	 */
5692 	mutex_lock(&iter->mutex);
5693 
5694 	/* return any leftover data */
5695 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5696 	if (sret != -EBUSY)
5697 		goto out;
5698 
5699 	trace_seq_init(&iter->seq);
5700 
5701 	if (iter->trace->read) {
5702 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5703 		if (sret)
5704 			goto out;
5705 	}
5706 
5707 waitagain:
5708 	sret = tracing_wait_pipe(filp);
5709 	if (sret <= 0)
5710 		goto out;
5711 
5712 	/* stop when tracing is finished */
5713 	if (trace_empty(iter)) {
5714 		sret = 0;
5715 		goto out;
5716 	}
5717 
5718 	if (cnt >= PAGE_SIZE)
5719 		cnt = PAGE_SIZE - 1;
5720 
5721 	/* reset all but tr, trace, and overruns */
5722 	memset(&iter->seq, 0,
5723 	       sizeof(struct trace_iterator) -
5724 	       offsetof(struct trace_iterator, seq));
5725 	cpumask_clear(iter->started);
5726 	iter->pos = -1;
5727 
5728 	trace_event_read_lock();
5729 	trace_access_lock(iter->cpu_file);
5730 	while (trace_find_next_entry_inc(iter) != NULL) {
5731 		enum print_line_t ret;
5732 		int save_len = iter->seq.seq.len;
5733 
5734 		ret = print_trace_line(iter);
5735 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5736 			/* don't print partial lines */
5737 			iter->seq.seq.len = save_len;
5738 			break;
5739 		}
5740 		if (ret != TRACE_TYPE_NO_CONSUME)
5741 			trace_consume(iter);
5742 
5743 		if (trace_seq_used(&iter->seq) >= cnt)
5744 			break;
5745 
5746 		/*
5747 		 * Setting the full flag means we reached the trace_seq buffer
5748 		 * size and we should have left via the partial output condition
5749 		 * above. One of the trace_seq_* functions is not being used properly.
5750 		 */
5751 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5752 			  iter->ent->type);
5753 	}
5754 	trace_access_unlock(iter->cpu_file);
5755 	trace_event_read_unlock();
5756 
5757 	/* Now copy what we have to the user */
5758 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5759 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5760 		trace_seq_init(&iter->seq);
5761 
5762 	/*
5763 	 * If there was nothing to send to user, in spite of consuming trace
5764 	 * entries, go back to wait for more entries.
5765 	 */
5766 	if (sret == -EBUSY)
5767 		goto waitagain;
5768 
5769 out:
5770 	mutex_unlock(&iter->mutex);
5771 
5772 	return sret;
5773 }
5774 
5775 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5776 				     unsigned int idx)
5777 {
5778 	__free_page(spd->pages[idx]);
5779 }
5780 
5781 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5782 	.can_merge		= 0,
5783 	.confirm		= generic_pipe_buf_confirm,
5784 	.release		= generic_pipe_buf_release,
5785 	.steal			= generic_pipe_buf_steal,
5786 	.get			= generic_pipe_buf_get,
5787 };
5788 
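/*
 * Fill iter->seq with up to one page of formatted trace output,
 * consuming entries as they are printed. Returns how much of the
 * requested length (rem) remains to be filled.
 */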
5789 static size_t
5790 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5791 {
5792 	size_t count;
5793 	int save_len;
5794 	int ret;
5795 
5796 	/* Seq buffer is page-sized, exactly what we need. */
5797 	for (;;) {
5798 		save_len = iter->seq.seq.len;
5799 		ret = print_trace_line(iter);
5800 
5801 		if (trace_seq_has_overflowed(&iter->seq)) {
5802 			iter->seq.seq.len = save_len;
5803 			break;
5804 		}
5805 
5806 		/*
5807 		 * This should not be hit, because it should only
5808 		 * be set if the iter->seq overflowed. But check it
5809 		 * anyway to be safe.
5810 		 */
5811 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5812 			iter->seq.seq.len = save_len;
5813 			break;
5814 		}
5815 
5816 		count = trace_seq_used(&iter->seq) - save_len;
5817 		if (rem < count) {
5818 			rem = 0;
5819 			iter->seq.seq.len = save_len;
5820 			break;
5821 		}
5822 
5823 		if (ret != TRACE_TYPE_NO_CONSUME)
5824 			trace_consume(iter);
5825 		rem -= count;
5826 		if (!trace_find_next_entry_inc(iter))	{
5827 			rem = 0;
5828 			iter->ent = NULL;
5829 			break;
5830 		}
5831 	}
5832 
5833 	return rem;
5834 }
5835 
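/*
 * splice(2) implementation for trace_pipe: format consumed trace
 * entries into freshly allocated pages and hand them to the pipe.
 */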
5836 static ssize_t tracing_splice_read_pipe(struct file *filp,
5837 					loff_t *ppos,
5838 					struct pipe_inode_info *pipe,
5839 					size_t len,
5840 					unsigned int flags)
5841 {
5842 	struct page *pages_def[PIPE_DEF_BUFFERS];
5843 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5844 	struct trace_iterator *iter = filp->private_data;
5845 	struct splice_pipe_desc spd = {
5846 		.pages		= pages_def,
5847 		.partial	= partial_def,
5848 		.nr_pages	= 0, /* This gets updated below. */
5849 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5850 		.ops		= &tracing_pipe_buf_ops,
5851 		.spd_release	= tracing_spd_release_pipe,
5852 	};
5853 	ssize_t ret;
5854 	size_t rem;
5855 	unsigned int i;
5856 
5857 	if (splice_grow_spd(pipe, &spd))
5858 		return -ENOMEM;
5859 
5860 	mutex_lock(&iter->mutex);
5861 
5862 	if (iter->trace->splice_read) {
5863 		ret = iter->trace->splice_read(iter, filp,
5864 					       ppos, pipe, len, flags);
5865 		if (ret)
5866 			goto out_err;
5867 	}
5868 
5869 	ret = tracing_wait_pipe(filp);
5870 	if (ret <= 0)
5871 		goto out_err;
5872 
5873 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5874 		ret = -EFAULT;
5875 		goto out_err;
5876 	}
5877 
5878 	trace_event_read_lock();
5879 	trace_access_lock(iter->cpu_file);
5880 
5881 	/* Fill as many pages as possible. */
5882 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5883 		spd.pages[i] = alloc_page(GFP_KERNEL);
5884 		if (!spd.pages[i])
5885 			break;
5886 
5887 		rem = tracing_fill_pipe_page(rem, iter);
5888 
5889 		/* Copy the data into the page, so we can start over. */
5890 		ret = trace_seq_to_buffer(&iter->seq,
5891 					  page_address(spd.pages[i]),
5892 					  trace_seq_used(&iter->seq));
5893 		if (ret < 0) {
5894 			__free_page(spd.pages[i]);
5895 			break;
5896 		}
5897 		spd.partial[i].offset = 0;
5898 		spd.partial[i].len = trace_seq_used(&iter->seq);
5899 
5900 		trace_seq_init(&iter->seq);
5901 	}
5902 
5903 	trace_access_unlock(iter->cpu_file);
5904 	trace_event_read_unlock();
5905 	mutex_unlock(&iter->mutex);
5906 
5907 	spd.nr_pages = i;
5908 
5909 	if (i)
5910 		ret = splice_to_pipe(pipe, &spd);
5911 	else
5912 		ret = 0;
5913 out:
5914 	splice_shrink_spd(&spd);
5915 	return ret;
5916 
5917 out_err:
5918 	mutex_unlock(&iter->mutex);
5919 	goto out;
5920 }
5921 
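/*
 * Read handler for the "buffer_size_kb" files: report the per-cpu ring
 * buffer size in KB. For the all-CPU file, "X" is printed when the
 * per-cpu sizes differ, and "(expanded: N)" is shown while the buffer
 * has not yet been expanded to its full size.
 */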
5922 static ssize_t
5923 tracing_entries_read(struct file *filp, char __user *ubuf,
5924 		     size_t cnt, loff_t *ppos)
5925 {
5926 	struct inode *inode = file_inode(filp);
5927 	struct trace_array *tr = inode->i_private;
5928 	int cpu = tracing_get_cpu(inode);
5929 	char buf[64];
5930 	int r = 0;
5931 	ssize_t ret;
5932 
5933 	mutex_lock(&trace_types_lock);
5934 
5935 	if (cpu == RING_BUFFER_ALL_CPUS) {
5936 		int cpu, buf_size_same;
5937 		unsigned long size;
5938 
5939 		size = 0;
5940 		buf_size_same = 1;
5941 		/* check if all cpu sizes are same */
5942 		for_each_tracing_cpu(cpu) {
5943 			/* fill in the size from first enabled cpu */
5944 			if (size == 0)
5945 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5946 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5947 				buf_size_same = 0;
5948 				break;
5949 			}
5950 		}
5951 
5952 		if (buf_size_same) {
5953 			if (!ring_buffer_expanded)
5954 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5955 					    size >> 10,
5956 					    trace_buf_size >> 10);
5957 			else
5958 				r = sprintf(buf, "%lu\n", size >> 10);
5959 		} else
5960 			r = sprintf(buf, "X\n");
5961 	} else
5962 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5963 
5964 	mutex_unlock(&trace_types_lock);
5965 
5966 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5967 	return ret;
5968 }
5969 
5970 static ssize_t
5971 tracing_entries_write(struct file *filp, const char __user *ubuf,
5972 		      size_t cnt, loff_t *ppos)
5973 {
5974 	struct inode *inode = file_inode(filp);
5975 	struct trace_array *tr = inode->i_private;
5976 	unsigned long val;
5977 	int ret;
5978 
5979 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5980 	if (ret)
5981 		return ret;
5982 
5983 	/* must have at least 1 entry */
5984 	if (!val)
5985 		return -EINVAL;
5986 
5987 	/* value is in KB */
5988 	val <<= 10;
5989 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5990 	if (ret < 0)
5991 		return ret;
5992 
5993 	*ppos += cnt;
5994 
5995 	return cnt;
5996 }
5997 
5998 static ssize_t
5999 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6000 				size_t cnt, loff_t *ppos)
6001 {
6002 	struct trace_array *tr = filp->private_data;
6003 	char buf[64];
6004 	int r, cpu;
6005 	unsigned long size = 0, expanded_size = 0;
6006 
6007 	mutex_lock(&trace_types_lock);
6008 	for_each_tracing_cpu(cpu) {
6009 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6010 		if (!ring_buffer_expanded)
6011 			expanded_size += trace_buf_size >> 10;
6012 	}
6013 	if (ring_buffer_expanded)
6014 		r = sprintf(buf, "%lu\n", size);
6015 	else
6016 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6017 	mutex_unlock(&trace_types_lock);
6018 
6019 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6020 }
6021 
6022 static ssize_t
6023 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6024 			  size_t cnt, loff_t *ppos)
6025 {
6026 	/*
6027 	 * There is no need to read what the user has written; this function
6028 	 * exists only to make sure that no error is returned when "echo" is used.
6029 	 */
6030 
6031 	*ppos += cnt;
6032 
6033 	return cnt;
6034 }
6035 
6036 static int
6037 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6038 {
6039 	struct trace_array *tr = inode->i_private;
6040 
6041 	/* Disable tracing? */
6042 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6043 		tracer_tracing_off(tr);
6044 	/* resize the ring buffer to 0 */
6045 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6046 
6047 	trace_array_put(tr);
6048 
6049 	return 0;
6050 }
6051 
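/*
 * Write handler for the "trace_marker" file: record the user supplied
 * string as a TRACE_PRINT event in the ring buffer (e.g. from a shell,
 * echo "hello" > trace_marker).
 */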
6052 static ssize_t
6053 tracing_mark_write(struct file *filp, const char __user *ubuf,
6054 					size_t cnt, loff_t *fpos)
6055 {
6056 	struct trace_array *tr = filp->private_data;
6057 	struct ring_buffer_event *event;
6058 	struct ring_buffer *buffer;
6059 	struct print_entry *entry;
6060 	unsigned long irq_flags;
6061 	const char faulted[] = "<faulted>";
6062 	ssize_t written;
6063 	int size;
6064 	int len;
6065 
6066 /* Used in tracing_mark_raw_write() as well */
6067 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6068 
6069 	if (tracing_disabled)
6070 		return -EINVAL;
6071 
6072 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6073 		return -EINVAL;
6074 
6075 	if (cnt > TRACE_BUF_SIZE)
6076 		cnt = TRACE_BUF_SIZE;
6077 
6078 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6079 
6080 	local_save_flags(irq_flags);
6081 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6082 
6083 	/* If less than "<faulted>", then make sure we can still add that */
6084 	if (cnt < FAULTED_SIZE)
6085 		size += FAULTED_SIZE - cnt;
6086 
6087 	buffer = tr->trace_buffer.buffer;
6088 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6089 					    irq_flags, preempt_count());
6090 	if (unlikely(!event))
6091 		/* Ring buffer disabled, return as if not open for write */
6092 		return -EBADF;
6093 
6094 	entry = ring_buffer_event_data(event);
6095 	entry->ip = _THIS_IP_;
6096 
6097 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6098 	if (len) {
6099 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6100 		cnt = FAULTED_SIZE;
6101 		written = -EFAULT;
6102 	} else
6103 		written = cnt;
6104 	len = cnt;
6105 
6106 	if (entry->buf[cnt - 1] != '\n') {
6107 		entry->buf[cnt] = '\n';
6108 		entry->buf[cnt + 1] = '\0';
6109 	} else
6110 		entry->buf[cnt] = '\0';
6111 
6112 	__buffer_unlock_commit(buffer, event);
6113 
6114 	if (written > 0)
6115 		*fpos += written;
6116 
6117 	return written;
6118 }
6119 
6120 /* Limit it for now to 3K (including tag) */
6121 #define RAW_DATA_MAX_SIZE (1024*3)
6122 
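/*
 * Write handler for the "trace_marker_raw" file: the payload must start
 * with an (unsigned int) tag id, followed by raw binary data, and is
 * recorded as a TRACE_RAW_DATA event.
 */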
6123 static ssize_t
6124 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6125 					size_t cnt, loff_t *fpos)
6126 {
6127 	struct trace_array *tr = filp->private_data;
6128 	struct ring_buffer_event *event;
6129 	struct ring_buffer *buffer;
6130 	struct raw_data_entry *entry;
6131 	const char faulted[] = "<faulted>";
6132 	unsigned long irq_flags;
6133 	ssize_t written;
6134 	int size;
6135 	int len;
6136 
6137 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6138 
6139 	if (tracing_disabled)
6140 		return -EINVAL;
6141 
6142 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6143 		return -EINVAL;
6144 
6145 	/* The marker must at least have a tag id */
6146 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6147 		return -EINVAL;
6148 
6149 	if (cnt > TRACE_BUF_SIZE)
6150 		cnt = TRACE_BUF_SIZE;
6151 
6152 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6153 
6154 	local_save_flags(irq_flags);
6155 	size = sizeof(*entry) + cnt;
6156 	if (cnt < FAULT_SIZE_ID)
6157 		size += FAULT_SIZE_ID - cnt;
6158 
6159 	buffer = tr->trace_buffer.buffer;
6160 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6161 					    irq_flags, preempt_count());
6162 	if (!event)
6163 		/* Ring buffer disabled, return as if not open for write */
6164 		return -EBADF;
6165 
6166 	entry = ring_buffer_event_data(event);
6167 
6168 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6169 	if (len) {
6170 		entry->id = -1;
6171 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6172 		written = -EFAULT;
6173 	} else
6174 		written = cnt;
6175 
6176 	__buffer_unlock_commit(buffer, event);
6177 
6178 	if (written > 0)
6179 		*fpos += written;
6180 
6181 	return written;
6182 }
6183 
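/*
 * "trace_clock" seq_file show handler: list the available trace clocks,
 * with the currently selected one wrapped in brackets.
 */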
6184 static int tracing_clock_show(struct seq_file *m, void *v)
6185 {
6186 	struct trace_array *tr = m->private;
6187 	int i;
6188 
6189 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6190 		seq_printf(m,
6191 			"%s%s%s%s", i ? " " : "",
6192 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6193 			i == tr->clock_id ? "]" : "");
6194 	seq_putc(m, '\n');
6195 
6196 	return 0;
6197 }
6198 
6199 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6200 {
6201 	int i;
6202 
6203 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6204 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6205 			break;
6206 	}
6207 	if (i == ARRAY_SIZE(trace_clocks))
6208 		return -EINVAL;
6209 
6210 	mutex_lock(&trace_types_lock);
6211 
6212 	tr->clock_id = i;
6213 
6214 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6215 
6216 	/*
6217 	 * New clock may not be consistent with the previous clock.
6218 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6219 	 */
6220 	tracing_reset_online_cpus(&tr->trace_buffer);
6221 
6222 #ifdef CONFIG_TRACER_MAX_TRACE
6223 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6224 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6225 	tracing_reset_online_cpus(&tr->max_buffer);
6226 #endif
6227 
6228 	mutex_unlock(&trace_types_lock);
6229 
6230 	return 0;
6231 }
6232 
6233 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6234 				   size_t cnt, loff_t *fpos)
6235 {
6236 	struct seq_file *m = filp->private_data;
6237 	struct trace_array *tr = m->private;
6238 	char buf[64];
6239 	const char *clockstr;
6240 	int ret;
6241 
6242 	if (cnt >= sizeof(buf))
6243 		return -EINVAL;
6244 
6245 	if (copy_from_user(buf, ubuf, cnt))
6246 		return -EFAULT;
6247 
6248 	buf[cnt] = 0;
6249 
6250 	clockstr = strstrip(buf);
6251 
6252 	ret = tracing_set_clock(tr, clockstr);
6253 	if (ret)
6254 		return ret;
6255 
6256 	*fpos += cnt;
6257 
6258 	return cnt;
6259 }
6260 
6261 static int tracing_clock_open(struct inode *inode, struct file *file)
6262 {
6263 	struct trace_array *tr = inode->i_private;
6264 	int ret;
6265 
6266 	if (tracing_disabled)
6267 		return -ENODEV;
6268 
6269 	if (trace_array_get(tr))
6270 		return -ENODEV;
6271 
6272 	ret = single_open(file, tracing_clock_show, inode->i_private);
6273 	if (ret < 0)
6274 		trace_array_put(tr);
6275 
6276 	return ret;
6277 }
6278 
6279 struct ftrace_buffer_info {
6280 	struct trace_iterator	iter;
6281 	void			*spare;
6282 	unsigned int		spare_cpu;
6283 	unsigned int		read;
6284 };
6285 
6286 #ifdef CONFIG_TRACER_SNAPSHOT
6287 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6288 {
6289 	struct trace_array *tr = inode->i_private;
6290 	struct trace_iterator *iter;
6291 	struct seq_file *m;
6292 	int ret = 0;
6293 
6294 	if (trace_array_get(tr) < 0)
6295 		return -ENODEV;
6296 
6297 	if (file->f_mode & FMODE_READ) {
6298 		iter = __tracing_open(inode, file, true);
6299 		if (IS_ERR(iter))
6300 			ret = PTR_ERR(iter);
6301 	} else {
6302 		/* Writes still need the seq_file to hold the private data */
6303 		ret = -ENOMEM;
6304 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6305 		if (!m)
6306 			goto out;
6307 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6308 		if (!iter) {
6309 			kfree(m);
6310 			goto out;
6311 		}
6312 		ret = 0;
6313 
6314 		iter->tr = tr;
6315 		iter->trace_buffer = &tr->max_buffer;
6316 		iter->cpu_file = tracing_get_cpu(inode);
6317 		m->private = iter;
6318 		file->private_data = m;
6319 	}
6320 out:
6321 	if (ret < 0)
6322 		trace_array_put(tr);
6323 
6324 	return ret;
6325 }
6326 
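/*
 * Write handler for the "snapshot" file:
 *   0    - free the snapshot buffer (all-CPU file only)
 *   1    - allocate the snapshot buffer if needed and swap it with the
 *          live buffer (per-CPU swap only if the ring buffer allows it)
 *   else - clear the snapshot buffer contents
 */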
6327 static ssize_t
6328 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6329 		       loff_t *ppos)
6330 {
6331 	struct seq_file *m = filp->private_data;
6332 	struct trace_iterator *iter = m->private;
6333 	struct trace_array *tr = iter->tr;
6334 	unsigned long val;
6335 	int ret;
6336 
6337 	ret = tracing_update_buffers();
6338 	if (ret < 0)
6339 		return ret;
6340 
6341 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6342 	if (ret)
6343 		return ret;
6344 
6345 	mutex_lock(&trace_types_lock);
6346 
6347 	if (tr->current_trace->use_max_tr) {
6348 		ret = -EBUSY;
6349 		goto out;
6350 	}
6351 
6352 	switch (val) {
6353 	case 0:
6354 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6355 			ret = -EINVAL;
6356 			break;
6357 		}
6358 		if (tr->allocated_snapshot)
6359 			free_snapshot(tr);
6360 		break;
6361 	case 1:
6362 /* Only allow per-cpu swap if the ring buffer supports it */
6363 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6364 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6365 			ret = -EINVAL;
6366 			break;
6367 		}
6368 #endif
6369 		if (!tr->allocated_snapshot) {
6370 			ret = alloc_snapshot(tr);
6371 			if (ret < 0)
6372 				break;
6373 		}
6374 		local_irq_disable();
6375 		/* Now, we're going to swap */
6376 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6377 			update_max_tr(tr, current, smp_processor_id());
6378 		else
6379 			update_max_tr_single(tr, current, iter->cpu_file);
6380 		local_irq_enable();
6381 		break;
6382 	default:
6383 		if (tr->allocated_snapshot) {
6384 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6385 				tracing_reset_online_cpus(&tr->max_buffer);
6386 			else
6387 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6388 		}
6389 		break;
6390 	}
6391 
6392 	if (ret >= 0) {
6393 		*ppos += cnt;
6394 		ret = cnt;
6395 	}
6396 out:
6397 	mutex_unlock(&trace_types_lock);
6398 	return ret;
6399 }
6400 
6401 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6402 {
6403 	struct seq_file *m = file->private_data;
6404 	int ret;
6405 
6406 	ret = tracing_release(inode, file);
6407 
6408 	if (file->f_mode & FMODE_READ)
6409 		return ret;
6410 
6411 	/* If write only, the seq_file is just a stub */
6412 	if (m)
6413 		kfree(m->private);
6414 	kfree(m);
6415 
6416 	return 0;
6417 }
6418 
6419 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6420 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6421 				    size_t count, loff_t *ppos);
6422 static int tracing_buffers_release(struct inode *inode, struct file *file);
6423 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6424 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6425 
6426 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6427 {
6428 	struct ftrace_buffer_info *info;
6429 	int ret;
6430 
6431 	ret = tracing_buffers_open(inode, filp);
6432 	if (ret < 0)
6433 		return ret;
6434 
6435 	info = filp->private_data;
6436 
6437 	if (info->iter.trace->use_max_tr) {
6438 		tracing_buffers_release(inode, filp);
6439 		return -EBUSY;
6440 	}
6441 
6442 	info->iter.snapshot = true;
6443 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6444 
6445 	return ret;
6446 }
6447 
6448 #endif /* CONFIG_TRACER_SNAPSHOT */
6449 
6450 
6451 static const struct file_operations tracing_thresh_fops = {
6452 	.open		= tracing_open_generic,
6453 	.read		= tracing_thresh_read,
6454 	.write		= tracing_thresh_write,
6455 	.llseek		= generic_file_llseek,
6456 };
6457 
6458 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6459 static const struct file_operations tracing_max_lat_fops = {
6460 	.open		= tracing_open_generic,
6461 	.read		= tracing_max_lat_read,
6462 	.write		= tracing_max_lat_write,
6463 	.llseek		= generic_file_llseek,
6464 };
6465 #endif
6466 
6467 static const struct file_operations set_tracer_fops = {
6468 	.open		= tracing_open_generic,
6469 	.read		= tracing_set_trace_read,
6470 	.write		= tracing_set_trace_write,
6471 	.llseek		= generic_file_llseek,
6472 };
6473 
6474 static const struct file_operations tracing_pipe_fops = {
6475 	.open		= tracing_open_pipe,
6476 	.poll		= tracing_poll_pipe,
6477 	.read		= tracing_read_pipe,
6478 	.splice_read	= tracing_splice_read_pipe,
6479 	.release	= tracing_release_pipe,
6480 	.llseek		= no_llseek,
6481 };
6482 
6483 static const struct file_operations tracing_entries_fops = {
6484 	.open		= tracing_open_generic_tr,
6485 	.read		= tracing_entries_read,
6486 	.write		= tracing_entries_write,
6487 	.llseek		= generic_file_llseek,
6488 	.release	= tracing_release_generic_tr,
6489 };
6490 
6491 static const struct file_operations tracing_total_entries_fops = {
6492 	.open		= tracing_open_generic_tr,
6493 	.read		= tracing_total_entries_read,
6494 	.llseek		= generic_file_llseek,
6495 	.release	= tracing_release_generic_tr,
6496 };
6497 
6498 static const struct file_operations tracing_free_buffer_fops = {
6499 	.open		= tracing_open_generic_tr,
6500 	.write		= tracing_free_buffer_write,
6501 	.release	= tracing_free_buffer_release,
6502 };
6503 
6504 static const struct file_operations tracing_mark_fops = {
6505 	.open		= tracing_open_generic_tr,
6506 	.write		= tracing_mark_write,
6507 	.llseek		= generic_file_llseek,
6508 	.release	= tracing_release_generic_tr,
6509 };
6510 
6511 static const struct file_operations tracing_mark_raw_fops = {
6512 	.open		= tracing_open_generic_tr,
6513 	.write		= tracing_mark_raw_write,
6514 	.llseek		= generic_file_llseek,
6515 	.release	= tracing_release_generic_tr,
6516 };
6517 
6518 static const struct file_operations trace_clock_fops = {
6519 	.open		= tracing_clock_open,
6520 	.read		= seq_read,
6521 	.llseek		= seq_lseek,
6522 	.release	= tracing_single_release_tr,
6523 	.write		= tracing_clock_write,
6524 };
6525 
6526 #ifdef CONFIG_TRACER_SNAPSHOT
6527 static const struct file_operations snapshot_fops = {
6528 	.open		= tracing_snapshot_open,
6529 	.read		= seq_read,
6530 	.write		= tracing_snapshot_write,
6531 	.llseek		= tracing_lseek,
6532 	.release	= tracing_snapshot_release,
6533 };
6534 
6535 static const struct file_operations snapshot_raw_fops = {
6536 	.open		= snapshot_raw_open,
6537 	.read		= tracing_buffers_read,
6538 	.release	= tracing_buffers_release,
6539 	.splice_read	= tracing_buffers_splice_read,
6540 	.llseek		= no_llseek,
6541 };
6542 
6543 #endif /* CONFIG_TRACER_SNAPSHOT */
6544 
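/*
 * Open handler for the per-cpu "trace_pipe_raw" files, which expose raw
 * ring buffer pages (binary format) to user space.
 */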
6545 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6546 {
6547 	struct trace_array *tr = inode->i_private;
6548 	struct ftrace_buffer_info *info;
6549 	int ret;
6550 
6551 	if (tracing_disabled)
6552 		return -ENODEV;
6553 
6554 	if (trace_array_get(tr) < 0)
6555 		return -ENODEV;
6556 
6557 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6558 	if (!info) {
6559 		trace_array_put(tr);
6560 		return -ENOMEM;
6561 	}
6562 
6563 	mutex_lock(&trace_types_lock);
6564 
6565 	info->iter.tr		= tr;
6566 	info->iter.cpu_file	= tracing_get_cpu(inode);
6567 	info->iter.trace	= tr->current_trace;
6568 	info->iter.trace_buffer = &tr->trace_buffer;
6569 	info->spare		= NULL;
6570 	/* Force reading ring buffer for first read */
6571 	info->read		= (unsigned int)-1;
6572 
6573 	filp->private_data = info;
6574 
6575 	tr->current_trace->ref++;
6576 
6577 	mutex_unlock(&trace_types_lock);
6578 
6579 	ret = nonseekable_open(inode, filp);
6580 	if (ret < 0)
6581 		trace_array_put(tr);
6582 
6583 	return ret;
6584 }
6585 
6586 static unsigned int
6587 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6588 {
6589 	struct ftrace_buffer_info *info = filp->private_data;
6590 	struct trace_iterator *iter = &info->iter;
6591 
6592 	return trace_poll(iter, filp, poll_table);
6593 }
6594 
6595 static ssize_t
6596 tracing_buffers_read(struct file *filp, char __user *ubuf,
6597 		     size_t count, loff_t *ppos)
6598 {
6599 	struct ftrace_buffer_info *info = filp->private_data;
6600 	struct trace_iterator *iter = &info->iter;
6601 	ssize_t ret = 0;
6602 	ssize_t size;
6603 
6604 	if (!count)
6605 		return 0;
6606 
6607 #ifdef CONFIG_TRACER_MAX_TRACE
6608 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6609 		return -EBUSY;
6610 #endif
6611 
6612 	if (!info->spare) {
6613 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6614 							  iter->cpu_file);
6615 		if (IS_ERR(info->spare)) {
6616 			ret = PTR_ERR(info->spare);
6617 			info->spare = NULL;
6618 		} else {
6619 			info->spare_cpu = iter->cpu_file;
6620 		}
6621 	}
6622 	if (!info->spare)
6623 		return ret;
6624 
6625 	/* Do we have previous read data to read? */
6626 	if (info->read < PAGE_SIZE)
6627 		goto read;
6628 
6629  again:
6630 	trace_access_lock(iter->cpu_file);
6631 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6632 				    &info->spare,
6633 				    count,
6634 				    iter->cpu_file, 0);
6635 	trace_access_unlock(iter->cpu_file);
6636 
6637 	if (ret < 0) {
6638 		if (trace_empty(iter)) {
6639 			if ((filp->f_flags & O_NONBLOCK))
6640 				return -EAGAIN;
6641 
6642 			ret = wait_on_pipe(iter, false);
6643 			if (ret)
6644 				return ret;
6645 
6646 			goto again;
6647 		}
6648 		return 0;
6649 	}
6650 
6651 	info->read = 0;
6652  read:
6653 	size = PAGE_SIZE - info->read;
6654 	if (size > count)
6655 		size = count;
6656 
6657 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6658 	if (ret == size)
6659 		return -EFAULT;
6660 
6661 	size -= ret;
6662 
6663 	*ppos += size;
6664 	info->read += size;
6665 
6666 	return size;
6667 }
6668 
6669 static int tracing_buffers_release(struct inode *inode, struct file *file)
6670 {
6671 	struct ftrace_buffer_info *info = file->private_data;
6672 	struct trace_iterator *iter = &info->iter;
6673 
6674 	mutex_lock(&trace_types_lock);
6675 
6676 	iter->tr->current_trace->ref--;
6677 
6678 	__trace_array_put(iter->tr);
6679 
6680 	if (info->spare)
6681 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6682 					   info->spare_cpu, info->spare);
6683 	kfree(info);
6684 
6685 	mutex_unlock(&trace_types_lock);
6686 
6687 	return 0;
6688 }
6689 
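/*
 * A reference-counted ring buffer read page handed to a pipe by
 * tracing_buffers_splice_read(); it is returned to the ring buffer
 * when the last pipe reference is dropped.
 */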
6690 struct buffer_ref {
6691 	struct ring_buffer	*buffer;
6692 	void			*page;
6693 	int			cpu;
6694 	int			ref;
6695 };
6696 
6697 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6698 				    struct pipe_buffer *buf)
6699 {
6700 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6701 
6702 	if (--ref->ref)
6703 		return;
6704 
6705 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6706 	kfree(ref);
6707 	buf->private = 0;
6708 }
6709 
6710 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6711 				struct pipe_buffer *buf)
6712 {
6713 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6714 
6715 	ref->ref++;
6716 }
6717 
6718 /* Pipe buffer operations for a ring buffer page handed to splice. */
6719 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6720 	.can_merge		= 0,
6721 	.confirm		= generic_pipe_buf_confirm,
6722 	.release		= buffer_pipe_buf_release,
6723 	.steal			= generic_pipe_buf_steal,
6724 	.get			= buffer_pipe_buf_get,
6725 };
6726 
6727 /*
6728  * Callback from splice_to_pipe(); used to release pages
6729  * at the end of the spd in case we errored out while filling the pipe.
6730  */
6731 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6732 {
6733 	struct buffer_ref *ref =
6734 		(struct buffer_ref *)spd->partial[i].private;
6735 
6736 	if (--ref->ref)
6737 		return;
6738 
6739 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6740 	kfree(ref);
6741 	spd->partial[i].private = 0;
6742 }
6743 
6744 static ssize_t
6745 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6746 			    struct pipe_inode_info *pipe, size_t len,
6747 			    unsigned int flags)
6748 {
6749 	struct ftrace_buffer_info *info = file->private_data;
6750 	struct trace_iterator *iter = &info->iter;
6751 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6752 	struct page *pages_def[PIPE_DEF_BUFFERS];
6753 	struct splice_pipe_desc spd = {
6754 		.pages		= pages_def,
6755 		.partial	= partial_def,
6756 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6757 		.ops		= &buffer_pipe_buf_ops,
6758 		.spd_release	= buffer_spd_release,
6759 	};
6760 	struct buffer_ref *ref;
6761 	int entries, size, i;
6762 	ssize_t ret = 0;
6763 
6764 #ifdef CONFIG_TRACER_MAX_TRACE
6765 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6766 		return -EBUSY;
6767 #endif
6768 
6769 	if (*ppos & (PAGE_SIZE - 1))
6770 		return -EINVAL;
6771 
6772 	if (len & (PAGE_SIZE - 1)) {
6773 		if (len < PAGE_SIZE)
6774 			return -EINVAL;
6775 		len &= PAGE_MASK;
6776 	}
6777 
6778 	if (splice_grow_spd(pipe, &spd))
6779 		return -ENOMEM;
6780 
6781  again:
6782 	trace_access_lock(iter->cpu_file);
6783 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6784 
6785 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6786 		struct page *page;
6787 		int r;
6788 
6789 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6790 		if (!ref) {
6791 			ret = -ENOMEM;
6792 			break;
6793 		}
6794 
6795 		ref->ref = 1;
6796 		ref->buffer = iter->trace_buffer->buffer;
6797 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6798 		if (IS_ERR(ref->page)) {
6799 			ret = PTR_ERR(ref->page);
6800 			ref->page = NULL;
6801 			kfree(ref);
6802 			break;
6803 		}
6804 		ref->cpu = iter->cpu_file;
6805 
6806 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6807 					  len, iter->cpu_file, 1);
6808 		if (r < 0) {
6809 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6810 						   ref->page);
6811 			kfree(ref);
6812 			break;
6813 		}
6814 
6815 		/*
6816 		 * Zero out any leftover data; this is going to
6817 		 * user land.
6818 		 */
6819 		size = ring_buffer_page_len(ref->page);
6820 		if (size < PAGE_SIZE)
6821 			memset(ref->page + size, 0, PAGE_SIZE - size);
6822 
6823 		page = virt_to_page(ref->page);
6824 
6825 		spd.pages[i] = page;
6826 		spd.partial[i].len = PAGE_SIZE;
6827 		spd.partial[i].offset = 0;
6828 		spd.partial[i].private = (unsigned long)ref;
6829 		spd.nr_pages++;
6830 		*ppos += PAGE_SIZE;
6831 
6832 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6833 	}
6834 
6835 	trace_access_unlock(iter->cpu_file);
6836 	spd.nr_pages = i;
6837 
6838 	/* did we read anything? */
6839 	if (!spd.nr_pages) {
6840 		if (ret)
6841 			goto out;
6842 
6843 		ret = -EAGAIN;
6844 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6845 			goto out;
6846 
6847 		ret = wait_on_pipe(iter, true);
6848 		if (ret)
6849 			goto out;
6850 
6851 		goto again;
6852 	}
6853 
6854 	ret = splice_to_pipe(pipe, &spd);
6855 out:
6856 	splice_shrink_spd(&spd);
6857 
6858 	return ret;
6859 }
6860 
6861 static const struct file_operations tracing_buffers_fops = {
6862 	.open		= tracing_buffers_open,
6863 	.read		= tracing_buffers_read,
6864 	.poll		= tracing_buffers_poll,
6865 	.release	= tracing_buffers_release,
6866 	.splice_read	= tracing_buffers_splice_read,
6867 	.llseek		= no_llseek,
6868 };
6869 
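/*
 * Read handler for the per-cpu "stats" files: print ring buffer
 * statistics (entries, overruns, bytes, timestamps, dropped and read
 * events) for one CPU.
 */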
6870 static ssize_t
6871 tracing_stats_read(struct file *filp, char __user *ubuf,
6872 		   size_t count, loff_t *ppos)
6873 {
6874 	struct inode *inode = file_inode(filp);
6875 	struct trace_array *tr = inode->i_private;
6876 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6877 	int cpu = tracing_get_cpu(inode);
6878 	struct trace_seq *s;
6879 	unsigned long cnt;
6880 	unsigned long long t;
6881 	unsigned long usec_rem;
6882 
6883 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6884 	if (!s)
6885 		return -ENOMEM;
6886 
6887 	trace_seq_init(s);
6888 
6889 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6890 	trace_seq_printf(s, "entries: %ld\n", cnt);
6891 
6892 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6893 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6894 
6895 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6896 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6897 
6898 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6899 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6900 
6901 	if (trace_clocks[tr->clock_id].in_ns) {
6902 		/* local or global for trace_clock */
6903 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6904 		usec_rem = do_div(t, USEC_PER_SEC);
6905 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6906 								t, usec_rem);
6907 
6908 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6909 		usec_rem = do_div(t, USEC_PER_SEC);
6910 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6911 	} else {
6912 		/* counter or tsc mode for trace_clock */
6913 		trace_seq_printf(s, "oldest event ts: %llu\n",
6914 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6915 
6916 		trace_seq_printf(s, "now ts: %llu\n",
6917 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6918 	}
6919 
6920 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6921 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6922 
6923 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6924 	trace_seq_printf(s, "read events: %ld\n", cnt);
6925 
6926 	count = simple_read_from_buffer(ubuf, count, ppos,
6927 					s->buffer, trace_seq_used(s));
6928 
6929 	kfree(s);
6930 
6931 	return count;
6932 }
6933 
6934 static const struct file_operations tracing_stats_fops = {
6935 	.open		= tracing_open_generic_tr,
6936 	.read		= tracing_stats_read,
6937 	.llseek		= generic_file_llseek,
6938 	.release	= tracing_release_generic_tr,
6939 };
6940 
6941 #ifdef CONFIG_DYNAMIC_FTRACE
6942 
6943 static ssize_t
6944 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6945 		  size_t cnt, loff_t *ppos)
6946 {
6947 	unsigned long *p = filp->private_data;
6948 	char buf[64]; /* Not too big for a shallow stack */
6949 	int r;
6950 
6951 	r = scnprintf(buf, 63, "%ld", *p);
6952 	buf[r++] = '\n';
6953 
6954 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6955 }
6956 
6957 static const struct file_operations tracing_dyn_info_fops = {
6958 	.open		= tracing_open_generic,
6959 	.read		= tracing_read_dyn_info,
6960 	.llseek		= generic_file_llseek,
6961 };
6962 #endif /* CONFIG_DYNAMIC_FTRACE */
6963 
6964 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6965 static void
6966 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6967 		struct trace_array *tr, struct ftrace_probe_ops *ops,
6968 		void *data)
6969 {
6970 	tracing_snapshot_instance(tr);
6971 }
6972 
6973 static void
6974 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6975 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
6976 		      void *data)
6977 {
6978 	struct ftrace_func_mapper *mapper = data;
6979 	long *count = NULL;
6980 
6981 	if (mapper)
6982 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6983 
6984 	if (count) {
6985 
6986 		if (*count <= 0)
6987 			return;
6988 
6989 		(*count)--;
6990 	}
6991 
6992 	tracing_snapshot_instance(tr);
6993 }
6994 
6995 static int
6996 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6997 		      struct ftrace_probe_ops *ops, void *data)
6998 {
6999 	struct ftrace_func_mapper *mapper = data;
7000 	long *count = NULL;
7001 
7002 	seq_printf(m, "%ps:", (void *)ip);
7003 
7004 	seq_puts(m, "snapshot");
7005 
7006 	if (mapper)
7007 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7008 
7009 	if (count)
7010 		seq_printf(m, ":count=%ld\n", *count);
7011 	else
7012 		seq_puts(m, ":unlimited\n");
7013 
7014 	return 0;
7015 }
7016 
7017 static int
7018 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7019 		     unsigned long ip, void *init_data, void **data)
7020 {
7021 	struct ftrace_func_mapper *mapper = *data;
7022 
7023 	if (!mapper) {
7024 		mapper = allocate_ftrace_func_mapper();
7025 		if (!mapper)
7026 			return -ENOMEM;
7027 		*data = mapper;
7028 	}
7029 
7030 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7031 }
7032 
7033 static void
7034 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7035 		     unsigned long ip, void *data)
7036 {
7037 	struct ftrace_func_mapper *mapper = data;
7038 
7039 	if (!ip) {
7040 		if (!mapper)
7041 			return;
7042 		free_ftrace_func_mapper(mapper, NULL);
7043 		return;
7044 	}
7045 
7046 	ftrace_func_mapper_remove_ip(mapper, ip);
7047 }
7048 
7049 static struct ftrace_probe_ops snapshot_probe_ops = {
7050 	.func			= ftrace_snapshot,
7051 	.print			= ftrace_snapshot_print,
7052 };
7053 
7054 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7055 	.func			= ftrace_count_snapshot,
7056 	.print			= ftrace_snapshot_print,
7057 	.init			= ftrace_snapshot_init,
7058 	.free			= ftrace_snapshot_free,
7059 };
7060 
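/*
 * Handler for the "snapshot" command of set_ftrace_filter, e.g.
 *   echo '<func>:snapshot[:count]' > set_ftrace_filter
 * The optional count limits how many times the snapshot is taken;
 * a leading '!' unregisters the probe.
 */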
7061 static int
7062 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7063 			       char *glob, char *cmd, char *param, int enable)
7064 {
7065 	struct ftrace_probe_ops *ops;
7066 	void *count = (void *)-1;
7067 	char *number;
7068 	int ret;
7069 
7070 	if (!tr)
7071 		return -ENODEV;
7072 
7073 	/* hash funcs only work with set_ftrace_filter */
7074 	if (!enable)
7075 		return -EINVAL;
7076 
7077 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7078 
7079 	if (glob[0] == '!')
7080 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7081 
7082 	if (!param)
7083 		goto out_reg;
7084 
7085 	number = strsep(&param, ":");
7086 
7087 	if (!strlen(number))
7088 		goto out_reg;
7089 
7090 	/*
7091 	 * We use the callback data field (which is a pointer)
7092 	 * as our counter.
7093 	 */
7094 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7095 	if (ret)
7096 		return ret;
7097 
7098  out_reg:
7099 	ret = alloc_snapshot(tr);
7100 	if (ret < 0)
7101 		goto out;
7102 
7103 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7104 
7105  out:
7106 	return ret < 0 ? ret : 0;
7107 }
7108 
7109 static struct ftrace_func_command ftrace_snapshot_cmd = {
7110 	.name			= "snapshot",
7111 	.func			= ftrace_trace_snapshot_callback,
7112 };
7113 
7114 static __init int register_snapshot_cmd(void)
7115 {
7116 	return register_ftrace_command(&ftrace_snapshot_cmd);
7117 }
7118 #else
7119 static inline __init int register_snapshot_cmd(void) { return 0; }
7120 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7121 
7122 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7123 {
7124 	if (WARN_ON(!tr->dir))
7125 		return ERR_PTR(-ENODEV);
7126 
7127 	/* Top directory uses NULL as the parent */
7128 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7129 		return NULL;
7130 
7131 	/* All sub buffers have a descriptor */
7132 	return tr->dir;
7133 }
7134 
7135 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7136 {
7137 	struct dentry *d_tracer;
7138 
7139 	if (tr->percpu_dir)
7140 		return tr->percpu_dir;
7141 
7142 	d_tracer = tracing_get_dentry(tr);
7143 	if (IS_ERR(d_tracer))
7144 		return NULL;
7145 
7146 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7147 
7148 	WARN_ONCE(!tr->percpu_dir,
7149 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7150 
7151 	return tr->percpu_dir;
7152 }
7153 
7154 static struct dentry *
7155 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7156 		      void *data, long cpu, const struct file_operations *fops)
7157 {
7158 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7159 
7160 	if (ret) /* See tracing_get_cpu() */
7161 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7162 	return ret;
7163 }
7164 
7165 static void
7166 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7167 {
7168 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7169 	struct dentry *d_cpu;
7170 	char cpu_dir[30]; /* 30 characters should be more than enough */
7171 
7172 	if (!d_percpu)
7173 		return;
7174 
7175 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7176 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7177 	if (!d_cpu) {
7178 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7179 		return;
7180 	}
7181 
7182 	/* per cpu trace_pipe */
7183 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7184 				tr, cpu, &tracing_pipe_fops);
7185 
7186 	/* per cpu trace */
7187 	trace_create_cpu_file("trace", 0644, d_cpu,
7188 				tr, cpu, &tracing_fops);
7189 
7190 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7191 				tr, cpu, &tracing_buffers_fops);
7192 
7193 	trace_create_cpu_file("stats", 0444, d_cpu,
7194 				tr, cpu, &tracing_stats_fops);
7195 
7196 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7197 				tr, cpu, &tracing_entries_fops);
7198 
7199 #ifdef CONFIG_TRACER_SNAPSHOT
7200 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7201 				tr, cpu, &snapshot_fops);
7202 
7203 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7204 				tr, cpu, &snapshot_raw_fops);
7205 #endif
7206 }
7207 
7208 #ifdef CONFIG_FTRACE_SELFTEST
7209 /* Let selftest have access to static functions in this file */
7210 #include "trace_selftest.c"
7211 #endif
7212 
7213 static ssize_t
7214 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7215 			loff_t *ppos)
7216 {
7217 	struct trace_option_dentry *topt = filp->private_data;
7218 	char *buf;
7219 
7220 	if (topt->flags->val & topt->opt->bit)
7221 		buf = "1\n";
7222 	else
7223 		buf = "0\n";
7224 
7225 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7226 }
7227 
7228 static ssize_t
7229 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7230 			 loff_t *ppos)
7231 {
7232 	struct trace_option_dentry *topt = filp->private_data;
7233 	unsigned long val;
7234 	int ret;
7235 
7236 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7237 	if (ret)
7238 		return ret;
7239 
7240 	if (val != 0 && val != 1)
7241 		return -EINVAL;
7242 
7243 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7244 		mutex_lock(&trace_types_lock);
7245 		ret = __set_tracer_option(topt->tr, topt->flags,
7246 					  topt->opt, !val);
7247 		mutex_unlock(&trace_types_lock);
7248 		if (ret)
7249 			return ret;
7250 	}
7251 
7252 	*ppos += cnt;
7253 
7254 	return cnt;
7255 }
7256 
7257 
7258 static const struct file_operations trace_options_fops = {
7259 	.open = tracing_open_generic,
7260 	.read = trace_options_read,
7261 	.write = trace_options_write,
7262 	.llseek	= generic_file_llseek,
7263 };
7264 
7265 /*
7266  * In order to pass in both the trace_array descriptor as well as the index
7267  * to the flag that the trace option file represents, the trace_array
7268  * has a character array of trace_flags_index[], which holds the index
7269  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7270  * The address of this character array is passed to the flag option file
7271  * read/write callbacks.
7272  *
7273  * In order to extract both the index and the trace_array descriptor,
7274  * get_tr_index() uses the following algorithm.
7275  *
7276  *   idx = *ptr;
7277  *
7278  * This works because the pointer is the address of one of the index
7279  * entries, and each entry holds its own index (remember index[1] == 1).
7280  *
7281  * Then, to get the trace_array descriptor, we subtract that index
7282  * from the pointer, which takes us back to the start of the index array:
7283  *
7284  *   ptr - idx == &index[0]
7285  *
7286  * Then a simple container_of() from that pointer gets us to the
7287  * trace_array descriptor.
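 *
 * For example (illustrative only): if data points at
 * tr->trace_flags_index[3], then *data == 3 and
 * data - 3 == tr->trace_flags_index, so container_of()
 * recovers tr.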
7288  */
7289 static void get_tr_index(void *data, struct trace_array **ptr,
7290 			 unsigned int *pindex)
7291 {
7292 	*pindex = *(unsigned char *)data;
7293 
7294 	*ptr = container_of(data - *pindex, struct trace_array,
7295 			    trace_flags_index);
7296 }
7297 
7298 static ssize_t
7299 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7300 			loff_t *ppos)
7301 {
7302 	void *tr_index = filp->private_data;
7303 	struct trace_array *tr;
7304 	unsigned int index;
7305 	char *buf;
7306 
7307 	get_tr_index(tr_index, &tr, &index);
7308 
7309 	if (tr->trace_flags & (1 << index))
7310 		buf = "1\n";
7311 	else
7312 		buf = "0\n";
7313 
7314 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7315 }
7316 
7317 static ssize_t
7318 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7319 			 loff_t *ppos)
7320 {
7321 	void *tr_index = filp->private_data;
7322 	struct trace_array *tr;
7323 	unsigned int index;
7324 	unsigned long val;
7325 	int ret;
7326 
7327 	get_tr_index(tr_index, &tr, &index);
7328 
7329 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7330 	if (ret)
7331 		return ret;
7332 
7333 	if (val != 0 && val != 1)
7334 		return -EINVAL;
7335 
7336 	mutex_lock(&trace_types_lock);
7337 	ret = set_tracer_flag(tr, 1 << index, val);
7338 	mutex_unlock(&trace_types_lock);
7339 
7340 	if (ret < 0)
7341 		return ret;
7342 
7343 	*ppos += cnt;
7344 
7345 	return cnt;
7346 }
7347 
7348 static const struct file_operations trace_options_core_fops = {
7349 	.open = tracing_open_generic,
7350 	.read = trace_options_core_read,
7351 	.write = trace_options_core_write,
7352 	.llseek = generic_file_llseek,
7353 };
7354 
7355 struct dentry *trace_create_file(const char *name,
7356 				 umode_t mode,
7357 				 struct dentry *parent,
7358 				 void *data,
7359 				 const struct file_operations *fops)
7360 {
7361 	struct dentry *ret;
7362 
7363 	ret = tracefs_create_file(name, mode, parent, data, fops);
7364 	if (!ret)
7365 		pr_warn("Could not create tracefs '%s' entry\n", name);
7366 
7367 	return ret;
7368 }
7369 
7370 
7371 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7372 {
7373 	struct dentry *d_tracer;
7374 
7375 	if (tr->options)
7376 		return tr->options;
7377 
7378 	d_tracer = tracing_get_dentry(tr);
7379 	if (IS_ERR(d_tracer))
7380 		return NULL;
7381 
7382 	tr->options = tracefs_create_dir("options", d_tracer);
7383 	if (!tr->options) {
7384 		pr_warn("Could not create tracefs directory 'options'\n");
7385 		return NULL;
7386 	}
7387 
7388 	return tr->options;
7389 }
7390 
7391 static void
7392 create_trace_option_file(struct trace_array *tr,
7393 			 struct trace_option_dentry *topt,
7394 			 struct tracer_flags *flags,
7395 			 struct tracer_opt *opt)
7396 {
7397 	struct dentry *t_options;
7398 
7399 	t_options = trace_options_init_dentry(tr);
7400 	if (!t_options)
7401 		return;
7402 
7403 	topt->flags = flags;
7404 	topt->opt = opt;
7405 	topt->tr = tr;
7406 
7407 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7408 				    &trace_options_fops);
7409 
7410 }
7411 
7412 static void
7413 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7414 {
7415 	struct trace_option_dentry *topts;
7416 	struct trace_options *tr_topts;
7417 	struct tracer_flags *flags;
7418 	struct tracer_opt *opts;
7419 	int cnt;
7420 	int i;
7421 
7422 	if (!tracer)
7423 		return;
7424 
7425 	flags = tracer->flags;
7426 
7427 	if (!flags || !flags->opts)
7428 		return;
7429 
7430 	/*
7431 	 * If this is an instance, only create flags for tracers
7432 	 * the instance may have.
7433 	 */
7434 	if (!trace_ok_for_array(tracer, tr))
7435 		return;
7436 
7437 	for (i = 0; i < tr->nr_topts; i++) {
7438 		/* Make sure there are no duplicate flags. */
7439 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7440 			return;
7441 	}
7442 
7443 	opts = flags->opts;
7444 
7445 	for (cnt = 0; opts[cnt].name; cnt++)
7446 		;
7447 
7448 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7449 	if (!topts)
7450 		return;
7451 
7452 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7453 			    GFP_KERNEL);
7454 	if (!tr_topts) {
7455 		kfree(topts);
7456 		return;
7457 	}
7458 
7459 	tr->topts = tr_topts;
7460 	tr->topts[tr->nr_topts].tracer = tracer;
7461 	tr->topts[tr->nr_topts].topts = topts;
7462 	tr->nr_topts++;
7463 
7464 	for (cnt = 0; opts[cnt].name; cnt++) {
7465 		create_trace_option_file(tr, &topts[cnt], flags,
7466 					 &opts[cnt]);
7467 		WARN_ONCE(topts[cnt].entry == NULL,
7468 			  "Failed to create trace option: %s",
7469 			  opts[cnt].name);
7470 	}
7471 }
7472 
7473 static struct dentry *
7474 create_trace_option_core_file(struct trace_array *tr,
7475 			      const char *option, long index)
7476 {
7477 	struct dentry *t_options;
7478 
7479 	t_options = trace_options_init_dentry(tr);
7480 	if (!t_options)
7481 		return NULL;
7482 
7483 	return trace_create_file(option, 0644, t_options,
7484 				 (void *)&tr->trace_flags_index[index],
7485 				 &trace_options_core_fops);
7486 }
7487 
7488 static void create_trace_options_dir(struct trace_array *tr)
7489 {
7490 	struct dentry *t_options;
7491 	bool top_level = tr == &global_trace;
7492 	int i;
7493 
7494 	t_options = trace_options_init_dentry(tr);
7495 	if (!t_options)
7496 		return;
7497 
7498 	for (i = 0; trace_options[i]; i++) {
7499 		if (top_level ||
7500 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7501 			create_trace_option_core_file(tr, trace_options[i], i);
7502 	}
7503 }
7504 
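/*
 * Backing implementation for the "tracing_on" file: reading reports
 * whether the ring buffer is recording (0/1); writing turns recording
 * on or off and calls the current tracer's start/stop hooks.
 */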
7505 static ssize_t
7506 rb_simple_read(struct file *filp, char __user *ubuf,
7507 	       size_t cnt, loff_t *ppos)
7508 {
7509 	struct trace_array *tr = filp->private_data;
7510 	char buf[64];
7511 	int r;
7512 
7513 	r = tracer_tracing_is_on(tr);
7514 	r = sprintf(buf, "%d\n", r);
7515 
7516 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7517 }
7518 
7519 static ssize_t
7520 rb_simple_write(struct file *filp, const char __user *ubuf,
7521 		size_t cnt, loff_t *ppos)
7522 {
7523 	struct trace_array *tr = filp->private_data;
7524 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7525 	unsigned long val;
7526 	int ret;
7527 
7528 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7529 	if (ret)
7530 		return ret;
7531 
7532 	if (buffer) {
7533 		mutex_lock(&trace_types_lock);
7534 		if (val) {
7535 			tracer_tracing_on(tr);
7536 			if (tr->current_trace->start)
7537 				tr->current_trace->start(tr);
7538 		} else {
7539 			tracer_tracing_off(tr);
7540 			if (tr->current_trace->stop)
7541 				tr->current_trace->stop(tr);
7542 		}
7543 		mutex_unlock(&trace_types_lock);
7544 	}
7545 
7546 	(*ppos)++;
7547 
7548 	return cnt;
7549 }
7550 
7551 static const struct file_operations rb_simple_fops = {
7552 	.open		= tracing_open_generic_tr,
7553 	.read		= rb_simple_read,
7554 	.write		= rb_simple_write,
7555 	.release	= tracing_release_generic_tr,
7556 	.llseek		= default_llseek,
7557 };
7558 
7559 struct dentry *trace_instance_dir;
7560 
7561 static void
7562 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7563 
7564 static int
7565 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7566 {
7567 	enum ring_buffer_flags rb_flags;
7568 
7569 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7570 
7571 	buf->tr = tr;
7572 
7573 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7574 	if (!buf->buffer)
7575 		return -ENOMEM;
7576 
7577 	buf->data = alloc_percpu(struct trace_array_cpu);
7578 	if (!buf->data) {
7579 		ring_buffer_free(buf->buffer);
7580 		return -ENOMEM;
7581 	}
7582 
7583 	/* Allocate the first page for all buffers */
7584 	set_buffer_entries(&tr->trace_buffer,
7585 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7586 
7587 	return 0;
7588 }
7589 
7590 static int allocate_trace_buffers(struct trace_array *tr, int size)
7591 {
7592 	int ret;
7593 
7594 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7595 	if (ret)
7596 		return ret;
7597 
7598 #ifdef CONFIG_TRACER_MAX_TRACE
7599 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7600 				    allocate_snapshot ? size : 1);
7601 	if (WARN_ON(ret)) {
7602 		ring_buffer_free(tr->trace_buffer.buffer);
7603 		free_percpu(tr->trace_buffer.data);
7604 		return -ENOMEM;
7605 	}
7606 	tr->allocated_snapshot = allocate_snapshot;
7607 
7608 	/*
7609 	 * Only the top level trace array gets its snapshot allocated
7610 	 * from the kernel command line.
7611 	 */
7612 	allocate_snapshot = false;
7613 #endif
7614 	return 0;
7615 }
7616 
7617 static void free_trace_buffer(struct trace_buffer *buf)
7618 {
7619 	if (buf->buffer) {
7620 		ring_buffer_free(buf->buffer);
7621 		buf->buffer = NULL;
7622 		free_percpu(buf->data);
7623 		buf->data = NULL;
7624 	}
7625 }
7626 
7627 static void free_trace_buffers(struct trace_array *tr)
7628 {
7629 	if (!tr)
7630 		return;
7631 
7632 	free_trace_buffer(&tr->trace_buffer);
7633 
7634 #ifdef CONFIG_TRACER_MAX_TRACE
7635 	free_trace_buffer(&tr->max_buffer);
7636 #endif
7637 }
7638 
7639 static void init_trace_flags_index(struct trace_array *tr)
7640 {
7641 	int i;
7642 
7643 	/* Used by the trace options files */
7644 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7645 		tr->trace_flags_index[i] = i;
7646 }
7647 
7648 static void __update_tracer_options(struct trace_array *tr)
7649 {
7650 	struct tracer *t;
7651 
7652 	for (t = trace_types; t; t = t->next)
7653 		add_tracer_options(tr, t);
7654 }
7655 
7656 static void update_tracer_options(struct trace_array *tr)
7657 {
7658 	mutex_lock(&trace_types_lock);
7659 	__update_tracer_options(tr);
7660 	mutex_unlock(&trace_types_lock);
7661 }
7662 
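/*
 * Called when a new directory is created under the tracefs "instances"
 * directory: allocate and register a new trace_array with its own
 * buffers and tracefs files.
 */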
7663 static int instance_mkdir(const char *name)
7664 {
7665 	struct trace_array *tr;
7666 	int ret;
7667 
7668 	mutex_lock(&trace_types_lock);
7669 
7670 	ret = -EEXIST;
7671 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7672 		if (tr->name && strcmp(tr->name, name) == 0)
7673 			goto out_unlock;
7674 	}
7675 
7676 	ret = -ENOMEM;
7677 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7678 	if (!tr)
7679 		goto out_unlock;
7680 
7681 	tr->name = kstrdup(name, GFP_KERNEL);
7682 	if (!tr->name)
7683 		goto out_free_tr;
7684 
7685 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7686 		goto out_free_tr;
7687 
7688 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7689 
7690 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7691 
7692 	raw_spin_lock_init(&tr->start_lock);
7693 
7694 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7695 
7696 	tr->current_trace = &nop_trace;
7697 
7698 	INIT_LIST_HEAD(&tr->systems);
7699 	INIT_LIST_HEAD(&tr->events);
7700 
7701 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7702 		goto out_free_tr;
7703 
7704 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7705 	if (!tr->dir)
7706 		goto out_free_tr;
7707 
7708 	ret = event_trace_add_tracer(tr->dir, tr);
7709 	if (ret) {
7710 		tracefs_remove_recursive(tr->dir);
7711 		goto out_free_tr;
7712 	}
7713 
7714 	ftrace_init_trace_array(tr);
7715 
7716 	init_tracer_tracefs(tr, tr->dir);
7717 	init_trace_flags_index(tr);
7718 	__update_tracer_options(tr);
7719 
7720 	list_add(&tr->list, &ftrace_trace_arrays);
7721 
7722 	mutex_unlock(&trace_types_lock);
7723 
7724 	return 0;
7725 
7726  out_free_tr:
7727 	free_trace_buffers(tr);
7728 	free_cpumask_var(tr->tracing_cpumask);
7729 	kfree(tr->name);
7730 	kfree(tr);
7731 
7732  out_unlock:
7733 	mutex_unlock(&trace_types_lock);
7734 
7735 	return ret;
7736 
7737 }
7738 
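/*
 * Called when an instance directory is removed: refuse if the instance
 * is still in use, otherwise tear down its files and free its buffers.
 */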
7739 static int instance_rmdir(const char *name)
7740 {
7741 	struct trace_array *tr;
7742 	int found = 0;
7743 	int ret;
7744 	int i;
7745 
7746 	mutex_lock(&trace_types_lock);
7747 
7748 	ret = -ENODEV;
7749 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7750 		if (tr->name && strcmp(tr->name, name) == 0) {
7751 			found = 1;
7752 			break;
7753 		}
7754 	}
7755 	if (!found)
7756 		goto out_unlock;
7757 
7758 	ret = -EBUSY;
7759 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7760 		goto out_unlock;
7761 
7762 	list_del(&tr->list);
7763 
7764 	/* Disable all the flags that were enabled coming in */
7765 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7766 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7767 			set_tracer_flag(tr, 1 << i, 0);
7768 	}
7769 
7770 	tracing_set_nop(tr);
7771 	clear_ftrace_function_probes(tr);
7772 	event_trace_del_tracer(tr);
7773 	ftrace_clear_pids(tr);
7774 	ftrace_destroy_function_files(tr);
7775 	tracefs_remove_recursive(tr->dir);
7776 	free_trace_buffers(tr);
7777 
7778 	for (i = 0; i < tr->nr_topts; i++) {
7779 		kfree(tr->topts[i].topts);
7780 	}
7781 	kfree(tr->topts);
7782 
7783 	free_cpumask_var(tr->tracing_cpumask);
7784 	kfree(tr->name);
7785 	kfree(tr);
7786 
7787 	ret = 0;
7788 
7789  out_unlock:
7790 	mutex_unlock(&trace_types_lock);
7791 
7792 	return ret;
7793 }
7794 
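/*
 * A minimal usage sketch (the "foo" name below is arbitrary): instances
 * are created and removed from user space through the tracefs
 * "instances" directory, which lands in instance_mkdir() and
 * instance_rmdir() above:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *	# rmdir /sys/kernel/tracing/instances/foo
 */
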
7795 static __init void create_trace_instances(struct dentry *d_tracer)
7796 {
7797 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7798 							 instance_mkdir,
7799 							 instance_rmdir);
7800 	if (WARN_ON(!trace_instance_dir))
7801 		return;
7802 }
7803 
7804 static void
7805 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7806 {
7807 	int cpu;
7808 
7809 	trace_create_file("available_tracers", 0444, d_tracer,
7810 			tr, &show_traces_fops);
7811 
7812 	trace_create_file("current_tracer", 0644, d_tracer,
7813 			tr, &set_tracer_fops);
7814 
7815 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7816 			  tr, &tracing_cpumask_fops);
7817 
7818 	trace_create_file("trace_options", 0644, d_tracer,
7819 			  tr, &tracing_iter_fops);
7820 
7821 	trace_create_file("trace", 0644, d_tracer,
7822 			  tr, &tracing_fops);
7823 
7824 	trace_create_file("trace_pipe", 0444, d_tracer,
7825 			  tr, &tracing_pipe_fops);
7826 
7827 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7828 			  tr, &tracing_entries_fops);
7829 
7830 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7831 			  tr, &tracing_total_entries_fops);
7832 
7833 	trace_create_file("free_buffer", 0200, d_tracer,
7834 			  tr, &tracing_free_buffer_fops);
7835 
7836 	trace_create_file("trace_marker", 0220, d_tracer,
7837 			  tr, &tracing_mark_fops);
7838 
7839 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7840 			  tr, &tracing_mark_raw_fops);
7841 
7842 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7843 			  &trace_clock_fops);
7844 
7845 	trace_create_file("tracing_on", 0644, d_tracer,
7846 			  tr, &rb_simple_fops);
7847 
7848 	create_trace_options_dir(tr);
7849 
7850 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7851 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7852 			&tr->max_latency, &tracing_max_lat_fops);
7853 #endif
7854 
7855 	if (ftrace_create_function_files(tr, d_tracer))
7856 		WARN(1, "Could not allocate function filter files");
7857 
7858 #ifdef CONFIG_TRACER_SNAPSHOT
7859 	trace_create_file("snapshot", 0644, d_tracer,
7860 			  tr, &snapshot_fops);
7861 #endif
7862 
7863 	for_each_tracing_cpu(cpu)
7864 		tracing_init_tracefs_percpu(tr, cpu);
7865 
7866 	ftrace_init_tracefs(tr, d_tracer);
7867 }
7868 
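/*
 * A minimal sketch of how the files created above are typically used
 * from user space (paths assume tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *	# cat /sys/kernel/tracing/trace
 */
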
7869 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7870 {
7871 	struct vfsmount *mnt;
7872 	struct file_system_type *type;
7873 
7874 	/*
7875 	 * To maintain backward compatibility for tools that mount
7876 	 * debugfs to get to the tracing facility, tracefs is automatically
7877 	 * mounted to the debugfs/tracing directory.
7878 	 */
7879 	type = get_fs_type("tracefs");
7880 	if (!type)
7881 		return NULL;
7882 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7883 	put_filesystem(type);
7884 	if (IS_ERR(mnt))
7885 		return NULL;
7886 	mntget(mnt);
7887 
7888 	return mnt;
7889 }
7890 
7891 /**
7892  * tracing_init_dentry - initialize top level trace array
7893  *
7894  * This is called when creating files or directories in the tracing
7895  * directory. It is called via fs_initcall() by any of the boot up code
7896  * and expects to return the dentry of the top level tracing directory.
7897  */
7898 struct dentry *tracing_init_dentry(void)
7899 {
7900 	struct trace_array *tr = &global_trace;
7901 
7902 	/* The top level trace array uses NULL as parent */
7903 	if (tr->dir)
7904 		return NULL;
7905 
7906 	if (WARN_ON(!tracefs_initialized()) ||
7907 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7908 		 WARN_ON(!debugfs_initialized())))
7909 		return ERR_PTR(-ENODEV);
7910 
7911 	/*
7912 	 * As there may still be users that expect the tracing
7913 	 * files to exist in debugfs/tracing, we must automount
7914 	 * the tracefs file system there, so older tools still
7915 	 * work with the newer kernel.
7916 	 */
7917 	tr->dir = debugfs_create_automount("tracing", NULL,
7918 					   trace_automount, NULL);
7919 	if (!tr->dir) {
7920 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7921 		return ERR_PTR(-ENOMEM);
7922 	}
7923 
7924 	return NULL;
7925 }
7926 
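/*
 * A usage sketch of the automount set up above: tracefs is normally
 * mounted at /sys/kernel/tracing, and older tools that only know about
 * debugfs keep working because walking into debugfs/tracing triggers
 * trace_automount():
 *
 *	# mount -t tracefs nodev /sys/kernel/tracing
 *	# ls /sys/kernel/debug/tracing	(with debugfs mounted, this
 *					 triggers the automount)
 */
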
7927 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7928 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7929 
7930 static void __init trace_eval_init(void)
7931 {
7932 	int len;
7933 
7934 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7935 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7936 }
7937 
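/*
 * A minimal sketch of where these eval maps come from: a trace event
 * header can export an enum value with TRACE_DEFINE_ENUM() so that the
 * event's "format" file shows the numeric value instead of an enum
 * name user space cannot resolve (FOO_RUNNING is a made-up example):
 *
 *	TRACE_DEFINE_ENUM(FOO_RUNNING);
 *	...
 *	TP_printk("state=%s",
 *		  __print_symbolic(__entry->state, { FOO_RUNNING, "running" }))
 */
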
7938 #ifdef CONFIG_MODULES
7939 static void trace_module_add_evals(struct module *mod)
7940 {
7941 	if (!mod->num_trace_evals)
7942 		return;
7943 
7944 	/*
7945 	 * Modules with bad taint do not have events created, do
7946 	 * not bother with their eval maps either.
7947 	 */
7948 	if (trace_module_has_bad_taint(mod))
7949 		return;
7950 
7951 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7952 }
7953 
7954 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7955 static void trace_module_remove_evals(struct module *mod)
7956 {
7957 	union trace_eval_map_item *map;
7958 	union trace_eval_map_item **last = &trace_eval_maps;
7959 
7960 	if (!mod->num_trace_evals)
7961 		return;
7962 
7963 	mutex_lock(&trace_eval_mutex);
7964 
7965 	map = trace_eval_maps;
7966 
7967 	while (map) {
7968 		if (map->head.mod == mod)
7969 			break;
7970 		map = trace_eval_jmp_to_tail(map);
7971 		last = &map->tail.next;
7972 		map = map->tail.next;
7973 	}
7974 	if (!map)
7975 		goto out;
7976 
7977 	*last = trace_eval_jmp_to_tail(map)->tail.next;
7978 	kfree(map);
7979  out:
7980 	mutex_unlock(&trace_eval_mutex);
7981 }
7982 #else
7983 static inline void trace_module_remove_evals(struct module *mod) { }
7984 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7985 
7986 static int trace_module_notify(struct notifier_block *self,
7987 			       unsigned long val, void *data)
7988 {
7989 	struct module *mod = data;
7990 
7991 	switch (val) {
7992 	case MODULE_STATE_COMING:
7993 		trace_module_add_evals(mod);
7994 		break;
7995 	case MODULE_STATE_GOING:
7996 		trace_module_remove_evals(mod);
7997 		break;
7998 	}
7999 
8000 	return 0;
8001 }
8002 
8003 static struct notifier_block trace_module_nb = {
8004 	.notifier_call = trace_module_notify,
8005 	.priority = 0,
8006 };
8007 #endif /* CONFIG_MODULES */
8008 
8009 static __init int tracer_init_tracefs(void)
8010 {
8011 	struct dentry *d_tracer;
8012 
8013 	trace_access_lock_init();
8014 
8015 	d_tracer = tracing_init_dentry();
8016 	if (IS_ERR(d_tracer))
8017 		return 0;
8018 
8019 	init_tracer_tracefs(&global_trace, d_tracer);
8020 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8021 
8022 	trace_create_file("tracing_thresh", 0644, d_tracer,
8023 			&global_trace, &tracing_thresh_fops);
8024 
8025 	trace_create_file("README", 0444, d_tracer,
8026 			NULL, &tracing_readme_fops);
8027 
8028 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8029 			NULL, &tracing_saved_cmdlines_fops);
8030 
8031 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8032 			  NULL, &tracing_saved_cmdlines_size_fops);
8033 
8034 	trace_create_file("saved_tgids", 0444, d_tracer,
8035 			NULL, &tracing_saved_tgids_fops);
8036 
8037 	trace_eval_init();
8038 
8039 	trace_create_eval_file(d_tracer);
8040 
8041 #ifdef CONFIG_MODULES
8042 	register_module_notifier(&trace_module_nb);
8043 #endif
8044 
8045 #ifdef CONFIG_DYNAMIC_FTRACE
8046 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8047 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8048 #endif
8049 
8050 	create_trace_instances(d_tracer);
8051 
8052 	update_tracer_options(&global_trace);
8053 
8054 	return 0;
8055 }
8056 
8057 static int trace_panic_handler(struct notifier_block *this,
8058 			       unsigned long event, void *unused)
8059 {
8060 	if (ftrace_dump_on_oops)
8061 		ftrace_dump(ftrace_dump_on_oops);
8062 	return NOTIFY_OK;
8063 }
8064 
8065 static struct notifier_block trace_panic_notifier = {
8066 	.notifier_call  = trace_panic_handler,
8067 	.next           = NULL,
8068 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8069 };
8070 
8071 static int trace_die_handler(struct notifier_block *self,
8072 			     unsigned long val,
8073 			     void *data)
8074 {
8075 	switch (val) {
8076 	case DIE_OOPS:
8077 		if (ftrace_dump_on_oops)
8078 			ftrace_dump(ftrace_dump_on_oops);
8079 		break;
8080 	default:
8081 		break;
8082 	}
8083 	return NOTIFY_OK;
8084 }
8085 
8086 static struct notifier_block trace_die_notifier = {
8087 	.notifier_call = trace_die_handler,
8088 	.priority = 200
8089 };
8090 
8091 /*
8092  * printk is limited to a maximum of 1024 characters; we really don't
8093  * need it that big. Nothing should be printing 1000 characters anyway.
8094  */
8095 #define TRACE_MAX_PRINT		1000
8096 
8097 /*
8098  * Define here KERN_TRACE so that we have one place to modify
8099  * it if we decide to change what log level the ftrace dump
8100  * should be at.
8101  */
8102 #define KERN_TRACE		KERN_EMERG
8103 
8104 void
8105 trace_printk_seq(struct trace_seq *s)
8106 {
8107 	/* Probably should print a warning here. */
8108 	if (s->seq.len >= TRACE_MAX_PRINT)
8109 		s->seq.len = TRACE_MAX_PRINT;
8110 
8111 	/*
8112 	 * More paranoid code. Although the buffer size is set to
8113 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8114 	 * an extra layer of protection.
8115 	 */
8116 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8117 		s->seq.len = s->seq.size - 1;
8118 
8119 	/* Should be zero terminated, but we are paranoid. */
8120 	s->buffer[s->seq.len] = 0;
8121 
8122 	printk(KERN_TRACE "%s", s->buffer);
8123 
8124 	trace_seq_init(s);
8125 }
8126 
8127 void trace_init_global_iter(struct trace_iterator *iter)
8128 {
8129 	iter->tr = &global_trace;
8130 	iter->trace = iter->tr->current_trace;
8131 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8132 	iter->trace_buffer = &global_trace.trace_buffer;
8133 
8134 	if (iter->trace && iter->trace->open)
8135 		iter->trace->open(iter);
8136 
8137 	/* Annotate start of buffers if we had overruns */
8138 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8139 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8140 
8141 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8142 	if (trace_clocks[iter->tr->clock_id].in_ns)
8143 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8144 }
8145 
8146 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8147 {
8148 	/* use static because iter can be a bit big for the stack */
8149 	static struct trace_iterator iter;
8150 	static atomic_t dump_running;
8151 	struct trace_array *tr = &global_trace;
8152 	unsigned int old_userobj;
8153 	unsigned long flags;
8154 	int cnt = 0, cpu;
8155 
8156 	/* Only allow one dump user at a time. */
8157 	if (atomic_inc_return(&dump_running) != 1) {
8158 		atomic_dec(&dump_running);
8159 		return;
8160 	}
8161 
8162 	/*
8163 	 * Always turn off tracing when we dump.
8164 	 * We don't need to show trace output of what happens
8165 	 * between multiple crashes.
8166 	 *
8167 	 * If the user does a sysrq-z, then they can re-enable
8168 	 * tracing with echo 1 > tracing_on.
8169 	 */
8170 	tracing_off();
8171 
8172 	local_irq_save(flags);
8173 
8174 	/* Simulate the iterator */
8175 	trace_init_global_iter(&iter);
8176 
8177 	for_each_tracing_cpu(cpu) {
8178 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8179 	}
8180 
8181 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8182 
8183 	/* don't look at user memory in panic mode */
8184 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8185 
8186 	switch (oops_dump_mode) {
8187 	case DUMP_ALL:
8188 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8189 		break;
8190 	case DUMP_ORIG:
8191 		iter.cpu_file = raw_smp_processor_id();
8192 		break;
8193 	case DUMP_NONE:
8194 		goto out_enable;
8195 	default:
8196 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8197 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8198 	}
8199 
8200 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8201 
8202 	/* Did function tracer already get disabled? */
8203 	if (ftrace_is_dead()) {
8204 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8205 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8206 	}
8207 
8208 	/*
8209 	 * We need to stop all tracing on all CPUs to read
8210 	 * the next buffer. This is a bit expensive, but is
8211 	 * not done often. We fill all that we can read,
8212 	 * and then release the locks again.
8213 	 */
8214 
8215 	while (!trace_empty(&iter)) {
8216 
8217 		if (!cnt)
8218 			printk(KERN_TRACE "---------------------------------\n");
8219 
8220 		cnt++;
8221 
8222 		/* reset all but tr, trace, and overruns */
8223 		memset(&iter.seq, 0,
8224 		       sizeof(struct trace_iterator) -
8225 		       offsetof(struct trace_iterator, seq));
8226 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8227 		iter.pos = -1;
8228 
8229 		if (trace_find_next_entry_inc(&iter) != NULL) {
8230 			int ret;
8231 
8232 			ret = print_trace_line(&iter);
8233 			if (ret != TRACE_TYPE_NO_CONSUME)
8234 				trace_consume(&iter);
8235 		}
8236 		touch_nmi_watchdog();
8237 
8238 		trace_printk_seq(&iter.seq);
8239 	}
8240 
8241 	if (!cnt)
8242 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8243 	else
8244 		printk(KERN_TRACE "---------------------------------\n");
8245 
8246  out_enable:
8247 	tr->trace_flags |= old_userobj;
8248 
8249 	for_each_tracing_cpu(cpu) {
8250 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8251 	}
8252 	atomic_dec(&dump_running);
8253 	local_irq_restore(flags);
8254 }
8255 EXPORT_SYMBOL_GPL(ftrace_dump);
8256 
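/*
 * A minimal usage sketch for ftrace_dump(): kernel code that hits an
 * unexpected state can dump the global ring buffer straight to the
 * console (something_went_wrong is a made-up condition), and the same
 * dump can be triggered from user space with the magic sysrq 'z' key:
 *
 *	if (something_went_wrong)
 *		ftrace_dump(DUMP_ALL);
 */
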
8257 __init static int tracer_alloc_buffers(void)
8258 {
8259 	int ring_buf_size;
8260 	int ret = -ENOMEM;
8261 
8262 	/*
8263 	 * Make sure we don't accidentally add more trace options
8264 	 * than we have bits for.
8265 	 */
8266 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8267 
8268 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8269 		goto out;
8270 
8271 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8272 		goto out_free_buffer_mask;
8273 
8274 	/* Only allocate trace_printk buffers if a trace_printk exists */
8275 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8276 		/* Must be called before global_trace.buffer is allocated */
8277 		trace_printk_init_buffers();
8278 
8279 	/* To save memory, keep the ring buffer size at its minimum */
8280 	if (ring_buffer_expanded)
8281 		ring_buf_size = trace_buf_size;
8282 	else
8283 		ring_buf_size = 1;
8284 
8285 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8286 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8287 
8288 	raw_spin_lock_init(&global_trace.start_lock);
8289 
8290 	/*
8291 	 * The prepare callback allocates some memory for the ring buffer. We
8292 	 * don't free the buffer if the CPU goes down. If we were to free
8293 	 * the buffer, then the user would lose any trace that was in the
8294 	 * buffer. The memory will be removed once the "instance" is removed.
8295 	 */
8296 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8297 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8298 				      NULL);
8299 	if (ret < 0)
8300 		goto out_free_cpumask;
8301 	/* Used for event triggers */
8302 	ret = -ENOMEM;
8303 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8304 	if (!temp_buffer)
8305 		goto out_rm_hp_state;
8306 
8307 	if (trace_create_savedcmd() < 0)
8308 		goto out_free_temp_buffer;
8309 
8310 	/* TODO: make the number of buffers hot pluggable with CPUs */
8311 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8312 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8313 		WARN_ON(1);
8314 		goto out_free_savedcmd;
8315 	}
8316 
8317 	if (global_trace.buffer_disabled)
8318 		tracing_off();
8319 
8320 	if (trace_boot_clock) {
8321 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8322 		if (ret < 0)
8323 			pr_warn("Trace clock %s not defined, going back to default\n",
8324 				trace_boot_clock);
8325 	}
8326 
8327 	/*
8328 	 * register_tracer() might reference current_trace, so it
8329 	 * needs to be set before we register anything. This is
8330 	 * just a bootstrap of current_trace anyway.
8331 	 */
8332 	global_trace.current_trace = &nop_trace;
8333 
8334 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8335 
8336 	ftrace_init_global_array_ops(&global_trace);
8337 
8338 	init_trace_flags_index(&global_trace);
8339 
8340 	register_tracer(&nop_trace);
8341 
8342 	/* Function tracing may start here (via kernel command line) */
8343 	init_function_trace();
8344 
8345 	/* All seems OK, enable tracing */
8346 	tracing_disabled = 0;
8347 
8348 	atomic_notifier_chain_register(&panic_notifier_list,
8349 				       &trace_panic_notifier);
8350 
8351 	register_die_notifier(&trace_die_notifier);
8352 
8353 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8354 
8355 	INIT_LIST_HEAD(&global_trace.systems);
8356 	INIT_LIST_HEAD(&global_trace.events);
8357 	list_add(&global_trace.list, &ftrace_trace_arrays);
8358 
8359 	apply_trace_boot_options();
8360 
8361 	register_snapshot_cmd();
8362 
8363 	return 0;
8364 
8365 out_free_savedcmd:
8366 	free_saved_cmdlines_buffer(savedcmd);
8367 out_free_temp_buffer:
8368 	ring_buffer_free(temp_buffer);
8369 out_rm_hp_state:
8370 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8371 out_free_cpumask:
8372 	free_cpumask_var(global_trace.tracing_cpumask);
8373 out_free_buffer_mask:
8374 	free_cpumask_var(tracing_buffer_mask);
8375 out:
8376 	return ret;
8377 }
8378 
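/*
 * A sketch of the boot parameters consulted above; both are standard
 * kernel command line options parsed elsewhere in this file:
 *
 *	trace_buf_size=1M	sizes the ring buffer (trace_buf_size)
 *	trace_clock=global	selects the boot clock (trace_boot_clock)
 */
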
8379 void __init early_trace_init(void)
8380 {
8381 	if (tracepoint_printk) {
8382 		tracepoint_print_iter =
8383 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8384 		if (WARN_ON(!tracepoint_print_iter))
8385 			tracepoint_printk = 0;
8386 		else
8387 			static_key_enable(&tracepoint_printk_key.key);
8388 	}
8389 	tracer_alloc_buffers();
8390 }
8391 
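/*
 * A usage sketch for the tracepoint_printk path set up above: booting
 * with "tp_printk" together with an event selection, e.g.
 *
 *	tp_printk trace_event=sched:sched_switch
 *
 * sends the enabled tracepoints out through printk.
 */
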
8392 void __init trace_init(void)
8393 {
8394 	trace_event_init();
8395 }
8396 
8397 __init static int clear_boot_tracer(void)
8398 {
8399 	/*
8400 	 * The default bootup tracer name points into an init section.
8401 	 * This function is called at late_initcall time. If the boot
8402 	 * tracer was never found and registered, clear the pointer out
8403 	 * to prevent later registration from accessing the buffer that
8404 	 * is about to be freed.
8405 	 */
8406 	if (!default_bootup_tracer)
8407 		return 0;
8408 
8409 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8410 	       default_bootup_tracer);
8411 	default_bootup_tracer = NULL;
8412 
8413 	return 0;
8414 }
8415 
8416 fs_initcall(tracer_init_tracefs);
8417 late_initcall_sync(clear_boot_tracer);
8418