xref: /openbmc/linux/kernel/trace/trace.c (revision 57ee11ea)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45 
46 #include "trace.h"
47 #include "trace_output.h"
48 
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54 
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will lurk into the ring-buffer to count the
58  * entries inserted during the selftest although some concurrent
59  * insertions into the ring-buffer such as trace_printk could occurred
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63 
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68 
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73 
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 	{ }
77 };
78 
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 	return 0;
83 }
84 
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91 
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99 
100 cpumask_var_t __read_mostly	tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputing it to a
109  * serial console.
110  *
111  * It is default off, but you can enable it with either specifying
112  * "ftrace_dump_on_oops" in the kernel command line, or setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered oops
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 	struct module			*mod;
127 	unsigned long			length;
128 };
129 
130 union trace_eval_map_item;
131 
132 struct trace_eval_map_tail {
133 	/*
134 	 * "end" is first and points to NULL as it must be different
135 	 * than "mod" or "eval_string"
136 	 */
137 	union trace_eval_map_item	*next;
138 	const char			*end;	/* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_eval_mutex);
142 
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151 	struct trace_eval_map		map;
152 	struct trace_eval_map_head	head;
153 	struct trace_eval_map_tail	tail;
154 };
155 
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE		100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 	default_bootup_tracer = bootup_tracer_buf;
171 	/* We are using ftrace early, expand it */
172 	ring_buffer_expanded = true;
173 	return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 	if (*str++ != '=' || !*str) {
180 		ftrace_dump_on_oops = DUMP_ALL;
181 		return 1;
182 	}
183 
184 	if (!strcmp("orig_cpu", str)) {
185 		ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188 
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 		__disable_trace_on_warning = 1;
197 	return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 	allocate_snapshot = true;
204 	/* We also need the main ring buffer expanded */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 	return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 	trace_boot_clock = trace_boot_clock_buf;
227 	return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 		tracepoint_printk = 1;
235 	return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 	nsec += 500;
242 	do_div(nsec, 1000);
243 	return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS						\
248 	(FUNCTION_DEFAULT_FLAGS |					\
249 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
250 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
251 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
252 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
256 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267 	.trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 	struct trace_array *tr;
275 	int ret = -ENODEV;
276 
277 	mutex_lock(&trace_types_lock);
278 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 		if (tr == this_tr) {
280 			tr->ref++;
281 			ret = 0;
282 			break;
283 		}
284 	}
285 	mutex_unlock(&trace_types_lock);
286 
287 	return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 	WARN_ON(!this_tr->ref);
293 	this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 	mutex_lock(&trace_types_lock);
299 	__trace_array_put(this_tr);
300 	mutex_unlock(&trace_types_lock);
301 }
302 
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 			      struct ring_buffer *buffer,
305 			      struct ring_buffer_event *event)
306 {
307 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 	    !filter_match_preds(call->filter, rec)) {
309 		__trace_event_discard_commit(buffer, event);
310 		return 1;
311 	}
312 
313 	return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 	vfree(pid_list->pids);
319 	kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 	/*
333 	 * If pid_max changed after filtered_pids was created, we
334 	 * by default ignore all pids greater than the previous pid_max.
335 	 */
336 	if (search_pid >= filtered_pids->pid_max)
337 		return false;
338 
339 	return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 	/*
355 	 * Return false, because if filtered_pids does not exist,
356 	 * all pids are good to trace.
357 	 */
358 	if (!filtered_pids)
359 		return false;
360 
361 	return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363 
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 				  struct task_struct *self,
378 				  struct task_struct *task)
379 {
380 	if (!pid_list)
381 		return;
382 
383 	/* For forks, we only add if the forking task is listed */
384 	if (self) {
385 		if (!trace_find_filtered_pid(pid_list, self->pid))
386 			return;
387 	}
388 
389 	/* Sorry, but we don't support pid_max changing after setting */
390 	if (task->pid >= pid_list->pid_max)
391 		return;
392 
393 	/* "self" is set for forks, and NULL for exits */
394 	if (self)
395 		set_bit(task->pid, pid_list->pids);
396 	else
397 		clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 	unsigned long pid = (unsigned long)v;
415 
416 	(*pos)++;
417 
418 	/* pid already is +1 of the actual prevous bit */
419 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421 	/* Return pid + 1 to allow zero to be represented */
422 	if (pid < pid_list->pid_max)
423 		return (void *)(pid + 1);
424 
425 	return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 	unsigned long pid;
442 	loff_t l = 0;
443 
444 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 	if (pid >= pid_list->pid_max)
446 		return NULL;
447 
448 	/* Return pid + 1 so that zero can be the exit value */
449 	for (pid++; pid && l < *pos;
450 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 		;
452 	return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 	unsigned long pid = (unsigned long)v - 1;
466 
467 	seq_printf(m, "%lu\n", pid);
468 	return 0;
469 }
470 
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE		127
473 
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 		    struct trace_pid_list **new_pid_list,
476 		    const char __user *ubuf, size_t cnt)
477 {
478 	struct trace_pid_list *pid_list;
479 	struct trace_parser parser;
480 	unsigned long val;
481 	int nr_pids = 0;
482 	ssize_t read = 0;
483 	ssize_t ret = 0;
484 	loff_t pos;
485 	pid_t pid;
486 
487 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 		return -ENOMEM;
489 
490 	/*
491 	 * Always recreate a new array. The write is an all or nothing
492 	 * operation. Always create a new array when adding new pids by
493 	 * the user. If the operation fails, then the current list is
494 	 * not modified.
495 	 */
496 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 	if (!pid_list)
498 		return -ENOMEM;
499 
500 	pid_list->pid_max = READ_ONCE(pid_max);
501 
502 	/* Only truncating will shrink pid_max */
503 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 		pid_list->pid_max = filtered_pids->pid_max;
505 
506 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 	if (!pid_list->pids) {
508 		kfree(pid_list);
509 		return -ENOMEM;
510 	}
511 
512 	if (filtered_pids) {
513 		/* copy the current bits to the new max */
514 		for_each_set_bit(pid, filtered_pids->pids,
515 				 filtered_pids->pid_max) {
516 			set_bit(pid, pid_list->pids);
517 			nr_pids++;
518 		}
519 	}
520 
521 	while (cnt > 0) {
522 
523 		pos = 0;
524 
525 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 		if (ret < 0 || !trace_parser_loaded(&parser))
527 			break;
528 
529 		read += ret;
530 		ubuf += ret;
531 		cnt -= ret;
532 
533 		parser.buffer[parser.idx] = 0;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it will be much appreciated
614  * to not have to wait for all that output. Anyway this can be
615  * boot time and run time configurable.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * ring buffer serializes readers, but it is low level protection.
633  * The validity of the events (which returns by ring_buffer_peek() ..etc)
634  * are not protected by ring buffer.
635  *
636  * The content of events may become garbage if we allow other process consumes
637  * these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not reader page) in ring buffer, and this page will be rewrited
640  *      by events producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to system.
643  *
644  * These primitives allow multi process access to different cpu ring buffer
645  * concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multi read-only access are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races of where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id());
924 	local_irq_restore(flags);
925 }
926 
927 /**
928  * trace_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943 	struct trace_array *tr = &global_trace;
944 
945 	tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 					struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955 	int ret;
956 
957 	if (!tr->allocated_snapshot) {
958 
959 		/* allocate spare buffer */
960 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 		if (ret < 0)
963 			return ret;
964 
965 		tr->allocated_snapshot = true;
966 	}
967 
968 	return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973 	/*
974 	 * We don't free the ring buffer. instead, resize it because
975 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
976 	 * we want preserve it.
977 	 */
978 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 	set_buffer_entries(&tr->max_buffer, 1);
980 	tracing_reset_online_cpus(&tr->max_buffer);
981 	tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996 	struct trace_array *tr = &global_trace;
997 	int ret;
998 
999 	ret = alloc_snapshot(tr);
1000 	WARN_ON(ret < 0);
1001 
1002 	return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to trace_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 	int ret;
1020 
1021 	ret = tracing_alloc_snapshot();
1022 	if (ret < 0)
1023 		return;
1024 
1025 	tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 	return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 	/* Give warning */
1043 	tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 	if (tr->trace_buffer.buffer)
1051 		ring_buffer_record_off(tr->trace_buffer.buffer);
1052 	/*
1053 	 * This flag is looked at when buffers haven't been allocated
1054 	 * yet, or by some tracers (like irqsoff), that just want to
1055 	 * know if the ring buffer has been disabled, but it can handle
1056 	 * races of where it gets disabled but we still do a record.
1057 	 * As the check is in the fast path of the tracers, it is more
1058 	 * important to be fast than accurate.
1059 	 */
1060 	tr->buffer_disabled = 1;
1061 	/* Make the flag seen by readers */
1062 	smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075 	tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081 	if (__disable_trace_on_warning)
1082 		tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 	if (tr->trace_buffer.buffer)
1094 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 	return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103 	return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109 	unsigned long buf_size;
1110 
1111 	if (!str)
1112 		return 0;
1113 	buf_size = memparse(str, &str);
1114 	/* nr_entries can not be zero */
1115 	if (buf_size == 0)
1116 		return 0;
1117 	trace_buf_size = buf_size;
1118 	return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 	unsigned long threshold;
1125 	int ret;
1126 
1127 	if (!str)
1128 		return 0;
1129 	ret = kstrtoul(str, 0, &threshold);
1130 	if (ret < 0)
1131 		return 0;
1132 	tracing_thresh = threshold * 1000;
1133 	return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 	return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit postions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 	TRACE_FLAGS
1154 	NULL
1155 };
1156 
1157 static struct {
1158 	u64 (*func)(void);
1159 	const char *name;
1160 	int in_ns;		/* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 	{ trace_clock_local,		"local",	1 },
1163 	{ trace_clock_global,		"global",	1 },
1164 	{ trace_clock_counter,		"counter",	0 },
1165 	{ trace_clock_jiffies,		"uptime",	0 },
1166 	{ trace_clock,			"perf",		1 },
1167 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1168 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1169 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1170 	ARCH_TRACE_CLOCKS
1171 };
1172 
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 	memset(parser, 0, sizeof(*parser));
1179 
1180 	parser->buffer = kmalloc(size, GFP_KERNEL);
1181 	if (!parser->buffer)
1182 		return 1;
1183 
1184 	parser->size = size;
1185 	return 0;
1186 }
1187 
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 	kfree(parser->buffer);
1194 	parser->buffer = NULL;
1195 }
1196 
1197 /*
1198  * trace_get_user - reads the user input string separated by  space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 	size_t cnt, loff_t *ppos)
1210 {
1211 	char ch;
1212 	size_t read = 0;
1213 	ssize_t ret;
1214 
1215 	if (!*ppos)
1216 		trace_parser_clear(parser);
1217 
1218 	ret = get_user(ch, ubuf++);
1219 	if (ret)
1220 		goto out;
1221 
1222 	read++;
1223 	cnt--;
1224 
1225 	/*
1226 	 * The parser is not finished with the last write,
1227 	 * continue reading the user input without skipping spaces.
1228 	 */
1229 	if (!parser->cont) {
1230 		/* skip white space */
1231 		while (cnt && isspace(ch)) {
1232 			ret = get_user(ch, ubuf++);
1233 			if (ret)
1234 				goto out;
1235 			read++;
1236 			cnt--;
1237 		}
1238 
1239 		/* only spaces were written */
1240 		if (isspace(ch)) {
1241 			*ppos += read;
1242 			ret = read;
1243 			goto out;
1244 		}
1245 
1246 		parser->idx = 0;
1247 	}
1248 
1249 	/* read the non-space input */
1250 	while (cnt && !isspace(ch)) {
1251 		if (parser->idx < parser->size - 1)
1252 			parser->buffer[parser->idx++] = ch;
1253 		else {
1254 			ret = -EINVAL;
1255 			goto out;
1256 		}
1257 		ret = get_user(ch, ubuf++);
1258 		if (ret)
1259 			goto out;
1260 		read++;
1261 		cnt--;
1262 	}
1263 
1264 	/* We either got finished input or we have to wait for another call. */
1265 	if (isspace(ch)) {
1266 		parser->buffer[parser->idx] = 0;
1267 		parser->cont = false;
1268 	} else if (parser->idx < parser->size - 1) {
1269 		parser->cont = true;
1270 		parser->buffer[parser->idx++] = ch;
1271 	} else {
1272 		ret = -EINVAL;
1273 		goto out;
1274 	}
1275 
1276 	*ppos += read;
1277 	ret = read;
1278 
1279 out:
1280 	return ret;
1281 }
1282 
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 	int len;
1287 
1288 	if (trace_seq_used(s) <= s->seq.readpos)
1289 		return -EBUSY;
1290 
1291 	len = trace_seq_used(s) - s->seq.readpos;
1292 	if (cnt > len)
1293 		cnt = len;
1294 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295 
1296 	s->seq.readpos += cnt;
1297 	return cnt;
1298 }
1299 
1300 unsigned long __read_mostly	tracing_thresh;
1301 
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 	struct trace_buffer *max_buf = &tr->max_buffer;
1313 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315 
1316 	max_buf->cpu = cpu;
1317 	max_buf->time_start = data->preempt_timestamp;
1318 
1319 	max_data->saved_latency = tr->max_latency;
1320 	max_data->critical_start = data->critical_start;
1321 	max_data->critical_end = data->critical_end;
1322 
1323 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 	max_data->pid = tsk->pid;
1325 	/*
1326 	 * If tsk == current, then use current_uid(), as that does not use
1327 	 * RCU. The irq tracer can be called out of RCU scope.
1328 	 */
1329 	if (tsk == current)
1330 		max_data->uid = current_uid();
1331 	else
1332 		max_data->uid = task_uid(tsk);
1333 
1334 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 	max_data->policy = tsk->policy;
1336 	max_data->rt_priority = tsk->rt_priority;
1337 
1338 	/* record this tasks comm */
1339 	tracing_record_cmdline(tsk);
1340 }
1341 
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 	struct ring_buffer *buf;
1355 
1356 	if (tr->stop_count)
1357 		return;
1358 
1359 	WARN_ON_ONCE(!irqs_disabled());
1360 
1361 	if (!tr->allocated_snapshot) {
1362 		/* Only the nop tracer should hit this when disabling */
1363 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 		return;
1365 	}
1366 
1367 	arch_spin_lock(&tr->max_lock);
1368 
1369 	buf = tr->trace_buffer.buffer;
1370 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 	tr->max_buffer.buffer = buf;
1372 
1373 	__update_max_tr(tr, tsk, cpu);
1374 	arch_spin_unlock(&tr->max_lock);
1375 }
1376 
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr - tracer
1380  * @tsk - task with the latency
1381  * @cpu - the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388 	int ret;
1389 
1390 	if (tr->stop_count)
1391 		return;
1392 
1393 	WARN_ON_ONCE(!irqs_disabled());
1394 	if (!tr->allocated_snapshot) {
1395 		/* Only the nop tracer should hit this when disabling */
1396 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397 		return;
1398 	}
1399 
1400 	arch_spin_lock(&tr->max_lock);
1401 
1402 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403 
1404 	if (ret == -EBUSY) {
1405 		/*
1406 		 * We failed to swap the buffer due to a commit taking
1407 		 * place on this CPU. We fail to record, but we reset
1408 		 * the max trace buffer (no one writes directly to it)
1409 		 * and flag that it failed.
1410 		 */
1411 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 			"Failed to swap buffers due to commit in progress\n");
1413 	}
1414 
1415 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416 
1417 	__update_max_tr(tr, tsk, cpu);
1418 	arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421 
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424 	/* Iterators are static, they should be filled or empty */
1425 	if (trace_buffer_iter(iter, iter->cpu_file))
1426 		return 0;
1427 
1428 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429 				full);
1430 }
1431 
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434 
1435 struct trace_selftests {
1436 	struct list_head		list;
1437 	struct tracer			*type;
1438 };
1439 
1440 static LIST_HEAD(postponed_selftests);
1441 
1442 static int save_selftest(struct tracer *type)
1443 {
1444 	struct trace_selftests *selftest;
1445 
1446 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447 	if (!selftest)
1448 		return -ENOMEM;
1449 
1450 	selftest->type = type;
1451 	list_add(&selftest->list, &postponed_selftests);
1452 	return 0;
1453 }
1454 
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457 	struct trace_array *tr = &global_trace;
1458 	struct tracer *saved_tracer = tr->current_trace;
1459 	int ret;
1460 
1461 	if (!type->selftest || tracing_selftest_disabled)
1462 		return 0;
1463 
1464 	/*
1465 	 * If a tracer registers early in boot up (before scheduling is
1466 	 * initialized and such), then do not run its selftests yet.
1467 	 * Instead, run it a little later in the boot process.
1468 	 */
1469 	if (!selftests_can_run)
1470 		return save_selftest(type);
1471 
1472 	/*
1473 	 * Run a selftest on this tracer.
1474 	 * Here we reset the trace buffer, and set the current
1475 	 * tracer to be this tracer. The tracer can then run some
1476 	 * internal tracing to verify that everything is in order.
1477 	 * If we fail, we do not register this tracer.
1478 	 */
1479 	tracing_reset_online_cpus(&tr->trace_buffer);
1480 
1481 	tr->current_trace = type;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 	if (type->use_max_tr) {
1485 		/* If we expanded the buffers, make sure the max is expanded too */
1486 		if (ring_buffer_expanded)
1487 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 					   RING_BUFFER_ALL_CPUS);
1489 		tr->allocated_snapshot = true;
1490 	}
1491 #endif
1492 
1493 	/* the test is responsible for initializing and enabling */
1494 	pr_info("Testing tracer %s: ", type->name);
1495 	ret = type->selftest(type, tr);
1496 	/* the test is responsible for resetting too */
1497 	tr->current_trace = saved_tracer;
1498 	if (ret) {
1499 		printk(KERN_CONT "FAILED!\n");
1500 		/* Add the warning after printing 'FAILED' */
1501 		WARN_ON(1);
1502 		return -1;
1503 	}
1504 	/* Only reset on passing, to avoid touching corrupted buffers */
1505 	tracing_reset_online_cpus(&tr->trace_buffer);
1506 
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 	if (type->use_max_tr) {
1509 		tr->allocated_snapshot = false;
1510 
1511 		/* Shrink the max buffer again */
1512 		if (ring_buffer_expanded)
1513 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 					   RING_BUFFER_ALL_CPUS);
1515 	}
1516 #endif
1517 
1518 	printk(KERN_CONT "PASSED\n");
1519 	return 0;
1520 }
1521 
1522 static __init int init_trace_selftests(void)
1523 {
1524 	struct trace_selftests *p, *n;
1525 	struct tracer *t, **last;
1526 	int ret;
1527 
1528 	selftests_can_run = true;
1529 
1530 	mutex_lock(&trace_types_lock);
1531 
1532 	if (list_empty(&postponed_selftests))
1533 		goto out;
1534 
1535 	pr_info("Running postponed tracer tests:\n");
1536 
1537 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 		ret = run_tracer_selftest(p->type);
1539 		/* If the test fails, then warn and remove from available_tracers */
1540 		if (ret < 0) {
1541 			WARN(1, "tracer: %s failed selftest, disabling\n",
1542 			     p->type->name);
1543 			last = &trace_types;
1544 			for (t = trace_types; t; t = t->next) {
1545 				if (t == p->type) {
1546 					*last = t->next;
1547 					break;
1548 				}
1549 				last = &t->next;
1550 			}
1551 		}
1552 		list_del(&p->list);
1553 		kfree(p);
1554 	}
1555 
1556  out:
1557 	mutex_unlock(&trace_types_lock);
1558 
1559 	return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565 	return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568 
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570 
1571 static void __init apply_trace_boot_options(void);
1572 
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type - the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581 	struct tracer *t;
1582 	int ret = 0;
1583 
1584 	if (!type->name) {
1585 		pr_info("Tracer must have a name\n");
1586 		return -1;
1587 	}
1588 
1589 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591 		return -1;
1592 	}
1593 
1594 	mutex_lock(&trace_types_lock);
1595 
1596 	tracing_selftest_running = true;
1597 
1598 	for (t = trace_types; t; t = t->next) {
1599 		if (strcmp(type->name, t->name) == 0) {
1600 			/* already found */
1601 			pr_info("Tracer %s already registered\n",
1602 				type->name);
1603 			ret = -1;
1604 			goto out;
1605 		}
1606 	}
1607 
1608 	if (!type->set_flag)
1609 		type->set_flag = &dummy_set_flag;
1610 	if (!type->flags) {
1611 		/*allocate a dummy tracer_flags*/
1612 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613 		if (!type->flags) {
1614 			ret = -ENOMEM;
1615 			goto out;
1616 		}
1617 		type->flags->val = 0;
1618 		type->flags->opts = dummy_tracer_opt;
1619 	} else
1620 		if (!type->flags->opts)
1621 			type->flags->opts = dummy_tracer_opt;
1622 
1623 	/* store the tracer for __set_tracer_option */
1624 	type->flags->trace = type;
1625 
1626 	ret = run_tracer_selftest(type);
1627 	if (ret < 0)
1628 		goto out;
1629 
1630 	type->next = trace_types;
1631 	trace_types = type;
1632 	add_tracer_options(&global_trace, type);
1633 
1634  out:
1635 	tracing_selftest_running = false;
1636 	mutex_unlock(&trace_types_lock);
1637 
1638 	if (ret || !default_bootup_tracer)
1639 		goto out_unlock;
1640 
1641 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642 		goto out_unlock;
1643 
1644 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 	/* Do we want this tracer to start on bootup? */
1646 	tracing_set_tracer(&global_trace, type->name);
1647 	default_bootup_tracer = NULL;
1648 
1649 	apply_trace_boot_options();
1650 
1651 	/* disable other selftests, since this will break it. */
1652 	tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655 	       type->name);
1656 #endif
1657 
1658  out_unlock:
1659 	return ret;
1660 }
1661 
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664 	struct ring_buffer *buffer = buf->buffer;
1665 
1666 	if (!buffer)
1667 		return;
1668 
1669 	ring_buffer_record_disable(buffer);
1670 
1671 	/* Make sure all commits have finished */
1672 	synchronize_sched();
1673 	ring_buffer_reset_cpu(buffer, cpu);
1674 
1675 	ring_buffer_record_enable(buffer);
1676 }
1677 
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680 	struct ring_buffer *buffer = buf->buffer;
1681 	int cpu;
1682 
1683 	if (!buffer)
1684 		return;
1685 
1686 	ring_buffer_record_disable(buffer);
1687 
1688 	/* Make sure all commits have finished */
1689 	synchronize_sched();
1690 
1691 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692 
1693 	for_each_online_cpu(cpu)
1694 		ring_buffer_reset_cpu(buffer, cpu);
1695 
1696 	ring_buffer_record_enable(buffer);
1697 }
1698 
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702 	struct trace_array *tr;
1703 
1704 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 		tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 		tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709 	}
1710 }
1711 
1712 static int *tgid_map;
1713 
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719 	unsigned *map_cmdline_to_pid;
1720 	unsigned cmdline_num;
1721 	int cmdline_idx;
1722 	char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725 
1726 /* temporary disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728 
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733 
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
1738 
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740 				    struct saved_cmdlines_buffer *s)
1741 {
1742 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743 					GFP_KERNEL);
1744 	if (!s->map_cmdline_to_pid)
1745 		return -ENOMEM;
1746 
1747 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748 	if (!s->saved_cmdlines) {
1749 		kfree(s->map_cmdline_to_pid);
1750 		return -ENOMEM;
1751 	}
1752 
1753 	s->cmdline_idx = 0;
1754 	s->cmdline_num = val;
1755 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756 	       sizeof(s->map_pid_to_cmdline));
1757 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758 	       val * sizeof(*s->map_cmdline_to_pid));
1759 
1760 	return 0;
1761 }
1762 
1763 static int trace_create_savedcmd(void)
1764 {
1765 	int ret;
1766 
1767 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768 	if (!savedcmd)
1769 		return -ENOMEM;
1770 
1771 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772 	if (ret < 0) {
1773 		kfree(savedcmd);
1774 		savedcmd = NULL;
1775 		return -ENOMEM;
1776 	}
1777 
1778 	return 0;
1779 }
1780 
1781 int is_tracing_stopped(void)
1782 {
1783 	return global_trace.stop_count;
1784 }
1785 
1786 /**
1787  * tracing_start - quick start of the tracer
1788  *
1789  * If tracing is enabled but was stopped by tracing_stop,
1790  * this will start the tracer back up.
1791  */
1792 void tracing_start(void)
1793 {
1794 	struct ring_buffer *buffer;
1795 	unsigned long flags;
1796 
1797 	if (tracing_disabled)
1798 		return;
1799 
1800 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801 	if (--global_trace.stop_count) {
1802 		if (global_trace.stop_count < 0) {
1803 			/* Someone screwed up their debugging */
1804 			WARN_ON_ONCE(1);
1805 			global_trace.stop_count = 0;
1806 		}
1807 		goto out;
1808 	}
1809 
1810 	/* Prevent the buffers from switching */
1811 	arch_spin_lock(&global_trace.max_lock);
1812 
1813 	buffer = global_trace.trace_buffer.buffer;
1814 	if (buffer)
1815 		ring_buffer_record_enable(buffer);
1816 
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818 	buffer = global_trace.max_buffer.buffer;
1819 	if (buffer)
1820 		ring_buffer_record_enable(buffer);
1821 #endif
1822 
1823 	arch_spin_unlock(&global_trace.max_lock);
1824 
1825  out:
1826 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828 
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831 	struct ring_buffer *buffer;
1832 	unsigned long flags;
1833 
1834 	if (tracing_disabled)
1835 		return;
1836 
1837 	/* If global, we need to also start the max tracer */
1838 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839 		return tracing_start();
1840 
1841 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1842 
1843 	if (--tr->stop_count) {
1844 		if (tr->stop_count < 0) {
1845 			/* Someone screwed up their debugging */
1846 			WARN_ON_ONCE(1);
1847 			tr->stop_count = 0;
1848 		}
1849 		goto out;
1850 	}
1851 
1852 	buffer = tr->trace_buffer.buffer;
1853 	if (buffer)
1854 		ring_buffer_record_enable(buffer);
1855 
1856  out:
1857 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859 
1860 /**
1861  * tracing_stop - quick stop of the tracer
1862  *
1863  * Light weight way to stop tracing. Use in conjunction with
1864  * tracing_start.
1865  */
1866 void tracing_stop(void)
1867 {
1868 	struct ring_buffer *buffer;
1869 	unsigned long flags;
1870 
1871 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872 	if (global_trace.stop_count++)
1873 		goto out;
1874 
1875 	/* Prevent the buffers from switching */
1876 	arch_spin_lock(&global_trace.max_lock);
1877 
1878 	buffer = global_trace.trace_buffer.buffer;
1879 	if (buffer)
1880 		ring_buffer_record_disable(buffer);
1881 
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883 	buffer = global_trace.max_buffer.buffer;
1884 	if (buffer)
1885 		ring_buffer_record_disable(buffer);
1886 #endif
1887 
1888 	arch_spin_unlock(&global_trace.max_lock);
1889 
1890  out:
1891 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
1893 
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896 	struct ring_buffer *buffer;
1897 	unsigned long flags;
1898 
1899 	/* If global, we need to also stop the max tracer */
1900 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901 		return tracing_stop();
1902 
1903 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1904 	if (tr->stop_count++)
1905 		goto out;
1906 
1907 	buffer = tr->trace_buffer.buffer;
1908 	if (buffer)
1909 		ring_buffer_record_disable(buffer);
1910 
1911  out:
1912 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914 
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917 	unsigned pid, idx;
1918 
1919 	/* treat recording of idle task as a success */
1920 	if (!tsk->pid)
1921 		return 1;
1922 
1923 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1924 		return 0;
1925 
1926 	/*
1927 	 * It's not the end of the world if we don't get
1928 	 * the lock, but we also don't want to spin
1929 	 * nor do we want to disable interrupts,
1930 	 * so if we miss here, then better luck next time.
1931 	 */
1932 	if (!arch_spin_trylock(&trace_cmdline_lock))
1933 		return 0;
1934 
1935 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1936 	if (idx == NO_CMDLINE_MAP) {
1937 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1938 
1939 		/*
1940 		 * Check whether the cmdline buffer at idx has a pid
1941 		 * mapped. We are going to overwrite that entry so we
1942 		 * need to clear the map_pid_to_cmdline. Otherwise we
1943 		 * would read the new comm for the old pid.
1944 		 */
1945 		pid = savedcmd->map_cmdline_to_pid[idx];
1946 		if (pid != NO_CMDLINE_MAP)
1947 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1948 
1949 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1950 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1951 
1952 		savedcmd->cmdline_idx = idx;
1953 	}
1954 
1955 	set_cmdline(idx, tsk->comm);
1956 
1957 	arch_spin_unlock(&trace_cmdline_lock);
1958 
1959 	return 1;
1960 }
1961 
1962 static void __trace_find_cmdline(int pid, char comm[])
1963 {
1964 	unsigned map;
1965 
1966 	if (!pid) {
1967 		strcpy(comm, "<idle>");
1968 		return;
1969 	}
1970 
1971 	if (WARN_ON_ONCE(pid < 0)) {
1972 		strcpy(comm, "<XXX>");
1973 		return;
1974 	}
1975 
1976 	if (pid > PID_MAX_DEFAULT) {
1977 		strcpy(comm, "<...>");
1978 		return;
1979 	}
1980 
1981 	map = savedcmd->map_pid_to_cmdline[pid];
1982 	if (map != NO_CMDLINE_MAP)
1983 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1984 	else
1985 		strcpy(comm, "<...>");
1986 }
1987 
1988 void trace_find_cmdline(int pid, char comm[])
1989 {
1990 	preempt_disable();
1991 	arch_spin_lock(&trace_cmdline_lock);
1992 
1993 	__trace_find_cmdline(pid, comm);
1994 
1995 	arch_spin_unlock(&trace_cmdline_lock);
1996 	preempt_enable();
1997 }
1998 
1999 int trace_find_tgid(int pid)
2000 {
2001 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2002 		return 0;
2003 
2004 	return tgid_map[pid];
2005 }
2006 
2007 static int trace_save_tgid(struct task_struct *tsk)
2008 {
2009 	/* treat recording of idle task as a success */
2010 	if (!tsk->pid)
2011 		return 1;
2012 
2013 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2014 		return 0;
2015 
2016 	tgid_map[tsk->pid] = tsk->tgid;
2017 	return 1;
2018 }
2019 
2020 static bool tracing_record_taskinfo_skip(int flags)
2021 {
2022 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2023 		return true;
2024 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2025 		return true;
2026 	if (!__this_cpu_read(trace_taskinfo_save))
2027 		return true;
2028 	return false;
2029 }
2030 
2031 /**
2032  * tracing_record_taskinfo - record the task info of a task
2033  *
2034  * @task  - task to record
2035  * @flags - TRACE_RECORD_CMDLINE for recording comm
2036  *        - TRACE_RECORD_TGID for recording tgid
2037  */
2038 void tracing_record_taskinfo(struct task_struct *task, int flags)
2039 {
2040 	bool done;
2041 
2042 	if (tracing_record_taskinfo_skip(flags))
2043 		return;
2044 
2045 	/*
2046 	 * Record as much task information as possible. If some fail, continue
2047 	 * to try to record the others.
2048 	 */
2049 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2050 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2051 
2052 	/* If recording any information failed, retry again soon. */
2053 	if (!done)
2054 		return;
2055 
2056 	__this_cpu_write(trace_taskinfo_save, false);
2057 }
2058 
2059 /**
2060  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2061  *
2062  * @prev - previous task during sched_switch
2063  * @next - next task during sched_switch
2064  * @flags - TRACE_RECORD_CMDLINE for recording comm
2065  *          TRACE_RECORD_TGID for recording tgid
2066  */
2067 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2068 					  struct task_struct *next, int flags)
2069 {
2070 	bool done;
2071 
2072 	if (tracing_record_taskinfo_skip(flags))
2073 		return;
2074 
2075 	/*
2076 	 * Record as much task information as possible. If some fail, continue
2077 	 * to try to record the others.
2078 	 */
2079 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2080 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2081 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2082 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2083 
2084 	/* If recording any information failed, retry again soon. */
2085 	if (!done)
2086 		return;
2087 
2088 	__this_cpu_write(trace_taskinfo_save, false);
2089 }
2090 
2091 /* Helpers to record a specific task information */
2092 void tracing_record_cmdline(struct task_struct *task)
2093 {
2094 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2095 }
2096 
2097 void tracing_record_tgid(struct task_struct *task)
2098 {
2099 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2100 }
2101 
2102 /*
2103  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2104  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2105  * simplifies those functions and keeps them in sync.
2106  */
2107 enum print_line_t trace_handle_return(struct trace_seq *s)
2108 {
2109 	return trace_seq_has_overflowed(s) ?
2110 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2111 }
2112 EXPORT_SYMBOL_GPL(trace_handle_return);
2113 
2114 void
2115 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2116 			     int pc)
2117 {
2118 	struct task_struct *tsk = current;
2119 
2120 	entry->preempt_count		= pc & 0xff;
2121 	entry->pid			= (tsk) ? tsk->pid : 0;
2122 	entry->flags =
2123 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2124 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2125 #else
2126 		TRACE_FLAG_IRQS_NOSUPPORT |
2127 #endif
2128 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2129 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2130 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2131 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2132 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2133 }
2134 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2135 
2136 struct ring_buffer_event *
2137 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2138 			  int type,
2139 			  unsigned long len,
2140 			  unsigned long flags, int pc)
2141 {
2142 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2143 }
2144 
2145 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2146 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2147 static int trace_buffered_event_ref;
2148 
2149 /**
2150  * trace_buffered_event_enable - enable buffering events
2151  *
2152  * When events are being filtered, it is quicker to use a temporary
2153  * buffer to write the event data into if there's a likely chance
2154  * that it will not be committed. The discard of the ring buffer
2155  * is not as fast as committing, and is much slower than copying
2156  * a commit.
2157  *
2158  * When an event is to be filtered, allocate per cpu buffers to
2159  * write the event data into, and if the event is filtered and discarded
2160  * it is simply dropped, otherwise, the entire data is to be committed
2161  * in one shot.
2162  */
2163 void trace_buffered_event_enable(void)
2164 {
2165 	struct ring_buffer_event *event;
2166 	struct page *page;
2167 	int cpu;
2168 
2169 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2170 
2171 	if (trace_buffered_event_ref++)
2172 		return;
2173 
2174 	for_each_tracing_cpu(cpu) {
2175 		page = alloc_pages_node(cpu_to_node(cpu),
2176 					GFP_KERNEL | __GFP_NORETRY, 0);
2177 		if (!page)
2178 			goto failed;
2179 
2180 		event = page_address(page);
2181 		memset(event, 0, sizeof(*event));
2182 
2183 		per_cpu(trace_buffered_event, cpu) = event;
2184 
2185 		preempt_disable();
2186 		if (cpu == smp_processor_id() &&
2187 		    this_cpu_read(trace_buffered_event) !=
2188 		    per_cpu(trace_buffered_event, cpu))
2189 			WARN_ON_ONCE(1);
2190 		preempt_enable();
2191 	}
2192 
2193 	return;
2194  failed:
2195 	trace_buffered_event_disable();
2196 }
2197 
2198 static void enable_trace_buffered_event(void *data)
2199 {
2200 	/* Probably not needed, but do it anyway */
2201 	smp_rmb();
2202 	this_cpu_dec(trace_buffered_event_cnt);
2203 }
2204 
2205 static void disable_trace_buffered_event(void *data)
2206 {
2207 	this_cpu_inc(trace_buffered_event_cnt);
2208 }
2209 
2210 /**
2211  * trace_buffered_event_disable - disable buffering events
2212  *
2213  * When a filter is removed, it is faster to not use the buffered
2214  * events, and to commit directly into the ring buffer. Free up
2215  * the temp buffers when there are no more users. This requires
2216  * special synchronization with current events.
2217  */
2218 void trace_buffered_event_disable(void)
2219 {
2220 	int cpu;
2221 
2222 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2223 
2224 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2225 		return;
2226 
2227 	if (--trace_buffered_event_ref)
2228 		return;
2229 
2230 	preempt_disable();
2231 	/* For each CPU, set the buffer as used. */
2232 	smp_call_function_many(tracing_buffer_mask,
2233 			       disable_trace_buffered_event, NULL, 1);
2234 	preempt_enable();
2235 
2236 	/* Wait for all current users to finish */
2237 	synchronize_sched();
2238 
2239 	for_each_tracing_cpu(cpu) {
2240 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2241 		per_cpu(trace_buffered_event, cpu) = NULL;
2242 	}
2243 	/*
2244 	 * Make sure trace_buffered_event is NULL before clearing
2245 	 * trace_buffered_event_cnt.
2246 	 */
2247 	smp_wmb();
2248 
2249 	preempt_disable();
2250 	/* Do the work on each cpu */
2251 	smp_call_function_many(tracing_buffer_mask,
2252 			       enable_trace_buffered_event, NULL, 1);
2253 	preempt_enable();
2254 }
2255 
2256 static struct ring_buffer *temp_buffer;
2257 
2258 struct ring_buffer_event *
2259 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2260 			  struct trace_event_file *trace_file,
2261 			  int type, unsigned long len,
2262 			  unsigned long flags, int pc)
2263 {
2264 	struct ring_buffer_event *entry;
2265 	int val;
2266 
2267 	*current_rb = trace_file->tr->trace_buffer.buffer;
2268 
2269 	if ((trace_file->flags &
2270 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2271 	    (entry = this_cpu_read(trace_buffered_event))) {
2272 		/* Try to use the per cpu buffer first */
2273 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2274 		if (val == 1) {
2275 			trace_event_setup(entry, type, flags, pc);
2276 			entry->array[0] = len;
2277 			return entry;
2278 		}
2279 		this_cpu_dec(trace_buffered_event_cnt);
2280 	}
2281 
2282 	entry = __trace_buffer_lock_reserve(*current_rb,
2283 					    type, len, flags, pc);
2284 	/*
2285 	 * If tracing is off, but we have triggers enabled
2286 	 * we still need to look at the event data. Use the temp_buffer
2287 	 * to store the trace event for the tigger to use. It's recusive
2288 	 * safe and will not be recorded anywhere.
2289 	 */
2290 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2291 		*current_rb = temp_buffer;
2292 		entry = __trace_buffer_lock_reserve(*current_rb,
2293 						    type, len, flags, pc);
2294 	}
2295 	return entry;
2296 }
2297 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2298 
2299 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2300 static DEFINE_MUTEX(tracepoint_printk_mutex);
2301 
2302 static void output_printk(struct trace_event_buffer *fbuffer)
2303 {
2304 	struct trace_event_call *event_call;
2305 	struct trace_event *event;
2306 	unsigned long flags;
2307 	struct trace_iterator *iter = tracepoint_print_iter;
2308 
2309 	/* We should never get here if iter is NULL */
2310 	if (WARN_ON_ONCE(!iter))
2311 		return;
2312 
2313 	event_call = fbuffer->trace_file->event_call;
2314 	if (!event_call || !event_call->event.funcs ||
2315 	    !event_call->event.funcs->trace)
2316 		return;
2317 
2318 	event = &fbuffer->trace_file->event_call->event;
2319 
2320 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2321 	trace_seq_init(&iter->seq);
2322 	iter->ent = fbuffer->entry;
2323 	event_call->event.funcs->trace(iter, 0, event);
2324 	trace_seq_putc(&iter->seq, 0);
2325 	printk("%s", iter->seq.buffer);
2326 
2327 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2328 }
2329 
2330 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2331 			     void __user *buffer, size_t *lenp,
2332 			     loff_t *ppos)
2333 {
2334 	int save_tracepoint_printk;
2335 	int ret;
2336 
2337 	mutex_lock(&tracepoint_printk_mutex);
2338 	save_tracepoint_printk = tracepoint_printk;
2339 
2340 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2341 
2342 	/*
2343 	 * This will force exiting early, as tracepoint_printk
2344 	 * is always zero when tracepoint_printk_iter is not allocated
2345 	 */
2346 	if (!tracepoint_print_iter)
2347 		tracepoint_printk = 0;
2348 
2349 	if (save_tracepoint_printk == tracepoint_printk)
2350 		goto out;
2351 
2352 	if (tracepoint_printk)
2353 		static_key_enable(&tracepoint_printk_key.key);
2354 	else
2355 		static_key_disable(&tracepoint_printk_key.key);
2356 
2357  out:
2358 	mutex_unlock(&tracepoint_printk_mutex);
2359 
2360 	return ret;
2361 }
2362 
2363 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2364 {
2365 	if (static_key_false(&tracepoint_printk_key.key))
2366 		output_printk(fbuffer);
2367 
2368 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2369 				    fbuffer->event, fbuffer->entry,
2370 				    fbuffer->flags, fbuffer->pc);
2371 }
2372 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2373 
2374 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2375 				     struct ring_buffer *buffer,
2376 				     struct ring_buffer_event *event,
2377 				     unsigned long flags, int pc,
2378 				     struct pt_regs *regs)
2379 {
2380 	__buffer_unlock_commit(buffer, event);
2381 
2382 	/*
2383 	 * If regs is not set, then skip the following callers:
2384 	 *   trace_buffer_unlock_commit_regs
2385 	 *   event_trigger_unlock_commit
2386 	 *   trace_event_buffer_commit
2387 	 *   trace_event_raw_event_sched_switch
2388 	 * Note, we can still get here via blktrace, wakeup tracer
2389 	 * and mmiotrace, but that's ok if they lose a function or
2390 	 * two. They are that meaningful.
2391 	 */
2392 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2393 	ftrace_trace_userstack(buffer, flags, pc);
2394 }
2395 
2396 /*
2397  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2398  */
2399 void
2400 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2401 				   struct ring_buffer_event *event)
2402 {
2403 	__buffer_unlock_commit(buffer, event);
2404 }
2405 
2406 static void
2407 trace_process_export(struct trace_export *export,
2408 	       struct ring_buffer_event *event)
2409 {
2410 	struct trace_entry *entry;
2411 	unsigned int size = 0;
2412 
2413 	entry = ring_buffer_event_data(event);
2414 	size = ring_buffer_event_length(event);
2415 	export->write(entry, size);
2416 }
2417 
2418 static DEFINE_MUTEX(ftrace_export_lock);
2419 
2420 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2421 
2422 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2423 
2424 static inline void ftrace_exports_enable(void)
2425 {
2426 	static_branch_enable(&ftrace_exports_enabled);
2427 }
2428 
2429 static inline void ftrace_exports_disable(void)
2430 {
2431 	static_branch_disable(&ftrace_exports_enabled);
2432 }
2433 
2434 void ftrace_exports(struct ring_buffer_event *event)
2435 {
2436 	struct trace_export *export;
2437 
2438 	preempt_disable_notrace();
2439 
2440 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2441 	while (export) {
2442 		trace_process_export(export, event);
2443 		export = rcu_dereference_raw_notrace(export->next);
2444 	}
2445 
2446 	preempt_enable_notrace();
2447 }
2448 
2449 static inline void
2450 add_trace_export(struct trace_export **list, struct trace_export *export)
2451 {
2452 	rcu_assign_pointer(export->next, *list);
2453 	/*
2454 	 * We are entering export into the list but another
2455 	 * CPU might be walking that list. We need to make sure
2456 	 * the export->next pointer is valid before another CPU sees
2457 	 * the export pointer included into the list.
2458 	 */
2459 	rcu_assign_pointer(*list, export);
2460 }
2461 
2462 static inline int
2463 rm_trace_export(struct trace_export **list, struct trace_export *export)
2464 {
2465 	struct trace_export **p;
2466 
2467 	for (p = list; *p != NULL; p = &(*p)->next)
2468 		if (*p == export)
2469 			break;
2470 
2471 	if (*p != export)
2472 		return -1;
2473 
2474 	rcu_assign_pointer(*p, (*p)->next);
2475 
2476 	return 0;
2477 }
2478 
2479 static inline void
2480 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482 	if (*list == NULL)
2483 		ftrace_exports_enable();
2484 
2485 	add_trace_export(list, export);
2486 }
2487 
2488 static inline int
2489 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2490 {
2491 	int ret;
2492 
2493 	ret = rm_trace_export(list, export);
2494 	if (*list == NULL)
2495 		ftrace_exports_disable();
2496 
2497 	return ret;
2498 }
2499 
2500 int register_ftrace_export(struct trace_export *export)
2501 {
2502 	if (WARN_ON_ONCE(!export->write))
2503 		return -1;
2504 
2505 	mutex_lock(&ftrace_export_lock);
2506 
2507 	add_ftrace_export(&ftrace_exports_list, export);
2508 
2509 	mutex_unlock(&ftrace_export_lock);
2510 
2511 	return 0;
2512 }
2513 EXPORT_SYMBOL_GPL(register_ftrace_export);
2514 
2515 int unregister_ftrace_export(struct trace_export *export)
2516 {
2517 	int ret;
2518 
2519 	mutex_lock(&ftrace_export_lock);
2520 
2521 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2522 
2523 	mutex_unlock(&ftrace_export_lock);
2524 
2525 	return ret;
2526 }
2527 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2528 
2529 void
2530 trace_function(struct trace_array *tr,
2531 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2532 	       int pc)
2533 {
2534 	struct trace_event_call *call = &event_function;
2535 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2536 	struct ring_buffer_event *event;
2537 	struct ftrace_entry *entry;
2538 
2539 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2540 					    flags, pc);
2541 	if (!event)
2542 		return;
2543 	entry	= ring_buffer_event_data(event);
2544 	entry->ip			= ip;
2545 	entry->parent_ip		= parent_ip;
2546 
2547 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2548 		if (static_branch_unlikely(&ftrace_exports_enabled))
2549 			ftrace_exports(event);
2550 		__buffer_unlock_commit(buffer, event);
2551 	}
2552 }
2553 
2554 #ifdef CONFIG_STACKTRACE
2555 
2556 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2557 struct ftrace_stack {
2558 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2559 };
2560 
2561 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2562 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2563 
2564 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2565 				 unsigned long flags,
2566 				 int skip, int pc, struct pt_regs *regs)
2567 {
2568 	struct trace_event_call *call = &event_kernel_stack;
2569 	struct ring_buffer_event *event;
2570 	struct stack_entry *entry;
2571 	struct stack_trace trace;
2572 	int use_stack;
2573 	int size = FTRACE_STACK_ENTRIES;
2574 
2575 	trace.nr_entries	= 0;
2576 	trace.skip		= skip;
2577 
2578 	/*
2579 	 * Add two, for this function and the call to save_stack_trace()
2580 	 * If regs is set, then these functions will not be in the way.
2581 	 */
2582 	if (!regs)
2583 		trace.skip += 2;
2584 
2585 	/*
2586 	 * Since events can happen in NMIs there's no safe way to
2587 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2588 	 * or NMI comes in, it will just have to use the default
2589 	 * FTRACE_STACK_SIZE.
2590 	 */
2591 	preempt_disable_notrace();
2592 
2593 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2594 	/*
2595 	 * We don't need any atomic variables, just a barrier.
2596 	 * If an interrupt comes in, we don't care, because it would
2597 	 * have exited and put the counter back to what we want.
2598 	 * We just need a barrier to keep gcc from moving things
2599 	 * around.
2600 	 */
2601 	barrier();
2602 	if (use_stack == 1) {
2603 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2604 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2605 
2606 		if (regs)
2607 			save_stack_trace_regs(regs, &trace);
2608 		else
2609 			save_stack_trace(&trace);
2610 
2611 		if (trace.nr_entries > size)
2612 			size = trace.nr_entries;
2613 	} else
2614 		/* From now on, use_stack is a boolean */
2615 		use_stack = 0;
2616 
2617 	size *= sizeof(unsigned long);
2618 
2619 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2620 					    sizeof(*entry) + size, flags, pc);
2621 	if (!event)
2622 		goto out;
2623 	entry = ring_buffer_event_data(event);
2624 
2625 	memset(&entry->caller, 0, size);
2626 
2627 	if (use_stack)
2628 		memcpy(&entry->caller, trace.entries,
2629 		       trace.nr_entries * sizeof(unsigned long));
2630 	else {
2631 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2632 		trace.entries		= entry->caller;
2633 		if (regs)
2634 			save_stack_trace_regs(regs, &trace);
2635 		else
2636 			save_stack_trace(&trace);
2637 	}
2638 
2639 	entry->size = trace.nr_entries;
2640 
2641 	if (!call_filter_check_discard(call, entry, buffer, event))
2642 		__buffer_unlock_commit(buffer, event);
2643 
2644  out:
2645 	/* Again, don't let gcc optimize things here */
2646 	barrier();
2647 	__this_cpu_dec(ftrace_stack_reserve);
2648 	preempt_enable_notrace();
2649 
2650 }
2651 
2652 static inline void ftrace_trace_stack(struct trace_array *tr,
2653 				      struct ring_buffer *buffer,
2654 				      unsigned long flags,
2655 				      int skip, int pc, struct pt_regs *regs)
2656 {
2657 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2658 		return;
2659 
2660 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2661 }
2662 
2663 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2664 		   int pc)
2665 {
2666 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2667 
2668 	if (rcu_is_watching()) {
2669 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2670 		return;
2671 	}
2672 
2673 	/*
2674 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2675 	 * but if the above rcu_is_watching() failed, then the NMI
2676 	 * triggered someplace critical, and rcu_irq_enter() should
2677 	 * not be called from NMI.
2678 	 */
2679 	if (unlikely(in_nmi()))
2680 		return;
2681 
2682 	/*
2683 	 * It is possible that a function is being traced in a
2684 	 * location that RCU is not watching. A call to
2685 	 * rcu_irq_enter() will make sure that it is, but there's
2686 	 * a few internal rcu functions that could be traced
2687 	 * where that wont work either. In those cases, we just
2688 	 * do nothing.
2689 	 */
2690 	if (unlikely(rcu_irq_enter_disabled()))
2691 		return;
2692 
2693 	rcu_irq_enter_irqson();
2694 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2695 	rcu_irq_exit_irqson();
2696 }
2697 
2698 /**
2699  * trace_dump_stack - record a stack back trace in the trace buffer
2700  * @skip: Number of functions to skip (helper handlers)
2701  */
2702 void trace_dump_stack(int skip)
2703 {
2704 	unsigned long flags;
2705 
2706 	if (tracing_disabled || tracing_selftest_running)
2707 		return;
2708 
2709 	local_save_flags(flags);
2710 
2711 	/*
2712 	 * Skip 3 more, seems to get us at the caller of
2713 	 * this function.
2714 	 */
2715 	skip += 3;
2716 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2717 			     flags, skip, preempt_count(), NULL);
2718 }
2719 
2720 static DEFINE_PER_CPU(int, user_stack_count);
2721 
2722 void
2723 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2724 {
2725 	struct trace_event_call *call = &event_user_stack;
2726 	struct ring_buffer_event *event;
2727 	struct userstack_entry *entry;
2728 	struct stack_trace trace;
2729 
2730 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2731 		return;
2732 
2733 	/*
2734 	 * NMIs can not handle page faults, even with fix ups.
2735 	 * The save user stack can (and often does) fault.
2736 	 */
2737 	if (unlikely(in_nmi()))
2738 		return;
2739 
2740 	/*
2741 	 * prevent recursion, since the user stack tracing may
2742 	 * trigger other kernel events.
2743 	 */
2744 	preempt_disable();
2745 	if (__this_cpu_read(user_stack_count))
2746 		goto out;
2747 
2748 	__this_cpu_inc(user_stack_count);
2749 
2750 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751 					    sizeof(*entry), flags, pc);
2752 	if (!event)
2753 		goto out_drop_count;
2754 	entry	= ring_buffer_event_data(event);
2755 
2756 	entry->tgid		= current->tgid;
2757 	memset(&entry->caller, 0, sizeof(entry->caller));
2758 
2759 	trace.nr_entries	= 0;
2760 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2761 	trace.skip		= 0;
2762 	trace.entries		= entry->caller;
2763 
2764 	save_stack_trace_user(&trace);
2765 	if (!call_filter_check_discard(call, entry, buffer, event))
2766 		__buffer_unlock_commit(buffer, event);
2767 
2768  out_drop_count:
2769 	__this_cpu_dec(user_stack_count);
2770  out:
2771 	preempt_enable();
2772 }
2773 
2774 #ifdef UNUSED
2775 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2776 {
2777 	ftrace_trace_userstack(tr, flags, preempt_count());
2778 }
2779 #endif /* UNUSED */
2780 
2781 #endif /* CONFIG_STACKTRACE */
2782 
2783 /* created for use with alloc_percpu */
2784 struct trace_buffer_struct {
2785 	int nesting;
2786 	char buffer[4][TRACE_BUF_SIZE];
2787 };
2788 
2789 static struct trace_buffer_struct *trace_percpu_buffer;
2790 
2791 /*
2792  * Thise allows for lockless recording.  If we're nested too deeply, then
2793  * this returns NULL.
2794  */
2795 static char *get_trace_buf(void)
2796 {
2797 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2798 
2799 	if (!buffer || buffer->nesting >= 4)
2800 		return NULL;
2801 
2802 	return &buffer->buffer[buffer->nesting++][0];
2803 }
2804 
2805 static void put_trace_buf(void)
2806 {
2807 	this_cpu_dec(trace_percpu_buffer->nesting);
2808 }
2809 
2810 static int alloc_percpu_trace_buffer(void)
2811 {
2812 	struct trace_buffer_struct *buffers;
2813 
2814 	buffers = alloc_percpu(struct trace_buffer_struct);
2815 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2816 		return -ENOMEM;
2817 
2818 	trace_percpu_buffer = buffers;
2819 	return 0;
2820 }
2821 
2822 static int buffers_allocated;
2823 
2824 void trace_printk_init_buffers(void)
2825 {
2826 	if (buffers_allocated)
2827 		return;
2828 
2829 	if (alloc_percpu_trace_buffer())
2830 		return;
2831 
2832 	/* trace_printk() is for debug use only. Don't use it in production. */
2833 
2834 	pr_warn("\n");
2835 	pr_warn("**********************************************************\n");
2836 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2837 	pr_warn("**                                                      **\n");
2838 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2839 	pr_warn("**                                                      **\n");
2840 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2841 	pr_warn("** unsafe for production use.                           **\n");
2842 	pr_warn("**                                                      **\n");
2843 	pr_warn("** If you see this message and you are not debugging    **\n");
2844 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2845 	pr_warn("**                                                      **\n");
2846 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2847 	pr_warn("**********************************************************\n");
2848 
2849 	/* Expand the buffers to set size */
2850 	tracing_update_buffers();
2851 
2852 	buffers_allocated = 1;
2853 
2854 	/*
2855 	 * trace_printk_init_buffers() can be called by modules.
2856 	 * If that happens, then we need to start cmdline recording
2857 	 * directly here. If the global_trace.buffer is already
2858 	 * allocated here, then this was called by module code.
2859 	 */
2860 	if (global_trace.trace_buffer.buffer)
2861 		tracing_start_cmdline_record();
2862 }
2863 
2864 void trace_printk_start_comm(void)
2865 {
2866 	/* Start tracing comms if trace printk is set */
2867 	if (!buffers_allocated)
2868 		return;
2869 	tracing_start_cmdline_record();
2870 }
2871 
2872 static void trace_printk_start_stop_comm(int enabled)
2873 {
2874 	if (!buffers_allocated)
2875 		return;
2876 
2877 	if (enabled)
2878 		tracing_start_cmdline_record();
2879 	else
2880 		tracing_stop_cmdline_record();
2881 }
2882 
2883 /**
2884  * trace_vbprintk - write binary msg to tracing buffer
2885  *
2886  */
2887 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2888 {
2889 	struct trace_event_call *call = &event_bprint;
2890 	struct ring_buffer_event *event;
2891 	struct ring_buffer *buffer;
2892 	struct trace_array *tr = &global_trace;
2893 	struct bprint_entry *entry;
2894 	unsigned long flags;
2895 	char *tbuffer;
2896 	int len = 0, size, pc;
2897 
2898 	if (unlikely(tracing_selftest_running || tracing_disabled))
2899 		return 0;
2900 
2901 	/* Don't pollute graph traces with trace_vprintk internals */
2902 	pause_graph_tracing();
2903 
2904 	pc = preempt_count();
2905 	preempt_disable_notrace();
2906 
2907 	tbuffer = get_trace_buf();
2908 	if (!tbuffer) {
2909 		len = 0;
2910 		goto out_nobuffer;
2911 	}
2912 
2913 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2914 
2915 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2916 		goto out;
2917 
2918 	local_save_flags(flags);
2919 	size = sizeof(*entry) + sizeof(u32) * len;
2920 	buffer = tr->trace_buffer.buffer;
2921 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2922 					    flags, pc);
2923 	if (!event)
2924 		goto out;
2925 	entry = ring_buffer_event_data(event);
2926 	entry->ip			= ip;
2927 	entry->fmt			= fmt;
2928 
2929 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2930 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2931 		__buffer_unlock_commit(buffer, event);
2932 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2933 	}
2934 
2935 out:
2936 	put_trace_buf();
2937 
2938 out_nobuffer:
2939 	preempt_enable_notrace();
2940 	unpause_graph_tracing();
2941 
2942 	return len;
2943 }
2944 EXPORT_SYMBOL_GPL(trace_vbprintk);
2945 
2946 static int
2947 __trace_array_vprintk(struct ring_buffer *buffer,
2948 		      unsigned long ip, const char *fmt, va_list args)
2949 {
2950 	struct trace_event_call *call = &event_print;
2951 	struct ring_buffer_event *event;
2952 	int len = 0, size, pc;
2953 	struct print_entry *entry;
2954 	unsigned long flags;
2955 	char *tbuffer;
2956 
2957 	if (tracing_disabled || tracing_selftest_running)
2958 		return 0;
2959 
2960 	/* Don't pollute graph traces with trace_vprintk internals */
2961 	pause_graph_tracing();
2962 
2963 	pc = preempt_count();
2964 	preempt_disable_notrace();
2965 
2966 
2967 	tbuffer = get_trace_buf();
2968 	if (!tbuffer) {
2969 		len = 0;
2970 		goto out_nobuffer;
2971 	}
2972 
2973 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2974 
2975 	local_save_flags(flags);
2976 	size = sizeof(*entry) + len + 1;
2977 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2978 					    flags, pc);
2979 	if (!event)
2980 		goto out;
2981 	entry = ring_buffer_event_data(event);
2982 	entry->ip = ip;
2983 
2984 	memcpy(&entry->buf, tbuffer, len + 1);
2985 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2986 		__buffer_unlock_commit(buffer, event);
2987 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2988 	}
2989 
2990 out:
2991 	put_trace_buf();
2992 
2993 out_nobuffer:
2994 	preempt_enable_notrace();
2995 	unpause_graph_tracing();
2996 
2997 	return len;
2998 }
2999 
3000 int trace_array_vprintk(struct trace_array *tr,
3001 			unsigned long ip, const char *fmt, va_list args)
3002 {
3003 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3004 }
3005 
3006 int trace_array_printk(struct trace_array *tr,
3007 		       unsigned long ip, const char *fmt, ...)
3008 {
3009 	int ret;
3010 	va_list ap;
3011 
3012 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3013 		return 0;
3014 
3015 	va_start(ap, fmt);
3016 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3017 	va_end(ap);
3018 	return ret;
3019 }
3020 
3021 int trace_array_printk_buf(struct ring_buffer *buffer,
3022 			   unsigned long ip, const char *fmt, ...)
3023 {
3024 	int ret;
3025 	va_list ap;
3026 
3027 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3028 		return 0;
3029 
3030 	va_start(ap, fmt);
3031 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3032 	va_end(ap);
3033 	return ret;
3034 }
3035 
3036 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3037 {
3038 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3039 }
3040 EXPORT_SYMBOL_GPL(trace_vprintk);
3041 
3042 static void trace_iterator_increment(struct trace_iterator *iter)
3043 {
3044 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3045 
3046 	iter->idx++;
3047 	if (buf_iter)
3048 		ring_buffer_read(buf_iter, NULL);
3049 }
3050 
3051 static struct trace_entry *
3052 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3053 		unsigned long *lost_events)
3054 {
3055 	struct ring_buffer_event *event;
3056 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3057 
3058 	if (buf_iter)
3059 		event = ring_buffer_iter_peek(buf_iter, ts);
3060 	else
3061 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3062 					 lost_events);
3063 
3064 	if (event) {
3065 		iter->ent_size = ring_buffer_event_length(event);
3066 		return ring_buffer_event_data(event);
3067 	}
3068 	iter->ent_size = 0;
3069 	return NULL;
3070 }
3071 
3072 static struct trace_entry *
3073 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3074 		  unsigned long *missing_events, u64 *ent_ts)
3075 {
3076 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3077 	struct trace_entry *ent, *next = NULL;
3078 	unsigned long lost_events = 0, next_lost = 0;
3079 	int cpu_file = iter->cpu_file;
3080 	u64 next_ts = 0, ts;
3081 	int next_cpu = -1;
3082 	int next_size = 0;
3083 	int cpu;
3084 
3085 	/*
3086 	 * If we are in a per_cpu trace file, don't bother by iterating over
3087 	 * all cpu and peek directly.
3088 	 */
3089 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3090 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3091 			return NULL;
3092 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3093 		if (ent_cpu)
3094 			*ent_cpu = cpu_file;
3095 
3096 		return ent;
3097 	}
3098 
3099 	for_each_tracing_cpu(cpu) {
3100 
3101 		if (ring_buffer_empty_cpu(buffer, cpu))
3102 			continue;
3103 
3104 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3105 
3106 		/*
3107 		 * Pick the entry with the smallest timestamp:
3108 		 */
3109 		if (ent && (!next || ts < next_ts)) {
3110 			next = ent;
3111 			next_cpu = cpu;
3112 			next_ts = ts;
3113 			next_lost = lost_events;
3114 			next_size = iter->ent_size;
3115 		}
3116 	}
3117 
3118 	iter->ent_size = next_size;
3119 
3120 	if (ent_cpu)
3121 		*ent_cpu = next_cpu;
3122 
3123 	if (ent_ts)
3124 		*ent_ts = next_ts;
3125 
3126 	if (missing_events)
3127 		*missing_events = next_lost;
3128 
3129 	return next;
3130 }
3131 
3132 /* Find the next real entry, without updating the iterator itself */
3133 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3134 					  int *ent_cpu, u64 *ent_ts)
3135 {
3136 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3137 }
3138 
3139 /* Find the next real entry, and increment the iterator to the next entry */
3140 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3141 {
3142 	iter->ent = __find_next_entry(iter, &iter->cpu,
3143 				      &iter->lost_events, &iter->ts);
3144 
3145 	if (iter->ent)
3146 		trace_iterator_increment(iter);
3147 
3148 	return iter->ent ? iter : NULL;
3149 }
3150 
3151 static void trace_consume(struct trace_iterator *iter)
3152 {
3153 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3154 			    &iter->lost_events);
3155 }
3156 
3157 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3158 {
3159 	struct trace_iterator *iter = m->private;
3160 	int i = (int)*pos;
3161 	void *ent;
3162 
3163 	WARN_ON_ONCE(iter->leftover);
3164 
3165 	(*pos)++;
3166 
3167 	/* can't go backwards */
3168 	if (iter->idx > i)
3169 		return NULL;
3170 
3171 	if (iter->idx < 0)
3172 		ent = trace_find_next_entry_inc(iter);
3173 	else
3174 		ent = iter;
3175 
3176 	while (ent && iter->idx < i)
3177 		ent = trace_find_next_entry_inc(iter);
3178 
3179 	iter->pos = *pos;
3180 
3181 	return ent;
3182 }
3183 
3184 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3185 {
3186 	struct ring_buffer_event *event;
3187 	struct ring_buffer_iter *buf_iter;
3188 	unsigned long entries = 0;
3189 	u64 ts;
3190 
3191 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3192 
3193 	buf_iter = trace_buffer_iter(iter, cpu);
3194 	if (!buf_iter)
3195 		return;
3196 
3197 	ring_buffer_iter_reset(buf_iter);
3198 
3199 	/*
3200 	 * We could have the case with the max latency tracers
3201 	 * that a reset never took place on a cpu. This is evident
3202 	 * by the timestamp being before the start of the buffer.
3203 	 */
3204 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3205 		if (ts >= iter->trace_buffer->time_start)
3206 			break;
3207 		entries++;
3208 		ring_buffer_read(buf_iter, NULL);
3209 	}
3210 
3211 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3212 }
3213 
3214 /*
3215  * The current tracer is copied to avoid a global locking
3216  * all around.
3217  */
3218 static void *s_start(struct seq_file *m, loff_t *pos)
3219 {
3220 	struct trace_iterator *iter = m->private;
3221 	struct trace_array *tr = iter->tr;
3222 	int cpu_file = iter->cpu_file;
3223 	void *p = NULL;
3224 	loff_t l = 0;
3225 	int cpu;
3226 
3227 	/*
3228 	 * copy the tracer to avoid using a global lock all around.
3229 	 * iter->trace is a copy of current_trace, the pointer to the
3230 	 * name may be used instead of a strcmp(), as iter->trace->name
3231 	 * will point to the same string as current_trace->name.
3232 	 */
3233 	mutex_lock(&trace_types_lock);
3234 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3235 		*iter->trace = *tr->current_trace;
3236 	mutex_unlock(&trace_types_lock);
3237 
3238 #ifdef CONFIG_TRACER_MAX_TRACE
3239 	if (iter->snapshot && iter->trace->use_max_tr)
3240 		return ERR_PTR(-EBUSY);
3241 #endif
3242 
3243 	if (!iter->snapshot)
3244 		atomic_inc(&trace_record_taskinfo_disabled);
3245 
3246 	if (*pos != iter->pos) {
3247 		iter->ent = NULL;
3248 		iter->cpu = 0;
3249 		iter->idx = -1;
3250 
3251 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3252 			for_each_tracing_cpu(cpu)
3253 				tracing_iter_reset(iter, cpu);
3254 		} else
3255 			tracing_iter_reset(iter, cpu_file);
3256 
3257 		iter->leftover = 0;
3258 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3259 			;
3260 
3261 	} else {
3262 		/*
3263 		 * If we overflowed the seq_file before, then we want
3264 		 * to just reuse the trace_seq buffer again.
3265 		 */
3266 		if (iter->leftover)
3267 			p = iter;
3268 		else {
3269 			l = *pos - 1;
3270 			p = s_next(m, p, &l);
3271 		}
3272 	}
3273 
3274 	trace_event_read_lock();
3275 	trace_access_lock(cpu_file);
3276 	return p;
3277 }
3278 
3279 static void s_stop(struct seq_file *m, void *p)
3280 {
3281 	struct trace_iterator *iter = m->private;
3282 
3283 #ifdef CONFIG_TRACER_MAX_TRACE
3284 	if (iter->snapshot && iter->trace->use_max_tr)
3285 		return;
3286 #endif
3287 
3288 	if (!iter->snapshot)
3289 		atomic_dec(&trace_record_taskinfo_disabled);
3290 
3291 	trace_access_unlock(iter->cpu_file);
3292 	trace_event_read_unlock();
3293 }
3294 
3295 static void
3296 get_total_entries(struct trace_buffer *buf,
3297 		  unsigned long *total, unsigned long *entries)
3298 {
3299 	unsigned long count;
3300 	int cpu;
3301 
3302 	*total = 0;
3303 	*entries = 0;
3304 
3305 	for_each_tracing_cpu(cpu) {
3306 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3307 		/*
3308 		 * If this buffer has skipped entries, then we hold all
3309 		 * entries for the trace and we need to ignore the
3310 		 * ones before the time stamp.
3311 		 */
3312 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3313 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3314 			/* total is the same as the entries */
3315 			*total += count;
3316 		} else
3317 			*total += count +
3318 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3319 		*entries += count;
3320 	}
3321 }
3322 
3323 static void print_lat_help_header(struct seq_file *m)
3324 {
3325 	seq_puts(m, "#                  _------=> CPU#            \n"
3326 		    "#                 / _-----=> irqs-off        \n"
3327 		    "#                | / _----=> need-resched    \n"
3328 		    "#                || / _---=> hardirq/softirq \n"
3329 		    "#                ||| / _--=> preempt-depth   \n"
3330 		    "#                |||| /     delay            \n"
3331 		    "#  cmd     pid   ||||| time  |   caller      \n"
3332 		    "#     \\   /      |||||  \\    |   /         \n");
3333 }
3334 
3335 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3336 {
3337 	unsigned long total;
3338 	unsigned long entries;
3339 
3340 	get_total_entries(buf, &total, &entries);
3341 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3342 		   entries, total, num_online_cpus());
3343 	seq_puts(m, "#\n");
3344 }
3345 
3346 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3347 				   unsigned int flags)
3348 {
3349 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3350 
3351 	print_event_info(buf, m);
3352 
3353 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3354 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3355 }
3356 
3357 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3358 				       unsigned int flags)
3359 {
3360 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3361 	const char tgid_space[] = "          ";
3362 	const char space[] = "  ";
3363 
3364 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3365 		   tgid ? tgid_space : space);
3366 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3367 		   tgid ? tgid_space : space);
3368 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3369 		   tgid ? tgid_space : space);
3370 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3371 		   tgid ? tgid_space : space);
3372 	seq_printf(m, "#                          %s||| /     delay\n",
3373 		   tgid ? tgid_space : space);
3374 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3375 		   tgid ? "   TGID   " : space);
3376 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3377 		   tgid ? "     |    " : space);
3378 }
3379 
3380 void
3381 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3382 {
3383 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3384 	struct trace_buffer *buf = iter->trace_buffer;
3385 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3386 	struct tracer *type = iter->trace;
3387 	unsigned long entries;
3388 	unsigned long total;
3389 	const char *name = "preemption";
3390 
3391 	name = type->name;
3392 
3393 	get_total_entries(buf, &total, &entries);
3394 
3395 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3396 		   name, UTS_RELEASE);
3397 	seq_puts(m, "# -----------------------------------"
3398 		 "---------------------------------\n");
3399 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3400 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3401 		   nsecs_to_usecs(data->saved_latency),
3402 		   entries,
3403 		   total,
3404 		   buf->cpu,
3405 #if defined(CONFIG_PREEMPT_NONE)
3406 		   "server",
3407 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3408 		   "desktop",
3409 #elif defined(CONFIG_PREEMPT)
3410 		   "preempt",
3411 #else
3412 		   "unknown",
3413 #endif
3414 		   /* These are reserved for later use */
3415 		   0, 0, 0, 0);
3416 #ifdef CONFIG_SMP
3417 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3418 #else
3419 	seq_puts(m, ")\n");
3420 #endif
3421 	seq_puts(m, "#    -----------------\n");
3422 	seq_printf(m, "#    | task: %.16s-%d "
3423 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3424 		   data->comm, data->pid,
3425 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3426 		   data->policy, data->rt_priority);
3427 	seq_puts(m, "#    -----------------\n");
3428 
3429 	if (data->critical_start) {
3430 		seq_puts(m, "#  => started at: ");
3431 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3432 		trace_print_seq(m, &iter->seq);
3433 		seq_puts(m, "\n#  => ended at:   ");
3434 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3435 		trace_print_seq(m, &iter->seq);
3436 		seq_puts(m, "\n#\n");
3437 	}
3438 
3439 	seq_puts(m, "#\n");
3440 }
3441 
3442 static void test_cpu_buff_start(struct trace_iterator *iter)
3443 {
3444 	struct trace_seq *s = &iter->seq;
3445 	struct trace_array *tr = iter->tr;
3446 
3447 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3448 		return;
3449 
3450 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3451 		return;
3452 
3453 	if (cpumask_available(iter->started) &&
3454 	    cpumask_test_cpu(iter->cpu, iter->started))
3455 		return;
3456 
3457 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3458 		return;
3459 
3460 	if (cpumask_available(iter->started))
3461 		cpumask_set_cpu(iter->cpu, iter->started);
3462 
3463 	/* Don't print started cpu buffer for the first entry of the trace */
3464 	if (iter->idx > 1)
3465 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3466 				iter->cpu);
3467 }
3468 
3469 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3470 {
3471 	struct trace_array *tr = iter->tr;
3472 	struct trace_seq *s = &iter->seq;
3473 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3474 	struct trace_entry *entry;
3475 	struct trace_event *event;
3476 
3477 	entry = iter->ent;
3478 
3479 	test_cpu_buff_start(iter);
3480 
3481 	event = ftrace_find_event(entry->type);
3482 
3483 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3484 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3485 			trace_print_lat_context(iter);
3486 		else
3487 			trace_print_context(iter);
3488 	}
3489 
3490 	if (trace_seq_has_overflowed(s))
3491 		return TRACE_TYPE_PARTIAL_LINE;
3492 
3493 	if (event)
3494 		return event->funcs->trace(iter, sym_flags, event);
3495 
3496 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3497 
3498 	return trace_handle_return(s);
3499 }
3500 
3501 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3502 {
3503 	struct trace_array *tr = iter->tr;
3504 	struct trace_seq *s = &iter->seq;
3505 	struct trace_entry *entry;
3506 	struct trace_event *event;
3507 
3508 	entry = iter->ent;
3509 
3510 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3511 		trace_seq_printf(s, "%d %d %llu ",
3512 				 entry->pid, iter->cpu, iter->ts);
3513 
3514 	if (trace_seq_has_overflowed(s))
3515 		return TRACE_TYPE_PARTIAL_LINE;
3516 
3517 	event = ftrace_find_event(entry->type);
3518 	if (event)
3519 		return event->funcs->raw(iter, 0, event);
3520 
3521 	trace_seq_printf(s, "%d ?\n", entry->type);
3522 
3523 	return trace_handle_return(s);
3524 }
3525 
3526 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3527 {
3528 	struct trace_array *tr = iter->tr;
3529 	struct trace_seq *s = &iter->seq;
3530 	unsigned char newline = '\n';
3531 	struct trace_entry *entry;
3532 	struct trace_event *event;
3533 
3534 	entry = iter->ent;
3535 
3536 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3537 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3538 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3539 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3540 		if (trace_seq_has_overflowed(s))
3541 			return TRACE_TYPE_PARTIAL_LINE;
3542 	}
3543 
3544 	event = ftrace_find_event(entry->type);
3545 	if (event) {
3546 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3547 		if (ret != TRACE_TYPE_HANDLED)
3548 			return ret;
3549 	}
3550 
3551 	SEQ_PUT_FIELD(s, newline);
3552 
3553 	return trace_handle_return(s);
3554 }
3555 
3556 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3557 {
3558 	struct trace_array *tr = iter->tr;
3559 	struct trace_seq *s = &iter->seq;
3560 	struct trace_entry *entry;
3561 	struct trace_event *event;
3562 
3563 	entry = iter->ent;
3564 
3565 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3566 		SEQ_PUT_FIELD(s, entry->pid);
3567 		SEQ_PUT_FIELD(s, iter->cpu);
3568 		SEQ_PUT_FIELD(s, iter->ts);
3569 		if (trace_seq_has_overflowed(s))
3570 			return TRACE_TYPE_PARTIAL_LINE;
3571 	}
3572 
3573 	event = ftrace_find_event(entry->type);
3574 	return event ? event->funcs->binary(iter, 0, event) :
3575 		TRACE_TYPE_HANDLED;
3576 }
3577 
3578 int trace_empty(struct trace_iterator *iter)
3579 {
3580 	struct ring_buffer_iter *buf_iter;
3581 	int cpu;
3582 
3583 	/* If we are looking at one CPU buffer, only check that one */
3584 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3585 		cpu = iter->cpu_file;
3586 		buf_iter = trace_buffer_iter(iter, cpu);
3587 		if (buf_iter) {
3588 			if (!ring_buffer_iter_empty(buf_iter))
3589 				return 0;
3590 		} else {
3591 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3592 				return 0;
3593 		}
3594 		return 1;
3595 	}
3596 
3597 	for_each_tracing_cpu(cpu) {
3598 		buf_iter = trace_buffer_iter(iter, cpu);
3599 		if (buf_iter) {
3600 			if (!ring_buffer_iter_empty(buf_iter))
3601 				return 0;
3602 		} else {
3603 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3604 				return 0;
3605 		}
3606 	}
3607 
3608 	return 1;
3609 }
3610 
3611 /*  Called with trace_event_read_lock() held. */
3612 enum print_line_t print_trace_line(struct trace_iterator *iter)
3613 {
3614 	struct trace_array *tr = iter->tr;
3615 	unsigned long trace_flags = tr->trace_flags;
3616 	enum print_line_t ret;
3617 
3618 	if (iter->lost_events) {
3619 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3620 				 iter->cpu, iter->lost_events);
3621 		if (trace_seq_has_overflowed(&iter->seq))
3622 			return TRACE_TYPE_PARTIAL_LINE;
3623 	}
3624 
3625 	if (iter->trace && iter->trace->print_line) {
3626 		ret = iter->trace->print_line(iter);
3627 		if (ret != TRACE_TYPE_UNHANDLED)
3628 			return ret;
3629 	}
3630 
3631 	if (iter->ent->type == TRACE_BPUTS &&
3632 			trace_flags & TRACE_ITER_PRINTK &&
3633 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3634 		return trace_print_bputs_msg_only(iter);
3635 
3636 	if (iter->ent->type == TRACE_BPRINT &&
3637 			trace_flags & TRACE_ITER_PRINTK &&
3638 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3639 		return trace_print_bprintk_msg_only(iter);
3640 
3641 	if (iter->ent->type == TRACE_PRINT &&
3642 			trace_flags & TRACE_ITER_PRINTK &&
3643 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3644 		return trace_print_printk_msg_only(iter);
3645 
3646 	if (trace_flags & TRACE_ITER_BIN)
3647 		return print_bin_fmt(iter);
3648 
3649 	if (trace_flags & TRACE_ITER_HEX)
3650 		return print_hex_fmt(iter);
3651 
3652 	if (trace_flags & TRACE_ITER_RAW)
3653 		return print_raw_fmt(iter);
3654 
3655 	return print_trace_fmt(iter);
3656 }
3657 
3658 void trace_latency_header(struct seq_file *m)
3659 {
3660 	struct trace_iterator *iter = m->private;
3661 	struct trace_array *tr = iter->tr;
3662 
3663 	/* print nothing if the buffers are empty */
3664 	if (trace_empty(iter))
3665 		return;
3666 
3667 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3668 		print_trace_header(m, iter);
3669 
3670 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3671 		print_lat_help_header(m);
3672 }
3673 
3674 void trace_default_header(struct seq_file *m)
3675 {
3676 	struct trace_iterator *iter = m->private;
3677 	struct trace_array *tr = iter->tr;
3678 	unsigned long trace_flags = tr->trace_flags;
3679 
3680 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3681 		return;
3682 
3683 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3684 		/* print nothing if the buffers are empty */
3685 		if (trace_empty(iter))
3686 			return;
3687 		print_trace_header(m, iter);
3688 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3689 			print_lat_help_header(m);
3690 	} else {
3691 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3692 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3693 				print_func_help_header_irq(iter->trace_buffer,
3694 							   m, trace_flags);
3695 			else
3696 				print_func_help_header(iter->trace_buffer, m,
3697 						       trace_flags);
3698 		}
3699 	}
3700 }
3701 
3702 static void test_ftrace_alive(struct seq_file *m)
3703 {
3704 	if (!ftrace_is_dead())
3705 		return;
3706 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3707 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3708 }
3709 
3710 #ifdef CONFIG_TRACER_MAX_TRACE
3711 static void show_snapshot_main_help(struct seq_file *m)
3712 {
3713 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3714 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3715 		    "#                      Takes a snapshot of the main buffer.\n"
3716 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3717 		    "#                      (Doesn't have to be '2' works with any number that\n"
3718 		    "#                       is not a '0' or '1')\n");
3719 }
3720 
3721 static void show_snapshot_percpu_help(struct seq_file *m)
3722 {
3723 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3724 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3725 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3726 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3727 #else
3728 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3729 		    "#                     Must use main snapshot file to allocate.\n");
3730 #endif
3731 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3732 		    "#                      (Doesn't have to be '2' works with any number that\n"
3733 		    "#                       is not a '0' or '1')\n");
3734 }
3735 
3736 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3737 {
3738 	if (iter->tr->allocated_snapshot)
3739 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3740 	else
3741 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3742 
3743 	seq_puts(m, "# Snapshot commands:\n");
3744 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3745 		show_snapshot_main_help(m);
3746 	else
3747 		show_snapshot_percpu_help(m);
3748 }
3749 #else
3750 /* Should never be called */
3751 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3752 #endif
3753 
3754 static int s_show(struct seq_file *m, void *v)
3755 {
3756 	struct trace_iterator *iter = v;
3757 	int ret;
3758 
3759 	if (iter->ent == NULL) {
3760 		if (iter->tr) {
3761 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3762 			seq_puts(m, "#\n");
3763 			test_ftrace_alive(m);
3764 		}
3765 		if (iter->snapshot && trace_empty(iter))
3766 			print_snapshot_help(m, iter);
3767 		else if (iter->trace && iter->trace->print_header)
3768 			iter->trace->print_header(m);
3769 		else
3770 			trace_default_header(m);
3771 
3772 	} else if (iter->leftover) {
3773 		/*
3774 		 * If we filled the seq_file buffer earlier, we
3775 		 * want to just show it now.
3776 		 */
3777 		ret = trace_print_seq(m, &iter->seq);
3778 
3779 		/* ret should this time be zero, but you never know */
3780 		iter->leftover = ret;
3781 
3782 	} else {
3783 		print_trace_line(iter);
3784 		ret = trace_print_seq(m, &iter->seq);
3785 		/*
3786 		 * If we overflow the seq_file buffer, then it will
3787 		 * ask us for this data again at start up.
3788 		 * Use that instead.
3789 		 *  ret is 0 if seq_file write succeeded.
3790 		 *        -1 otherwise.
3791 		 */
3792 		iter->leftover = ret;
3793 	}
3794 
3795 	return 0;
3796 }
3797 
3798 /*
3799  * Should be used after trace_array_get(), trace_types_lock
3800  * ensures that i_cdev was already initialized.
3801  */
3802 static inline int tracing_get_cpu(struct inode *inode)
3803 {
3804 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3805 		return (long)inode->i_cdev - 1;
3806 	return RING_BUFFER_ALL_CPUS;
3807 }
3808 
3809 static const struct seq_operations tracer_seq_ops = {
3810 	.start		= s_start,
3811 	.next		= s_next,
3812 	.stop		= s_stop,
3813 	.show		= s_show,
3814 };
3815 
3816 static struct trace_iterator *
3817 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3818 {
3819 	struct trace_array *tr = inode->i_private;
3820 	struct trace_iterator *iter;
3821 	int cpu;
3822 
3823 	if (tracing_disabled)
3824 		return ERR_PTR(-ENODEV);
3825 
3826 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3827 	if (!iter)
3828 		return ERR_PTR(-ENOMEM);
3829 
3830 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3831 				    GFP_KERNEL);
3832 	if (!iter->buffer_iter)
3833 		goto release;
3834 
3835 	/*
3836 	 * We make a copy of the current tracer to avoid concurrent
3837 	 * changes on it while we are reading.
3838 	 */
3839 	mutex_lock(&trace_types_lock);
3840 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3841 	if (!iter->trace)
3842 		goto fail;
3843 
3844 	*iter->trace = *tr->current_trace;
3845 
3846 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3847 		goto fail;
3848 
3849 	iter->tr = tr;
3850 
3851 #ifdef CONFIG_TRACER_MAX_TRACE
3852 	/* Currently only the top directory has a snapshot */
3853 	if (tr->current_trace->print_max || snapshot)
3854 		iter->trace_buffer = &tr->max_buffer;
3855 	else
3856 #endif
3857 		iter->trace_buffer = &tr->trace_buffer;
3858 	iter->snapshot = snapshot;
3859 	iter->pos = -1;
3860 	iter->cpu_file = tracing_get_cpu(inode);
3861 	mutex_init(&iter->mutex);
3862 
3863 	/* Notify the tracer early; before we stop tracing. */
3864 	if (iter->trace && iter->trace->open)
3865 		iter->trace->open(iter);
3866 
3867 	/* Annotate start of buffers if we had overruns */
3868 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3869 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3870 
3871 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3872 	if (trace_clocks[tr->clock_id].in_ns)
3873 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3874 
3875 	/* stop the trace while dumping if we are not opening "snapshot" */
3876 	if (!iter->snapshot)
3877 		tracing_stop_tr(tr);
3878 
3879 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3880 		for_each_tracing_cpu(cpu) {
3881 			iter->buffer_iter[cpu] =
3882 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3883 		}
3884 		ring_buffer_read_prepare_sync();
3885 		for_each_tracing_cpu(cpu) {
3886 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3887 			tracing_iter_reset(iter, cpu);
3888 		}
3889 	} else {
3890 		cpu = iter->cpu_file;
3891 		iter->buffer_iter[cpu] =
3892 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3893 		ring_buffer_read_prepare_sync();
3894 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3895 		tracing_iter_reset(iter, cpu);
3896 	}
3897 
3898 	mutex_unlock(&trace_types_lock);
3899 
3900 	return iter;
3901 
3902  fail:
3903 	mutex_unlock(&trace_types_lock);
3904 	kfree(iter->trace);
3905 	kfree(iter->buffer_iter);
3906 release:
3907 	seq_release_private(inode, file);
3908 	return ERR_PTR(-ENOMEM);
3909 }
3910 
3911 int tracing_open_generic(struct inode *inode, struct file *filp)
3912 {
3913 	if (tracing_disabled)
3914 		return -ENODEV;
3915 
3916 	filp->private_data = inode->i_private;
3917 	return 0;
3918 }
3919 
3920 bool tracing_is_disabled(void)
3921 {
3922 	return (tracing_disabled) ? true: false;
3923 }
3924 
3925 /*
3926  * Open and update trace_array ref count.
3927  * Must have the current trace_array passed to it.
3928  */
3929 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3930 {
3931 	struct trace_array *tr = inode->i_private;
3932 
3933 	if (tracing_disabled)
3934 		return -ENODEV;
3935 
3936 	if (trace_array_get(tr) < 0)
3937 		return -ENODEV;
3938 
3939 	filp->private_data = inode->i_private;
3940 
3941 	return 0;
3942 }
3943 
3944 static int tracing_release(struct inode *inode, struct file *file)
3945 {
3946 	struct trace_array *tr = inode->i_private;
3947 	struct seq_file *m = file->private_data;
3948 	struct trace_iterator *iter;
3949 	int cpu;
3950 
3951 	if (!(file->f_mode & FMODE_READ)) {
3952 		trace_array_put(tr);
3953 		return 0;
3954 	}
3955 
3956 	/* Writes do not use seq_file */
3957 	iter = m->private;
3958 	mutex_lock(&trace_types_lock);
3959 
3960 	for_each_tracing_cpu(cpu) {
3961 		if (iter->buffer_iter[cpu])
3962 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3963 	}
3964 
3965 	if (iter->trace && iter->trace->close)
3966 		iter->trace->close(iter);
3967 
3968 	if (!iter->snapshot)
3969 		/* reenable tracing if it was previously enabled */
3970 		tracing_start_tr(tr);
3971 
3972 	__trace_array_put(tr);
3973 
3974 	mutex_unlock(&trace_types_lock);
3975 
3976 	mutex_destroy(&iter->mutex);
3977 	free_cpumask_var(iter->started);
3978 	kfree(iter->trace);
3979 	kfree(iter->buffer_iter);
3980 	seq_release_private(inode, file);
3981 
3982 	return 0;
3983 }
3984 
3985 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3986 {
3987 	struct trace_array *tr = inode->i_private;
3988 
3989 	trace_array_put(tr);
3990 	return 0;
3991 }
3992 
3993 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3994 {
3995 	struct trace_array *tr = inode->i_private;
3996 
3997 	trace_array_put(tr);
3998 
3999 	return single_release(inode, file);
4000 }
4001 
4002 static int tracing_open(struct inode *inode, struct file *file)
4003 {
4004 	struct trace_array *tr = inode->i_private;
4005 	struct trace_iterator *iter;
4006 	int ret = 0;
4007 
4008 	if (trace_array_get(tr) < 0)
4009 		return -ENODEV;
4010 
4011 	/* If this file was open for write, then erase contents */
4012 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4013 		int cpu = tracing_get_cpu(inode);
4014 
4015 		if (cpu == RING_BUFFER_ALL_CPUS)
4016 			tracing_reset_online_cpus(&tr->trace_buffer);
4017 		else
4018 			tracing_reset(&tr->trace_buffer, cpu);
4019 	}
4020 
4021 	if (file->f_mode & FMODE_READ) {
4022 		iter = __tracing_open(inode, file, false);
4023 		if (IS_ERR(iter))
4024 			ret = PTR_ERR(iter);
4025 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4026 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4027 	}
4028 
4029 	if (ret < 0)
4030 		trace_array_put(tr);
4031 
4032 	return ret;
4033 }
4034 
4035 /*
4036  * Some tracers are not suitable for instance buffers.
4037  * A tracer is always available for the global array (toplevel)
4038  * or if it explicitly states that it is.
4039  */
4040 static bool
4041 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4042 {
4043 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4044 }
4045 
4046 /* Find the next tracer that this trace array may use */
4047 static struct tracer *
4048 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4049 {
4050 	while (t && !trace_ok_for_array(t, tr))
4051 		t = t->next;
4052 
4053 	return t;
4054 }
4055 
4056 static void *
4057 t_next(struct seq_file *m, void *v, loff_t *pos)
4058 {
4059 	struct trace_array *tr = m->private;
4060 	struct tracer *t = v;
4061 
4062 	(*pos)++;
4063 
4064 	if (t)
4065 		t = get_tracer_for_array(tr, t->next);
4066 
4067 	return t;
4068 }
4069 
4070 static void *t_start(struct seq_file *m, loff_t *pos)
4071 {
4072 	struct trace_array *tr = m->private;
4073 	struct tracer *t;
4074 	loff_t l = 0;
4075 
4076 	mutex_lock(&trace_types_lock);
4077 
4078 	t = get_tracer_for_array(tr, trace_types);
4079 	for (; t && l < *pos; t = t_next(m, t, &l))
4080 			;
4081 
4082 	return t;
4083 }
4084 
4085 static void t_stop(struct seq_file *m, void *p)
4086 {
4087 	mutex_unlock(&trace_types_lock);
4088 }
4089 
4090 static int t_show(struct seq_file *m, void *v)
4091 {
4092 	struct tracer *t = v;
4093 
4094 	if (!t)
4095 		return 0;
4096 
4097 	seq_puts(m, t->name);
4098 	if (t->next)
4099 		seq_putc(m, ' ');
4100 	else
4101 		seq_putc(m, '\n');
4102 
4103 	return 0;
4104 }
4105 
4106 static const struct seq_operations show_traces_seq_ops = {
4107 	.start		= t_start,
4108 	.next		= t_next,
4109 	.stop		= t_stop,
4110 	.show		= t_show,
4111 };
4112 
4113 static int show_traces_open(struct inode *inode, struct file *file)
4114 {
4115 	struct trace_array *tr = inode->i_private;
4116 	struct seq_file *m;
4117 	int ret;
4118 
4119 	if (tracing_disabled)
4120 		return -ENODEV;
4121 
4122 	ret = seq_open(file, &show_traces_seq_ops);
4123 	if (ret)
4124 		return ret;
4125 
4126 	m = file->private_data;
4127 	m->private = tr;
4128 
4129 	return 0;
4130 }
4131 
4132 static ssize_t
4133 tracing_write_stub(struct file *filp, const char __user *ubuf,
4134 		   size_t count, loff_t *ppos)
4135 {
4136 	return count;
4137 }
4138 
4139 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4140 {
4141 	int ret;
4142 
4143 	if (file->f_mode & FMODE_READ)
4144 		ret = seq_lseek(file, offset, whence);
4145 	else
4146 		file->f_pos = ret = 0;
4147 
4148 	return ret;
4149 }
4150 
4151 static const struct file_operations tracing_fops = {
4152 	.open		= tracing_open,
4153 	.read		= seq_read,
4154 	.write		= tracing_write_stub,
4155 	.llseek		= tracing_lseek,
4156 	.release	= tracing_release,
4157 };
4158 
4159 static const struct file_operations show_traces_fops = {
4160 	.open		= show_traces_open,
4161 	.read		= seq_read,
4162 	.release	= seq_release,
4163 	.llseek		= seq_lseek,
4164 };
4165 
4166 /*
4167  * The tracer itself will not take this lock, but still we want
4168  * to provide a consistent cpumask to user-space:
4169  */
4170 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4171 
4172 /*
4173  * Temporary storage for the character representation of the
4174  * CPU bitmask (and one more byte for the newline):
4175  */
4176 static char mask_str[NR_CPUS + 1];
4177 
4178 static ssize_t
4179 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4180 		     size_t count, loff_t *ppos)
4181 {
4182 	struct trace_array *tr = file_inode(filp)->i_private;
4183 	int len;
4184 
4185 	mutex_lock(&tracing_cpumask_update_lock);
4186 
4187 	len = snprintf(mask_str, count, "%*pb\n",
4188 		       cpumask_pr_args(tr->tracing_cpumask));
4189 	if (len >= count) {
4190 		count = -EINVAL;
4191 		goto out_err;
4192 	}
4193 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4194 
4195 out_err:
4196 	mutex_unlock(&tracing_cpumask_update_lock);
4197 
4198 	return count;
4199 }
4200 
4201 static ssize_t
4202 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4203 		      size_t count, loff_t *ppos)
4204 {
4205 	struct trace_array *tr = file_inode(filp)->i_private;
4206 	cpumask_var_t tracing_cpumask_new;
4207 	int err, cpu;
4208 
4209 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4210 		return -ENOMEM;
4211 
4212 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4213 	if (err)
4214 		goto err_unlock;
4215 
4216 	mutex_lock(&tracing_cpumask_update_lock);
4217 
4218 	local_irq_disable();
4219 	arch_spin_lock(&tr->max_lock);
4220 	for_each_tracing_cpu(cpu) {
4221 		/*
4222 		 * Increase/decrease the disabled counter if we are
4223 		 * about to flip a bit in the cpumask:
4224 		 */
4225 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4226 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4227 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4228 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4229 		}
4230 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4231 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4232 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4233 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4234 		}
4235 	}
4236 	arch_spin_unlock(&tr->max_lock);
4237 	local_irq_enable();
4238 
4239 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4240 
4241 	mutex_unlock(&tracing_cpumask_update_lock);
4242 	free_cpumask_var(tracing_cpumask_new);
4243 
4244 	return count;
4245 
4246 err_unlock:
4247 	free_cpumask_var(tracing_cpumask_new);
4248 
4249 	return err;
4250 }
4251 
4252 static const struct file_operations tracing_cpumask_fops = {
4253 	.open		= tracing_open_generic_tr,
4254 	.read		= tracing_cpumask_read,
4255 	.write		= tracing_cpumask_write,
4256 	.release	= tracing_release_generic_tr,
4257 	.llseek		= generic_file_llseek,
4258 };
4259 
4260 static int tracing_trace_options_show(struct seq_file *m, void *v)
4261 {
4262 	struct tracer_opt *trace_opts;
4263 	struct trace_array *tr = m->private;
4264 	u32 tracer_flags;
4265 	int i;
4266 
4267 	mutex_lock(&trace_types_lock);
4268 	tracer_flags = tr->current_trace->flags->val;
4269 	trace_opts = tr->current_trace->flags->opts;
4270 
4271 	for (i = 0; trace_options[i]; i++) {
4272 		if (tr->trace_flags & (1 << i))
4273 			seq_printf(m, "%s\n", trace_options[i]);
4274 		else
4275 			seq_printf(m, "no%s\n", trace_options[i]);
4276 	}
4277 
4278 	for (i = 0; trace_opts[i].name; i++) {
4279 		if (tracer_flags & trace_opts[i].bit)
4280 			seq_printf(m, "%s\n", trace_opts[i].name);
4281 		else
4282 			seq_printf(m, "no%s\n", trace_opts[i].name);
4283 	}
4284 	mutex_unlock(&trace_types_lock);
4285 
4286 	return 0;
4287 }
4288 
4289 static int __set_tracer_option(struct trace_array *tr,
4290 			       struct tracer_flags *tracer_flags,
4291 			       struct tracer_opt *opts, int neg)
4292 {
4293 	struct tracer *trace = tracer_flags->trace;
4294 	int ret;
4295 
4296 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4297 	if (ret)
4298 		return ret;
4299 
4300 	if (neg)
4301 		tracer_flags->val &= ~opts->bit;
4302 	else
4303 		tracer_flags->val |= opts->bit;
4304 	return 0;
4305 }
4306 
4307 /* Try to assign a tracer specific option */
4308 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4309 {
4310 	struct tracer *trace = tr->current_trace;
4311 	struct tracer_flags *tracer_flags = trace->flags;
4312 	struct tracer_opt *opts = NULL;
4313 	int i;
4314 
4315 	for (i = 0; tracer_flags->opts[i].name; i++) {
4316 		opts = &tracer_flags->opts[i];
4317 
4318 		if (strcmp(cmp, opts->name) == 0)
4319 			return __set_tracer_option(tr, trace->flags, opts, neg);
4320 	}
4321 
4322 	return -EINVAL;
4323 }
4324 
4325 /* Some tracers require overwrite to stay enabled */
4326 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4327 {
4328 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4329 		return -1;
4330 
4331 	return 0;
4332 }
4333 
4334 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4335 {
4336 	/* do nothing if flag is already set */
4337 	if (!!(tr->trace_flags & mask) == !!enabled)
4338 		return 0;
4339 
4340 	/* Give the tracer a chance to approve the change */
4341 	if (tr->current_trace->flag_changed)
4342 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4343 			return -EINVAL;
4344 
4345 	if (enabled)
4346 		tr->trace_flags |= mask;
4347 	else
4348 		tr->trace_flags &= ~mask;
4349 
4350 	if (mask == TRACE_ITER_RECORD_CMD)
4351 		trace_event_enable_cmd_record(enabled);
4352 
4353 	if (mask == TRACE_ITER_RECORD_TGID) {
4354 		if (!tgid_map)
4355 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4356 					   GFP_KERNEL);
4357 		if (!tgid_map) {
4358 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4359 			return -ENOMEM;
4360 		}
4361 
4362 		trace_event_enable_tgid_record(enabled);
4363 	}
4364 
4365 	if (mask == TRACE_ITER_EVENT_FORK)
4366 		trace_event_follow_fork(tr, enabled);
4367 
4368 	if (mask == TRACE_ITER_FUNC_FORK)
4369 		ftrace_pid_follow_fork(tr, enabled);
4370 
4371 	if (mask == TRACE_ITER_OVERWRITE) {
4372 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4373 #ifdef CONFIG_TRACER_MAX_TRACE
4374 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4375 #endif
4376 	}
4377 
4378 	if (mask == TRACE_ITER_PRINTK) {
4379 		trace_printk_start_stop_comm(enabled);
4380 		trace_printk_control(enabled);
4381 	}
4382 
4383 	return 0;
4384 }
4385 
4386 static int trace_set_options(struct trace_array *tr, char *option)
4387 {
4388 	char *cmp;
4389 	int neg = 0;
4390 	int ret = -ENODEV;
4391 	int i;
4392 	size_t orig_len = strlen(option);
4393 
4394 	cmp = strstrip(option);
4395 
4396 	if (strncmp(cmp, "no", 2) == 0) {
4397 		neg = 1;
4398 		cmp += 2;
4399 	}
4400 
4401 	mutex_lock(&trace_types_lock);
4402 
4403 	for (i = 0; trace_options[i]; i++) {
4404 		if (strcmp(cmp, trace_options[i]) == 0) {
4405 			ret = set_tracer_flag(tr, 1 << i, !neg);
4406 			break;
4407 		}
4408 	}
4409 
4410 	/* If no option could be set, test the specific tracer options */
4411 	if (!trace_options[i])
4412 		ret = set_tracer_option(tr, cmp, neg);
4413 
4414 	mutex_unlock(&trace_types_lock);
4415 
4416 	/*
4417 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4418 	 * turn it back into a space.
4419 	 */
4420 	if (orig_len > strlen(option))
4421 		option[strlen(option)] = ' ';
4422 
4423 	return ret;
4424 }
4425 
4426 static void __init apply_trace_boot_options(void)
4427 {
4428 	char *buf = trace_boot_options_buf;
4429 	char *option;
4430 
4431 	while (true) {
4432 		option = strsep(&buf, ",");
4433 
4434 		if (!option)
4435 			break;
4436 
4437 		if (*option)
4438 			trace_set_options(&global_trace, option);
4439 
4440 		/* Put back the comma to allow this to be called again */
4441 		if (buf)
4442 			*(buf - 1) = ',';
4443 	}
4444 }
4445 
4446 static ssize_t
4447 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4448 			size_t cnt, loff_t *ppos)
4449 {
4450 	struct seq_file *m = filp->private_data;
4451 	struct trace_array *tr = m->private;
4452 	char buf[64];
4453 	int ret;
4454 
4455 	if (cnt >= sizeof(buf))
4456 		return -EINVAL;
4457 
4458 	if (copy_from_user(buf, ubuf, cnt))
4459 		return -EFAULT;
4460 
4461 	buf[cnt] = 0;
4462 
4463 	ret = trace_set_options(tr, buf);
4464 	if (ret < 0)
4465 		return ret;
4466 
4467 	*ppos += cnt;
4468 
4469 	return cnt;
4470 }
4471 
4472 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4473 {
4474 	struct trace_array *tr = inode->i_private;
4475 	int ret;
4476 
4477 	if (tracing_disabled)
4478 		return -ENODEV;
4479 
4480 	if (trace_array_get(tr) < 0)
4481 		return -ENODEV;
4482 
4483 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4484 	if (ret < 0)
4485 		trace_array_put(tr);
4486 
4487 	return ret;
4488 }
4489 
4490 static const struct file_operations tracing_iter_fops = {
4491 	.open		= tracing_trace_options_open,
4492 	.read		= seq_read,
4493 	.llseek		= seq_lseek,
4494 	.release	= tracing_single_release_tr,
4495 	.write		= tracing_trace_options_write,
4496 };
4497 
4498 static const char readme_msg[] =
4499 	"tracing mini-HOWTO:\n\n"
4500 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4501 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4502 	" Important files:\n"
4503 	"  trace\t\t\t- The static contents of the buffer\n"
4504 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4505 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4506 	"  current_tracer\t- function and latency tracers\n"
4507 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4508 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4509 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4510 	"  trace_clock\t\t-change the clock used to order events\n"
4511 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4512 	"      global:   Synced across CPUs but slows tracing down.\n"
4513 	"     counter:   Not a clock, but just an increment\n"
4514 	"      uptime:   Jiffy counter from time of boot\n"
4515 	"        perf:   Same clock that perf events use\n"
4516 #ifdef CONFIG_X86_64
4517 	"     x86-tsc:   TSC cycle counter\n"
4518 #endif
4519 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4520 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4521 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4522 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4523 	"\t\t\t  Remove sub-buffer with rmdir\n"
4524 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4525 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4526 	"\t\t\t  option name\n"
4527 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4528 #ifdef CONFIG_DYNAMIC_FTRACE
4529 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4530 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4531 	"\t\t\t  functions\n"
4532 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4533 	"\t     modules: Can select a group via module\n"
4534 	"\t      Format: :mod:<module-name>\n"
4535 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4536 	"\t    triggers: a command to perform when function is hit\n"
4537 	"\t      Format: <function>:<trigger>[:count]\n"
4538 	"\t     trigger: traceon, traceoff\n"
4539 	"\t\t      enable_event:<system>:<event>\n"
4540 	"\t\t      disable_event:<system>:<event>\n"
4541 #ifdef CONFIG_STACKTRACE
4542 	"\t\t      stacktrace\n"
4543 #endif
4544 #ifdef CONFIG_TRACER_SNAPSHOT
4545 	"\t\t      snapshot\n"
4546 #endif
4547 	"\t\t      dump\n"
4548 	"\t\t      cpudump\n"
4549 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4550 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4551 	"\t     The first one will disable tracing every time do_fault is hit\n"
4552 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4553 	"\t       The first time do trap is hit and it disables tracing, the\n"
4554 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4555 	"\t       the counter will not decrement. It only decrements when the\n"
4556 	"\t       trigger did work\n"
4557 	"\t     To remove trigger without count:\n"
4558 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4559 	"\t     To remove trigger with a count:\n"
4560 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4561 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4562 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4563 	"\t    modules: Can select a group via module command :mod:\n"
4564 	"\t    Does not accept triggers\n"
4565 #endif /* CONFIG_DYNAMIC_FTRACE */
4566 #ifdef CONFIG_FUNCTION_TRACER
4567 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4568 	"\t\t    (function)\n"
4569 #endif
4570 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4571 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4572 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4573 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4574 #endif
4575 #ifdef CONFIG_TRACER_SNAPSHOT
4576 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4577 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4578 	"\t\t\t  information\n"
4579 #endif
4580 #ifdef CONFIG_STACK_TRACER
4581 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4582 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4583 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4584 	"\t\t\t  new trace)\n"
4585 #ifdef CONFIG_DYNAMIC_FTRACE
4586 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4587 	"\t\t\t  traces\n"
4588 #endif
4589 #endif /* CONFIG_STACK_TRACER */
4590 #ifdef CONFIG_KPROBE_EVENTS
4591 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4592 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4593 #endif
4594 #ifdef CONFIG_UPROBE_EVENTS
4595 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4596 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4597 #endif
4598 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4599 	"\t  accepts: event-definitions (one definition per line)\n"
4600 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4601 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4602 	"\t           -:[<group>/]<event>\n"
4603 #ifdef CONFIG_KPROBE_EVENTS
4604 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4605   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4606 #endif
4607 #ifdef CONFIG_UPROBE_EVENTS
4608 	"\t    place: <path>:<offset>\n"
4609 #endif
4610 	"\t     args: <name>=fetcharg[:type]\n"
4611 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4612 	"\t           $stack<index>, $stack, $retval, $comm\n"
4613 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4614 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4615 #endif
4616 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4617 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4618 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4619 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4620 	"\t\t\t  events\n"
4621 	"      filter\t\t- If set, only events passing filter are traced\n"
4622 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4623 	"\t\t\t  <event>:\n"
4624 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4625 	"      filter\t\t- If set, only events passing filter are traced\n"
4626 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4627 	"\t    Format: <trigger>[:count][if <filter>]\n"
4628 	"\t   trigger: traceon, traceoff\n"
4629 	"\t            enable_event:<system>:<event>\n"
4630 	"\t            disable_event:<system>:<event>\n"
4631 #ifdef CONFIG_HIST_TRIGGERS
4632 	"\t            enable_hist:<system>:<event>\n"
4633 	"\t            disable_hist:<system>:<event>\n"
4634 #endif
4635 #ifdef CONFIG_STACKTRACE
4636 	"\t\t    stacktrace\n"
4637 #endif
4638 #ifdef CONFIG_TRACER_SNAPSHOT
4639 	"\t\t    snapshot\n"
4640 #endif
4641 #ifdef CONFIG_HIST_TRIGGERS
4642 	"\t\t    hist (see below)\n"
4643 #endif
4644 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4645 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4646 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4647 	"\t                  events/block/block_unplug/trigger\n"
4648 	"\t   The first disables tracing every time block_unplug is hit.\n"
4649 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4650 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4651 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4652 	"\t   Like function triggers, the counter is only decremented if it\n"
4653 	"\t    enabled or disabled tracing.\n"
4654 	"\t   To remove a trigger without a count:\n"
4655 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4656 	"\t   To remove a trigger with a count:\n"
4657 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4658 	"\t   Filters can be ignored when removing a trigger.\n"
4659 #ifdef CONFIG_HIST_TRIGGERS
4660 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4661 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4662 	"\t            [:values=<field1[,field2,...]>]\n"
4663 	"\t            [:sort=<field1[,field2,...]>]\n"
4664 	"\t            [:size=#entries]\n"
4665 	"\t            [:pause][:continue][:clear]\n"
4666 	"\t            [:name=histname1]\n"
4667 	"\t            [if <filter>]\n\n"
4668 	"\t    When a matching event is hit, an entry is added to a hash\n"
4669 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4670 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4671 	"\t    correspond to fields in the event's format description.  Keys\n"
4672 	"\t    can be any field, or the special string 'stacktrace'.\n"
4673 	"\t    Compound keys consisting of up to two fields can be specified\n"
4674 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4675 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4676 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4677 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4678 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4679 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4680 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4681 	"\t    its histogram data will be shared with other triggers of the\n"
4682 	"\t    same name, and trigger hits will update this common data.\n\n"
4683 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4684 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4685 	"\t    triggers attached to an event, there will be a table for each\n"
4686 	"\t    trigger in the output.  The table displayed for a named\n"
4687 	"\t    trigger will be the same as any other instance having the\n"
4688 	"\t    same name.  The default format used to display a given field\n"
4689 	"\t    can be modified by appending any of the following modifiers\n"
4690 	"\t    to the field name, as applicable:\n\n"
4691 	"\t            .hex        display a number as a hex value\n"
4692 	"\t            .sym        display an address as a symbol\n"
4693 	"\t            .sym-offset display an address as a symbol and offset\n"
4694 	"\t            .execname   display a common_pid as a program name\n"
4695 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4696 	"\t            .log2       display log2 value rather than raw number\n\n"
4697 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4698 	"\t    trigger or to start a hist trigger but not log any events\n"
4699 	"\t    until told to do so.  'continue' can be used to start or\n"
4700 	"\t    restart a paused hist trigger.\n\n"
4701 	"\t    The 'clear' parameter will clear the contents of a running\n"
4702 	"\t    hist trigger and leave its current paused/active state\n"
4703 	"\t    unchanged.\n\n"
4704 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4705 	"\t    have one event conditionally start and stop another event's\n"
4706 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4707 	"\t    the enable_event and disable_event triggers.\n"
4708 #endif
4709 ;
4710 
4711 static ssize_t
4712 tracing_readme_read(struct file *filp, char __user *ubuf,
4713 		       size_t cnt, loff_t *ppos)
4714 {
4715 	return simple_read_from_buffer(ubuf, cnt, ppos,
4716 					readme_msg, strlen(readme_msg));
4717 }
4718 
4719 static const struct file_operations tracing_readme_fops = {
4720 	.open		= tracing_open_generic,
4721 	.read		= tracing_readme_read,
4722 	.llseek		= generic_file_llseek,
4723 };
4724 
4725 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4726 {
4727 	int *ptr = v;
4728 
4729 	if (*pos || m->count)
4730 		ptr++;
4731 
4732 	(*pos)++;
4733 
4734 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4735 		if (trace_find_tgid(*ptr))
4736 			return ptr;
4737 	}
4738 
4739 	return NULL;
4740 }
4741 
4742 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4743 {
4744 	void *v;
4745 	loff_t l = 0;
4746 
4747 	if (!tgid_map)
4748 		return NULL;
4749 
4750 	v = &tgid_map[0];
4751 	while (l <= *pos) {
4752 		v = saved_tgids_next(m, v, &l);
4753 		if (!v)
4754 			return NULL;
4755 	}
4756 
4757 	return v;
4758 }
4759 
4760 static void saved_tgids_stop(struct seq_file *m, void *v)
4761 {
4762 }
4763 
4764 static int saved_tgids_show(struct seq_file *m, void *v)
4765 {
4766 	int pid = (int *)v - tgid_map;
4767 
4768 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4769 	return 0;
4770 }
4771 
4772 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4773 	.start		= saved_tgids_start,
4774 	.stop		= saved_tgids_stop,
4775 	.next		= saved_tgids_next,
4776 	.show		= saved_tgids_show,
4777 };
4778 
4779 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4780 {
4781 	if (tracing_disabled)
4782 		return -ENODEV;
4783 
4784 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4785 }
4786 
4787 
4788 static const struct file_operations tracing_saved_tgids_fops = {
4789 	.open		= tracing_saved_tgids_open,
4790 	.read		= seq_read,
4791 	.llseek		= seq_lseek,
4792 	.release	= seq_release,
4793 };
4794 
4795 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4796 {
4797 	unsigned int *ptr = v;
4798 
4799 	if (*pos || m->count)
4800 		ptr++;
4801 
4802 	(*pos)++;
4803 
4804 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4805 	     ptr++) {
4806 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4807 			continue;
4808 
4809 		return ptr;
4810 	}
4811 
4812 	return NULL;
4813 }
4814 
4815 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4816 {
4817 	void *v;
4818 	loff_t l = 0;
4819 
4820 	preempt_disable();
4821 	arch_spin_lock(&trace_cmdline_lock);
4822 
4823 	v = &savedcmd->map_cmdline_to_pid[0];
4824 	while (l <= *pos) {
4825 		v = saved_cmdlines_next(m, v, &l);
4826 		if (!v)
4827 			return NULL;
4828 	}
4829 
4830 	return v;
4831 }
4832 
4833 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4834 {
4835 	arch_spin_unlock(&trace_cmdline_lock);
4836 	preempt_enable();
4837 }
4838 
4839 static int saved_cmdlines_show(struct seq_file *m, void *v)
4840 {
4841 	char buf[TASK_COMM_LEN];
4842 	unsigned int *pid = v;
4843 
4844 	__trace_find_cmdline(*pid, buf);
4845 	seq_printf(m, "%d %s\n", *pid, buf);
4846 	return 0;
4847 }
4848 
4849 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4850 	.start		= saved_cmdlines_start,
4851 	.next		= saved_cmdlines_next,
4852 	.stop		= saved_cmdlines_stop,
4853 	.show		= saved_cmdlines_show,
4854 };
4855 
4856 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4857 {
4858 	if (tracing_disabled)
4859 		return -ENODEV;
4860 
4861 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4862 }
4863 
4864 static const struct file_operations tracing_saved_cmdlines_fops = {
4865 	.open		= tracing_saved_cmdlines_open,
4866 	.read		= seq_read,
4867 	.llseek		= seq_lseek,
4868 	.release	= seq_release,
4869 };
4870 
4871 static ssize_t
4872 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4873 				 size_t cnt, loff_t *ppos)
4874 {
4875 	char buf[64];
4876 	int r;
4877 
4878 	arch_spin_lock(&trace_cmdline_lock);
4879 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4880 	arch_spin_unlock(&trace_cmdline_lock);
4881 
4882 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4883 }
4884 
4885 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4886 {
4887 	kfree(s->saved_cmdlines);
4888 	kfree(s->map_cmdline_to_pid);
4889 	kfree(s);
4890 }
4891 
4892 static int tracing_resize_saved_cmdlines(unsigned int val)
4893 {
4894 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4895 
4896 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4897 	if (!s)
4898 		return -ENOMEM;
4899 
4900 	if (allocate_cmdlines_buffer(val, s) < 0) {
4901 		kfree(s);
4902 		return -ENOMEM;
4903 	}
4904 
4905 	arch_spin_lock(&trace_cmdline_lock);
4906 	savedcmd_temp = savedcmd;
4907 	savedcmd = s;
4908 	arch_spin_unlock(&trace_cmdline_lock);
4909 	free_saved_cmdlines_buffer(savedcmd_temp);
4910 
4911 	return 0;
4912 }
4913 
4914 static ssize_t
4915 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4916 				  size_t cnt, loff_t *ppos)
4917 {
4918 	unsigned long val;
4919 	int ret;
4920 
4921 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4922 	if (ret)
4923 		return ret;
4924 
4925 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4926 	if (!val || val > PID_MAX_DEFAULT)
4927 		return -EINVAL;
4928 
4929 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4930 	if (ret < 0)
4931 		return ret;
4932 
4933 	*ppos += cnt;
4934 
4935 	return cnt;
4936 }
4937 
4938 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4939 	.open		= tracing_open_generic,
4940 	.read		= tracing_saved_cmdlines_size_read,
4941 	.write		= tracing_saved_cmdlines_size_write,
4942 };
4943 
4944 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4945 static union trace_eval_map_item *
4946 update_eval_map(union trace_eval_map_item *ptr)
4947 {
4948 	if (!ptr->map.eval_string) {
4949 		if (ptr->tail.next) {
4950 			ptr = ptr->tail.next;
4951 			/* Set ptr to the next real item (skip head) */
4952 			ptr++;
4953 		} else
4954 			return NULL;
4955 	}
4956 	return ptr;
4957 }
4958 
4959 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	union trace_eval_map_item *ptr = v;
4962 
4963 	/*
4964 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4965 	 * This really should never happen.
4966 	 */
4967 	ptr = update_eval_map(ptr);
4968 	if (WARN_ON_ONCE(!ptr))
4969 		return NULL;
4970 
4971 	ptr++;
4972 
4973 	(*pos)++;
4974 
4975 	ptr = update_eval_map(ptr);
4976 
4977 	return ptr;
4978 }
4979 
4980 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4981 {
4982 	union trace_eval_map_item *v;
4983 	loff_t l = 0;
4984 
4985 	mutex_lock(&trace_eval_mutex);
4986 
4987 	v = trace_eval_maps;
4988 	if (v)
4989 		v++;
4990 
4991 	while (v && l < *pos) {
4992 		v = eval_map_next(m, v, &l);
4993 	}
4994 
4995 	return v;
4996 }
4997 
4998 static void eval_map_stop(struct seq_file *m, void *v)
4999 {
5000 	mutex_unlock(&trace_eval_mutex);
5001 }
5002 
5003 static int eval_map_show(struct seq_file *m, void *v)
5004 {
5005 	union trace_eval_map_item *ptr = v;
5006 
5007 	seq_printf(m, "%s %ld (%s)\n",
5008 		   ptr->map.eval_string, ptr->map.eval_value,
5009 		   ptr->map.system);
5010 
5011 	return 0;
5012 }
5013 
5014 static const struct seq_operations tracing_eval_map_seq_ops = {
5015 	.start		= eval_map_start,
5016 	.next		= eval_map_next,
5017 	.stop		= eval_map_stop,
5018 	.show		= eval_map_show,
5019 };
5020 
5021 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5022 {
5023 	if (tracing_disabled)
5024 		return -ENODEV;
5025 
5026 	return seq_open(filp, &tracing_eval_map_seq_ops);
5027 }
5028 
5029 static const struct file_operations tracing_eval_map_fops = {
5030 	.open		= tracing_eval_map_open,
5031 	.read		= seq_read,
5032 	.llseek		= seq_lseek,
5033 	.release	= seq_release,
5034 };
5035 
5036 static inline union trace_eval_map_item *
5037 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5038 {
5039 	/* Return tail of array given the head */
5040 	return ptr + ptr->head.length + 1;
5041 }
5042 
5043 static void
5044 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5045 			   int len)
5046 {
5047 	struct trace_eval_map **stop;
5048 	struct trace_eval_map **map;
5049 	union trace_eval_map_item *map_array;
5050 	union trace_eval_map_item *ptr;
5051 
5052 	stop = start + len;
5053 
5054 	/*
5055 	 * The trace_eval_maps contains the map plus a head and tail item,
5056 	 * where the head holds the module and length of array, and the
5057 	 * tail holds a pointer to the next list.
5058 	 */
5059 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5060 	if (!map_array) {
5061 		pr_warn("Unable to allocate trace eval mapping\n");
5062 		return;
5063 	}
5064 
5065 	mutex_lock(&trace_eval_mutex);
5066 
5067 	if (!trace_eval_maps)
5068 		trace_eval_maps = map_array;
5069 	else {
5070 		ptr = trace_eval_maps;
5071 		for (;;) {
5072 			ptr = trace_eval_jmp_to_tail(ptr);
5073 			if (!ptr->tail.next)
5074 				break;
5075 			ptr = ptr->tail.next;
5076 
5077 		}
5078 		ptr->tail.next = map_array;
5079 	}
5080 	map_array->head.mod = mod;
5081 	map_array->head.length = len;
5082 	map_array++;
5083 
5084 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5085 		map_array->map = **map;
5086 		map_array++;
5087 	}
5088 	memset(map_array, 0, sizeof(*map_array));
5089 
5090 	mutex_unlock(&trace_eval_mutex);
5091 }
5092 
5093 static void trace_create_eval_file(struct dentry *d_tracer)
5094 {
5095 	trace_create_file("eval_map", 0444, d_tracer,
5096 			  NULL, &tracing_eval_map_fops);
5097 }
5098 
5099 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5100 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5101 static inline void trace_insert_eval_map_file(struct module *mod,
5102 			      struct trace_eval_map **start, int len) { }
5103 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5104 
5105 static void trace_insert_eval_map(struct module *mod,
5106 				  struct trace_eval_map **start, int len)
5107 {
5108 	struct trace_eval_map **map;
5109 
5110 	if (len <= 0)
5111 		return;
5112 
5113 	map = start;
5114 
5115 	trace_event_eval_update(map, len);
5116 
5117 	trace_insert_eval_map_file(mod, start, len);
5118 }
5119 
5120 static ssize_t
5121 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5122 		       size_t cnt, loff_t *ppos)
5123 {
5124 	struct trace_array *tr = filp->private_data;
5125 	char buf[MAX_TRACER_SIZE+2];
5126 	int r;
5127 
5128 	mutex_lock(&trace_types_lock);
5129 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5130 	mutex_unlock(&trace_types_lock);
5131 
5132 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5133 }
5134 
5135 int tracer_init(struct tracer *t, struct trace_array *tr)
5136 {
5137 	tracing_reset_online_cpus(&tr->trace_buffer);
5138 	return t->init(tr);
5139 }
5140 
5141 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5142 {
5143 	int cpu;
5144 
5145 	for_each_tracing_cpu(cpu)
5146 		per_cpu_ptr(buf->data, cpu)->entries = val;
5147 }
5148 
5149 #ifdef CONFIG_TRACER_MAX_TRACE
5150 /* resize @tr's buffer to the size of @size_tr's entries */
5151 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5152 					struct trace_buffer *size_buf, int cpu_id)
5153 {
5154 	int cpu, ret = 0;
5155 
5156 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5157 		for_each_tracing_cpu(cpu) {
5158 			ret = ring_buffer_resize(trace_buf->buffer,
5159 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5160 			if (ret < 0)
5161 				break;
5162 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5163 				per_cpu_ptr(size_buf->data, cpu)->entries;
5164 		}
5165 	} else {
5166 		ret = ring_buffer_resize(trace_buf->buffer,
5167 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5168 		if (ret == 0)
5169 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5170 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5171 	}
5172 
5173 	return ret;
5174 }
5175 #endif /* CONFIG_TRACER_MAX_TRACE */
5176 
5177 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5178 					unsigned long size, int cpu)
5179 {
5180 	int ret;
5181 
5182 	/*
5183 	 * If kernel or user changes the size of the ring buffer
5184 	 * we use the size that was given, and we can forget about
5185 	 * expanding it later.
5186 	 */
5187 	ring_buffer_expanded = true;
5188 
5189 	/* May be called before buffers are initialized */
5190 	if (!tr->trace_buffer.buffer)
5191 		return 0;
5192 
5193 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5194 	if (ret < 0)
5195 		return ret;
5196 
5197 #ifdef CONFIG_TRACER_MAX_TRACE
5198 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5199 	    !tr->current_trace->use_max_tr)
5200 		goto out;
5201 
5202 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5203 	if (ret < 0) {
5204 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5205 						     &tr->trace_buffer, cpu);
5206 		if (r < 0) {
5207 			/*
5208 			 * AARGH! We are left with different
5209 			 * size max buffer!!!!
5210 			 * The max buffer is our "snapshot" buffer.
5211 			 * When a tracer needs a snapshot (one of the
5212 			 * latency tracers), it swaps the max buffer
5213 			 * with the saved snap shot. We succeeded to
5214 			 * update the size of the main buffer, but failed to
5215 			 * update the size of the max buffer. But when we tried
5216 			 * to reset the main buffer to the original size, we
5217 			 * failed there too. This is very unlikely to
5218 			 * happen, but if it does, warn and kill all
5219 			 * tracing.
5220 			 */
5221 			WARN_ON(1);
5222 			tracing_disabled = 1;
5223 		}
5224 		return ret;
5225 	}
5226 
5227 	if (cpu == RING_BUFFER_ALL_CPUS)
5228 		set_buffer_entries(&tr->max_buffer, size);
5229 	else
5230 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5231 
5232  out:
5233 #endif /* CONFIG_TRACER_MAX_TRACE */
5234 
5235 	if (cpu == RING_BUFFER_ALL_CPUS)
5236 		set_buffer_entries(&tr->trace_buffer, size);
5237 	else
5238 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5239 
5240 	return ret;
5241 }
5242 
5243 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5244 					  unsigned long size, int cpu_id)
5245 {
5246 	int ret = size;
5247 
5248 	mutex_lock(&trace_types_lock);
5249 
5250 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5251 		/* make sure, this cpu is enabled in the mask */
5252 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5253 			ret = -EINVAL;
5254 			goto out;
5255 		}
5256 	}
5257 
5258 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5259 	if (ret < 0)
5260 		ret = -ENOMEM;
5261 
5262 out:
5263 	mutex_unlock(&trace_types_lock);
5264 
5265 	return ret;
5266 }
5267 
5268 
5269 /**
5270  * tracing_update_buffers - used by tracing facility to expand ring buffers
5271  *
5272  * To save on memory when the tracing is never used on a system with it
5273  * configured in. The ring buffers are set to a minimum size. But once
5274  * a user starts to use the tracing facility, then they need to grow
5275  * to their default size.
5276  *
5277  * This function is to be called when a tracer is about to be used.
5278  */
5279 int tracing_update_buffers(void)
5280 {
5281 	int ret = 0;
5282 
5283 	mutex_lock(&trace_types_lock);
5284 	if (!ring_buffer_expanded)
5285 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5286 						RING_BUFFER_ALL_CPUS);
5287 	mutex_unlock(&trace_types_lock);
5288 
5289 	return ret;
5290 }
5291 
5292 struct trace_option_dentry;
5293 
5294 static void
5295 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5296 
5297 /*
5298  * Used to clear out the tracer before deletion of an instance.
5299  * Must have trace_types_lock held.
5300  */
5301 static void tracing_set_nop(struct trace_array *tr)
5302 {
5303 	if (tr->current_trace == &nop_trace)
5304 		return;
5305 
5306 	tr->current_trace->enabled--;
5307 
5308 	if (tr->current_trace->reset)
5309 		tr->current_trace->reset(tr);
5310 
5311 	tr->current_trace = &nop_trace;
5312 }
5313 
5314 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5315 {
5316 	/* Only enable if the directory has been created already. */
5317 	if (!tr->dir)
5318 		return;
5319 
5320 	create_trace_option_files(tr, t);
5321 }
5322 
5323 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5324 {
5325 	struct tracer *t;
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327 	bool had_max_tr;
5328 #endif
5329 	int ret = 0;
5330 
5331 	mutex_lock(&trace_types_lock);
5332 
5333 	if (!ring_buffer_expanded) {
5334 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5335 						RING_BUFFER_ALL_CPUS);
5336 		if (ret < 0)
5337 			goto out;
5338 		ret = 0;
5339 	}
5340 
5341 	for (t = trace_types; t; t = t->next) {
5342 		if (strcmp(t->name, buf) == 0)
5343 			break;
5344 	}
5345 	if (!t) {
5346 		ret = -EINVAL;
5347 		goto out;
5348 	}
5349 	if (t == tr->current_trace)
5350 		goto out;
5351 
5352 	/* Some tracers are only allowed for the top level buffer */
5353 	if (!trace_ok_for_array(t, tr)) {
5354 		ret = -EINVAL;
5355 		goto out;
5356 	}
5357 
5358 	/* If trace pipe files are being read, we can't change the tracer */
5359 	if (tr->current_trace->ref) {
5360 		ret = -EBUSY;
5361 		goto out;
5362 	}
5363 
5364 	trace_branch_disable();
5365 
5366 	tr->current_trace->enabled--;
5367 
5368 	if (tr->current_trace->reset)
5369 		tr->current_trace->reset(tr);
5370 
5371 	/* Current trace needs to be nop_trace before synchronize_sched */
5372 	tr->current_trace = &nop_trace;
5373 
5374 #ifdef CONFIG_TRACER_MAX_TRACE
5375 	had_max_tr = tr->allocated_snapshot;
5376 
5377 	if (had_max_tr && !t->use_max_tr) {
5378 		/*
5379 		 * We need to make sure that the update_max_tr sees that
5380 		 * current_trace changed to nop_trace to keep it from
5381 		 * swapping the buffers after we resize it.
5382 		 * The update_max_tr is called from interrupts disabled
5383 		 * so a synchronized_sched() is sufficient.
5384 		 */
5385 		synchronize_sched();
5386 		free_snapshot(tr);
5387 	}
5388 #endif
5389 
5390 #ifdef CONFIG_TRACER_MAX_TRACE
5391 	if (t->use_max_tr && !had_max_tr) {
5392 		ret = alloc_snapshot(tr);
5393 		if (ret < 0)
5394 			goto out;
5395 	}
5396 #endif
5397 
5398 	if (t->init) {
5399 		ret = tracer_init(t, tr);
5400 		if (ret)
5401 			goto out;
5402 	}
5403 
5404 	tr->current_trace = t;
5405 	tr->current_trace->enabled++;
5406 	trace_branch_enable(tr);
5407  out:
5408 	mutex_unlock(&trace_types_lock);
5409 
5410 	return ret;
5411 }
5412 
5413 static ssize_t
5414 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5415 			size_t cnt, loff_t *ppos)
5416 {
5417 	struct trace_array *tr = filp->private_data;
5418 	char buf[MAX_TRACER_SIZE+1];
5419 	int i;
5420 	size_t ret;
5421 	int err;
5422 
5423 	ret = cnt;
5424 
5425 	if (cnt > MAX_TRACER_SIZE)
5426 		cnt = MAX_TRACER_SIZE;
5427 
5428 	if (copy_from_user(buf, ubuf, cnt))
5429 		return -EFAULT;
5430 
5431 	buf[cnt] = 0;
5432 
5433 	/* strip ending whitespace. */
5434 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5435 		buf[i] = 0;
5436 
5437 	err = tracing_set_tracer(tr, buf);
5438 	if (err)
5439 		return err;
5440 
5441 	*ppos += ret;
5442 
5443 	return ret;
5444 }
5445 
5446 static ssize_t
5447 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5448 		   size_t cnt, loff_t *ppos)
5449 {
5450 	char buf[64];
5451 	int r;
5452 
5453 	r = snprintf(buf, sizeof(buf), "%ld\n",
5454 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5455 	if (r > sizeof(buf))
5456 		r = sizeof(buf);
5457 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5458 }
5459 
5460 static ssize_t
5461 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5462 		    size_t cnt, loff_t *ppos)
5463 {
5464 	unsigned long val;
5465 	int ret;
5466 
5467 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5468 	if (ret)
5469 		return ret;
5470 
5471 	*ptr = val * 1000;
5472 
5473 	return cnt;
5474 }
5475 
5476 static ssize_t
5477 tracing_thresh_read(struct file *filp, char __user *ubuf,
5478 		    size_t cnt, loff_t *ppos)
5479 {
5480 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5481 }
5482 
5483 static ssize_t
5484 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5485 		     size_t cnt, loff_t *ppos)
5486 {
5487 	struct trace_array *tr = filp->private_data;
5488 	int ret;
5489 
5490 	mutex_lock(&trace_types_lock);
5491 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5492 	if (ret < 0)
5493 		goto out;
5494 
5495 	if (tr->current_trace->update_thresh) {
5496 		ret = tr->current_trace->update_thresh(tr);
5497 		if (ret < 0)
5498 			goto out;
5499 	}
5500 
5501 	ret = cnt;
5502 out:
5503 	mutex_unlock(&trace_types_lock);
5504 
5505 	return ret;
5506 }
5507 
5508 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5509 
5510 static ssize_t
5511 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5512 		     size_t cnt, loff_t *ppos)
5513 {
5514 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5515 }
5516 
5517 static ssize_t
5518 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5519 		      size_t cnt, loff_t *ppos)
5520 {
5521 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5522 }
5523 
5524 #endif
5525 
5526 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5527 {
5528 	struct trace_array *tr = inode->i_private;
5529 	struct trace_iterator *iter;
5530 	int ret = 0;
5531 
5532 	if (tracing_disabled)
5533 		return -ENODEV;
5534 
5535 	if (trace_array_get(tr) < 0)
5536 		return -ENODEV;
5537 
5538 	mutex_lock(&trace_types_lock);
5539 
5540 	/* create a buffer to store the information to pass to userspace */
5541 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5542 	if (!iter) {
5543 		ret = -ENOMEM;
5544 		__trace_array_put(tr);
5545 		goto out;
5546 	}
5547 
5548 	trace_seq_init(&iter->seq);
5549 	iter->trace = tr->current_trace;
5550 
5551 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5552 		ret = -ENOMEM;
5553 		goto fail;
5554 	}
5555 
5556 	/* trace pipe does not show start of buffer */
5557 	cpumask_setall(iter->started);
5558 
5559 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5560 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5561 
5562 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5563 	if (trace_clocks[tr->clock_id].in_ns)
5564 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5565 
5566 	iter->tr = tr;
5567 	iter->trace_buffer = &tr->trace_buffer;
5568 	iter->cpu_file = tracing_get_cpu(inode);
5569 	mutex_init(&iter->mutex);
5570 	filp->private_data = iter;
5571 
5572 	if (iter->trace->pipe_open)
5573 		iter->trace->pipe_open(iter);
5574 
5575 	nonseekable_open(inode, filp);
5576 
5577 	tr->current_trace->ref++;
5578 out:
5579 	mutex_unlock(&trace_types_lock);
5580 	return ret;
5581 
5582 fail:
5583 	kfree(iter->trace);
5584 	kfree(iter);
5585 	__trace_array_put(tr);
5586 	mutex_unlock(&trace_types_lock);
5587 	return ret;
5588 }
5589 
5590 static int tracing_release_pipe(struct inode *inode, struct file *file)
5591 {
5592 	struct trace_iterator *iter = file->private_data;
5593 	struct trace_array *tr = inode->i_private;
5594 
5595 	mutex_lock(&trace_types_lock);
5596 
5597 	tr->current_trace->ref--;
5598 
5599 	if (iter->trace->pipe_close)
5600 		iter->trace->pipe_close(iter);
5601 
5602 	mutex_unlock(&trace_types_lock);
5603 
5604 	free_cpumask_var(iter->started);
5605 	mutex_destroy(&iter->mutex);
5606 	kfree(iter);
5607 
5608 	trace_array_put(tr);
5609 
5610 	return 0;
5611 }
5612 
5613 static unsigned int
5614 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5615 {
5616 	struct trace_array *tr = iter->tr;
5617 
5618 	/* Iterators are static, they should be filled or empty */
5619 	if (trace_buffer_iter(iter, iter->cpu_file))
5620 		return POLLIN | POLLRDNORM;
5621 
5622 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5623 		/*
5624 		 * Always select as readable when in blocking mode
5625 		 */
5626 		return POLLIN | POLLRDNORM;
5627 	else
5628 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5629 					     filp, poll_table);
5630 }
5631 
5632 static unsigned int
5633 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5634 {
5635 	struct trace_iterator *iter = filp->private_data;
5636 
5637 	return trace_poll(iter, filp, poll_table);
5638 }
5639 
5640 /* Must be called with iter->mutex held. */
5641 static int tracing_wait_pipe(struct file *filp)
5642 {
5643 	struct trace_iterator *iter = filp->private_data;
5644 	int ret;
5645 
5646 	while (trace_empty(iter)) {
5647 
5648 		if ((filp->f_flags & O_NONBLOCK)) {
5649 			return -EAGAIN;
5650 		}
5651 
5652 		/*
5653 		 * We block until we read something and tracing is disabled.
5654 		 * We still block if tracing is disabled, but we have never
5655 		 * read anything. This allows a user to cat this file, and
5656 		 * then enable tracing. But after we have read something,
5657 		 * we give an EOF when tracing is again disabled.
5658 		 *
5659 		 * iter->pos will be 0 if we haven't read anything.
5660 		 */
5661 		if (!tracing_is_on() && iter->pos)
5662 			break;
5663 
5664 		mutex_unlock(&iter->mutex);
5665 
5666 		ret = wait_on_pipe(iter, false);
5667 
5668 		mutex_lock(&iter->mutex);
5669 
5670 		if (ret)
5671 			return ret;
5672 	}
5673 
5674 	return 1;
5675 }
5676 
5677 /*
5678  * Consumer reader.
5679  */
5680 static ssize_t
5681 tracing_read_pipe(struct file *filp, char __user *ubuf,
5682 		  size_t cnt, loff_t *ppos)
5683 {
5684 	struct trace_iterator *iter = filp->private_data;
5685 	ssize_t sret;
5686 
5687 	/*
5688 	 * Avoid more than one consumer on a single file descriptor
5689 	 * This is just a matter of traces coherency, the ring buffer itself
5690 	 * is protected.
5691 	 */
5692 	mutex_lock(&iter->mutex);
5693 
5694 	/* return any leftover data */
5695 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5696 	if (sret != -EBUSY)
5697 		goto out;
5698 
5699 	trace_seq_init(&iter->seq);
5700 
5701 	if (iter->trace->read) {
5702 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5703 		if (sret)
5704 			goto out;
5705 	}
5706 
5707 waitagain:
5708 	sret = tracing_wait_pipe(filp);
5709 	if (sret <= 0)
5710 		goto out;
5711 
5712 	/* stop when tracing is finished */
5713 	if (trace_empty(iter)) {
5714 		sret = 0;
5715 		goto out;
5716 	}
5717 
5718 	if (cnt >= PAGE_SIZE)
5719 		cnt = PAGE_SIZE - 1;
5720 
5721 	/* reset all but tr, trace, and overruns */
5722 	memset(&iter->seq, 0,
5723 	       sizeof(struct trace_iterator) -
5724 	       offsetof(struct trace_iterator, seq));
5725 	cpumask_clear(iter->started);
5726 	iter->pos = -1;
5727 
5728 	trace_event_read_lock();
5729 	trace_access_lock(iter->cpu_file);
5730 	while (trace_find_next_entry_inc(iter) != NULL) {
5731 		enum print_line_t ret;
5732 		int save_len = iter->seq.seq.len;
5733 
5734 		ret = print_trace_line(iter);
5735 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5736 			/* don't print partial lines */
5737 			iter->seq.seq.len = save_len;
5738 			break;
5739 		}
5740 		if (ret != TRACE_TYPE_NO_CONSUME)
5741 			trace_consume(iter);
5742 
5743 		if (trace_seq_used(&iter->seq) >= cnt)
5744 			break;
5745 
5746 		/*
5747 		 * Setting the full flag means we reached the trace_seq buffer
5748 		 * size and we should leave by partial output condition above.
5749 		 * One of the trace_seq_* functions is not used properly.
5750 		 */
5751 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5752 			  iter->ent->type);
5753 	}
5754 	trace_access_unlock(iter->cpu_file);
5755 	trace_event_read_unlock();
5756 
5757 	/* Now copy what we have to the user */
5758 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5759 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5760 		trace_seq_init(&iter->seq);
5761 
5762 	/*
5763 	 * If there was nothing to send to user, in spite of consuming trace
5764 	 * entries, go back to wait for more entries.
5765 	 */
5766 	if (sret == -EBUSY)
5767 		goto waitagain;
5768 
5769 out:
5770 	mutex_unlock(&iter->mutex);
5771 
5772 	return sret;
5773 }
5774 
5775 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5776 				     unsigned int idx)
5777 {
5778 	__free_page(spd->pages[idx]);
5779 }
5780 
5781 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5782 	.can_merge		= 0,
5783 	.confirm		= generic_pipe_buf_confirm,
5784 	.release		= generic_pipe_buf_release,
5785 	.steal			= generic_pipe_buf_steal,
5786 	.get			= generic_pipe_buf_get,
5787 };
5788 
5789 static size_t
5790 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5791 {
5792 	size_t count;
5793 	int save_len;
5794 	int ret;
5795 
5796 	/* Seq buffer is page-sized, exactly what we need. */
5797 	for (;;) {
5798 		save_len = iter->seq.seq.len;
5799 		ret = print_trace_line(iter);
5800 
5801 		if (trace_seq_has_overflowed(&iter->seq)) {
5802 			iter->seq.seq.len = save_len;
5803 			break;
5804 		}
5805 
5806 		/*
5807 		 * This should not be hit, because it should only
5808 		 * be set if the iter->seq overflowed. But check it
5809 		 * anyway to be safe.
5810 		 */
5811 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5812 			iter->seq.seq.len = save_len;
5813 			break;
5814 		}
5815 
5816 		count = trace_seq_used(&iter->seq) - save_len;
5817 		if (rem < count) {
5818 			rem = 0;
5819 			iter->seq.seq.len = save_len;
5820 			break;
5821 		}
5822 
5823 		if (ret != TRACE_TYPE_NO_CONSUME)
5824 			trace_consume(iter);
5825 		rem -= count;
5826 		if (!trace_find_next_entry_inc(iter))	{
5827 			rem = 0;
5828 			iter->ent = NULL;
5829 			break;
5830 		}
5831 	}
5832 
5833 	return rem;
5834 }
5835 
5836 static ssize_t tracing_splice_read_pipe(struct file *filp,
5837 					loff_t *ppos,
5838 					struct pipe_inode_info *pipe,
5839 					size_t len,
5840 					unsigned int flags)
5841 {
5842 	struct page *pages_def[PIPE_DEF_BUFFERS];
5843 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5844 	struct trace_iterator *iter = filp->private_data;
5845 	struct splice_pipe_desc spd = {
5846 		.pages		= pages_def,
5847 		.partial	= partial_def,
5848 		.nr_pages	= 0, /* This gets updated below. */
5849 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5850 		.ops		= &tracing_pipe_buf_ops,
5851 		.spd_release	= tracing_spd_release_pipe,
5852 	};
5853 	ssize_t ret;
5854 	size_t rem;
5855 	unsigned int i;
5856 
5857 	if (splice_grow_spd(pipe, &spd))
5858 		return -ENOMEM;
5859 
5860 	mutex_lock(&iter->mutex);
5861 
5862 	if (iter->trace->splice_read) {
5863 		ret = iter->trace->splice_read(iter, filp,
5864 					       ppos, pipe, len, flags);
5865 		if (ret)
5866 			goto out_err;
5867 	}
5868 
5869 	ret = tracing_wait_pipe(filp);
5870 	if (ret <= 0)
5871 		goto out_err;
5872 
5873 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5874 		ret = -EFAULT;
5875 		goto out_err;
5876 	}
5877 
5878 	trace_event_read_lock();
5879 	trace_access_lock(iter->cpu_file);
5880 
5881 	/* Fill as many pages as possible. */
5882 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5883 		spd.pages[i] = alloc_page(GFP_KERNEL);
5884 		if (!spd.pages[i])
5885 			break;
5886 
5887 		rem = tracing_fill_pipe_page(rem, iter);
5888 
5889 		/* Copy the data into the page, so we can start over. */
5890 		ret = trace_seq_to_buffer(&iter->seq,
5891 					  page_address(spd.pages[i]),
5892 					  trace_seq_used(&iter->seq));
5893 		if (ret < 0) {
5894 			__free_page(spd.pages[i]);
5895 			break;
5896 		}
5897 		spd.partial[i].offset = 0;
5898 		spd.partial[i].len = trace_seq_used(&iter->seq);
5899 
5900 		trace_seq_init(&iter->seq);
5901 	}
5902 
5903 	trace_access_unlock(iter->cpu_file);
5904 	trace_event_read_unlock();
5905 	mutex_unlock(&iter->mutex);
5906 
5907 	spd.nr_pages = i;
5908 
5909 	if (i)
5910 		ret = splice_to_pipe(pipe, &spd);
5911 	else
5912 		ret = 0;
5913 out:
5914 	splice_shrink_spd(&spd);
5915 	return ret;
5916 
5917 out_err:
5918 	mutex_unlock(&iter->mutex);
5919 	goto out;
5920 }
5921 
5922 static ssize_t
5923 tracing_entries_read(struct file *filp, char __user *ubuf,
5924 		     size_t cnt, loff_t *ppos)
5925 {
5926 	struct inode *inode = file_inode(filp);
5927 	struct trace_array *tr = inode->i_private;
5928 	int cpu = tracing_get_cpu(inode);
5929 	char buf[64];
5930 	int r = 0;
5931 	ssize_t ret;
5932 
5933 	mutex_lock(&trace_types_lock);
5934 
5935 	if (cpu == RING_BUFFER_ALL_CPUS) {
5936 		int cpu, buf_size_same;
5937 		unsigned long size;
5938 
5939 		size = 0;
5940 		buf_size_same = 1;
5941 		/* check if all cpu sizes are same */
5942 		for_each_tracing_cpu(cpu) {
5943 			/* fill in the size from first enabled cpu */
5944 			if (size == 0)
5945 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5946 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5947 				buf_size_same = 0;
5948 				break;
5949 			}
5950 		}
5951 
5952 		if (buf_size_same) {
5953 			if (!ring_buffer_expanded)
5954 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5955 					    size >> 10,
5956 					    trace_buf_size >> 10);
5957 			else
5958 				r = sprintf(buf, "%lu\n", size >> 10);
5959 		} else
5960 			r = sprintf(buf, "X\n");
5961 	} else
5962 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5963 
5964 	mutex_unlock(&trace_types_lock);
5965 
5966 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5967 	return ret;
5968 }
5969 
5970 static ssize_t
5971 tracing_entries_write(struct file *filp, const char __user *ubuf,
5972 		      size_t cnt, loff_t *ppos)
5973 {
5974 	struct inode *inode = file_inode(filp);
5975 	struct trace_array *tr = inode->i_private;
5976 	unsigned long val;
5977 	int ret;
5978 
5979 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5980 	if (ret)
5981 		return ret;
5982 
5983 	/* must have at least 1 entry */
5984 	if (!val)
5985 		return -EINVAL;
5986 
5987 	/* value is in KB */
5988 	val <<= 10;
5989 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5990 	if (ret < 0)
5991 		return ret;
5992 
5993 	*ppos += cnt;
5994 
5995 	return cnt;
5996 }
5997 
5998 static ssize_t
5999 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6000 				size_t cnt, loff_t *ppos)
6001 {
6002 	struct trace_array *tr = filp->private_data;
6003 	char buf[64];
6004 	int r, cpu;
6005 	unsigned long size = 0, expanded_size = 0;
6006 
6007 	mutex_lock(&trace_types_lock);
6008 	for_each_tracing_cpu(cpu) {
6009 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6010 		if (!ring_buffer_expanded)
6011 			expanded_size += trace_buf_size >> 10;
6012 	}
6013 	if (ring_buffer_expanded)
6014 		r = sprintf(buf, "%lu\n", size);
6015 	else
6016 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6017 	mutex_unlock(&trace_types_lock);
6018 
6019 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6020 }
6021 
6022 static ssize_t
6023 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6024 			  size_t cnt, loff_t *ppos)
6025 {
6026 	/*
6027 	 * There is no need to read what the user has written, this function
6028 	 * is just to make sure that there is no error when "echo" is used
6029 	 */
6030 
6031 	*ppos += cnt;
6032 
6033 	return cnt;
6034 }
6035 
6036 static int
6037 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6038 {
6039 	struct trace_array *tr = inode->i_private;
6040 
6041 	/* disable tracing ? */
6042 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6043 		tracer_tracing_off(tr);
6044 	/* resize the ring buffer to 0 */
6045 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6046 
6047 	trace_array_put(tr);
6048 
6049 	return 0;
6050 }
6051 
6052 static ssize_t
6053 tracing_mark_write(struct file *filp, const char __user *ubuf,
6054 					size_t cnt, loff_t *fpos)
6055 {
6056 	struct trace_array *tr = filp->private_data;
6057 	struct ring_buffer_event *event;
6058 	struct ring_buffer *buffer;
6059 	struct print_entry *entry;
6060 	unsigned long irq_flags;
6061 	const char faulted[] = "<faulted>";
6062 	ssize_t written;
6063 	int size;
6064 	int len;
6065 
6066 /* Used in tracing_mark_raw_write() as well */
6067 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6068 
6069 	if (tracing_disabled)
6070 		return -EINVAL;
6071 
6072 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6073 		return -EINVAL;
6074 
6075 	if (cnt > TRACE_BUF_SIZE)
6076 		cnt = TRACE_BUF_SIZE;
6077 
6078 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6079 
6080 	local_save_flags(irq_flags);
6081 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6082 
6083 	/* If less than "<faulted>", then make sure we can still add that */
6084 	if (cnt < FAULTED_SIZE)
6085 		size += FAULTED_SIZE - cnt;
6086 
6087 	buffer = tr->trace_buffer.buffer;
6088 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6089 					    irq_flags, preempt_count());
6090 	if (unlikely(!event))
6091 		/* Ring buffer disabled, return as if not open for write */
6092 		return -EBADF;
6093 
6094 	entry = ring_buffer_event_data(event);
6095 	entry->ip = _THIS_IP_;
6096 
6097 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6098 	if (len) {
6099 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6100 		cnt = FAULTED_SIZE;
6101 		written = -EFAULT;
6102 	} else
6103 		written = cnt;
6104 	len = cnt;
6105 
6106 	if (entry->buf[cnt - 1] != '\n') {
6107 		entry->buf[cnt] = '\n';
6108 		entry->buf[cnt + 1] = '\0';
6109 	} else
6110 		entry->buf[cnt] = '\0';
6111 
6112 	__buffer_unlock_commit(buffer, event);
6113 
6114 	if (written > 0)
6115 		*fpos += written;
6116 
6117 	return written;
6118 }
6119 
6120 /* Limit it for now to 3K (including tag) */
6121 #define RAW_DATA_MAX_SIZE (1024*3)
6122 
6123 static ssize_t
6124 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6125 					size_t cnt, loff_t *fpos)
6126 {
6127 	struct trace_array *tr = filp->private_data;
6128 	struct ring_buffer_event *event;
6129 	struct ring_buffer *buffer;
6130 	struct raw_data_entry *entry;
6131 	const char faulted[] = "<faulted>";
6132 	unsigned long irq_flags;
6133 	ssize_t written;
6134 	int size;
6135 	int len;
6136 
6137 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6138 
6139 	if (tracing_disabled)
6140 		return -EINVAL;
6141 
6142 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6143 		return -EINVAL;
6144 
6145 	/* The marker must at least have a tag id */
6146 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6147 		return -EINVAL;
6148 
6149 	if (cnt > TRACE_BUF_SIZE)
6150 		cnt = TRACE_BUF_SIZE;
6151 
6152 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6153 
6154 	local_save_flags(irq_flags);
6155 	size = sizeof(*entry) + cnt;
6156 	if (cnt < FAULT_SIZE_ID)
6157 		size += FAULT_SIZE_ID - cnt;
6158 
6159 	buffer = tr->trace_buffer.buffer;
6160 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6161 					    irq_flags, preempt_count());
6162 	if (!event)
6163 		/* Ring buffer disabled, return as if not open for write */
6164 		return -EBADF;
6165 
6166 	entry = ring_buffer_event_data(event);
6167 
6168 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6169 	if (len) {
6170 		entry->id = -1;
6171 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6172 		written = -EFAULT;
6173 	} else
6174 		written = cnt;
6175 
6176 	__buffer_unlock_commit(buffer, event);
6177 
6178 	if (written > 0)
6179 		*fpos += written;
6180 
6181 	return written;
6182 }
6183 
6184 static int tracing_clock_show(struct seq_file *m, void *v)
6185 {
6186 	struct trace_array *tr = m->private;
6187 	int i;
6188 
6189 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6190 		seq_printf(m,
6191 			"%s%s%s%s", i ? " " : "",
6192 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6193 			i == tr->clock_id ? "]" : "");
6194 	seq_putc(m, '\n');
6195 
6196 	return 0;
6197 }
6198 
6199 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6200 {
6201 	int i;
6202 
6203 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6204 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6205 			break;
6206 	}
6207 	if (i == ARRAY_SIZE(trace_clocks))
6208 		return -EINVAL;
6209 
6210 	mutex_lock(&trace_types_lock);
6211 
6212 	tr->clock_id = i;
6213 
6214 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6215 
6216 	/*
6217 	 * New clock may not be consistent with the previous clock.
6218 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6219 	 */
6220 	tracing_reset_online_cpus(&tr->trace_buffer);
6221 
6222 #ifdef CONFIG_TRACER_MAX_TRACE
6223 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6224 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6225 	tracing_reset_online_cpus(&tr->max_buffer);
6226 #endif
6227 
6228 	mutex_unlock(&trace_types_lock);
6229 
6230 	return 0;
6231 }
6232 
6233 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6234 				   size_t cnt, loff_t *fpos)
6235 {
6236 	struct seq_file *m = filp->private_data;
6237 	struct trace_array *tr = m->private;
6238 	char buf[64];
6239 	const char *clockstr;
6240 	int ret;
6241 
6242 	if (cnt >= sizeof(buf))
6243 		return -EINVAL;
6244 
6245 	if (copy_from_user(buf, ubuf, cnt))
6246 		return -EFAULT;
6247 
6248 	buf[cnt] = 0;
6249 
6250 	clockstr = strstrip(buf);
6251 
6252 	ret = tracing_set_clock(tr, clockstr);
6253 	if (ret)
6254 		return ret;
6255 
6256 	*fpos += cnt;
6257 
6258 	return cnt;
6259 }
6260 
6261 static int tracing_clock_open(struct inode *inode, struct file *file)
6262 {
6263 	struct trace_array *tr = inode->i_private;
6264 	int ret;
6265 
6266 	if (tracing_disabled)
6267 		return -ENODEV;
6268 
6269 	if (trace_array_get(tr))
6270 		return -ENODEV;
6271 
6272 	ret = single_open(file, tracing_clock_show, inode->i_private);
6273 	if (ret < 0)
6274 		trace_array_put(tr);
6275 
6276 	return ret;
6277 }
6278 
6279 struct ftrace_buffer_info {
6280 	struct trace_iterator	iter;
6281 	void			*spare;
6282 	unsigned int		spare_cpu;
6283 	unsigned int		read;
6284 };
6285 
6286 #ifdef CONFIG_TRACER_SNAPSHOT
6287 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6288 {
6289 	struct trace_array *tr = inode->i_private;
6290 	struct trace_iterator *iter;
6291 	struct seq_file *m;
6292 	int ret = 0;
6293 
6294 	if (trace_array_get(tr) < 0)
6295 		return -ENODEV;
6296 
6297 	if (file->f_mode & FMODE_READ) {
6298 		iter = __tracing_open(inode, file, true);
6299 		if (IS_ERR(iter))
6300 			ret = PTR_ERR(iter);
6301 	} else {
6302 		/* Writes still need the seq_file to hold the private data */
6303 		ret = -ENOMEM;
6304 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6305 		if (!m)
6306 			goto out;
6307 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6308 		if (!iter) {
6309 			kfree(m);
6310 			goto out;
6311 		}
6312 		ret = 0;
6313 
6314 		iter->tr = tr;
6315 		iter->trace_buffer = &tr->max_buffer;
6316 		iter->cpu_file = tracing_get_cpu(inode);
6317 		m->private = iter;
6318 		file->private_data = m;
6319 	}
6320 out:
6321 	if (ret < 0)
6322 		trace_array_put(tr);
6323 
6324 	return ret;
6325 }
6326 
6327 static ssize_t
6328 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6329 		       loff_t *ppos)
6330 {
6331 	struct seq_file *m = filp->private_data;
6332 	struct trace_iterator *iter = m->private;
6333 	struct trace_array *tr = iter->tr;
6334 	unsigned long val;
6335 	int ret;
6336 
6337 	ret = tracing_update_buffers();
6338 	if (ret < 0)
6339 		return ret;
6340 
6341 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6342 	if (ret)
6343 		return ret;
6344 
6345 	mutex_lock(&trace_types_lock);
6346 
6347 	if (tr->current_trace->use_max_tr) {
6348 		ret = -EBUSY;
6349 		goto out;
6350 	}
6351 
6352 	switch (val) {
6353 	case 0:
6354 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6355 			ret = -EINVAL;
6356 			break;
6357 		}
6358 		if (tr->allocated_snapshot)
6359 			free_snapshot(tr);
6360 		break;
6361 	case 1:
6362 /* Only allow per-cpu swap if the ring buffer supports it */
6363 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6364 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6365 			ret = -EINVAL;
6366 			break;
6367 		}
6368 #endif
6369 		if (!tr->allocated_snapshot) {
6370 			ret = alloc_snapshot(tr);
6371 			if (ret < 0)
6372 				break;
6373 		}
6374 		local_irq_disable();
6375 		/* Now, we're going to swap */
6376 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6377 			update_max_tr(tr, current, smp_processor_id());
6378 		else
6379 			update_max_tr_single(tr, current, iter->cpu_file);
6380 		local_irq_enable();
6381 		break;
6382 	default:
6383 		if (tr->allocated_snapshot) {
6384 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6385 				tracing_reset_online_cpus(&tr->max_buffer);
6386 			else
6387 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6388 		}
6389 		break;
6390 	}
6391 
6392 	if (ret >= 0) {
6393 		*ppos += cnt;
6394 		ret = cnt;
6395 	}
6396 out:
6397 	mutex_unlock(&trace_types_lock);
6398 	return ret;
6399 }
6400 
6401 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6402 {
6403 	struct seq_file *m = file->private_data;
6404 	int ret;
6405 
6406 	ret = tracing_release(inode, file);
6407 
6408 	if (file->f_mode & FMODE_READ)
6409 		return ret;
6410 
6411 	/* If write only, the seq_file is just a stub */
6412 	if (m)
6413 		kfree(m->private);
6414 	kfree(m);
6415 
6416 	return 0;
6417 }
6418 
6419 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6420 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6421 				    size_t count, loff_t *ppos);
6422 static int tracing_buffers_release(struct inode *inode, struct file *file);
6423 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6424 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6425 
6426 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6427 {
6428 	struct ftrace_buffer_info *info;
6429 	int ret;
6430 
6431 	ret = tracing_buffers_open(inode, filp);
6432 	if (ret < 0)
6433 		return ret;
6434 
6435 	info = filp->private_data;
6436 
6437 	if (info->iter.trace->use_max_tr) {
6438 		tracing_buffers_release(inode, filp);
6439 		return -EBUSY;
6440 	}
6441 
6442 	info->iter.snapshot = true;
6443 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6444 
6445 	return ret;
6446 }
6447 
6448 #endif /* CONFIG_TRACER_SNAPSHOT */
6449 
6450 
6451 static const struct file_operations tracing_thresh_fops = {
6452 	.open		= tracing_open_generic,
6453 	.read		= tracing_thresh_read,
6454 	.write		= tracing_thresh_write,
6455 	.llseek		= generic_file_llseek,
6456 };
6457 
6458 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6459 static const struct file_operations tracing_max_lat_fops = {
6460 	.open		= tracing_open_generic,
6461 	.read		= tracing_max_lat_read,
6462 	.write		= tracing_max_lat_write,
6463 	.llseek		= generic_file_llseek,
6464 };
6465 #endif
6466 
6467 static const struct file_operations set_tracer_fops = {
6468 	.open		= tracing_open_generic,
6469 	.read		= tracing_set_trace_read,
6470 	.write		= tracing_set_trace_write,
6471 	.llseek		= generic_file_llseek,
6472 };
6473 
6474 static const struct file_operations tracing_pipe_fops = {
6475 	.open		= tracing_open_pipe,
6476 	.poll		= tracing_poll_pipe,
6477 	.read		= tracing_read_pipe,
6478 	.splice_read	= tracing_splice_read_pipe,
6479 	.release	= tracing_release_pipe,
6480 	.llseek		= no_llseek,
6481 };
6482 
6483 static const struct file_operations tracing_entries_fops = {
6484 	.open		= tracing_open_generic_tr,
6485 	.read		= tracing_entries_read,
6486 	.write		= tracing_entries_write,
6487 	.llseek		= generic_file_llseek,
6488 	.release	= tracing_release_generic_tr,
6489 };
6490 
6491 static const struct file_operations tracing_total_entries_fops = {
6492 	.open		= tracing_open_generic_tr,
6493 	.read		= tracing_total_entries_read,
6494 	.llseek		= generic_file_llseek,
6495 	.release	= tracing_release_generic_tr,
6496 };
6497 
6498 static const struct file_operations tracing_free_buffer_fops = {
6499 	.open		= tracing_open_generic_tr,
6500 	.write		= tracing_free_buffer_write,
6501 	.release	= tracing_free_buffer_release,
6502 };
6503 
6504 static const struct file_operations tracing_mark_fops = {
6505 	.open		= tracing_open_generic_tr,
6506 	.write		= tracing_mark_write,
6507 	.llseek		= generic_file_llseek,
6508 	.release	= tracing_release_generic_tr,
6509 };
6510 
6511 static const struct file_operations tracing_mark_raw_fops = {
6512 	.open		= tracing_open_generic_tr,
6513 	.write		= tracing_mark_raw_write,
6514 	.llseek		= generic_file_llseek,
6515 	.release	= tracing_release_generic_tr,
6516 };
6517 
6518 static const struct file_operations trace_clock_fops = {
6519 	.open		= tracing_clock_open,
6520 	.read		= seq_read,
6521 	.llseek		= seq_lseek,
6522 	.release	= tracing_single_release_tr,
6523 	.write		= tracing_clock_write,
6524 };
6525 
6526 #ifdef CONFIG_TRACER_SNAPSHOT
6527 static const struct file_operations snapshot_fops = {
6528 	.open		= tracing_snapshot_open,
6529 	.read		= seq_read,
6530 	.write		= tracing_snapshot_write,
6531 	.llseek		= tracing_lseek,
6532 	.release	= tracing_snapshot_release,
6533 };
6534 
6535 static const struct file_operations snapshot_raw_fops = {
6536 	.open		= snapshot_raw_open,
6537 	.read		= tracing_buffers_read,
6538 	.release	= tracing_buffers_release,
6539 	.splice_read	= tracing_buffers_splice_read,
6540 	.llseek		= no_llseek,
6541 };
6542 
6543 #endif /* CONFIG_TRACER_SNAPSHOT */
6544 
6545 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6546 {
6547 	struct trace_array *tr = inode->i_private;
6548 	struct ftrace_buffer_info *info;
6549 	int ret;
6550 
6551 	if (tracing_disabled)
6552 		return -ENODEV;
6553 
6554 	if (trace_array_get(tr) < 0)
6555 		return -ENODEV;
6556 
6557 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6558 	if (!info) {
6559 		trace_array_put(tr);
6560 		return -ENOMEM;
6561 	}
6562 
6563 	mutex_lock(&trace_types_lock);
6564 
6565 	info->iter.tr		= tr;
6566 	info->iter.cpu_file	= tracing_get_cpu(inode);
6567 	info->iter.trace	= tr->current_trace;
6568 	info->iter.trace_buffer = &tr->trace_buffer;
6569 	info->spare		= NULL;
6570 	/* Force reading ring buffer for first read */
6571 	info->read		= (unsigned int)-1;
6572 
6573 	filp->private_data = info;
6574 
6575 	tr->current_trace->ref++;
6576 
6577 	mutex_unlock(&trace_types_lock);
6578 
6579 	ret = nonseekable_open(inode, filp);
6580 	if (ret < 0)
6581 		trace_array_put(tr);
6582 
6583 	return ret;
6584 }
6585 
6586 static unsigned int
6587 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6588 {
6589 	struct ftrace_buffer_info *info = filp->private_data;
6590 	struct trace_iterator *iter = &info->iter;
6591 
6592 	return trace_poll(iter, filp, poll_table);
6593 }
6594 
6595 static ssize_t
6596 tracing_buffers_read(struct file *filp, char __user *ubuf,
6597 		     size_t count, loff_t *ppos)
6598 {
6599 	struct ftrace_buffer_info *info = filp->private_data;
6600 	struct trace_iterator *iter = &info->iter;
6601 	ssize_t ret;
6602 	ssize_t size;
6603 
6604 	if (!count)
6605 		return 0;
6606 
6607 #ifdef CONFIG_TRACER_MAX_TRACE
6608 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6609 		return -EBUSY;
6610 #endif
6611 
6612 	if (!info->spare) {
6613 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6614 							  iter->cpu_file);
6615 		info->spare_cpu = iter->cpu_file;
6616 	}
6617 	if (!info->spare)
6618 		return -ENOMEM;
6619 
6620 	/* Do we have previous read data to read? */
6621 	if (info->read < PAGE_SIZE)
6622 		goto read;
6623 
6624  again:
6625 	trace_access_lock(iter->cpu_file);
6626 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6627 				    &info->spare,
6628 				    count,
6629 				    iter->cpu_file, 0);
6630 	trace_access_unlock(iter->cpu_file);
6631 
6632 	if (ret < 0) {
6633 		if (trace_empty(iter)) {
6634 			if ((filp->f_flags & O_NONBLOCK))
6635 				return -EAGAIN;
6636 
6637 			ret = wait_on_pipe(iter, false);
6638 			if (ret)
6639 				return ret;
6640 
6641 			goto again;
6642 		}
6643 		return 0;
6644 	}
6645 
6646 	info->read = 0;
6647  read:
6648 	size = PAGE_SIZE - info->read;
6649 	if (size > count)
6650 		size = count;
6651 
6652 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6653 	if (ret == size)
6654 		return -EFAULT;
6655 
6656 	size -= ret;
6657 
6658 	*ppos += size;
6659 	info->read += size;
6660 
6661 	return size;
6662 }
6663 
6664 static int tracing_buffers_release(struct inode *inode, struct file *file)
6665 {
6666 	struct ftrace_buffer_info *info = file->private_data;
6667 	struct trace_iterator *iter = &info->iter;
6668 
6669 	mutex_lock(&trace_types_lock);
6670 
6671 	iter->tr->current_trace->ref--;
6672 
6673 	__trace_array_put(iter->tr);
6674 
6675 	if (info->spare)
6676 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6677 					   info->spare_cpu, info->spare);
6678 	kfree(info);
6679 
6680 	mutex_unlock(&trace_types_lock);
6681 
6682 	return 0;
6683 }
6684 
6685 struct buffer_ref {
6686 	struct ring_buffer	*buffer;
6687 	void			*page;
6688 	int			cpu;
6689 	int			ref;
6690 };
6691 
6692 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6693 				    struct pipe_buffer *buf)
6694 {
6695 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6696 
6697 	if (--ref->ref)
6698 		return;
6699 
6700 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6701 	kfree(ref);
6702 	buf->private = 0;
6703 }
6704 
6705 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6706 				struct pipe_buffer *buf)
6707 {
6708 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6709 
6710 	ref->ref++;
6711 }
6712 
6713 /* Pipe buffer operations for a buffer. */
6714 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6715 	.can_merge		= 0,
6716 	.confirm		= generic_pipe_buf_confirm,
6717 	.release		= buffer_pipe_buf_release,
6718 	.steal			= generic_pipe_buf_steal,
6719 	.get			= buffer_pipe_buf_get,
6720 };
6721 
6722 /*
6723  * Callback from splice_to_pipe(), if we need to release some pages
6724  * at the end of the spd in case we error'ed out in filling the pipe.
6725  */
6726 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6727 {
6728 	struct buffer_ref *ref =
6729 		(struct buffer_ref *)spd->partial[i].private;
6730 
6731 	if (--ref->ref)
6732 		return;
6733 
6734 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6735 	kfree(ref);
6736 	spd->partial[i].private = 0;
6737 }
6738 
6739 static ssize_t
6740 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6741 			    struct pipe_inode_info *pipe, size_t len,
6742 			    unsigned int flags)
6743 {
6744 	struct ftrace_buffer_info *info = file->private_data;
6745 	struct trace_iterator *iter = &info->iter;
6746 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6747 	struct page *pages_def[PIPE_DEF_BUFFERS];
6748 	struct splice_pipe_desc spd = {
6749 		.pages		= pages_def,
6750 		.partial	= partial_def,
6751 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6752 		.ops		= &buffer_pipe_buf_ops,
6753 		.spd_release	= buffer_spd_release,
6754 	};
6755 	struct buffer_ref *ref;
6756 	int entries, size, i;
6757 	ssize_t ret = 0;
6758 
6759 #ifdef CONFIG_TRACER_MAX_TRACE
6760 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6761 		return -EBUSY;
6762 #endif
6763 
6764 	if (*ppos & (PAGE_SIZE - 1))
6765 		return -EINVAL;
6766 
6767 	if (len & (PAGE_SIZE - 1)) {
6768 		if (len < PAGE_SIZE)
6769 			return -EINVAL;
6770 		len &= PAGE_MASK;
6771 	}
6772 
6773 	if (splice_grow_spd(pipe, &spd))
6774 		return -ENOMEM;
6775 
6776  again:
6777 	trace_access_lock(iter->cpu_file);
6778 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6779 
6780 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6781 		struct page *page;
6782 		int r;
6783 
6784 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6785 		if (!ref) {
6786 			ret = -ENOMEM;
6787 			break;
6788 		}
6789 
6790 		ref->ref = 1;
6791 		ref->buffer = iter->trace_buffer->buffer;
6792 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6793 		if (!ref->page) {
6794 			ret = -ENOMEM;
6795 			kfree(ref);
6796 			break;
6797 		}
6798 		ref->cpu = iter->cpu_file;
6799 
6800 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6801 					  len, iter->cpu_file, 1);
6802 		if (r < 0) {
6803 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6804 						   ref->page);
6805 			kfree(ref);
6806 			break;
6807 		}
6808 
6809 		/*
6810 		 * zero out any left over data, this is going to
6811 		 * user land.
6812 		 */
6813 		size = ring_buffer_page_len(ref->page);
6814 		if (size < PAGE_SIZE)
6815 			memset(ref->page + size, 0, PAGE_SIZE - size);
6816 
6817 		page = virt_to_page(ref->page);
6818 
6819 		spd.pages[i] = page;
6820 		spd.partial[i].len = PAGE_SIZE;
6821 		spd.partial[i].offset = 0;
6822 		spd.partial[i].private = (unsigned long)ref;
6823 		spd.nr_pages++;
6824 		*ppos += PAGE_SIZE;
6825 
6826 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6827 	}
6828 
6829 	trace_access_unlock(iter->cpu_file);
6830 	spd.nr_pages = i;
6831 
6832 	/* did we read anything? */
6833 	if (!spd.nr_pages) {
6834 		if (ret)
6835 			goto out;
6836 
6837 		ret = -EAGAIN;
6838 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6839 			goto out;
6840 
6841 		ret = wait_on_pipe(iter, true);
6842 		if (ret)
6843 			goto out;
6844 
6845 		goto again;
6846 	}
6847 
6848 	ret = splice_to_pipe(pipe, &spd);
6849 out:
6850 	splice_shrink_spd(&spd);
6851 
6852 	return ret;
6853 }
6854 
6855 static const struct file_operations tracing_buffers_fops = {
6856 	.open		= tracing_buffers_open,
6857 	.read		= tracing_buffers_read,
6858 	.poll		= tracing_buffers_poll,
6859 	.release	= tracing_buffers_release,
6860 	.splice_read	= tracing_buffers_splice_read,
6861 	.llseek		= no_llseek,
6862 };
6863 
6864 static ssize_t
6865 tracing_stats_read(struct file *filp, char __user *ubuf,
6866 		   size_t count, loff_t *ppos)
6867 {
6868 	struct inode *inode = file_inode(filp);
6869 	struct trace_array *tr = inode->i_private;
6870 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6871 	int cpu = tracing_get_cpu(inode);
6872 	struct trace_seq *s;
6873 	unsigned long cnt;
6874 	unsigned long long t;
6875 	unsigned long usec_rem;
6876 
6877 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6878 	if (!s)
6879 		return -ENOMEM;
6880 
6881 	trace_seq_init(s);
6882 
6883 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6884 	trace_seq_printf(s, "entries: %ld\n", cnt);
6885 
6886 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6887 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6888 
6889 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6890 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6891 
6892 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6893 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6894 
6895 	if (trace_clocks[tr->clock_id].in_ns) {
6896 		/* local or global for trace_clock */
6897 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6898 		usec_rem = do_div(t, USEC_PER_SEC);
6899 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6900 								t, usec_rem);
6901 
6902 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6903 		usec_rem = do_div(t, USEC_PER_SEC);
6904 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6905 	} else {
6906 		/* counter or tsc mode for trace_clock */
6907 		trace_seq_printf(s, "oldest event ts: %llu\n",
6908 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6909 
6910 		trace_seq_printf(s, "now ts: %llu\n",
6911 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6912 	}
6913 
6914 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6915 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6916 
6917 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6918 	trace_seq_printf(s, "read events: %ld\n", cnt);
6919 
6920 	count = simple_read_from_buffer(ubuf, count, ppos,
6921 					s->buffer, trace_seq_used(s));
6922 
6923 	kfree(s);
6924 
6925 	return count;
6926 }
6927 
6928 static const struct file_operations tracing_stats_fops = {
6929 	.open		= tracing_open_generic_tr,
6930 	.read		= tracing_stats_read,
6931 	.llseek		= generic_file_llseek,
6932 	.release	= tracing_release_generic_tr,
6933 };
6934 
6935 #ifdef CONFIG_DYNAMIC_FTRACE
6936 
6937 static ssize_t
6938 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6939 		  size_t cnt, loff_t *ppos)
6940 {
6941 	unsigned long *p = filp->private_data;
6942 	char buf[64]; /* Not too big for a shallow stack */
6943 	int r;
6944 
6945 	r = scnprintf(buf, 63, "%ld", *p);
6946 	buf[r++] = '\n';
6947 
6948 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6949 }
6950 
6951 static const struct file_operations tracing_dyn_info_fops = {
6952 	.open		= tracing_open_generic,
6953 	.read		= tracing_read_dyn_info,
6954 	.llseek		= generic_file_llseek,
6955 };
6956 #endif /* CONFIG_DYNAMIC_FTRACE */
6957 
6958 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6959 static void
6960 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6961 		struct trace_array *tr, struct ftrace_probe_ops *ops,
6962 		void *data)
6963 {
6964 	tracing_snapshot_instance(tr);
6965 }
6966 
6967 static void
6968 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6969 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
6970 		      void *data)
6971 {
6972 	struct ftrace_func_mapper *mapper = data;
6973 	long *count = NULL;
6974 
6975 	if (mapper)
6976 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6977 
6978 	if (count) {
6979 
6980 		if (*count <= 0)
6981 			return;
6982 
6983 		(*count)--;
6984 	}
6985 
6986 	tracing_snapshot_instance(tr);
6987 }
6988 
6989 static int
6990 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6991 		      struct ftrace_probe_ops *ops, void *data)
6992 {
6993 	struct ftrace_func_mapper *mapper = data;
6994 	long *count = NULL;
6995 
6996 	seq_printf(m, "%ps:", (void *)ip);
6997 
6998 	seq_puts(m, "snapshot");
6999 
7000 	if (mapper)
7001 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7002 
7003 	if (count)
7004 		seq_printf(m, ":count=%ld\n", *count);
7005 	else
7006 		seq_puts(m, ":unlimited\n");
7007 
7008 	return 0;
7009 }
7010 
7011 static int
7012 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7013 		     unsigned long ip, void *init_data, void **data)
7014 {
7015 	struct ftrace_func_mapper *mapper = *data;
7016 
7017 	if (!mapper) {
7018 		mapper = allocate_ftrace_func_mapper();
7019 		if (!mapper)
7020 			return -ENOMEM;
7021 		*data = mapper;
7022 	}
7023 
7024 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7025 }
7026 
7027 static void
7028 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7029 		     unsigned long ip, void *data)
7030 {
7031 	struct ftrace_func_mapper *mapper = data;
7032 
7033 	if (!ip) {
7034 		if (!mapper)
7035 			return;
7036 		free_ftrace_func_mapper(mapper, NULL);
7037 		return;
7038 	}
7039 
7040 	ftrace_func_mapper_remove_ip(mapper, ip);
7041 }
7042 
7043 static struct ftrace_probe_ops snapshot_probe_ops = {
7044 	.func			= ftrace_snapshot,
7045 	.print			= ftrace_snapshot_print,
7046 };
7047 
7048 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7049 	.func			= ftrace_count_snapshot,
7050 	.print			= ftrace_snapshot_print,
7051 	.init			= ftrace_snapshot_init,
7052 	.free			= ftrace_snapshot_free,
7053 };
7054 
7055 static int
7056 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7057 			       char *glob, char *cmd, char *param, int enable)
7058 {
7059 	struct ftrace_probe_ops *ops;
7060 	void *count = (void *)-1;
7061 	char *number;
7062 	int ret;
7063 
7064 	if (!tr)
7065 		return -ENODEV;
7066 
7067 	/* hash funcs only work with set_ftrace_filter */
7068 	if (!enable)
7069 		return -EINVAL;
7070 
7071 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7072 
7073 	if (glob[0] == '!')
7074 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7075 
7076 	if (!param)
7077 		goto out_reg;
7078 
7079 	number = strsep(&param, ":");
7080 
7081 	if (!strlen(number))
7082 		goto out_reg;
7083 
7084 	/*
7085 	 * We use the callback data field (which is a pointer)
7086 	 * as our counter.
7087 	 */
7088 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7089 	if (ret)
7090 		return ret;
7091 
7092  out_reg:
7093 	ret = alloc_snapshot(tr);
7094 	if (ret < 0)
7095 		goto out;
7096 
7097 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7098 
7099  out:
7100 	return ret < 0 ? ret : 0;
7101 }
7102 
7103 static struct ftrace_func_command ftrace_snapshot_cmd = {
7104 	.name			= "snapshot",
7105 	.func			= ftrace_trace_snapshot_callback,
7106 };
7107 
7108 static __init int register_snapshot_cmd(void)
7109 {
7110 	return register_ftrace_command(&ftrace_snapshot_cmd);
7111 }
7112 #else
7113 static inline __init int register_snapshot_cmd(void) { return 0; }
7114 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7115 
7116 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7117 {
7118 	if (WARN_ON(!tr->dir))
7119 		return ERR_PTR(-ENODEV);
7120 
7121 	/* Top directory uses NULL as the parent */
7122 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7123 		return NULL;
7124 
7125 	/* All sub buffers have a descriptor */
7126 	return tr->dir;
7127 }
7128 
7129 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7130 {
7131 	struct dentry *d_tracer;
7132 
7133 	if (tr->percpu_dir)
7134 		return tr->percpu_dir;
7135 
7136 	d_tracer = tracing_get_dentry(tr);
7137 	if (IS_ERR(d_tracer))
7138 		return NULL;
7139 
7140 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7141 
7142 	WARN_ONCE(!tr->percpu_dir,
7143 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7144 
7145 	return tr->percpu_dir;
7146 }
7147 
7148 static struct dentry *
7149 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7150 		      void *data, long cpu, const struct file_operations *fops)
7151 {
7152 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7153 
7154 	if (ret) /* See tracing_get_cpu() */
7155 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7156 	return ret;
7157 }
7158 
7159 static void
7160 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7161 {
7162 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7163 	struct dentry *d_cpu;
7164 	char cpu_dir[30]; /* 30 characters should be more than enough */
7165 
7166 	if (!d_percpu)
7167 		return;
7168 
7169 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7170 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7171 	if (!d_cpu) {
7172 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7173 		return;
7174 	}
7175 
7176 	/* per cpu trace_pipe */
7177 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7178 				tr, cpu, &tracing_pipe_fops);
7179 
7180 	/* per cpu trace */
7181 	trace_create_cpu_file("trace", 0644, d_cpu,
7182 				tr, cpu, &tracing_fops);
7183 
7184 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7185 				tr, cpu, &tracing_buffers_fops);
7186 
7187 	trace_create_cpu_file("stats", 0444, d_cpu,
7188 				tr, cpu, &tracing_stats_fops);
7189 
7190 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7191 				tr, cpu, &tracing_entries_fops);
7192 
7193 #ifdef CONFIG_TRACER_SNAPSHOT
7194 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7195 				tr, cpu, &snapshot_fops);
7196 
7197 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7198 				tr, cpu, &snapshot_raw_fops);
7199 #endif
7200 }
7201 
7202 #ifdef CONFIG_FTRACE_SELFTEST
7203 /* Let selftest have access to static functions in this file */
7204 #include "trace_selftest.c"
7205 #endif
7206 
7207 static ssize_t
7208 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7209 			loff_t *ppos)
7210 {
7211 	struct trace_option_dentry *topt = filp->private_data;
7212 	char *buf;
7213 
7214 	if (topt->flags->val & topt->opt->bit)
7215 		buf = "1\n";
7216 	else
7217 		buf = "0\n";
7218 
7219 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7220 }
7221 
7222 static ssize_t
7223 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7224 			 loff_t *ppos)
7225 {
7226 	struct trace_option_dentry *topt = filp->private_data;
7227 	unsigned long val;
7228 	int ret;
7229 
7230 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7231 	if (ret)
7232 		return ret;
7233 
7234 	if (val != 0 && val != 1)
7235 		return -EINVAL;
7236 
7237 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7238 		mutex_lock(&trace_types_lock);
7239 		ret = __set_tracer_option(topt->tr, topt->flags,
7240 					  topt->opt, !val);
7241 		mutex_unlock(&trace_types_lock);
7242 		if (ret)
7243 			return ret;
7244 	}
7245 
7246 	*ppos += cnt;
7247 
7248 	return cnt;
7249 }
7250 
7251 
7252 static const struct file_operations trace_options_fops = {
7253 	.open = tracing_open_generic,
7254 	.read = trace_options_read,
7255 	.write = trace_options_write,
7256 	.llseek	= generic_file_llseek,
7257 };
7258 
7259 /*
7260  * In order to pass in both the trace_array descriptor as well as the index
7261  * to the flag that the trace option file represents, the trace_array
7262  * has a character array of trace_flags_index[], which holds the index
7263  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7264  * The address of this character array is passed to the flag option file
7265  * read/write callbacks.
7266  *
7267  * In order to extract both the index and the trace_array descriptor,
7268  * get_tr_index() uses the following algorithm.
7269  *
7270  *   idx = *ptr;
7271  *
7272  * As the pointer itself contains the address of the index (remember
7273  * index[1] == 1).
7274  *
7275  * Then to get the trace_array descriptor, by subtracting that index
7276  * from the ptr, we get to the start of the index itself.
7277  *
7278  *   ptr - idx == &index[0]
7279  *
7280  * Then a simple container_of() from that pointer gets us to the
7281  * trace_array descriptor.
7282  */
7283 static void get_tr_index(void *data, struct trace_array **ptr,
7284 			 unsigned int *pindex)
7285 {
7286 	*pindex = *(unsigned char *)data;
7287 
7288 	*ptr = container_of(data - *pindex, struct trace_array,
7289 			    trace_flags_index);
7290 }
7291 
7292 static ssize_t
7293 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7294 			loff_t *ppos)
7295 {
7296 	void *tr_index = filp->private_data;
7297 	struct trace_array *tr;
7298 	unsigned int index;
7299 	char *buf;
7300 
7301 	get_tr_index(tr_index, &tr, &index);
7302 
7303 	if (tr->trace_flags & (1 << index))
7304 		buf = "1\n";
7305 	else
7306 		buf = "0\n";
7307 
7308 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7309 }
7310 
7311 static ssize_t
7312 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7313 			 loff_t *ppos)
7314 {
7315 	void *tr_index = filp->private_data;
7316 	struct trace_array *tr;
7317 	unsigned int index;
7318 	unsigned long val;
7319 	int ret;
7320 
7321 	get_tr_index(tr_index, &tr, &index);
7322 
7323 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7324 	if (ret)
7325 		return ret;
7326 
7327 	if (val != 0 && val != 1)
7328 		return -EINVAL;
7329 
7330 	mutex_lock(&trace_types_lock);
7331 	ret = set_tracer_flag(tr, 1 << index, val);
7332 	mutex_unlock(&trace_types_lock);
7333 
7334 	if (ret < 0)
7335 		return ret;
7336 
7337 	*ppos += cnt;
7338 
7339 	return cnt;
7340 }
7341 
7342 static const struct file_operations trace_options_core_fops = {
7343 	.open = tracing_open_generic,
7344 	.read = trace_options_core_read,
7345 	.write = trace_options_core_write,
7346 	.llseek = generic_file_llseek,
7347 };
7348 
7349 struct dentry *trace_create_file(const char *name,
7350 				 umode_t mode,
7351 				 struct dentry *parent,
7352 				 void *data,
7353 				 const struct file_operations *fops)
7354 {
7355 	struct dentry *ret;
7356 
7357 	ret = tracefs_create_file(name, mode, parent, data, fops);
7358 	if (!ret)
7359 		pr_warn("Could not create tracefs '%s' entry\n", name);
7360 
7361 	return ret;
7362 }
7363 
7364 
7365 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7366 {
7367 	struct dentry *d_tracer;
7368 
7369 	if (tr->options)
7370 		return tr->options;
7371 
7372 	d_tracer = tracing_get_dentry(tr);
7373 	if (IS_ERR(d_tracer))
7374 		return NULL;
7375 
7376 	tr->options = tracefs_create_dir("options", d_tracer);
7377 	if (!tr->options) {
7378 		pr_warn("Could not create tracefs directory 'options'\n");
7379 		return NULL;
7380 	}
7381 
7382 	return tr->options;
7383 }
7384 
7385 static void
7386 create_trace_option_file(struct trace_array *tr,
7387 			 struct trace_option_dentry *topt,
7388 			 struct tracer_flags *flags,
7389 			 struct tracer_opt *opt)
7390 {
7391 	struct dentry *t_options;
7392 
7393 	t_options = trace_options_init_dentry(tr);
7394 	if (!t_options)
7395 		return;
7396 
7397 	topt->flags = flags;
7398 	topt->opt = opt;
7399 	topt->tr = tr;
7400 
7401 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7402 				    &trace_options_fops);
7403 
7404 }
7405 
7406 static void
7407 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7408 {
7409 	struct trace_option_dentry *topts;
7410 	struct trace_options *tr_topts;
7411 	struct tracer_flags *flags;
7412 	struct tracer_opt *opts;
7413 	int cnt;
7414 	int i;
7415 
7416 	if (!tracer)
7417 		return;
7418 
7419 	flags = tracer->flags;
7420 
7421 	if (!flags || !flags->opts)
7422 		return;
7423 
7424 	/*
7425 	 * If this is an instance, only create flags for tracers
7426 	 * the instance may have.
7427 	 */
7428 	if (!trace_ok_for_array(tracer, tr))
7429 		return;
7430 
7431 	for (i = 0; i < tr->nr_topts; i++) {
7432 		/* Make sure there's no duplicate flags. */
7433 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7434 			return;
7435 	}
7436 
7437 	opts = flags->opts;
7438 
7439 	for (cnt = 0; opts[cnt].name; cnt++)
7440 		;
7441 
7442 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7443 	if (!topts)
7444 		return;
7445 
7446 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7447 			    GFP_KERNEL);
7448 	if (!tr_topts) {
7449 		kfree(topts);
7450 		return;
7451 	}
7452 
7453 	tr->topts = tr_topts;
7454 	tr->topts[tr->nr_topts].tracer = tracer;
7455 	tr->topts[tr->nr_topts].topts = topts;
7456 	tr->nr_topts++;
7457 
7458 	for (cnt = 0; opts[cnt].name; cnt++) {
7459 		create_trace_option_file(tr, &topts[cnt], flags,
7460 					 &opts[cnt]);
7461 		WARN_ONCE(topts[cnt].entry == NULL,
7462 			  "Failed to create trace option: %s",
7463 			  opts[cnt].name);
7464 	}
7465 }
7466 
7467 static struct dentry *
7468 create_trace_option_core_file(struct trace_array *tr,
7469 			      const char *option, long index)
7470 {
7471 	struct dentry *t_options;
7472 
7473 	t_options = trace_options_init_dentry(tr);
7474 	if (!t_options)
7475 		return NULL;
7476 
7477 	return trace_create_file(option, 0644, t_options,
7478 				 (void *)&tr->trace_flags_index[index],
7479 				 &trace_options_core_fops);
7480 }
7481 
7482 static void create_trace_options_dir(struct trace_array *tr)
7483 {
7484 	struct dentry *t_options;
7485 	bool top_level = tr == &global_trace;
7486 	int i;
7487 
7488 	t_options = trace_options_init_dentry(tr);
7489 	if (!t_options)
7490 		return;
7491 
7492 	for (i = 0; trace_options[i]; i++) {
7493 		if (top_level ||
7494 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7495 			create_trace_option_core_file(tr, trace_options[i], i);
7496 	}
7497 }
7498 
7499 static ssize_t
7500 rb_simple_read(struct file *filp, char __user *ubuf,
7501 	       size_t cnt, loff_t *ppos)
7502 {
7503 	struct trace_array *tr = filp->private_data;
7504 	char buf[64];
7505 	int r;
7506 
7507 	r = tracer_tracing_is_on(tr);
7508 	r = sprintf(buf, "%d\n", r);
7509 
7510 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7511 }
7512 
7513 static ssize_t
7514 rb_simple_write(struct file *filp, const char __user *ubuf,
7515 		size_t cnt, loff_t *ppos)
7516 {
7517 	struct trace_array *tr = filp->private_data;
7518 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7519 	unsigned long val;
7520 	int ret;
7521 
7522 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7523 	if (ret)
7524 		return ret;
7525 
7526 	if (buffer) {
7527 		mutex_lock(&trace_types_lock);
7528 		if (val) {
7529 			tracer_tracing_on(tr);
7530 			if (tr->current_trace->start)
7531 				tr->current_trace->start(tr);
7532 		} else {
7533 			tracer_tracing_off(tr);
7534 			if (tr->current_trace->stop)
7535 				tr->current_trace->stop(tr);
7536 		}
7537 		mutex_unlock(&trace_types_lock);
7538 	}
7539 
7540 	(*ppos)++;
7541 
7542 	return cnt;
7543 }
7544 
7545 static const struct file_operations rb_simple_fops = {
7546 	.open		= tracing_open_generic_tr,
7547 	.read		= rb_simple_read,
7548 	.write		= rb_simple_write,
7549 	.release	= tracing_release_generic_tr,
7550 	.llseek		= default_llseek,
7551 };
7552 
7553 struct dentry *trace_instance_dir;
7554 
7555 static void
7556 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7557 
7558 static int
7559 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7560 {
7561 	enum ring_buffer_flags rb_flags;
7562 
7563 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7564 
7565 	buf->tr = tr;
7566 
7567 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7568 	if (!buf->buffer)
7569 		return -ENOMEM;
7570 
7571 	buf->data = alloc_percpu(struct trace_array_cpu);
7572 	if (!buf->data) {
7573 		ring_buffer_free(buf->buffer);
7574 		return -ENOMEM;
7575 	}
7576 
7577 	/* Allocate the first page for all buffers */
7578 	set_buffer_entries(&tr->trace_buffer,
7579 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7580 
7581 	return 0;
7582 }
7583 
7584 static int allocate_trace_buffers(struct trace_array *tr, int size)
7585 {
7586 	int ret;
7587 
7588 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7589 	if (ret)
7590 		return ret;
7591 
7592 #ifdef CONFIG_TRACER_MAX_TRACE
7593 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7594 				    allocate_snapshot ? size : 1);
7595 	if (WARN_ON(ret)) {
7596 		ring_buffer_free(tr->trace_buffer.buffer);
7597 		free_percpu(tr->trace_buffer.data);
7598 		return -ENOMEM;
7599 	}
7600 	tr->allocated_snapshot = allocate_snapshot;
7601 
7602 	/*
7603 	 * Only the top level trace array gets its snapshot allocated
7604 	 * from the kernel command line.
7605 	 */
7606 	allocate_snapshot = false;
7607 #endif
7608 	return 0;
7609 }
7610 
7611 static void free_trace_buffer(struct trace_buffer *buf)
7612 {
7613 	if (buf->buffer) {
7614 		ring_buffer_free(buf->buffer);
7615 		buf->buffer = NULL;
7616 		free_percpu(buf->data);
7617 		buf->data = NULL;
7618 	}
7619 }
7620 
7621 static void free_trace_buffers(struct trace_array *tr)
7622 {
7623 	if (!tr)
7624 		return;
7625 
7626 	free_trace_buffer(&tr->trace_buffer);
7627 
7628 #ifdef CONFIG_TRACER_MAX_TRACE
7629 	free_trace_buffer(&tr->max_buffer);
7630 #endif
7631 }
7632 
7633 static void init_trace_flags_index(struct trace_array *tr)
7634 {
7635 	int i;
7636 
7637 	/* Used by the trace options files */
7638 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7639 		tr->trace_flags_index[i] = i;
7640 }
7641 
7642 static void __update_tracer_options(struct trace_array *tr)
7643 {
7644 	struct tracer *t;
7645 
7646 	for (t = trace_types; t; t = t->next)
7647 		add_tracer_options(tr, t);
7648 }
7649 
7650 static void update_tracer_options(struct trace_array *tr)
7651 {
7652 	mutex_lock(&trace_types_lock);
7653 	__update_tracer_options(tr);
7654 	mutex_unlock(&trace_types_lock);
7655 }
7656 
7657 static int instance_mkdir(const char *name)
7658 {
7659 	struct trace_array *tr;
7660 	int ret;
7661 
7662 	mutex_lock(&trace_types_lock);
7663 
7664 	ret = -EEXIST;
7665 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7666 		if (tr->name && strcmp(tr->name, name) == 0)
7667 			goto out_unlock;
7668 	}
7669 
7670 	ret = -ENOMEM;
7671 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7672 	if (!tr)
7673 		goto out_unlock;
7674 
7675 	tr->name = kstrdup(name, GFP_KERNEL);
7676 	if (!tr->name)
7677 		goto out_free_tr;
7678 
7679 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7680 		goto out_free_tr;
7681 
7682 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7683 
7684 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7685 
7686 	raw_spin_lock_init(&tr->start_lock);
7687 
7688 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7689 
7690 	tr->current_trace = &nop_trace;
7691 
7692 	INIT_LIST_HEAD(&tr->systems);
7693 	INIT_LIST_HEAD(&tr->events);
7694 
7695 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7696 		goto out_free_tr;
7697 
7698 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7699 	if (!tr->dir)
7700 		goto out_free_tr;
7701 
7702 	ret = event_trace_add_tracer(tr->dir, tr);
7703 	if (ret) {
7704 		tracefs_remove_recursive(tr->dir);
7705 		goto out_free_tr;
7706 	}
7707 
7708 	ftrace_init_trace_array(tr);
7709 
7710 	init_tracer_tracefs(tr, tr->dir);
7711 	init_trace_flags_index(tr);
7712 	__update_tracer_options(tr);
7713 
7714 	list_add(&tr->list, &ftrace_trace_arrays);
7715 
7716 	mutex_unlock(&trace_types_lock);
7717 
7718 	return 0;
7719 
7720  out_free_tr:
7721 	free_trace_buffers(tr);
7722 	free_cpumask_var(tr->tracing_cpumask);
7723 	kfree(tr->name);
7724 	kfree(tr);
7725 
7726  out_unlock:
7727 	mutex_unlock(&trace_types_lock);
7728 
7729 	return ret;
7730 
7731 }
7732 
7733 static int instance_rmdir(const char *name)
7734 {
7735 	struct trace_array *tr;
7736 	int found = 0;
7737 	int ret;
7738 	int i;
7739 
7740 	mutex_lock(&trace_types_lock);
7741 
7742 	ret = -ENODEV;
7743 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7744 		if (tr->name && strcmp(tr->name, name) == 0) {
7745 			found = 1;
7746 			break;
7747 		}
7748 	}
7749 	if (!found)
7750 		goto out_unlock;
7751 
7752 	ret = -EBUSY;
7753 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7754 		goto out_unlock;
7755 
7756 	list_del(&tr->list);
7757 
7758 	/* Disable all the flags that were enabled coming in */
7759 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7760 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7761 			set_tracer_flag(tr, 1 << i, 0);
7762 	}
7763 
7764 	tracing_set_nop(tr);
7765 	clear_ftrace_function_probes(tr);
7766 	event_trace_del_tracer(tr);
7767 	ftrace_clear_pids(tr);
7768 	ftrace_destroy_function_files(tr);
7769 	tracefs_remove_recursive(tr->dir);
7770 	free_trace_buffers(tr);
7771 
7772 	for (i = 0; i < tr->nr_topts; i++) {
7773 		kfree(tr->topts[i].topts);
7774 	}
7775 	kfree(tr->topts);
7776 
7777 	free_cpumask_var(tr->tracing_cpumask);
7778 	kfree(tr->name);
7779 	kfree(tr);
7780 
7781 	ret = 0;
7782 
7783  out_unlock:
7784 	mutex_unlock(&trace_types_lock);
7785 
7786 	return ret;
7787 }
7788 
7789 static __init void create_trace_instances(struct dentry *d_tracer)
7790 {
7791 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7792 							 instance_mkdir,
7793 							 instance_rmdir);
7794 	if (WARN_ON(!trace_instance_dir))
7795 		return;
7796 }
7797 
7798 static void
7799 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7800 {
7801 	int cpu;
7802 
7803 	trace_create_file("available_tracers", 0444, d_tracer,
7804 			tr, &show_traces_fops);
7805 
7806 	trace_create_file("current_tracer", 0644, d_tracer,
7807 			tr, &set_tracer_fops);
7808 
7809 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7810 			  tr, &tracing_cpumask_fops);
7811 
7812 	trace_create_file("trace_options", 0644, d_tracer,
7813 			  tr, &tracing_iter_fops);
7814 
7815 	trace_create_file("trace", 0644, d_tracer,
7816 			  tr, &tracing_fops);
7817 
7818 	trace_create_file("trace_pipe", 0444, d_tracer,
7819 			  tr, &tracing_pipe_fops);
7820 
7821 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7822 			  tr, &tracing_entries_fops);
7823 
7824 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7825 			  tr, &tracing_total_entries_fops);
7826 
7827 	trace_create_file("free_buffer", 0200, d_tracer,
7828 			  tr, &tracing_free_buffer_fops);
7829 
7830 	trace_create_file("trace_marker", 0220, d_tracer,
7831 			  tr, &tracing_mark_fops);
7832 
7833 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7834 			  tr, &tracing_mark_raw_fops);
7835 
7836 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7837 			  &trace_clock_fops);
7838 
7839 	trace_create_file("tracing_on", 0644, d_tracer,
7840 			  tr, &rb_simple_fops);
7841 
7842 	create_trace_options_dir(tr);
7843 
7844 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7845 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7846 			&tr->max_latency, &tracing_max_lat_fops);
7847 #endif
7848 
7849 	if (ftrace_create_function_files(tr, d_tracer))
7850 		WARN(1, "Could not allocate function filter files");
7851 
7852 #ifdef CONFIG_TRACER_SNAPSHOT
7853 	trace_create_file("snapshot", 0644, d_tracer,
7854 			  tr, &snapshot_fops);
7855 #endif
7856 
7857 	for_each_tracing_cpu(cpu)
7858 		tracing_init_tracefs_percpu(tr, cpu);
7859 
7860 	ftrace_init_tracefs(tr, d_tracer);
7861 }
7862 
7863 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7864 {
7865 	struct vfsmount *mnt;
7866 	struct file_system_type *type;
7867 
7868 	/*
7869 	 * To maintain backward compatibility for tools that mount
7870 	 * debugfs to get to the tracing facility, tracefs is automatically
7871 	 * mounted to the debugfs/tracing directory.
7872 	 */
7873 	type = get_fs_type("tracefs");
7874 	if (!type)
7875 		return NULL;
7876 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7877 	put_filesystem(type);
7878 	if (IS_ERR(mnt))
7879 		return NULL;
7880 	mntget(mnt);
7881 
7882 	return mnt;
7883 }
7884 
7885 /**
7886  * tracing_init_dentry - initialize top level trace array
7887  *
7888  * This is called when creating files or directories in the tracing
7889  * directory. It is called via fs_initcall() by any of the boot up code
7890  * and expects to return the dentry of the top level tracing directory.
7891  */
7892 struct dentry *tracing_init_dentry(void)
7893 {
7894 	struct trace_array *tr = &global_trace;
7895 
7896 	/* The top level trace array uses  NULL as parent */
7897 	if (tr->dir)
7898 		return NULL;
7899 
7900 	if (WARN_ON(!tracefs_initialized()) ||
7901 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7902 		 WARN_ON(!debugfs_initialized())))
7903 		return ERR_PTR(-ENODEV);
7904 
7905 	/*
7906 	 * As there may still be users that expect the tracing
7907 	 * files to exist in debugfs/tracing, we must automount
7908 	 * the tracefs file system there, so older tools still
7909 	 * work with the newer kerenl.
7910 	 */
7911 	tr->dir = debugfs_create_automount("tracing", NULL,
7912 					   trace_automount, NULL);
7913 	if (!tr->dir) {
7914 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7915 		return ERR_PTR(-ENOMEM);
7916 	}
7917 
7918 	return NULL;
7919 }
7920 
7921 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7922 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7923 
7924 static void __init trace_eval_init(void)
7925 {
7926 	int len;
7927 
7928 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7929 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7930 }
7931 
7932 #ifdef CONFIG_MODULES
7933 static void trace_module_add_evals(struct module *mod)
7934 {
7935 	if (!mod->num_trace_evals)
7936 		return;
7937 
7938 	/*
7939 	 * Modules with bad taint do not have events created, do
7940 	 * not bother with enums either.
7941 	 */
7942 	if (trace_module_has_bad_taint(mod))
7943 		return;
7944 
7945 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7946 }
7947 
7948 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7949 static void trace_module_remove_evals(struct module *mod)
7950 {
7951 	union trace_eval_map_item *map;
7952 	union trace_eval_map_item **last = &trace_eval_maps;
7953 
7954 	if (!mod->num_trace_evals)
7955 		return;
7956 
7957 	mutex_lock(&trace_eval_mutex);
7958 
7959 	map = trace_eval_maps;
7960 
7961 	while (map) {
7962 		if (map->head.mod == mod)
7963 			break;
7964 		map = trace_eval_jmp_to_tail(map);
7965 		last = &map->tail.next;
7966 		map = map->tail.next;
7967 	}
7968 	if (!map)
7969 		goto out;
7970 
7971 	*last = trace_eval_jmp_to_tail(map)->tail.next;
7972 	kfree(map);
7973  out:
7974 	mutex_unlock(&trace_eval_mutex);
7975 }
7976 #else
7977 static inline void trace_module_remove_evals(struct module *mod) { }
7978 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7979 
7980 static int trace_module_notify(struct notifier_block *self,
7981 			       unsigned long val, void *data)
7982 {
7983 	struct module *mod = data;
7984 
7985 	switch (val) {
7986 	case MODULE_STATE_COMING:
7987 		trace_module_add_evals(mod);
7988 		break;
7989 	case MODULE_STATE_GOING:
7990 		trace_module_remove_evals(mod);
7991 		break;
7992 	}
7993 
7994 	return 0;
7995 }
7996 
7997 static struct notifier_block trace_module_nb = {
7998 	.notifier_call = trace_module_notify,
7999 	.priority = 0,
8000 };
8001 #endif /* CONFIG_MODULES */
8002 
8003 static __init int tracer_init_tracefs(void)
8004 {
8005 	struct dentry *d_tracer;
8006 
8007 	trace_access_lock_init();
8008 
8009 	d_tracer = tracing_init_dentry();
8010 	if (IS_ERR(d_tracer))
8011 		return 0;
8012 
8013 	init_tracer_tracefs(&global_trace, d_tracer);
8014 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8015 
8016 	trace_create_file("tracing_thresh", 0644, d_tracer,
8017 			&global_trace, &tracing_thresh_fops);
8018 
8019 	trace_create_file("README", 0444, d_tracer,
8020 			NULL, &tracing_readme_fops);
8021 
8022 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8023 			NULL, &tracing_saved_cmdlines_fops);
8024 
8025 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8026 			  NULL, &tracing_saved_cmdlines_size_fops);
8027 
8028 	trace_create_file("saved_tgids", 0444, d_tracer,
8029 			NULL, &tracing_saved_tgids_fops);
8030 
8031 	trace_eval_init();
8032 
8033 	trace_create_eval_file(d_tracer);
8034 
8035 #ifdef CONFIG_MODULES
8036 	register_module_notifier(&trace_module_nb);
8037 #endif
8038 
8039 #ifdef CONFIG_DYNAMIC_FTRACE
8040 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8041 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8042 #endif
8043 
8044 	create_trace_instances(d_tracer);
8045 
8046 	update_tracer_options(&global_trace);
8047 
8048 	return 0;
8049 }
8050 
8051 static int trace_panic_handler(struct notifier_block *this,
8052 			       unsigned long event, void *unused)
8053 {
8054 	if (ftrace_dump_on_oops)
8055 		ftrace_dump(ftrace_dump_on_oops);
8056 	return NOTIFY_OK;
8057 }
8058 
8059 static struct notifier_block trace_panic_notifier = {
8060 	.notifier_call  = trace_panic_handler,
8061 	.next           = NULL,
8062 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8063 };
8064 
8065 static int trace_die_handler(struct notifier_block *self,
8066 			     unsigned long val,
8067 			     void *data)
8068 {
8069 	switch (val) {
8070 	case DIE_OOPS:
8071 		if (ftrace_dump_on_oops)
8072 			ftrace_dump(ftrace_dump_on_oops);
8073 		break;
8074 	default:
8075 		break;
8076 	}
8077 	return NOTIFY_OK;
8078 }
8079 
8080 static struct notifier_block trace_die_notifier = {
8081 	.notifier_call = trace_die_handler,
8082 	.priority = 200
8083 };
8084 
8085 /*
8086  * printk is set to max of 1024, we really don't need it that big.
8087  * Nothing should be printing 1000 characters anyway.
8088  */
8089 #define TRACE_MAX_PRINT		1000
8090 
8091 /*
8092  * Define here KERN_TRACE so that we have one place to modify
8093  * it if we decide to change what log level the ftrace dump
8094  * should be at.
8095  */
8096 #define KERN_TRACE		KERN_EMERG
8097 
8098 void
8099 trace_printk_seq(struct trace_seq *s)
8100 {
8101 	/* Probably should print a warning here. */
8102 	if (s->seq.len >= TRACE_MAX_PRINT)
8103 		s->seq.len = TRACE_MAX_PRINT;
8104 
8105 	/*
8106 	 * More paranoid code. Although the buffer size is set to
8107 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8108 	 * an extra layer of protection.
8109 	 */
8110 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8111 		s->seq.len = s->seq.size - 1;
8112 
8113 	/* should be zero ended, but we are paranoid. */
8114 	s->buffer[s->seq.len] = 0;
8115 
8116 	printk(KERN_TRACE "%s", s->buffer);
8117 
8118 	trace_seq_init(s);
8119 }
8120 
8121 void trace_init_global_iter(struct trace_iterator *iter)
8122 {
8123 	iter->tr = &global_trace;
8124 	iter->trace = iter->tr->current_trace;
8125 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8126 	iter->trace_buffer = &global_trace.trace_buffer;
8127 
8128 	if (iter->trace && iter->trace->open)
8129 		iter->trace->open(iter);
8130 
8131 	/* Annotate start of buffers if we had overruns */
8132 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8133 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8134 
8135 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8136 	if (trace_clocks[iter->tr->clock_id].in_ns)
8137 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8138 }
8139 
8140 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8141 {
8142 	/* use static because iter can be a bit big for the stack */
8143 	static struct trace_iterator iter;
8144 	static atomic_t dump_running;
8145 	struct trace_array *tr = &global_trace;
8146 	unsigned int old_userobj;
8147 	unsigned long flags;
8148 	int cnt = 0, cpu;
8149 
8150 	/* Only allow one dump user at a time. */
8151 	if (atomic_inc_return(&dump_running) != 1) {
8152 		atomic_dec(&dump_running);
8153 		return;
8154 	}
8155 
8156 	/*
8157 	 * Always turn off tracing when we dump.
8158 	 * We don't need to show trace output of what happens
8159 	 * between multiple crashes.
8160 	 *
8161 	 * If the user does a sysrq-z, then they can re-enable
8162 	 * tracing with echo 1 > tracing_on.
8163 	 */
8164 	tracing_off();
8165 
8166 	local_irq_save(flags);
8167 
8168 	/* Simulate the iterator */
8169 	trace_init_global_iter(&iter);
8170 
8171 	for_each_tracing_cpu(cpu) {
8172 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8173 	}
8174 
8175 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8176 
8177 	/* don't look at user memory in panic mode */
8178 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8179 
8180 	switch (oops_dump_mode) {
8181 	case DUMP_ALL:
8182 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8183 		break;
8184 	case DUMP_ORIG:
8185 		iter.cpu_file = raw_smp_processor_id();
8186 		break;
8187 	case DUMP_NONE:
8188 		goto out_enable;
8189 	default:
8190 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8191 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8192 	}
8193 
8194 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8195 
8196 	/* Did function tracer already get disabled? */
8197 	if (ftrace_is_dead()) {
8198 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8199 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8200 	}
8201 
8202 	/*
8203 	 * We need to stop all tracing on all CPUS to read the
8204 	 * the next buffer. This is a bit expensive, but is
8205 	 * not done often. We fill all what we can read,
8206 	 * and then release the locks again.
8207 	 */
8208 
8209 	while (!trace_empty(&iter)) {
8210 
8211 		if (!cnt)
8212 			printk(KERN_TRACE "---------------------------------\n");
8213 
8214 		cnt++;
8215 
8216 		/* reset all but tr, trace, and overruns */
8217 		memset(&iter.seq, 0,
8218 		       sizeof(struct trace_iterator) -
8219 		       offsetof(struct trace_iterator, seq));
8220 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8221 		iter.pos = -1;
8222 
8223 		if (trace_find_next_entry_inc(&iter) != NULL) {
8224 			int ret;
8225 
8226 			ret = print_trace_line(&iter);
8227 			if (ret != TRACE_TYPE_NO_CONSUME)
8228 				trace_consume(&iter);
8229 		}
8230 		touch_nmi_watchdog();
8231 
8232 		trace_printk_seq(&iter.seq);
8233 	}
8234 
8235 	if (!cnt)
8236 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8237 	else
8238 		printk(KERN_TRACE "---------------------------------\n");
8239 
8240  out_enable:
8241 	tr->trace_flags |= old_userobj;
8242 
8243 	for_each_tracing_cpu(cpu) {
8244 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8245 	}
8246  	atomic_dec(&dump_running);
8247 	local_irq_restore(flags);
8248 }
8249 EXPORT_SYMBOL_GPL(ftrace_dump);
8250 
8251 __init static int tracer_alloc_buffers(void)
8252 {
8253 	int ring_buf_size;
8254 	int ret = -ENOMEM;
8255 
8256 	/*
8257 	 * Make sure we don't accidently add more trace options
8258 	 * than we have bits for.
8259 	 */
8260 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8261 
8262 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8263 		goto out;
8264 
8265 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8266 		goto out_free_buffer_mask;
8267 
8268 	/* Only allocate trace_printk buffers if a trace_printk exists */
8269 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8270 		/* Must be called before global_trace.buffer is allocated */
8271 		trace_printk_init_buffers();
8272 
8273 	/* To save memory, keep the ring buffer size to its minimum */
8274 	if (ring_buffer_expanded)
8275 		ring_buf_size = trace_buf_size;
8276 	else
8277 		ring_buf_size = 1;
8278 
8279 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8280 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8281 
8282 	raw_spin_lock_init(&global_trace.start_lock);
8283 
8284 	/*
8285 	 * The prepare callbacks allocates some memory for the ring buffer. We
8286 	 * don't free the buffer if the if the CPU goes down. If we were to free
8287 	 * the buffer, then the user would lose any trace that was in the
8288 	 * buffer. The memory will be removed once the "instance" is removed.
8289 	 */
8290 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8291 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8292 				      NULL);
8293 	if (ret < 0)
8294 		goto out_free_cpumask;
8295 	/* Used for event triggers */
8296 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8297 	if (!temp_buffer)
8298 		goto out_rm_hp_state;
8299 
8300 	if (trace_create_savedcmd() < 0)
8301 		goto out_free_temp_buffer;
8302 
8303 	/* TODO: make the number of buffers hot pluggable with CPUS */
8304 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8305 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8306 		WARN_ON(1);
8307 		goto out_free_savedcmd;
8308 	}
8309 
8310 	if (global_trace.buffer_disabled)
8311 		tracing_off();
8312 
8313 	if (trace_boot_clock) {
8314 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8315 		if (ret < 0)
8316 			pr_warn("Trace clock %s not defined, going back to default\n",
8317 				trace_boot_clock);
8318 	}
8319 
8320 	/*
8321 	 * register_tracer() might reference current_trace, so it
8322 	 * needs to be set before we register anything. This is
8323 	 * just a bootstrap of current_trace anyway.
8324 	 */
8325 	global_trace.current_trace = &nop_trace;
8326 
8327 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8328 
8329 	ftrace_init_global_array_ops(&global_trace);
8330 
8331 	init_trace_flags_index(&global_trace);
8332 
8333 	register_tracer(&nop_trace);
8334 
8335 	/* Function tracing may start here (via kernel command line) */
8336 	init_function_trace();
8337 
8338 	/* All seems OK, enable tracing */
8339 	tracing_disabled = 0;
8340 
8341 	atomic_notifier_chain_register(&panic_notifier_list,
8342 				       &trace_panic_notifier);
8343 
8344 	register_die_notifier(&trace_die_notifier);
8345 
8346 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8347 
8348 	INIT_LIST_HEAD(&global_trace.systems);
8349 	INIT_LIST_HEAD(&global_trace.events);
8350 	list_add(&global_trace.list, &ftrace_trace_arrays);
8351 
8352 	apply_trace_boot_options();
8353 
8354 	register_snapshot_cmd();
8355 
8356 	return 0;
8357 
8358 out_free_savedcmd:
8359 	free_saved_cmdlines_buffer(savedcmd);
8360 out_free_temp_buffer:
8361 	ring_buffer_free(temp_buffer);
8362 out_rm_hp_state:
8363 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8364 out_free_cpumask:
8365 	free_cpumask_var(global_trace.tracing_cpumask);
8366 out_free_buffer_mask:
8367 	free_cpumask_var(tracing_buffer_mask);
8368 out:
8369 	return ret;
8370 }
8371 
8372 void __init early_trace_init(void)
8373 {
8374 	if (tracepoint_printk) {
8375 		tracepoint_print_iter =
8376 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8377 		if (WARN_ON(!tracepoint_print_iter))
8378 			tracepoint_printk = 0;
8379 		else
8380 			static_key_enable(&tracepoint_printk_key.key);
8381 	}
8382 	tracer_alloc_buffers();
8383 }
8384 
8385 void __init trace_init(void)
8386 {
8387 	trace_event_init();
8388 }
8389 
8390 __init static int clear_boot_tracer(void)
8391 {
8392 	/*
8393 	 * The default tracer at boot buffer is an init section.
8394 	 * This function is called in lateinit. If we did not
8395 	 * find the boot tracer, then clear it out, to prevent
8396 	 * later registration from accessing the buffer that is
8397 	 * about to be freed.
8398 	 */
8399 	if (!default_bootup_tracer)
8400 		return 0;
8401 
8402 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8403 	       default_bootup_tracer);
8404 	default_bootup_tracer = NULL;
8405 
8406 	return 0;
8407 }
8408 
8409 fs_initcall(tracer_init_tracefs);
8410 late_initcall(clear_boot_tracer);
8411