xref: /openbmc/linux/kernel/trace/trace.c (revision 74ba9207e1adf1966c57450340534ae9742d00af)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
 * We need to change this state when a selftest is running.
 * A selftest will peek into the ring buffer to count the
 * entries inserted during the selftest, but concurrent
 * insertions into the ring buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	/* Module the saved maps belong to, if they are not built in */
	struct module			*mod;
	/* Count of the saved maps */
	unsigned long			length;
};
131 
132 union trace_eval_map_item;
133 
struct trace_eval_map_tail {
	/* Link to the next array of saved eval_map items */
	union trace_eval_map_item	*next;
	/*
	 * "end" points to NULL, as it must be different than "mod" or
	 * "eval_string".
	 * NOTE(review): this comment used to say "end" is first, but
	 * "next" is the first member - confirm the intended layout.
	 */
	const char			*end;
};
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163 				   unsigned long flags, int pc);
164 
165 #define MAX_TRACER_SIZE		100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 	default_bootup_tracer = bootup_tracer_buf;
175 	/* We are using ftrace early, expand it */
176 	ring_buffer_expanded = true;
177 	return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 	if (*str++ != '=' || !*str) {
184 		ftrace_dump_on_oops = DUMP_ALL;
185 		return 1;
186 	}
187 
188 	if (!strcmp("orig_cpu", str)) {
189 		ftrace_dump_on_oops = DUMP_ORIG;
190                 return 1;
191         }
192 
193         return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 		__disable_trace_on_warning = 1;
201 	return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 	allocate_snapshot = true;
208 	/* We also need the main ring buffer expanded */
209 	ring_buffer_expanded = true;
210 	return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 	return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 	trace_boot_clock = trace_boot_clock_buf;
231 	return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 		tracepoint_printk = 1;
239 	return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(u64 nsec)
244 {
245 	nsec += 500;
246 	do_div(nsec, 1000);
247 	return nsec;
248 }
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS						\
252 	(FUNCTION_DEFAULT_FLAGS |					\
253 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
254 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
255 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
256 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
260 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265 
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271 	.trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273 
274 LIST_HEAD(ftrace_trace_arrays);
275 
276 int trace_array_get(struct trace_array *this_tr)
277 {
278 	struct trace_array *tr;
279 	int ret = -ENODEV;
280 
281 	mutex_lock(&trace_types_lock);
282 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283 		if (tr == this_tr) {
284 			tr->ref++;
285 			ret = 0;
286 			break;
287 		}
288 	}
289 	mutex_unlock(&trace_types_lock);
290 
291 	return ret;
292 }
293 
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296 	WARN_ON(!this_tr->ref);
297 	this_tr->ref--;
298 }
299 
300 void trace_array_put(struct trace_array *this_tr)
301 {
302 	mutex_lock(&trace_types_lock);
303 	__trace_array_put(this_tr);
304 	mutex_unlock(&trace_types_lock);
305 }
306 
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308 			      struct ring_buffer *buffer,
309 			      struct ring_buffer_event *event)
310 {
311 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312 	    !filter_match_preds(call->filter, rec)) {
313 		__trace_event_discard_commit(buffer, event);
314 		return 1;
315 	}
316 
317 	return 0;
318 }
319 
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322 	vfree(pid_list->pids);
323 	kfree(pid_list);
324 }
325 
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336 	/*
337 	 * If pid_max changed after filtered_pids was created, we
338 	 * by default ignore all pids greater than the previous pid_max.
339 	 */
340 	if (search_pid >= filtered_pids->pid_max)
341 		return false;
342 
343 	return test_bit(search_pid, filtered_pids->pids);
344 }
345 
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358 	/*
359 	 * Return false, because if filtered_pids does not exist,
360 	 * all pids are good to trace.
361 	 */
362 	if (!filtered_pids)
363 		return false;
364 
365 	return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367 
368 /**
369  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * If adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381 				  struct task_struct *self,
382 				  struct task_struct *task)
383 {
384 	if (!pid_list)
385 		return;
386 
387 	/* For forks, we only add if the forking task is listed */
388 	if (self) {
389 		if (!trace_find_filtered_pid(pid_list, self->pid))
390 			return;
391 	}
392 
393 	/* Sorry, but we don't support pid_max changing after setting */
394 	if (task->pid >= pid_list->pid_max)
395 		return;
396 
397 	/* "self" is set for forks, and NULL for exits */
398 	if (self)
399 		set_bit(task->pid, pid_list->pids);
400 	else
401 		clear_bit(task->pid, pid_list->pids);
402 }
403 
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418 	unsigned long pid = (unsigned long)v;
419 
420 	(*pos)++;
421 
422 	/* pid already is +1 of the actual prevous bit */
423 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424 
425 	/* Return pid + 1 to allow zero to be represented */
426 	if (pid < pid_list->pid_max)
427 		return (void *)(pid + 1);
428 
429 	return NULL;
430 }
431 
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445 	unsigned long pid;
446 	loff_t l = 0;
447 
448 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449 	if (pid >= pid_list->pid_max)
450 		return NULL;
451 
452 	/* Return pid + 1 so that zero can be the exit value */
453 	for (pid++; pid && l < *pos;
454 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455 		;
456 	return (void *)pid;
457 }
458 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* Undo the +1 encoding used by the start/next helpers */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
474 
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE		127
477 
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479 		    struct trace_pid_list **new_pid_list,
480 		    const char __user *ubuf, size_t cnt)
481 {
482 	struct trace_pid_list *pid_list;
483 	struct trace_parser parser;
484 	unsigned long val;
485 	int nr_pids = 0;
486 	ssize_t read = 0;
487 	ssize_t ret = 0;
488 	loff_t pos;
489 	pid_t pid;
490 
491 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492 		return -ENOMEM;
493 
494 	/*
495 	 * Always recreate a new array. The write is an all or nothing
496 	 * operation. Always create a new array when adding new pids by
497 	 * the user. If the operation fails, then the current list is
498 	 * not modified.
499 	 */
500 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501 	if (!pid_list) {
502 		trace_parser_put(&parser);
503 		return -ENOMEM;
504 	}
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		trace_parser_put(&parser);
515 		kfree(pid_list);
516 		return -ENOMEM;
517 	}
518 
519 	if (filtered_pids) {
520 		/* copy the current bits to the new max */
521 		for_each_set_bit(pid, filtered_pids->pids,
522 				 filtered_pids->pid_max) {
523 			set_bit(pid, pid_list->pids);
524 			nr_pids++;
525 		}
526 	}
527 
528 	while (cnt > 0) {
529 
530 		pos = 0;
531 
532 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
533 		if (ret < 0 || !trace_parser_loaded(&parser))
534 			break;
535 
536 		read += ret;
537 		ubuf += ret;
538 		cnt -= ret;
539 
540 		ret = -EINVAL;
541 		if (kstrtoul(parser.buffer, 0, &val))
542 			break;
543 		if (val >= pid_list->pid_max)
544 			break;
545 
546 		pid = (pid_t)val;
547 
548 		set_bit(pid, pid_list->pids);
549 		nr_pids++;
550 
551 		trace_parser_clear(&parser);
552 		ret = 0;
553 	}
554 	trace_parser_put(&parser);
555 
556 	if (ret < 0) {
557 		trace_free_pid_list(pid_list);
558 		return ret;
559 	}
560 
561 	if (!nr_pids) {
562 		/* Cleared the list of pids */
563 		trace_free_pid_list(pid_list);
564 		read = ret;
565 		pid_list = NULL;
566 	}
567 
568 	*new_pid_list = pid_list;
569 
570 	return read;
571 }
572 
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575 	u64 ts;
576 
577 	/* Early boot up does not have a buffer yet */
578 	if (!buf->buffer)
579 		return trace_clock_local();
580 
581 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
582 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583 
584 	return ts;
585 }
586 
587 u64 ftrace_now(int cpu)
588 {
589 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591 
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
596  * mirror flag "buffer_disabled" to be used in fast paths such as for
597  * the irqsoff tracer. But it may be inaccurate due to races. If you
598  * need to know the accurate state, use tracing_is_on() which is a little
599  * slower, but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603 	/*
604 	 * For quick access (irqsoff uses this in fast path), just
605 	 * return the mirror variable of the state of the ring buffer.
606 	 * It's a little racy, but we don't really care.
607 	 */
608 	smp_rmb();
609 	return !global_trace.buffer_disabled;
610 }
611 
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to a low number of 16384.
618  * If the dump on oops happens, it will be much appreciated
619  * to not have to wait for all that output. Anyway this can be
620  * boot time and run time configurable.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
623 
624 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625 
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer		*trace_types __read_mostly;
628 
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633 
634 /*
635  * serialize the access of the ring buffer
636  *
 * The ring buffer serializes readers, but that is only low level
 * protection. The validity of the events (which are returned by
 * ring_buffer_peek() etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
648  *
649  * These primitives allow multi process access to different cpu ring buffer
650  * concurrently.
651  *
652  * These primitives don't distinguish read-only and read-consume access.
653  * Multi read-only access are also serialized.
654  */
655 
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659 
660 static inline void trace_access_lock(int cpu)
661 {
662 	if (cpu == RING_BUFFER_ALL_CPUS) {
663 		/* gain it for accessing the whole ring buffer. */
664 		down_write(&all_cpu_access_lock);
665 	} else {
666 		/* gain it for accessing a cpu ring buffer. */
667 
668 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669 		down_read(&all_cpu_access_lock);
670 
671 		/* Secondly block other access to this @cpu ring buffer. */
672 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
673 	}
674 }
675 
676 static inline void trace_access_unlock(int cpu)
677 {
678 	if (cpu == RING_BUFFER_ALL_CPUS) {
679 		up_write(&all_cpu_access_lock);
680 	} else {
681 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682 		up_read(&all_cpu_access_lock);
683 	}
684 }
685 
686 static inline void trace_access_lock_init(void)
687 {
688 	int cpu;
689 
690 	for_each_possible_cpu(cpu)
691 		mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693 
694 #else
695 
696 static DEFINE_MUTEX(access_lock);
697 
698 static inline void trace_access_lock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_lock(&access_lock);
702 }
703 
704 static inline void trace_access_unlock(int cpu)
705 {
706 	(void)cpu;
707 	mutex_unlock(&access_lock);
708 }
709 
/*
 * !CONFIG_SMP variant: access_lock is set up by DEFINE_MUTEX(), so
 * there is nothing to initialize at run time.
 */
static inline void trace_access_lock_init(void)
{
}
713 
714 #endif
715 
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718 				 unsigned long flags,
719 				 int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721 				      struct ring_buffer *buffer,
722 				      unsigned long flags,
723 				      int skip, int pc, struct pt_regs *regs);
724 
725 #else
/* Empty stand-in used when CONFIG_STACKTRACE is not set. */
static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
					unsigned long flags,
					int skip, int pc, struct pt_regs *regs)
{
}
/* Empty stand-in used when CONFIG_STACKTRACE is not set. */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
{
}
737 
738 #endif
739 
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742 		  int type, unsigned long flags, int pc)
743 {
744 	struct trace_entry *ent = ring_buffer_event_data(event);
745 
746 	tracing_generic_entry_update(ent, flags, pc);
747 	ent->type = type;
748 }
749 
750 static __always_inline struct ring_buffer_event *
751 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
752 			  int type,
753 			  unsigned long len,
754 			  unsigned long flags, int pc)
755 {
756 	struct ring_buffer_event *event;
757 
758 	event = ring_buffer_lock_reserve(buffer, len);
759 	if (event != NULL)
760 		trace_event_setup(event, type, flags, pc);
761 
762 	return event;
763 }
764 
765 void tracer_tracing_on(struct trace_array *tr)
766 {
767 	if (tr->trace_buffer.buffer)
768 		ring_buffer_record_on(tr->trace_buffer.buffer);
769 	/*
770 	 * This flag is looked at when buffers haven't been allocated
771 	 * yet, or by some tracers (like irqsoff), that just want to
772 	 * know if the ring buffer has been disabled, but it can handle
773 	 * races of where it gets disabled but we still do a record.
774 	 * As the check is in the fast path of the tracers, it is more
775 	 * important to be fast than accurate.
776 	 */
777 	tr->buffer_disabled = 0;
778 	/* Make the flag seen by readers */
779 	smp_wmb();
780 }
781 
782 /**
783  * tracing_on - enable tracing buffers
784  *
785  * This function enables tracing buffers that may have been
786  * disabled with tracing_off.
787  */
788 void tracing_on(void)
789 {
790 	tracer_tracing_on(&global_trace);
791 }
792 EXPORT_SYMBOL_GPL(tracing_on);
793 
794 
795 static __always_inline void
796 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
797 {
798 	__this_cpu_write(trace_taskinfo_save, true);
799 
800 	/* If this is the temp buffer, we need to commit fully */
801 	if (this_cpu_read(trace_buffered_event) == event) {
802 		/* Length is in event->array[0] */
803 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
804 		/* Release the temp buffer */
805 		this_cpu_dec(trace_buffered_event_cnt);
806 	} else
807 		ring_buffer_unlock_commit(buffer, event);
808 }
809 
810 /**
811  * __trace_puts - write a constant string into the trace buffer.
812  * @ip:	   The address of the caller
813  * @str:   The constant string to write
814  * @size:  The size of the string.
815  */
816 int __trace_puts(unsigned long ip, const char *str, int size)
817 {
818 	struct ring_buffer_event *event;
819 	struct ring_buffer *buffer;
820 	struct print_entry *entry;
821 	unsigned long irq_flags;
822 	int alloc;
823 	int pc;
824 
825 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
826 		return 0;
827 
828 	pc = preempt_count();
829 
830 	if (unlikely(tracing_selftest_running || tracing_disabled))
831 		return 0;
832 
833 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
834 
835 	local_save_flags(irq_flags);
836 	buffer = global_trace.trace_buffer.buffer;
837 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
838 					    irq_flags, pc);
839 	if (!event)
840 		return 0;
841 
842 	entry = ring_buffer_event_data(event);
843 	entry->ip = ip;
844 
845 	memcpy(&entry->buf, str, size);
846 
847 	/* Add a newline if necessary */
848 	if (entry->buf[size - 1] != '\n') {
849 		entry->buf[size] = '\n';
850 		entry->buf[size + 1] = '\0';
851 	} else
852 		entry->buf[size] = '\0';
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return size;
858 }
859 EXPORT_SYMBOL_GPL(__trace_puts);
860 
861 /**
862  * __trace_bputs - write the pointer to a constant string into trace buffer
863  * @ip:	   The address of the caller
864  * @str:   The constant string to write to the buffer to
865  */
866 int __trace_bputs(unsigned long ip, const char *str)
867 {
868 	struct ring_buffer_event *event;
869 	struct ring_buffer *buffer;
870 	struct bputs_entry *entry;
871 	unsigned long irq_flags;
872 	int size = sizeof(struct bputs_entry);
873 	int pc;
874 
875 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
876 		return 0;
877 
878 	pc = preempt_count();
879 
880 	if (unlikely(tracing_selftest_running || tracing_disabled))
881 		return 0;
882 
883 	local_save_flags(irq_flags);
884 	buffer = global_trace.trace_buffer.buffer;
885 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
886 					    irq_flags, pc);
887 	if (!event)
888 		return 0;
889 
890 	entry = ring_buffer_event_data(event);
891 	entry->ip			= ip;
892 	entry->str			= str;
893 
894 	__buffer_unlock_commit(buffer, event);
895 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
896 
897 	return 1;
898 }
899 EXPORT_SYMBOL_GPL(__trace_bputs);
900 
901 #ifdef CONFIG_TRACER_SNAPSHOT
902 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
903 {
904 	struct tracer *tracer = tr->current_trace;
905 	unsigned long flags;
906 
907 	if (in_nmi()) {
908 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
909 		internal_trace_puts("*** snapshot is being ignored        ***\n");
910 		return;
911 	}
912 
913 	if (!tr->allocated_snapshot) {
914 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
915 		internal_trace_puts("*** stopping trace here!   ***\n");
916 		tracing_off();
917 		return;
918 	}
919 
920 	/* Note, snapshot can not be used when the tracer uses it */
921 	if (tracer->use_max_tr) {
922 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
923 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
924 		return;
925 	}
926 
927 	local_irq_save(flags);
928 	update_max_tr(tr, current, smp_processor_id(), cond_data);
929 	local_irq_restore(flags);
930 }
931 
932 void tracing_snapshot_instance(struct trace_array *tr)
933 {
934 	tracing_snapshot_instance_cond(tr, NULL);
935 }
936 
937 /**
938  * tracing_snapshot - take a snapshot of the current buffer.
939  *
940  * This causes a swap between the snapshot buffer and the current live
941  * tracing buffer. You can use this to take snapshots of the live
942  * trace when some condition is triggered, but continue to trace.
943  *
944  * Note, make sure to allocate the snapshot with either
945  * a tracing_snapshot_alloc(), or by doing it manually
946  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
947  *
948  * If the snapshot buffer is not allocated, it will stop tracing.
949  * Basically making a permanent snapshot.
950  */
951 void tracing_snapshot(void)
952 {
953 	struct trace_array *tr = &global_trace;
954 
955 	tracing_snapshot_instance(tr);
956 }
957 EXPORT_SYMBOL_GPL(tracing_snapshot);
958 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional: it only happens if the cond_snapshot.update()
 * implementation receiving the cond_data returns true. That means
 * the trace array's cond_snapshot update() operation used the
 * cond_data to decide whether the snapshot should be taken and, if
 * it was, presumably saved it along with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
977 
978 /**
979  * tracing_snapshot_cond_data - get the user data associated with a snapshot
980  * @tr:		The tracing instance
981  *
982  * When the user enables a conditional snapshot using
983  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
984  * with the snapshot.  This accessor is used to retrieve it.
985  *
986  * Should not be called from cond_snapshot.update(), since it takes
987  * the tr->max_lock lock, which the code calling
988  * cond_snapshot.update() has already done.
989  *
990  * Returns the cond_data associated with the trace array's snapshot.
991  */
992 void *tracing_cond_snapshot_data(struct trace_array *tr)
993 {
994 	void *cond_data = NULL;
995 
996 	arch_spin_lock(&tr->max_lock);
997 
998 	if (tr->cond_snapshot)
999 		cond_data = tr->cond_snapshot->cond_data;
1000 
1001 	arch_spin_unlock(&tr->max_lock);
1002 
1003 	return cond_data;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1006 
1007 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1008 					struct trace_buffer *size_buf, int cpu_id);
1009 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1010 
1011 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1012 {
1013 	int ret;
1014 
1015 	if (!tr->allocated_snapshot) {
1016 
1017 		/* allocate spare buffer */
1018 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1019 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1020 		if (ret < 0)
1021 			return ret;
1022 
1023 		tr->allocated_snapshot = true;
1024 	}
1025 
1026 	return 0;
1027 }
1028 
1029 static void free_snapshot(struct trace_array *tr)
1030 {
1031 	/*
1032 	 * We don't free the ring buffer. instead, resize it because
1033 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1034 	 * we want preserve it.
1035 	 */
1036 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1037 	set_buffer_entries(&tr->max_buffer, 1);
1038 	tracing_reset_online_cpus(&tr->max_buffer);
1039 	tr->allocated_snapshot = false;
1040 }
1041 
1042 /**
1043  * tracing_alloc_snapshot - allocate snapshot buffer.
1044  *
1045  * This only allocates the snapshot buffer if it isn't already
1046  * allocated - it doesn't also take a snapshot.
1047  *
1048  * This is meant to be used in cases where the snapshot buffer needs
1049  * to be set up for events that can't sleep but need to be able to
1050  * trigger a snapshot.
1051  */
1052 int tracing_alloc_snapshot(void)
1053 {
1054 	struct trace_array *tr = &global_trace;
1055 	int ret;
1056 
1057 	ret = tracing_alloc_snapshot_instance(tr);
1058 	WARN_ON(ret < 0);
1059 
1060 	return ret;
1061 }
1062 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1063 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first if needed.  Only use this where sleeping is allowed, since the
 * allocation may sleep.
 *
 * Taking the snapshot swaps the snapshot buffer with the live tracing
 * buffer, so a trace can be captured when some condition triggers while
 * tracing continues.  Nothing is swapped if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() >= 0)
		tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1086 
1087 /**
1088  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1089  * @tr:		The tracing instance
1090  * @cond_data:	User data to associate with the snapshot
1091  * @update:	Implementation of the cond_snapshot update function
1092  *
1093  * Check whether the conditional snapshot for the given instance has
1094  * already been enabled, or if the current tracer is already using a
1095  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1096  * save the cond_data and update function inside.
1097  *
1098  * Returns 0 if successful, error otherwise.
1099  */
1100 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1101 				 cond_update_fn_t update)
1102 {
1103 	struct cond_snapshot *cond_snapshot;
1104 	int ret = 0;
1105 
1106 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1107 	if (!cond_snapshot)
1108 		return -ENOMEM;
1109 
1110 	cond_snapshot->cond_data = cond_data;
1111 	cond_snapshot->update = update;
1112 
1113 	mutex_lock(&trace_types_lock);
1114 
1115 	ret = tracing_alloc_snapshot_instance(tr);
1116 	if (ret)
1117 		goto fail_unlock;
1118 
1119 	if (tr->current_trace->use_max_tr) {
1120 		ret = -EBUSY;
1121 		goto fail_unlock;
1122 	}
1123 
1124 	/*
1125 	 * The cond_snapshot can only change to NULL without the
1126 	 * trace_types_lock. We don't care if we race with it going
1127 	 * to NULL, but we want to make sure that it's not set to
1128 	 * something other than NULL when we get here, which we can
1129 	 * do safely with only holding the trace_types_lock and not
1130 	 * having to take the max_lock.
1131 	 */
1132 	if (tr->cond_snapshot) {
1133 		ret = -EBUSY;
1134 		goto fail_unlock;
1135 	}
1136 
1137 	arch_spin_lock(&tr->max_lock);
1138 	tr->cond_snapshot = cond_snapshot;
1139 	arch_spin_unlock(&tr->max_lock);
1140 
1141 	mutex_unlock(&trace_types_lock);
1142 
1143 	return ret;
1144 
1145  fail_unlock:
1146 	mutex_unlock(&trace_types_lock);
1147 	kfree(cond_snapshot);
1148 	return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1151 
1152 /**
1153  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1154  * @tr:		The tracing instance
1155  *
1156  * Check whether the conditional snapshot for the given instance is
1157  * enabled; if so, free the cond_snapshot associated with it,
1158  * otherwise return -EINVAL.
1159  *
1160  * Returns 0 if successful, error otherwise.
1161  */
1162 int tracing_snapshot_cond_disable(struct trace_array *tr)
1163 {
1164 	int ret = 0;
1165 
1166 	arch_spin_lock(&tr->max_lock);
1167 
1168 	if (!tr->cond_snapshot)
1169 		ret = -EINVAL;
1170 	else {
1171 		kfree(tr->cond_snapshot);
1172 		tr->cond_snapshot = NULL;
1173 	}
1174 
1175 	arch_spin_unlock(&tr->max_lock);
1176 
1177 	return ret;
1178 }
1179 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1180 #else
1181 void tracing_snapshot(void)
1182 {
1183 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot);
1186 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1187 {
1188 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1189 }
1190 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1191 int tracing_alloc_snapshot(void)
1192 {
1193 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1194 	return -ENODEV;
1195 }
1196 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1197 void tracing_snapshot_alloc(void)
1198 {
1199 	/* Give warning */
1200 	tracing_snapshot();
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1203 void *tracing_cond_snapshot_data(struct trace_array *tr)
1204 {
1205 	return NULL;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1209 {
1210 	return -ENODEV;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1213 int tracing_snapshot_cond_disable(struct trace_array *tr)
1214 {
1215 	return false;
1216 }
1217 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1218 #endif /* CONFIG_TRACER_SNAPSHOT */
1219 
1220 void tracer_tracing_off(struct trace_array *tr)
1221 {
1222 	if (tr->trace_buffer.buffer)
1223 		ring_buffer_record_off(tr->trace_buffer.buffer);
1224 	/*
1225 	 * This flag is looked at when buffers haven't been allocated
1226 	 * yet, or by some tracers (like irqsoff), that just want to
1227 	 * know if the ring buffer has been disabled, but it can handle
1228 	 * races of where it gets disabled but we still do a record.
1229 	 * As the check is in the fast path of the tracers, it is more
1230 	 * important to be fast than accurate.
1231 	 */
1232 	tr->buffer_disabled = 1;
1233 	/* Make the flag seen by readers */
1234 	smp_wmb();
1235 }
1236 
1237 /**
1238  * tracing_off - turn off tracing buffers
1239  *
1240  * This function stops the tracing buffers from recording data.
1241  * It does not disable any overhead the tracers themselves may
1242  * be causing. This function simply causes all recording to
1243  * the ring buffers to fail.
1244  */
1245 void tracing_off(void)
1246 {
1247 	tracer_tracing_off(&global_trace);
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_off);
1250 
1251 void disable_trace_on_warning(void)
1252 {
1253 	if (__disable_trace_on_warning)
1254 		tracing_off();
1255 }
1256 
1257 /**
1258  * tracer_tracing_is_on - show real state of ring buffer enabled
1259  * @tr : the trace array to know if ring buffer is enabled
1260  *
1261  * Shows real state of the ring buffer if it is enabled or not.
1262  */
1263 bool tracer_tracing_is_on(struct trace_array *tr)
1264 {
1265 	if (tr->trace_buffer.buffer)
1266 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1267 	return !tr->buffer_disabled;
1268 }
1269 
1270 /**
1271  * tracing_is_on - show state of ring buffers enabled
1272  */
1273 int tracing_is_on(void)
1274 {
1275 	return tracer_tracing_is_on(&global_trace);
1276 }
1277 EXPORT_SYMBOL_GPL(tracing_is_on);
1278 
1279 static int __init set_buf_size(char *str)
1280 {
1281 	unsigned long buf_size;
1282 
1283 	if (!str)
1284 		return 0;
1285 	buf_size = memparse(str, &str);
1286 	/* nr_entries can not be zero */
1287 	if (buf_size == 0)
1288 		return 0;
1289 	trace_buf_size = buf_size;
1290 	return 1;
1291 }
1292 __setup("trace_buf_size=", set_buf_size);
1293 
1294 static int __init set_tracing_thresh(char *str)
1295 {
1296 	unsigned long threshold;
1297 	int ret;
1298 
1299 	if (!str)
1300 		return 0;
1301 	ret = kstrtoul(str, 0, &threshold);
1302 	if (ret < 0)
1303 		return 0;
1304 	tracing_thresh = threshold * 1000;
1305 	return 1;
1306 }
1307 __setup("tracing_thresh=", set_tracing_thresh);
1308 
/*
 * nsecs_to_usecs - convert nanoseconds to microseconds
 * @nsecs: value in nanoseconds
 *
 * Returns @nsecs divided by 1000, rounded down.
 */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1313 
1314 /*
1315  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1316  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1317  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1318  * of strings in the order that the evals (enum) were defined.
1319  */
1320 #undef C
1321 #define C(a, b) b
1322 
1323 /* These must match the bit postions in trace_iterator_flags */
1324 static const char *trace_options[] = {
1325 	TRACE_FLAGS
1326 	NULL
1327 };
1328 
1329 static struct {
1330 	u64 (*func)(void);
1331 	const char *name;
1332 	int in_ns;		/* is this clock in nanoseconds? */
1333 } trace_clocks[] = {
1334 	{ trace_clock_local,		"local",	1 },
1335 	{ trace_clock_global,		"global",	1 },
1336 	{ trace_clock_counter,		"counter",	0 },
1337 	{ trace_clock_jiffies,		"uptime",	0 },
1338 	{ trace_clock,			"perf",		1 },
1339 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1340 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1341 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1342 	ARCH_TRACE_CLOCKS
1343 };
1344 
1345 bool trace_clock_in_ns(struct trace_array *tr)
1346 {
1347 	if (trace_clocks[tr->clock_id].in_ns)
1348 		return true;
1349 
1350 	return false;
1351 }
1352 
1353 /*
1354  * trace_parser_get_init - gets the buffer for trace parser
1355  */
1356 int trace_parser_get_init(struct trace_parser *parser, int size)
1357 {
1358 	memset(parser, 0, sizeof(*parser));
1359 
1360 	parser->buffer = kmalloc(size, GFP_KERNEL);
1361 	if (!parser->buffer)
1362 		return 1;
1363 
1364 	parser->size = size;
1365 	return 0;
1366 }
1367 
1368 /*
1369  * trace_parser_put - frees the buffer for trace parser
1370  */
1371 void trace_parser_put(struct trace_parser *parser)
1372 {
1373 	kfree(parser->buffer);
1374 	parser->buffer = NULL;
1375 }
1376 
1377 /*
1378  * trace_get_user - reads the user input string separated by  space
1379  * (matched by isspace(ch))
1380  *
1381  * For each string found the 'struct trace_parser' is updated,
1382  * and the function returns.
1383  *
1384  * Returns number of bytes read.
1385  *
1386  * See kernel/trace/trace.h for 'struct trace_parser' details.
1387  */
1388 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1389 	size_t cnt, loff_t *ppos)
1390 {
1391 	char ch;
1392 	size_t read = 0;
1393 	ssize_t ret;
1394 
1395 	if (!*ppos)
1396 		trace_parser_clear(parser);
1397 
1398 	ret = get_user(ch, ubuf++);
1399 	if (ret)
1400 		goto out;
1401 
1402 	read++;
1403 	cnt--;
1404 
1405 	/*
1406 	 * The parser is not finished with the last write,
1407 	 * continue reading the user input without skipping spaces.
1408 	 */
1409 	if (!parser->cont) {
1410 		/* skip white space */
1411 		while (cnt && isspace(ch)) {
1412 			ret = get_user(ch, ubuf++);
1413 			if (ret)
1414 				goto out;
1415 			read++;
1416 			cnt--;
1417 		}
1418 
1419 		parser->idx = 0;
1420 
1421 		/* only spaces were written */
1422 		if (isspace(ch) || !ch) {
1423 			*ppos += read;
1424 			ret = read;
1425 			goto out;
1426 		}
1427 	}
1428 
1429 	/* read the non-space input */
1430 	while (cnt && !isspace(ch) && ch) {
1431 		if (parser->idx < parser->size - 1)
1432 			parser->buffer[parser->idx++] = ch;
1433 		else {
1434 			ret = -EINVAL;
1435 			goto out;
1436 		}
1437 		ret = get_user(ch, ubuf++);
1438 		if (ret)
1439 			goto out;
1440 		read++;
1441 		cnt--;
1442 	}
1443 
1444 	/* We either got finished input or we have to wait for another call. */
1445 	if (isspace(ch) || !ch) {
1446 		parser->buffer[parser->idx] = 0;
1447 		parser->cont = false;
1448 	} else if (parser->idx < parser->size - 1) {
1449 		parser->cont = true;
1450 		parser->buffer[parser->idx++] = ch;
1451 		/* Make sure the parsed string always terminates with '\0'. */
1452 		parser->buffer[parser->idx] = 0;
1453 	} else {
1454 		ret = -EINVAL;
1455 		goto out;
1456 	}
1457 
1458 	*ppos += read;
1459 	ret = read;
1460 
1461 out:
1462 	return ret;
1463 }
1464 
1465 /* TODO add a seq_buf_to_buffer() */
1466 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1467 {
1468 	int len;
1469 
1470 	if (trace_seq_used(s) <= s->seq.readpos)
1471 		return -EBUSY;
1472 
1473 	len = trace_seq_used(s) - s->seq.readpos;
1474 	if (cnt > len)
1475 		cnt = len;
1476 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1477 
1478 	s->seq.readpos += cnt;
1479 	return cnt;
1480 }
1481 
1482 unsigned long __read_mostly	tracing_thresh;
1483 
1484 #ifdef CONFIG_TRACER_MAX_TRACE
1485 /*
1486  * Copy the new maximum trace into the separate maximum-trace
1487  * structure. (this way the maximum trace is permanently saved,
1488  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1489  */
1490 static void
1491 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1492 {
1493 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1494 	struct trace_buffer *max_buf = &tr->max_buffer;
1495 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1496 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1497 
1498 	max_buf->cpu = cpu;
1499 	max_buf->time_start = data->preempt_timestamp;
1500 
1501 	max_data->saved_latency = tr->max_latency;
1502 	max_data->critical_start = data->critical_start;
1503 	max_data->critical_end = data->critical_end;
1504 
1505 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1506 	max_data->pid = tsk->pid;
1507 	/*
1508 	 * If tsk == current, then use current_uid(), as that does not use
1509 	 * RCU. The irq tracer can be called out of RCU scope.
1510 	 */
1511 	if (tsk == current)
1512 		max_data->uid = current_uid();
1513 	else
1514 		max_data->uid = task_uid(tsk);
1515 
1516 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1517 	max_data->policy = tsk->policy;
1518 	max_data->rt_priority = tsk->rt_priority;
1519 
1520 	/* record this tasks comm */
1521 	tracing_record_cmdline(tsk);
1522 }
1523 
1524 /**
1525  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1526  * @tr: tracer
1527  * @tsk: the task with the latency
1528  * @cpu: The cpu that initiated the trace.
1529  * @cond_data: User data associated with a conditional snapshot
1530  *
1531  * Flip the buffers between the @tr and the max_tr and record information
1532  * about which task was the cause of this latency.
1533  */
1534 void
1535 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1536 	      void *cond_data)
1537 {
1538 	if (tr->stop_count)
1539 		return;
1540 
1541 	WARN_ON_ONCE(!irqs_disabled());
1542 
1543 	if (!tr->allocated_snapshot) {
1544 		/* Only the nop tracer should hit this when disabling */
1545 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1546 		return;
1547 	}
1548 
1549 	arch_spin_lock(&tr->max_lock);
1550 
1551 	/* Inherit the recordable setting from trace_buffer */
1552 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1553 		ring_buffer_record_on(tr->max_buffer.buffer);
1554 	else
1555 		ring_buffer_record_off(tr->max_buffer.buffer);
1556 
1557 #ifdef CONFIG_TRACER_SNAPSHOT
1558 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1559 		goto out_unlock;
1560 #endif
1561 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1562 
1563 	__update_max_tr(tr, tsk, cpu);
1564 
1565  out_unlock:
1566 	arch_spin_unlock(&tr->max_lock);
1567 }
1568 
1569 /**
1570  * update_max_tr_single - only copy one trace over, and reset the rest
1571  * @tr - tracer
1572  * @tsk - task with the latency
1573  * @cpu - the cpu of the buffer to copy.
1574  *
1575  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1576  */
1577 void
1578 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1579 {
1580 	int ret;
1581 
1582 	if (tr->stop_count)
1583 		return;
1584 
1585 	WARN_ON_ONCE(!irqs_disabled());
1586 	if (!tr->allocated_snapshot) {
1587 		/* Only the nop tracer should hit this when disabling */
1588 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1589 		return;
1590 	}
1591 
1592 	arch_spin_lock(&tr->max_lock);
1593 
1594 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1595 
1596 	if (ret == -EBUSY) {
1597 		/*
1598 		 * We failed to swap the buffer due to a commit taking
1599 		 * place on this CPU. We fail to record, but we reset
1600 		 * the max trace buffer (no one writes directly to it)
1601 		 * and flag that it failed.
1602 		 */
1603 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1604 			"Failed to swap buffers due to commit in progress\n");
1605 	}
1606 
1607 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1608 
1609 	__update_max_tr(tr, tsk, cpu);
1610 	arch_spin_unlock(&tr->max_lock);
1611 }
1612 #endif /* CONFIG_TRACER_MAX_TRACE */
1613 
1614 static int wait_on_pipe(struct trace_iterator *iter, int full)
1615 {
1616 	/* Iterators are static, they should be filled or empty */
1617 	if (trace_buffer_iter(iter, iter->cpu_file))
1618 		return 0;
1619 
1620 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1621 				full);
1622 }
1623 
1624 #ifdef CONFIG_FTRACE_STARTUP_TEST
1625 static bool selftests_can_run;
1626 
1627 struct trace_selftests {
1628 	struct list_head		list;
1629 	struct tracer			*type;
1630 };
1631 
1632 static LIST_HEAD(postponed_selftests);
1633 
1634 static int save_selftest(struct tracer *type)
1635 {
1636 	struct trace_selftests *selftest;
1637 
1638 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1639 	if (!selftest)
1640 		return -ENOMEM;
1641 
1642 	selftest->type = type;
1643 	list_add(&selftest->list, &postponed_selftests);
1644 	return 0;
1645 }
1646 
1647 static int run_tracer_selftest(struct tracer *type)
1648 {
1649 	struct trace_array *tr = &global_trace;
1650 	struct tracer *saved_tracer = tr->current_trace;
1651 	int ret;
1652 
1653 	if (!type->selftest || tracing_selftest_disabled)
1654 		return 0;
1655 
1656 	/*
1657 	 * If a tracer registers early in boot up (before scheduling is
1658 	 * initialized and such), then do not run its selftests yet.
1659 	 * Instead, run it a little later in the boot process.
1660 	 */
1661 	if (!selftests_can_run)
1662 		return save_selftest(type);
1663 
1664 	/*
1665 	 * Run a selftest on this tracer.
1666 	 * Here we reset the trace buffer, and set the current
1667 	 * tracer to be this tracer. The tracer can then run some
1668 	 * internal tracing to verify that everything is in order.
1669 	 * If we fail, we do not register this tracer.
1670 	 */
1671 	tracing_reset_online_cpus(&tr->trace_buffer);
1672 
1673 	tr->current_trace = type;
1674 
1675 #ifdef CONFIG_TRACER_MAX_TRACE
1676 	if (type->use_max_tr) {
1677 		/* If we expanded the buffers, make sure the max is expanded too */
1678 		if (ring_buffer_expanded)
1679 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1680 					   RING_BUFFER_ALL_CPUS);
1681 		tr->allocated_snapshot = true;
1682 	}
1683 #endif
1684 
1685 	/* the test is responsible for initializing and enabling */
1686 	pr_info("Testing tracer %s: ", type->name);
1687 	ret = type->selftest(type, tr);
1688 	/* the test is responsible for resetting too */
1689 	tr->current_trace = saved_tracer;
1690 	if (ret) {
1691 		printk(KERN_CONT "FAILED!\n");
1692 		/* Add the warning after printing 'FAILED' */
1693 		WARN_ON(1);
1694 		return -1;
1695 	}
1696 	/* Only reset on passing, to avoid touching corrupted buffers */
1697 	tracing_reset_online_cpus(&tr->trace_buffer);
1698 
1699 #ifdef CONFIG_TRACER_MAX_TRACE
1700 	if (type->use_max_tr) {
1701 		tr->allocated_snapshot = false;
1702 
1703 		/* Shrink the max buffer again */
1704 		if (ring_buffer_expanded)
1705 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1706 					   RING_BUFFER_ALL_CPUS);
1707 	}
1708 #endif
1709 
1710 	printk(KERN_CONT "PASSED\n");
1711 	return 0;
1712 }
1713 
1714 static __init int init_trace_selftests(void)
1715 {
1716 	struct trace_selftests *p, *n;
1717 	struct tracer *t, **last;
1718 	int ret;
1719 
1720 	selftests_can_run = true;
1721 
1722 	mutex_lock(&trace_types_lock);
1723 
1724 	if (list_empty(&postponed_selftests))
1725 		goto out;
1726 
1727 	pr_info("Running postponed tracer tests:\n");
1728 
1729 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1730 		/* This loop can take minutes when sanitizers are enabled, so
1731 		 * lets make sure we allow RCU processing.
1732 		 */
1733 		cond_resched();
1734 		ret = run_tracer_selftest(p->type);
1735 		/* If the test fails, then warn and remove from available_tracers */
1736 		if (ret < 0) {
1737 			WARN(1, "tracer: %s failed selftest, disabling\n",
1738 			     p->type->name);
1739 			last = &trace_types;
1740 			for (t = trace_types; t; t = t->next) {
1741 				if (t == p->type) {
1742 					*last = t->next;
1743 					break;
1744 				}
1745 				last = &t->next;
1746 			}
1747 		}
1748 		list_del(&p->list);
1749 		kfree(p);
1750 	}
1751 
1752  out:
1753 	mutex_unlock(&trace_types_lock);
1754 
1755 	return 0;
1756 }
1757 core_initcall(init_trace_selftests);
1758 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest: report success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1763 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1764 
1765 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1766 
1767 static void __init apply_trace_boot_options(void);
1768 
1769 /**
1770  * register_tracer - register a tracer with the ftrace system.
1771  * @type - the plugin for the tracer
1772  *
1773  * Register a new plugin tracer.
1774  */
1775 int __init register_tracer(struct tracer *type)
1776 {
1777 	struct tracer *t;
1778 	int ret = 0;
1779 
1780 	if (!type->name) {
1781 		pr_info("Tracer must have a name\n");
1782 		return -1;
1783 	}
1784 
1785 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1786 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1787 		return -1;
1788 	}
1789 
1790 	mutex_lock(&trace_types_lock);
1791 
1792 	tracing_selftest_running = true;
1793 
1794 	for (t = trace_types; t; t = t->next) {
1795 		if (strcmp(type->name, t->name) == 0) {
1796 			/* already found */
1797 			pr_info("Tracer %s already registered\n",
1798 				type->name);
1799 			ret = -1;
1800 			goto out;
1801 		}
1802 	}
1803 
1804 	if (!type->set_flag)
1805 		type->set_flag = &dummy_set_flag;
1806 	if (!type->flags) {
1807 		/*allocate a dummy tracer_flags*/
1808 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1809 		if (!type->flags) {
1810 			ret = -ENOMEM;
1811 			goto out;
1812 		}
1813 		type->flags->val = 0;
1814 		type->flags->opts = dummy_tracer_opt;
1815 	} else
1816 		if (!type->flags->opts)
1817 			type->flags->opts = dummy_tracer_opt;
1818 
1819 	/* store the tracer for __set_tracer_option */
1820 	type->flags->trace = type;
1821 
1822 	ret = run_tracer_selftest(type);
1823 	if (ret < 0)
1824 		goto out;
1825 
1826 	type->next = trace_types;
1827 	trace_types = type;
1828 	add_tracer_options(&global_trace, type);
1829 
1830  out:
1831 	tracing_selftest_running = false;
1832 	mutex_unlock(&trace_types_lock);
1833 
1834 	if (ret || !default_bootup_tracer)
1835 		goto out_unlock;
1836 
1837 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1838 		goto out_unlock;
1839 
1840 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1841 	/* Do we want this tracer to start on bootup? */
1842 	tracing_set_tracer(&global_trace, type->name);
1843 	default_bootup_tracer = NULL;
1844 
1845 	apply_trace_boot_options();
1846 
1847 	/* disable other selftests, since this will break it. */
1848 	tracing_selftest_disabled = true;
1849 #ifdef CONFIG_FTRACE_STARTUP_TEST
1850 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1851 	       type->name);
1852 #endif
1853 
1854  out_unlock:
1855 	return ret;
1856 }
1857 
1858 void tracing_reset(struct trace_buffer *buf, int cpu)
1859 {
1860 	struct ring_buffer *buffer = buf->buffer;
1861 
1862 	if (!buffer)
1863 		return;
1864 
1865 	ring_buffer_record_disable(buffer);
1866 
1867 	/* Make sure all commits have finished */
1868 	synchronize_rcu();
1869 	ring_buffer_reset_cpu(buffer, cpu);
1870 
1871 	ring_buffer_record_enable(buffer);
1872 }
1873 
1874 void tracing_reset_online_cpus(struct trace_buffer *buf)
1875 {
1876 	struct ring_buffer *buffer = buf->buffer;
1877 	int cpu;
1878 
1879 	if (!buffer)
1880 		return;
1881 
1882 	ring_buffer_record_disable(buffer);
1883 
1884 	/* Make sure all commits have finished */
1885 	synchronize_rcu();
1886 
1887 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1888 
1889 	for_each_online_cpu(cpu)
1890 		ring_buffer_reset_cpu(buffer, cpu);
1891 
1892 	ring_buffer_record_enable(buffer);
1893 }
1894 
1895 /* Must have trace_types_lock held */
1896 void tracing_reset_all_online_cpus(void)
1897 {
1898 	struct trace_array *tr;
1899 
1900 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1901 		if (!tr->clear_trace)
1902 			continue;
1903 		tr->clear_trace = false;
1904 		tracing_reset_online_cpus(&tr->trace_buffer);
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 		tracing_reset_online_cpus(&tr->max_buffer);
1907 #endif
1908 	}
1909 }
1910 
1911 static int *tgid_map;
1912 
1913 #define SAVED_CMDLINES_DEFAULT 128
1914 #define NO_CMDLINE_MAP UINT_MAX
1915 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1916 struct saved_cmdlines_buffer {
1917 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1918 	unsigned *map_cmdline_to_pid;
1919 	unsigned cmdline_num;
1920 	int cmdline_idx;
1921 	char *saved_cmdlines;
1922 };
1923 static struct saved_cmdlines_buffer *savedcmd;
1924 
1925 /* temporary disable recording */
1926 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1927 
1928 static inline char *get_saved_cmdlines(int idx)
1929 {
1930 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1931 }
1932 
1933 static inline void set_cmdline(int idx, const char *cmdline)
1934 {
1935 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1936 }
1937 
1938 static int allocate_cmdlines_buffer(unsigned int val,
1939 				    struct saved_cmdlines_buffer *s)
1940 {
1941 	s->map_cmdline_to_pid = kmalloc_array(val,
1942 					      sizeof(*s->map_cmdline_to_pid),
1943 					      GFP_KERNEL);
1944 	if (!s->map_cmdline_to_pid)
1945 		return -ENOMEM;
1946 
1947 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1948 	if (!s->saved_cmdlines) {
1949 		kfree(s->map_cmdline_to_pid);
1950 		return -ENOMEM;
1951 	}
1952 
1953 	s->cmdline_idx = 0;
1954 	s->cmdline_num = val;
1955 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1956 	       sizeof(s->map_pid_to_cmdline));
1957 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1958 	       val * sizeof(*s->map_cmdline_to_pid));
1959 
1960 	return 0;
1961 }
1962 
1963 static int trace_create_savedcmd(void)
1964 {
1965 	int ret;
1966 
1967 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1968 	if (!savedcmd)
1969 		return -ENOMEM;
1970 
1971 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1972 	if (ret < 0) {
1973 		kfree(savedcmd);
1974 		savedcmd = NULL;
1975 		return -ENOMEM;
1976 	}
1977 
1978 	return 0;
1979 }
1980 
1981 int is_tracing_stopped(void)
1982 {
1983 	return global_trace.stop_count;
1984 }
1985 
1986 /**
1987  * tracing_start - quick start of the tracer
1988  *
1989  * If tracing is enabled but was stopped by tracing_stop,
1990  * this will start the tracer back up.
1991  */
1992 void tracing_start(void)
1993 {
1994 	struct ring_buffer *buffer;
1995 	unsigned long flags;
1996 
1997 	if (tracing_disabled)
1998 		return;
1999 
2000 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2001 	if (--global_trace.stop_count) {
2002 		if (global_trace.stop_count < 0) {
2003 			/* Someone screwed up their debugging */
2004 			WARN_ON_ONCE(1);
2005 			global_trace.stop_count = 0;
2006 		}
2007 		goto out;
2008 	}
2009 
2010 	/* Prevent the buffers from switching */
2011 	arch_spin_lock(&global_trace.max_lock);
2012 
2013 	buffer = global_trace.trace_buffer.buffer;
2014 	if (buffer)
2015 		ring_buffer_record_enable(buffer);
2016 
2017 #ifdef CONFIG_TRACER_MAX_TRACE
2018 	buffer = global_trace.max_buffer.buffer;
2019 	if (buffer)
2020 		ring_buffer_record_enable(buffer);
2021 #endif
2022 
2023 	arch_spin_unlock(&global_trace.max_lock);
2024 
2025  out:
2026 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2027 }
2028 
2029 static void tracing_start_tr(struct trace_array *tr)
2030 {
2031 	struct ring_buffer *buffer;
2032 	unsigned long flags;
2033 
2034 	if (tracing_disabled)
2035 		return;
2036 
2037 	/* If global, we need to also start the max tracer */
2038 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2039 		return tracing_start();
2040 
2041 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2042 
2043 	if (--tr->stop_count) {
2044 		if (tr->stop_count < 0) {
2045 			/* Someone screwed up their debugging */
2046 			WARN_ON_ONCE(1);
2047 			tr->stop_count = 0;
2048 		}
2049 		goto out;
2050 	}
2051 
2052 	buffer = tr->trace_buffer.buffer;
2053 	if (buffer)
2054 		ring_buffer_record_enable(buffer);
2055 
2056  out:
2057 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2058 }
2059 
2060 /**
2061  * tracing_stop - quick stop of the tracer
2062  *
2063  * Light weight way to stop tracing. Use in conjunction with
2064  * tracing_start.
2065  */
2066 void tracing_stop(void)
2067 {
2068 	struct ring_buffer *buffer;
2069 	unsigned long flags;
2070 
2071 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2072 	if (global_trace.stop_count++)
2073 		goto out;
2074 
2075 	/* Prevent the buffers from switching */
2076 	arch_spin_lock(&global_trace.max_lock);
2077 
2078 	buffer = global_trace.trace_buffer.buffer;
2079 	if (buffer)
2080 		ring_buffer_record_disable(buffer);
2081 
2082 #ifdef CONFIG_TRACER_MAX_TRACE
2083 	buffer = global_trace.max_buffer.buffer;
2084 	if (buffer)
2085 		ring_buffer_record_disable(buffer);
2086 #endif
2087 
2088 	arch_spin_unlock(&global_trace.max_lock);
2089 
2090  out:
2091 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2092 }
2093 
2094 static void tracing_stop_tr(struct trace_array *tr)
2095 {
2096 	struct ring_buffer *buffer;
2097 	unsigned long flags;
2098 
2099 	/* If global, we need to also stop the max tracer */
2100 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2101 		return tracing_stop();
2102 
2103 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2104 	if (tr->stop_count++)
2105 		goto out;
2106 
2107 	buffer = tr->trace_buffer.buffer;
2108 	if (buffer)
2109 		ring_buffer_record_disable(buffer);
2110 
2111  out:
2112 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2113 }
2114 
2115 static int trace_save_cmdline(struct task_struct *tsk)
2116 {
2117 	unsigned pid, idx;
2118 
2119 	/* treat recording of idle task as a success */
2120 	if (!tsk->pid)
2121 		return 1;
2122 
2123 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2124 		return 0;
2125 
2126 	/*
2127 	 * It's not the end of the world if we don't get
2128 	 * the lock, but we also don't want to spin
2129 	 * nor do we want to disable interrupts,
2130 	 * so if we miss here, then better luck next time.
2131 	 */
2132 	if (!arch_spin_trylock(&trace_cmdline_lock))
2133 		return 0;
2134 
2135 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2136 	if (idx == NO_CMDLINE_MAP) {
2137 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2138 
2139 		/*
2140 		 * Check whether the cmdline buffer at idx has a pid
2141 		 * mapped. We are going to overwrite that entry so we
2142 		 * need to clear the map_pid_to_cmdline. Otherwise we
2143 		 * would read the new comm for the old pid.
2144 		 */
2145 		pid = savedcmd->map_cmdline_to_pid[idx];
2146 		if (pid != NO_CMDLINE_MAP)
2147 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2148 
2149 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2150 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2151 
2152 		savedcmd->cmdline_idx = idx;
2153 	}
2154 
2155 	set_cmdline(idx, tsk->comm);
2156 
2157 	arch_spin_unlock(&trace_cmdline_lock);
2158 
2159 	return 1;
2160 }
2161 
2162 static void __trace_find_cmdline(int pid, char comm[])
2163 {
2164 	unsigned map;
2165 
2166 	if (!pid) {
2167 		strcpy(comm, "<idle>");
2168 		return;
2169 	}
2170 
2171 	if (WARN_ON_ONCE(pid < 0)) {
2172 		strcpy(comm, "<XXX>");
2173 		return;
2174 	}
2175 
2176 	if (pid > PID_MAX_DEFAULT) {
2177 		strcpy(comm, "<...>");
2178 		return;
2179 	}
2180 
2181 	map = savedcmd->map_pid_to_cmdline[pid];
2182 	if (map != NO_CMDLINE_MAP)
2183 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2184 	else
2185 		strcpy(comm, "<...>");
2186 }
2187 
2188 void trace_find_cmdline(int pid, char comm[])
2189 {
2190 	preempt_disable();
2191 	arch_spin_lock(&trace_cmdline_lock);
2192 
2193 	__trace_find_cmdline(pid, comm);
2194 
2195 	arch_spin_unlock(&trace_cmdline_lock);
2196 	preempt_enable();
2197 }
2198 
2199 int trace_find_tgid(int pid)
2200 {
2201 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2202 		return 0;
2203 
2204 	return tgid_map[pid];
2205 }
2206 
2207 static int trace_save_tgid(struct task_struct *tsk)
2208 {
2209 	/* treat recording of idle task as a success */
2210 	if (!tsk->pid)
2211 		return 1;
2212 
2213 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2214 		return 0;
2215 
2216 	tgid_map[tsk->pid] = tsk->tgid;
2217 	return 1;
2218 }
2219 
2220 static bool tracing_record_taskinfo_skip(int flags)
2221 {
2222 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2223 		return true;
2224 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2225 		return true;
2226 	if (!__this_cpu_read(trace_taskinfo_save))
2227 		return true;
2228 	return false;
2229 }
2230 
2231 /**
2232  * tracing_record_taskinfo - record the task info of a task
2233  *
2234  * @task  - task to record
2235  * @flags - TRACE_RECORD_CMDLINE for recording comm
2236  *        - TRACE_RECORD_TGID for recording tgid
2237  */
2238 void tracing_record_taskinfo(struct task_struct *task, int flags)
2239 {
2240 	bool done;
2241 
2242 	if (tracing_record_taskinfo_skip(flags))
2243 		return;
2244 
2245 	/*
2246 	 * Record as much task information as possible. If some fail, continue
2247 	 * to try to record the others.
2248 	 */
2249 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2250 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2251 
2252 	/* If recording any information failed, retry again soon. */
2253 	if (!done)
2254 		return;
2255 
2256 	__this_cpu_write(trace_taskinfo_save, false);
2257 }
2258 
2259 /**
2260  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2261  *
2262  * @prev - previous task during sched_switch
2263  * @next - next task during sched_switch
2264  * @flags - TRACE_RECORD_CMDLINE for recording comm
2265  *          TRACE_RECORD_TGID for recording tgid
2266  */
2267 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2268 					  struct task_struct *next, int flags)
2269 {
2270 	bool done;
2271 
2272 	if (tracing_record_taskinfo_skip(flags))
2273 		return;
2274 
2275 	/*
2276 	 * Record as much task information as possible. If some fail, continue
2277 	 * to try to record the others.
2278 	 */
2279 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2280 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2281 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2282 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2283 
2284 	/* If recording any information failed, retry again soon. */
2285 	if (!done)
2286 		return;
2287 
2288 	__this_cpu_write(trace_taskinfo_save, false);
2289 }
2290 
2291 /* Helpers to record a specific task information */
2292 void tracing_record_cmdline(struct task_struct *task)
2293 {
2294 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2295 }
2296 
2297 void tracing_record_tgid(struct task_struct *task)
2298 {
2299 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2300 }
2301 
2302 /*
2303  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2304  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2305  * simplifies those functions and keeps them in sync.
2306  */
2307 enum print_line_t trace_handle_return(struct trace_seq *s)
2308 {
2309 	return trace_seq_has_overflowed(s) ?
2310 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2311 }
2312 EXPORT_SYMBOL_GPL(trace_handle_return);
2313 
2314 void
2315 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2316 			     int pc)
2317 {
2318 	struct task_struct *tsk = current;
2319 
2320 	entry->preempt_count		= pc & 0xff;
2321 	entry->pid			= (tsk) ? tsk->pid : 0;
2322 	entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326 		TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2329 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2335 
/*
 * Non-static entry point that forwards all of its arguments,
 * unchanged, to __trace_buffer_lock_reserve().
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer, int type,
			  unsigned long len, unsigned long flags, int pc)
{
	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
}
2344 
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348 
2349 /**
2350  * trace_buffered_event_enable - enable buffering events
2351  *
2352  * When events are being filtered, it is quicker to use a temporary
2353  * buffer to write the event data into if there's a likely chance
2354  * that it will not be committed. The discard of the ring buffer
2355  * is not as fast as committing, and is much slower than copying
2356  * a commit.
2357  *
2358  * When an event is to be filtered, allocate per cpu buffers to
2359  * write the event data into, and if the event is filtered and discarded
2360  * it is simply dropped, otherwise, the entire data is to be committed
2361  * in one shot.
2362  */
2363 void trace_buffered_event_enable(void)
2364 {
2365 	struct ring_buffer_event *event;
2366 	struct page *page;
2367 	int cpu;
2368 
2369 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370 
2371 	if (trace_buffered_event_ref++)
2372 		return;
2373 
2374 	for_each_tracing_cpu(cpu) {
2375 		page = alloc_pages_node(cpu_to_node(cpu),
2376 					GFP_KERNEL | __GFP_NORETRY, 0);
2377 		if (!page)
2378 			goto failed;
2379 
2380 		event = page_address(page);
2381 		memset(event, 0, sizeof(*event));
2382 
2383 		per_cpu(trace_buffered_event, cpu) = event;
2384 
2385 		preempt_disable();
2386 		if (cpu == smp_processor_id() &&
2387 		    this_cpu_read(trace_buffered_event) !=
2388 		    per_cpu(trace_buffered_event, cpu))
2389 			WARN_ON_ONCE(1);
2390 		preempt_enable();
2391 	}
2392 
2393 	return;
2394  failed:
2395 	trace_buffered_event_disable();
2396 }
2397 
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400 	/* Probably not needed, but do it anyway */
2401 	smp_rmb();
2402 	this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404 
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407 	this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409 
2410 /**
2411  * trace_buffered_event_disable - disable buffering events
2412  *
2413  * When a filter is removed, it is faster to not use the buffered
2414  * events, and to commit directly into the ring buffer. Free up
2415  * the temp buffers when there are no more users. This requires
2416  * special synchronization with current events.
2417  */
2418 void trace_buffered_event_disable(void)
2419 {
2420 	int cpu;
2421 
2422 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423 
2424 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425 		return;
2426 
2427 	if (--trace_buffered_event_ref)
2428 		return;
2429 
2430 	preempt_disable();
2431 	/* For each CPU, set the buffer as used. */
2432 	smp_call_function_many(tracing_buffer_mask,
2433 			       disable_trace_buffered_event, NULL, 1);
2434 	preempt_enable();
2435 
2436 	/* Wait for all current users to finish */
2437 	synchronize_rcu();
2438 
2439 	for_each_tracing_cpu(cpu) {
2440 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441 		per_cpu(trace_buffered_event, cpu) = NULL;
2442 	}
2443 	/*
2444 	 * Make sure trace_buffered_event is NULL before clearing
2445 	 * trace_buffered_event_cnt.
2446 	 */
2447 	smp_wmb();
2448 
2449 	preempt_disable();
2450 	/* Do the work on each cpu */
2451 	smp_call_function_many(tracing_buffer_mask,
2452 			       enable_trace_buffered_event, NULL, 1);
2453 	preempt_enable();
2454 }
2455 
2456 static struct ring_buffer *temp_buffer;
2457 
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460 			  struct trace_event_file *trace_file,
2461 			  int type, unsigned long len,
2462 			  unsigned long flags, int pc)
2463 {
2464 	struct ring_buffer_event *entry;
2465 	int val;
2466 
2467 	*current_rb = trace_file->tr->trace_buffer.buffer;
2468 
2469 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471 	    (entry = this_cpu_read(trace_buffered_event))) {
2472 		/* Try to use the per cpu buffer first */
2473 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2474 		if (val == 1) {
2475 			trace_event_setup(entry, type, flags, pc);
2476 			entry->array[0] = len;
2477 			return entry;
2478 		}
2479 		this_cpu_dec(trace_buffered_event_cnt);
2480 	}
2481 
2482 	entry = __trace_buffer_lock_reserve(*current_rb,
2483 					    type, len, flags, pc);
2484 	/*
2485 	 * If tracing is off, but we have triggers enabled
2486 	 * we still need to look at the event data. Use the temp_buffer
2487 	 * to store the trace event for the tigger to use. It's recusive
2488 	 * safe and will not be recorded anywhere.
2489 	 */
2490 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491 		*current_rb = temp_buffer;
2492 		entry = __trace_buffer_lock_reserve(*current_rb,
2493 						    type, len, flags, pc);
2494 	}
2495 	return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2498 
/* Serializes tracepoint_printk_sysctl() */
static DEFINE_MUTEX(tracepoint_printk_mutex);

/* Protects the shared print iterator while output_printk() formats into it */
static DEFINE_SPINLOCK(tracepoint_iter_lock);
2501 
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504 	struct trace_event_call *event_call;
2505 	struct trace_event *event;
2506 	unsigned long flags;
2507 	struct trace_iterator *iter = tracepoint_print_iter;
2508 
2509 	/* We should never get here if iter is NULL */
2510 	if (WARN_ON_ONCE(!iter))
2511 		return;
2512 
2513 	event_call = fbuffer->trace_file->event_call;
2514 	if (!event_call || !event_call->event.funcs ||
2515 	    !event_call->event.funcs->trace)
2516 		return;
2517 
2518 	event = &fbuffer->trace_file->event_call->event;
2519 
2520 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521 	trace_seq_init(&iter->seq);
2522 	iter->ent = fbuffer->entry;
2523 	event_call->event.funcs->trace(iter, 0, event);
2524 	trace_seq_putc(&iter->seq, 0);
2525 	printk("%s", iter->seq.buffer);
2526 
2527 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529 
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531 			     void __user *buffer, size_t *lenp,
2532 			     loff_t *ppos)
2533 {
2534 	int save_tracepoint_printk;
2535 	int ret;
2536 
2537 	mutex_lock(&tracepoint_printk_mutex);
2538 	save_tracepoint_printk = tracepoint_printk;
2539 
2540 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541 
2542 	/*
2543 	 * This will force exiting early, as tracepoint_printk
2544 	 * is always zero when tracepoint_printk_iter is not allocated
2545 	 */
2546 	if (!tracepoint_print_iter)
2547 		tracepoint_printk = 0;
2548 
2549 	if (save_tracepoint_printk == tracepoint_printk)
2550 		goto out;
2551 
2552 	if (tracepoint_printk)
2553 		static_key_enable(&tracepoint_printk_key.key);
2554 	else
2555 		static_key_disable(&tracepoint_printk_key.key);
2556 
2557  out:
2558 	mutex_unlock(&tracepoint_printk_mutex);
2559 
2560 	return ret;
2561 }
2562 
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565 	if (static_key_false(&tracepoint_printk_key.key))
2566 		output_printk(fbuffer);
2567 
2568 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569 				    fbuffer->event, fbuffer->entry,
2570 				    fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573 
/*
 * Number of stack frames to skip when an event records its stack
 * without pt_regs. The three frames are:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
#define STACK_SKIP 3
2582 
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584 				     struct ring_buffer *buffer,
2585 				     struct ring_buffer_event *event,
2586 				     unsigned long flags, int pc,
2587 				     struct pt_regs *regs)
2588 {
2589 	__buffer_unlock_commit(buffer, event);
2590 
2591 	/*
2592 	 * If regs is not set, then skip the necessary functions.
2593 	 * Note, we can still get here via blktrace, wakeup tracer
2594 	 * and mmiotrace, but that's ok if they lose a function or
2595 	 * two. They are not that meaningful.
2596 	 */
2597 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598 	ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600 
/*
 * Commit @event to @buffer without recording any stack trace,
 * unlike trace_buffer_unlock_commit_regs().
 */
void
trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2610 
2611 static void
2612 trace_process_export(struct trace_export *export,
2613 	       struct ring_buffer_event *event)
2614 {
2615 	struct trace_entry *entry;
2616 	unsigned int size = 0;
2617 
2618 	entry = ring_buffer_event_data(event);
2619 	size = ring_buffer_event_length(event);
2620 	export->write(export, entry, size);
2621 }
2622 
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624 
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626 
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628 
2629 static inline void ftrace_exports_enable(void)
2630 {
2631 	static_branch_enable(&ftrace_exports_enabled);
2632 }
2633 
2634 static inline void ftrace_exports_disable(void)
2635 {
2636 	static_branch_disable(&ftrace_exports_enabled);
2637 }
2638 
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641 	struct trace_export *export;
2642 
2643 	preempt_disable_notrace();
2644 
2645 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2646 	while (export) {
2647 		trace_process_export(export, event);
2648 		export = rcu_dereference_raw_notrace(export->next);
2649 	}
2650 
2651 	preempt_enable_notrace();
2652 }
2653 
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657 	rcu_assign_pointer(export->next, *list);
2658 	/*
2659 	 * We are entering export into the list but another
2660 	 * CPU might be walking that list. We need to make sure
2661 	 * the export->next pointer is valid before another CPU sees
2662 	 * the export pointer included into the list.
2663 	 */
2664 	rcu_assign_pointer(*list, export);
2665 }
2666 
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670 	struct trace_export **p;
2671 
2672 	for (p = list; *p != NULL; p = &(*p)->next)
2673 		if (*p == export)
2674 			break;
2675 
2676 	if (*p != export)
2677 		return -1;
2678 
2679 	rcu_assign_pointer(*p, (*p)->next);
2680 
2681 	return 0;
2682 }
2683 
/*
 * Add @export to @list. When the list was empty, the export static
 * branch is switched on first.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	if (!*list)
		ftrace_exports_enable();

	add_trace_export(list, export);
}
2692 
/*
 * Remove @export from @list. When the list is empty afterwards, the
 * export static branch is switched off.
 *
 * Returns the result of rm_trace_export().
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret = rm_trace_export(list, export);

	if (!*list)
		ftrace_exports_disable();

	return ret;
}
2704 
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707 	if (WARN_ON_ONCE(!export->write))
2708 		return -1;
2709 
2710 	mutex_lock(&ftrace_export_lock);
2711 
2712 	add_ftrace_export(&ftrace_exports_list, export);
2713 
2714 	mutex_unlock(&ftrace_export_lock);
2715 
2716 	return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719 
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722 	int ret;
2723 
2724 	mutex_lock(&ftrace_export_lock);
2725 
2726 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2727 
2728 	mutex_unlock(&ftrace_export_lock);
2729 
2730 	return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2733 
2734 void
2735 trace_function(struct trace_array *tr,
2736 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737 	       int pc)
2738 {
2739 	struct trace_event_call *call = &event_function;
2740 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741 	struct ring_buffer_event *event;
2742 	struct ftrace_entry *entry;
2743 
2744 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745 					    flags, pc);
2746 	if (!event)
2747 		return;
2748 	entry	= ring_buffer_event_data(event);
2749 	entry->ip			= ip;
2750 	entry->parent_ip		= parent_ip;
2751 
2752 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2753 		if (static_branch_unlikely(&ftrace_exports_enabled))
2754 			ftrace_exports(event);
2755 		__buffer_unlock_commit(buffer, event);
2756 	}
2757 }
2758 
2759 #ifdef CONFIG_STACKTRACE
2760 
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING	4
2763 
2764 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765 
2766 struct ftrace_stack {
2767 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769 
2770 
2771 struct ftrace_stacks {
2772 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2773 };
2774 
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777 
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779 				 unsigned long flags,
2780 				 int skip, int pc, struct pt_regs *regs)
2781 {
2782 	struct trace_event_call *call = &event_kernel_stack;
2783 	struct ring_buffer_event *event;
2784 	unsigned int size, nr_entries;
2785 	struct ftrace_stack *fstack;
2786 	struct stack_entry *entry;
2787 	int stackidx;
2788 
2789 	/*
2790 	 * Add one, for this function and the call to save_stack_trace()
2791 	 * If regs is set, then these functions will not be in the way.
2792 	 */
2793 #ifndef CONFIG_UNWINDER_ORC
2794 	if (!regs)
2795 		skip++;
2796 #endif
2797 
2798 	/*
2799 	 * Since events can happen in NMIs there's no safe way to
2800 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2801 	 * or NMI comes in, it will just have to use the default
2802 	 * FTRACE_STACK_SIZE.
2803 	 */
2804 	preempt_disable_notrace();
2805 
2806 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807 
2808 	/* This should never happen. If it does, yell once and skip */
2809 	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2810 		goto out;
2811 
2812 	/*
2813 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814 	 * interrupt will either see the value pre increment or post
2815 	 * increment. If the interrupt happens pre increment it will have
2816 	 * restored the counter when it returns.  We just need a barrier to
2817 	 * keep gcc from moving things around.
2818 	 */
2819 	barrier();
2820 
2821 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822 	size = ARRAY_SIZE(fstack->calls);
2823 
2824 	if (regs) {
2825 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826 						   size, skip);
2827 	} else {
2828 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2829 	}
2830 
2831 	size = nr_entries * sizeof(unsigned long);
2832 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833 					    sizeof(*entry) + size, flags, pc);
2834 	if (!event)
2835 		goto out;
2836 	entry = ring_buffer_event_data(event);
2837 
2838 	memcpy(&entry->caller, fstack->calls, size);
2839 	entry->size = nr_entries;
2840 
2841 	if (!call_filter_check_discard(call, entry, buffer, event))
2842 		__buffer_unlock_commit(buffer, event);
2843 
2844  out:
2845 	/* Again, don't let gcc optimize things here */
2846 	barrier();
2847 	__this_cpu_dec(ftrace_stack_reserve);
2848 	preempt_enable_notrace();
2849 
2850 }
2851 
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853 				      struct ring_buffer *buffer,
2854 				      unsigned long flags,
2855 				      int skip, int pc, struct pt_regs *regs)
2856 {
2857 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858 		return;
2859 
2860 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862 
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864 		   int pc)
2865 {
2866 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867 
2868 	if (rcu_is_watching()) {
2869 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870 		return;
2871 	}
2872 
2873 	/*
2874 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875 	 * but if the above rcu_is_watching() failed, then the NMI
2876 	 * triggered someplace critical, and rcu_irq_enter() should
2877 	 * not be called from NMI.
2878 	 */
2879 	if (unlikely(in_nmi()))
2880 		return;
2881 
2882 	rcu_irq_enter_irqson();
2883 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884 	rcu_irq_exit_irqson();
2885 }
2886 
2887 /**
2888  * trace_dump_stack - record a stack back trace in the trace buffer
2889  * @skip: Number of functions to skip (helper handlers)
2890  */
2891 void trace_dump_stack(int skip)
2892 {
2893 	unsigned long flags;
2894 
2895 	if (tracing_disabled || tracing_selftest_running)
2896 		return;
2897 
2898 	local_save_flags(flags);
2899 
2900 #ifndef CONFIG_UNWINDER_ORC
2901 	/* Skip 1 to skip this function. */
2902 	skip++;
2903 #endif
2904 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905 			     flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
2908 
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Non-zero while this cpu is busy recording a user stack trace */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the user space stack of the current task as a
 * TRACE_USER_STACK event in @buffer.
 *
 * Does nothing unless TRACE_ITER_USERSTACKTRACE is set in the
 * trace_flags of the global trace array, and never records from NMI
 * context or recursively on the same cpu.
 */
static void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	preempt_disable();

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), flags, pc);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			memset(&entry->caller, 0, sizeof(entry->caller));

			stack_trace_save_user(entry->caller,
					      FTRACE_STACK_ENTRIES);
			if (!call_filter_check_discard(call, entry,
						       buffer, event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}

	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user stack trace support: recording is a no-op */
static void ftrace_trace_userstack(struct ring_buffer *buffer,
				   unsigned long flags, int pc)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963 
2964 #endif /* CONFIG_STACKTRACE */
2965 
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968 	int nesting;
2969 	char buffer[4][TRACE_BUF_SIZE];
2970 };
2971 
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973 
2974 /*
2975  * Thise allows for lockless recording.  If we're nested too deeply, then
2976  * this returns NULL.
2977  */
2978 static char *get_trace_buf(void)
2979 {
2980 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981 
2982 	if (!buffer || buffer->nesting >= 4)
2983 		return NULL;
2984 
2985 	buffer->nesting++;
2986 
2987 	/* Interrupts must see nesting incremented before we use the buffer */
2988 	barrier();
2989 	return &buffer->buffer[buffer->nesting][0];
2990 }
2991 
2992 static void put_trace_buf(void)
2993 {
2994 	/* Don't let the decrement of nesting leak before this */
2995 	barrier();
2996 	this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
2998 
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001 	struct trace_buffer_struct *buffers;
3002 
3003 	buffers = alloc_percpu(struct trace_buffer_struct);
3004 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005 		return -ENOMEM;
3006 
3007 	trace_percpu_buffer = buffers;
3008 	return 0;
3009 }
3010 
3011 static int buffers_allocated;
3012 
3013 void trace_printk_init_buffers(void)
3014 {
3015 	if (buffers_allocated)
3016 		return;
3017 
3018 	if (alloc_percpu_trace_buffer())
3019 		return;
3020 
3021 	/* trace_printk() is for debug use only. Don't use it in production. */
3022 
3023 	pr_warn("\n");
3024 	pr_warn("**********************************************************\n");
3025 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3026 	pr_warn("**                                                      **\n");
3027 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3028 	pr_warn("**                                                      **\n");
3029 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3030 	pr_warn("** unsafe for production use.                           **\n");
3031 	pr_warn("**                                                      **\n");
3032 	pr_warn("** If you see this message and you are not debugging    **\n");
3033 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3034 	pr_warn("**                                                      **\n");
3035 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3036 	pr_warn("**********************************************************\n");
3037 
3038 	/* Expand the buffers to set size */
3039 	tracing_update_buffers();
3040 
3041 	buffers_allocated = 1;
3042 
3043 	/*
3044 	 * trace_printk_init_buffers() can be called by modules.
3045 	 * If that happens, then we need to start cmdline recording
3046 	 * directly here. If the global_trace.buffer is already
3047 	 * allocated here, then this was called by module code.
3048 	 */
3049 	if (global_trace.trace_buffer.buffer)
3050 		tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053 
3054 void trace_printk_start_comm(void)
3055 {
3056 	/* Start tracing comms if trace printk is set */
3057 	if (!buffers_allocated)
3058 		return;
3059 	tracing_start_cmdline_record();
3060 }
3061 
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064 	if (!buffers_allocated)
3065 		return;
3066 
3067 	if (enabled)
3068 		tracing_start_cmdline_record();
3069 	else
3070 		tracing_stop_cmdline_record();
3071 }
3072 
3073 /**
3074  * trace_vbprintk - write binary msg to tracing buffer
3075  *
3076  */
3077 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3078 {
3079 	struct trace_event_call *call = &event_bprint;
3080 	struct ring_buffer_event *event;
3081 	struct ring_buffer *buffer;
3082 	struct trace_array *tr = &global_trace;
3083 	struct bprint_entry *entry;
3084 	unsigned long flags;
3085 	char *tbuffer;
3086 	int len = 0, size, pc;
3087 
3088 	if (unlikely(tracing_selftest_running || tracing_disabled))
3089 		return 0;
3090 
3091 	/* Don't pollute graph traces with trace_vprintk internals */
3092 	pause_graph_tracing();
3093 
3094 	pc = preempt_count();
3095 	preempt_disable_notrace();
3096 
3097 	tbuffer = get_trace_buf();
3098 	if (!tbuffer) {
3099 		len = 0;
3100 		goto out_nobuffer;
3101 	}
3102 
3103 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3104 
3105 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3106 		goto out;
3107 
3108 	local_save_flags(flags);
3109 	size = sizeof(*entry) + sizeof(u32) * len;
3110 	buffer = tr->trace_buffer.buffer;
3111 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3112 					    flags, pc);
3113 	if (!event)
3114 		goto out;
3115 	entry = ring_buffer_event_data(event);
3116 	entry->ip			= ip;
3117 	entry->fmt			= fmt;
3118 
3119 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3120 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3121 		__buffer_unlock_commit(buffer, event);
3122 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3123 	}
3124 
3125 out:
3126 	put_trace_buf();
3127 
3128 out_nobuffer:
3129 	preempt_enable_notrace();
3130 	unpause_graph_tracing();
3131 
3132 	return len;
3133 }
3134 EXPORT_SYMBOL_GPL(trace_vbprintk);
3135 
3136 __printf(3, 0)
3137 static int
3138 __trace_array_vprintk(struct ring_buffer *buffer,
3139 		      unsigned long ip, const char *fmt, va_list args)
3140 {
3141 	struct trace_event_call *call = &event_print;
3142 	struct ring_buffer_event *event;
3143 	int len = 0, size, pc;
3144 	struct print_entry *entry;
3145 	unsigned long flags;
3146 	char *tbuffer;
3147 
3148 	if (tracing_disabled || tracing_selftest_running)
3149 		return 0;
3150 
3151 	/* Don't pollute graph traces with trace_vprintk internals */
3152 	pause_graph_tracing();
3153 
3154 	pc = preempt_count();
3155 	preempt_disable_notrace();
3156 
3157 
3158 	tbuffer = get_trace_buf();
3159 	if (!tbuffer) {
3160 		len = 0;
3161 		goto out_nobuffer;
3162 	}
3163 
3164 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3165 
3166 	local_save_flags(flags);
3167 	size = sizeof(*entry) + len + 1;
3168 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3169 					    flags, pc);
3170 	if (!event)
3171 		goto out;
3172 	entry = ring_buffer_event_data(event);
3173 	entry->ip = ip;
3174 
3175 	memcpy(&entry->buf, tbuffer, len + 1);
3176 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3177 		__buffer_unlock_commit(buffer, event);
3178 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3179 	}
3180 
3181 out:
3182 	put_trace_buf();
3183 
3184 out_nobuffer:
3185 	preempt_enable_notrace();
3186 	unpause_graph_tracing();
3187 
3188 	return len;
3189 }
3190 
3191 __printf(3, 0)
3192 int trace_array_vprintk(struct trace_array *tr,
3193 			unsigned long ip, const char *fmt, va_list args)
3194 {
3195 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3196 }
3197 
3198 __printf(3, 0)
3199 int trace_array_printk(struct trace_array *tr,
3200 		       unsigned long ip, const char *fmt, ...)
3201 {
3202 	int ret;
3203 	va_list ap;
3204 
3205 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3206 		return 0;
3207 
3208 	va_start(ap, fmt);
3209 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3210 	va_end(ap);
3211 	return ret;
3212 }
3213 EXPORT_SYMBOL_GPL(trace_array_printk);
3214 
3215 __printf(3, 4)
3216 int trace_array_printk_buf(struct ring_buffer *buffer,
3217 			   unsigned long ip, const char *fmt, ...)
3218 {
3219 	int ret;
3220 	va_list ap;
3221 
3222 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3223 		return 0;
3224 
3225 	va_start(ap, fmt);
3226 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3227 	va_end(ap);
3228 	return ret;
3229 }
3230 
3231 __printf(2, 0)
3232 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3233 {
3234 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3235 }
3236 EXPORT_SYMBOL_GPL(trace_vprintk);
3237 
3238 static void trace_iterator_increment(struct trace_iterator *iter)
3239 {
3240 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3241 
3242 	iter->idx++;
3243 	if (buf_iter)
3244 		ring_buffer_read(buf_iter, NULL);
3245 }
3246 
3247 static struct trace_entry *
3248 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3249 		unsigned long *lost_events)
3250 {
3251 	struct ring_buffer_event *event;
3252 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3253 
3254 	if (buf_iter)
3255 		event = ring_buffer_iter_peek(buf_iter, ts);
3256 	else
3257 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3258 					 lost_events);
3259 
3260 	if (event) {
3261 		iter->ent_size = ring_buffer_event_length(event);
3262 		return ring_buffer_event_data(event);
3263 	}
3264 	iter->ent_size = 0;
3265 	return NULL;
3266 }
3267 
3268 static struct trace_entry *
3269 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3270 		  unsigned long *missing_events, u64 *ent_ts)
3271 {
3272 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3273 	struct trace_entry *ent, *next = NULL;
3274 	unsigned long lost_events = 0, next_lost = 0;
3275 	int cpu_file = iter->cpu_file;
3276 	u64 next_ts = 0, ts;
3277 	int next_cpu = -1;
3278 	int next_size = 0;
3279 	int cpu;
3280 
3281 	/*
3282 	 * If we are in a per_cpu trace file, don't bother by iterating over
3283 	 * all cpu and peek directly.
3284 	 */
3285 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3286 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3287 			return NULL;
3288 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3289 		if (ent_cpu)
3290 			*ent_cpu = cpu_file;
3291 
3292 		return ent;
3293 	}
3294 
3295 	for_each_tracing_cpu(cpu) {
3296 
3297 		if (ring_buffer_empty_cpu(buffer, cpu))
3298 			continue;
3299 
3300 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3301 
3302 		/*
3303 		 * Pick the entry with the smallest timestamp:
3304 		 */
3305 		if (ent && (!next || ts < next_ts)) {
3306 			next = ent;
3307 			next_cpu = cpu;
3308 			next_ts = ts;
3309 			next_lost = lost_events;
3310 			next_size = iter->ent_size;
3311 		}
3312 	}
3313 
3314 	iter->ent_size = next_size;
3315 
3316 	if (ent_cpu)
3317 		*ent_cpu = next_cpu;
3318 
3319 	if (ent_ts)
3320 		*ent_ts = next_ts;
3321 
3322 	if (missing_events)
3323 		*missing_events = next_lost;
3324 
3325 	return next;
3326 }
3327 
3328 /* Find the next real entry, without updating the iterator itself */
3329 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3330 					  int *ent_cpu, u64 *ent_ts)
3331 {
3332 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3333 }
3334 
3335 /* Find the next real entry, and increment the iterator to the next entry */
3336 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3337 {
3338 	iter->ent = __find_next_entry(iter, &iter->cpu,
3339 				      &iter->lost_events, &iter->ts);
3340 
3341 	if (iter->ent)
3342 		trace_iterator_increment(iter);
3343 
3344 	return iter->ent ? iter : NULL;
3345 }
3346 
3347 static void trace_consume(struct trace_iterator *iter)
3348 {
3349 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3350 			    &iter->lost_events);
3351 }
3352 
3353 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3354 {
3355 	struct trace_iterator *iter = m->private;
3356 	int i = (int)*pos;
3357 	void *ent;
3358 
3359 	WARN_ON_ONCE(iter->leftover);
3360 
3361 	(*pos)++;
3362 
3363 	/* can't go backwards */
3364 	if (iter->idx > i)
3365 		return NULL;
3366 
3367 	if (iter->idx < 0)
3368 		ent = trace_find_next_entry_inc(iter);
3369 	else
3370 		ent = iter;
3371 
3372 	while (ent && iter->idx < i)
3373 		ent = trace_find_next_entry_inc(iter);
3374 
3375 	iter->pos = *pos;
3376 
3377 	return ent;
3378 }
3379 
3380 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3381 {
3382 	struct ring_buffer_event *event;
3383 	struct ring_buffer_iter *buf_iter;
3384 	unsigned long entries = 0;
3385 	u64 ts;
3386 
3387 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3388 
3389 	buf_iter = trace_buffer_iter(iter, cpu);
3390 	if (!buf_iter)
3391 		return;
3392 
3393 	ring_buffer_iter_reset(buf_iter);
3394 
3395 	/*
3396 	 * We could have the case with the max latency tracers
3397 	 * that a reset never took place on a cpu. This is evident
3398 	 * by the timestamp being before the start of the buffer.
3399 	 */
3400 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3401 		if (ts >= iter->trace_buffer->time_start)
3402 			break;
3403 		entries++;
3404 		ring_buffer_read(buf_iter, NULL);
3405 	}
3406 
3407 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3408 }
3409 
3410 /*
3411  * The current tracer is copied to avoid a global locking
3412  * all around.
3413  */
3414 static void *s_start(struct seq_file *m, loff_t *pos)
3415 {
3416 	struct trace_iterator *iter = m->private;
3417 	struct trace_array *tr = iter->tr;
3418 	int cpu_file = iter->cpu_file;
3419 	void *p = NULL;
3420 	loff_t l = 0;
3421 	int cpu;
3422 
3423 	/*
3424 	 * copy the tracer to avoid using a global lock all around.
3425 	 * iter->trace is a copy of current_trace, the pointer to the
3426 	 * name may be used instead of a strcmp(), as iter->trace->name
3427 	 * will point to the same string as current_trace->name.
3428 	 */
3429 	mutex_lock(&trace_types_lock);
3430 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3431 		*iter->trace = *tr->current_trace;
3432 	mutex_unlock(&trace_types_lock);
3433 
3434 #ifdef CONFIG_TRACER_MAX_TRACE
3435 	if (iter->snapshot && iter->trace->use_max_tr)
3436 		return ERR_PTR(-EBUSY);
3437 #endif
3438 
3439 	if (!iter->snapshot)
3440 		atomic_inc(&trace_record_taskinfo_disabled);
3441 
3442 	if (*pos != iter->pos) {
3443 		iter->ent = NULL;
3444 		iter->cpu = 0;
3445 		iter->idx = -1;
3446 
3447 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3448 			for_each_tracing_cpu(cpu)
3449 				tracing_iter_reset(iter, cpu);
3450 		} else
3451 			tracing_iter_reset(iter, cpu_file);
3452 
3453 		iter->leftover = 0;
3454 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3455 			;
3456 
3457 	} else {
3458 		/*
3459 		 * If we overflowed the seq_file before, then we want
3460 		 * to just reuse the trace_seq buffer again.
3461 		 */
3462 		if (iter->leftover)
3463 			p = iter;
3464 		else {
3465 			l = *pos - 1;
3466 			p = s_next(m, p, &l);
3467 		}
3468 	}
3469 
3470 	trace_event_read_lock();
3471 	trace_access_lock(cpu_file);
3472 	return p;
3473 }
3474 
3475 static void s_stop(struct seq_file *m, void *p)
3476 {
3477 	struct trace_iterator *iter = m->private;
3478 
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 	if (iter->snapshot && iter->trace->use_max_tr)
3481 		return;
3482 #endif
3483 
3484 	if (!iter->snapshot)
3485 		atomic_dec(&trace_record_taskinfo_disabled);
3486 
3487 	trace_access_unlock(iter->cpu_file);
3488 	trace_event_read_unlock();
3489 }
3490 
3491 static void
3492 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3493 		      unsigned long *entries, int cpu)
3494 {
3495 	unsigned long count;
3496 
3497 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3498 	/*
3499 	 * If this buffer has skipped entries, then we hold all
3500 	 * entries for the trace and we need to ignore the
3501 	 * ones before the time stamp.
3502 	 */
3503 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3504 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3505 		/* total is the same as the entries */
3506 		*total = count;
3507 	} else
3508 		*total = count +
3509 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3510 	*entries = count;
3511 }
3512 
3513 static void
3514 get_total_entries(struct trace_buffer *buf,
3515 		  unsigned long *total, unsigned long *entries)
3516 {
3517 	unsigned long t, e;
3518 	int cpu;
3519 
3520 	*total = 0;
3521 	*entries = 0;
3522 
3523 	for_each_tracing_cpu(cpu) {
3524 		get_total_entries_cpu(buf, &t, &e, cpu);
3525 		*total += t;
3526 		*entries += e;
3527 	}
3528 }
3529 
3530 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3531 {
3532 	unsigned long total, entries;
3533 
3534 	if (!tr)
3535 		tr = &global_trace;
3536 
3537 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3538 
3539 	return entries;
3540 }
3541 
3542 unsigned long trace_total_entries(struct trace_array *tr)
3543 {
3544 	unsigned long total, entries;
3545 
3546 	if (!tr)
3547 		tr = &global_trace;
3548 
3549 	get_total_entries(&tr->trace_buffer, &total, &entries);
3550 
3551 	return entries;
3552 }
3553 
/* Emit the column legend used by the latency trace format. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                  _------=> CPU#            \n"
		"#                 / _-----=> irqs-off        \n"
		"#                | / _----=> need-resched    \n"
		"#                || / _---=> hardirq/softirq \n"
		"#                ||| / _--=> preempt-depth   \n"
		"#                |||| /     delay            \n"
		"#  cmd     pid   ||||| time  |   caller      \n"
		"#     \\   /      |||||  \\    |   /         \n";

	seq_puts(m, legend);
}
3565 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs.
 */
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long written;
	unsigned long in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
3576 
3577 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3578 				   unsigned int flags)
3579 {
3580 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3581 
3582 	print_event_info(buf, m);
3583 
3584 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3585 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3586 }
3587 
3588 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3589 				       unsigned int flags)
3590 {
3591 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3592 	const char *space = "          ";
3593 	int prec = tgid ? 10 : 2;
3594 
3595 	print_event_info(buf, m);
3596 
3597 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3598 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3599 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3600 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3601 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3602 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3603 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3604 }
3605 
3606 void
3607 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3608 {
3609 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3610 	struct trace_buffer *buf = iter->trace_buffer;
3611 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3612 	struct tracer *type = iter->trace;
3613 	unsigned long entries;
3614 	unsigned long total;
3615 	const char *name = "preemption";
3616 
3617 	name = type->name;
3618 
3619 	get_total_entries(buf, &total, &entries);
3620 
3621 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3622 		   name, UTS_RELEASE);
3623 	seq_puts(m, "# -----------------------------------"
3624 		 "---------------------------------\n");
3625 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3626 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3627 		   nsecs_to_usecs(data->saved_latency),
3628 		   entries,
3629 		   total,
3630 		   buf->cpu,
3631 #if defined(CONFIG_PREEMPT_NONE)
3632 		   "server",
3633 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3634 		   "desktop",
3635 #elif defined(CONFIG_PREEMPT)
3636 		   "preempt",
3637 #else
3638 		   "unknown",
3639 #endif
3640 		   /* These are reserved for later use */
3641 		   0, 0, 0, 0);
3642 #ifdef CONFIG_SMP
3643 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3644 #else
3645 	seq_puts(m, ")\n");
3646 #endif
3647 	seq_puts(m, "#    -----------------\n");
3648 	seq_printf(m, "#    | task: %.16s-%d "
3649 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3650 		   data->comm, data->pid,
3651 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3652 		   data->policy, data->rt_priority);
3653 	seq_puts(m, "#    -----------------\n");
3654 
3655 	if (data->critical_start) {
3656 		seq_puts(m, "#  => started at: ");
3657 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3658 		trace_print_seq(m, &iter->seq);
3659 		seq_puts(m, "\n#  => ended at:   ");
3660 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3661 		trace_print_seq(m, &iter->seq);
3662 		seq_puts(m, "\n#\n");
3663 	}
3664 
3665 	seq_puts(m, "#\n");
3666 }
3667 
3668 static void test_cpu_buff_start(struct trace_iterator *iter)
3669 {
3670 	struct trace_seq *s = &iter->seq;
3671 	struct trace_array *tr = iter->tr;
3672 
3673 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3674 		return;
3675 
3676 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3677 		return;
3678 
3679 	if (cpumask_available(iter->started) &&
3680 	    cpumask_test_cpu(iter->cpu, iter->started))
3681 		return;
3682 
3683 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3684 		return;
3685 
3686 	if (cpumask_available(iter->started))
3687 		cpumask_set_cpu(iter->cpu, iter->started);
3688 
3689 	/* Don't print started cpu buffer for the first entry of the trace */
3690 	if (iter->idx > 1)
3691 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3692 				iter->cpu);
3693 }
3694 
3695 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3696 {
3697 	struct trace_array *tr = iter->tr;
3698 	struct trace_seq *s = &iter->seq;
3699 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3700 	struct trace_entry *entry;
3701 	struct trace_event *event;
3702 
3703 	entry = iter->ent;
3704 
3705 	test_cpu_buff_start(iter);
3706 
3707 	event = ftrace_find_event(entry->type);
3708 
3709 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3710 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3711 			trace_print_lat_context(iter);
3712 		else
3713 			trace_print_context(iter);
3714 	}
3715 
3716 	if (trace_seq_has_overflowed(s))
3717 		return TRACE_TYPE_PARTIAL_LINE;
3718 
3719 	if (event)
3720 		return event->funcs->trace(iter, sym_flags, event);
3721 
3722 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3723 
3724 	return trace_handle_return(s);
3725 }
3726 
3727 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3728 {
3729 	struct trace_array *tr = iter->tr;
3730 	struct trace_seq *s = &iter->seq;
3731 	struct trace_entry *entry;
3732 	struct trace_event *event;
3733 
3734 	entry = iter->ent;
3735 
3736 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3737 		trace_seq_printf(s, "%d %d %llu ",
3738 				 entry->pid, iter->cpu, iter->ts);
3739 
3740 	if (trace_seq_has_overflowed(s))
3741 		return TRACE_TYPE_PARTIAL_LINE;
3742 
3743 	event = ftrace_find_event(entry->type);
3744 	if (event)
3745 		return event->funcs->raw(iter, 0, event);
3746 
3747 	trace_seq_printf(s, "%d ?\n", entry->type);
3748 
3749 	return trace_handle_return(s);
3750 }
3751 
3752 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3753 {
3754 	struct trace_array *tr = iter->tr;
3755 	struct trace_seq *s = &iter->seq;
3756 	unsigned char newline = '\n';
3757 	struct trace_entry *entry;
3758 	struct trace_event *event;
3759 
3760 	entry = iter->ent;
3761 
3762 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3763 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3764 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3765 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3766 		if (trace_seq_has_overflowed(s))
3767 			return TRACE_TYPE_PARTIAL_LINE;
3768 	}
3769 
3770 	event = ftrace_find_event(entry->type);
3771 	if (event) {
3772 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3773 		if (ret != TRACE_TYPE_HANDLED)
3774 			return ret;
3775 	}
3776 
3777 	SEQ_PUT_FIELD(s, newline);
3778 
3779 	return trace_handle_return(s);
3780 }
3781 
3782 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3783 {
3784 	struct trace_array *tr = iter->tr;
3785 	struct trace_seq *s = &iter->seq;
3786 	struct trace_entry *entry;
3787 	struct trace_event *event;
3788 
3789 	entry = iter->ent;
3790 
3791 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3792 		SEQ_PUT_FIELD(s, entry->pid);
3793 		SEQ_PUT_FIELD(s, iter->cpu);
3794 		SEQ_PUT_FIELD(s, iter->ts);
3795 		if (trace_seq_has_overflowed(s))
3796 			return TRACE_TYPE_PARTIAL_LINE;
3797 	}
3798 
3799 	event = ftrace_find_event(entry->type);
3800 	return event ? event->funcs->binary(iter, 0, event) :
3801 		TRACE_TYPE_HANDLED;
3802 }
3803 
3804 int trace_empty(struct trace_iterator *iter)
3805 {
3806 	struct ring_buffer_iter *buf_iter;
3807 	int cpu;
3808 
3809 	/* If we are looking at one CPU buffer, only check that one */
3810 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3811 		cpu = iter->cpu_file;
3812 		buf_iter = trace_buffer_iter(iter, cpu);
3813 		if (buf_iter) {
3814 			if (!ring_buffer_iter_empty(buf_iter))
3815 				return 0;
3816 		} else {
3817 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3818 				return 0;
3819 		}
3820 		return 1;
3821 	}
3822 
3823 	for_each_tracing_cpu(cpu) {
3824 		buf_iter = trace_buffer_iter(iter, cpu);
3825 		if (buf_iter) {
3826 			if (!ring_buffer_iter_empty(buf_iter))
3827 				return 0;
3828 		} else {
3829 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3830 				return 0;
3831 		}
3832 	}
3833 
3834 	return 1;
3835 }
3836 
3837 /*  Called with trace_event_read_lock() held. */
3838 enum print_line_t print_trace_line(struct trace_iterator *iter)
3839 {
3840 	struct trace_array *tr = iter->tr;
3841 	unsigned long trace_flags = tr->trace_flags;
3842 	enum print_line_t ret;
3843 
3844 	if (iter->lost_events) {
3845 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3846 				 iter->cpu, iter->lost_events);
3847 		if (trace_seq_has_overflowed(&iter->seq))
3848 			return TRACE_TYPE_PARTIAL_LINE;
3849 	}
3850 
3851 	if (iter->trace && iter->trace->print_line) {
3852 		ret = iter->trace->print_line(iter);
3853 		if (ret != TRACE_TYPE_UNHANDLED)
3854 			return ret;
3855 	}
3856 
3857 	if (iter->ent->type == TRACE_BPUTS &&
3858 			trace_flags & TRACE_ITER_PRINTK &&
3859 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3860 		return trace_print_bputs_msg_only(iter);
3861 
3862 	if (iter->ent->type == TRACE_BPRINT &&
3863 			trace_flags & TRACE_ITER_PRINTK &&
3864 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3865 		return trace_print_bprintk_msg_only(iter);
3866 
3867 	if (iter->ent->type == TRACE_PRINT &&
3868 			trace_flags & TRACE_ITER_PRINTK &&
3869 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3870 		return trace_print_printk_msg_only(iter);
3871 
3872 	if (trace_flags & TRACE_ITER_BIN)
3873 		return print_bin_fmt(iter);
3874 
3875 	if (trace_flags & TRACE_ITER_HEX)
3876 		return print_hex_fmt(iter);
3877 
3878 	if (trace_flags & TRACE_ITER_RAW)
3879 		return print_raw_fmt(iter);
3880 
3881 	return print_trace_fmt(iter);
3882 }
3883 
3884 void trace_latency_header(struct seq_file *m)
3885 {
3886 	struct trace_iterator *iter = m->private;
3887 	struct trace_array *tr = iter->tr;
3888 
3889 	/* print nothing if the buffers are empty */
3890 	if (trace_empty(iter))
3891 		return;
3892 
3893 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3894 		print_trace_header(m, iter);
3895 
3896 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3897 		print_lat_help_header(m);
3898 }
3899 
3900 void trace_default_header(struct seq_file *m)
3901 {
3902 	struct trace_iterator *iter = m->private;
3903 	struct trace_array *tr = iter->tr;
3904 	unsigned long trace_flags = tr->trace_flags;
3905 
3906 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3907 		return;
3908 
3909 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3910 		/* print nothing if the buffers are empty */
3911 		if (trace_empty(iter))
3912 			return;
3913 		print_trace_header(m, iter);
3914 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3915 			print_lat_help_header(m);
3916 	} else {
3917 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3918 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3919 				print_func_help_header_irq(iter->trace_buffer,
3920 							   m, trace_flags);
3921 			else
3922 				print_func_help_header(iter->trace_buffer, m,
3923 						       trace_flags);
3924 		}
3925 	}
3926 }
3927 
/* Warn in the trace header when ftrace_is_dead() reports a failure. */
static void test_ftrace_alive(struct seq_file *m)
{
	static const char warning[] =
		"# WARNING: FUNCTION TRACING IS CORRUPTED\n"
		"#          MAY BE MISSING FUNCTION EVENTS\n";

	if (ftrace_is_dead())
		seq_puts(m, warning);
}
3935 
3936 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top level "snapshot" file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char usage[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, usage);
}
3946 
/*
 * Usage text for a per_cpu "snapshot" file.  The text for "echo 1"
 * depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif
	static const char clear_help[] =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, clear_help);
}
3961 
3962 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3963 {
3964 	if (iter->tr->allocated_snapshot)
3965 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3966 	else
3967 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3968 
3969 	seq_puts(m, "# Snapshot commands:\n");
3970 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3971 		show_snapshot_main_help(m);
3972 	else
3973 		show_snapshot_percpu_help(m);
3974 }
3975 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
3978 #endif
3979 
3980 static int s_show(struct seq_file *m, void *v)
3981 {
3982 	struct trace_iterator *iter = v;
3983 	int ret;
3984 
3985 	if (iter->ent == NULL) {
3986 		if (iter->tr) {
3987 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3988 			seq_puts(m, "#\n");
3989 			test_ftrace_alive(m);
3990 		}
3991 		if (iter->snapshot && trace_empty(iter))
3992 			print_snapshot_help(m, iter);
3993 		else if (iter->trace && iter->trace->print_header)
3994 			iter->trace->print_header(m);
3995 		else
3996 			trace_default_header(m);
3997 
3998 	} else if (iter->leftover) {
3999 		/*
4000 		 * If we filled the seq_file buffer earlier, we
4001 		 * want to just show it now.
4002 		 */
4003 		ret = trace_print_seq(m, &iter->seq);
4004 
4005 		/* ret should this time be zero, but you never know */
4006 		iter->leftover = ret;
4007 
4008 	} else {
4009 		print_trace_line(iter);
4010 		ret = trace_print_seq(m, &iter->seq);
4011 		/*
4012 		 * If we overflow the seq_file buffer, then it will
4013 		 * ask us for this data again at start up.
4014 		 * Use that instead.
4015 		 *  ret is 0 if seq_file write succeeded.
4016 		 *        -1 otherwise.
4017 		 */
4018 		iter->leftover = ret;
4019 	}
4020 
4021 	return 0;
4022 }
4023 
4024 /*
4025  * Should be used after trace_array_get(), trace_types_lock
4026  * ensures that i_cdev was already initialized.
4027  */
4028 static inline int tracing_get_cpu(struct inode *inode)
4029 {
4030 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4031 		return (long)inode->i_cdev - 1;
4032 	return RING_BUFFER_ALL_CPUS;
4033 }
4034 
4035 static const struct seq_operations tracer_seq_ops = {
4036 	.start		= s_start,
4037 	.next		= s_next,
4038 	.stop		= s_stop,
4039 	.show		= s_show,
4040 };
4041 
4042 static struct trace_iterator *
4043 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4044 {
4045 	struct trace_array *tr = inode->i_private;
4046 	struct trace_iterator *iter;
4047 	int cpu;
4048 
4049 	if (tracing_disabled)
4050 		return ERR_PTR(-ENODEV);
4051 
4052 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4053 	if (!iter)
4054 		return ERR_PTR(-ENOMEM);
4055 
4056 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4057 				    GFP_KERNEL);
4058 	if (!iter->buffer_iter)
4059 		goto release;
4060 
4061 	/*
4062 	 * We make a copy of the current tracer to avoid concurrent
4063 	 * changes on it while we are reading.
4064 	 */
4065 	mutex_lock(&trace_types_lock);
4066 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4067 	if (!iter->trace)
4068 		goto fail;
4069 
4070 	*iter->trace = *tr->current_trace;
4071 
4072 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4073 		goto fail;
4074 
4075 	iter->tr = tr;
4076 
4077 #ifdef CONFIG_TRACER_MAX_TRACE
4078 	/* Currently only the top directory has a snapshot */
4079 	if (tr->current_trace->print_max || snapshot)
4080 		iter->trace_buffer = &tr->max_buffer;
4081 	else
4082 #endif
4083 		iter->trace_buffer = &tr->trace_buffer;
4084 	iter->snapshot = snapshot;
4085 	iter->pos = -1;
4086 	iter->cpu_file = tracing_get_cpu(inode);
4087 	mutex_init(&iter->mutex);
4088 
4089 	/* Notify the tracer early; before we stop tracing. */
4090 	if (iter->trace && iter->trace->open)
4091 		iter->trace->open(iter);
4092 
4093 	/* Annotate start of buffers if we had overruns */
4094 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4095 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4096 
4097 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4098 	if (trace_clocks[tr->clock_id].in_ns)
4099 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4100 
4101 	/* stop the trace while dumping if we are not opening "snapshot" */
4102 	if (!iter->snapshot)
4103 		tracing_stop_tr(tr);
4104 
4105 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4106 		for_each_tracing_cpu(cpu) {
4107 			iter->buffer_iter[cpu] =
4108 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4109 							 cpu, GFP_KERNEL);
4110 		}
4111 		ring_buffer_read_prepare_sync();
4112 		for_each_tracing_cpu(cpu) {
4113 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4114 			tracing_iter_reset(iter, cpu);
4115 		}
4116 	} else {
4117 		cpu = iter->cpu_file;
4118 		iter->buffer_iter[cpu] =
4119 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4120 						 cpu, GFP_KERNEL);
4121 		ring_buffer_read_prepare_sync();
4122 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4123 		tracing_iter_reset(iter, cpu);
4124 	}
4125 
4126 	mutex_unlock(&trace_types_lock);
4127 
4128 	return iter;
4129 
4130  fail:
4131 	mutex_unlock(&trace_types_lock);
4132 	kfree(iter->trace);
4133 	kfree(iter->buffer_iter);
4134 release:
4135 	seq_release_private(inode, file);
4136 	return ERR_PTR(-ENOMEM);
4137 }
4138 
4139 int tracing_open_generic(struct inode *inode, struct file *filp)
4140 {
4141 	if (tracing_disabled)
4142 		return -ENODEV;
4143 
4144 	filp->private_data = inode->i_private;
4145 	return 0;
4146 }
4147 
4148 bool tracing_is_disabled(void)
4149 {
4150 	return (tracing_disabled) ? true: false;
4151 }
4152 
4153 /*
4154  * Open and update trace_array ref count.
4155  * Must have the current trace_array passed to it.
4156  */
4157 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4158 {
4159 	struct trace_array *tr = inode->i_private;
4160 
4161 	if (tracing_disabled)
4162 		return -ENODEV;
4163 
4164 	if (trace_array_get(tr) < 0)
4165 		return -ENODEV;
4166 
4167 	filp->private_data = inode->i_private;
4168 
4169 	return 0;
4170 }
4171 
4172 static int tracing_release(struct inode *inode, struct file *file)
4173 {
4174 	struct trace_array *tr = inode->i_private;
4175 	struct seq_file *m = file->private_data;
4176 	struct trace_iterator *iter;
4177 	int cpu;
4178 
4179 	if (!(file->f_mode & FMODE_READ)) {
4180 		trace_array_put(tr);
4181 		return 0;
4182 	}
4183 
4184 	/* Writes do not use seq_file */
4185 	iter = m->private;
4186 	mutex_lock(&trace_types_lock);
4187 
4188 	for_each_tracing_cpu(cpu) {
4189 		if (iter->buffer_iter[cpu])
4190 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4191 	}
4192 
4193 	if (iter->trace && iter->trace->close)
4194 		iter->trace->close(iter);
4195 
4196 	if (!iter->snapshot)
4197 		/* reenable tracing if it was previously enabled */
4198 		tracing_start_tr(tr);
4199 
4200 	__trace_array_put(tr);
4201 
4202 	mutex_unlock(&trace_types_lock);
4203 
4204 	mutex_destroy(&iter->mutex);
4205 	free_cpumask_var(iter->started);
4206 	kfree(iter->trace);
4207 	kfree(iter->buffer_iter);
4208 	seq_release_private(inode, file);
4209 
4210 	return 0;
4211 }
4212 
4213 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4214 {
4215 	struct trace_array *tr = inode->i_private;
4216 
4217 	trace_array_put(tr);
4218 	return 0;
4219 }
4220 
4221 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4222 {
4223 	struct trace_array *tr = inode->i_private;
4224 
4225 	trace_array_put(tr);
4226 
4227 	return single_release(inode, file);
4228 }
4229 
4230 static int tracing_open(struct inode *inode, struct file *file)
4231 {
4232 	struct trace_array *tr = inode->i_private;
4233 	struct trace_iterator *iter;
4234 	int ret = 0;
4235 
4236 	if (trace_array_get(tr) < 0)
4237 		return -ENODEV;
4238 
4239 	/* If this file was open for write, then erase contents */
4240 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4241 		int cpu = tracing_get_cpu(inode);
4242 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4243 
4244 #ifdef CONFIG_TRACER_MAX_TRACE
4245 		if (tr->current_trace->print_max)
4246 			trace_buf = &tr->max_buffer;
4247 #endif
4248 
4249 		if (cpu == RING_BUFFER_ALL_CPUS)
4250 			tracing_reset_online_cpus(trace_buf);
4251 		else
4252 			tracing_reset(trace_buf, cpu);
4253 	}
4254 
4255 	if (file->f_mode & FMODE_READ) {
4256 		iter = __tracing_open(inode, file, false);
4257 		if (IS_ERR(iter))
4258 			ret = PTR_ERR(iter);
4259 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4260 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4261 	}
4262 
4263 	if (ret < 0)
4264 		trace_array_put(tr);
4265 
4266 	return ret;
4267 }
4268 
4269 /*
4270  * Some tracers are not suitable for instance buffers.
4271  * A tracer is always available for the global array (toplevel)
4272  * or if it explicitly states that it is.
4273  */
4274 static bool
4275 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4276 {
4277 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4278 }
4279 
4280 /* Find the next tracer that this trace array may use */
4281 static struct tracer *
4282 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4283 {
4284 	while (t && !trace_ok_for_array(t, tr))
4285 		t = t->next;
4286 
4287 	return t;
4288 }
4289 
4290 static void *
4291 t_next(struct seq_file *m, void *v, loff_t *pos)
4292 {
4293 	struct trace_array *tr = m->private;
4294 	struct tracer *t = v;
4295 
4296 	(*pos)++;
4297 
4298 	if (t)
4299 		t = get_tracer_for_array(tr, t->next);
4300 
4301 	return t;
4302 }
4303 
4304 static void *t_start(struct seq_file *m, loff_t *pos)
4305 {
4306 	struct trace_array *tr = m->private;
4307 	struct tracer *t;
4308 	loff_t l = 0;
4309 
4310 	mutex_lock(&trace_types_lock);
4311 
4312 	t = get_tracer_for_array(tr, trace_types);
4313 	for (; t && l < *pos; t = t_next(m, t, &l))
4314 			;
4315 
4316 	return t;
4317 }
4318 
4319 static void t_stop(struct seq_file *m, void *p)
4320 {
4321 	mutex_unlock(&trace_types_lock);
4322 }
4323 
4324 static int t_show(struct seq_file *m, void *v)
4325 {
4326 	struct tracer *t = v;
4327 
4328 	if (!t)
4329 		return 0;
4330 
4331 	seq_puts(m, t->name);
4332 	if (t->next)
4333 		seq_putc(m, ' ');
4334 	else
4335 		seq_putc(m, '\n');
4336 
4337 	return 0;
4338 }
4339 
4340 static const struct seq_operations show_traces_seq_ops = {
4341 	.start		= t_start,
4342 	.next		= t_next,
4343 	.stop		= t_stop,
4344 	.show		= t_show,
4345 };
4346 
4347 static int show_traces_open(struct inode *inode, struct file *file)
4348 {
4349 	struct trace_array *tr = inode->i_private;
4350 	struct seq_file *m;
4351 	int ret;
4352 
4353 	if (tracing_disabled)
4354 		return -ENODEV;
4355 
4356 	ret = seq_open(file, &show_traces_seq_ops);
4357 	if (ret)
4358 		return ret;
4359 
4360 	m = file->private_data;
4361 	m->private = tr;
4362 
4363 	return 0;
4364 }
4365 
4366 static ssize_t
4367 tracing_write_stub(struct file *filp, const char __user *ubuf,
4368 		   size_t count, loff_t *ppos)
4369 {
4370 	return count;
4371 }
4372 
4373 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4374 {
4375 	int ret;
4376 
4377 	if (file->f_mode & FMODE_READ)
4378 		ret = seq_lseek(file, offset, whence);
4379 	else
4380 		file->f_pos = ret = 0;
4381 
4382 	return ret;
4383 }
4384 
4385 static const struct file_operations tracing_fops = {
4386 	.open		= tracing_open,
4387 	.read		= seq_read,
4388 	.write		= tracing_write_stub,
4389 	.llseek		= tracing_lseek,
4390 	.release	= tracing_release,
4391 };
4392 
4393 static const struct file_operations show_traces_fops = {
4394 	.open		= show_traces_open,
4395 	.read		= seq_read,
4396 	.release	= seq_release,
4397 	.llseek		= seq_lseek,
4398 };
4399 
4400 static ssize_t
4401 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4402 		     size_t count, loff_t *ppos)
4403 {
4404 	struct trace_array *tr = file_inode(filp)->i_private;
4405 	char *mask_str;
4406 	int len;
4407 
4408 	len = snprintf(NULL, 0, "%*pb\n",
4409 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4410 	mask_str = kmalloc(len, GFP_KERNEL);
4411 	if (!mask_str)
4412 		return -ENOMEM;
4413 
4414 	len = snprintf(mask_str, len, "%*pb\n",
4415 		       cpumask_pr_args(tr->tracing_cpumask));
4416 	if (len >= count) {
4417 		count = -EINVAL;
4418 		goto out_err;
4419 	}
4420 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4421 
4422 out_err:
4423 	kfree(mask_str);
4424 
4425 	return count;
4426 }
4427 
4428 static ssize_t
4429 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4430 		      size_t count, loff_t *ppos)
4431 {
4432 	struct trace_array *tr = file_inode(filp)->i_private;
4433 	cpumask_var_t tracing_cpumask_new;
4434 	int err, cpu;
4435 
4436 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4437 		return -ENOMEM;
4438 
4439 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4440 	if (err)
4441 		goto err_unlock;
4442 
4443 	local_irq_disable();
4444 	arch_spin_lock(&tr->max_lock);
4445 	for_each_tracing_cpu(cpu) {
4446 		/*
4447 		 * Increase/decrease the disabled counter if we are
4448 		 * about to flip a bit in the cpumask:
4449 		 */
4450 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4451 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4452 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4453 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4454 		}
4455 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4456 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4457 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4458 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4459 		}
4460 	}
4461 	arch_spin_unlock(&tr->max_lock);
4462 	local_irq_enable();
4463 
4464 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4465 	free_cpumask_var(tracing_cpumask_new);
4466 
4467 	return count;
4468 
4469 err_unlock:
4470 	free_cpumask_var(tracing_cpumask_new);
4471 
4472 	return err;
4473 }
4474 
4475 static const struct file_operations tracing_cpumask_fops = {
4476 	.open		= tracing_open_generic_tr,
4477 	.read		= tracing_cpumask_read,
4478 	.write		= tracing_cpumask_write,
4479 	.release	= tracing_release_generic_tr,
4480 	.llseek		= generic_file_llseek,
4481 };
4482 
4483 static int tracing_trace_options_show(struct seq_file *m, void *v)
4484 {
4485 	struct tracer_opt *trace_opts;
4486 	struct trace_array *tr = m->private;
4487 	u32 tracer_flags;
4488 	int i;
4489 
4490 	mutex_lock(&trace_types_lock);
4491 	tracer_flags = tr->current_trace->flags->val;
4492 	trace_opts = tr->current_trace->flags->opts;
4493 
4494 	for (i = 0; trace_options[i]; i++) {
4495 		if (tr->trace_flags & (1 << i))
4496 			seq_printf(m, "%s\n", trace_options[i]);
4497 		else
4498 			seq_printf(m, "no%s\n", trace_options[i]);
4499 	}
4500 
4501 	for (i = 0; trace_opts[i].name; i++) {
4502 		if (tracer_flags & trace_opts[i].bit)
4503 			seq_printf(m, "%s\n", trace_opts[i].name);
4504 		else
4505 			seq_printf(m, "no%s\n", trace_opts[i].name);
4506 	}
4507 	mutex_unlock(&trace_types_lock);
4508 
4509 	return 0;
4510 }
4511 
4512 static int __set_tracer_option(struct trace_array *tr,
4513 			       struct tracer_flags *tracer_flags,
4514 			       struct tracer_opt *opts, int neg)
4515 {
4516 	struct tracer *trace = tracer_flags->trace;
4517 	int ret;
4518 
4519 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4520 	if (ret)
4521 		return ret;
4522 
4523 	if (neg)
4524 		tracer_flags->val &= ~opts->bit;
4525 	else
4526 		tracer_flags->val |= opts->bit;
4527 	return 0;
4528 }
4529 
4530 /* Try to assign a tracer specific option */
4531 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4532 {
4533 	struct tracer *trace = tr->current_trace;
4534 	struct tracer_flags *tracer_flags = trace->flags;
4535 	struct tracer_opt *opts = NULL;
4536 	int i;
4537 
4538 	for (i = 0; tracer_flags->opts[i].name; i++) {
4539 		opts = &tracer_flags->opts[i];
4540 
4541 		if (strcmp(cmp, opts->name) == 0)
4542 			return __set_tracer_option(tr, trace->flags, opts, neg);
4543 	}
4544 
4545 	return -EINVAL;
4546 }
4547 
4548 /* Some tracers require overwrite to stay enabled */
4549 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4550 {
4551 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4552 		return -1;
4553 
4554 	return 0;
4555 }
4556 
4557 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4558 {
4559 	/* do nothing if flag is already set */
4560 	if (!!(tr->trace_flags & mask) == !!enabled)
4561 		return 0;
4562 
4563 	/* Give the tracer a chance to approve the change */
4564 	if (tr->current_trace->flag_changed)
4565 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4566 			return -EINVAL;
4567 
4568 	if (enabled)
4569 		tr->trace_flags |= mask;
4570 	else
4571 		tr->trace_flags &= ~mask;
4572 
4573 	if (mask == TRACE_ITER_RECORD_CMD)
4574 		trace_event_enable_cmd_record(enabled);
4575 
4576 	if (mask == TRACE_ITER_RECORD_TGID) {
4577 		if (!tgid_map)
4578 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4579 					   sizeof(*tgid_map),
4580 					   GFP_KERNEL);
4581 		if (!tgid_map) {
4582 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4583 			return -ENOMEM;
4584 		}
4585 
4586 		trace_event_enable_tgid_record(enabled);
4587 	}
4588 
4589 	if (mask == TRACE_ITER_EVENT_FORK)
4590 		trace_event_follow_fork(tr, enabled);
4591 
4592 	if (mask == TRACE_ITER_FUNC_FORK)
4593 		ftrace_pid_follow_fork(tr, enabled);
4594 
4595 	if (mask == TRACE_ITER_OVERWRITE) {
4596 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4597 #ifdef CONFIG_TRACER_MAX_TRACE
4598 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4599 #endif
4600 	}
4601 
4602 	if (mask == TRACE_ITER_PRINTK) {
4603 		trace_printk_start_stop_comm(enabled);
4604 		trace_printk_control(enabled);
4605 	}
4606 
4607 	return 0;
4608 }
4609 
4610 static int trace_set_options(struct trace_array *tr, char *option)
4611 {
4612 	char *cmp;
4613 	int neg = 0;
4614 	int ret;
4615 	size_t orig_len = strlen(option);
4616 	int len;
4617 
4618 	cmp = strstrip(option);
4619 
4620 	len = str_has_prefix(cmp, "no");
4621 	if (len)
4622 		neg = 1;
4623 
4624 	cmp += len;
4625 
4626 	mutex_lock(&trace_types_lock);
4627 
4628 	ret = match_string(trace_options, -1, cmp);
4629 	/* If no option could be set, test the specific tracer options */
4630 	if (ret < 0)
4631 		ret = set_tracer_option(tr, cmp, neg);
4632 	else
4633 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4634 
4635 	mutex_unlock(&trace_types_lock);
4636 
4637 	/*
4638 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4639 	 * turn it back into a space.
4640 	 */
4641 	if (orig_len > strlen(option))
4642 		option[strlen(option)] = ' ';
4643 
4644 	return ret;
4645 }
4646 
4647 static void __init apply_trace_boot_options(void)
4648 {
4649 	char *buf = trace_boot_options_buf;
4650 	char *option;
4651 
4652 	while (true) {
4653 		option = strsep(&buf, ",");
4654 
4655 		if (!option)
4656 			break;
4657 
4658 		if (*option)
4659 			trace_set_options(&global_trace, option);
4660 
4661 		/* Put back the comma to allow this to be called again */
4662 		if (buf)
4663 			*(buf - 1) = ',';
4664 	}
4665 }
4666 
4667 static ssize_t
4668 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4669 			size_t cnt, loff_t *ppos)
4670 {
4671 	struct seq_file *m = filp->private_data;
4672 	struct trace_array *tr = m->private;
4673 	char buf[64];
4674 	int ret;
4675 
4676 	if (cnt >= sizeof(buf))
4677 		return -EINVAL;
4678 
4679 	if (copy_from_user(buf, ubuf, cnt))
4680 		return -EFAULT;
4681 
4682 	buf[cnt] = 0;
4683 
4684 	ret = trace_set_options(tr, buf);
4685 	if (ret < 0)
4686 		return ret;
4687 
4688 	*ppos += cnt;
4689 
4690 	return cnt;
4691 }
4692 
4693 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4694 {
4695 	struct trace_array *tr = inode->i_private;
4696 	int ret;
4697 
4698 	if (tracing_disabled)
4699 		return -ENODEV;
4700 
4701 	if (trace_array_get(tr) < 0)
4702 		return -ENODEV;
4703 
4704 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4705 	if (ret < 0)
4706 		trace_array_put(tr);
4707 
4708 	return ret;
4709 }
4710 
4711 static const struct file_operations tracing_iter_fops = {
4712 	.open		= tracing_trace_options_open,
4713 	.read		= seq_read,
4714 	.llseek		= seq_lseek,
4715 	.release	= tracing_single_release_tr,
4716 	.write		= tracing_trace_options_write,
4717 };
4718 
/*
 * Help text returned verbatim by tracing_readme_read().  Sections that
 * describe optional features are only compiled in when the matching
 * CONFIG_ option is set.
 *
 * NOTE(review): user-space tools may match on parts of this text; confirm
 * before rewording anything beyond plain typo fixes.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t-change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm\n"
#endif
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
	"\t           <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever\n"
	"\t    a histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n"
#endif
#endif
;
4967 
4968 static ssize_t
4969 tracing_readme_read(struct file *filp, char __user *ubuf,
4970 		       size_t cnt, loff_t *ppos)
4971 {
4972 	return simple_read_from_buffer(ubuf, cnt, ppos,
4973 					readme_msg, strlen(readme_msg));
4974 }
4975 
4976 static const struct file_operations tracing_readme_fops = {
4977 	.open		= tracing_open_generic,
4978 	.read		= tracing_readme_read,
4979 	.llseek		= generic_file_llseek,
4980 };
4981 
4982 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4983 {
4984 	int *ptr = v;
4985 
4986 	if (*pos || m->count)
4987 		ptr++;
4988 
4989 	(*pos)++;
4990 
4991 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4992 		if (trace_find_tgid(*ptr))
4993 			return ptr;
4994 	}
4995 
4996 	return NULL;
4997 }
4998 
4999 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5000 {
5001 	void *v;
5002 	loff_t l = 0;
5003 
5004 	if (!tgid_map)
5005 		return NULL;
5006 
5007 	v = &tgid_map[0];
5008 	while (l <= *pos) {
5009 		v = saved_tgids_next(m, v, &l);
5010 		if (!v)
5011 			return NULL;
5012 	}
5013 
5014 	return v;
5015 }
5016 
/* seq_file ->stop(): saved_tgids_start() takes nothing to release. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5020 
5021 static int saved_tgids_show(struct seq_file *m, void *v)
5022 {
5023 	int pid = (int *)v - tgid_map;
5024 
5025 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5026 	return 0;
5027 }
5028 
5029 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5030 	.start		= saved_tgids_start,
5031 	.stop		= saved_tgids_stop,
5032 	.next		= saved_tgids_next,
5033 	.show		= saved_tgids_show,
5034 };
5035 
5036 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5037 {
5038 	if (tracing_disabled)
5039 		return -ENODEV;
5040 
5041 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5042 }
5043 
5044 
5045 static const struct file_operations tracing_saved_tgids_fops = {
5046 	.open		= tracing_saved_tgids_open,
5047 	.read		= seq_read,
5048 	.llseek		= seq_lseek,
5049 	.release	= seq_release,
5050 };
5051 
5052 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5053 {
5054 	unsigned int *ptr = v;
5055 
5056 	if (*pos || m->count)
5057 		ptr++;
5058 
5059 	(*pos)++;
5060 
5061 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5062 	     ptr++) {
5063 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5064 			continue;
5065 
5066 		return ptr;
5067 	}
5068 
5069 	return NULL;
5070 }
5071 
5072 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5073 {
5074 	void *v;
5075 	loff_t l = 0;
5076 
5077 	preempt_disable();
5078 	arch_spin_lock(&trace_cmdline_lock);
5079 
5080 	v = &savedcmd->map_cmdline_to_pid[0];
5081 	while (l <= *pos) {
5082 		v = saved_cmdlines_next(m, v, &l);
5083 		if (!v)
5084 			return NULL;
5085 	}
5086 
5087 	return v;
5088 }
5089 
5090 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5091 {
5092 	arch_spin_unlock(&trace_cmdline_lock);
5093 	preempt_enable();
5094 }
5095 
5096 static int saved_cmdlines_show(struct seq_file *m, void *v)
5097 {
5098 	char buf[TASK_COMM_LEN];
5099 	unsigned int *pid = v;
5100 
5101 	__trace_find_cmdline(*pid, buf);
5102 	seq_printf(m, "%d %s\n", *pid, buf);
5103 	return 0;
5104 }
5105 
5106 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5107 	.start		= saved_cmdlines_start,
5108 	.next		= saved_cmdlines_next,
5109 	.stop		= saved_cmdlines_stop,
5110 	.show		= saved_cmdlines_show,
5111 };
5112 
5113 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5114 {
5115 	if (tracing_disabled)
5116 		return -ENODEV;
5117 
5118 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5119 }
5120 
5121 static const struct file_operations tracing_saved_cmdlines_fops = {
5122 	.open		= tracing_saved_cmdlines_open,
5123 	.read		= seq_read,
5124 	.llseek		= seq_lseek,
5125 	.release	= seq_release,
5126 };
5127 
5128 static ssize_t
5129 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5130 				 size_t cnt, loff_t *ppos)
5131 {
5132 	char buf[64];
5133 	int r;
5134 
5135 	arch_spin_lock(&trace_cmdline_lock);
5136 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5137 	arch_spin_unlock(&trace_cmdline_lock);
5138 
5139 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5140 }
5141 
5142 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5143 {
5144 	kfree(s->saved_cmdlines);
5145 	kfree(s->map_cmdline_to_pid);
5146 	kfree(s);
5147 }
5148 
5149 static int tracing_resize_saved_cmdlines(unsigned int val)
5150 {
5151 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5152 
5153 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5154 	if (!s)
5155 		return -ENOMEM;
5156 
5157 	if (allocate_cmdlines_buffer(val, s) < 0) {
5158 		kfree(s);
5159 		return -ENOMEM;
5160 	}
5161 
5162 	arch_spin_lock(&trace_cmdline_lock);
5163 	savedcmd_temp = savedcmd;
5164 	savedcmd = s;
5165 	arch_spin_unlock(&trace_cmdline_lock);
5166 	free_saved_cmdlines_buffer(savedcmd_temp);
5167 
5168 	return 0;
5169 }
5170 
5171 static ssize_t
5172 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5173 				  size_t cnt, loff_t *ppos)
5174 {
5175 	unsigned long val;
5176 	int ret;
5177 
5178 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5179 	if (ret)
5180 		return ret;
5181 
5182 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5183 	if (!val || val > PID_MAX_DEFAULT)
5184 		return -EINVAL;
5185 
5186 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5187 	if (ret < 0)
5188 		return ret;
5189 
5190 	*ppos += cnt;
5191 
5192 	return cnt;
5193 }
5194 
5195 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5196 	.open		= tracing_open_generic,
5197 	.read		= tracing_saved_cmdlines_size_read,
5198 	.write		= tracing_saved_cmdlines_size_write,
5199 };
5200 
5201 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5202 static union trace_eval_map_item *
5203 update_eval_map(union trace_eval_map_item *ptr)
5204 {
5205 	if (!ptr->map.eval_string) {
5206 		if (ptr->tail.next) {
5207 			ptr = ptr->tail.next;
5208 			/* Set ptr to the next real item (skip head) */
5209 			ptr++;
5210 		} else
5211 			return NULL;
5212 	}
5213 	return ptr;
5214 }
5215 
5216 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5217 {
5218 	union trace_eval_map_item *ptr = v;
5219 
5220 	/*
5221 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5222 	 * This really should never happen.
5223 	 */
5224 	ptr = update_eval_map(ptr);
5225 	if (WARN_ON_ONCE(!ptr))
5226 		return NULL;
5227 
5228 	ptr++;
5229 
5230 	(*pos)++;
5231 
5232 	ptr = update_eval_map(ptr);
5233 
5234 	return ptr;
5235 }
5236 
5237 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5238 {
5239 	union trace_eval_map_item *v;
5240 	loff_t l = 0;
5241 
5242 	mutex_lock(&trace_eval_mutex);
5243 
5244 	v = trace_eval_maps;
5245 	if (v)
5246 		v++;
5247 
5248 	while (v && l < *pos) {
5249 		v = eval_map_next(m, v, &l);
5250 	}
5251 
5252 	return v;
5253 }
5254 
5255 static void eval_map_stop(struct seq_file *m, void *v)
5256 {
5257 	mutex_unlock(&trace_eval_mutex);
5258 }
5259 
5260 static int eval_map_show(struct seq_file *m, void *v)
5261 {
5262 	union trace_eval_map_item *ptr = v;
5263 
5264 	seq_printf(m, "%s %ld (%s)\n",
5265 		   ptr->map.eval_string, ptr->map.eval_value,
5266 		   ptr->map.system);
5267 
5268 	return 0;
5269 }
5270 
5271 static const struct seq_operations tracing_eval_map_seq_ops = {
5272 	.start		= eval_map_start,
5273 	.next		= eval_map_next,
5274 	.stop		= eval_map_stop,
5275 	.show		= eval_map_show,
5276 };
5277 
5278 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5279 {
5280 	if (tracing_disabled)
5281 		return -ENODEV;
5282 
5283 	return seq_open(filp, &tracing_eval_map_seq_ops);
5284 }
5285 
5286 static const struct file_operations tracing_eval_map_fops = {
5287 	.open		= tracing_eval_map_open,
5288 	.read		= seq_read,
5289 	.llseek		= seq_lseek,
5290 	.release	= seq_release,
5291 };
5292 
5293 static inline union trace_eval_map_item *
5294 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5295 {
5296 	/* Return tail of array given the head */
5297 	return ptr + ptr->head.length + 1;
5298 }
5299 
5300 static void
5301 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5302 			   int len)
5303 {
5304 	struct trace_eval_map **stop;
5305 	struct trace_eval_map **map;
5306 	union trace_eval_map_item *map_array;
5307 	union trace_eval_map_item *ptr;
5308 
5309 	stop = start + len;
5310 
5311 	/*
5312 	 * The trace_eval_maps contains the map plus a head and tail item,
5313 	 * where the head holds the module and length of array, and the
5314 	 * tail holds a pointer to the next list.
5315 	 */
5316 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5317 	if (!map_array) {
5318 		pr_warn("Unable to allocate trace eval mapping\n");
5319 		return;
5320 	}
5321 
5322 	mutex_lock(&trace_eval_mutex);
5323 
5324 	if (!trace_eval_maps)
5325 		trace_eval_maps = map_array;
5326 	else {
5327 		ptr = trace_eval_maps;
5328 		for (;;) {
5329 			ptr = trace_eval_jmp_to_tail(ptr);
5330 			if (!ptr->tail.next)
5331 				break;
5332 			ptr = ptr->tail.next;
5333 
5334 		}
5335 		ptr->tail.next = map_array;
5336 	}
5337 	map_array->head.mod = mod;
5338 	map_array->head.length = len;
5339 	map_array++;
5340 
5341 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5342 		map_array->map = **map;
5343 		map_array++;
5344 	}
5345 	memset(map_array, 0, sizeof(*map_array));
5346 
5347 	mutex_unlock(&trace_eval_mutex);
5348 }
5349 
5350 static void trace_create_eval_file(struct dentry *d_tracer)
5351 {
5352 	trace_create_file("eval_map", 0444, d_tracer,
5353 			  NULL, &tracing_eval_map_fops);
5354 }
5355 
5356 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Empty stand-ins used when CONFIG_TRACE_EVAL_MAP_FILE is not set */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}

static inline void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
}
5360 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5361 
/*
 * Register @len eval maps starting at @start: pass them to
 * trace_event_eval_update() and then to trace_insert_eval_map_file().
 * Does nothing when @len is not positive.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);
	trace_insert_eval_map_file(mod, start, len);
}
5376 
5377 static ssize_t
5378 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5379 		       size_t cnt, loff_t *ppos)
5380 {
5381 	struct trace_array *tr = filp->private_data;
5382 	char buf[MAX_TRACER_SIZE+2];
5383 	int r;
5384 
5385 	mutex_lock(&trace_types_lock);
5386 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5387 	mutex_unlock(&trace_types_lock);
5388 
5389 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5390 }
5391 
5392 int tracer_init(struct tracer *t, struct trace_array *tr)
5393 {
5394 	tracing_reset_online_cpus(&tr->trace_buffer);
5395 	return t->init(tr);
5396 }
5397 
5398 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5399 {
5400 	int cpu;
5401 
5402 	for_each_tracing_cpu(cpu)
5403 		per_cpu_ptr(buf->data, cpu)->entries = val;
5404 }
5405 
5406 #ifdef CONFIG_TRACER_MAX_TRACE
5407 /* resize @tr's buffer to the size of @size_tr's entries */
5408 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5409 					struct trace_buffer *size_buf, int cpu_id)
5410 {
5411 	int cpu, ret = 0;
5412 
5413 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5414 		for_each_tracing_cpu(cpu) {
5415 			ret = ring_buffer_resize(trace_buf->buffer,
5416 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5417 			if (ret < 0)
5418 				break;
5419 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5420 				per_cpu_ptr(size_buf->data, cpu)->entries;
5421 		}
5422 	} else {
5423 		ret = ring_buffer_resize(trace_buf->buffer,
5424 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5425 		if (ret == 0)
5426 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5427 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5428 	}
5429 
5430 	return ret;
5431 }
5432 #endif /* CONFIG_TRACER_MAX_TRACE */
5433 
5434 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5435 					unsigned long size, int cpu)
5436 {
5437 	int ret;
5438 
5439 	/*
5440 	 * If kernel or user changes the size of the ring buffer
5441 	 * we use the size that was given, and we can forget about
5442 	 * expanding it later.
5443 	 */
5444 	ring_buffer_expanded = true;
5445 
5446 	/* May be called before buffers are initialized */
5447 	if (!tr->trace_buffer.buffer)
5448 		return 0;
5449 
5450 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5451 	if (ret < 0)
5452 		return ret;
5453 
5454 #ifdef CONFIG_TRACER_MAX_TRACE
5455 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5456 	    !tr->current_trace->use_max_tr)
5457 		goto out;
5458 
5459 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5460 	if (ret < 0) {
5461 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5462 						     &tr->trace_buffer, cpu);
5463 		if (r < 0) {
5464 			/*
5465 			 * AARGH! We are left with different
5466 			 * size max buffer!!!!
5467 			 * The max buffer is our "snapshot" buffer.
5468 			 * When a tracer needs a snapshot (one of the
5469 			 * latency tracers), it swaps the max buffer
5470 			 * with the saved snap shot. We succeeded to
5471 			 * update the size of the main buffer, but failed to
5472 			 * update the size of the max buffer. But when we tried
5473 			 * to reset the main buffer to the original size, we
5474 			 * failed there too. This is very unlikely to
5475 			 * happen, but if it does, warn and kill all
5476 			 * tracing.
5477 			 */
5478 			WARN_ON(1);
5479 			tracing_disabled = 1;
5480 		}
5481 		return ret;
5482 	}
5483 
5484 	if (cpu == RING_BUFFER_ALL_CPUS)
5485 		set_buffer_entries(&tr->max_buffer, size);
5486 	else
5487 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5488 
5489  out:
5490 #endif /* CONFIG_TRACER_MAX_TRACE */
5491 
5492 	if (cpu == RING_BUFFER_ALL_CPUS)
5493 		set_buffer_entries(&tr->trace_buffer, size);
5494 	else
5495 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5496 
5497 	return ret;
5498 }
5499 
5500 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5501 					  unsigned long size, int cpu_id)
5502 {
5503 	int ret = size;
5504 
5505 	mutex_lock(&trace_types_lock);
5506 
5507 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5508 		/* make sure, this cpu is enabled in the mask */
5509 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5510 			ret = -EINVAL;
5511 			goto out;
5512 		}
5513 	}
5514 
5515 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5516 	if (ret < 0)
5517 		ret = -ENOMEM;
5518 
5519 out:
5520 	mutex_unlock(&trace_types_lock);
5521 
5522 	return ret;
5523 }
5524 
5525 
5526 /**
5527  * tracing_update_buffers - used by tracing facility to expand ring buffers
5528  *
5529  * To save on memory when the tracing is never used on a system with it
5530  * configured in. The ring buffers are set to a minimum size. But once
5531  * a user starts to use the tracing facility, then they need to grow
5532  * to their default size.
5533  *
5534  * This function is to be called when a tracer is about to be used.
5535  */
5536 int tracing_update_buffers(void)
5537 {
5538 	int ret = 0;
5539 
5540 	mutex_lock(&trace_types_lock);
5541 	if (!ring_buffer_expanded)
5542 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5543 						RING_BUFFER_ALL_CPUS);
5544 	mutex_unlock(&trace_types_lock);
5545 
5546 	return ret;
5547 }
5548 
struct trace_option_dentry;

/* Forward declaration: called by add_tracer_options() below */
static void create_trace_option_files(struct trace_array *tr,
				      struct tracer *tracer);
5553 
5554 /*
5555  * Used to clear out the tracer before deletion of an instance.
5556  * Must have trace_types_lock held.
5557  */
5558 static void tracing_set_nop(struct trace_array *tr)
5559 {
5560 	if (tr->current_trace == &nop_trace)
5561 		return;
5562 
5563 	tr->current_trace->enabled--;
5564 
5565 	if (tr->current_trace->reset)
5566 		tr->current_trace->reset(tr);
5567 
5568 	tr->current_trace = &nop_trace;
5569 }
5570 
5571 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5572 {
5573 	/* Only enable if the directory has been created already. */
5574 	if (!tr->dir)
5575 		return;
5576 
5577 	create_trace_option_files(tr, t);
5578 }
5579 
5580 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5581 {
5582 	struct tracer *t;
5583 #ifdef CONFIG_TRACER_MAX_TRACE
5584 	bool had_max_tr;
5585 #endif
5586 	int ret = 0;
5587 
5588 	mutex_lock(&trace_types_lock);
5589 
5590 	if (!ring_buffer_expanded) {
5591 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5592 						RING_BUFFER_ALL_CPUS);
5593 		if (ret < 0)
5594 			goto out;
5595 		ret = 0;
5596 	}
5597 
5598 	for (t = trace_types; t; t = t->next) {
5599 		if (strcmp(t->name, buf) == 0)
5600 			break;
5601 	}
5602 	if (!t) {
5603 		ret = -EINVAL;
5604 		goto out;
5605 	}
5606 	if (t == tr->current_trace)
5607 		goto out;
5608 
5609 #ifdef CONFIG_TRACER_SNAPSHOT
5610 	if (t->use_max_tr) {
5611 		arch_spin_lock(&tr->max_lock);
5612 		if (tr->cond_snapshot)
5613 			ret = -EBUSY;
5614 		arch_spin_unlock(&tr->max_lock);
5615 		if (ret)
5616 			goto out;
5617 	}
5618 #endif
5619 	/* Some tracers won't work on kernel command line */
5620 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5621 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5622 			t->name);
5623 		goto out;
5624 	}
5625 
5626 	/* Some tracers are only allowed for the top level buffer */
5627 	if (!trace_ok_for_array(t, tr)) {
5628 		ret = -EINVAL;
5629 		goto out;
5630 	}
5631 
5632 	/* If trace pipe files are being read, we can't change the tracer */
5633 	if (tr->current_trace->ref) {
5634 		ret = -EBUSY;
5635 		goto out;
5636 	}
5637 
5638 	trace_branch_disable();
5639 
5640 	tr->current_trace->enabled--;
5641 
5642 	if (tr->current_trace->reset)
5643 		tr->current_trace->reset(tr);
5644 
5645 	/* Current trace needs to be nop_trace before synchronize_rcu */
5646 	tr->current_trace = &nop_trace;
5647 
5648 #ifdef CONFIG_TRACER_MAX_TRACE
5649 	had_max_tr = tr->allocated_snapshot;
5650 
5651 	if (had_max_tr && !t->use_max_tr) {
5652 		/*
5653 		 * We need to make sure that the update_max_tr sees that
5654 		 * current_trace changed to nop_trace to keep it from
5655 		 * swapping the buffers after we resize it.
5656 		 * The update_max_tr is called from interrupts disabled
5657 		 * so a synchronized_sched() is sufficient.
5658 		 */
5659 		synchronize_rcu();
5660 		free_snapshot(tr);
5661 	}
5662 #endif
5663 
5664 #ifdef CONFIG_TRACER_MAX_TRACE
5665 	if (t->use_max_tr && !had_max_tr) {
5666 		ret = tracing_alloc_snapshot_instance(tr);
5667 		if (ret < 0)
5668 			goto out;
5669 	}
5670 #endif
5671 
5672 	if (t->init) {
5673 		ret = tracer_init(t, tr);
5674 		if (ret)
5675 			goto out;
5676 	}
5677 
5678 	tr->current_trace = t;
5679 	tr->current_trace->enabled++;
5680 	trace_branch_enable(tr);
5681  out:
5682 	mutex_unlock(&trace_types_lock);
5683 
5684 	return ret;
5685 }
5686 
5687 static ssize_t
5688 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5689 			size_t cnt, loff_t *ppos)
5690 {
5691 	struct trace_array *tr = filp->private_data;
5692 	char buf[MAX_TRACER_SIZE+1];
5693 	int i;
5694 	size_t ret;
5695 	int err;
5696 
5697 	ret = cnt;
5698 
5699 	if (cnt > MAX_TRACER_SIZE)
5700 		cnt = MAX_TRACER_SIZE;
5701 
5702 	if (copy_from_user(buf, ubuf, cnt))
5703 		return -EFAULT;
5704 
5705 	buf[cnt] = 0;
5706 
5707 	/* strip ending whitespace. */
5708 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5709 		buf[i] = 0;
5710 
5711 	err = tracing_set_tracer(tr, buf);
5712 	if (err)
5713 		return err;
5714 
5715 	*ppos += ret;
5716 
5717 	return ret;
5718 }
5719 
5720 static ssize_t
5721 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5722 		   size_t cnt, loff_t *ppos)
5723 {
5724 	char buf[64];
5725 	int r;
5726 
5727 	r = snprintf(buf, sizeof(buf), "%ld\n",
5728 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5729 	if (r > sizeof(buf))
5730 		r = sizeof(buf);
5731 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5732 }
5733 
5734 static ssize_t
5735 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5736 		    size_t cnt, loff_t *ppos)
5737 {
5738 	unsigned long val;
5739 	int ret;
5740 
5741 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5742 	if (ret)
5743 		return ret;
5744 
5745 	*ptr = val * 1000;
5746 
5747 	return cnt;
5748 }
5749 
5750 static ssize_t
5751 tracing_thresh_read(struct file *filp, char __user *ubuf,
5752 		    size_t cnt, loff_t *ppos)
5753 {
5754 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5755 }
5756 
5757 static ssize_t
5758 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5759 		     size_t cnt, loff_t *ppos)
5760 {
5761 	struct trace_array *tr = filp->private_data;
5762 	int ret;
5763 
5764 	mutex_lock(&trace_types_lock);
5765 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5766 	if (ret < 0)
5767 		goto out;
5768 
5769 	if (tr->current_trace->update_thresh) {
5770 		ret = tr->current_trace->update_thresh(tr);
5771 		if (ret < 0)
5772 			goto out;
5773 	}
5774 
5775 	ret = cnt;
5776 out:
5777 	mutex_unlock(&trace_types_lock);
5778 
5779 	return ret;
5780 }
5781 
5782 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5783 
5784 static ssize_t
5785 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5786 		     size_t cnt, loff_t *ppos)
5787 {
5788 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5789 }
5790 
5791 static ssize_t
5792 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5793 		      size_t cnt, loff_t *ppos)
5794 {
5795 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5796 }
5797 
5798 #endif
5799 
5800 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5801 {
5802 	struct trace_array *tr = inode->i_private;
5803 	struct trace_iterator *iter;
5804 	int ret = 0;
5805 
5806 	if (tracing_disabled)
5807 		return -ENODEV;
5808 
5809 	if (trace_array_get(tr) < 0)
5810 		return -ENODEV;
5811 
5812 	mutex_lock(&trace_types_lock);
5813 
5814 	/* create a buffer to store the information to pass to userspace */
5815 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5816 	if (!iter) {
5817 		ret = -ENOMEM;
5818 		__trace_array_put(tr);
5819 		goto out;
5820 	}
5821 
5822 	trace_seq_init(&iter->seq);
5823 	iter->trace = tr->current_trace;
5824 
5825 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5826 		ret = -ENOMEM;
5827 		goto fail;
5828 	}
5829 
5830 	/* trace pipe does not show start of buffer */
5831 	cpumask_setall(iter->started);
5832 
5833 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5834 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5835 
5836 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5837 	if (trace_clocks[tr->clock_id].in_ns)
5838 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5839 
5840 	iter->tr = tr;
5841 	iter->trace_buffer = &tr->trace_buffer;
5842 	iter->cpu_file = tracing_get_cpu(inode);
5843 	mutex_init(&iter->mutex);
5844 	filp->private_data = iter;
5845 
5846 	if (iter->trace->pipe_open)
5847 		iter->trace->pipe_open(iter);
5848 
5849 	nonseekable_open(inode, filp);
5850 
5851 	tr->current_trace->ref++;
5852 out:
5853 	mutex_unlock(&trace_types_lock);
5854 	return ret;
5855 
5856 fail:
5857 	kfree(iter);
5858 	__trace_array_put(tr);
5859 	mutex_unlock(&trace_types_lock);
5860 	return ret;
5861 }
5862 
5863 static int tracing_release_pipe(struct inode *inode, struct file *file)
5864 {
5865 	struct trace_iterator *iter = file->private_data;
5866 	struct trace_array *tr = inode->i_private;
5867 
5868 	mutex_lock(&trace_types_lock);
5869 
5870 	tr->current_trace->ref--;
5871 
5872 	if (iter->trace->pipe_close)
5873 		iter->trace->pipe_close(iter);
5874 
5875 	mutex_unlock(&trace_types_lock);
5876 
5877 	free_cpumask_var(iter->started);
5878 	mutex_destroy(&iter->mutex);
5879 	kfree(iter);
5880 
5881 	trace_array_put(tr);
5882 
5883 	return 0;
5884 }
5885 
5886 static __poll_t
5887 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5888 {
5889 	struct trace_array *tr = iter->tr;
5890 
5891 	/* Iterators are static, they should be filled or empty */
5892 	if (trace_buffer_iter(iter, iter->cpu_file))
5893 		return EPOLLIN | EPOLLRDNORM;
5894 
5895 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5896 		/*
5897 		 * Always select as readable when in blocking mode
5898 		 */
5899 		return EPOLLIN | EPOLLRDNORM;
5900 	else
5901 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5902 					     filp, poll_table);
5903 }
5904 
5905 static __poll_t
5906 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5907 {
5908 	struct trace_iterator *iter = filp->private_data;
5909 
5910 	return trace_poll(iter, filp, poll_table);
5911 }
5912 
5913 /* Must be called with iter->mutex held. */
5914 static int tracing_wait_pipe(struct file *filp)
5915 {
5916 	struct trace_iterator *iter = filp->private_data;
5917 	int ret;
5918 
5919 	while (trace_empty(iter)) {
5920 
5921 		if ((filp->f_flags & O_NONBLOCK)) {
5922 			return -EAGAIN;
5923 		}
5924 
5925 		/*
5926 		 * We block until we read something and tracing is disabled.
5927 		 * We still block if tracing is disabled, but we have never
5928 		 * read anything. This allows a user to cat this file, and
5929 		 * then enable tracing. But after we have read something,
5930 		 * we give an EOF when tracing is again disabled.
5931 		 *
5932 		 * iter->pos will be 0 if we haven't read anything.
5933 		 */
5934 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5935 			break;
5936 
5937 		mutex_unlock(&iter->mutex);
5938 
5939 		ret = wait_on_pipe(iter, 0);
5940 
5941 		mutex_lock(&iter->mutex);
5942 
5943 		if (ret)
5944 			return ret;
5945 	}
5946 
5947 	return 1;
5948 }
5949 
5950 /*
5951  * Consumer reader.
5952  */
5953 static ssize_t
5954 tracing_read_pipe(struct file *filp, char __user *ubuf,
5955 		  size_t cnt, loff_t *ppos)
5956 {
5957 	struct trace_iterator *iter = filp->private_data;
5958 	ssize_t sret;
5959 
5960 	/*
5961 	 * Avoid more than one consumer on a single file descriptor
5962 	 * This is just a matter of traces coherency, the ring buffer itself
5963 	 * is protected.
5964 	 */
5965 	mutex_lock(&iter->mutex);
5966 
5967 	/* return any leftover data */
5968 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5969 	if (sret != -EBUSY)
5970 		goto out;
5971 
5972 	trace_seq_init(&iter->seq);
5973 
5974 	if (iter->trace->read) {
5975 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5976 		if (sret)
5977 			goto out;
5978 	}
5979 
5980 waitagain:
5981 	sret = tracing_wait_pipe(filp);
5982 	if (sret <= 0)
5983 		goto out;
5984 
5985 	/* stop when tracing is finished */
5986 	if (trace_empty(iter)) {
5987 		sret = 0;
5988 		goto out;
5989 	}
5990 
5991 	if (cnt >= PAGE_SIZE)
5992 		cnt = PAGE_SIZE - 1;
5993 
5994 	/* reset all but tr, trace, and overruns */
5995 	memset(&iter->seq, 0,
5996 	       sizeof(struct trace_iterator) -
5997 	       offsetof(struct trace_iterator, seq));
5998 	cpumask_clear(iter->started);
5999 	iter->pos = -1;
6000 
6001 	trace_event_read_lock();
6002 	trace_access_lock(iter->cpu_file);
6003 	while (trace_find_next_entry_inc(iter) != NULL) {
6004 		enum print_line_t ret;
6005 		int save_len = iter->seq.seq.len;
6006 
6007 		ret = print_trace_line(iter);
6008 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6009 			/* don't print partial lines */
6010 			iter->seq.seq.len = save_len;
6011 			break;
6012 		}
6013 		if (ret != TRACE_TYPE_NO_CONSUME)
6014 			trace_consume(iter);
6015 
6016 		if (trace_seq_used(&iter->seq) >= cnt)
6017 			break;
6018 
6019 		/*
6020 		 * Setting the full flag means we reached the trace_seq buffer
6021 		 * size and we should leave by partial output condition above.
6022 		 * One of the trace_seq_* functions is not used properly.
6023 		 */
6024 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6025 			  iter->ent->type);
6026 	}
6027 	trace_access_unlock(iter->cpu_file);
6028 	trace_event_read_unlock();
6029 
6030 	/* Now copy what we have to the user */
6031 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6032 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6033 		trace_seq_init(&iter->seq);
6034 
6035 	/*
6036 	 * If there was nothing to send to user, in spite of consuming trace
6037 	 * entries, go back to wait for more entries.
6038 	 */
6039 	if (sret == -EBUSY)
6040 		goto waitagain;
6041 
6042 out:
6043 	mutex_unlock(&iter->mutex);
6044 
6045 	return sret;
6046 }
6047 
6048 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6049 				     unsigned int idx)
6050 {
6051 	__free_page(spd->pages[idx]);
6052 }
6053 
6054 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6055 	.confirm		= generic_pipe_buf_confirm,
6056 	.release		= generic_pipe_buf_release,
6057 	.steal			= generic_pipe_buf_steal,
6058 	.get			= generic_pipe_buf_get,
6059 };
6060 
6061 static size_t
6062 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6063 {
6064 	size_t count;
6065 	int save_len;
6066 	int ret;
6067 
6068 	/* Seq buffer is page-sized, exactly what we need. */
6069 	for (;;) {
6070 		save_len = iter->seq.seq.len;
6071 		ret = print_trace_line(iter);
6072 
6073 		if (trace_seq_has_overflowed(&iter->seq)) {
6074 			iter->seq.seq.len = save_len;
6075 			break;
6076 		}
6077 
6078 		/*
6079 		 * This should not be hit, because it should only
6080 		 * be set if the iter->seq overflowed. But check it
6081 		 * anyway to be safe.
6082 		 */
6083 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6084 			iter->seq.seq.len = save_len;
6085 			break;
6086 		}
6087 
6088 		count = trace_seq_used(&iter->seq) - save_len;
6089 		if (rem < count) {
6090 			rem = 0;
6091 			iter->seq.seq.len = save_len;
6092 			break;
6093 		}
6094 
6095 		if (ret != TRACE_TYPE_NO_CONSUME)
6096 			trace_consume(iter);
6097 		rem -= count;
6098 		if (!trace_find_next_entry_inc(iter))	{
6099 			rem = 0;
6100 			iter->ent = NULL;
6101 			break;
6102 		}
6103 	}
6104 
6105 	return rem;
6106 }
6107 
6108 static ssize_t tracing_splice_read_pipe(struct file *filp,
6109 					loff_t *ppos,
6110 					struct pipe_inode_info *pipe,
6111 					size_t len,
6112 					unsigned int flags)
6113 {
6114 	struct page *pages_def[PIPE_DEF_BUFFERS];
6115 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6116 	struct trace_iterator *iter = filp->private_data;
6117 	struct splice_pipe_desc spd = {
6118 		.pages		= pages_def,
6119 		.partial	= partial_def,
6120 		.nr_pages	= 0, /* This gets updated below. */
6121 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6122 		.ops		= &tracing_pipe_buf_ops,
6123 		.spd_release	= tracing_spd_release_pipe,
6124 	};
6125 	ssize_t ret;
6126 	size_t rem;
6127 	unsigned int i;
6128 
6129 	if (splice_grow_spd(pipe, &spd))
6130 		return -ENOMEM;
6131 
6132 	mutex_lock(&iter->mutex);
6133 
6134 	if (iter->trace->splice_read) {
6135 		ret = iter->trace->splice_read(iter, filp,
6136 					       ppos, pipe, len, flags);
6137 		if (ret)
6138 			goto out_err;
6139 	}
6140 
6141 	ret = tracing_wait_pipe(filp);
6142 	if (ret <= 0)
6143 		goto out_err;
6144 
6145 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6146 		ret = -EFAULT;
6147 		goto out_err;
6148 	}
6149 
6150 	trace_event_read_lock();
6151 	trace_access_lock(iter->cpu_file);
6152 
6153 	/* Fill as many pages as possible. */
6154 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6155 		spd.pages[i] = alloc_page(GFP_KERNEL);
6156 		if (!spd.pages[i])
6157 			break;
6158 
6159 		rem = tracing_fill_pipe_page(rem, iter);
6160 
6161 		/* Copy the data into the page, so we can start over. */
6162 		ret = trace_seq_to_buffer(&iter->seq,
6163 					  page_address(spd.pages[i]),
6164 					  trace_seq_used(&iter->seq));
6165 		if (ret < 0) {
6166 			__free_page(spd.pages[i]);
6167 			break;
6168 		}
6169 		spd.partial[i].offset = 0;
6170 		spd.partial[i].len = trace_seq_used(&iter->seq);
6171 
6172 		trace_seq_init(&iter->seq);
6173 	}
6174 
6175 	trace_access_unlock(iter->cpu_file);
6176 	trace_event_read_unlock();
6177 	mutex_unlock(&iter->mutex);
6178 
6179 	spd.nr_pages = i;
6180 
6181 	if (i)
6182 		ret = splice_to_pipe(pipe, &spd);
6183 	else
6184 		ret = 0;
6185 out:
6186 	splice_shrink_spd(&spd);
6187 	return ret;
6188 
6189 out_err:
6190 	mutex_unlock(&iter->mutex);
6191 	goto out;
6192 }
6193 
6194 static ssize_t
6195 tracing_entries_read(struct file *filp, char __user *ubuf,
6196 		     size_t cnt, loff_t *ppos)
6197 {
6198 	struct inode *inode = file_inode(filp);
6199 	struct trace_array *tr = inode->i_private;
6200 	int cpu = tracing_get_cpu(inode);
6201 	char buf[64];
6202 	int r = 0;
6203 	ssize_t ret;
6204 
6205 	mutex_lock(&trace_types_lock);
6206 
6207 	if (cpu == RING_BUFFER_ALL_CPUS) {
6208 		int cpu, buf_size_same;
6209 		unsigned long size;
6210 
6211 		size = 0;
6212 		buf_size_same = 1;
6213 		/* check if all cpu sizes are same */
6214 		for_each_tracing_cpu(cpu) {
6215 			/* fill in the size from first enabled cpu */
6216 			if (size == 0)
6217 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6218 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6219 				buf_size_same = 0;
6220 				break;
6221 			}
6222 		}
6223 
6224 		if (buf_size_same) {
6225 			if (!ring_buffer_expanded)
6226 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6227 					    size >> 10,
6228 					    trace_buf_size >> 10);
6229 			else
6230 				r = sprintf(buf, "%lu\n", size >> 10);
6231 		} else
6232 			r = sprintf(buf, "X\n");
6233 	} else
6234 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6235 
6236 	mutex_unlock(&trace_types_lock);
6237 
6238 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6239 	return ret;
6240 }
6241 
6242 static ssize_t
6243 tracing_entries_write(struct file *filp, const char __user *ubuf,
6244 		      size_t cnt, loff_t *ppos)
6245 {
6246 	struct inode *inode = file_inode(filp);
6247 	struct trace_array *tr = inode->i_private;
6248 	unsigned long val;
6249 	int ret;
6250 
6251 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6252 	if (ret)
6253 		return ret;
6254 
6255 	/* must have at least 1 entry */
6256 	if (!val)
6257 		return -EINVAL;
6258 
6259 	/* value is in KB */
6260 	val <<= 10;
6261 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6262 	if (ret < 0)
6263 		return ret;
6264 
6265 	*ppos += cnt;
6266 
6267 	return cnt;
6268 }
6269 
6270 static ssize_t
6271 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6272 				size_t cnt, loff_t *ppos)
6273 {
6274 	struct trace_array *tr = filp->private_data;
6275 	char buf[64];
6276 	int r, cpu;
6277 	unsigned long size = 0, expanded_size = 0;
6278 
6279 	mutex_lock(&trace_types_lock);
6280 	for_each_tracing_cpu(cpu) {
6281 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6282 		if (!ring_buffer_expanded)
6283 			expanded_size += trace_buf_size >> 10;
6284 	}
6285 	if (ring_buffer_expanded)
6286 		r = sprintf(buf, "%lu\n", size);
6287 	else
6288 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6289 	mutex_unlock(&trace_types_lock);
6290 
6291 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6292 }
6293 
6294 static ssize_t
6295 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6296 			  size_t cnt, loff_t *ppos)
6297 {
6298 	/*
6299 	 * There is no need to read what the user has written, this function
6300 	 * is just to make sure that there is no error when "echo" is used
6301 	 */
6302 
6303 	*ppos += cnt;
6304 
6305 	return cnt;
6306 }
6307 
6308 static int
6309 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6310 {
6311 	struct trace_array *tr = inode->i_private;
6312 
6313 	/* disable tracing ? */
6314 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6315 		tracer_tracing_off(tr);
6316 	/* resize the ring buffer to 0 */
6317 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6318 
6319 	trace_array_put(tr);
6320 
6321 	return 0;
6322 }
6323 
6324 static ssize_t
6325 tracing_mark_write(struct file *filp, const char __user *ubuf,
6326 					size_t cnt, loff_t *fpos)
6327 {
6328 	struct trace_array *tr = filp->private_data;
6329 	struct ring_buffer_event *event;
6330 	enum event_trigger_type tt = ETT_NONE;
6331 	struct ring_buffer *buffer;
6332 	struct print_entry *entry;
6333 	unsigned long irq_flags;
6334 	ssize_t written;
6335 	int size;
6336 	int len;
6337 
6338 /* Used in tracing_mark_raw_write() as well */
6339 #define FAULTED_STR "<faulted>"
6340 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6341 
6342 	if (tracing_disabled)
6343 		return -EINVAL;
6344 
6345 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6346 		return -EINVAL;
6347 
6348 	if (cnt > TRACE_BUF_SIZE)
6349 		cnt = TRACE_BUF_SIZE;
6350 
6351 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6352 
6353 	local_save_flags(irq_flags);
6354 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6355 
6356 	/* If less than "<faulted>", then make sure we can still add that */
6357 	if (cnt < FAULTED_SIZE)
6358 		size += FAULTED_SIZE - cnt;
6359 
6360 	buffer = tr->trace_buffer.buffer;
6361 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6362 					    irq_flags, preempt_count());
6363 	if (unlikely(!event))
6364 		/* Ring buffer disabled, return as if not open for write */
6365 		return -EBADF;
6366 
6367 	entry = ring_buffer_event_data(event);
6368 	entry->ip = _THIS_IP_;
6369 
6370 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6371 	if (len) {
6372 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6373 		cnt = FAULTED_SIZE;
6374 		written = -EFAULT;
6375 	} else
6376 		written = cnt;
6377 	len = cnt;
6378 
6379 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6380 		/* do not add \n before testing triggers, but add \0 */
6381 		entry->buf[cnt] = '\0';
6382 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6383 	}
6384 
6385 	if (entry->buf[cnt - 1] != '\n') {
6386 		entry->buf[cnt] = '\n';
6387 		entry->buf[cnt + 1] = '\0';
6388 	} else
6389 		entry->buf[cnt] = '\0';
6390 
6391 	__buffer_unlock_commit(buffer, event);
6392 
6393 	if (tt)
6394 		event_triggers_post_call(tr->trace_marker_file, tt);
6395 
6396 	if (written > 0)
6397 		*fpos += written;
6398 
6399 	return written;
6400 }
6401 
6402 /* Limit it for now to 3K (including tag) */
6403 #define RAW_DATA_MAX_SIZE (1024*3)
6404 
6405 static ssize_t
6406 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6407 					size_t cnt, loff_t *fpos)
6408 {
6409 	struct trace_array *tr = filp->private_data;
6410 	struct ring_buffer_event *event;
6411 	struct ring_buffer *buffer;
6412 	struct raw_data_entry *entry;
6413 	unsigned long irq_flags;
6414 	ssize_t written;
6415 	int size;
6416 	int len;
6417 
6418 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6419 
6420 	if (tracing_disabled)
6421 		return -EINVAL;
6422 
6423 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6424 		return -EINVAL;
6425 
6426 	/* The marker must at least have a tag id */
6427 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6428 		return -EINVAL;
6429 
6430 	if (cnt > TRACE_BUF_SIZE)
6431 		cnt = TRACE_BUF_SIZE;
6432 
6433 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6434 
6435 	local_save_flags(irq_flags);
6436 	size = sizeof(*entry) + cnt;
6437 	if (cnt < FAULT_SIZE_ID)
6438 		size += FAULT_SIZE_ID - cnt;
6439 
6440 	buffer = tr->trace_buffer.buffer;
6441 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6442 					    irq_flags, preempt_count());
6443 	if (!event)
6444 		/* Ring buffer disabled, return as if not open for write */
6445 		return -EBADF;
6446 
6447 	entry = ring_buffer_event_data(event);
6448 
6449 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6450 	if (len) {
6451 		entry->id = -1;
6452 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6453 		written = -EFAULT;
6454 	} else
6455 		written = cnt;
6456 
6457 	__buffer_unlock_commit(buffer, event);
6458 
6459 	if (written > 0)
6460 		*fpos += written;
6461 
6462 	return written;
6463 }
6464 
6465 static int tracing_clock_show(struct seq_file *m, void *v)
6466 {
6467 	struct trace_array *tr = m->private;
6468 	int i;
6469 
6470 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6471 		seq_printf(m,
6472 			"%s%s%s%s", i ? " " : "",
6473 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6474 			i == tr->clock_id ? "]" : "");
6475 	seq_putc(m, '\n');
6476 
6477 	return 0;
6478 }
6479 
6480 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6481 {
6482 	int i;
6483 
6484 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6485 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6486 			break;
6487 	}
6488 	if (i == ARRAY_SIZE(trace_clocks))
6489 		return -EINVAL;
6490 
6491 	mutex_lock(&trace_types_lock);
6492 
6493 	tr->clock_id = i;
6494 
6495 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6496 
6497 	/*
6498 	 * New clock may not be consistent with the previous clock.
6499 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6500 	 */
6501 	tracing_reset_online_cpus(&tr->trace_buffer);
6502 
6503 #ifdef CONFIG_TRACER_MAX_TRACE
6504 	if (tr->max_buffer.buffer)
6505 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6506 	tracing_reset_online_cpus(&tr->max_buffer);
6507 #endif
6508 
6509 	mutex_unlock(&trace_types_lock);
6510 
6511 	return 0;
6512 }
6513 
6514 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6515 				   size_t cnt, loff_t *fpos)
6516 {
6517 	struct seq_file *m = filp->private_data;
6518 	struct trace_array *tr = m->private;
6519 	char buf[64];
6520 	const char *clockstr;
6521 	int ret;
6522 
6523 	if (cnt >= sizeof(buf))
6524 		return -EINVAL;
6525 
6526 	if (copy_from_user(buf, ubuf, cnt))
6527 		return -EFAULT;
6528 
6529 	buf[cnt] = 0;
6530 
6531 	clockstr = strstrip(buf);
6532 
6533 	ret = tracing_set_clock(tr, clockstr);
6534 	if (ret)
6535 		return ret;
6536 
6537 	*fpos += cnt;
6538 
6539 	return cnt;
6540 }
6541 
6542 static int tracing_clock_open(struct inode *inode, struct file *file)
6543 {
6544 	struct trace_array *tr = inode->i_private;
6545 	int ret;
6546 
6547 	if (tracing_disabled)
6548 		return -ENODEV;
6549 
6550 	if (trace_array_get(tr))
6551 		return -ENODEV;
6552 
6553 	ret = single_open(file, tracing_clock_show, inode->i_private);
6554 	if (ret < 0)
6555 		trace_array_put(tr);
6556 
6557 	return ret;
6558 }
6559 
6560 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6561 {
6562 	struct trace_array *tr = m->private;
6563 
6564 	mutex_lock(&trace_types_lock);
6565 
6566 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6567 		seq_puts(m, "delta [absolute]\n");
6568 	else
6569 		seq_puts(m, "[delta] absolute\n");
6570 
6571 	mutex_unlock(&trace_types_lock);
6572 
6573 	return 0;
6574 }
6575 
6576 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6577 {
6578 	struct trace_array *tr = inode->i_private;
6579 	int ret;
6580 
6581 	if (tracing_disabled)
6582 		return -ENODEV;
6583 
6584 	if (trace_array_get(tr))
6585 		return -ENODEV;
6586 
6587 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6588 	if (ret < 0)
6589 		trace_array_put(tr);
6590 
6591 	return ret;
6592 }
6593 
6594 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6595 {
6596 	int ret = 0;
6597 
6598 	mutex_lock(&trace_types_lock);
6599 
6600 	if (abs && tr->time_stamp_abs_ref++)
6601 		goto out;
6602 
6603 	if (!abs) {
6604 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6605 			ret = -EINVAL;
6606 			goto out;
6607 		}
6608 
6609 		if (--tr->time_stamp_abs_ref)
6610 			goto out;
6611 	}
6612 
6613 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6614 
6615 #ifdef CONFIG_TRACER_MAX_TRACE
6616 	if (tr->max_buffer.buffer)
6617 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6618 #endif
6619  out:
6620 	mutex_unlock(&trace_types_lock);
6621 
6622 	return ret;
6623 }
6624 
/*
 * Per-open state of the raw ring buffer files; allocated in
 * tracing_buffers_open() and freed in tracing_buffers_release().
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	/* Page from ring_buffer_alloc_read_page(); NULL until the first read */
	void			*spare;
	/* CPU @spare was allocated for, needed again when freeing it */
	unsigned int		spare_cpu;
	/* Bytes of @spare already copied out; >= PAGE_SIZE forces a refill */
	unsigned int		read;
};
6631 
6632 #ifdef CONFIG_TRACER_SNAPSHOT
6633 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6634 {
6635 	struct trace_array *tr = inode->i_private;
6636 	struct trace_iterator *iter;
6637 	struct seq_file *m;
6638 	int ret = 0;
6639 
6640 	if (trace_array_get(tr) < 0)
6641 		return -ENODEV;
6642 
6643 	if (file->f_mode & FMODE_READ) {
6644 		iter = __tracing_open(inode, file, true);
6645 		if (IS_ERR(iter))
6646 			ret = PTR_ERR(iter);
6647 	} else {
6648 		/* Writes still need the seq_file to hold the private data */
6649 		ret = -ENOMEM;
6650 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6651 		if (!m)
6652 			goto out;
6653 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6654 		if (!iter) {
6655 			kfree(m);
6656 			goto out;
6657 		}
6658 		ret = 0;
6659 
6660 		iter->tr = tr;
6661 		iter->trace_buffer = &tr->max_buffer;
6662 		iter->cpu_file = tracing_get_cpu(inode);
6663 		m->private = iter;
6664 		file->private_data = m;
6665 	}
6666 out:
6667 	if (ret < 0)
6668 		trace_array_put(tr);
6669 
6670 	return ret;
6671 }
6672 
6673 static ssize_t
6674 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6675 		       loff_t *ppos)
6676 {
6677 	struct seq_file *m = filp->private_data;
6678 	struct trace_iterator *iter = m->private;
6679 	struct trace_array *tr = iter->tr;
6680 	unsigned long val;
6681 	int ret;
6682 
6683 	ret = tracing_update_buffers();
6684 	if (ret < 0)
6685 		return ret;
6686 
6687 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6688 	if (ret)
6689 		return ret;
6690 
6691 	mutex_lock(&trace_types_lock);
6692 
6693 	if (tr->current_trace->use_max_tr) {
6694 		ret = -EBUSY;
6695 		goto out;
6696 	}
6697 
6698 	arch_spin_lock(&tr->max_lock);
6699 	if (tr->cond_snapshot)
6700 		ret = -EBUSY;
6701 	arch_spin_unlock(&tr->max_lock);
6702 	if (ret)
6703 		goto out;
6704 
6705 	switch (val) {
6706 	case 0:
6707 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6708 			ret = -EINVAL;
6709 			break;
6710 		}
6711 		if (tr->allocated_snapshot)
6712 			free_snapshot(tr);
6713 		break;
6714 	case 1:
6715 /* Only allow per-cpu swap if the ring buffer supports it */
6716 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6717 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6718 			ret = -EINVAL;
6719 			break;
6720 		}
6721 #endif
6722 		if (!tr->allocated_snapshot) {
6723 			ret = tracing_alloc_snapshot_instance(tr);
6724 			if (ret < 0)
6725 				break;
6726 		}
6727 		local_irq_disable();
6728 		/* Now, we're going to swap */
6729 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6730 			update_max_tr(tr, current, smp_processor_id(), NULL);
6731 		else
6732 			update_max_tr_single(tr, current, iter->cpu_file);
6733 		local_irq_enable();
6734 		break;
6735 	default:
6736 		if (tr->allocated_snapshot) {
6737 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6738 				tracing_reset_online_cpus(&tr->max_buffer);
6739 			else
6740 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6741 		}
6742 		break;
6743 	}
6744 
6745 	if (ret >= 0) {
6746 		*ppos += cnt;
6747 		ret = cnt;
6748 	}
6749 out:
6750 	mutex_unlock(&trace_types_lock);
6751 	return ret;
6752 }
6753 
6754 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6755 {
6756 	struct seq_file *m = file->private_data;
6757 	int ret;
6758 
6759 	ret = tracing_release(inode, file);
6760 
6761 	if (file->f_mode & FMODE_READ)
6762 		return ret;
6763 
6764 	/* If write only, the seq_file is just a stub */
6765 	if (m)
6766 		kfree(m->private);
6767 	kfree(m);
6768 
6769 	return 0;
6770 }
6771 
6772 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6773 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6774 				    size_t count, loff_t *ppos);
6775 static int tracing_buffers_release(struct inode *inode, struct file *file);
6776 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6777 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6778 
6779 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6780 {
6781 	struct ftrace_buffer_info *info;
6782 	int ret;
6783 
6784 	ret = tracing_buffers_open(inode, filp);
6785 	if (ret < 0)
6786 		return ret;
6787 
6788 	info = filp->private_data;
6789 
6790 	if (info->iter.trace->use_max_tr) {
6791 		tracing_buffers_release(inode, filp);
6792 		return -EBUSY;
6793 	}
6794 
6795 	info->iter.snapshot = true;
6796 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6797 
6798 	return ret;
6799 }
6800 
6801 #endif /* CONFIG_TRACER_SNAPSHOT */
6802 
6803 
/* Read/write file backed by tracing_thresh_read() and tracing_thresh_write() */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};
6810 
6811 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
/* Read/write file backed by tracing_max_lat_read() and tracing_max_lat_write() */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
6818 #endif
6819 
/* Read/write file backed by tracing_set_trace_read() and tracing_set_trace_write() */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};
6826 
/* Non-seekable, read-only file with poll and splice support (the tracing_*_pipe handlers) */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};
6835 
/* Read/write file backed by tracing_entries_read() and tracing_entries_write() */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6843 
/* Read-only file backed by tracing_total_entries_read() */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6850 
/*
 * Write-only file; the real work happens on release, where
 * tracing_free_buffer_release() resizes the ring buffer to 0 (and turns
 * tracing off first when TRACE_ITER_STOP_ON_FREE is set).
 */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
6856 
/* Write-only file: tracing_mark_write() records the written text as a TRACE_PRINT event */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6863 
/* Write-only file: tracing_mark_raw_write() records the written bytes as a TRACE_RAW_DATA event */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_raw_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
6870 
/*
 * seq_file based file: reads list the available clocks via
 * tracing_clock_show(), writes select one via tracing_clock_write().
 */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
6878 
/* Read-only seq_file based file showing the time stamp mode (tracing_time_stamp_mode_show()) */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};
6885 
6886 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Snapshot file: reads go through seq_read, writes through
 * tracing_snapshot_write(), which frees, takes or resets the snapshot
 * depending on the value written.
 */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};
6894 
/*
 * Raw page access to the snapshot buffer: the tracing_buffers_* handlers,
 * with snapshot_raw_open() pointing the iterator at the max buffer.
 */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
6902 
6903 #endif /* CONFIG_TRACER_SNAPSHOT */
6904 
6905 #define TRACING_LOG_ERRS_MAX	8
6906 #define TRACING_LOG_LOC_MAX	128
6907 
6908 #define CMD_PREFIX "  Command: "
6909 
/* Details of one logged error, as filled in by tracing_log_err() */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
	u64		ts;	/* local_clock() value taken when the error was logged */
};
6916 
/*
 * One entry of a trace array's error log.  Entries are linked on
 * tr->err_log and, once TRACING_LOG_ERRS_MAX of them exist, the first
 * list entry is recycled for each new error.
 */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
};
6923 
6924 static DEFINE_MUTEX(tracing_err_log_lock);
6925 
6926 struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6927 {
6928 	struct tracing_log_err *err;
6929 
6930 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6931 		err = kzalloc(sizeof(*err), GFP_KERNEL);
6932 		if (!err)
6933 			err = ERR_PTR(-ENOMEM);
6934 		tr->n_err_log_entries++;
6935 
6936 		return err;
6937 	}
6938 
6939 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6940 	list_del(&err->list);
6941 
6942 	return err;
6943 }
6944 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str within @cmd.  The result is
 * suitable as the caret position argument of tracing_log_err().
 *
 * Warns and returns 0 if @cmd is empty.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
6970 
6971 /**
6972  * tracing_log_err - write an error to the tracing error log
6973  * @tr: The associated trace array for the error (NULL for top level array)
6974  * @loc: A string describing where the error occurred
6975  * @cmd: The tracing command that caused the error
6976  * @errs: The array of loc-specific static error strings
6977  * @type: The index into errs[], which produces the specific static err string
6978  * @pos: The position the caret should be placed in the cmd
6979  *
6980  * Writes an error into tracing/error_log of the form:
6981  *
6982  * <loc>: error: <text>
6983  *   Command: <cmd>
6984  *              ^
6985  *
6986  * tracing/error_log is a small log file containing the last
6987  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
6988  * unless there has been a tracing error, and the error log can be
6989  * cleared and have its memory freed by writing the empty string in
6990  * truncation mode to it i.e. echo > tracing/error_log.
6991  *
6992  * NOTE: the @errs array along with the @type param are used to
6993  * produce a static error string - this string is not copied and saved
6994  * when the error is logged - only a pointer to it is saved.  See
6995  * existing callers for examples of how static strings are typically
6996  * defined for use with tracing_log_err().
6997  */
6998 void tracing_log_err(struct trace_array *tr,
6999 		     const char *loc, const char *cmd,
7000 		     const char **errs, u8 type, u8 pos)
7001 {
7002 	struct tracing_log_err *err;
7003 
7004 	if (!tr)
7005 		tr = &global_trace;
7006 
7007 	mutex_lock(&tracing_err_log_lock);
7008 	err = get_tracing_log_err(tr);
7009 	if (PTR_ERR(err) == -ENOMEM) {
7010 		mutex_unlock(&tracing_err_log_lock);
7011 		return;
7012 	}
7013 
7014 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7015 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7016 
7017 	err->info.errs = errs;
7018 	err->info.type = type;
7019 	err->info.pos = pos;
7020 	err->info.ts = local_clock();
7021 
7022 	list_add_tail(&err->list, &tr->err_log);
7023 	mutex_unlock(&tracing_err_log_lock);
7024 }
7025 
7026 static void clear_tracing_err_log(struct trace_array *tr)
7027 {
7028 	struct tracing_log_err *err, *next;
7029 
7030 	mutex_lock(&tracing_err_log_lock);
7031 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7032 		list_del(&err->list);
7033 		kfree(err);
7034 	}
7035 
7036 	tr->n_err_log_entries = 0;
7037 	mutex_unlock(&tracing_err_log_lock);
7038 }
7039 
7040 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7041 {
7042 	struct trace_array *tr = m->private;
7043 
7044 	mutex_lock(&tracing_err_log_lock);
7045 
7046 	return seq_list_start(&tr->err_log, *pos);
7047 }
7048 
7049 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7050 {
7051 	struct trace_array *tr = m->private;
7052 
7053 	return seq_list_next(v, &tr->err_log, pos);
7054 }
7055 
/* seq_file stop callback: drops the lock taken by tracing_err_log_seq_start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7060 
7061 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7062 {
7063 	u8 i;
7064 
7065 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7066 		seq_putc(m, ' ');
7067 	for (i = 0; i < pos; i++)
7068 		seq_putc(m, ' ');
7069 	seq_puts(m, "^\n");
7070 }
7071 
7072 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7073 {
7074 	struct tracing_log_err *err = v;
7075 
7076 	if (err) {
7077 		const char *err_text = err->info.errs[err->info.type];
7078 		u64 sec = err->info.ts;
7079 		u32 nsec;
7080 
7081 		nsec = do_div(sec, NSEC_PER_SEC);
7082 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7083 			   err->loc, err_text);
7084 		seq_printf(m, "%s", err->cmd);
7085 		tracing_err_log_show_pos(m, err->info.pos);
7086 	}
7087 
7088 	return 0;
7089 }
7090 
/* seq_file iteration over tr->err_log; start takes tracing_err_log_lock, stop drops it */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
7097 
7098 static int tracing_err_log_open(struct inode *inode, struct file *file)
7099 {
7100 	struct trace_array *tr = inode->i_private;
7101 	int ret = 0;
7102 
7103 	if (trace_array_get(tr) < 0)
7104 		return -ENODEV;
7105 
7106 	/* If this file was opened for write, then erase contents */
7107 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7108 		clear_tracing_err_log(tr);
7109 
7110 	if (file->f_mode & FMODE_READ) {
7111 		ret = seq_open(file, &tracing_err_log_seq_ops);
7112 		if (!ret) {
7113 			struct seq_file *m = file->private_data;
7114 			m->private = tr;
7115 		} else {
7116 			trace_array_put(tr);
7117 		}
7118 	}
7119 	return ret;
7120 }
7121 
/*
 * Write handler that ignores its input and reports all @count bytes as
 * consumed.  Clearing the log is done at open time, by
 * tracing_err_log_open(), when the file is opened with O_TRUNC.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7128 
7129 static const struct file_operations tracing_err_log_fops = {
7130 	.open           = tracing_err_log_open,
7131 	.write		= tracing_err_log_write,
7132 	.read           = seq_read,
7133 	.llseek         = seq_lseek,
7134 	.release	= tracing_release_generic_tr,
7135 };
7136 
7137 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7138 {
7139 	struct trace_array *tr = inode->i_private;
7140 	struct ftrace_buffer_info *info;
7141 	int ret;
7142 
7143 	if (tracing_disabled)
7144 		return -ENODEV;
7145 
7146 	if (trace_array_get(tr) < 0)
7147 		return -ENODEV;
7148 
7149 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7150 	if (!info) {
7151 		trace_array_put(tr);
7152 		return -ENOMEM;
7153 	}
7154 
7155 	mutex_lock(&trace_types_lock);
7156 
7157 	info->iter.tr		= tr;
7158 	info->iter.cpu_file	= tracing_get_cpu(inode);
7159 	info->iter.trace	= tr->current_trace;
7160 	info->iter.trace_buffer = &tr->trace_buffer;
7161 	info->spare		= NULL;
7162 	/* Force reading ring buffer for first read */
7163 	info->read		= (unsigned int)-1;
7164 
7165 	filp->private_data = info;
7166 
7167 	tr->current_trace->ref++;
7168 
7169 	mutex_unlock(&trace_types_lock);
7170 
7171 	ret = nonseekable_open(inode, filp);
7172 	if (ret < 0)
7173 		trace_array_put(tr);
7174 
7175 	return ret;
7176 }
7177 
7178 static __poll_t
7179 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7180 {
7181 	struct ftrace_buffer_info *info = filp->private_data;
7182 	struct trace_iterator *iter = &info->iter;
7183 
7184 	return trace_poll(iter, filp, poll_table);
7185 }
7186 
7187 static ssize_t
7188 tracing_buffers_read(struct file *filp, char __user *ubuf,
7189 		     size_t count, loff_t *ppos)
7190 {
7191 	struct ftrace_buffer_info *info = filp->private_data;
7192 	struct trace_iterator *iter = &info->iter;
7193 	ssize_t ret = 0;
7194 	ssize_t size;
7195 
7196 	if (!count)
7197 		return 0;
7198 
7199 #ifdef CONFIG_TRACER_MAX_TRACE
7200 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7201 		return -EBUSY;
7202 #endif
7203 
7204 	if (!info->spare) {
7205 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7206 							  iter->cpu_file);
7207 		if (IS_ERR(info->spare)) {
7208 			ret = PTR_ERR(info->spare);
7209 			info->spare = NULL;
7210 		} else {
7211 			info->spare_cpu = iter->cpu_file;
7212 		}
7213 	}
7214 	if (!info->spare)
7215 		return ret;
7216 
7217 	/* Do we have previous read data to read? */
7218 	if (info->read < PAGE_SIZE)
7219 		goto read;
7220 
7221  again:
7222 	trace_access_lock(iter->cpu_file);
7223 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7224 				    &info->spare,
7225 				    count,
7226 				    iter->cpu_file, 0);
7227 	trace_access_unlock(iter->cpu_file);
7228 
7229 	if (ret < 0) {
7230 		if (trace_empty(iter)) {
7231 			if ((filp->f_flags & O_NONBLOCK))
7232 				return -EAGAIN;
7233 
7234 			ret = wait_on_pipe(iter, 0);
7235 			if (ret)
7236 				return ret;
7237 
7238 			goto again;
7239 		}
7240 		return 0;
7241 	}
7242 
7243 	info->read = 0;
7244  read:
7245 	size = PAGE_SIZE - info->read;
7246 	if (size > count)
7247 		size = count;
7248 
7249 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7250 	if (ret == size)
7251 		return -EFAULT;
7252 
7253 	size -= ret;
7254 
7255 	*ppos += size;
7256 	info->read += size;
7257 
7258 	return size;
7259 }
7260 
7261 static int tracing_buffers_release(struct inode *inode, struct file *file)
7262 {
7263 	struct ftrace_buffer_info *info = file->private_data;
7264 	struct trace_iterator *iter = &info->iter;
7265 
7266 	mutex_lock(&trace_types_lock);
7267 
7268 	iter->tr->current_trace->ref--;
7269 
7270 	__trace_array_put(iter->tr);
7271 
7272 	if (info->spare)
7273 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7274 					   info->spare_cpu, info->spare);
7275 	kfree(info);
7276 
7277 	mutex_unlock(&trace_types_lock);
7278 
7279 	return 0;
7280 }
7281 
/*
 * Reference-counted handle on one ring buffer read page that has been
 * handed to a pipe.  When the last reference is dropped (see
 * buffer_ref_release()), @page is returned to @buffer for @cpu and the
 * handle is freed.
 */
struct buffer_ref {
	struct ring_buffer	*buffer;
	void			*page;
	int			cpu;
	refcount_t		refcount;
};
7288 
7289 static void buffer_ref_release(struct buffer_ref *ref)
7290 {
7291 	if (!refcount_dec_and_test(&ref->refcount))
7292 		return;
7293 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7294 	kfree(ref);
7295 }
7296 
7297 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7298 				    struct pipe_buffer *buf)
7299 {
7300 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7301 
7302 	buffer_ref_release(ref);
7303 	buf->private = 0;
7304 }
7305 
7306 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7307 				struct pipe_buffer *buf)
7308 {
7309 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7310 
7311 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7312 		return false;
7313 
7314 	refcount_inc(&ref->refcount);
7315 	return true;
7316 }
7317 
/*
 * Pipe buffer operations for a buffer.
 *
 * get/release maintain the buffer_ref count of the underlying ring buffer
 * page; confirm and steal use the generic pipe helpers.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.confirm		= generic_pipe_buf_confirm,
	.release		= buffer_pipe_buf_release,
	.steal			= generic_pipe_buf_nosteal,
	.get			= buffer_pipe_buf_get,
};
7325 
7326 /*
7327  * Callback from splice_to_pipe(), if we need to release some pages
7328  * at the end of the spd in case we error'ed out in filling the pipe.
7329  */
7330 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7331 {
7332 	struct buffer_ref *ref =
7333 		(struct buffer_ref *)spd->partial[i].private;
7334 
7335 	buffer_ref_release(ref);
7336 	spd->partial[i].private = 0;
7337 }
7338 
7339 static ssize_t
7340 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7341 			    struct pipe_inode_info *pipe, size_t len,
7342 			    unsigned int flags)
7343 {
7344 	struct ftrace_buffer_info *info = file->private_data;
7345 	struct trace_iterator *iter = &info->iter;
7346 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7347 	struct page *pages_def[PIPE_DEF_BUFFERS];
7348 	struct splice_pipe_desc spd = {
7349 		.pages		= pages_def,
7350 		.partial	= partial_def,
7351 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7352 		.ops		= &buffer_pipe_buf_ops,
7353 		.spd_release	= buffer_spd_release,
7354 	};
7355 	struct buffer_ref *ref;
7356 	int entries, i;
7357 	ssize_t ret = 0;
7358 
7359 #ifdef CONFIG_TRACER_MAX_TRACE
7360 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7361 		return -EBUSY;
7362 #endif
7363 
7364 	if (*ppos & (PAGE_SIZE - 1))
7365 		return -EINVAL;
7366 
7367 	if (len & (PAGE_SIZE - 1)) {
7368 		if (len < PAGE_SIZE)
7369 			return -EINVAL;
7370 		len &= PAGE_MASK;
7371 	}
7372 
7373 	if (splice_grow_spd(pipe, &spd))
7374 		return -ENOMEM;
7375 
7376  again:
7377 	trace_access_lock(iter->cpu_file);
7378 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7379 
7380 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7381 		struct page *page;
7382 		int r;
7383 
7384 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7385 		if (!ref) {
7386 			ret = -ENOMEM;
7387 			break;
7388 		}
7389 
7390 		refcount_set(&ref->refcount, 1);
7391 		ref->buffer = iter->trace_buffer->buffer;
7392 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7393 		if (IS_ERR(ref->page)) {
7394 			ret = PTR_ERR(ref->page);
7395 			ref->page = NULL;
7396 			kfree(ref);
7397 			break;
7398 		}
7399 		ref->cpu = iter->cpu_file;
7400 
7401 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7402 					  len, iter->cpu_file, 1);
7403 		if (r < 0) {
7404 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7405 						   ref->page);
7406 			kfree(ref);
7407 			break;
7408 		}
7409 
7410 		page = virt_to_page(ref->page);
7411 
7412 		spd.pages[i] = page;
7413 		spd.partial[i].len = PAGE_SIZE;
7414 		spd.partial[i].offset = 0;
7415 		spd.partial[i].private = (unsigned long)ref;
7416 		spd.nr_pages++;
7417 		*ppos += PAGE_SIZE;
7418 
7419 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7420 	}
7421 
7422 	trace_access_unlock(iter->cpu_file);
7423 	spd.nr_pages = i;
7424 
7425 	/* did we read anything? */
7426 	if (!spd.nr_pages) {
7427 		if (ret)
7428 			goto out;
7429 
7430 		ret = -EAGAIN;
7431 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7432 			goto out;
7433 
7434 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7435 		if (ret)
7436 			goto out;
7437 
7438 		goto again;
7439 	}
7440 
7441 	ret = splice_to_pipe(pipe, &spd);
7442 out:
7443 	splice_shrink_spd(&spd);
7444 
7445 	return ret;
7446 }
7447 
/* Non-seekable raw page access to the ring buffer: read, poll and splice */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
7456 
7457 static ssize_t
7458 tracing_stats_read(struct file *filp, char __user *ubuf,
7459 		   size_t count, loff_t *ppos)
7460 {
7461 	struct inode *inode = file_inode(filp);
7462 	struct trace_array *tr = inode->i_private;
7463 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7464 	int cpu = tracing_get_cpu(inode);
7465 	struct trace_seq *s;
7466 	unsigned long cnt;
7467 	unsigned long long t;
7468 	unsigned long usec_rem;
7469 
7470 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7471 	if (!s)
7472 		return -ENOMEM;
7473 
7474 	trace_seq_init(s);
7475 
7476 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7477 	trace_seq_printf(s, "entries: %ld\n", cnt);
7478 
7479 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7480 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7481 
7482 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7483 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7484 
7485 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7486 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7487 
7488 	if (trace_clocks[tr->clock_id].in_ns) {
7489 		/* local or global for trace_clock */
7490 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7491 		usec_rem = do_div(t, USEC_PER_SEC);
7492 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7493 								t, usec_rem);
7494 
7495 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7496 		usec_rem = do_div(t, USEC_PER_SEC);
7497 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7498 	} else {
7499 		/* counter or tsc mode for trace_clock */
7500 		trace_seq_printf(s, "oldest event ts: %llu\n",
7501 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7502 
7503 		trace_seq_printf(s, "now ts: %llu\n",
7504 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7505 	}
7506 
7507 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7508 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7509 
7510 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7511 	trace_seq_printf(s, "read events: %ld\n", cnt);
7512 
7513 	count = simple_read_from_buffer(ubuf, count, ppos,
7514 					s->buffer, trace_seq_used(s));
7515 
7516 	kfree(s);
7517 
7518 	return count;
7519 }
7520 
7521 static const struct file_operations tracing_stats_fops = {
7522 	.open		= tracing_open_generic_tr,
7523 	.read		= tracing_stats_read,
7524 	.llseek		= generic_file_llseek,
7525 	.release	= tracing_release_generic_tr,
7526 };
7527 
7528 #ifdef CONFIG_DYNAMIC_FTRACE
7529 
7530 static ssize_t
7531 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7532 		  size_t cnt, loff_t *ppos)
7533 {
7534 	unsigned long *p = filp->private_data;
7535 	char buf[64]; /* Not too big for a shallow stack */
7536 	int r;
7537 
7538 	r = scnprintf(buf, 63, "%ld", *p);
7539 	buf[r++] = '\n';
7540 
7541 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7542 }
7543 
7544 static const struct file_operations tracing_dyn_info_fops = {
7545 	.open		= tracing_open_generic,
7546 	.read		= tracing_read_dyn_info,
7547 	.llseek		= generic_file_llseek,
7548 };
7549 #endif /* CONFIG_DYNAMIC_FTRACE */
7550 
7551 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe callback: take a snapshot of @tr every time the probe fires. */
static void ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
			    struct trace_array *tr,
			    struct ftrace_probe_ops *ops, void *data)
{
	tracing_snapshot_instance(tr);
}
7559 
7560 static void
7561 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7562 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7563 		      void *data)
7564 {
7565 	struct ftrace_func_mapper *mapper = data;
7566 	long *count = NULL;
7567 
7568 	if (mapper)
7569 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7570 
7571 	if (count) {
7572 
7573 		if (*count <= 0)
7574 			return;
7575 
7576 		(*count)--;
7577 	}
7578 
7579 	tracing_snapshot_instance(tr);
7580 }
7581 
7582 static int
7583 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7584 		      struct ftrace_probe_ops *ops, void *data)
7585 {
7586 	struct ftrace_func_mapper *mapper = data;
7587 	long *count = NULL;
7588 
7589 	seq_printf(m, "%ps:", (void *)ip);
7590 
7591 	seq_puts(m, "snapshot");
7592 
7593 	if (mapper)
7594 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7595 
7596 	if (count)
7597 		seq_printf(m, ":count=%ld\n", *count);
7598 	else
7599 		seq_puts(m, ":unlimited\n");
7600 
7601 	return 0;
7602 }
7603 
7604 static int
7605 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7606 		     unsigned long ip, void *init_data, void **data)
7607 {
7608 	struct ftrace_func_mapper *mapper = *data;
7609 
7610 	if (!mapper) {
7611 		mapper = allocate_ftrace_func_mapper();
7612 		if (!mapper)
7613 			return -ENOMEM;
7614 		*data = mapper;
7615 	}
7616 
7617 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7618 }
7619 
7620 static void
7621 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7622 		     unsigned long ip, void *data)
7623 {
7624 	struct ftrace_func_mapper *mapper = data;
7625 
7626 	if (!ip) {
7627 		if (!mapper)
7628 			return;
7629 		free_ftrace_func_mapper(mapper, NULL);
7630 		return;
7631 	}
7632 
7633 	ftrace_func_mapper_remove_ip(mapper, ip);
7634 }
7635 
7636 static struct ftrace_probe_ops snapshot_probe_ops = {
7637 	.func			= ftrace_snapshot,
7638 	.print			= ftrace_snapshot_print,
7639 };
7640 
7641 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7642 	.func			= ftrace_count_snapshot,
7643 	.print			= ftrace_snapshot_print,
7644 	.init			= ftrace_snapshot_init,
7645 	.free			= ftrace_snapshot_free,
7646 };
7647 
7648 static int
7649 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7650 			       char *glob, char *cmd, char *param, int enable)
7651 {
7652 	struct ftrace_probe_ops *ops;
7653 	void *count = (void *)-1;
7654 	char *number;
7655 	int ret;
7656 
7657 	if (!tr)
7658 		return -ENODEV;
7659 
7660 	/* hash funcs only work with set_ftrace_filter */
7661 	if (!enable)
7662 		return -EINVAL;
7663 
7664 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7665 
7666 	if (glob[0] == '!')
7667 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7668 
7669 	if (!param)
7670 		goto out_reg;
7671 
7672 	number = strsep(&param, ":");
7673 
7674 	if (!strlen(number))
7675 		goto out_reg;
7676 
7677 	/*
7678 	 * We use the callback data field (which is a pointer)
7679 	 * as our counter.
7680 	 */
7681 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7682 	if (ret)
7683 		return ret;
7684 
7685  out_reg:
7686 	ret = tracing_alloc_snapshot_instance(tr);
7687 	if (ret < 0)
7688 		goto out;
7689 
7690 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7691 
7692  out:
7693 	return ret < 0 ? ret : 0;
7694 }
7695 
7696 static struct ftrace_func_command ftrace_snapshot_cmd = {
7697 	.name			= "snapshot",
7698 	.func			= ftrace_trace_snapshot_callback,
7699 };
7700 
7701 static __init int register_snapshot_cmd(void)
7702 {
7703 	return register_ftrace_command(&ftrace_snapshot_cmd);
7704 }
7705 #else
7706 static inline __init int register_snapshot_cmd(void) { return 0; }
7707 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7708 
7709 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7710 {
7711 	if (WARN_ON(!tr->dir))
7712 		return ERR_PTR(-ENODEV);
7713 
7714 	/* Top directory uses NULL as the parent */
7715 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7716 		return NULL;
7717 
7718 	/* All sub buffers have a descriptor */
7719 	return tr->dir;
7720 }
7721 
7722 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7723 {
7724 	struct dentry *d_tracer;
7725 
7726 	if (tr->percpu_dir)
7727 		return tr->percpu_dir;
7728 
7729 	d_tracer = tracing_get_dentry(tr);
7730 	if (IS_ERR(d_tracer))
7731 		return NULL;
7732 
7733 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7734 
7735 	WARN_ONCE(!tr->percpu_dir,
7736 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7737 
7738 	return tr->percpu_dir;
7739 }
7740 
7741 static struct dentry *
7742 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7743 		      void *data, long cpu, const struct file_operations *fops)
7744 {
7745 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7746 
7747 	if (ret) /* See tracing_get_cpu() */
7748 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7749 	return ret;
7750 }
7751 
7752 static void
7753 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7754 {
7755 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7756 	struct dentry *d_cpu;
7757 	char cpu_dir[30]; /* 30 characters should be more than enough */
7758 
7759 	if (!d_percpu)
7760 		return;
7761 
7762 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7763 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7764 	if (!d_cpu) {
7765 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7766 		return;
7767 	}
7768 
7769 	/* per cpu trace_pipe */
7770 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7771 				tr, cpu, &tracing_pipe_fops);
7772 
7773 	/* per cpu trace */
7774 	trace_create_cpu_file("trace", 0644, d_cpu,
7775 				tr, cpu, &tracing_fops);
7776 
7777 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7778 				tr, cpu, &tracing_buffers_fops);
7779 
7780 	trace_create_cpu_file("stats", 0444, d_cpu,
7781 				tr, cpu, &tracing_stats_fops);
7782 
7783 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7784 				tr, cpu, &tracing_entries_fops);
7785 
7786 #ifdef CONFIG_TRACER_SNAPSHOT
7787 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7788 				tr, cpu, &snapshot_fops);
7789 
7790 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7791 				tr, cpu, &snapshot_raw_fops);
7792 #endif
7793 }
7794 
7795 #ifdef CONFIG_FTRACE_SELFTEST
7796 /* Let selftest have access to static functions in this file */
7797 #include "trace_selftest.c"
7798 #endif
7799 
7800 static ssize_t
7801 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7802 			loff_t *ppos)
7803 {
7804 	struct trace_option_dentry *topt = filp->private_data;
7805 	char *buf;
7806 
7807 	if (topt->flags->val & topt->opt->bit)
7808 		buf = "1\n";
7809 	else
7810 		buf = "0\n";
7811 
7812 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7813 }
7814 
7815 static ssize_t
7816 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7817 			 loff_t *ppos)
7818 {
7819 	struct trace_option_dentry *topt = filp->private_data;
7820 	unsigned long val;
7821 	int ret;
7822 
7823 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7824 	if (ret)
7825 		return ret;
7826 
7827 	if (val != 0 && val != 1)
7828 		return -EINVAL;
7829 
7830 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7831 		mutex_lock(&trace_types_lock);
7832 		ret = __set_tracer_option(topt->tr, topt->flags,
7833 					  topt->opt, !val);
7834 		mutex_unlock(&trace_types_lock);
7835 		if (ret)
7836 			return ret;
7837 	}
7838 
7839 	*ppos += cnt;
7840 
7841 	return cnt;
7842 }
7843 
7844 
7845 static const struct file_operations trace_options_fops = {
7846 	.open = tracing_open_generic,
7847 	.read = trace_options_read,
7848 	.write = trace_options_write,
7849 	.llseek	= generic_file_llseek,
7850 };
7851 
7852 /*
7853  * In order to pass in both the trace_array descriptor as well as the index
7854  * to the flag that the trace option file represents, the trace_array
7855  * has a character array of trace_flags_index[], which holds the index
7856  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7857  * The address of this character array is passed to the flag option file
7858  * read/write callbacks.
7859  *
7860  * In order to extract both the index and the trace_array descriptor,
7861  * get_tr_index() uses the following algorithm.
7862  *
7863  *   idx = *ptr;
7864  *
7865  * As the pointer itself contains the address of the index (remember
7866  * index[1] == 1).
7867  *
7868  * Then to get the trace_array descriptor, by subtracting that index
7869  * from the ptr, we get to the start of the index itself.
7870  *
7871  *   ptr - idx == &index[0]
7872  *
7873  * Then a simple container_of() from that pointer gets us to the
7874  * trace_array descriptor.
7875  */
7876 static void get_tr_index(void *data, struct trace_array **ptr,
7877 			 unsigned int *pindex)
7878 {
7879 	*pindex = *(unsigned char *)data;
7880 
7881 	*ptr = container_of(data - *pindex, struct trace_array,
7882 			    trace_flags_index);
7883 }
7884 
7885 static ssize_t
7886 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7887 			loff_t *ppos)
7888 {
7889 	void *tr_index = filp->private_data;
7890 	struct trace_array *tr;
7891 	unsigned int index;
7892 	char *buf;
7893 
7894 	get_tr_index(tr_index, &tr, &index);
7895 
7896 	if (tr->trace_flags & (1 << index))
7897 		buf = "1\n";
7898 	else
7899 		buf = "0\n";
7900 
7901 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7902 }
7903 
7904 static ssize_t
7905 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7906 			 loff_t *ppos)
7907 {
7908 	void *tr_index = filp->private_data;
7909 	struct trace_array *tr;
7910 	unsigned int index;
7911 	unsigned long val;
7912 	int ret;
7913 
7914 	get_tr_index(tr_index, &tr, &index);
7915 
7916 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7917 	if (ret)
7918 		return ret;
7919 
7920 	if (val != 0 && val != 1)
7921 		return -EINVAL;
7922 
7923 	mutex_lock(&trace_types_lock);
7924 	ret = set_tracer_flag(tr, 1 << index, val);
7925 	mutex_unlock(&trace_types_lock);
7926 
7927 	if (ret < 0)
7928 		return ret;
7929 
7930 	*ppos += cnt;
7931 
7932 	return cnt;
7933 }
7934 
7935 static const struct file_operations trace_options_core_fops = {
7936 	.open = tracing_open_generic,
7937 	.read = trace_options_core_read,
7938 	.write = trace_options_core_write,
7939 	.llseek = generic_file_llseek,
7940 };
7941 
7942 struct dentry *trace_create_file(const char *name,
7943 				 umode_t mode,
7944 				 struct dentry *parent,
7945 				 void *data,
7946 				 const struct file_operations *fops)
7947 {
7948 	struct dentry *ret;
7949 
7950 	ret = tracefs_create_file(name, mode, parent, data, fops);
7951 	if (!ret)
7952 		pr_warn("Could not create tracefs '%s' entry\n", name);
7953 
7954 	return ret;
7955 }
7956 
7957 
7958 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7959 {
7960 	struct dentry *d_tracer;
7961 
7962 	if (tr->options)
7963 		return tr->options;
7964 
7965 	d_tracer = tracing_get_dentry(tr);
7966 	if (IS_ERR(d_tracer))
7967 		return NULL;
7968 
7969 	tr->options = tracefs_create_dir("options", d_tracer);
7970 	if (!tr->options) {
7971 		pr_warn("Could not create tracefs directory 'options'\n");
7972 		return NULL;
7973 	}
7974 
7975 	return tr->options;
7976 }
7977 
7978 static void
7979 create_trace_option_file(struct trace_array *tr,
7980 			 struct trace_option_dentry *topt,
7981 			 struct tracer_flags *flags,
7982 			 struct tracer_opt *opt)
7983 {
7984 	struct dentry *t_options;
7985 
7986 	t_options = trace_options_init_dentry(tr);
7987 	if (!t_options)
7988 		return;
7989 
7990 	topt->flags = flags;
7991 	topt->opt = opt;
7992 	topt->tr = tr;
7993 
7994 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7995 				    &trace_options_fops);
7996 
7997 }
7998 
7999 static void
8000 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8001 {
8002 	struct trace_option_dentry *topts;
8003 	struct trace_options *tr_topts;
8004 	struct tracer_flags *flags;
8005 	struct tracer_opt *opts;
8006 	int cnt;
8007 	int i;
8008 
8009 	if (!tracer)
8010 		return;
8011 
8012 	flags = tracer->flags;
8013 
8014 	if (!flags || !flags->opts)
8015 		return;
8016 
8017 	/*
8018 	 * If this is an instance, only create flags for tracers
8019 	 * the instance may have.
8020 	 */
8021 	if (!trace_ok_for_array(tracer, tr))
8022 		return;
8023 
8024 	for (i = 0; i < tr->nr_topts; i++) {
8025 		/* Make sure there's no duplicate flags. */
8026 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8027 			return;
8028 	}
8029 
8030 	opts = flags->opts;
8031 
8032 	for (cnt = 0; opts[cnt].name; cnt++)
8033 		;
8034 
8035 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8036 	if (!topts)
8037 		return;
8038 
8039 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8040 			    GFP_KERNEL);
8041 	if (!tr_topts) {
8042 		kfree(topts);
8043 		return;
8044 	}
8045 
8046 	tr->topts = tr_topts;
8047 	tr->topts[tr->nr_topts].tracer = tracer;
8048 	tr->topts[tr->nr_topts].topts = topts;
8049 	tr->nr_topts++;
8050 
8051 	for (cnt = 0; opts[cnt].name; cnt++) {
8052 		create_trace_option_file(tr, &topts[cnt], flags,
8053 					 &opts[cnt]);
8054 		WARN_ONCE(topts[cnt].entry == NULL,
8055 			  "Failed to create trace option: %s",
8056 			  opts[cnt].name);
8057 	}
8058 }
8059 
8060 static struct dentry *
8061 create_trace_option_core_file(struct trace_array *tr,
8062 			      const char *option, long index)
8063 {
8064 	struct dentry *t_options;
8065 
8066 	t_options = trace_options_init_dentry(tr);
8067 	if (!t_options)
8068 		return NULL;
8069 
8070 	return trace_create_file(option, 0644, t_options,
8071 				 (void *)&tr->trace_flags_index[index],
8072 				 &trace_options_core_fops);
8073 }
8074 
8075 static void create_trace_options_dir(struct trace_array *tr)
8076 {
8077 	struct dentry *t_options;
8078 	bool top_level = tr == &global_trace;
8079 	int i;
8080 
8081 	t_options = trace_options_init_dentry(tr);
8082 	if (!t_options)
8083 		return;
8084 
8085 	for (i = 0; trace_options[i]; i++) {
8086 		if (top_level ||
8087 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8088 			create_trace_option_core_file(tr, trace_options[i], i);
8089 	}
8090 }
8091 
8092 static ssize_t
8093 rb_simple_read(struct file *filp, char __user *ubuf,
8094 	       size_t cnt, loff_t *ppos)
8095 {
8096 	struct trace_array *tr = filp->private_data;
8097 	char buf[64];
8098 	int r;
8099 
8100 	r = tracer_tracing_is_on(tr);
8101 	r = sprintf(buf, "%d\n", r);
8102 
8103 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8104 }
8105 
8106 static ssize_t
8107 rb_simple_write(struct file *filp, const char __user *ubuf,
8108 		size_t cnt, loff_t *ppos)
8109 {
8110 	struct trace_array *tr = filp->private_data;
8111 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8112 	unsigned long val;
8113 	int ret;
8114 
8115 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8116 	if (ret)
8117 		return ret;
8118 
8119 	if (buffer) {
8120 		mutex_lock(&trace_types_lock);
8121 		if (!!val == tracer_tracing_is_on(tr)) {
8122 			val = 0; /* do nothing */
8123 		} else if (val) {
8124 			tracer_tracing_on(tr);
8125 			if (tr->current_trace->start)
8126 				tr->current_trace->start(tr);
8127 		} else {
8128 			tracer_tracing_off(tr);
8129 			if (tr->current_trace->stop)
8130 				tr->current_trace->stop(tr);
8131 		}
8132 		mutex_unlock(&trace_types_lock);
8133 	}
8134 
8135 	(*ppos)++;
8136 
8137 	return cnt;
8138 }
8139 
8140 static const struct file_operations rb_simple_fops = {
8141 	.open		= tracing_open_generic_tr,
8142 	.read		= rb_simple_read,
8143 	.write		= rb_simple_write,
8144 	.release	= tracing_release_generic_tr,
8145 	.llseek		= default_llseek,
8146 };
8147 
8148 static ssize_t
8149 buffer_percent_read(struct file *filp, char __user *ubuf,
8150 		    size_t cnt, loff_t *ppos)
8151 {
8152 	struct trace_array *tr = filp->private_data;
8153 	char buf[64];
8154 	int r;
8155 
8156 	r = tr->buffer_percent;
8157 	r = sprintf(buf, "%d\n", r);
8158 
8159 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8160 }
8161 
8162 static ssize_t
8163 buffer_percent_write(struct file *filp, const char __user *ubuf,
8164 		     size_t cnt, loff_t *ppos)
8165 {
8166 	struct trace_array *tr = filp->private_data;
8167 	unsigned long val;
8168 	int ret;
8169 
8170 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8171 	if (ret)
8172 		return ret;
8173 
8174 	if (val > 100)
8175 		return -EINVAL;
8176 
8177 	if (!val)
8178 		val = 1;
8179 
8180 	tr->buffer_percent = val;
8181 
8182 	(*ppos)++;
8183 
8184 	return cnt;
8185 }
8186 
8187 static const struct file_operations buffer_percent_fops = {
8188 	.open		= tracing_open_generic_tr,
8189 	.read		= buffer_percent_read,
8190 	.write		= buffer_percent_write,
8191 	.release	= tracing_release_generic_tr,
8192 	.llseek		= default_llseek,
8193 };
8194 
/* Dentry of the tracefs "instances" directory. */
struct dentry *trace_instance_dir;

/* Defined further down in this file; needed by trace_array_create(). */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8199 
8200 static int
8201 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8202 {
8203 	enum ring_buffer_flags rb_flags;
8204 
8205 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8206 
8207 	buf->tr = tr;
8208 
8209 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8210 	if (!buf->buffer)
8211 		return -ENOMEM;
8212 
8213 	buf->data = alloc_percpu(struct trace_array_cpu);
8214 	if (!buf->data) {
8215 		ring_buffer_free(buf->buffer);
8216 		buf->buffer = NULL;
8217 		return -ENOMEM;
8218 	}
8219 
8220 	/* Allocate the first page for all buffers */
8221 	set_buffer_entries(&tr->trace_buffer,
8222 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8223 
8224 	return 0;
8225 }
8226 
8227 static int allocate_trace_buffers(struct trace_array *tr, int size)
8228 {
8229 	int ret;
8230 
8231 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8232 	if (ret)
8233 		return ret;
8234 
8235 #ifdef CONFIG_TRACER_MAX_TRACE
8236 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8237 				    allocate_snapshot ? size : 1);
8238 	if (WARN_ON(ret)) {
8239 		ring_buffer_free(tr->trace_buffer.buffer);
8240 		tr->trace_buffer.buffer = NULL;
8241 		free_percpu(tr->trace_buffer.data);
8242 		tr->trace_buffer.data = NULL;
8243 		return -ENOMEM;
8244 	}
8245 	tr->allocated_snapshot = allocate_snapshot;
8246 
8247 	/*
8248 	 * Only the top level trace array gets its snapshot allocated
8249 	 * from the kernel command line.
8250 	 */
8251 	allocate_snapshot = false;
8252 #endif
8253 	return 0;
8254 }
8255 
8256 static void free_trace_buffer(struct trace_buffer *buf)
8257 {
8258 	if (buf->buffer) {
8259 		ring_buffer_free(buf->buffer);
8260 		buf->buffer = NULL;
8261 		free_percpu(buf->data);
8262 		buf->data = NULL;
8263 	}
8264 }
8265 
8266 static void free_trace_buffers(struct trace_array *tr)
8267 {
8268 	if (!tr)
8269 		return;
8270 
8271 	free_trace_buffer(&tr->trace_buffer);
8272 
8273 #ifdef CONFIG_TRACER_MAX_TRACE
8274 	free_trace_buffer(&tr->max_buffer);
8275 #endif
8276 }
8277 
8278 static void init_trace_flags_index(struct trace_array *tr)
8279 {
8280 	int i;
8281 
8282 	/* Used by the trace options files */
8283 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8284 		tr->trace_flags_index[i] = i;
8285 }
8286 
8287 static void __update_tracer_options(struct trace_array *tr)
8288 {
8289 	struct tracer *t;
8290 
8291 	for (t = trace_types; t; t = t->next)
8292 		add_tracer_options(tr, t);
8293 }
8294 
8295 static void update_tracer_options(struct trace_array *tr)
8296 {
8297 	mutex_lock(&trace_types_lock);
8298 	__update_tracer_options(tr);
8299 	mutex_unlock(&trace_types_lock);
8300 }
8301 
8302 struct trace_array *trace_array_create(const char *name)
8303 {
8304 	struct trace_array *tr;
8305 	int ret;
8306 
8307 	mutex_lock(&event_mutex);
8308 	mutex_lock(&trace_types_lock);
8309 
8310 	ret = -EEXIST;
8311 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8312 		if (tr->name && strcmp(tr->name, name) == 0)
8313 			goto out_unlock;
8314 	}
8315 
8316 	ret = -ENOMEM;
8317 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8318 	if (!tr)
8319 		goto out_unlock;
8320 
8321 	tr->name = kstrdup(name, GFP_KERNEL);
8322 	if (!tr->name)
8323 		goto out_free_tr;
8324 
8325 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8326 		goto out_free_tr;
8327 
8328 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8329 
8330 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8331 
8332 	raw_spin_lock_init(&tr->start_lock);
8333 
8334 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8335 
8336 	tr->current_trace = &nop_trace;
8337 
8338 	INIT_LIST_HEAD(&tr->systems);
8339 	INIT_LIST_HEAD(&tr->events);
8340 	INIT_LIST_HEAD(&tr->hist_vars);
8341 	INIT_LIST_HEAD(&tr->err_log);
8342 
8343 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8344 		goto out_free_tr;
8345 
8346 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8347 	if (!tr->dir)
8348 		goto out_free_tr;
8349 
8350 	ret = event_trace_add_tracer(tr->dir, tr);
8351 	if (ret) {
8352 		tracefs_remove_recursive(tr->dir);
8353 		goto out_free_tr;
8354 	}
8355 
8356 	ftrace_init_trace_array(tr);
8357 
8358 	init_tracer_tracefs(tr, tr->dir);
8359 	init_trace_flags_index(tr);
8360 	__update_tracer_options(tr);
8361 
8362 	list_add(&tr->list, &ftrace_trace_arrays);
8363 
8364 	mutex_unlock(&trace_types_lock);
8365 	mutex_unlock(&event_mutex);
8366 
8367 	return tr;
8368 
8369  out_free_tr:
8370 	free_trace_buffers(tr);
8371 	free_cpumask_var(tr->tracing_cpumask);
8372 	kfree(tr->name);
8373 	kfree(tr);
8374 
8375  out_unlock:
8376 	mutex_unlock(&trace_types_lock);
8377 	mutex_unlock(&event_mutex);
8378 
8379 	return ERR_PTR(ret);
8380 }
8381 EXPORT_SYMBOL_GPL(trace_array_create);
8382 
/*
 * mkdir callback for the "instances" directory: create a trace array
 * named @name. Returns 0 on success or the negative errno carried by
 * the ERR_PTR from trace_array_create().
 */
static int instance_mkdir(const char *name)
{
	struct trace_array *tr = trace_array_create(name);

	return PTR_ERR_OR_ZERO(tr);
}
8387 
8388 static int __remove_instance(struct trace_array *tr)
8389 {
8390 	int i;
8391 
8392 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8393 		return -EBUSY;
8394 
8395 	list_del(&tr->list);
8396 
8397 	/* Disable all the flags that were enabled coming in */
8398 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8399 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8400 			set_tracer_flag(tr, 1 << i, 0);
8401 	}
8402 
8403 	tracing_set_nop(tr);
8404 	clear_ftrace_function_probes(tr);
8405 	event_trace_del_tracer(tr);
8406 	ftrace_clear_pids(tr);
8407 	ftrace_destroy_function_files(tr);
8408 	tracefs_remove_recursive(tr->dir);
8409 	free_trace_buffers(tr);
8410 
8411 	for (i = 0; i < tr->nr_topts; i++) {
8412 		kfree(tr->topts[i].topts);
8413 	}
8414 	kfree(tr->topts);
8415 
8416 	free_cpumask_var(tr->tracing_cpumask);
8417 	kfree(tr->name);
8418 	kfree(tr);
8419 	tr = NULL;
8420 
8421 	return 0;
8422 }
8423 
8424 int trace_array_destroy(struct trace_array *tr)
8425 {
8426 	int ret;
8427 
8428 	if (!tr)
8429 		return -EINVAL;
8430 
8431 	mutex_lock(&event_mutex);
8432 	mutex_lock(&trace_types_lock);
8433 
8434 	ret = __remove_instance(tr);
8435 
8436 	mutex_unlock(&trace_types_lock);
8437 	mutex_unlock(&event_mutex);
8438 
8439 	return ret;
8440 }
8441 EXPORT_SYMBOL_GPL(trace_array_destroy);
8442 
8443 static int instance_rmdir(const char *name)
8444 {
8445 	struct trace_array *tr;
8446 	int ret;
8447 
8448 	mutex_lock(&event_mutex);
8449 	mutex_lock(&trace_types_lock);
8450 
8451 	ret = -ENODEV;
8452 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8453 		if (tr->name && strcmp(tr->name, name) == 0) {
8454 			ret = __remove_instance(tr);
8455 			break;
8456 		}
8457 	}
8458 
8459 	mutex_unlock(&trace_types_lock);
8460 	mutex_unlock(&event_mutex);
8461 
8462 	return ret;
8463 }
8464 
8465 static __init void create_trace_instances(struct dentry *d_tracer)
8466 {
8467 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8468 							 instance_mkdir,
8469 							 instance_rmdir);
8470 	if (WARN_ON(!trace_instance_dir))
8471 		return;
8472 }
8473 
8474 static void
8475 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8476 {
8477 	struct trace_event_file *file;
8478 	int cpu;
8479 
8480 	trace_create_file("available_tracers", 0444, d_tracer,
8481 			tr, &show_traces_fops);
8482 
8483 	trace_create_file("current_tracer", 0644, d_tracer,
8484 			tr, &set_tracer_fops);
8485 
8486 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8487 			  tr, &tracing_cpumask_fops);
8488 
8489 	trace_create_file("trace_options", 0644, d_tracer,
8490 			  tr, &tracing_iter_fops);
8491 
8492 	trace_create_file("trace", 0644, d_tracer,
8493 			  tr, &tracing_fops);
8494 
8495 	trace_create_file("trace_pipe", 0444, d_tracer,
8496 			  tr, &tracing_pipe_fops);
8497 
8498 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8499 			  tr, &tracing_entries_fops);
8500 
8501 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8502 			  tr, &tracing_total_entries_fops);
8503 
8504 	trace_create_file("free_buffer", 0200, d_tracer,
8505 			  tr, &tracing_free_buffer_fops);
8506 
8507 	trace_create_file("trace_marker", 0220, d_tracer,
8508 			  tr, &tracing_mark_fops);
8509 
8510 	file = __find_event_file(tr, "ftrace", "print");
8511 	if (file && file->dir)
8512 		trace_create_file("trigger", 0644, file->dir, file,
8513 				  &event_trigger_fops);
8514 	tr->trace_marker_file = file;
8515 
8516 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8517 			  tr, &tracing_mark_raw_fops);
8518 
8519 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8520 			  &trace_clock_fops);
8521 
8522 	trace_create_file("tracing_on", 0644, d_tracer,
8523 			  tr, &rb_simple_fops);
8524 
8525 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8526 			  &trace_time_stamp_mode_fops);
8527 
8528 	tr->buffer_percent = 50;
8529 
8530 	trace_create_file("buffer_percent", 0444, d_tracer,
8531 			tr, &buffer_percent_fops);
8532 
8533 	create_trace_options_dir(tr);
8534 
8535 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8536 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8537 			&tr->max_latency, &tracing_max_lat_fops);
8538 #endif
8539 
8540 	if (ftrace_create_function_files(tr, d_tracer))
8541 		WARN(1, "Could not allocate function filter files");
8542 
8543 #ifdef CONFIG_TRACER_SNAPSHOT
8544 	trace_create_file("snapshot", 0644, d_tracer,
8545 			  tr, &snapshot_fops);
8546 #endif
8547 
8548 	trace_create_file("error_log", 0644, d_tracer,
8549 			  tr, &tracing_err_log_fops);
8550 
8551 	for_each_tracing_cpu(cpu)
8552 		tracing_init_tracefs_percpu(tr, cpu);
8553 
8554 	ftrace_init_tracefs(tr, d_tracer);
8555 }
8556 
8557 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8558 {
8559 	struct vfsmount *mnt;
8560 	struct file_system_type *type;
8561 
8562 	/*
8563 	 * To maintain backward compatibility for tools that mount
8564 	 * debugfs to get to the tracing facility, tracefs is automatically
8565 	 * mounted to the debugfs/tracing directory.
8566 	 */
8567 	type = get_fs_type("tracefs");
8568 	if (!type)
8569 		return NULL;
8570 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8571 	put_filesystem(type);
8572 	if (IS_ERR(mnt))
8573 		return NULL;
8574 	mntget(mnt);
8575 
8576 	return mnt;
8577 }
8578 
8579 /**
8580  * tracing_init_dentry - initialize top level trace array
8581  *
8582  * This is called when creating files or directories in the tracing
8583  * directory. It is called via fs_initcall() by any of the boot up code
8584  * and expects to return the dentry of the top level tracing directory.
8585  */
8586 struct dentry *tracing_init_dentry(void)
8587 {
8588 	struct trace_array *tr = &global_trace;
8589 
8590 	/* The top level trace array uses  NULL as parent */
8591 	if (tr->dir)
8592 		return NULL;
8593 
8594 	if (WARN_ON(!tracefs_initialized()) ||
8595 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8596 		 WARN_ON(!debugfs_initialized())))
8597 		return ERR_PTR(-ENODEV);
8598 
8599 	/*
8600 	 * As there may still be users that expect the tracing
8601 	 * files to exist in debugfs/tracing, we must automount
8602 	 * the tracefs file system there, so older tools still
8603 	 * work with the newer kerenl.
8604 	 */
8605 	tr->dir = debugfs_create_automount("tracing", NULL,
8606 					   trace_automount, NULL);
8607 	if (!tr->dir) {
8608 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8609 		return ERR_PTR(-ENOMEM);
8610 	}
8611 
8612 	return NULL;
8613 }
8614 
8615 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8616 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8617 
8618 static void __init trace_eval_init(void)
8619 {
8620 	int len;
8621 
8622 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8623 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8624 }
8625 
8626 #ifdef CONFIG_MODULES
8627 static void trace_module_add_evals(struct module *mod)
8628 {
8629 	if (!mod->num_trace_evals)
8630 		return;
8631 
8632 	/*
8633 	 * Modules with bad taint do not have events created, do
8634 	 * not bother with enums either.
8635 	 */
8636 	if (trace_module_has_bad_taint(mod))
8637 		return;
8638 
8639 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8640 }
8641 
8642 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8643 static void trace_module_remove_evals(struct module *mod)
8644 {
8645 	union trace_eval_map_item *map;
8646 	union trace_eval_map_item **last = &trace_eval_maps;
8647 
8648 	if (!mod->num_trace_evals)
8649 		return;
8650 
8651 	mutex_lock(&trace_eval_mutex);
8652 
8653 	map = trace_eval_maps;
8654 
8655 	while (map) {
8656 		if (map->head.mod == mod)
8657 			break;
8658 		map = trace_eval_jmp_to_tail(map);
8659 		last = &map->tail.next;
8660 		map = map->tail.next;
8661 	}
8662 	if (!map)
8663 		goto out;
8664 
8665 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8666 	kfree(map);
8667  out:
8668 	mutex_unlock(&trace_eval_mutex);
8669 }
8670 #else
/* Without CONFIG_TRACE_EVAL_MAP_FILE nothing is done on module removal. */
static inline void trace_module_remove_evals(struct module *mod)
{
}
8672 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8673 
8674 static int trace_module_notify(struct notifier_block *self,
8675 			       unsigned long val, void *data)
8676 {
8677 	struct module *mod = data;
8678 
8679 	switch (val) {
8680 	case MODULE_STATE_COMING:
8681 		trace_module_add_evals(mod);
8682 		break;
8683 	case MODULE_STATE_GOING:
8684 		trace_module_remove_evals(mod);
8685 		break;
8686 	}
8687 
8688 	return 0;
8689 }
8690 
8691 static struct notifier_block trace_module_nb = {
8692 	.notifier_call = trace_module_notify,
8693 	.priority = 0,
8694 };
8695 #endif /* CONFIG_MODULES */
8696 
8697 static __init int tracer_init_tracefs(void)
8698 {
8699 	struct dentry *d_tracer;
8700 
8701 	trace_access_lock_init();
8702 
8703 	d_tracer = tracing_init_dentry();
8704 	if (IS_ERR(d_tracer))
8705 		return 0;
8706 
8707 	event_trace_init();
8708 
8709 	init_tracer_tracefs(&global_trace, d_tracer);
8710 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8711 
8712 	trace_create_file("tracing_thresh", 0644, d_tracer,
8713 			&global_trace, &tracing_thresh_fops);
8714 
8715 	trace_create_file("README", 0444, d_tracer,
8716 			NULL, &tracing_readme_fops);
8717 
8718 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8719 			NULL, &tracing_saved_cmdlines_fops);
8720 
8721 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8722 			  NULL, &tracing_saved_cmdlines_size_fops);
8723 
8724 	trace_create_file("saved_tgids", 0444, d_tracer,
8725 			NULL, &tracing_saved_tgids_fops);
8726 
8727 	trace_eval_init();
8728 
8729 	trace_create_eval_file(d_tracer);
8730 
8731 #ifdef CONFIG_MODULES
8732 	register_module_notifier(&trace_module_nb);
8733 #endif
8734 
8735 #ifdef CONFIG_DYNAMIC_FTRACE
8736 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8737 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8738 #endif
8739 
8740 	create_trace_instances(d_tracer);
8741 
8742 	update_tracer_options(&global_trace);
8743 
8744 	return 0;
8745 }
8746 
8747 static int trace_panic_handler(struct notifier_block *this,
8748 			       unsigned long event, void *unused)
8749 {
8750 	if (ftrace_dump_on_oops)
8751 		ftrace_dump(ftrace_dump_on_oops);
8752 	return NOTIFY_OK;
8753 }
8754 
8755 static struct notifier_block trace_panic_notifier = {
8756 	.notifier_call  = trace_panic_handler,
8757 	.next           = NULL,
8758 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8759 };
8760 
8761 static int trace_die_handler(struct notifier_block *self,
8762 			     unsigned long val,
8763 			     void *data)
8764 {
8765 	switch (val) {
8766 	case DIE_OOPS:
8767 		if (ftrace_dump_on_oops)
8768 			ftrace_dump(ftrace_dump_on_oops);
8769 		break;
8770 	default:
8771 		break;
8772 	}
8773 	return NOTIFY_OK;
8774 }
8775 
8776 static struct notifier_block trace_die_notifier = {
8777 	.notifier_call = trace_die_handler,
8778 	.priority = 200
8779 };
8780 
8781 /*
8782  * printk is set to max of 1024, we really don't need it that big.
8783  * Nothing should be printing 1000 characters anyway.
8784  */
8785 #define TRACE_MAX_PRINT		1000
8786 
8787 /*
8788  * Define here KERN_TRACE so that we have one place to modify
8789  * it if we decide to change what log level the ftrace dump
8790  * should be at.
8791  */
8792 #define KERN_TRACE		KERN_EMERG
8793 
8794 void
8795 trace_printk_seq(struct trace_seq *s)
8796 {
8797 	/* Probably should print a warning here. */
8798 	if (s->seq.len >= TRACE_MAX_PRINT)
8799 		s->seq.len = TRACE_MAX_PRINT;
8800 
8801 	/*
8802 	 * More paranoid code. Although the buffer size is set to
8803 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8804 	 * an extra layer of protection.
8805 	 */
8806 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8807 		s->seq.len = s->seq.size - 1;
8808 
8809 	/* should be zero ended, but we are paranoid. */
8810 	s->buffer[s->seq.len] = 0;
8811 
8812 	printk(KERN_TRACE "%s", s->buffer);
8813 
8814 	trace_seq_init(s);
8815 }
8816 
8817 void trace_init_global_iter(struct trace_iterator *iter)
8818 {
8819 	iter->tr = &global_trace;
8820 	iter->trace = iter->tr->current_trace;
8821 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8822 	iter->trace_buffer = &global_trace.trace_buffer;
8823 
8824 	if (iter->trace && iter->trace->open)
8825 		iter->trace->open(iter);
8826 
8827 	/* Annotate start of buffers if we had overruns */
8828 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8829 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8830 
8831 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8832 	if (trace_clocks[iter->tr->clock_id].in_ns)
8833 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8834 }
8835 
8836 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8837 {
8838 	/* use static because iter can be a bit big for the stack */
8839 	static struct trace_iterator iter;
8840 	static atomic_t dump_running;
8841 	struct trace_array *tr = &global_trace;
8842 	unsigned int old_userobj;
8843 	unsigned long flags;
8844 	int cnt = 0, cpu;
8845 
8846 	/* Only allow one dump user at a time. */
8847 	if (atomic_inc_return(&dump_running) != 1) {
8848 		atomic_dec(&dump_running);
8849 		return;
8850 	}
8851 
8852 	/*
8853 	 * Always turn off tracing when we dump.
8854 	 * We don't need to show trace output of what happens
8855 	 * between multiple crashes.
8856 	 *
8857 	 * If the user does a sysrq-z, then they can re-enable
8858 	 * tracing with echo 1 > tracing_on.
8859 	 */
8860 	tracing_off();
8861 
8862 	local_irq_save(flags);
8863 	printk_nmi_direct_enter();
8864 
8865 	/* Simulate the iterator */
8866 	trace_init_global_iter(&iter);
8867 
8868 	for_each_tracing_cpu(cpu) {
8869 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8870 	}
8871 
8872 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8873 
8874 	/* don't look at user memory in panic mode */
8875 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8876 
8877 	switch (oops_dump_mode) {
8878 	case DUMP_ALL:
8879 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8880 		break;
8881 	case DUMP_ORIG:
8882 		iter.cpu_file = raw_smp_processor_id();
8883 		break;
8884 	case DUMP_NONE:
8885 		goto out_enable;
8886 	default:
8887 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8888 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8889 	}
8890 
8891 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8892 
8893 	/* Did function tracer already get disabled? */
8894 	if (ftrace_is_dead()) {
8895 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8896 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8897 	}
8898 
8899 	/*
8900 	 * We need to stop all tracing on all CPUS to read the
8901 	 * the next buffer. This is a bit expensive, but is
8902 	 * not done often. We fill all what we can read,
8903 	 * and then release the locks again.
8904 	 */
8905 
8906 	while (!trace_empty(&iter)) {
8907 
8908 		if (!cnt)
8909 			printk(KERN_TRACE "---------------------------------\n");
8910 
8911 		cnt++;
8912 
8913 		/* reset all but tr, trace, and overruns */
8914 		memset(&iter.seq, 0,
8915 		       sizeof(struct trace_iterator) -
8916 		       offsetof(struct trace_iterator, seq));
8917 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8918 		iter.pos = -1;
8919 
8920 		if (trace_find_next_entry_inc(&iter) != NULL) {
8921 			int ret;
8922 
8923 			ret = print_trace_line(&iter);
8924 			if (ret != TRACE_TYPE_NO_CONSUME)
8925 				trace_consume(&iter);
8926 		}
8927 		touch_nmi_watchdog();
8928 
8929 		trace_printk_seq(&iter.seq);
8930 	}
8931 
8932 	if (!cnt)
8933 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8934 	else
8935 		printk(KERN_TRACE "---------------------------------\n");
8936 
8937  out_enable:
8938 	tr->trace_flags |= old_userobj;
8939 
8940 	for_each_tracing_cpu(cpu) {
8941 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8942 	}
8943 	atomic_dec(&dump_running);
8944 	printk_nmi_direct_exit();
8945 	local_irq_restore(flags);
8946 }
8947 EXPORT_SYMBOL_GPL(ftrace_dump);
8948 
8949 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8950 {
8951 	char **argv;
8952 	int argc, ret;
8953 
8954 	argc = 0;
8955 	ret = 0;
8956 	argv = argv_split(GFP_KERNEL, buf, &argc);
8957 	if (!argv)
8958 		return -ENOMEM;
8959 
8960 	if (argc)
8961 		ret = createfn(argc, argv);
8962 
8963 	argv_free(argv);
8964 
8965 	return ret;
8966 }
8967 
8968 #define WRITE_BUFSIZE  4096
8969 
8970 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8971 				size_t count, loff_t *ppos,
8972 				int (*createfn)(int, char **))
8973 {
8974 	char *kbuf, *buf, *tmp;
8975 	int ret = 0;
8976 	size_t done = 0;
8977 	size_t size;
8978 
8979 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8980 	if (!kbuf)
8981 		return -ENOMEM;
8982 
8983 	while (done < count) {
8984 		size = count - done;
8985 
8986 		if (size >= WRITE_BUFSIZE)
8987 			size = WRITE_BUFSIZE - 1;
8988 
8989 		if (copy_from_user(kbuf, buffer + done, size)) {
8990 			ret = -EFAULT;
8991 			goto out;
8992 		}
8993 		kbuf[size] = '\0';
8994 		buf = kbuf;
8995 		do {
8996 			tmp = strchr(buf, '\n');
8997 			if (tmp) {
8998 				*tmp = '\0';
8999 				size = tmp - buf + 1;
9000 			} else {
9001 				size = strlen(buf);
9002 				if (done + size < count) {
9003 					if (buf != kbuf)
9004 						break;
9005 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9006 					pr_warn("Line length is too long: Should be less than %d\n",
9007 						WRITE_BUFSIZE - 2);
9008 					ret = -EINVAL;
9009 					goto out;
9010 				}
9011 			}
9012 			done += size;
9013 
9014 			/* Remove comments */
9015 			tmp = strchr(buf, '#');
9016 
9017 			if (tmp)
9018 				*tmp = '\0';
9019 
9020 			ret = trace_run_command(buf, createfn);
9021 			if (ret)
9022 				goto out;
9023 			buf += size;
9024 
9025 		} while (done < count);
9026 	}
9027 	ret = done;
9028 
9029 out:
9030 	kfree(kbuf);
9031 
9032 	return ret;
9033 }
9034 
9035 __init static int tracer_alloc_buffers(void)
9036 {
9037 	int ring_buf_size;
9038 	int ret = -ENOMEM;
9039 
9040 	/*
9041 	 * Make sure we don't accidently add more trace options
9042 	 * than we have bits for.
9043 	 */
9044 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9045 
9046 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9047 		goto out;
9048 
9049 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9050 		goto out_free_buffer_mask;
9051 
9052 	/* Only allocate trace_printk buffers if a trace_printk exists */
9053 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9054 		/* Must be called before global_trace.buffer is allocated */
9055 		trace_printk_init_buffers();
9056 
9057 	/* To save memory, keep the ring buffer size to its minimum */
9058 	if (ring_buffer_expanded)
9059 		ring_buf_size = trace_buf_size;
9060 	else
9061 		ring_buf_size = 1;
9062 
9063 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9064 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9065 
9066 	raw_spin_lock_init(&global_trace.start_lock);
9067 
9068 	/*
9069 	 * The prepare callbacks allocates some memory for the ring buffer. We
9070 	 * don't free the buffer if the if the CPU goes down. If we were to free
9071 	 * the buffer, then the user would lose any trace that was in the
9072 	 * buffer. The memory will be removed once the "instance" is removed.
9073 	 */
9074 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9075 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9076 				      NULL);
9077 	if (ret < 0)
9078 		goto out_free_cpumask;
9079 	/* Used for event triggers */
9080 	ret = -ENOMEM;
9081 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9082 	if (!temp_buffer)
9083 		goto out_rm_hp_state;
9084 
9085 	if (trace_create_savedcmd() < 0)
9086 		goto out_free_temp_buffer;
9087 
9088 	/* TODO: make the number of buffers hot pluggable with CPUS */
9089 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9090 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9091 		WARN_ON(1);
9092 		goto out_free_savedcmd;
9093 	}
9094 
9095 	if (global_trace.buffer_disabled)
9096 		tracing_off();
9097 
9098 	if (trace_boot_clock) {
9099 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9100 		if (ret < 0)
9101 			pr_warn("Trace clock %s not defined, going back to default\n",
9102 				trace_boot_clock);
9103 	}
9104 
9105 	/*
9106 	 * register_tracer() might reference current_trace, so it
9107 	 * needs to be set before we register anything. This is
9108 	 * just a bootstrap of current_trace anyway.
9109 	 */
9110 	global_trace.current_trace = &nop_trace;
9111 
9112 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9113 
9114 	ftrace_init_global_array_ops(&global_trace);
9115 
9116 	init_trace_flags_index(&global_trace);
9117 
9118 	register_tracer(&nop_trace);
9119 
9120 	/* Function tracing may start here (via kernel command line) */
9121 	init_function_trace();
9122 
9123 	/* All seems OK, enable tracing */
9124 	tracing_disabled = 0;
9125 
9126 	atomic_notifier_chain_register(&panic_notifier_list,
9127 				       &trace_panic_notifier);
9128 
9129 	register_die_notifier(&trace_die_notifier);
9130 
9131 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9132 
9133 	INIT_LIST_HEAD(&global_trace.systems);
9134 	INIT_LIST_HEAD(&global_trace.events);
9135 	INIT_LIST_HEAD(&global_trace.hist_vars);
9136 	INIT_LIST_HEAD(&global_trace.err_log);
9137 	list_add(&global_trace.list, &ftrace_trace_arrays);
9138 
9139 	apply_trace_boot_options();
9140 
9141 	register_snapshot_cmd();
9142 
9143 	return 0;
9144 
9145 out_free_savedcmd:
9146 	free_saved_cmdlines_buffer(savedcmd);
9147 out_free_temp_buffer:
9148 	ring_buffer_free(temp_buffer);
9149 out_rm_hp_state:
9150 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9151 out_free_cpumask:
9152 	free_cpumask_var(global_trace.tracing_cpumask);
9153 out_free_buffer_mask:
9154 	free_cpumask_var(tracing_buffer_mask);
9155 out:
9156 	return ret;
9157 }
9158 
9159 void __init early_trace_init(void)
9160 {
9161 	if (tracepoint_printk) {
9162 		tracepoint_print_iter =
9163 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9164 		if (WARN_ON(!tracepoint_print_iter))
9165 			tracepoint_printk = 0;
9166 		else
9167 			static_key_enable(&tracepoint_printk_key.key);
9168 	}
9169 	tracer_alloc_buffers();
9170 }
9171 
9172 void __init trace_init(void)
9173 {
9174 	trace_event_init();
9175 }
9176 
9177 __init static int clear_boot_tracer(void)
9178 {
9179 	/*
9180 	 * The default tracer at boot buffer is an init section.
9181 	 * This function is called in lateinit. If we did not
9182 	 * find the boot tracer, then clear it out, to prevent
9183 	 * later registration from accessing the buffer that is
9184 	 * about to be freed.
9185 	 */
9186 	if (!default_bootup_tracer)
9187 		return 0;
9188 
9189 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9190 	       default_bootup_tracer);
9191 	default_bootup_tracer = NULL;
9192 
9193 	return 0;
9194 }
9195 
9196 fs_initcall(tracer_init_tracefs);
9197 late_initcall_sync(clear_boot_tracer);
9198 
9199 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9200 __init static int tracing_set_default_clock(void)
9201 {
9202 	/* sched_clock_stable() is determined in late_initcall */
9203 	if (!trace_boot_clock && !sched_clock_stable()) {
9204 		printk(KERN_WARNING
9205 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9206 		       "If you want to keep using the local clock, then add:\n"
9207 		       "  \"trace_clock=local\"\n"
9208 		       "on the kernel command line\n");
9209 		tracing_set_clock(&global_trace, "global");
9210 	}
9211 
9212 	return 0;
9213 }
9214 late_initcall_sync(tracing_set_default_clock);
9215 #endif
9216