xref: /openbmc/linux/kernel/trace/trace.c (revision 91edde2e6ae1dd5e33812f076f3fe4cb7ccbfdd0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
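
/*
 * Example (sketch): boot with "ftrace_dump_on_oops=orig_cpu" to dump only
 * the oopsing CPU's buffer, or enable it at run time with
 * "echo 1 > /proc/sys/kernel/ftrace_dump_on_oops" to dump all CPUs.
 */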
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
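
/*
 * Illustrative layout of one saved array (a sketch of the description
 * above; N is the number of saved maps):
 *
 *	item[0]      head: .length = N, .mod = owning module (NULL if built in)
 *	item[1..N]   the N saved trace_eval_map entries
 *	item[N+1]    tail: .next = next saved array, .end = NULL
 */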
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct ring_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194 		return 1;
195 	}
196 
197 	return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
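
/*
 * For example (sketch): ns2usecs(1499) == 1 and ns2usecs(1500) == 2, i.e.
 * the value is rounded to the nearest microsecond rather than truncated.
 */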
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 void trace_array_put(struct trace_array *this_tr)
305 {
306 	mutex_lock(&trace_types_lock);
307 	__trace_array_put(this_tr);
308 	mutex_unlock(&trace_types_lock);
309 }
310 
311 int tracing_check_open_get_tr(struct trace_array *tr)
312 {
313 	int ret;
314 
315 	ret = security_locked_down(LOCKDOWN_TRACEFS);
316 	if (ret)
317 		return ret;
318 
319 	if (tracing_disabled)
320 		return -ENODEV;
321 
322 	if (tr && trace_array_get(tr) < 0)
323 		return -ENODEV;
324 
325 	return 0;
326 }
327 
328 int call_filter_check_discard(struct trace_event_call *call, void *rec,
329 			      struct ring_buffer *buffer,
330 			      struct ring_buffer_event *event)
331 {
332 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
333 	    !filter_match_preds(call->filter, rec)) {
334 		__trace_event_discard_commit(buffer, event);
335 		return 1;
336 	}
337 
338 	return 0;
339 }
340 
341 void trace_free_pid_list(struct trace_pid_list *pid_list)
342 {
343 	vfree(pid_list->pids);
344 	kfree(pid_list);
345 }
346 
347 /**
348  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
349  * @filtered_pids: The list of pids to check
350  * @search_pid: The PID to find in @filtered_pids
351  *
352  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
353  */
354 bool
355 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
356 {
357 	/*
358 	 * If pid_max changed after filtered_pids was created, we
359 	 * by default ignore all pids greater than the previous pid_max.
360 	 */
361 	if (search_pid >= filtered_pids->pid_max)
362 		return false;
363 
364 	return test_bit(search_pid, filtered_pids->pids);
365 }
366 
367 /**
368  * trace_ignore_this_task - should a task be ignored for tracing
369  * @filtered_pids: The list of pids to check
370  * @task: The task that should be ignored if not filtered
371  *
372  * Checks if @task should be traced or not from @filtered_pids.
373  * Returns true if @task should *NOT* be traced.
374  * Returns false if @task should be traced.
375  */
376 bool
377 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
378 {
379 	/*
380 	 * Return false, because if filtered_pids does not exist,
381 	 * all pids are good to trace.
382 	 */
383 	if (!filtered_pids)
384 		return false;
385 
386 	return !trace_find_filtered_pid(filtered_pids, task->pid);
387 }
388 
389 /**
390  * trace_filter_add_remove_task - Add or remove a task from a pid_list
391  * @pid_list: The list to modify
392  * @self: The current task for fork or NULL for exit
393  * @task: The task to add or remove
394  *
395  * If adding a task, if @self is defined, the task is only added if @self
396  * is also included in @pid_list. This happens on fork and tasks should
397  * only be added when the parent is listed. If @self is NULL, then the
398  * @task pid will be removed from the list, which would happen on exit
399  * of a task.
400  */
401 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
402 				  struct task_struct *self,
403 				  struct task_struct *task)
404 {
405 	if (!pid_list)
406 		return;
407 
408 	/* For forks, we only add if the forking task is listed */
409 	if (self) {
410 		if (!trace_find_filtered_pid(pid_list, self->pid))
411 			return;
412 	}
413 
414 	/* Sorry, but we don't support pid_max changing after setting */
415 	if (task->pid >= pid_list->pid_max)
416 		return;
417 
418 	/* "self" is set for forks, and NULL for exits */
419 	if (self)
420 		set_bit(task->pid, pid_list->pids);
421 	else
422 		clear_bit(task->pid, pid_list->pids);
423 }
424 
425 /**
426  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
427  * @pid_list: The pid list to show
428  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
429  * @pos: The position of the file
430  *
431  * This is used by the seq_file "next" operation to iterate the pids
432  * listed in a trace_pid_list structure.
433  *
434  * Returns the pid+1 as we want to display pid of zero, but NULL would
435  * stop the iteration.
436  */
437 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
438 {
439 	unsigned long pid = (unsigned long)v;
440 
441 	(*pos)++;
442 
443 	/* pid already is +1 of the actual previous bit */
444 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
445 
446 	/* Return pid + 1 to allow zero to be represented */
447 	if (pid < pid_list->pid_max)
448 		return (void *)(pid + 1);
449 
450 	return NULL;
451 }
452 
453 /**
454  * trace_pid_start - Used for seq_file to start reading pid lists
455  * @pid_list: The pid list to show
456  * @pos: The position of the file
457  *
458  * This is used by seq_file "start" operation to start the iteration
459  * of listing pids.
460  *
461  * Returns the pid+1 as we want to display pid of zero, but NULL would
462  * stop the iteration.
463  */
464 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
465 {
466 	unsigned long pid;
467 	loff_t l = 0;
468 
469 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
470 	if (pid >= pid_list->pid_max)
471 		return NULL;
472 
473 	/* Return pid + 1 so that zero can be the exit value */
474 	for (pid++; pid && l < *pos;
475 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
476 		;
477 	return (void *)pid;
478 }
479 
480 /**
481  * trace_pid_show - show the current pid in seq_file processing
482  * @m: The seq_file structure to write into
483  * @v: A void pointer of the pid (+1) value to display
484  *
485  * Can be directly used by seq_file operations to display the current
486  * pid value.
487  */
488 int trace_pid_show(struct seq_file *m, void *v)
489 {
490 	unsigned long pid = (unsigned long)v - 1;
491 
492 	seq_printf(m, "%lu\n", pid);
493 	return 0;
494 }
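
/*
 * Example wiring (sketch only; the pid-filter files provide their own
 * ->start()/->next()/->stop() wrappers and locking; the "my_*" names here
 * are hypothetical):
 *
 *	static const struct seq_operations pid_list_seq_ops = {
 *		.start	= my_pid_start,		// ends up in trace_pid_start()
 *		.next	= my_pid_next,		// ends up in trace_pid_next()
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,	// usable directly, as noted above
 *	};
 */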
495 
496 /* 128 should be much more than enough */
497 #define PID_BUF_SIZE		127
498 
499 int trace_pid_write(struct trace_pid_list *filtered_pids,
500 		    struct trace_pid_list **new_pid_list,
501 		    const char __user *ubuf, size_t cnt)
502 {
503 	struct trace_pid_list *pid_list;
504 	struct trace_parser parser;
505 	unsigned long val;
506 	int nr_pids = 0;
507 	ssize_t read = 0;
508 	ssize_t ret = 0;
509 	loff_t pos;
510 	pid_t pid;
511 
512 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
513 		return -ENOMEM;
514 
515 	/*
516 	 * Always recreate a new array. The write is an all-or-nothing
517 	 * operation: when the user adds new pids, a new array is built,
518 	 * and if the operation fails, the current list is left
519 	 * unmodified.
520 	 */
521 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
522 	if (!pid_list) {
523 		trace_parser_put(&parser);
524 		return -ENOMEM;
525 	}
526 
527 	pid_list->pid_max = READ_ONCE(pid_max);
528 
529 	/* Only truncating will shrink pid_max */
530 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
531 		pid_list->pid_max = filtered_pids->pid_max;
532 
533 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
534 	if (!pid_list->pids) {
535 		trace_parser_put(&parser);
536 		kfree(pid_list);
537 		return -ENOMEM;
538 	}
539 
540 	if (filtered_pids) {
541 		/* copy the current bits to the new max */
542 		for_each_set_bit(pid, filtered_pids->pids,
543 				 filtered_pids->pid_max) {
544 			set_bit(pid, pid_list->pids);
545 			nr_pids++;
546 		}
547 	}
548 
549 	while (cnt > 0) {
550 
551 		pos = 0;
552 
553 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
554 		if (ret < 0 || !trace_parser_loaded(&parser))
555 			break;
556 
557 		read += ret;
558 		ubuf += ret;
559 		cnt -= ret;
560 
561 		ret = -EINVAL;
562 		if (kstrtoul(parser.buffer, 0, &val))
563 			break;
564 		if (val >= pid_list->pid_max)
565 			break;
566 
567 		pid = (pid_t)val;
568 
569 		set_bit(pid, pid_list->pids);
570 		nr_pids++;
571 
572 		trace_parser_clear(&parser);
573 		ret = 0;
574 	}
575 	trace_parser_put(&parser);
576 
577 	if (ret < 0) {
578 		trace_free_pid_list(pid_list);
579 		return ret;
580 	}
581 
582 	if (!nr_pids) {
583 		/* Cleared the list of pids */
584 		trace_free_pid_list(pid_list);
585 		read = ret;
586 		pid_list = NULL;
587 	}
588 
589 	*new_pid_list = pid_list;
590 
591 	return read;
592 }
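
/*
 * Example (sketch): a write of "123 456" builds a fresh pid list containing
 * 123 and 456 plus anything copied from @filtered_pids; if nothing was
 * copied and the write contained no pids, nr_pids stays 0, the new list is
 * freed and *new_pid_list is set to NULL, clearing the filter.
 */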
593 
594 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
595 {
596 	u64 ts;
597 
598 	/* Early boot up does not have a buffer yet */
599 	if (!buf->buffer)
600 		return trace_clock_local();
601 
602 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
603 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
604 
605 	return ts;
606 }
607 
608 u64 ftrace_now(int cpu)
609 {
610 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
611 }
612 
613 /**
614  * tracing_is_enabled - Show if global_trace has been disabled
615  *
616  * Shows if the global trace has been enabled or not. It uses the
617  * mirror flag "buffer_disabled" so it can be used in fast paths such
618  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
619  * need to know the accurate state, use tracing_is_on() which is a little
620  * slower, but accurate.
621  */
622 int tracing_is_enabled(void)
623 {
624 	/*
625 	 * For quick access (irqsoff uses this in fast path), just
626 	 * return the mirror variable of the state of the ring buffer.
627 	 * It's a little racy, but we don't really care.
628 	 */
629 	smp_rmb();
630 	return !global_trace.buffer_disabled;
631 }
632 
633 /*
634  * trace_buf_size is the size in bytes that is allocated
635  * for a buffer. Note, the number of bytes is always rounded
636  * to page size.
637  *
638  * This number is purposely set to a low value of 16384.
639  * If a dump on oops happens, it is much appreciated not to have
640  * to wait for all that output. In any case, this is configurable
641  * at both boot time and run time.
642  */
643 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
644 
645 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
646 
647 /* trace_types holds a link list of available tracers. */
648 static struct tracer		*trace_types __read_mostly;
649 
650 /*
651  * trace_types_lock is used to protect the trace_types list.
652  */
653 DEFINE_MUTEX(trace_types_lock);
654 
655 /*
656  * Serialize access to the ring buffer.
657  *
658  * The ring buffer serializes readers, but that is only low-level protection.
659  * The validity of the events (returned by ring_buffer_peek(), etc.)
660  * is not protected by the ring buffer.
661  *
662  * The content of events may become garbage if we allow other processes to
663  * consume these events concurrently:
664  *   A) the page of the consumed events may become a normal page
665  *      (not a reader page) in the ring buffer, and this page will be
666  *      rewritten by the events producer.
667  *   B) The page of the consumed events may become a page for splice_read,
668  *      and this page will be returned to the system.
669  *
670  * These primitives allow multiple processes to access different per-cpu
671  * ring buffers concurrently.
672  *
673  * These primitives don't distinguish read-only and read-consume access.
674  * Multiple read-only accesses are also serialized.
675  */
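
/*
 * Typical usage (sketch):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from @cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock exclusively, blocking
 * all per-cpu readers at once.
 */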
676 
677 #ifdef CONFIG_SMP
678 static DECLARE_RWSEM(all_cpu_access_lock);
679 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
680 
681 static inline void trace_access_lock(int cpu)
682 {
683 	if (cpu == RING_BUFFER_ALL_CPUS) {
684 		/* gain it for accessing the whole ring buffer. */
685 		down_write(&all_cpu_access_lock);
686 	} else {
687 		/* gain it for accessing a cpu ring buffer. */
688 
689 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
690 		down_read(&all_cpu_access_lock);
691 
692 		/* Secondly block other access to this @cpu ring buffer. */
693 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
694 	}
695 }
696 
697 static inline void trace_access_unlock(int cpu)
698 {
699 	if (cpu == RING_BUFFER_ALL_CPUS) {
700 		up_write(&all_cpu_access_lock);
701 	} else {
702 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
703 		up_read(&all_cpu_access_lock);
704 	}
705 }
706 
707 static inline void trace_access_lock_init(void)
708 {
709 	int cpu;
710 
711 	for_each_possible_cpu(cpu)
712 		mutex_init(&per_cpu(cpu_access_lock, cpu));
713 }
714 
715 #else
716 
717 static DEFINE_MUTEX(access_lock);
718 
719 static inline void trace_access_lock(int cpu)
720 {
721 	(void)cpu;
722 	mutex_lock(&access_lock);
723 }
724 
725 static inline void trace_access_unlock(int cpu)
726 {
727 	(void)cpu;
728 	mutex_unlock(&access_lock);
729 }
730 
731 static inline void trace_access_lock_init(void)
732 {
733 }
734 
735 #endif
736 
737 #ifdef CONFIG_STACKTRACE
738 static void __ftrace_trace_stack(struct ring_buffer *buffer,
739 				 unsigned long flags,
740 				 int skip, int pc, struct pt_regs *regs);
741 static inline void ftrace_trace_stack(struct trace_array *tr,
742 				      struct ring_buffer *buffer,
743 				      unsigned long flags,
744 				      int skip, int pc, struct pt_regs *regs);
745 
746 #else
747 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
748 					unsigned long flags,
749 					int skip, int pc, struct pt_regs *regs)
750 {
751 }
752 static inline void ftrace_trace_stack(struct trace_array *tr,
753 				      struct ring_buffer *buffer,
754 				      unsigned long flags,
755 				      int skip, int pc, struct pt_regs *regs)
756 {
757 }
758 
759 #endif
760 
761 static __always_inline void
762 trace_event_setup(struct ring_buffer_event *event,
763 		  int type, unsigned long flags, int pc)
764 {
765 	struct trace_entry *ent = ring_buffer_event_data(event);
766 
767 	tracing_generic_entry_update(ent, type, flags, pc);
768 }
769 
770 static __always_inline struct ring_buffer_event *
771 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
772 			  int type,
773 			  unsigned long len,
774 			  unsigned long flags, int pc)
775 {
776 	struct ring_buffer_event *event;
777 
778 	event = ring_buffer_lock_reserve(buffer, len);
779 	if (event != NULL)
780 		trace_event_setup(event, type, flags, pc);
781 
782 	return event;
783 }
784 
785 void tracer_tracing_on(struct trace_array *tr)
786 {
787 	if (tr->trace_buffer.buffer)
788 		ring_buffer_record_on(tr->trace_buffer.buffer);
789 	/*
790 	 * This flag is looked at when buffers haven't been allocated
791 	 * yet, or by some tracers (like irqsoff) that just want to
792 	 * know if the ring buffer has been disabled, but can handle
793 	 * races where it gets disabled while we still do a record.
794 	 * As the check is in the fast path of the tracers, it is more
795 	 * important to be fast than accurate.
796 	 */
797 	tr->buffer_disabled = 0;
798 	/* Make the flag seen by readers */
799 	smp_wmb();
800 }
801 
802 /**
803  * tracing_on - enable tracing buffers
804  *
805  * This function enables tracing buffers that may have been
806  * disabled with tracing_off.
807  */
808 void tracing_on(void)
809 {
810 	tracer_tracing_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_on);
813 
814 
815 static __always_inline void
816 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
817 {
818 	__this_cpu_write(trace_taskinfo_save, true);
819 
820 	/* If this is the temp buffer, we need to commit fully */
821 	if (this_cpu_read(trace_buffered_event) == event) {
822 		/* Length is in event->array[0] */
823 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
824 		/* Release the temp buffer */
825 		this_cpu_dec(trace_buffered_event_cnt);
826 	} else
827 		ring_buffer_unlock_commit(buffer, event);
828 }
829 
830 /**
831  * __trace_puts - write a constant string into the trace buffer.
832  * @ip:	   The address of the caller
833  * @str:   The constant string to write
834  * @size:  The size of the string.
835  */
836 int __trace_puts(unsigned long ip, const char *str, int size)
837 {
838 	struct ring_buffer_event *event;
839 	struct ring_buffer *buffer;
840 	struct print_entry *entry;
841 	unsigned long irq_flags;
842 	int alloc;
843 	int pc;
844 
845 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
846 		return 0;
847 
848 	pc = preempt_count();
849 
850 	if (unlikely(tracing_selftest_running || tracing_disabled))
851 		return 0;
852 
853 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
854 
855 	local_save_flags(irq_flags);
856 	buffer = global_trace.trace_buffer.buffer;
857 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
858 					    irq_flags, pc);
859 	if (!event)
860 		return 0;
861 
862 	entry = ring_buffer_event_data(event);
863 	entry->ip = ip;
864 
865 	memcpy(&entry->buf, str, size);
866 
867 	/* Add a newline if necessary */
868 	if (entry->buf[size - 1] != '\n') {
869 		entry->buf[size] = '\n';
870 		entry->buf[size + 1] = '\0';
871 	} else
872 		entry->buf[size] = '\0';
873 
874 	__buffer_unlock_commit(buffer, event);
875 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
876 
877 	return size;
878 }
879 EXPORT_SYMBOL_GPL(__trace_puts);
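
/*
 * Example (sketch): callers normally go through the trace_puts() helper
 * rather than calling this directly, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */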
880 
881 /**
882  * __trace_bputs - write the pointer to a constant string into trace buffer
883  * @ip:	   The address of the caller
884  * @str:   The constant string to write to the buffer to
885  */
886 int __trace_bputs(unsigned long ip, const char *str)
887 {
888 	struct ring_buffer_event *event;
889 	struct ring_buffer *buffer;
890 	struct bputs_entry *entry;
891 	unsigned long irq_flags;
892 	int size = sizeof(struct bputs_entry);
893 	int pc;
894 
895 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
896 		return 0;
897 
898 	pc = preempt_count();
899 
900 	if (unlikely(tracing_selftest_running || tracing_disabled))
901 		return 0;
902 
903 	local_save_flags(irq_flags);
904 	buffer = global_trace.trace_buffer.buffer;
905 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
906 					    irq_flags, pc);
907 	if (!event)
908 		return 0;
909 
910 	entry = ring_buffer_event_data(event);
911 	entry->ip			= ip;
912 	entry->str			= str;
913 
914 	__buffer_unlock_commit(buffer, event);
915 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
916 
917 	return 1;
918 }
919 EXPORT_SYMBOL_GPL(__trace_bputs);
920 
921 #ifdef CONFIG_TRACER_SNAPSHOT
922 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
923 {
924 	struct tracer *tracer = tr->current_trace;
925 	unsigned long flags;
926 
927 	if (in_nmi()) {
928 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
929 		internal_trace_puts("*** snapshot is being ignored        ***\n");
930 		return;
931 	}
932 
933 	if (!tr->allocated_snapshot) {
934 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
935 		internal_trace_puts("*** stopping trace here!   ***\n");
936 		tracing_off();
937 		return;
938 	}
939 
940 	/* Note, snapshot can not be used when the tracer uses it */
941 	if (tracer->use_max_tr) {
942 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
943 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
944 		return;
945 	}
946 
947 	local_irq_save(flags);
948 	update_max_tr(tr, current, smp_processor_id(), cond_data);
949 	local_irq_restore(flags);
950 }
951 
952 void tracing_snapshot_instance(struct trace_array *tr)
953 {
954 	tracing_snapshot_instance_cond(tr, NULL);
955 }
956 
957 /**
958  * tracing_snapshot - take a snapshot of the current buffer.
959  *
960  * This causes a swap between the snapshot buffer and the current live
961  * tracing buffer. You can use this to take snapshots of the live
962  * trace when some condition is triggered, but continue to trace.
963  *
964  * Note, make sure to allocate the snapshot with either
965  * a tracing_snapshot_alloc(), or by doing it manually
966  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
967  *
968  * If the snapshot buffer is not allocated, it will stop tracing.
969  * Basically making a permanent snapshot.
970  */
971 void tracing_snapshot(void)
972 {
973 	struct trace_array *tr = &global_trace;
974 
975 	tracing_snapshot_instance(tr);
976 }
977 EXPORT_SYMBOL_GPL(tracing_snapshot);
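
/*
 * Example (sketch): capture the state of the live buffer when a rare
 * condition is hit, while tracing continues:
 *
 *	if (unlikely(rare_condition))		// hypothetical predicate
 *		tracing_snapshot();
 *
 * The snapshot buffer must already be allocated, e.g. via
 * tracing_snapshot_alloc() or "echo 1 > /sys/kernel/debug/tracing/snapshot"
 * as noted above.
 */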
978 
979 /**
980  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
981  * @tr:		The tracing instance to snapshot
982  * @cond_data:	The data to be tested conditionally, and possibly saved
983  *
984  * This is the same as tracing_snapshot() except that the snapshot is
985  * conditional - the snapshot will only happen if the
986  * cond_snapshot.update() implementation receiving the cond_data
987  * returns true, which means that the trace array's cond_snapshot
988  * update() operation used the cond_data to determine whether the
989  * snapshot should be taken, and if it was, presumably saved it along
990  * with the snapshot.
991  */
992 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
993 {
994 	tracing_snapshot_instance_cond(tr, cond_data);
995 }
996 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
997 
998 /**
999  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1000  * @tr:		The tracing instance
1001  *
1002  * When the user enables a conditional snapshot using
1003  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1004  * with the snapshot.  This accessor is used to retrieve it.
1005  *
1006  * Should not be called from cond_snapshot.update(), since it takes
1007  * the tr->max_lock lock, which the code calling
1008  * cond_snapshot.update() has already done.
1009  *
1010  * Returns the cond_data associated with the trace array's snapshot.
1011  */
1012 void *tracing_cond_snapshot_data(struct trace_array *tr)
1013 {
1014 	void *cond_data = NULL;
1015 
1016 	arch_spin_lock(&tr->max_lock);
1017 
1018 	if (tr->cond_snapshot)
1019 		cond_data = tr->cond_snapshot->cond_data;
1020 
1021 	arch_spin_unlock(&tr->max_lock);
1022 
1023 	return cond_data;
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1026 
1027 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1028 					struct trace_buffer *size_buf, int cpu_id);
1029 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1030 
1031 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1032 {
1033 	int ret;
1034 
1035 	if (!tr->allocated_snapshot) {
1036 
1037 		/* allocate spare buffer */
1038 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1039 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1040 		if (ret < 0)
1041 			return ret;
1042 
1043 		tr->allocated_snapshot = true;
1044 	}
1045 
1046 	return 0;
1047 }
1048 
1049 static void free_snapshot(struct trace_array *tr)
1050 {
1051 	/*
1052 	 * We don't free the ring buffer; instead, we resize it because
1053 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1054 	 * we want to preserve it.
1055 	 */
1056 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1057 	set_buffer_entries(&tr->max_buffer, 1);
1058 	tracing_reset_online_cpus(&tr->max_buffer);
1059 	tr->allocated_snapshot = false;
1060 }
1061 
1062 /**
1063  * tracing_alloc_snapshot - allocate snapshot buffer.
1064  *
1065  * This only allocates the snapshot buffer if it isn't already
1066  * allocated - it doesn't also take a snapshot.
1067  *
1068  * This is meant to be used in cases where the snapshot buffer needs
1069  * to be set up for events that can't sleep but need to be able to
1070  * trigger a snapshot.
1071  */
1072 int tracing_alloc_snapshot(void)
1073 {
1074 	struct trace_array *tr = &global_trace;
1075 	int ret;
1076 
1077 	ret = tracing_alloc_snapshot_instance(tr);
1078 	WARN_ON(ret < 0);
1079 
1080 	return ret;
1081 }
1082 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1083 
1084 /**
1085  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1086  *
1087  * This is similar to tracing_snapshot(), but it will allocate the
1088  * snapshot buffer if it isn't already allocated. Use this only
1089  * where it is safe to sleep, as the allocation may sleep.
1090  *
1091  * This causes a swap between the snapshot buffer and the current live
1092  * tracing buffer. You can use this to take snapshots of the live
1093  * trace when some condition is triggered, but continue to trace.
1094  */
1095 void tracing_snapshot_alloc(void)
1096 {
1097 	int ret;
1098 
1099 	ret = tracing_alloc_snapshot();
1100 	if (ret < 0)
1101 		return;
1102 
1103 	tracing_snapshot();
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1106 
1107 /**
1108  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1109  * @tr:		The tracing instance
1110  * @cond_data:	User data to associate with the snapshot
1111  * @update:	Implementation of the cond_snapshot update function
1112  *
1113  * Check whether the conditional snapshot for the given instance has
1114  * already been enabled, or if the current tracer is already using a
1115  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1116  * save the cond_data and update function inside.
1117  *
1118  * Returns 0 if successful, error otherwise.
1119  */
1120 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1121 				 cond_update_fn_t update)
1122 {
1123 	struct cond_snapshot *cond_snapshot;
1124 	int ret = 0;
1125 
1126 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1127 	if (!cond_snapshot)
1128 		return -ENOMEM;
1129 
1130 	cond_snapshot->cond_data = cond_data;
1131 	cond_snapshot->update = update;
1132 
1133 	mutex_lock(&trace_types_lock);
1134 
1135 	ret = tracing_alloc_snapshot_instance(tr);
1136 	if (ret)
1137 		goto fail_unlock;
1138 
1139 	if (tr->current_trace->use_max_tr) {
1140 		ret = -EBUSY;
1141 		goto fail_unlock;
1142 	}
1143 
1144 	/*
1145 	 * The cond_snapshot can only change to NULL without the
1146 	 * trace_types_lock. We don't care if we race with it going
1147 	 * to NULL, but we want to make sure that it's not set to
1148 	 * something other than NULL when we get here, which we can
1149 	 * do safely with only holding the trace_types_lock and not
1150 	 * having to take the max_lock.
1151 	 */
1152 	if (tr->cond_snapshot) {
1153 		ret = -EBUSY;
1154 		goto fail_unlock;
1155 	}
1156 
1157 	arch_spin_lock(&tr->max_lock);
1158 	tr->cond_snapshot = cond_snapshot;
1159 	arch_spin_unlock(&tr->max_lock);
1160 
1161 	mutex_unlock(&trace_types_lock);
1162 
1163 	return ret;
1164 
1165  fail_unlock:
1166 	mutex_unlock(&trace_types_lock);
1167 	kfree(cond_snapshot);
1168 	return ret;
1169 }
1170 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
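
/*
 * Example (sketch, with hypothetical names): the update callback receives
 * the cond_data passed to tracing_snapshot_cond() and decides whether the
 * buffer swap in update_max_tr() should proceed:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;	// snapshot only when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */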
1171 
1172 /**
1173  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1174  * @tr:		The tracing instance
1175  *
1176  * Check whether the conditional snapshot for the given instance is
1177  * enabled; if so, free the cond_snapshot associated with it,
1178  * otherwise return -EINVAL.
1179  *
1180  * Returns 0 if successful, error otherwise.
1181  */
1182 int tracing_snapshot_cond_disable(struct trace_array *tr)
1183 {
1184 	int ret = 0;
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 
1188 	if (!tr->cond_snapshot)
1189 		ret = -EINVAL;
1190 	else {
1191 		kfree(tr->cond_snapshot);
1192 		tr->cond_snapshot = NULL;
1193 	}
1194 
1195 	arch_spin_unlock(&tr->max_lock);
1196 
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1200 #else
1201 void tracing_snapshot(void)
1202 {
1203 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_snapshot);
1206 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1207 {
1208 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1211 int tracing_alloc_snapshot(void)
1212 {
1213 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1214 	return -ENODEV;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1217 void tracing_snapshot_alloc(void)
1218 {
1219 	/* Give warning */
1220 	tracing_snapshot();
1221 }
1222 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1223 void *tracing_cond_snapshot_data(struct trace_array *tr)
1224 {
1225 	return NULL;
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1228 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1229 {
1230 	return -ENODEV;
1231 }
1232 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1233 int tracing_snapshot_cond_disable(struct trace_array *tr)
1234 {
1235 	return false;
1236 }
1237 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1238 #endif /* CONFIG_TRACER_SNAPSHOT */
1239 
1240 void tracer_tracing_off(struct trace_array *tr)
1241 {
1242 	if (tr->trace_buffer.buffer)
1243 		ring_buffer_record_off(tr->trace_buffer.buffer);
1244 	/*
1245 	 * This flag is looked at when buffers haven't been allocated
1246 	 * yet, or by some tracers (like irqsoff) that just want to
1247 	 * know if the ring buffer has been disabled, but can handle
1248 	 * races where it gets disabled while we still do a record.
1249 	 * As the check is in the fast path of the tracers, it is more
1250 	 * important to be fast than accurate.
1251 	 */
1252 	tr->buffer_disabled = 1;
1253 	/* Make the flag seen by readers */
1254 	smp_wmb();
1255 }
1256 
1257 /**
1258  * tracing_off - turn off tracing buffers
1259  *
1260  * This function stops the tracing buffers from recording data.
1261  * It does not disable any overhead the tracers themselves may
1262  * be causing. This function simply causes all recording to
1263  * the ring buffers to fail.
1264  */
1265 void tracing_off(void)
1266 {
1267 	tracer_tracing_off(&global_trace);
1268 }
1269 EXPORT_SYMBOL_GPL(tracing_off);
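
/*
 * Example (sketch): freeze the ring buffers the moment a suspicious state
 * is seen, so the events leading up to it are preserved for inspection:
 *
 *	if (unlikely(state_looks_wrong(obj)))	// hypothetical predicate
 *		tracing_off();
 */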
1270 
1271 void disable_trace_on_warning(void)
1272 {
1273 	if (__disable_trace_on_warning)
1274 		tracing_off();
1275 }
1276 
1277 /**
1278  * tracer_tracing_is_on - show real state of ring buffer enabled
1279  * @tr : the trace array to know if ring buffer is enabled
1280  *
1281  * Shows real state of the ring buffer if it is enabled or not.
1282  */
1283 bool tracer_tracing_is_on(struct trace_array *tr)
1284 {
1285 	if (tr->trace_buffer.buffer)
1286 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1287 	return !tr->buffer_disabled;
1288 }
1289 
1290 /**
1291  * tracing_is_on - show state of ring buffers enabled
1292  */
1293 int tracing_is_on(void)
1294 {
1295 	return tracer_tracing_is_on(&global_trace);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_is_on);
1298 
1299 static int __init set_buf_size(char *str)
1300 {
1301 	unsigned long buf_size;
1302 
1303 	if (!str)
1304 		return 0;
1305 	buf_size = memparse(str, &str);
1306 	/* nr_entries can not be zero */
1307 	if (buf_size == 0)
1308 		return 0;
1309 	trace_buf_size = buf_size;
1310 	return 1;
1311 }
1312 __setup("trace_buf_size=", set_buf_size);
1313 
1314 static int __init set_tracing_thresh(char *str)
1315 {
1316 	unsigned long threshold;
1317 	int ret;
1318 
1319 	if (!str)
1320 		return 0;
1321 	ret = kstrtoul(str, 0, &threshold);
1322 	if (ret < 0)
1323 		return 0;
1324 	tracing_thresh = threshold * 1000;
1325 	return 1;
1326 }
1327 __setup("tracing_thresh=", set_tracing_thresh);
1328 
1329 unsigned long nsecs_to_usecs(unsigned long nsecs)
1330 {
1331 	return nsecs / 1000;
1332 }
1333 
1334 /*
1335  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1336  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1337  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1338  * of strings in the order that the evals (enum) were defined.
1339  */
1340 #undef C
1341 #define C(a, b) b
1342 
1343 /* These must match the bit positions in trace_iterator_flags */
1344 static const char *trace_options[] = {
1345 	TRACE_FLAGS
1346 	NULL
1347 };
1348 
1349 static struct {
1350 	u64 (*func)(void);
1351 	const char *name;
1352 	int in_ns;		/* is this clock in nanoseconds? */
1353 } trace_clocks[] = {
1354 	{ trace_clock_local,		"local",	1 },
1355 	{ trace_clock_global,		"global",	1 },
1356 	{ trace_clock_counter,		"counter",	0 },
1357 	{ trace_clock_jiffies,		"uptime",	0 },
1358 	{ trace_clock,			"perf",		1 },
1359 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1360 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1361 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1362 	ARCH_TRACE_CLOCKS
1363 };
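
/*
 * A clock from this table is selected by name, either at boot with
 * "trace_clock=<name>" (see set_trace_boot_clock() above) or at run time
 * through the tracefs "trace_clock" file, e.g. "echo mono > trace_clock"
 * (illustrative).
 */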
1364 
1365 bool trace_clock_in_ns(struct trace_array *tr)
1366 {
1367 	if (trace_clocks[tr->clock_id].in_ns)
1368 		return true;
1369 
1370 	return false;
1371 }
1372 
1373 /*
1374  * trace_parser_get_init - gets the buffer for trace parser
1375  */
1376 int trace_parser_get_init(struct trace_parser *parser, int size)
1377 {
1378 	memset(parser, 0, sizeof(*parser));
1379 
1380 	parser->buffer = kmalloc(size, GFP_KERNEL);
1381 	if (!parser->buffer)
1382 		return 1;
1383 
1384 	parser->size = size;
1385 	return 0;
1386 }
1387 
1388 /*
1389  * trace_parser_put - frees the buffer for trace parser
1390  */
1391 void trace_parser_put(struct trace_parser *parser)
1392 {
1393 	kfree(parser->buffer);
1394 	parser->buffer = NULL;
1395 }
1396 
1397 /*
1398  * trace_get_user - reads the user input string separated by space
1399  * (matched by isspace(ch))
1400  *
1401  * For each string found the 'struct trace_parser' is updated,
1402  * and the function returns.
1403  *
1404  * Returns number of bytes read.
1405  *
1406  * See kernel/trace/trace.h for 'struct trace_parser' details.
1407  */
1408 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1409 	size_t cnt, loff_t *ppos)
1410 {
1411 	char ch;
1412 	size_t read = 0;
1413 	ssize_t ret;
1414 
1415 	if (!*ppos)
1416 		trace_parser_clear(parser);
1417 
1418 	ret = get_user(ch, ubuf++);
1419 	if (ret)
1420 		goto out;
1421 
1422 	read++;
1423 	cnt--;
1424 
1425 	/*
1426 	 * The parser is not finished with the last write,
1427 	 * continue reading the user input without skipping spaces.
1428 	 */
1429 	if (!parser->cont) {
1430 		/* skip white space */
1431 		while (cnt && isspace(ch)) {
1432 			ret = get_user(ch, ubuf++);
1433 			if (ret)
1434 				goto out;
1435 			read++;
1436 			cnt--;
1437 		}
1438 
1439 		parser->idx = 0;
1440 
1441 		/* only spaces were written */
1442 		if (isspace(ch) || !ch) {
1443 			*ppos += read;
1444 			ret = read;
1445 			goto out;
1446 		}
1447 	}
1448 
1449 	/* read the non-space input */
1450 	while (cnt && !isspace(ch) && ch) {
1451 		if (parser->idx < parser->size - 1)
1452 			parser->buffer[parser->idx++] = ch;
1453 		else {
1454 			ret = -EINVAL;
1455 			goto out;
1456 		}
1457 		ret = get_user(ch, ubuf++);
1458 		if (ret)
1459 			goto out;
1460 		read++;
1461 		cnt--;
1462 	}
1463 
1464 	/* We either got finished input or we have to wait for another call. */
1465 	if (isspace(ch) || !ch) {
1466 		parser->buffer[parser->idx] = 0;
1467 		parser->cont = false;
1468 	} else if (parser->idx < parser->size - 1) {
1469 		parser->cont = true;
1470 		parser->buffer[parser->idx++] = ch;
1471 		/* Make sure the parsed string always terminates with '\0'. */
1472 		parser->buffer[parser->idx] = 0;
1473 	} else {
1474 		ret = -EINVAL;
1475 		goto out;
1476 	}
1477 
1478 	*ppos += read;
1479 	ret = read;
1480 
1481 out:
1482 	return ret;
1483 }
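
/*
 * For a typical calling sequence (allocate with trace_parser_get_init(),
 * loop over trace_get_user() while input remains, then trace_parser_put()),
 * see trace_pid_write() above.
 */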
1484 
1485 /* TODO add a seq_buf_to_buffer() */
1486 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1487 {
1488 	int len;
1489 
1490 	if (trace_seq_used(s) <= s->seq.readpos)
1491 		return -EBUSY;
1492 
1493 	len = trace_seq_used(s) - s->seq.readpos;
1494 	if (cnt > len)
1495 		cnt = len;
1496 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1497 
1498 	s->seq.readpos += cnt;
1499 	return cnt;
1500 }
1501 
1502 unsigned long __read_mostly	tracing_thresh;
1503 static const struct file_operations tracing_max_lat_fops;
1504 
1505 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1506 	defined(CONFIG_FSNOTIFY)
1507 
1508 static struct workqueue_struct *fsnotify_wq;
1509 
1510 static void latency_fsnotify_workfn(struct work_struct *work)
1511 {
1512 	struct trace_array *tr = container_of(work, struct trace_array,
1513 					      fsnotify_work);
1514 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1515 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1516 }
1517 
1518 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1519 {
1520 	struct trace_array *tr = container_of(iwork, struct trace_array,
1521 					      fsnotify_irqwork);
1522 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1523 }
1524 
1525 static void trace_create_maxlat_file(struct trace_array *tr,
1526 				     struct dentry *d_tracer)
1527 {
1528 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1529 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1530 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1531 					      d_tracer, &tr->max_latency,
1532 					      &tracing_max_lat_fops);
1533 }
1534 
1535 __init static int latency_fsnotify_init(void)
1536 {
1537 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1538 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1539 	if (!fsnotify_wq) {
1540 		pr_err("Unable to allocate tr_max_lat_wq\n");
1541 		return -ENOMEM;
1542 	}
1543 	return 0;
1544 }
1545 
1546 late_initcall_sync(latency_fsnotify_init);
1547 
1548 void latency_fsnotify(struct trace_array *tr)
1549 {
1550 	if (!fsnotify_wq)
1551 		return;
1552 	/*
1553 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1554 	 * possible that we are called from __schedule() or do_idle(), which
1555 	 * could cause a deadlock.
1556 	 */
1557 	irq_work_queue(&tr->fsnotify_irqwork);
1558 }
1559 
1560 /*
1561  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1562  *  defined(CONFIG_FSNOTIFY)
1563  */
1564 #else
1565 
1566 #define trace_create_maxlat_file(tr, d_tracer)				\
1567 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1568 			  &tr->max_latency, &tracing_max_lat_fops)
1569 
1570 #endif
1571 
1572 #ifdef CONFIG_TRACER_MAX_TRACE
1573 /*
1574  * Copy the new maximum trace into the separate maximum-trace
1575  * structure. (this way the maximum trace is permanently saved,
1576  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1577  */
1578 static void
1579 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1580 {
1581 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1582 	struct trace_buffer *max_buf = &tr->max_buffer;
1583 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1584 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1585 
1586 	max_buf->cpu = cpu;
1587 	max_buf->time_start = data->preempt_timestamp;
1588 
1589 	max_data->saved_latency = tr->max_latency;
1590 	max_data->critical_start = data->critical_start;
1591 	max_data->critical_end = data->critical_end;
1592 
1593 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1594 	max_data->pid = tsk->pid;
1595 	/*
1596 	 * If tsk == current, then use current_uid(), as that does not use
1597 	 * RCU. The irq tracer can be called out of RCU scope.
1598 	 */
1599 	if (tsk == current)
1600 		max_data->uid = current_uid();
1601 	else
1602 		max_data->uid = task_uid(tsk);
1603 
1604 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1605 	max_data->policy = tsk->policy;
1606 	max_data->rt_priority = tsk->rt_priority;
1607 
1608 	/* record this tasks comm */
1609 	tracing_record_cmdline(tsk);
1610 	latency_fsnotify(tr);
1611 }
1612 
1613 /**
1614  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1615  * @tr: tracer
1616  * @tsk: the task with the latency
1617  * @cpu: The cpu that initiated the trace.
1618  * @cond_data: User data associated with a conditional snapshot
1619  *
1620  * Flip the buffers between the @tr and the max_tr and record information
1621  * about which task was the cause of this latency.
1622  */
1623 void
1624 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1625 	      void *cond_data)
1626 {
1627 	if (tr->stop_count)
1628 		return;
1629 
1630 	WARN_ON_ONCE(!irqs_disabled());
1631 
1632 	if (!tr->allocated_snapshot) {
1633 		/* Only the nop tracer should hit this when disabling */
1634 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1635 		return;
1636 	}
1637 
1638 	arch_spin_lock(&tr->max_lock);
1639 
1640 	/* Inherit the recordable setting from trace_buffer */
1641 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1642 		ring_buffer_record_on(tr->max_buffer.buffer);
1643 	else
1644 		ring_buffer_record_off(tr->max_buffer.buffer);
1645 
1646 #ifdef CONFIG_TRACER_SNAPSHOT
1647 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1648 		goto out_unlock;
1649 #endif
1650 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1651 
1652 	__update_max_tr(tr, tsk, cpu);
1653 
1654  out_unlock:
1655 	arch_spin_unlock(&tr->max_lock);
1656 }
1657 
1658 /**
1659  * update_max_tr_single - only copy one trace over, and reset the rest
1660  * @tr: tracer
1661  * @tsk: task with the latency
1662  * @cpu: the cpu of the buffer to copy.
1663  *
1664  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1665  */
1666 void
1667 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1668 {
1669 	int ret;
1670 
1671 	if (tr->stop_count)
1672 		return;
1673 
1674 	WARN_ON_ONCE(!irqs_disabled());
1675 	if (!tr->allocated_snapshot) {
1676 		/* Only the nop tracer should hit this when disabling */
1677 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1678 		return;
1679 	}
1680 
1681 	arch_spin_lock(&tr->max_lock);
1682 
1683 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1684 
1685 	if (ret == -EBUSY) {
1686 		/*
1687 		 * We failed to swap the buffer due to a commit taking
1688 		 * place on this CPU. We fail to record, but we reset
1689 		 * the max trace buffer (no one writes directly to it)
1690 		 * and flag that it failed.
1691 		 */
1692 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1693 			"Failed to swap buffers due to commit in progress\n");
1694 	}
1695 
1696 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1697 
1698 	__update_max_tr(tr, tsk, cpu);
1699 	arch_spin_unlock(&tr->max_lock);
1700 }
1701 #endif /* CONFIG_TRACER_MAX_TRACE */
1702 
1703 static int wait_on_pipe(struct trace_iterator *iter, int full)
1704 {
1705 	/* Iterators are static, they should be filled or empty */
1706 	if (trace_buffer_iter(iter, iter->cpu_file))
1707 		return 0;
1708 
1709 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1710 				full);
1711 }
1712 
1713 #ifdef CONFIG_FTRACE_STARTUP_TEST
1714 static bool selftests_can_run;
1715 
1716 struct trace_selftests {
1717 	struct list_head		list;
1718 	struct tracer			*type;
1719 };
1720 
1721 static LIST_HEAD(postponed_selftests);
1722 
1723 static int save_selftest(struct tracer *type)
1724 {
1725 	struct trace_selftests *selftest;
1726 
1727 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1728 	if (!selftest)
1729 		return -ENOMEM;
1730 
1731 	selftest->type = type;
1732 	list_add(&selftest->list, &postponed_selftests);
1733 	return 0;
1734 }
1735 
1736 static int run_tracer_selftest(struct tracer *type)
1737 {
1738 	struct trace_array *tr = &global_trace;
1739 	struct tracer *saved_tracer = tr->current_trace;
1740 	int ret;
1741 
1742 	if (!type->selftest || tracing_selftest_disabled)
1743 		return 0;
1744 
1745 	/*
1746 	 * If a tracer registers early in boot up (before scheduling is
1747 	 * initialized and such), then do not run its selftests yet.
1748 	 * Instead, run it a little later in the boot process.
1749 	 */
1750 	if (!selftests_can_run)
1751 		return save_selftest(type);
1752 
1753 	/*
1754 	 * Run a selftest on this tracer.
1755 	 * Here we reset the trace buffer, and set the current
1756 	 * tracer to be this tracer. The tracer can then run some
1757 	 * internal tracing to verify that everything is in order.
1758 	 * If we fail, we do not register this tracer.
1759 	 */
1760 	tracing_reset_online_cpus(&tr->trace_buffer);
1761 
1762 	tr->current_trace = type;
1763 
1764 #ifdef CONFIG_TRACER_MAX_TRACE
1765 	if (type->use_max_tr) {
1766 		/* If we expanded the buffers, make sure the max is expanded too */
1767 		if (ring_buffer_expanded)
1768 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1769 					   RING_BUFFER_ALL_CPUS);
1770 		tr->allocated_snapshot = true;
1771 	}
1772 #endif
1773 
1774 	/* the test is responsible for initializing and enabling */
1775 	pr_info("Testing tracer %s: ", type->name);
1776 	ret = type->selftest(type, tr);
1777 	/* the test is responsible for resetting too */
1778 	tr->current_trace = saved_tracer;
1779 	if (ret) {
1780 		printk(KERN_CONT "FAILED!\n");
1781 		/* Add the warning after printing 'FAILED' */
1782 		WARN_ON(1);
1783 		return -1;
1784 	}
1785 	/* Only reset on passing, to avoid touching corrupted buffers */
1786 	tracing_reset_online_cpus(&tr->trace_buffer);
1787 
1788 #ifdef CONFIG_TRACER_MAX_TRACE
1789 	if (type->use_max_tr) {
1790 		tr->allocated_snapshot = false;
1791 
1792 		/* Shrink the max buffer again */
1793 		if (ring_buffer_expanded)
1794 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1795 					   RING_BUFFER_ALL_CPUS);
1796 	}
1797 #endif
1798 
1799 	printk(KERN_CONT "PASSED\n");
1800 	return 0;
1801 }
1802 
1803 static __init int init_trace_selftests(void)
1804 {
1805 	struct trace_selftests *p, *n;
1806 	struct tracer *t, **last;
1807 	int ret;
1808 
1809 	selftests_can_run = true;
1810 
1811 	mutex_lock(&trace_types_lock);
1812 
1813 	if (list_empty(&postponed_selftests))
1814 		goto out;
1815 
1816 	pr_info("Running postponed tracer tests:\n");
1817 
1818 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1819 		/* This loop can take minutes when sanitizers are enabled, so
1820 		 * let's make sure we allow RCU processing.
1821 		 */
1822 		cond_resched();
1823 		ret = run_tracer_selftest(p->type);
1824 		/* If the test fails, then warn and remove from available_tracers */
1825 		if (ret < 0) {
1826 			WARN(1, "tracer: %s failed selftest, disabling\n",
1827 			     p->type->name);
1828 			last = &trace_types;
1829 			for (t = trace_types; t; t = t->next) {
1830 				if (t == p->type) {
1831 					*last = t->next;
1832 					break;
1833 				}
1834 				last = &t->next;
1835 			}
1836 		}
1837 		list_del(&p->list);
1838 		kfree(p);
1839 	}
1840 
1841  out:
1842 	mutex_unlock(&trace_types_lock);
1843 
1844 	return 0;
1845 }
1846 core_initcall(init_trace_selftests);
1847 #else
1848 static inline int run_tracer_selftest(struct tracer *type)
1849 {
1850 	return 0;
1851 }
1852 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1853 
1854 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1855 
1856 static void __init apply_trace_boot_options(void);
1857 
1858 /**
1859  * register_tracer - register a tracer with the ftrace system.
1860  * @type: the plugin for the tracer
1861  *
1862  * Register a new plugin tracer.
1863  */
1864 int __init register_tracer(struct tracer *type)
1865 {
1866 	struct tracer *t;
1867 	int ret = 0;
1868 
1869 	if (!type->name) {
1870 		pr_info("Tracer must have a name\n");
1871 		return -1;
1872 	}
1873 
1874 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1875 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1876 		return -1;
1877 	}
1878 
1879 	mutex_lock(&trace_types_lock);
1880 
1881 	tracing_selftest_running = true;
1882 
1883 	for (t = trace_types; t; t = t->next) {
1884 		if (strcmp(type->name, t->name) == 0) {
1885 			/* already found */
1886 			pr_info("Tracer %s already registered\n",
1887 				type->name);
1888 			ret = -1;
1889 			goto out;
1890 		}
1891 	}
1892 
1893 	if (!type->set_flag)
1894 		type->set_flag = &dummy_set_flag;
1895 	if (!type->flags) {
1896 		/* allocate a dummy tracer_flags */
1897 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1898 		if (!type->flags) {
1899 			ret = -ENOMEM;
1900 			goto out;
1901 		}
1902 		type->flags->val = 0;
1903 		type->flags->opts = dummy_tracer_opt;
1904 	} else
1905 		if (!type->flags->opts)
1906 			type->flags->opts = dummy_tracer_opt;
1907 
1908 	/* store the tracer for __set_tracer_option */
1909 	type->flags->trace = type;
1910 
1911 	ret = run_tracer_selftest(type);
1912 	if (ret < 0)
1913 		goto out;
1914 
1915 	type->next = trace_types;
1916 	trace_types = type;
1917 	add_tracer_options(&global_trace, type);
1918 
1919  out:
1920 	tracing_selftest_running = false;
1921 	mutex_unlock(&trace_types_lock);
1922 
1923 	if (ret || !default_bootup_tracer)
1924 		goto out_unlock;
1925 
1926 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1927 		goto out_unlock;
1928 
1929 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1930 	/* Do we want this tracer to start on bootup? */
1931 	tracing_set_tracer(&global_trace, type->name);
1932 	default_bootup_tracer = NULL;
1933 
1934 	apply_trace_boot_options();
1935 
1936 	/* disable other selftests, since this will break them. */
1937 	tracing_selftest_disabled = true;
1938 #ifdef CONFIG_FTRACE_STARTUP_TEST
1939 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1940 	       type->name);
1941 #endif
1942 
1943  out_unlock:
1944 	return ret;
1945 }
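
/*
 * Example (illustrative sketch only; the "foo" names are made up): a
 * tracer plugin normally lives in its own file and registers itself
 * from an initcall, roughly like:
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 *
 * See the nop or function tracers for real users of this interface.
 */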
1946 
1947 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1948 {
1949 	struct ring_buffer *buffer = buf->buffer;
1950 
1951 	if (!buffer)
1952 		return;
1953 
1954 	ring_buffer_record_disable(buffer);
1955 
1956 	/* Make sure all commits have finished */
1957 	synchronize_rcu();
1958 	ring_buffer_reset_cpu(buffer, cpu);
1959 
1960 	ring_buffer_record_enable(buffer);
1961 }
1962 
1963 void tracing_reset_online_cpus(struct trace_buffer *buf)
1964 {
1965 	struct ring_buffer *buffer = buf->buffer;
1966 	int cpu;
1967 
1968 	if (!buffer)
1969 		return;
1970 
1971 	ring_buffer_record_disable(buffer);
1972 
1973 	/* Make sure all commits have finished */
1974 	synchronize_rcu();
1975 
1976 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1977 
1978 	for_each_online_cpu(cpu)
1979 		ring_buffer_reset_cpu(buffer, cpu);
1980 
1981 	ring_buffer_record_enable(buffer);
1982 }
1983 
1984 /* Must have trace_types_lock held */
1985 void tracing_reset_all_online_cpus(void)
1986 {
1987 	struct trace_array *tr;
1988 
1989 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1990 		if (!tr->clear_trace)
1991 			continue;
1992 		tr->clear_trace = false;
1993 		tracing_reset_online_cpus(&tr->trace_buffer);
1994 #ifdef CONFIG_TRACER_MAX_TRACE
1995 		tracing_reset_online_cpus(&tr->max_buffer);
1996 #endif
1997 	}
1998 }
1999 
2000 static int *tgid_map;
2001 
2002 #define SAVED_CMDLINES_DEFAULT 128
2003 #define NO_CMDLINE_MAP UINT_MAX
2004 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2005 struct saved_cmdlines_buffer {
2006 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2007 	unsigned *map_cmdline_to_pid;
2008 	unsigned cmdline_num;
2009 	int cmdline_idx;
2010 	char *saved_cmdlines;
2011 };
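
/*
 * savedcmd is a small, fixed size cache of the most recently recorded
 * comms. map_pid_to_cmdline[] maps a pid to a slot in saved_cmdlines[]
 * (or NO_CMDLINE_MAP), and map_cmdline_to_pid[] maps a slot back to
 * the pid that currently owns it, so a recycled slot can first be
 * unmapped from its old pid. Slots are handed out round robin by
 * trace_save_cmdline() below.
 */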
2012 static struct saved_cmdlines_buffer *savedcmd;
2013 
2014 /* temporary disable recording */
2015 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2016 
2017 static inline char *get_saved_cmdlines(int idx)
2018 {
2019 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2020 }
2021 
2022 static inline void set_cmdline(int idx, const char *cmdline)
2023 {
2024 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2025 }
2026 
2027 static int allocate_cmdlines_buffer(unsigned int val,
2028 				    struct saved_cmdlines_buffer *s)
2029 {
2030 	s->map_cmdline_to_pid = kmalloc_array(val,
2031 					      sizeof(*s->map_cmdline_to_pid),
2032 					      GFP_KERNEL);
2033 	if (!s->map_cmdline_to_pid)
2034 		return -ENOMEM;
2035 
2036 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2037 	if (!s->saved_cmdlines) {
2038 		kfree(s->map_cmdline_to_pid);
2039 		return -ENOMEM;
2040 	}
2041 
2042 	s->cmdline_idx = 0;
2043 	s->cmdline_num = val;
2044 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2045 	       sizeof(s->map_pid_to_cmdline));
2046 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2047 	       val * sizeof(*s->map_cmdline_to_pid));
2048 
2049 	return 0;
2050 }
2051 
2052 static int trace_create_savedcmd(void)
2053 {
2054 	int ret;
2055 
2056 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2057 	if (!savedcmd)
2058 		return -ENOMEM;
2059 
2060 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2061 	if (ret < 0) {
2062 		kfree(savedcmd);
2063 		savedcmd = NULL;
2064 		return -ENOMEM;
2065 	}
2066 
2067 	return 0;
2068 }
2069 
2070 int is_tracing_stopped(void)
2071 {
2072 	return global_trace.stop_count;
2073 }
2074 
2075 /**
2076  * tracing_start - quick start of the tracer
2077  *
2078  * If tracing is enabled but was stopped by tracing_stop,
2079  * this will start the tracer back up.
2080  */
2081 void tracing_start(void)
2082 {
2083 	struct ring_buffer *buffer;
2084 	unsigned long flags;
2085 
2086 	if (tracing_disabled)
2087 		return;
2088 
2089 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2090 	if (--global_trace.stop_count) {
2091 		if (global_trace.stop_count < 0) {
2092 			/* Someone screwed up their debugging */
2093 			WARN_ON_ONCE(1);
2094 			global_trace.stop_count = 0;
2095 		}
2096 		goto out;
2097 	}
2098 
2099 	/* Prevent the buffers from switching */
2100 	arch_spin_lock(&global_trace.max_lock);
2101 
2102 	buffer = global_trace.trace_buffer.buffer;
2103 	if (buffer)
2104 		ring_buffer_record_enable(buffer);
2105 
2106 #ifdef CONFIG_TRACER_MAX_TRACE
2107 	buffer = global_trace.max_buffer.buffer;
2108 	if (buffer)
2109 		ring_buffer_record_enable(buffer);
2110 #endif
2111 
2112 	arch_spin_unlock(&global_trace.max_lock);
2113 
2114  out:
2115 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2116 }
2117 
2118 static void tracing_start_tr(struct trace_array *tr)
2119 {
2120 	struct ring_buffer *buffer;
2121 	unsigned long flags;
2122 
2123 	if (tracing_disabled)
2124 		return;
2125 
2126 	/* If global, we need to also start the max tracer */
2127 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2128 		return tracing_start();
2129 
2130 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2131 
2132 	if (--tr->stop_count) {
2133 		if (tr->stop_count < 0) {
2134 			/* Someone screwed up their debugging */
2135 			WARN_ON_ONCE(1);
2136 			tr->stop_count = 0;
2137 		}
2138 		goto out;
2139 	}
2140 
2141 	buffer = tr->trace_buffer.buffer;
2142 	if (buffer)
2143 		ring_buffer_record_enable(buffer);
2144 
2145  out:
2146 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2147 }
2148 
2149 /**
2150  * tracing_stop - quick stop of the tracer
2151  *
2152  * Light weight way to stop tracing. Use in conjunction with
2153  * tracing_start.
2154  */
2155 void tracing_stop(void)
2156 {
2157 	struct ring_buffer *buffer;
2158 	unsigned long flags;
2159 
2160 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2161 	if (global_trace.stop_count++)
2162 		goto out;
2163 
2164 	/* Prevent the buffers from switching */
2165 	arch_spin_lock(&global_trace.max_lock);
2166 
2167 	buffer = global_trace.trace_buffer.buffer;
2168 	if (buffer)
2169 		ring_buffer_record_disable(buffer);
2170 
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172 	buffer = global_trace.max_buffer.buffer;
2173 	if (buffer)
2174 		ring_buffer_record_disable(buffer);
2175 #endif
2176 
2177 	arch_spin_unlock(&global_trace.max_lock);
2178 
2179  out:
2180 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2181 }
2182 
2183 static void tracing_stop_tr(struct trace_array *tr)
2184 {
2185 	struct ring_buffer *buffer;
2186 	unsigned long flags;
2187 
2188 	/* If global, we need to also stop the max tracer */
2189 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2190 		return tracing_stop();
2191 
2192 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2193 	if (tr->stop_count++)
2194 		goto out;
2195 
2196 	buffer = tr->trace_buffer.buffer;
2197 	if (buffer)
2198 		ring_buffer_record_disable(buffer);
2199 
2200  out:
2201 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2202 }
2203 
2204 static int trace_save_cmdline(struct task_struct *tsk)
2205 {
2206 	unsigned pid, idx;
2207 
2208 	/* treat recording of idle task as a success */
2209 	if (!tsk->pid)
2210 		return 1;
2211 
2212 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2213 		return 0;
2214 
2215 	/*
2216 	 * It's not the end of the world if we don't get
2217 	 * the lock, but we also don't want to spin
2218 	 * nor do we want to disable interrupts,
2219 	 * so if we miss here, then better luck next time.
2220 	 */
2221 	if (!arch_spin_trylock(&trace_cmdline_lock))
2222 		return 0;
2223 
2224 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2225 	if (idx == NO_CMDLINE_MAP) {
2226 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2227 
2228 		/*
2229 		 * Check whether the cmdline buffer at idx has a pid
2230 		 * mapped. We are going to overwrite that entry so we
2231 		 * need to clear the map_pid_to_cmdline. Otherwise we
2232 		 * would read the new comm for the old pid.
2233 		 */
2234 		pid = savedcmd->map_cmdline_to_pid[idx];
2235 		if (pid != NO_CMDLINE_MAP)
2236 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2237 
2238 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2239 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2240 
2241 		savedcmd->cmdline_idx = idx;
2242 	}
2243 
2244 	set_cmdline(idx, tsk->comm);
2245 
2246 	arch_spin_unlock(&trace_cmdline_lock);
2247 
2248 	return 1;
2249 }
2250 
2251 static void __trace_find_cmdline(int pid, char comm[])
2252 {
2253 	unsigned map;
2254 
2255 	if (!pid) {
2256 		strcpy(comm, "<idle>");
2257 		return;
2258 	}
2259 
2260 	if (WARN_ON_ONCE(pid < 0)) {
2261 		strcpy(comm, "<XXX>");
2262 		return;
2263 	}
2264 
2265 	if (pid > PID_MAX_DEFAULT) {
2266 		strcpy(comm, "<...>");
2267 		return;
2268 	}
2269 
2270 	map = savedcmd->map_pid_to_cmdline[pid];
2271 	if (map != NO_CMDLINE_MAP)
2272 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2273 	else
2274 		strcpy(comm, "<...>");
2275 }
2276 
2277 void trace_find_cmdline(int pid, char comm[])
2278 {
2279 	preempt_disable();
2280 	arch_spin_lock(&trace_cmdline_lock);
2281 
2282 	__trace_find_cmdline(pid, comm);
2283 
2284 	arch_spin_unlock(&trace_cmdline_lock);
2285 	preempt_enable();
2286 }
2287 
2288 int trace_find_tgid(int pid)
2289 {
2290 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2291 		return 0;
2292 
2293 	return tgid_map[pid];
2294 }
2295 
2296 static int trace_save_tgid(struct task_struct *tsk)
2297 {
2298 	/* treat recording of idle task as a success */
2299 	if (!tsk->pid)
2300 		return 1;
2301 
2302 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2303 		return 0;
2304 
2305 	tgid_map[tsk->pid] = tsk->tgid;
2306 	return 1;
2307 }
2308 
2309 static bool tracing_record_taskinfo_skip(int flags)
2310 {
2311 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2312 		return true;
2313 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2314 		return true;
2315 	if (!__this_cpu_read(trace_taskinfo_save))
2316 		return true;
2317 	return false;
2318 }
2319 
2320 /**
2321  * tracing_record_taskinfo - record the task info of a task
2322  *
2323  * @task:  task to record
2324  * @flags: TRACE_RECORD_CMDLINE for recording comm
2325  *         TRACE_RECORD_TGID for recording tgid
2326  */
2327 void tracing_record_taskinfo(struct task_struct *task, int flags)
2328 {
2329 	bool done;
2330 
2331 	if (tracing_record_taskinfo_skip(flags))
2332 		return;
2333 
2334 	/*
2335 	 * Record as much task information as possible. If some fail, continue
2336 	 * to try to record the others.
2337 	 */
2338 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2339 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2340 
2341 	/* If recording any information failed, retry again soon. */
2342 	if (!done)
2343 		return;
2344 
2345 	__this_cpu_write(trace_taskinfo_save, false);
2346 }
2347 
2348 /**
2349  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2350  *
2351  * @prev: previous task during sched_switch
2352  * @next: next task during sched_switch
2353  * @flags: TRACE_RECORD_CMDLINE for recording comm
2354  *         TRACE_RECORD_TGID for recording tgid
2355  */
2356 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2357 					  struct task_struct *next, int flags)
2358 {
2359 	bool done;
2360 
2361 	if (tracing_record_taskinfo_skip(flags))
2362 		return;
2363 
2364 	/*
2365 	 * Record as much task information as possible. If some fail, continue
2366 	 * to try to record the others.
2367 	 */
2368 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2369 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2370 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2371 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2372 
2373 	/* If recording any information failed, retry again soon. */
2374 	if (!done)
2375 		return;
2376 
2377 	__this_cpu_write(trace_taskinfo_save, false);
2378 }
2379 
2380 /* Helpers to record a specific task information */
2381 void tracing_record_cmdline(struct task_struct *task)
2382 {
2383 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2384 }
2385 
2386 void tracing_record_tgid(struct task_struct *task)
2387 {
2388 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2389 }
2390 
2391 /*
2392  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2393  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2394  * simplifies those functions and keeps them in sync.
2395  */
2396 enum print_line_t trace_handle_return(struct trace_seq *s)
2397 {
2398 	return trace_seq_has_overflowed(s) ?
2399 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2400 }
2401 EXPORT_SYMBOL_GPL(trace_handle_return);
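
/*
 * Typical use in a trace_event output callback (see print_raw_fmt()
 * and friends below): emit everything with trace_seq_*() and finish
 * with
 *
 *	return trace_handle_return(s);
 *
 * so that an overflowed trace_seq is reported as a partial line.
 */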
2402 
2403 void
2404 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2405 			     unsigned long flags, int pc)
2406 {
2407 	struct task_struct *tsk = current;
2408 
2409 	entry->preempt_count		= pc & 0xff;
2410 	entry->pid			= (tsk) ? tsk->pid : 0;
2411 	entry->type			= type;
2412 	entry->flags =
2413 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2414 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2415 #else
2416 		TRACE_FLAG_IRQS_NOSUPPORT |
2417 #endif
2418 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2419 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2420 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2421 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2422 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2423 }
2424 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2425 
2426 struct ring_buffer_event *
2427 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2428 			  int type,
2429 			  unsigned long len,
2430 			  unsigned long flags, int pc)
2431 {
2432 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2433 }
2434 
2435 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2436 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2437 static int trace_buffered_event_ref;
2438 
2439 /**
2440  * trace_buffered_event_enable - enable buffering events
2441  *
2442  * When events are being filtered, it is quicker to use a temporary
2443  * buffer to write the event data into if there's a likely chance
2444  * that it will not be committed. Discarding an event from the ring
2445  * buffer is not as fast as committing one, and is much slower than
2446  * copying the data on a commit.
2447  *
2448  * When an event is to be filtered, allocate per cpu buffers to
2449  * write the event data into, and if the event is filtered and discarded
2450  * it is simply dropped, otherwise, the entire data is to be committed
2451  * in one shot.
2452  */
2453 void trace_buffered_event_enable(void)
2454 {
2455 	struct ring_buffer_event *event;
2456 	struct page *page;
2457 	int cpu;
2458 
2459 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2460 
2461 	if (trace_buffered_event_ref++)
2462 		return;
2463 
2464 	for_each_tracing_cpu(cpu) {
2465 		page = alloc_pages_node(cpu_to_node(cpu),
2466 					GFP_KERNEL | __GFP_NORETRY, 0);
2467 		if (!page)
2468 			goto failed;
2469 
2470 		event = page_address(page);
2471 		memset(event, 0, sizeof(*event));
2472 
2473 		per_cpu(trace_buffered_event, cpu) = event;
2474 
2475 		preempt_disable();
2476 		if (cpu == smp_processor_id() &&
2477 		    this_cpu_read(trace_buffered_event) !=
2478 		    per_cpu(trace_buffered_event, cpu))
2479 			WARN_ON_ONCE(1);
2480 		preempt_enable();
2481 	}
2482 
2483 	return;
2484  failed:
2485 	trace_buffered_event_disable();
2486 }
2487 
2488 static void enable_trace_buffered_event(void *data)
2489 {
2490 	/* Probably not needed, but do it anyway */
2491 	smp_rmb();
2492 	this_cpu_dec(trace_buffered_event_cnt);
2493 }
2494 
2495 static void disable_trace_buffered_event(void *data)
2496 {
2497 	this_cpu_inc(trace_buffered_event_cnt);
2498 }
2499 
2500 /**
2501  * trace_buffered_event_disable - disable buffering events
2502  *
2503  * When a filter is removed, it is faster to not use the buffered
2504  * events, and to commit directly into the ring buffer. Free up
2505  * the temp buffers when there are no more users. This requires
2506  * special synchronization with current events.
2507  */
2508 void trace_buffered_event_disable(void)
2509 {
2510 	int cpu;
2511 
2512 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2513 
2514 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2515 		return;
2516 
2517 	if (--trace_buffered_event_ref)
2518 		return;
2519 
2520 	preempt_disable();
2521 	/* For each CPU, set the buffer as used. */
2522 	smp_call_function_many(tracing_buffer_mask,
2523 			       disable_trace_buffered_event, NULL, 1);
2524 	preempt_enable();
2525 
2526 	/* Wait for all current users to finish */
2527 	synchronize_rcu();
2528 
2529 	for_each_tracing_cpu(cpu) {
2530 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2531 		per_cpu(trace_buffered_event, cpu) = NULL;
2532 	}
2533 	/*
2534 	 * Make sure trace_buffered_event is NULL before clearing
2535 	 * trace_buffered_event_cnt.
2536 	 */
2537 	smp_wmb();
2538 
2539 	preempt_disable();
2540 	/* Do the work on each cpu */
2541 	smp_call_function_many(tracing_buffer_mask,
2542 			       enable_trace_buffered_event, NULL, 1);
2543 	preempt_enable();
2544 }
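
/*
 * Note: calls to trace_buffered_event_enable()/disable() must be
 * refcount balanced and made with event_mutex held (both functions
 * WARN otherwise). The per cpu buffers are only freed when the last
 * user calls trace_buffered_event_disable().
 */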
2545 
2546 static struct ring_buffer *temp_buffer;
2547 
2548 struct ring_buffer_event *
2549 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2550 			  struct trace_event_file *trace_file,
2551 			  int type, unsigned long len,
2552 			  unsigned long flags, int pc)
2553 {
2554 	struct ring_buffer_event *entry;
2555 	int val;
2556 
2557 	*current_rb = trace_file->tr->trace_buffer.buffer;
2558 
2559 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2560 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2561 	    (entry = this_cpu_read(trace_buffered_event))) {
2562 		/* Try to use the per cpu buffer first */
2563 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2564 		if (val == 1) {
2565 			trace_event_setup(entry, type, flags, pc);
2566 			entry->array[0] = len;
2567 			return entry;
2568 		}
2569 		this_cpu_dec(trace_buffered_event_cnt);
2570 	}
2571 
2572 	entry = __trace_buffer_lock_reserve(*current_rb,
2573 					    type, len, flags, pc);
2574 	/*
2575 	 * If tracing is off, but we have triggers enabled
2576 	 * we still need to look at the event data. Use the temp_buffer
2577 	 * to store the trace event for the trigger to use. It's recursion
2578 	 * safe and will not be recorded anywhere.
2579 	 */
2580 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2581 		*current_rb = temp_buffer;
2582 		entry = __trace_buffer_lock_reserve(*current_rb,
2583 						    type, len, flags, pc);
2584 	}
2585 	return entry;
2586 }
2587 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2588 
2589 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2590 static DEFINE_MUTEX(tracepoint_printk_mutex);
2591 
2592 static void output_printk(struct trace_event_buffer *fbuffer)
2593 {
2594 	struct trace_event_call *event_call;
2595 	struct trace_event *event;
2596 	unsigned long flags;
2597 	struct trace_iterator *iter = tracepoint_print_iter;
2598 
2599 	/* We should never get here if iter is NULL */
2600 	if (WARN_ON_ONCE(!iter))
2601 		return;
2602 
2603 	event_call = fbuffer->trace_file->event_call;
2604 	if (!event_call || !event_call->event.funcs ||
2605 	    !event_call->event.funcs->trace)
2606 		return;
2607 
2608 	event = &fbuffer->trace_file->event_call->event;
2609 
2610 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2611 	trace_seq_init(&iter->seq);
2612 	iter->ent = fbuffer->entry;
2613 	event_call->event.funcs->trace(iter, 0, event);
2614 	trace_seq_putc(&iter->seq, 0);
2615 	printk("%s", iter->seq.buffer);
2616 
2617 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2618 }
2619 
2620 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2621 			     void __user *buffer, size_t *lenp,
2622 			     loff_t *ppos)
2623 {
2624 	int save_tracepoint_printk;
2625 	int ret;
2626 
2627 	mutex_lock(&tracepoint_printk_mutex);
2628 	save_tracepoint_printk = tracepoint_printk;
2629 
2630 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2631 
2632 	/*
2633 	 * This will force exiting early, as tracepoint_printk
2634 	 * is always zero when tracepoint_print_iter is not allocated
2635 	 */
2636 	if (!tracepoint_print_iter)
2637 		tracepoint_printk = 0;
2638 
2639 	if (save_tracepoint_printk == tracepoint_printk)
2640 		goto out;
2641 
2642 	if (tracepoint_printk)
2643 		static_key_enable(&tracepoint_printk_key.key);
2644 	else
2645 		static_key_disable(&tracepoint_printk_key.key);
2646 
2647  out:
2648 	mutex_unlock(&tracepoint_printk_mutex);
2649 
2650 	return ret;
2651 }
2652 
2653 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2654 {
2655 	if (static_key_false(&tracepoint_printk_key.key))
2656 		output_printk(fbuffer);
2657 
2658 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2659 				    fbuffer->event, fbuffer->entry,
2660 				    fbuffer->flags, fbuffer->pc);
2661 }
2662 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2663 
2664 /*
2665  * Skip 3:
2666  *
2667  *   trace_buffer_unlock_commit_regs()
2668  *   trace_event_buffer_commit()
2669  *   trace_event_raw_event_xxx()
2670  */
2671 # define STACK_SKIP 3
2672 
2673 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2674 				     struct ring_buffer *buffer,
2675 				     struct ring_buffer_event *event,
2676 				     unsigned long flags, int pc,
2677 				     struct pt_regs *regs)
2678 {
2679 	__buffer_unlock_commit(buffer, event);
2680 
2681 	/*
2682 	 * If regs is not set, then skip the necessary functions.
2683 	 * Note, we can still get here via blktrace, wakeup tracer
2684 	 * and mmiotrace, but that's ok if they lose a function or
2685 	 * two. They are not that meaningful.
2686 	 */
2687 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2688 	ftrace_trace_userstack(buffer, flags, pc);
2689 }
2690 
2691 /*
2692  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2693  */
2694 void
2695 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2696 				   struct ring_buffer_event *event)
2697 {
2698 	__buffer_unlock_commit(buffer, event);
2699 }
2700 
2701 static void
2702 trace_process_export(struct trace_export *export,
2703 	       struct ring_buffer_event *event)
2704 {
2705 	struct trace_entry *entry;
2706 	unsigned int size = 0;
2707 
2708 	entry = ring_buffer_event_data(event);
2709 	size = ring_buffer_event_length(event);
2710 	export->write(export, entry, size);
2711 }
2712 
2713 static DEFINE_MUTEX(ftrace_export_lock);
2714 
2715 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2716 
2717 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2718 
2719 static inline void ftrace_exports_enable(void)
2720 {
2721 	static_branch_enable(&ftrace_exports_enabled);
2722 }
2723 
2724 static inline void ftrace_exports_disable(void)
2725 {
2726 	static_branch_disable(&ftrace_exports_enabled);
2727 }
2728 
2729 static void ftrace_exports(struct ring_buffer_event *event)
2730 {
2731 	struct trace_export *export;
2732 
2733 	preempt_disable_notrace();
2734 
2735 	export = rcu_dereference_raw_check(ftrace_exports_list);
2736 	while (export) {
2737 		trace_process_export(export, event);
2738 		export = rcu_dereference_raw_check(export->next);
2739 	}
2740 
2741 	preempt_enable_notrace();
2742 }
2743 
2744 static inline void
2745 add_trace_export(struct trace_export **list, struct trace_export *export)
2746 {
2747 	rcu_assign_pointer(export->next, *list);
2748 	/*
2749 	 * We are entering export into the list but another
2750 	 * CPU might be walking that list. We need to make sure
2751 	 * the export->next pointer is valid before another CPU sees
2752 	 * the export pointer linked into the list.
2753 	 */
2754 	rcu_assign_pointer(*list, export);
2755 }
2756 
2757 static inline int
2758 rm_trace_export(struct trace_export **list, struct trace_export *export)
2759 {
2760 	struct trace_export **p;
2761 
2762 	for (p = list; *p != NULL; p = &(*p)->next)
2763 		if (*p == export)
2764 			break;
2765 
2766 	if (*p != export)
2767 		return -1;
2768 
2769 	rcu_assign_pointer(*p, (*p)->next);
2770 
2771 	return 0;
2772 }
2773 
2774 static inline void
2775 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2776 {
2777 	if (*list == NULL)
2778 		ftrace_exports_enable();
2779 
2780 	add_trace_export(list, export);
2781 }
2782 
2783 static inline int
2784 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2785 {
2786 	int ret;
2787 
2788 	ret = rm_trace_export(list, export);
2789 	if (*list == NULL)
2790 		ftrace_exports_disable();
2791 
2792 	return ret;
2793 }
2794 
2795 int register_ftrace_export(struct trace_export *export)
2796 {
2797 	if (WARN_ON_ONCE(!export->write))
2798 		return -1;
2799 
2800 	mutex_lock(&ftrace_export_lock);
2801 
2802 	add_ftrace_export(&ftrace_exports_list, export);
2803 
2804 	mutex_unlock(&ftrace_export_lock);
2805 
2806 	return 0;
2807 }
2808 EXPORT_SYMBOL_GPL(register_ftrace_export);
2809 
2810 int unregister_ftrace_export(struct trace_export *export)
2811 {
2812 	int ret;
2813 
2814 	mutex_lock(&ftrace_export_lock);
2815 
2816 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2817 
2818 	mutex_unlock(&ftrace_export_lock);
2819 
2820 	return ret;
2821 }
2822 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
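
/*
 * A minimal user of this interface (sketch only; "my_export" and
 * "my_write" are made-up names) fills in a struct trace_export (the
 * ->write() prototype is in <linux/trace.h>) and then does:
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 *
 * While registered, every function trace event is handed to my_write()
 * as it is committed (see ftrace_exports() and trace_function() above).
 */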
2823 
2824 void
2825 trace_function(struct trace_array *tr,
2826 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2827 	       int pc)
2828 {
2829 	struct trace_event_call *call = &event_function;
2830 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2831 	struct ring_buffer_event *event;
2832 	struct ftrace_entry *entry;
2833 
2834 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2835 					    flags, pc);
2836 	if (!event)
2837 		return;
2838 	entry	= ring_buffer_event_data(event);
2839 	entry->ip			= ip;
2840 	entry->parent_ip		= parent_ip;
2841 
2842 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2843 		if (static_branch_unlikely(&ftrace_exports_enabled))
2844 			ftrace_exports(event);
2845 		__buffer_unlock_commit(buffer, event);
2846 	}
2847 }
2848 
2849 #ifdef CONFIG_STACKTRACE
2850 
2851 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2852 #define FTRACE_KSTACK_NESTING	4
2853 
2854 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2855 
2856 struct ftrace_stack {
2857 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2858 };
2859 
2860 
2861 struct ftrace_stacks {
2862 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2863 };
2864 
2865 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2866 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2867 
2868 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2869 				 unsigned long flags,
2870 				 int skip, int pc, struct pt_regs *regs)
2871 {
2872 	struct trace_event_call *call = &event_kernel_stack;
2873 	struct ring_buffer_event *event;
2874 	unsigned int size, nr_entries;
2875 	struct ftrace_stack *fstack;
2876 	struct stack_entry *entry;
2877 	int stackidx;
2878 
2879 	/*
2880 	 * Add one, for this function and the call to stack_trace_save().
2881 	 * If regs is set, then these functions will not be in the way.
2882 	 */
2883 #ifndef CONFIG_UNWINDER_ORC
2884 	if (!regs)
2885 		skip++;
2886 #endif
2887 
2888 	/*
2889 	 * Since events can happen in NMIs there's no safe way to
2890 	 * use the per cpu ftrace_stacks directly. We reserve a nesting
2891 	 * slot, and if an interrupt or NMI comes in it will simply use
2892 	 * the next slot of the per cpu ftrace_stacks array.
2893 	 */
2894 	preempt_disable_notrace();
2895 
2896 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2897 
2898 	/* This should never happen. If it does, yell once and skip */
2899 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2900 		goto out;
2901 
2902 	/*
2903 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2904 	 * interrupt will either see the value pre increment or post
2905 	 * increment. If the interrupt happens pre increment it will have
2906 	 * restored the counter when it returns.  We just need a barrier to
2907 	 * keep gcc from moving things around.
2908 	 */
2909 	barrier();
2910 
2911 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2912 	size = ARRAY_SIZE(fstack->calls);
2913 
2914 	if (regs) {
2915 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2916 						   size, skip);
2917 	} else {
2918 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2919 	}
2920 
2921 	size = nr_entries * sizeof(unsigned long);
2922 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2923 					    sizeof(*entry) + size, flags, pc);
2924 	if (!event)
2925 		goto out;
2926 	entry = ring_buffer_event_data(event);
2927 
2928 	memcpy(&entry->caller, fstack->calls, size);
2929 	entry->size = nr_entries;
2930 
2931 	if (!call_filter_check_discard(call, entry, buffer, event))
2932 		__buffer_unlock_commit(buffer, event);
2933 
2934  out:
2935 	/* Again, don't let gcc optimize things here */
2936 	barrier();
2937 	__this_cpu_dec(ftrace_stack_reserve);
2938 	preempt_enable_notrace();
2939 
2940 }
2941 
2942 static inline void ftrace_trace_stack(struct trace_array *tr,
2943 				      struct ring_buffer *buffer,
2944 				      unsigned long flags,
2945 				      int skip, int pc, struct pt_regs *regs)
2946 {
2947 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2948 		return;
2949 
2950 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2951 }
2952 
2953 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2954 		   int pc)
2955 {
2956 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2957 
2958 	if (rcu_is_watching()) {
2959 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2960 		return;
2961 	}
2962 
2963 	/*
2964 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2965 	 * but if the above rcu_is_watching() failed, then the NMI
2966 	 * triggered someplace critical, and rcu_irq_enter() should
2967 	 * not be called from NMI.
2968 	 */
2969 	if (unlikely(in_nmi()))
2970 		return;
2971 
2972 	rcu_irq_enter_irqson();
2973 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2974 	rcu_irq_exit_irqson();
2975 }
2976 
2977 /**
2978  * trace_dump_stack - record a stack back trace in the trace buffer
2979  * @skip: Number of functions to skip (helper handlers)
2980  */
2981 void trace_dump_stack(int skip)
2982 {
2983 	unsigned long flags;
2984 
2985 	if (tracing_disabled || tracing_selftest_running)
2986 		return;
2987 
2988 	local_save_flags(flags);
2989 
2990 #ifndef CONFIG_UNWINDER_ORC
2991 	/* Skip 1 to skip this function. */
2992 	skip++;
2993 #endif
2994 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2995 			     flags, skip, preempt_count(), NULL);
2996 }
2997 EXPORT_SYMBOL_GPL(trace_dump_stack);
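
/*
 * Example: to see how a code path is reached, a debugging change can
 * simply add
 *
 *	trace_dump_stack(0);
 *
 * in that path and the caller's backtrace will show up in the trace
 * buffer (with @skip extra callers dropped, as described above).
 */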
2998 
2999 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3000 static DEFINE_PER_CPU(int, user_stack_count);
3001 
3002 static void
3003 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
3004 {
3005 	struct trace_event_call *call = &event_user_stack;
3006 	struct ring_buffer_event *event;
3007 	struct userstack_entry *entry;
3008 
3009 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3010 		return;
3011 
3012 	/*
3013 	 * NMIs can not handle page faults, even with fixups.
3014 	 * Saving the user stack can (and often does) fault.
3015 	 */
3016 	if (unlikely(in_nmi()))
3017 		return;
3018 
3019 	/*
3020 	 * prevent recursion, since the user stack tracing may
3021 	 * trigger other kernel events.
3022 	 */
3023 	preempt_disable();
3024 	if (__this_cpu_read(user_stack_count))
3025 		goto out;
3026 
3027 	__this_cpu_inc(user_stack_count);
3028 
3029 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3030 					    sizeof(*entry), flags, pc);
3031 	if (!event)
3032 		goto out_drop_count;
3033 	entry	= ring_buffer_event_data(event);
3034 
3035 	entry->tgid		= current->tgid;
3036 	memset(&entry->caller, 0, sizeof(entry->caller));
3037 
3038 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3039 	if (!call_filter_check_discard(call, entry, buffer, event))
3040 		__buffer_unlock_commit(buffer, event);
3041 
3042  out_drop_count:
3043 	__this_cpu_dec(user_stack_count);
3044  out:
3045 	preempt_enable();
3046 }
3047 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3048 static void ftrace_trace_userstack(struct ring_buffer *buffer,
3049 				   unsigned long flags, int pc)
3050 {
3051 }
3052 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3053 
3054 #endif /* CONFIG_STACKTRACE */
3055 
3056 /* created for use with alloc_percpu */
3057 struct trace_buffer_struct {
3058 	int nesting;
3059 	char buffer[4][TRACE_BUF_SIZE];
3060 };
3061 
3062 static struct trace_buffer_struct *trace_percpu_buffer;
3063 
3064 /*
3065  * This allows for lockless recording.  If we're nested too deeply, then
3066  * this returns NULL.
3067  */
3068 static char *get_trace_buf(void)
3069 {
3070 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3071 
3072 	if (!buffer || buffer->nesting >= 4)
3073 		return NULL;
3074 
3075 	buffer->nesting++;
3076 
3077 	/* Interrupts must see nesting incremented before we use the buffer */
3078 	barrier();
3079 	return &buffer->buffer[buffer->nesting][0];
3080 }
3081 
3082 static void put_trace_buf(void)
3083 {
3084 	/* Don't let the decrement of nesting leak before this */
3085 	barrier();
3086 	this_cpu_dec(trace_percpu_buffer->nesting);
3087 }
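
/*
 * get_trace_buf() and put_trace_buf() must be paired, and the caller
 * must keep preemption disabled in between since the buffer is per
 * cpu (see trace_vbprintk() below for the usage pattern).
 */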
3088 
3089 static int alloc_percpu_trace_buffer(void)
3090 {
3091 	struct trace_buffer_struct *buffers;
3092 
3093 	buffers = alloc_percpu(struct trace_buffer_struct);
3094 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3095 		return -ENOMEM;
3096 
3097 	trace_percpu_buffer = buffers;
3098 	return 0;
3099 }
3100 
3101 static int buffers_allocated;
3102 
3103 void trace_printk_init_buffers(void)
3104 {
3105 	if (buffers_allocated)
3106 		return;
3107 
3108 	if (alloc_percpu_trace_buffer())
3109 		return;
3110 
3111 	/* trace_printk() is for debug use only. Don't use it in production. */
3112 
3113 	pr_warn("\n");
3114 	pr_warn("**********************************************************\n");
3115 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3116 	pr_warn("**                                                      **\n");
3117 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3118 	pr_warn("**                                                      **\n");
3119 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3120 	pr_warn("** unsafe for production use.                           **\n");
3121 	pr_warn("**                                                      **\n");
3122 	pr_warn("** If you see this message and you are not debugging    **\n");
3123 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3124 	pr_warn("**                                                      **\n");
3125 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3126 	pr_warn("**********************************************************\n");
3127 
3128 	/* Expand the buffers to set size */
3129 	tracing_update_buffers();
3130 
3131 	buffers_allocated = 1;
3132 
3133 	/*
3134 	 * trace_printk_init_buffers() can be called by modules.
3135 	 * If that happens, then we need to start cmdline recording
3136 	 * directly here. If the global_trace.buffer is already
3137 	 * allocated here, then this was called by module code.
3138 	 */
3139 	if (global_trace.trace_buffer.buffer)
3140 		tracing_start_cmdline_record();
3141 }
3142 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3143 
3144 void trace_printk_start_comm(void)
3145 {
3146 	/* Start tracing comms if trace printk is set */
3147 	if (!buffers_allocated)
3148 		return;
3149 	tracing_start_cmdline_record();
3150 }
3151 
3152 static void trace_printk_start_stop_comm(int enabled)
3153 {
3154 	if (!buffers_allocated)
3155 		return;
3156 
3157 	if (enabled)
3158 		tracing_start_cmdline_record();
3159 	else
3160 		tracing_stop_cmdline_record();
3161 }
3162 
3163 /**
3164  * trace_vbprintk - write binary msg to tracing buffer
3165  * @ip:    The address of the caller
3166  * @fmt:   The string format to write to the buffer
3167  * @args:  Arguments for @fmt
3168  */
3169 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3170 {
3171 	struct trace_event_call *call = &event_bprint;
3172 	struct ring_buffer_event *event;
3173 	struct ring_buffer *buffer;
3174 	struct trace_array *tr = &global_trace;
3175 	struct bprint_entry *entry;
3176 	unsigned long flags;
3177 	char *tbuffer;
3178 	int len = 0, size, pc;
3179 
3180 	if (unlikely(tracing_selftest_running || tracing_disabled))
3181 		return 0;
3182 
3183 	/* Don't pollute graph traces with trace_vprintk internals */
3184 	pause_graph_tracing();
3185 
3186 	pc = preempt_count();
3187 	preempt_disable_notrace();
3188 
3189 	tbuffer = get_trace_buf();
3190 	if (!tbuffer) {
3191 		len = 0;
3192 		goto out_nobuffer;
3193 	}
3194 
3195 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3196 
3197 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3198 		goto out;
3199 
3200 	local_save_flags(flags);
3201 	size = sizeof(*entry) + sizeof(u32) * len;
3202 	buffer = tr->trace_buffer.buffer;
3203 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3204 					    flags, pc);
3205 	if (!event)
3206 		goto out;
3207 	entry = ring_buffer_event_data(event);
3208 	entry->ip			= ip;
3209 	entry->fmt			= fmt;
3210 
3211 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3212 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3213 		__buffer_unlock_commit(buffer, event);
3214 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3215 	}
3216 
3217 out:
3218 	put_trace_buf();
3219 
3220 out_nobuffer:
3221 	preempt_enable_notrace();
3222 	unpause_graph_tracing();
3223 
3224 	return len;
3225 }
3226 EXPORT_SYMBOL_GPL(trace_vbprintk);
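
/*
 * Note: this is the backend of trace_printk() for constant format
 * strings. Only the format pointer and the binary arguments are stored
 * in the ring buffer here; the text itself is generated later when the
 * buffer is read, which keeps this tracing fast path cheap.
 */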
3227 
3228 __printf(3, 0)
3229 static int
3230 __trace_array_vprintk(struct ring_buffer *buffer,
3231 		      unsigned long ip, const char *fmt, va_list args)
3232 {
3233 	struct trace_event_call *call = &event_print;
3234 	struct ring_buffer_event *event;
3235 	int len = 0, size, pc;
3236 	struct print_entry *entry;
3237 	unsigned long flags;
3238 	char *tbuffer;
3239 
3240 	if (tracing_disabled || tracing_selftest_running)
3241 		return 0;
3242 
3243 	/* Don't pollute graph traces with trace_vprintk internals */
3244 	pause_graph_tracing();
3245 
3246 	pc = preempt_count();
3247 	preempt_disable_notrace();
3248 
3249 
3250 	tbuffer = get_trace_buf();
3251 	if (!tbuffer) {
3252 		len = 0;
3253 		goto out_nobuffer;
3254 	}
3255 
3256 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3257 
3258 	local_save_flags(flags);
3259 	size = sizeof(*entry) + len + 1;
3260 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3261 					    flags, pc);
3262 	if (!event)
3263 		goto out;
3264 	entry = ring_buffer_event_data(event);
3265 	entry->ip = ip;
3266 
3267 	memcpy(&entry->buf, tbuffer, len + 1);
3268 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3269 		__buffer_unlock_commit(buffer, event);
3270 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3271 	}
3272 
3273 out:
3274 	put_trace_buf();
3275 
3276 out_nobuffer:
3277 	preempt_enable_notrace();
3278 	unpause_graph_tracing();
3279 
3280 	return len;
3281 }
3282 
3283 __printf(3, 0)
3284 int trace_array_vprintk(struct trace_array *tr,
3285 			unsigned long ip, const char *fmt, va_list args)
3286 {
3287 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3288 }
3289 
3290 __printf(3, 0)
3291 int trace_array_printk(struct trace_array *tr,
3292 		       unsigned long ip, const char *fmt, ...)
3293 {
3294 	int ret;
3295 	va_list ap;
3296 
3297 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3298 		return 0;
3299 
3300 	va_start(ap, fmt);
3301 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3302 	va_end(ap);
3303 	return ret;
3304 }
3305 EXPORT_SYMBOL_GPL(trace_array_printk);
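
/*
 * Example (sketch; "my_tr" stands for whatever trace_array handle the
 * caller already has):
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "reached step %d\n", step);
 *
 * writes the message into that instance's buffer instead of the
 * global one.
 */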
3306 
3307 __printf(3, 4)
3308 int trace_array_printk_buf(struct ring_buffer *buffer,
3309 			   unsigned long ip, const char *fmt, ...)
3310 {
3311 	int ret;
3312 	va_list ap;
3313 
3314 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3315 		return 0;
3316 
3317 	va_start(ap, fmt);
3318 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3319 	va_end(ap);
3320 	return ret;
3321 }
3322 
3323 __printf(2, 0)
3324 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3325 {
3326 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3327 }
3328 EXPORT_SYMBOL_GPL(trace_vprintk);
3329 
3330 static void trace_iterator_increment(struct trace_iterator *iter)
3331 {
3332 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3333 
3334 	iter->idx++;
3335 	if (buf_iter)
3336 		ring_buffer_read(buf_iter, NULL);
3337 }
3338 
3339 static struct trace_entry *
3340 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3341 		unsigned long *lost_events)
3342 {
3343 	struct ring_buffer_event *event;
3344 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3345 
3346 	if (buf_iter)
3347 		event = ring_buffer_iter_peek(buf_iter, ts);
3348 	else
3349 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3350 					 lost_events);
3351 
3352 	if (event) {
3353 		iter->ent_size = ring_buffer_event_length(event);
3354 		return ring_buffer_event_data(event);
3355 	}
3356 	iter->ent_size = 0;
3357 	return NULL;
3358 }
3359 
3360 static struct trace_entry *
3361 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3362 		  unsigned long *missing_events, u64 *ent_ts)
3363 {
3364 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3365 	struct trace_entry *ent, *next = NULL;
3366 	unsigned long lost_events = 0, next_lost = 0;
3367 	int cpu_file = iter->cpu_file;
3368 	u64 next_ts = 0, ts;
3369 	int next_cpu = -1;
3370 	int next_size = 0;
3371 	int cpu;
3372 
3373 	/*
3374 	 * If we are in a per_cpu trace file, don't bother iterating over
3375 	 * all CPUs; just peek at that one CPU directly.
3376 	 */
3377 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3378 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3379 			return NULL;
3380 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3381 		if (ent_cpu)
3382 			*ent_cpu = cpu_file;
3383 
3384 		return ent;
3385 	}
3386 
3387 	for_each_tracing_cpu(cpu) {
3388 
3389 		if (ring_buffer_empty_cpu(buffer, cpu))
3390 			continue;
3391 
3392 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3393 
3394 		/*
3395 		 * Pick the entry with the smallest timestamp:
3396 		 */
3397 		if (ent && (!next || ts < next_ts)) {
3398 			next = ent;
3399 			next_cpu = cpu;
3400 			next_ts = ts;
3401 			next_lost = lost_events;
3402 			next_size = iter->ent_size;
3403 		}
3404 	}
3405 
3406 	iter->ent_size = next_size;
3407 
3408 	if (ent_cpu)
3409 		*ent_cpu = next_cpu;
3410 
3411 	if (ent_ts)
3412 		*ent_ts = next_ts;
3413 
3414 	if (missing_events)
3415 		*missing_events = next_lost;
3416 
3417 	return next;
3418 }
3419 
3420 /* Find the next real entry, without updating the iterator itself */
3421 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3422 					  int *ent_cpu, u64 *ent_ts)
3423 {
3424 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3425 }
3426 
3427 /* Find the next real entry, and increment the iterator to the next entry */
3428 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3429 {
3430 	iter->ent = __find_next_entry(iter, &iter->cpu,
3431 				      &iter->lost_events, &iter->ts);
3432 
3433 	if (iter->ent)
3434 		trace_iterator_increment(iter);
3435 
3436 	return iter->ent ? iter : NULL;
3437 }
3438 
3439 static void trace_consume(struct trace_iterator *iter)
3440 {
3441 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3442 			    &iter->lost_events);
3443 }
3444 
3445 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3446 {
3447 	struct trace_iterator *iter = m->private;
3448 	int i = (int)*pos;
3449 	void *ent;
3450 
3451 	WARN_ON_ONCE(iter->leftover);
3452 
3453 	(*pos)++;
3454 
3455 	/* can't go backwards */
3456 	if (iter->idx > i)
3457 		return NULL;
3458 
3459 	if (iter->idx < 0)
3460 		ent = trace_find_next_entry_inc(iter);
3461 	else
3462 		ent = iter;
3463 
3464 	while (ent && iter->idx < i)
3465 		ent = trace_find_next_entry_inc(iter);
3466 
3467 	iter->pos = *pos;
3468 
3469 	return ent;
3470 }
3471 
3472 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3473 {
3474 	struct ring_buffer_event *event;
3475 	struct ring_buffer_iter *buf_iter;
3476 	unsigned long entries = 0;
3477 	u64 ts;
3478 
3479 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3480 
3481 	buf_iter = trace_buffer_iter(iter, cpu);
3482 	if (!buf_iter)
3483 		return;
3484 
3485 	ring_buffer_iter_reset(buf_iter);
3486 
3487 	/*
3488 	 * With the max latency tracers, a reset may never have taken
3489 	 * place on a cpu. This shows up as events with timestamps that
3490 	 * are before the start of the buffer.
3491 	 */
3492 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3493 		if (ts >= iter->trace_buffer->time_start)
3494 			break;
3495 		entries++;
3496 		ring_buffer_read(buf_iter, NULL);
3497 	}
3498 
3499 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3500 }
3501 
3502 /*
3503  * The current tracer is copied to avoid global locking
3504  * all around.
3505  */
3506 static void *s_start(struct seq_file *m, loff_t *pos)
3507 {
3508 	struct trace_iterator *iter = m->private;
3509 	struct trace_array *tr = iter->tr;
3510 	int cpu_file = iter->cpu_file;
3511 	void *p = NULL;
3512 	loff_t l = 0;
3513 	int cpu;
3514 
3515 	/*
3516 	 * Copy the tracer to avoid using a global lock all around.
3517 	 * iter->trace is a copy of current_trace; the name pointer can
3518 	 * be compared instead of using strcmp(), as iter->trace->name
3519 	 * will point to the same string as current_trace->name.
3520 	 */
3521 	mutex_lock(&trace_types_lock);
3522 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3523 		*iter->trace = *tr->current_trace;
3524 	mutex_unlock(&trace_types_lock);
3525 
3526 #ifdef CONFIG_TRACER_MAX_TRACE
3527 	if (iter->snapshot && iter->trace->use_max_tr)
3528 		return ERR_PTR(-EBUSY);
3529 #endif
3530 
3531 	if (!iter->snapshot)
3532 		atomic_inc(&trace_record_taskinfo_disabled);
3533 
3534 	if (*pos != iter->pos) {
3535 		iter->ent = NULL;
3536 		iter->cpu = 0;
3537 		iter->idx = -1;
3538 
3539 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3540 			for_each_tracing_cpu(cpu)
3541 				tracing_iter_reset(iter, cpu);
3542 		} else
3543 			tracing_iter_reset(iter, cpu_file);
3544 
3545 		iter->leftover = 0;
3546 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3547 			;
3548 
3549 	} else {
3550 		/*
3551 		 * If we overflowed the seq_file before, then we want
3552 		 * to just reuse the trace_seq buffer again.
3553 		 */
3554 		if (iter->leftover)
3555 			p = iter;
3556 		else {
3557 			l = *pos - 1;
3558 			p = s_next(m, p, &l);
3559 		}
3560 	}
3561 
3562 	trace_event_read_lock();
3563 	trace_access_lock(cpu_file);
3564 	return p;
3565 }
3566 
3567 static void s_stop(struct seq_file *m, void *p)
3568 {
3569 	struct trace_iterator *iter = m->private;
3570 
3571 #ifdef CONFIG_TRACER_MAX_TRACE
3572 	if (iter->snapshot && iter->trace->use_max_tr)
3573 		return;
3574 #endif
3575 
3576 	if (!iter->snapshot)
3577 		atomic_dec(&trace_record_taskinfo_disabled);
3578 
3579 	trace_access_unlock(iter->cpu_file);
3580 	trace_event_read_unlock();
3581 }
3582 
3583 static void
3584 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3585 		      unsigned long *entries, int cpu)
3586 {
3587 	unsigned long count;
3588 
3589 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3590 	/*
3591 	 * If this buffer has skipped entries, then we hold all
3592 	 * entries for the trace and we need to ignore the
3593 	 * ones before the time stamp.
3594 	 */
3595 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3596 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3597 		/* total is the same as the entries */
3598 		*total = count;
3599 	} else
3600 		*total = count +
3601 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3602 	*entries = count;
3603 }
3604 
3605 static void
3606 get_total_entries(struct trace_buffer *buf,
3607 		  unsigned long *total, unsigned long *entries)
3608 {
3609 	unsigned long t, e;
3610 	int cpu;
3611 
3612 	*total = 0;
3613 	*entries = 0;
3614 
3615 	for_each_tracing_cpu(cpu) {
3616 		get_total_entries_cpu(buf, &t, &e, cpu);
3617 		*total += t;
3618 		*entries += e;
3619 	}
3620 }
3621 
3622 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3623 {
3624 	unsigned long total, entries;
3625 
3626 	if (!tr)
3627 		tr = &global_trace;
3628 
3629 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3630 
3631 	return entries;
3632 }
3633 
3634 unsigned long trace_total_entries(struct trace_array *tr)
3635 {
3636 	unsigned long total, entries;
3637 
3638 	if (!tr)
3639 		tr = &global_trace;
3640 
3641 	get_total_entries(&tr->trace_buffer, &total, &entries);
3642 
3643 	return entries;
3644 }
3645 
3646 static void print_lat_help_header(struct seq_file *m)
3647 {
3648 	seq_puts(m, "#                  _------=> CPU#            \n"
3649 		    "#                 / _-----=> irqs-off        \n"
3650 		    "#                | / _----=> need-resched    \n"
3651 		    "#                || / _---=> hardirq/softirq \n"
3652 		    "#                ||| / _--=> preempt-depth   \n"
3653 		    "#                |||| /     delay            \n"
3654 		    "#  cmd     pid   ||||| time  |   caller      \n"
3655 		    "#     \\   /      |||||  \\    |   /         \n");
3656 }
3657 
3658 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3659 {
3660 	unsigned long total;
3661 	unsigned long entries;
3662 
3663 	get_total_entries(buf, &total, &entries);
3664 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3665 		   entries, total, num_online_cpus());
3666 	seq_puts(m, "#\n");
3667 }
3668 
3669 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3670 				   unsigned int flags)
3671 {
3672 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3673 
3674 	print_event_info(buf, m);
3675 
3676 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3677 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3678 }
3679 
3680 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3681 				       unsigned int flags)
3682 {
3683 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3684 	const char *space = "          ";
3685 	int prec = tgid ? 10 : 2;
3686 
3687 	print_event_info(buf, m);
3688 
3689 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3690 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3691 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3692 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3693 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3694 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3695 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3696 }
3697 
3698 void
3699 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3700 {
3701 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3702 	struct trace_buffer *buf = iter->trace_buffer;
3703 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3704 	struct tracer *type = iter->trace;
3705 	unsigned long entries;
3706 	unsigned long total;
3707 	const char *name = "preemption";
3708 
3709 	name = type->name;
3710 
3711 	get_total_entries(buf, &total, &entries);
3712 
3713 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3714 		   name, UTS_RELEASE);
3715 	seq_puts(m, "# -----------------------------------"
3716 		 "---------------------------------\n");
3717 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3718 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3719 		   nsecs_to_usecs(data->saved_latency),
3720 		   entries,
3721 		   total,
3722 		   buf->cpu,
3723 #if defined(CONFIG_PREEMPT_NONE)
3724 		   "server",
3725 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3726 		   "desktop",
3727 #elif defined(CONFIG_PREEMPT)
3728 		   "preempt",
3729 #else
3730 		   "unknown",
3731 #endif
3732 		   /* These are reserved for later use */
3733 		   0, 0, 0, 0);
3734 #ifdef CONFIG_SMP
3735 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3736 #else
3737 	seq_puts(m, ")\n");
3738 #endif
3739 	seq_puts(m, "#    -----------------\n");
3740 	seq_printf(m, "#    | task: %.16s-%d "
3741 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3742 		   data->comm, data->pid,
3743 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3744 		   data->policy, data->rt_priority);
3745 	seq_puts(m, "#    -----------------\n");
3746 
3747 	if (data->critical_start) {
3748 		seq_puts(m, "#  => started at: ");
3749 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3750 		trace_print_seq(m, &iter->seq);
3751 		seq_puts(m, "\n#  => ended at:   ");
3752 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3753 		trace_print_seq(m, &iter->seq);
3754 		seq_puts(m, "\n#\n");
3755 	}
3756 
3757 	seq_puts(m, "#\n");
3758 }
3759 
3760 static void test_cpu_buff_start(struct trace_iterator *iter)
3761 {
3762 	struct trace_seq *s = &iter->seq;
3763 	struct trace_array *tr = iter->tr;
3764 
3765 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3766 		return;
3767 
3768 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3769 		return;
3770 
3771 	if (cpumask_available(iter->started) &&
3772 	    cpumask_test_cpu(iter->cpu, iter->started))
3773 		return;
3774 
3775 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3776 		return;
3777 
3778 	if (cpumask_available(iter->started))
3779 		cpumask_set_cpu(iter->cpu, iter->started);
3780 
3781 	/* Don't print started cpu buffer for the first entry of the trace */
3782 	if (iter->idx > 1)
3783 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3784 				iter->cpu);
3785 }
3786 
3787 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3788 {
3789 	struct trace_array *tr = iter->tr;
3790 	struct trace_seq *s = &iter->seq;
3791 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3792 	struct trace_entry *entry;
3793 	struct trace_event *event;
3794 
3795 	entry = iter->ent;
3796 
3797 	test_cpu_buff_start(iter);
3798 
3799 	event = ftrace_find_event(entry->type);
3800 
3801 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3802 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3803 			trace_print_lat_context(iter);
3804 		else
3805 			trace_print_context(iter);
3806 	}
3807 
3808 	if (trace_seq_has_overflowed(s))
3809 		return TRACE_TYPE_PARTIAL_LINE;
3810 
3811 	if (event)
3812 		return event->funcs->trace(iter, sym_flags, event);
3813 
3814 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3815 
3816 	return trace_handle_return(s);
3817 }
3818 
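/* "raw" output: pid, CPU and timestamp followed by the event's raw() callback */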
3819 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3820 {
3821 	struct trace_array *tr = iter->tr;
3822 	struct trace_seq *s = &iter->seq;
3823 	struct trace_entry *entry;
3824 	struct trace_event *event;
3825 
3826 	entry = iter->ent;
3827 
3828 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3829 		trace_seq_printf(s, "%d %d %llu ",
3830 				 entry->pid, iter->cpu, iter->ts);
3831 
3832 	if (trace_seq_has_overflowed(s))
3833 		return TRACE_TYPE_PARTIAL_LINE;
3834 
3835 	event = ftrace_find_event(entry->type);
3836 	if (event)
3837 		return event->funcs->raw(iter, 0, event);
3838 
3839 	trace_seq_printf(s, "%d ?\n", entry->type);
3840 
3841 	return trace_handle_return(s);
3842 }
3843 
3844 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3845 {
3846 	struct trace_array *tr = iter->tr;
3847 	struct trace_seq *s = &iter->seq;
3848 	unsigned char newline = '\n';
3849 	struct trace_entry *entry;
3850 	struct trace_event *event;
3851 
3852 	entry = iter->ent;
3853 
3854 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3855 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3856 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3857 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3858 		if (trace_seq_has_overflowed(s))
3859 			return TRACE_TYPE_PARTIAL_LINE;
3860 	}
3861 
3862 	event = ftrace_find_event(entry->type);
3863 	if (event) {
3864 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3865 		if (ret != TRACE_TYPE_HANDLED)
3866 			return ret;
3867 	}
3868 
3869 	SEQ_PUT_FIELD(s, newline);
3870 
3871 	return trace_handle_return(s);
3872 }
3873 
3874 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3875 {
3876 	struct trace_array *tr = iter->tr;
3877 	struct trace_seq *s = &iter->seq;
3878 	struct trace_entry *entry;
3879 	struct trace_event *event;
3880 
3881 	entry = iter->ent;
3882 
3883 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3884 		SEQ_PUT_FIELD(s, entry->pid);
3885 		SEQ_PUT_FIELD(s, iter->cpu);
3886 		SEQ_PUT_FIELD(s, iter->ts);
3887 		if (trace_seq_has_overflowed(s))
3888 			return TRACE_TYPE_PARTIAL_LINE;
3889 	}
3890 
3891 	event = ftrace_find_event(entry->type);
3892 	return event ? event->funcs->binary(iter, 0, event) :
3893 		TRACE_TYPE_HANDLED;
3894 }
3895 
3896 int trace_empty(struct trace_iterator *iter)
3897 {
3898 	struct ring_buffer_iter *buf_iter;
3899 	int cpu;
3900 
3901 	/* If we are looking at one CPU buffer, only check that one */
3902 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3903 		cpu = iter->cpu_file;
3904 		buf_iter = trace_buffer_iter(iter, cpu);
3905 		if (buf_iter) {
3906 			if (!ring_buffer_iter_empty(buf_iter))
3907 				return 0;
3908 		} else {
3909 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3910 				return 0;
3911 		}
3912 		return 1;
3913 	}
3914 
3915 	for_each_tracing_cpu(cpu) {
3916 		buf_iter = trace_buffer_iter(iter, cpu);
3917 		if (buf_iter) {
3918 			if (!ring_buffer_iter_empty(buf_iter))
3919 				return 0;
3920 		} else {
3921 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3922 				return 0;
3923 		}
3924 	}
3925 
3926 	return 1;
3927 }
3928 
3929 /*  Called with trace_event_read_lock() held. */
3930 enum print_line_t print_trace_line(struct trace_iterator *iter)
3931 {
3932 	struct trace_array *tr = iter->tr;
3933 	unsigned long trace_flags = tr->trace_flags;
3934 	enum print_line_t ret;
3935 
3936 	if (iter->lost_events) {
3937 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3938 				 iter->cpu, iter->lost_events);
3939 		if (trace_seq_has_overflowed(&iter->seq))
3940 			return TRACE_TYPE_PARTIAL_LINE;
3941 	}
3942 
3943 	if (iter->trace && iter->trace->print_line) {
3944 		ret = iter->trace->print_line(iter);
3945 		if (ret != TRACE_TYPE_UNHANDLED)
3946 			return ret;
3947 	}
3948 
3949 	if (iter->ent->type == TRACE_BPUTS &&
3950 			trace_flags & TRACE_ITER_PRINTK &&
3951 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3952 		return trace_print_bputs_msg_only(iter);
3953 
3954 	if (iter->ent->type == TRACE_BPRINT &&
3955 			trace_flags & TRACE_ITER_PRINTK &&
3956 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3957 		return trace_print_bprintk_msg_only(iter);
3958 
3959 	if (iter->ent->type == TRACE_PRINT &&
3960 			trace_flags & TRACE_ITER_PRINTK &&
3961 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3962 		return trace_print_printk_msg_only(iter);
3963 
3964 	if (trace_flags & TRACE_ITER_BIN)
3965 		return print_bin_fmt(iter);
3966 
3967 	if (trace_flags & TRACE_ITER_HEX)
3968 		return print_hex_fmt(iter);
3969 
3970 	if (trace_flags & TRACE_ITER_RAW)
3971 		return print_raw_fmt(iter);
3972 
3973 	return print_trace_fmt(iter);
3974 }
3975 
3976 void trace_latency_header(struct seq_file *m)
3977 {
3978 	struct trace_iterator *iter = m->private;
3979 	struct trace_array *tr = iter->tr;
3980 
3981 	/* print nothing if the buffers are empty */
3982 	if (trace_empty(iter))
3983 		return;
3984 
3985 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3986 		print_trace_header(m, iter);
3987 
3988 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3989 		print_lat_help_header(m);
3990 }
3991 
3992 void trace_default_header(struct seq_file *m)
3993 {
3994 	struct trace_iterator *iter = m->private;
3995 	struct trace_array *tr = iter->tr;
3996 	unsigned long trace_flags = tr->trace_flags;
3997 
3998 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3999 		return;
4000 
4001 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4002 		/* print nothing if the buffers are empty */
4003 		if (trace_empty(iter))
4004 			return;
4005 		print_trace_header(m, iter);
4006 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4007 			print_lat_help_header(m);
4008 	} else {
4009 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4010 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4011 				print_func_help_header_irq(iter->trace_buffer,
4012 							   m, trace_flags);
4013 			else
4014 				print_func_help_header(iter->trace_buffer, m,
4015 						       trace_flags);
4016 		}
4017 	}
4018 }
4019 
4020 static void test_ftrace_alive(struct seq_file *m)
4021 {
4022 	if (!ftrace_is_dead())
4023 		return;
4024 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4025 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4026 }
4027 
4028 #ifdef CONFIG_TRACER_MAX_TRACE
4029 static void show_snapshot_main_help(struct seq_file *m)
4030 {
4031 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4032 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4033 		    "#                      Takes a snapshot of the main buffer.\n"
4034 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4035 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4036 		    "#                       is not a '0' or '1')\n");
4037 }
4038 
4039 static void show_snapshot_percpu_help(struct seq_file *m)
4040 {
4041 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4042 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4043 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4044 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4045 #else
4046 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4047 		    "#                     Must use main snapshot file to allocate.\n");
4048 #endif
4049 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4050 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4051 		    "#                       is not a '0' or '1')\n");
4052 }
4053 
4054 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4055 {
4056 	if (iter->tr->allocated_snapshot)
4057 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4058 	else
4059 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4060 
4061 	seq_puts(m, "# Snapshot commands:\n");
4062 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4063 		show_snapshot_main_help(m);
4064 	else
4065 		show_snapshot_percpu_help(m);
4066 }
4067 #else
4068 /* Should never be called */
4069 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4070 #endif
4071 
4072 static int s_show(struct seq_file *m, void *v)
4073 {
4074 	struct trace_iterator *iter = v;
4075 	int ret;
4076 
4077 	if (iter->ent == NULL) {
4078 		if (iter->tr) {
4079 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4080 			seq_puts(m, "#\n");
4081 			test_ftrace_alive(m);
4082 		}
4083 		if (iter->snapshot && trace_empty(iter))
4084 			print_snapshot_help(m, iter);
4085 		else if (iter->trace && iter->trace->print_header)
4086 			iter->trace->print_header(m);
4087 		else
4088 			trace_default_header(m);
4089 
4090 	} else if (iter->leftover) {
4091 		/*
4092 		 * If we filled the seq_file buffer earlier, we
4093 		 * want to just show it now.
4094 		 */
4095 		ret = trace_print_seq(m, &iter->seq);
4096 
4097 		/* ret should this time be zero, but you never know */
4098 		iter->leftover = ret;
4099 
4100 	} else {
4101 		print_trace_line(iter);
4102 		ret = trace_print_seq(m, &iter->seq);
4103 		/*
4104 		 * If we overflow the seq_file buffer, then it will
4105 		 * ask us for this data again at start up.
4106 		 * Use that instead.
4107 		 *  ret is 0 if seq_file write succeeded.
4108 		 *        -1 otherwise.
4109 		 */
4110 		iter->leftover = ret;
4111 	}
4112 
4113 	return 0;
4114 }
4115 
4116 /*
4117  * Should be used after trace_array_get(), trace_types_lock
4118  * ensures that i_cdev was already initialized.
4119  */
4120 static inline int tracing_get_cpu(struct inode *inode)
4121 {
4122 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4123 		return (long)inode->i_cdev - 1;
4124 	return RING_BUFFER_ALL_CPUS;
4125 }
4126 
4127 static const struct seq_operations tracer_seq_ops = {
4128 	.start		= s_start,
4129 	.next		= s_next,
4130 	.stop		= s_stop,
4131 	.show		= s_show,
4132 };
4133 
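/*
 * Set up the trace_iterator used to read the "trace" file: copy the
 * current tracer, select the trace or snapshot buffer, stop tracing
 * (unless a snapshot is being read) and prepare a ring buffer
 * iterator for each CPU that will be dumped.
 */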
4134 static struct trace_iterator *
4135 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4136 {
4137 	struct trace_array *tr = inode->i_private;
4138 	struct trace_iterator *iter;
4139 	int cpu;
4140 
4141 	if (tracing_disabled)
4142 		return ERR_PTR(-ENODEV);
4143 
4144 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4145 	if (!iter)
4146 		return ERR_PTR(-ENOMEM);
4147 
4148 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4149 				    GFP_KERNEL);
4150 	if (!iter->buffer_iter)
4151 		goto release;
4152 
4153 	/*
4154 	 * We make a copy of the current tracer to avoid concurrent
4155 	 * changes on it while we are reading.
4156 	 */
4157 	mutex_lock(&trace_types_lock);
4158 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4159 	if (!iter->trace)
4160 		goto fail;
4161 
4162 	*iter->trace = *tr->current_trace;
4163 
4164 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4165 		goto fail;
4166 
4167 	iter->tr = tr;
4168 
4169 #ifdef CONFIG_TRACER_MAX_TRACE
4170 	/* Currently only the top directory has a snapshot */
4171 	if (tr->current_trace->print_max || snapshot)
4172 		iter->trace_buffer = &tr->max_buffer;
4173 	else
4174 #endif
4175 		iter->trace_buffer = &tr->trace_buffer;
4176 	iter->snapshot = snapshot;
4177 	iter->pos = -1;
4178 	iter->cpu_file = tracing_get_cpu(inode);
4179 	mutex_init(&iter->mutex);
4180 
4181 	/* Notify the tracer early; before we stop tracing. */
4182 	if (iter->trace && iter->trace->open)
4183 		iter->trace->open(iter);
4184 
4185 	/* Annotate start of buffers if we had overruns */
4186 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4187 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4188 
4189 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4190 	if (trace_clocks[tr->clock_id].in_ns)
4191 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4192 
4193 	/* stop the trace while dumping if we are not opening "snapshot" */
4194 	if (!iter->snapshot)
4195 		tracing_stop_tr(tr);
4196 
4197 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4198 		for_each_tracing_cpu(cpu) {
4199 			iter->buffer_iter[cpu] =
4200 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4201 							 cpu, GFP_KERNEL);
4202 		}
4203 		ring_buffer_read_prepare_sync();
4204 		for_each_tracing_cpu(cpu) {
4205 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4206 			tracing_iter_reset(iter, cpu);
4207 		}
4208 	} else {
4209 		cpu = iter->cpu_file;
4210 		iter->buffer_iter[cpu] =
4211 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4212 						 cpu, GFP_KERNEL);
4213 		ring_buffer_read_prepare_sync();
4214 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4215 		tracing_iter_reset(iter, cpu);
4216 	}
4217 
4218 	mutex_unlock(&trace_types_lock);
4219 
4220 	return iter;
4221 
4222  fail:
4223 	mutex_unlock(&trace_types_lock);
4224 	kfree(iter->trace);
4225 	kfree(iter->buffer_iter);
4226 release:
4227 	seq_release_private(inode, file);
4228 	return ERR_PTR(-ENOMEM);
4229 }
4230 
4231 int tracing_open_generic(struct inode *inode, struct file *filp)
4232 {
4233 	int ret;
4234 
4235 	ret = tracing_check_open_get_tr(NULL);
4236 	if (ret)
4237 		return ret;
4238 
4239 	filp->private_data = inode->i_private;
4240 	return 0;
4241 }
4242 
4243 bool tracing_is_disabled(void)
4244 {
4245 	return (tracing_disabled) ? true : false;
4246 }
4247 
4248 /*
4249  * Open and update trace_array ref count.
4250  * Must have the current trace_array passed to it.
4251  */
4252 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4253 {
4254 	struct trace_array *tr = inode->i_private;
4255 	int ret;
4256 
4257 	ret = tracing_check_open_get_tr(tr);
4258 	if (ret)
4259 		return ret;
4260 
4261 	filp->private_data = inode->i_private;
4262 
4263 	return 0;
4264 }
4265 
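/*
 * Tear down what __tracing_open() set up: finish the per-CPU ring
 * buffer iterators, give the tracer a chance to close, restart
 * tracing if it was stopped for the read, and drop the trace_array
 * reference.
 */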
4266 static int tracing_release(struct inode *inode, struct file *file)
4267 {
4268 	struct trace_array *tr = inode->i_private;
4269 	struct seq_file *m = file->private_data;
4270 	struct trace_iterator *iter;
4271 	int cpu;
4272 
4273 	if (!(file->f_mode & FMODE_READ)) {
4274 		trace_array_put(tr);
4275 		return 0;
4276 	}
4277 
4278 	/* Writes do not use seq_file */
4279 	iter = m->private;
4280 	mutex_lock(&trace_types_lock);
4281 
4282 	for_each_tracing_cpu(cpu) {
4283 		if (iter->buffer_iter[cpu])
4284 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4285 	}
4286 
4287 	if (iter->trace && iter->trace->close)
4288 		iter->trace->close(iter);
4289 
4290 	if (!iter->snapshot)
4291 		/* reenable tracing if it was previously enabled */
4292 		tracing_start_tr(tr);
4293 
4294 	__trace_array_put(tr);
4295 
4296 	mutex_unlock(&trace_types_lock);
4297 
4298 	mutex_destroy(&iter->mutex);
4299 	free_cpumask_var(iter->started);
4300 	kfree(iter->trace);
4301 	kfree(iter->buffer_iter);
4302 	seq_release_private(inode, file);
4303 
4304 	return 0;
4305 }
4306 
4307 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4308 {
4309 	struct trace_array *tr = inode->i_private;
4310 
4311 	trace_array_put(tr);
4312 	return 0;
4313 }
4314 
4315 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4316 {
4317 	struct trace_array *tr = inode->i_private;
4318 
4319 	trace_array_put(tr);
4320 
4321 	return single_release(inode, file);
4322 }
4323 
4324 static int tracing_open(struct inode *inode, struct file *file)
4325 {
4326 	struct trace_array *tr = inode->i_private;
4327 	struct trace_iterator *iter;
4328 	int ret;
4329 
4330 	ret = tracing_check_open_get_tr(tr);
4331 	if (ret)
4332 		return ret;
4333 
4334 	/* If this file was open for write, then erase contents */
4335 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4336 		int cpu = tracing_get_cpu(inode);
4337 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4338 
4339 #ifdef CONFIG_TRACER_MAX_TRACE
4340 		if (tr->current_trace->print_max)
4341 			trace_buf = &tr->max_buffer;
4342 #endif
4343 
4344 		if (cpu == RING_BUFFER_ALL_CPUS)
4345 			tracing_reset_online_cpus(trace_buf);
4346 		else
4347 			tracing_reset_cpu(trace_buf, cpu);
4348 	}
4349 
4350 	if (file->f_mode & FMODE_READ) {
4351 		iter = __tracing_open(inode, file, false);
4352 		if (IS_ERR(iter))
4353 			ret = PTR_ERR(iter);
4354 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4355 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4356 	}
4357 
4358 	if (ret < 0)
4359 		trace_array_put(tr);
4360 
4361 	return ret;
4362 }
4363 
4364 /*
4365  * Some tracers are not suitable for instance buffers.
4366  * A tracer is always available for the global array (toplevel)
4367  * or if it explicitly states that it is.
4368  */
4369 static bool
4370 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4371 {
4372 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4373 }
4374 
4375 /* Find the next tracer that this trace array may use */
4376 static struct tracer *
4377 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4378 {
4379 	while (t && !trace_ok_for_array(t, tr))
4380 		t = t->next;
4381 
4382 	return t;
4383 }
4384 
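/*
 * seq_file iteration over the registered tracers, used by the
 * "available_tracers" file. Only tracers allowed for this trace
 * array are shown.
 */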
4385 static void *
4386 t_next(struct seq_file *m, void *v, loff_t *pos)
4387 {
4388 	struct trace_array *tr = m->private;
4389 	struct tracer *t = v;
4390 
4391 	(*pos)++;
4392 
4393 	if (t)
4394 		t = get_tracer_for_array(tr, t->next);
4395 
4396 	return t;
4397 }
4398 
4399 static void *t_start(struct seq_file *m, loff_t *pos)
4400 {
4401 	struct trace_array *tr = m->private;
4402 	struct tracer *t;
4403 	loff_t l = 0;
4404 
4405 	mutex_lock(&trace_types_lock);
4406 
4407 	t = get_tracer_for_array(tr, trace_types);
4408 	for (; t && l < *pos; t = t_next(m, t, &l))
4409 			;
4410 
4411 	return t;
4412 }
4413 
4414 static void t_stop(struct seq_file *m, void *p)
4415 {
4416 	mutex_unlock(&trace_types_lock);
4417 }
4418 
4419 static int t_show(struct seq_file *m, void *v)
4420 {
4421 	struct tracer *t = v;
4422 
4423 	if (!t)
4424 		return 0;
4425 
4426 	seq_puts(m, t->name);
4427 	if (t->next)
4428 		seq_putc(m, ' ');
4429 	else
4430 		seq_putc(m, '\n');
4431 
4432 	return 0;
4433 }
4434 
4435 static const struct seq_operations show_traces_seq_ops = {
4436 	.start		= t_start,
4437 	.next		= t_next,
4438 	.stop		= t_stop,
4439 	.show		= t_show,
4440 };
4441 
4442 static int show_traces_open(struct inode *inode, struct file *file)
4443 {
4444 	struct trace_array *tr = inode->i_private;
4445 	struct seq_file *m;
4446 	int ret;
4447 
4448 	ret = tracing_check_open_get_tr(tr);
4449 	if (ret)
4450 		return ret;
4451 
4452 	ret = seq_open(file, &show_traces_seq_ops);
4453 	if (ret) {
4454 		trace_array_put(tr);
4455 		return ret;
4456 	}
4457 
4458 	m = file->private_data;
4459 	m->private = tr;
4460 
4461 	return 0;
4462 }
4463 
4464 static int show_traces_release(struct inode *inode, struct file *file)
4465 {
4466 	struct trace_array *tr = inode->i_private;
4467 
4468 	trace_array_put(tr);
4469 	return seq_release(inode, file);
4470 }
4471 
4472 static ssize_t
4473 tracing_write_stub(struct file *filp, const char __user *ubuf,
4474 		   size_t count, loff_t *ppos)
4475 {
4476 	return count;
4477 }
4478 
4479 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4480 {
4481 	int ret;
4482 
4483 	if (file->f_mode & FMODE_READ)
4484 		ret = seq_lseek(file, offset, whence);
4485 	else
4486 		file->f_pos = ret = 0;
4487 
4488 	return ret;
4489 }
4490 
4491 static const struct file_operations tracing_fops = {
4492 	.open		= tracing_open,
4493 	.read		= seq_read,
4494 	.write		= tracing_write_stub,
4495 	.llseek		= tracing_lseek,
4496 	.release	= tracing_release,
4497 };
4498 
4499 static const struct file_operations show_traces_fops = {
4500 	.open		= show_traces_open,
4501 	.read		= seq_read,
4502 	.llseek		= seq_lseek,
4503 	.release	= show_traces_release,
4504 };
4505 
4506 static ssize_t
4507 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4508 		     size_t count, loff_t *ppos)
4509 {
4510 	struct trace_array *tr = file_inode(filp)->i_private;
4511 	char *mask_str;
4512 	int len;
4513 
4514 	len = snprintf(NULL, 0, "%*pb\n",
4515 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4516 	mask_str = kmalloc(len, GFP_KERNEL);
4517 	if (!mask_str)
4518 		return -ENOMEM;
4519 
4520 	len = snprintf(mask_str, len, "%*pb\n",
4521 		       cpumask_pr_args(tr->tracing_cpumask));
4522 	if (len >= count) {
4523 		count = -EINVAL;
4524 		goto out_err;
4525 	}
4526 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4527 
4528 out_err:
4529 	kfree(mask_str);
4530 
4531 	return count;
4532 }
4533 
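/*
 * Update the "tracing_cpumask" file: parse the user supplied mask and
 * enable/disable per-CPU recording for every CPU whose bit changed.
 */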
4534 static ssize_t
4535 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4536 		      size_t count, loff_t *ppos)
4537 {
4538 	struct trace_array *tr = file_inode(filp)->i_private;
4539 	cpumask_var_t tracing_cpumask_new;
4540 	int err, cpu;
4541 
4542 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4543 		return -ENOMEM;
4544 
4545 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4546 	if (err)
4547 		goto err_unlock;
4548 
4549 	local_irq_disable();
4550 	arch_spin_lock(&tr->max_lock);
4551 	for_each_tracing_cpu(cpu) {
4552 		/*
4553 		 * Increase/decrease the disabled counter if we are
4554 		 * about to flip a bit in the cpumask:
4555 		 */
4556 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4557 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4558 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4559 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4560 		}
4561 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4562 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4563 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4564 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4565 		}
4566 	}
4567 	arch_spin_unlock(&tr->max_lock);
4568 	local_irq_enable();
4569 
4570 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4571 	free_cpumask_var(tracing_cpumask_new);
4572 
4573 	return count;
4574 
4575 err_unlock:
4576 	free_cpumask_var(tracing_cpumask_new);
4577 
4578 	return err;
4579 }
4580 
4581 static const struct file_operations tracing_cpumask_fops = {
4582 	.open		= tracing_open_generic_tr,
4583 	.read		= tracing_cpumask_read,
4584 	.write		= tracing_cpumask_write,
4585 	.release	= tracing_release_generic_tr,
4586 	.llseek		= generic_file_llseek,
4587 };
4588 
4589 static int tracing_trace_options_show(struct seq_file *m, void *v)
4590 {
4591 	struct tracer_opt *trace_opts;
4592 	struct trace_array *tr = m->private;
4593 	u32 tracer_flags;
4594 	int i;
4595 
4596 	mutex_lock(&trace_types_lock);
4597 	tracer_flags = tr->current_trace->flags->val;
4598 	trace_opts = tr->current_trace->flags->opts;
4599 
4600 	for (i = 0; trace_options[i]; i++) {
4601 		if (tr->trace_flags & (1 << i))
4602 			seq_printf(m, "%s\n", trace_options[i]);
4603 		else
4604 			seq_printf(m, "no%s\n", trace_options[i]);
4605 	}
4606 
4607 	for (i = 0; trace_opts[i].name; i++) {
4608 		if (tracer_flags & trace_opts[i].bit)
4609 			seq_printf(m, "%s\n", trace_opts[i].name);
4610 		else
4611 			seq_printf(m, "no%s\n", trace_opts[i].name);
4612 	}
4613 	mutex_unlock(&trace_types_lock);
4614 
4615 	return 0;
4616 }
4617 
4618 static int __set_tracer_option(struct trace_array *tr,
4619 			       struct tracer_flags *tracer_flags,
4620 			       struct tracer_opt *opts, int neg)
4621 {
4622 	struct tracer *trace = tracer_flags->trace;
4623 	int ret;
4624 
4625 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4626 	if (ret)
4627 		return ret;
4628 
4629 	if (neg)
4630 		tracer_flags->val &= ~opts->bit;
4631 	else
4632 		tracer_flags->val |= opts->bit;
4633 	return 0;
4634 }
4635 
4636 /* Try to assign a tracer specific option */
4637 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4638 {
4639 	struct tracer *trace = tr->current_trace;
4640 	struct tracer_flags *tracer_flags = trace->flags;
4641 	struct tracer_opt *opts = NULL;
4642 	int i;
4643 
4644 	for (i = 0; tracer_flags->opts[i].name; i++) {
4645 		opts = &tracer_flags->opts[i];
4646 
4647 		if (strcmp(cmp, opts->name) == 0)
4648 			return __set_tracer_option(tr, trace->flags, opts, neg);
4649 	}
4650 
4651 	return -EINVAL;
4652 }
4653 
4654 /* Some tracers require overwrite to stay enabled */
4655 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4656 {
4657 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4658 		return -1;
4659 
4660 	return 0;
4661 }
4662 
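/*
 * Set or clear a TRACE_ITER_* flag on @tr and apply the side effects
 * the option requires (cmdline/tgid recording, buffer overwrite mode,
 * trace_printk, etc.). Returns -EINVAL if the current tracer rejects
 * the change.
 */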
4663 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4664 {
4665 	/* do nothing if flag is already set */
4666 	if (!!(tr->trace_flags & mask) == !!enabled)
4667 		return 0;
4668 
4669 	/* Give the tracer a chance to approve the change */
4670 	if (tr->current_trace->flag_changed)
4671 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4672 			return -EINVAL;
4673 
4674 	if (enabled)
4675 		tr->trace_flags |= mask;
4676 	else
4677 		tr->trace_flags &= ~mask;
4678 
4679 	if (mask == TRACE_ITER_RECORD_CMD)
4680 		trace_event_enable_cmd_record(enabled);
4681 
4682 	if (mask == TRACE_ITER_RECORD_TGID) {
4683 		if (!tgid_map)
4684 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4685 					   sizeof(*tgid_map),
4686 					   GFP_KERNEL);
4687 		if (!tgid_map) {
4688 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4689 			return -ENOMEM;
4690 		}
4691 
4692 		trace_event_enable_tgid_record(enabled);
4693 	}
4694 
4695 	if (mask == TRACE_ITER_EVENT_FORK)
4696 		trace_event_follow_fork(tr, enabled);
4697 
4698 	if (mask == TRACE_ITER_FUNC_FORK)
4699 		ftrace_pid_follow_fork(tr, enabled);
4700 
4701 	if (mask == TRACE_ITER_OVERWRITE) {
4702 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4703 #ifdef CONFIG_TRACER_MAX_TRACE
4704 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4705 #endif
4706 	}
4707 
4708 	if (mask == TRACE_ITER_PRINTK) {
4709 		trace_printk_start_stop_comm(enabled);
4710 		trace_printk_control(enabled);
4711 	}
4712 
4713 	return 0;
4714 }
4715 
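/*
 * Parse a single option name (optionally prefixed with "no" to clear
 * it) and apply it as either a core trace flag or a tracer specific
 * option.
 */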
4716 static int trace_set_options(struct trace_array *tr, char *option)
4717 {
4718 	char *cmp;
4719 	int neg = 0;
4720 	int ret;
4721 	size_t orig_len = strlen(option);
4722 	int len;
4723 
4724 	cmp = strstrip(option);
4725 
4726 	len = str_has_prefix(cmp, "no");
4727 	if (len)
4728 		neg = 1;
4729 
4730 	cmp += len;
4731 
4732 	mutex_lock(&trace_types_lock);
4733 
4734 	ret = match_string(trace_options, -1, cmp);
4735 	/* If no option could be set, test the specific tracer options */
4736 	if (ret < 0)
4737 		ret = set_tracer_option(tr, cmp, neg);
4738 	else
4739 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4740 
4741 	mutex_unlock(&trace_types_lock);
4742 
4743 	/*
4744 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4745 	 * turn it back into a space.
4746 	 */
4747 	if (orig_len > strlen(option))
4748 		option[strlen(option)] = ' ';
4749 
4750 	return ret;
4751 }
4752 
4753 static void __init apply_trace_boot_options(void)
4754 {
4755 	char *buf = trace_boot_options_buf;
4756 	char *option;
4757 
4758 	while (true) {
4759 		option = strsep(&buf, ",");
4760 
4761 		if (!option)
4762 			break;
4763 
4764 		if (*option)
4765 			trace_set_options(&global_trace, option);
4766 
4767 		/* Put back the comma to allow this to be called again */
4768 		if (buf)
4769 			*(buf - 1) = ',';
4770 	}
4771 }
4772 
4773 static ssize_t
4774 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4775 			size_t cnt, loff_t *ppos)
4776 {
4777 	struct seq_file *m = filp->private_data;
4778 	struct trace_array *tr = m->private;
4779 	char buf[64];
4780 	int ret;
4781 
4782 	if (cnt >= sizeof(buf))
4783 		return -EINVAL;
4784 
4785 	if (copy_from_user(buf, ubuf, cnt))
4786 		return -EFAULT;
4787 
4788 	buf[cnt] = 0;
4789 
4790 	ret = trace_set_options(tr, buf);
4791 	if (ret < 0)
4792 		return ret;
4793 
4794 	*ppos += cnt;
4795 
4796 	return cnt;
4797 }
4798 
4799 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4800 {
4801 	struct trace_array *tr = inode->i_private;
4802 	int ret;
4803 
4804 	ret = tracing_check_open_get_tr(tr);
4805 	if (ret)
4806 		return ret;
4807 
4808 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4809 	if (ret < 0)
4810 		trace_array_put(tr);
4811 
4812 	return ret;
4813 }
4814 
4815 static const struct file_operations tracing_iter_fops = {
4816 	.open		= tracing_trace_options_open,
4817 	.read		= seq_read,
4818 	.llseek		= seq_lseek,
4819 	.release	= tracing_single_release_tr,
4820 	.write		= tracing_trace_options_write,
4821 };
4822 
4823 static const char readme_msg[] =
4824 	"tracing mini-HOWTO:\n\n"
4825 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4826 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4827 	" Important files:\n"
4828 	"  trace\t\t\t- The static contents of the buffer\n"
4829 	"\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4830 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4831 	"  current_tracer\t- function and latency tracers\n"
4832 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4833 	"  error_log\t- error log for failed commands (that support it)\n"
4834 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4835 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4836 	"  trace_clock\t\t- change the clock used to order events\n"
4837 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4838 	"      global:   Synced across CPUs but slows tracing down.\n"
4839 	"     counter:   Not a clock, but just an increment\n"
4840 	"      uptime:   Jiffy counter from time of boot\n"
4841 	"        perf:   Same clock that perf events use\n"
4842 #ifdef CONFIG_X86_64
4843 	"     x86-tsc:   TSC cycle counter\n"
4844 #endif
4845 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4846 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4847 	"    absolute:   Absolute (standalone) timestamp\n"
4848 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4849 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4850 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4851 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4852 	"\t\t\t  Remove sub-buffer with rmdir\n"
4853 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4854 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4855 	"\t\t\t  option name\n"
4856 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4857 #ifdef CONFIG_DYNAMIC_FTRACE
4858 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4859 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4860 	"\t\t\t  functions\n"
4861 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4862 	"\t     modules: Can select a group via module\n"
4863 	"\t      Format: :mod:<module-name>\n"
4864 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4865 	"\t    triggers: a command to perform when function is hit\n"
4866 	"\t      Format: <function>:<trigger>[:count]\n"
4867 	"\t     trigger: traceon, traceoff\n"
4868 	"\t\t      enable_event:<system>:<event>\n"
4869 	"\t\t      disable_event:<system>:<event>\n"
4870 #ifdef CONFIG_STACKTRACE
4871 	"\t\t      stacktrace\n"
4872 #endif
4873 #ifdef CONFIG_TRACER_SNAPSHOT
4874 	"\t\t      snapshot\n"
4875 #endif
4876 	"\t\t      dump\n"
4877 	"\t\t      cpudump\n"
4878 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4879 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4880 	"\t     The first one will disable tracing every time do_fault is hit\n"
4881 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4882 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4883 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4884 	"\t       the counter will not decrement. It only decrements when the\n"
4885 	"\t       trigger did work\n"
4886 	"\t     To remove trigger without count:\n"
4887 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4888 	"\t     To remove trigger with a count:\n"
4889 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4890 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4891 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4892 	"\t    modules: Can select a group via module command :mod:\n"
4893 	"\t    Does not accept triggers\n"
4894 #endif /* CONFIG_DYNAMIC_FTRACE */
4895 #ifdef CONFIG_FUNCTION_TRACER
4896 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4897 	"\t\t    (function)\n"
4898 #endif
4899 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4900 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4901 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4902 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4903 #endif
4904 #ifdef CONFIG_TRACER_SNAPSHOT
4905 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4906 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4907 	"\t\t\t  information\n"
4908 #endif
4909 #ifdef CONFIG_STACK_TRACER
4910 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4911 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4912 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4913 	"\t\t\t  new trace)\n"
4914 #ifdef CONFIG_DYNAMIC_FTRACE
4915 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4916 	"\t\t\t  traces\n"
4917 #endif
4918 #endif /* CONFIG_STACK_TRACER */
4919 #ifdef CONFIG_DYNAMIC_EVENTS
4920 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4921 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4922 #endif
4923 #ifdef CONFIG_KPROBE_EVENTS
4924 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4925 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4926 #endif
4927 #ifdef CONFIG_UPROBE_EVENTS
4928 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4929 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4930 #endif
4931 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4932 	"\t  accepts: event-definitions (one definition per line)\n"
4933 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4934 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4935 #ifdef CONFIG_HIST_TRIGGERS
4936 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4937 #endif
4938 	"\t           -:[<group>/]<event>\n"
4939 #ifdef CONFIG_KPROBE_EVENTS
4940 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4941   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4942 #endif
4943 #ifdef CONFIG_UPROBE_EVENTS
4944   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4945 #endif
4946 	"\t     args: <name>=fetcharg[:type]\n"
4947 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4948 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4949 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4950 #else
4951 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4952 #endif
4953 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4954 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4955 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4956 	"\t           <type>\\[<array-size>\\]\n"
4957 #ifdef CONFIG_HIST_TRIGGERS
4958 	"\t    field: <stype> <name>;\n"
4959 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4960 	"\t           [unsigned] char/int/long\n"
4961 #endif
4962 #endif
4963 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4964 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4965 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4966 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4967 	"\t\t\t  events\n"
4968 	"      filter\t\t- If set, only events passing filter are traced\n"
4969 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4970 	"\t\t\t  <event>:\n"
4971 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4972 	"      filter\t\t- If set, only events passing filter are traced\n"
4973 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4974 	"\t    Format: <trigger>[:count][if <filter>]\n"
4975 	"\t   trigger: traceon, traceoff\n"
4976 	"\t            enable_event:<system>:<event>\n"
4977 	"\t            disable_event:<system>:<event>\n"
4978 #ifdef CONFIG_HIST_TRIGGERS
4979 	"\t            enable_hist:<system>:<event>\n"
4980 	"\t            disable_hist:<system>:<event>\n"
4981 #endif
4982 #ifdef CONFIG_STACKTRACE
4983 	"\t\t    stacktrace\n"
4984 #endif
4985 #ifdef CONFIG_TRACER_SNAPSHOT
4986 	"\t\t    snapshot\n"
4987 #endif
4988 #ifdef CONFIG_HIST_TRIGGERS
4989 	"\t\t    hist (see below)\n"
4990 #endif
4991 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4992 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4993 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4994 	"\t                  events/block/block_unplug/trigger\n"
4995 	"\t   The first disables tracing every time block_unplug is hit.\n"
4996 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4997 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4998 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4999 	"\t   Like function triggers, the counter is only decremented if it\n"
5000 	"\t    enabled or disabled tracing.\n"
5001 	"\t   To remove a trigger without a count:\n"
5002 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5003 	"\t   To remove a trigger with a count:\n"
5004 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5005 	"\t   Filters can be ignored when removing a trigger.\n"
5006 #ifdef CONFIG_HIST_TRIGGERS
5007 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5008 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5009 	"\t            [:values=<field1[,field2,...]>]\n"
5010 	"\t            [:sort=<field1[,field2,...]>]\n"
5011 	"\t            [:size=#entries]\n"
5012 	"\t            [:pause][:continue][:clear]\n"
5013 	"\t            [:name=histname1]\n"
5014 	"\t            [:<handler>.<action>]\n"
5015 	"\t            [if <filter>]\n\n"
5016 	"\t    When a matching event is hit, an entry is added to a hash\n"
5017 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5018 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5019 	"\t    correspond to fields in the event's format description.  Keys\n"
5020 	"\t    can be any field, or the special string 'stacktrace'.\n"
5021 	"\t    Compound keys consisting of up to two fields can be specified\n"
5022 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5023 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5024 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5025 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5026 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5027 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5028 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5029 	"\t    its histogram data will be shared with other triggers of the\n"
5030 	"\t    same name, and trigger hits will update this common data.\n\n"
5031 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5032 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5033 	"\t    triggers attached to an event, there will be a table for each\n"
5034 	"\t    trigger in the output.  The table displayed for a named\n"
5035 	"\t    trigger will be the same as any other instance having the\n"
5036 	"\t    same name.  The default format used to display a given field\n"
5037 	"\t    can be modified by appending any of the following modifiers\n"
5038 	"\t    to the field name, as applicable:\n\n"
5039 	"\t            .hex        display a number as a hex value\n"
5040 	"\t            .sym        display an address as a symbol\n"
5041 	"\t            .sym-offset display an address as a symbol and offset\n"
5042 	"\t            .execname   display a common_pid as a program name\n"
5043 	"\t            .syscall    display a syscall id as a syscall name\n"
5044 	"\t            .log2       display log2 value rather than raw number\n"
5045 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5046 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5047 	"\t    trigger or to start a hist trigger but not log any events\n"
5048 	"\t    until told to do so.  'continue' can be used to start or\n"
5049 	"\t    restart a paused hist trigger.\n\n"
5050 	"\t    The 'clear' parameter will clear the contents of a running\n"
5051 	"\t    hist trigger and leave its current paused/active state\n"
5052 	"\t    unchanged.\n\n"
5053 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5054 	"\t    have one event conditionally start and stop another event's\n"
5055 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5056 	"\t    the enable_event and disable_event triggers.\n\n"
5057 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5058 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5059 	"\t        <handler>.<action>\n\n"
5060 	"\t    The available handlers are:\n\n"
5061 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5062 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5063 	"\t        onchange(var)            - invoke action if var changes\n\n"
5064 	"\t    The available actions are:\n\n"
5065 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5066 	"\t        save(field,...)                      - save current event fields\n"
5067 #ifdef CONFIG_TRACER_SNAPSHOT
5068 	"\t        snapshot()                           - snapshot the trace buffer\n"
5069 #endif
5070 #endif
5071 ;
5072 
5073 static ssize_t
5074 tracing_readme_read(struct file *filp, char __user *ubuf,
5075 		       size_t cnt, loff_t *ppos)
5076 {
5077 	return simple_read_from_buffer(ubuf, cnt, ppos,
5078 					readme_msg, strlen(readme_msg));
5079 }
5080 
5081 static const struct file_operations tracing_readme_fops = {
5082 	.open		= tracing_open_generic,
5083 	.read		= tracing_readme_read,
5084 	.llseek		= generic_file_llseek,
5085 };
5086 
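/*
 * seq_file iterator for the "saved_tgids" file: walk tgid_map and
 * print the recorded pid/tgid pairs.
 */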
5087 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5088 {
5089 	int *ptr = v;
5090 
5091 	if (*pos || m->count)
5092 		ptr++;
5093 
5094 	(*pos)++;
5095 
5096 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5097 		if (trace_find_tgid(*ptr))
5098 			return ptr;
5099 	}
5100 
5101 	return NULL;
5102 }
5103 
5104 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5105 {
5106 	void *v;
5107 	loff_t l = 0;
5108 
5109 	if (!tgid_map)
5110 		return NULL;
5111 
5112 	v = &tgid_map[0];
5113 	while (l <= *pos) {
5114 		v = saved_tgids_next(m, v, &l);
5115 		if (!v)
5116 			return NULL;
5117 	}
5118 
5119 	return v;
5120 }
5121 
5122 static void saved_tgids_stop(struct seq_file *m, void *v)
5123 {
5124 }
5125 
5126 static int saved_tgids_show(struct seq_file *m, void *v)
5127 {
5128 	int pid = (int *)v - tgid_map;
5129 
5130 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5131 	return 0;
5132 }
5133 
5134 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5135 	.start		= saved_tgids_start,
5136 	.stop		= saved_tgids_stop,
5137 	.next		= saved_tgids_next,
5138 	.show		= saved_tgids_show,
5139 };
5140 
5141 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5142 {
5143 	int ret;
5144 
5145 	ret = tracing_check_open_get_tr(NULL);
5146 	if (ret)
5147 		return ret;
5148 
5149 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5150 }
5151 
5152 
5153 static const struct file_operations tracing_saved_tgids_fops = {
5154 	.open		= tracing_saved_tgids_open,
5155 	.read		= seq_read,
5156 	.llseek		= seq_lseek,
5157 	.release	= seq_release,
5158 };
5159 
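/*
 * seq_file iterator for the "saved_cmdlines" file: walk the saved
 * pid list and print each pid with the comm recorded for it.
 */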
5160 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5161 {
5162 	unsigned int *ptr = v;
5163 
5164 	if (*pos || m->count)
5165 		ptr++;
5166 
5167 	(*pos)++;
5168 
5169 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5170 	     ptr++) {
5171 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5172 			continue;
5173 
5174 		return ptr;
5175 	}
5176 
5177 	return NULL;
5178 }
5179 
5180 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5181 {
5182 	void *v;
5183 	loff_t l = 0;
5184 
5185 	preempt_disable();
5186 	arch_spin_lock(&trace_cmdline_lock);
5187 
5188 	v = &savedcmd->map_cmdline_to_pid[0];
5189 	while (l <= *pos) {
5190 		v = saved_cmdlines_next(m, v, &l);
5191 		if (!v)
5192 			return NULL;
5193 	}
5194 
5195 	return v;
5196 }
5197 
5198 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5199 {
5200 	arch_spin_unlock(&trace_cmdline_lock);
5201 	preempt_enable();
5202 }
5203 
5204 static int saved_cmdlines_show(struct seq_file *m, void *v)
5205 {
5206 	char buf[TASK_COMM_LEN];
5207 	unsigned int *pid = v;
5208 
5209 	__trace_find_cmdline(*pid, buf);
5210 	seq_printf(m, "%d %s\n", *pid, buf);
5211 	return 0;
5212 }
5213 
5214 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5215 	.start		= saved_cmdlines_start,
5216 	.next		= saved_cmdlines_next,
5217 	.stop		= saved_cmdlines_stop,
5218 	.show		= saved_cmdlines_show,
5219 };
5220 
5221 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5222 {
5223 	int ret;
5224 
5225 	ret = tracing_check_open_get_tr(NULL);
5226 	if (ret)
5227 		return ret;
5228 
5229 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5230 }
5231 
5232 static const struct file_operations tracing_saved_cmdlines_fops = {
5233 	.open		= tracing_saved_cmdlines_open,
5234 	.read		= seq_read,
5235 	.llseek		= seq_lseek,
5236 	.release	= seq_release,
5237 };
5238 
5239 static ssize_t
5240 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5241 				 size_t cnt, loff_t *ppos)
5242 {
5243 	char buf[64];
5244 	int r;
5245 
5246 	arch_spin_lock(&trace_cmdline_lock);
5247 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5248 	arch_spin_unlock(&trace_cmdline_lock);
5249 
5250 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5251 }
5252 
5253 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5254 {
5255 	kfree(s->saved_cmdlines);
5256 	kfree(s->map_cmdline_to_pid);
5257 	kfree(s);
5258 }
5259 
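/*
 * Allocate a new saved_cmdlines buffer with @val entries and swap it
 * in for the old one.
 */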
5260 static int tracing_resize_saved_cmdlines(unsigned int val)
5261 {
5262 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5263 
5264 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5265 	if (!s)
5266 		return -ENOMEM;
5267 
5268 	if (allocate_cmdlines_buffer(val, s) < 0) {
5269 		kfree(s);
5270 		return -ENOMEM;
5271 	}
5272 
5273 	arch_spin_lock(&trace_cmdline_lock);
5274 	savedcmd_temp = savedcmd;
5275 	savedcmd = s;
5276 	arch_spin_unlock(&trace_cmdline_lock);
5277 	free_saved_cmdlines_buffer(savedcmd_temp);
5278 
5279 	return 0;
5280 }
5281 
5282 static ssize_t
5283 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5284 				  size_t cnt, loff_t *ppos)
5285 {
5286 	unsigned long val;
5287 	int ret;
5288 
5289 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5290 	if (ret)
5291 		return ret;
5292 
5293 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5294 	if (!val || val > PID_MAX_DEFAULT)
5295 		return -EINVAL;
5296 
5297 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5298 	if (ret < 0)
5299 		return ret;
5300 
5301 	*ppos += cnt;
5302 
5303 	return cnt;
5304 }
5305 
5306 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5307 	.open		= tracing_open_generic,
5308 	.read		= tracing_saved_cmdlines_size_read,
5309 	.write		= tracing_saved_cmdlines_size_write,
5310 };
5311 
5312 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
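/*
 * If @ptr landed on a tail item, follow it to the next map array and
 * skip that array's head item. Returns NULL at the end of the list.
 */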
5313 static union trace_eval_map_item *
5314 update_eval_map(union trace_eval_map_item *ptr)
5315 {
5316 	if (!ptr->map.eval_string) {
5317 		if (ptr->tail.next) {
5318 			ptr = ptr->tail.next;
5319 			/* Set ptr to the next real item (skip head) */
5320 			ptr++;
5321 		} else
5322 			return NULL;
5323 	}
5324 	return ptr;
5325 }
5326 
5327 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5328 {
5329 	union trace_eval_map_item *ptr = v;
5330 
5331 	/*
5332 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5333 	 * This really should never happen.
5334 	 */
5335 	ptr = update_eval_map(ptr);
5336 	if (WARN_ON_ONCE(!ptr))
5337 		return NULL;
5338 
5339 	ptr++;
5340 
5341 	(*pos)++;
5342 
5343 	ptr = update_eval_map(ptr);
5344 
5345 	return ptr;
5346 }
5347 
5348 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5349 {
5350 	union trace_eval_map_item *v;
5351 	loff_t l = 0;
5352 
5353 	mutex_lock(&trace_eval_mutex);
5354 
5355 	v = trace_eval_maps;
5356 	if (v)
5357 		v++;
5358 
5359 	while (v && l < *pos) {
5360 		v = eval_map_next(m, v, &l);
5361 	}
5362 
5363 	return v;
5364 }
5365 
5366 static void eval_map_stop(struct seq_file *m, void *v)
5367 {
5368 	mutex_unlock(&trace_eval_mutex);
5369 }
5370 
5371 static int eval_map_show(struct seq_file *m, void *v)
5372 {
5373 	union trace_eval_map_item *ptr = v;
5374 
5375 	seq_printf(m, "%s %ld (%s)\n",
5376 		   ptr->map.eval_string, ptr->map.eval_value,
5377 		   ptr->map.system);
5378 
5379 	return 0;
5380 }
5381 
5382 static const struct seq_operations tracing_eval_map_seq_ops = {
5383 	.start		= eval_map_start,
5384 	.next		= eval_map_next,
5385 	.stop		= eval_map_stop,
5386 	.show		= eval_map_show,
5387 };
5388 
5389 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5390 {
5391 	int ret;
5392 
5393 	ret = tracing_check_open_get_tr(NULL);
5394 	if (ret)
5395 		return ret;
5396 
5397 	return seq_open(filp, &tracing_eval_map_seq_ops);
5398 }
5399 
5400 static const struct file_operations tracing_eval_map_fops = {
5401 	.open		= tracing_eval_map_open,
5402 	.read		= seq_read,
5403 	.llseek		= seq_lseek,
5404 	.release	= seq_release,
5405 };
5406 
5407 static inline union trace_eval_map_item *
5408 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5409 {
5410 	/* Return tail of array given the head */
5411 	return ptr + ptr->head.length + 1;
5412 }
5413 
5414 static void
5415 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5416 			   int len)
5417 {
5418 	struct trace_eval_map **stop;
5419 	struct trace_eval_map **map;
5420 	union trace_eval_map_item *map_array;
5421 	union trace_eval_map_item *ptr;
5422 
5423 	stop = start + len;
5424 
5425 	/*
5426 	 * The trace_eval_maps contains the map plus a head and tail item,
5427 	 * where the head holds the module and length of array, and the
5428 	 * tail holds a pointer to the next list.
5429 	 */
5430 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5431 	if (!map_array) {
5432 		pr_warn("Unable to allocate trace eval mapping\n");
5433 		return;
5434 	}
5435 
5436 	mutex_lock(&trace_eval_mutex);
5437 
5438 	if (!trace_eval_maps)
5439 		trace_eval_maps = map_array;
5440 	else {
5441 		ptr = trace_eval_maps;
5442 		for (;;) {
5443 			ptr = trace_eval_jmp_to_tail(ptr);
5444 			if (!ptr->tail.next)
5445 				break;
5446 			ptr = ptr->tail.next;
5448 		}
5449 		ptr->tail.next = map_array;
5450 	}
5451 	map_array->head.mod = mod;
5452 	map_array->head.length = len;
5453 	map_array++;
5454 
5455 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5456 		map_array->map = **map;
5457 		map_array++;
5458 	}
5459 	memset(map_array, 0, sizeof(*map_array));
5460 
5461 	mutex_unlock(&trace_eval_mutex);
5462 }
5463 
5464 static void trace_create_eval_file(struct dentry *d_tracer)
5465 {
5466 	trace_create_file("eval_map", 0444, d_tracer,
5467 			  NULL, &tracing_eval_map_fops);
5468 }
5469 
5470 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5471 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5472 static inline void trace_insert_eval_map_file(struct module *mod,
5473 			      struct trace_eval_map **start, int len) { }
5474 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5475 
5476 static void trace_insert_eval_map(struct module *mod,
5477 				  struct trace_eval_map **start, int len)
5478 {
5479 	struct trace_eval_map **map;
5480 
5481 	if (len <= 0)
5482 		return;
5483 
5484 	map = start;
5485 
5486 	trace_event_eval_update(map, len);
5487 
5488 	trace_insert_eval_map_file(mod, start, len);
5489 }
5490 
5491 static ssize_t
5492 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5493 		       size_t cnt, loff_t *ppos)
5494 {
5495 	struct trace_array *tr = filp->private_data;
5496 	char buf[MAX_TRACER_SIZE+2];
5497 	int r;
5498 
5499 	mutex_lock(&trace_types_lock);
5500 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5501 	mutex_unlock(&trace_types_lock);
5502 
5503 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5504 }
5505 
5506 int tracer_init(struct tracer *t, struct trace_array *tr)
5507 {
5508 	tracing_reset_online_cpus(&tr->trace_buffer);
5509 	return t->init(tr);
5510 }
5511 
5512 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5513 {
5514 	int cpu;
5515 
5516 	for_each_tracing_cpu(cpu)
5517 		per_cpu_ptr(buf->data, cpu)->entries = val;
5518 }
5519 
5520 #ifdef CONFIG_TRACER_MAX_TRACE
5521 /* resize @tr's buffer to the size of @size_tr's entries */
5522 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5523 					struct trace_buffer *size_buf, int cpu_id)
5524 {
5525 	int cpu, ret = 0;
5526 
5527 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5528 		for_each_tracing_cpu(cpu) {
5529 			ret = ring_buffer_resize(trace_buf->buffer,
5530 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5531 			if (ret < 0)
5532 				break;
5533 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5534 				per_cpu_ptr(size_buf->data, cpu)->entries;
5535 		}
5536 	} else {
5537 		ret = ring_buffer_resize(trace_buf->buffer,
5538 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5539 		if (ret == 0)
5540 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5541 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5542 	}
5543 
5544 	return ret;
5545 }
5546 #endif /* CONFIG_TRACER_MAX_TRACE */
5547 
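/*
 * Resize the trace buffer for @tr. With CONFIG_TRACER_MAX_TRACE, the
 * max/snapshot buffer is kept the same size when the current tracer
 * uses it. @cpu may be a single CPU or RING_BUFFER_ALL_CPUS.
 */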
5548 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5549 					unsigned long size, int cpu)
5550 {
5551 	int ret;
5552 
5553 	/*
5554 	 * If kernel or user changes the size of the ring buffer
5555 	 * we use the size that was given, and we can forget about
5556 	 * expanding it later.
5557 	 */
5558 	ring_buffer_expanded = true;
5559 
5560 	/* May be called before buffers are initialized */
5561 	if (!tr->trace_buffer.buffer)
5562 		return 0;
5563 
5564 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5565 	if (ret < 0)
5566 		return ret;
5567 
5568 #ifdef CONFIG_TRACER_MAX_TRACE
5569 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5570 	    !tr->current_trace->use_max_tr)
5571 		goto out;
5572 
5573 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5574 	if (ret < 0) {
5575 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5576 						     &tr->trace_buffer, cpu);
5577 		if (r < 0) {
5578 			/*
5579 			 * AARGH! We are left with a max buffer of
5580 			 * a different size!!!!
5581 			 * The max buffer is our "snapshot" buffer.
5582 			 * When a tracer needs a snapshot (one of the
5583 			 * latency tracers), it swaps the max buffer
5584 			 * with the saved snapshot. We succeeded in
5585 			 * updating the size of the main buffer, but failed to
5586 			 * update the size of the max buffer. But when we tried
5587 			 * to reset the main buffer to the original size, we
5588 			 * failed there too. This is very unlikely to
5589 			 * happen, but if it does, warn and kill all
5590 			 * tracing.
5591 			 */
5592 			WARN_ON(1);
5593 			tracing_disabled = 1;
5594 		}
5595 		return ret;
5596 	}
5597 
5598 	if (cpu == RING_BUFFER_ALL_CPUS)
5599 		set_buffer_entries(&tr->max_buffer, size);
5600 	else
5601 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5602 
5603  out:
5604 #endif /* CONFIG_TRACER_MAX_TRACE */
5605 
5606 	if (cpu == RING_BUFFER_ALL_CPUS)
5607 		set_buffer_entries(&tr->trace_buffer, size);
5608 	else
5609 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5610 
5611 	return ret;
5612 }
5613 
5614 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5615 					  unsigned long size, int cpu_id)
5616 {
5617 	int ret = size;
5618 
5619 	mutex_lock(&trace_types_lock);
5620 
5621 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5622 		/* make sure, this cpu is enabled in the mask */
5623 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5624 			ret = -EINVAL;
5625 			goto out;
5626 		}
5627 	}
5628 
5629 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5630 	if (ret < 0)
5631 		ret = -ENOMEM;
5632 
5633 out:
5634 	mutex_unlock(&trace_types_lock);
5635 
5636 	return ret;
5637 }
5638 
5639 
5640 /**
5641  * tracing_update_buffers - used by tracing facility to expand ring buffers
5642  *
5643  * To save memory when tracing is configured into the kernel but never
5644  * used, the ring buffers are initially set to a minimum size. Once a
5645  * user starts to use the tracing facility, they need to grow to their
5646  * default size.
5647  *
5648  * This function is to be called when a tracer is about to be used.
5649  */
5650 int tracing_update_buffers(void)
5651 {
5652 	int ret = 0;
5653 
5654 	mutex_lock(&trace_types_lock);
5655 	if (!ring_buffer_expanded)
5656 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5657 						RING_BUFFER_ALL_CPUS);
5658 	mutex_unlock(&trace_types_lock);
5659 
5660 	return ret;
5661 }
5662 
5663 struct trace_option_dentry;
5664 
5665 static void
5666 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5667 
5668 /*
5669  * Used to clear out the tracer before deletion of an instance.
5670  * Must have trace_types_lock held.
5671  */
5672 static void tracing_set_nop(struct trace_array *tr)
5673 {
5674 	if (tr->current_trace == &nop_trace)
5675 		return;
5676 
5677 	tr->current_trace->enabled--;
5678 
5679 	if (tr->current_trace->reset)
5680 		tr->current_trace->reset(tr);
5681 
5682 	tr->current_trace = &nop_trace;
5683 }
5684 
5685 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5686 {
5687 	/* Only enable if the directory has been created already. */
5688 	if (!tr->dir)
5689 		return;
5690 
5691 	create_trace_option_files(tr, t);
5692 }
5693 
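/*
 * Switch @tr to the tracer named @buf, allocating or freeing the max
 * (snapshot) buffer as the new tracer requires.  Takes trace_types_lock
 * internally and fails with -EBUSY while trace_pipe readers are active.
 */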
5694 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5695 {
5696 	struct tracer *t;
5697 #ifdef CONFIG_TRACER_MAX_TRACE
5698 	bool had_max_tr;
5699 #endif
5700 	int ret = 0;
5701 
5702 	mutex_lock(&trace_types_lock);
5703 
5704 	if (!ring_buffer_expanded) {
5705 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5706 						RING_BUFFER_ALL_CPUS);
5707 		if (ret < 0)
5708 			goto out;
5709 		ret = 0;
5710 	}
5711 
5712 	for (t = trace_types; t; t = t->next) {
5713 		if (strcmp(t->name, buf) == 0)
5714 			break;
5715 	}
5716 	if (!t) {
5717 		ret = -EINVAL;
5718 		goto out;
5719 	}
5720 	if (t == tr->current_trace)
5721 		goto out;
5722 
5723 #ifdef CONFIG_TRACER_SNAPSHOT
5724 	if (t->use_max_tr) {
5725 		arch_spin_lock(&tr->max_lock);
5726 		if (tr->cond_snapshot)
5727 			ret = -EBUSY;
5728 		arch_spin_unlock(&tr->max_lock);
5729 		if (ret)
5730 			goto out;
5731 	}
5732 #endif
5733 	/* Some tracers won't work on kernel command line */
5734 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5735 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5736 			t->name);
5737 		goto out;
5738 	}
5739 
5740 	/* Some tracers are only allowed for the top level buffer */
5741 	if (!trace_ok_for_array(t, tr)) {
5742 		ret = -EINVAL;
5743 		goto out;
5744 	}
5745 
5746 	/* If trace pipe files are being read, we can't change the tracer */
5747 	if (tr->current_trace->ref) {
5748 		ret = -EBUSY;
5749 		goto out;
5750 	}
5751 
5752 	trace_branch_disable();
5753 
5754 	tr->current_trace->enabled--;
5755 
5756 	if (tr->current_trace->reset)
5757 		tr->current_trace->reset(tr);
5758 
5759 	/* Current trace needs to be nop_trace before synchronize_rcu */
5760 	tr->current_trace = &nop_trace;
5761 
5762 #ifdef CONFIG_TRACER_MAX_TRACE
5763 	had_max_tr = tr->allocated_snapshot;
5764 
5765 	if (had_max_tr && !t->use_max_tr) {
5766 		/*
5767 		 * We need to make sure that the update_max_tr sees that
5768 		 * current_trace changed to nop_trace to keep it from
5769 		 * swapping the buffers after we resize it.
5770 		 * The update_max_tr is called with interrupts disabled,
5771 		 * so a synchronize_rcu() is sufficient.
5772 		 */
5773 		synchronize_rcu();
5774 		free_snapshot(tr);
5775 	}
5776 #endif
5777 
5778 #ifdef CONFIG_TRACER_MAX_TRACE
5779 	if (t->use_max_tr && !had_max_tr) {
5780 		ret = tracing_alloc_snapshot_instance(tr);
5781 		if (ret < 0)
5782 			goto out;
5783 	}
5784 #endif
5785 
5786 	if (t->init) {
5787 		ret = tracer_init(t, tr);
5788 		if (ret)
5789 			goto out;
5790 	}
5791 
5792 	tr->current_trace = t;
5793 	tr->current_trace->enabled++;
5794 	trace_branch_enable(tr);
5795  out:
5796 	mutex_unlock(&trace_types_lock);
5797 
5798 	return ret;
5799 }
5800 
5801 static ssize_t
5802 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5803 			size_t cnt, loff_t *ppos)
5804 {
5805 	struct trace_array *tr = filp->private_data;
5806 	char buf[MAX_TRACER_SIZE+1];
5807 	int i;
5808 	size_t ret;
5809 	int err;
5810 
5811 	ret = cnt;
5812 
5813 	if (cnt > MAX_TRACER_SIZE)
5814 		cnt = MAX_TRACER_SIZE;
5815 
5816 	if (copy_from_user(buf, ubuf, cnt))
5817 		return -EFAULT;
5818 
5819 	buf[cnt] = 0;
5820 
5821 	/* Strip trailing whitespace. */
5822 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5823 		buf[i] = 0;
5824 
5825 	err = tracing_set_tracer(tr, buf);
5826 	if (err)
5827 		return err;
5828 
5829 	*ppos += ret;
5830 
5831 	return ret;
5832 }
5833 
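/*
 * Print the nanosecond value at @ptr to user space in microseconds,
 * or as "-1" when the value is unset.
 */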
5834 static ssize_t
5835 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5836 		   size_t cnt, loff_t *ppos)
5837 {
5838 	char buf[64];
5839 	int r;
5840 
5841 	r = snprintf(buf, sizeof(buf), "%ld\n",
5842 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5843 	if (r > sizeof(buf))
5844 		r = sizeof(buf);
5845 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5846 }
5847 
5848 static ssize_t
5849 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5850 		    size_t cnt, loff_t *ppos)
5851 {
5852 	unsigned long val;
5853 	int ret;
5854 
5855 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5856 	if (ret)
5857 		return ret;
5858 
5859 	*ptr = val * 1000;
5860 
5861 	return cnt;
5862 }
5863 
5864 static ssize_t
5865 tracing_thresh_read(struct file *filp, char __user *ubuf,
5866 		    size_t cnt, loff_t *ppos)
5867 {
5868 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5869 }
5870 
5871 static ssize_t
5872 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5873 		     size_t cnt, loff_t *ppos)
5874 {
5875 	struct trace_array *tr = filp->private_data;
5876 	int ret;
5877 
5878 	mutex_lock(&trace_types_lock);
5879 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5880 	if (ret < 0)
5881 		goto out;
5882 
5883 	if (tr->current_trace->update_thresh) {
5884 		ret = tr->current_trace->update_thresh(tr);
5885 		if (ret < 0)
5886 			goto out;
5887 	}
5888 
5889 	ret = cnt;
5890 out:
5891 	mutex_unlock(&trace_types_lock);
5892 
5893 	return ret;
5894 }
5895 
5896 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5897 
5898 static ssize_t
5899 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5900 		     size_t cnt, loff_t *ppos)
5901 {
5902 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5903 }
5904 
5905 static ssize_t
5906 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5907 		      size_t cnt, loff_t *ppos)
5908 {
5909 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5910 }
5911 
5912 #endif
5913 
5914 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5915 {
5916 	struct trace_array *tr = inode->i_private;
5917 	struct trace_iterator *iter;
5918 	int ret;
5919 
5920 	ret = tracing_check_open_get_tr(tr);
5921 	if (ret)
5922 		return ret;
5923 
5924 	mutex_lock(&trace_types_lock);
5925 
5926 	/* create a buffer to store the information to pass to userspace */
5927 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5928 	if (!iter) {
5929 		ret = -ENOMEM;
5930 		__trace_array_put(tr);
5931 		goto out;
5932 	}
5933 
5934 	trace_seq_init(&iter->seq);
5935 	iter->trace = tr->current_trace;
5936 
5937 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5938 		ret = -ENOMEM;
5939 		goto fail;
5940 	}
5941 
5942 	/* trace pipe does not show start of buffer */
5943 	cpumask_setall(iter->started);
5944 
5945 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5946 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5947 
5948 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5949 	if (trace_clocks[tr->clock_id].in_ns)
5950 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5951 
5952 	iter->tr = tr;
5953 	iter->trace_buffer = &tr->trace_buffer;
5954 	iter->cpu_file = tracing_get_cpu(inode);
5955 	mutex_init(&iter->mutex);
5956 	filp->private_data = iter;
5957 
5958 	if (iter->trace->pipe_open)
5959 		iter->trace->pipe_open(iter);
5960 
5961 	nonseekable_open(inode, filp);
5962 
5963 	tr->current_trace->ref++;
5964 out:
5965 	mutex_unlock(&trace_types_lock);
5966 	return ret;
5967 
5968 fail:
5969 	kfree(iter);
5970 	__trace_array_put(tr);
5971 	mutex_unlock(&trace_types_lock);
5972 	return ret;
5973 }
5974 
5975 static int tracing_release_pipe(struct inode *inode, struct file *file)
5976 {
5977 	struct trace_iterator *iter = file->private_data;
5978 	struct trace_array *tr = inode->i_private;
5979 
5980 	mutex_lock(&trace_types_lock);
5981 
5982 	tr->current_trace->ref--;
5983 
5984 	if (iter->trace->pipe_close)
5985 		iter->trace->pipe_close(iter);
5986 
5987 	mutex_unlock(&trace_types_lock);
5988 
5989 	free_cpumask_var(iter->started);
5990 	mutex_destroy(&iter->mutex);
5991 	kfree(iter);
5992 
5993 	trace_array_put(tr);
5994 
5995 	return 0;
5996 }
5997 
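/* Poll helper shared by trace_pipe and the per_cpu buffer readers. */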
5998 static __poll_t
5999 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6000 {
6001 	struct trace_array *tr = iter->tr;
6002 
6003 	/* Iterators are static; they should be either filled or empty */
6004 	if (trace_buffer_iter(iter, iter->cpu_file))
6005 		return EPOLLIN | EPOLLRDNORM;
6006 
6007 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6008 		/*
6009 		 * Always select as readable when in blocking mode
6010 		 */
6011 		return EPOLLIN | EPOLLRDNORM;
6012 	else
6013 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6014 					     filp, poll_table);
6015 }
6016 
6017 static __poll_t
6018 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6019 {
6020 	struct trace_iterator *iter = filp->private_data;
6021 
6022 	return trace_poll(iter, filp, poll_table);
6023 }
6024 
6025 /* Must be called with iter->mutex held. */
6026 static int tracing_wait_pipe(struct file *filp)
6027 {
6028 	struct trace_iterator *iter = filp->private_data;
6029 	int ret;
6030 
6031 	while (trace_empty(iter)) {
6032 
6033 		if ((filp->f_flags & O_NONBLOCK)) {
6034 			return -EAGAIN;
6035 		}
6036 
6037 		/*
6038 		 * We block until we read something and tracing is disabled.
6039 		 * We still block if tracing is disabled, but we have never
6040 		 * read anything. This allows a user to cat this file, and
6041 		 * then enable tracing. But after we have read something,
6042 		 * we give an EOF when tracing is again disabled.
6043 		 *
6044 		 * iter->pos will be 0 if we haven't read anything.
6045 		 */
6046 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6047 			break;
6048 
6049 		mutex_unlock(&iter->mutex);
6050 
6051 		ret = wait_on_pipe(iter, 0);
6052 
6053 		mutex_lock(&iter->mutex);
6054 
6055 		if (ret)
6056 			return ret;
6057 	}
6058 
6059 	return 1;
6060 }
6061 
6062 /*
6063  * Consumer reader.
6064  */
6065 static ssize_t
6066 tracing_read_pipe(struct file *filp, char __user *ubuf,
6067 		  size_t cnt, loff_t *ppos)
6068 {
6069 	struct trace_iterator *iter = filp->private_data;
6070 	ssize_t sret;
6071 
6072 	/*
6073 	 * Avoid more than one consumer on a single file descriptor.
6074 	 * This is just a matter of trace coherency; the ring buffer itself
6075 	 * is protected.
6076 	 */
6077 	mutex_lock(&iter->mutex);
6078 
6079 	/* return any leftover data */
6080 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6081 	if (sret != -EBUSY)
6082 		goto out;
6083 
6084 	trace_seq_init(&iter->seq);
6085 
6086 	if (iter->trace->read) {
6087 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6088 		if (sret)
6089 			goto out;
6090 	}
6091 
6092 waitagain:
6093 	sret = tracing_wait_pipe(filp);
6094 	if (sret <= 0)
6095 		goto out;
6096 
6097 	/* stop when tracing is finished */
6098 	if (trace_empty(iter)) {
6099 		sret = 0;
6100 		goto out;
6101 	}
6102 
6103 	if (cnt >= PAGE_SIZE)
6104 		cnt = PAGE_SIZE - 1;
6105 
6106 	/* reset all but tr, trace, and overruns */
6107 	memset(&iter->seq, 0,
6108 	       sizeof(struct trace_iterator) -
6109 	       offsetof(struct trace_iterator, seq));
6110 	cpumask_clear(iter->started);
6111 	trace_seq_init(&iter->seq);
6112 	iter->pos = -1;
6113 
6114 	trace_event_read_lock();
6115 	trace_access_lock(iter->cpu_file);
6116 	while (trace_find_next_entry_inc(iter) != NULL) {
6117 		enum print_line_t ret;
6118 		int save_len = iter->seq.seq.len;
6119 
6120 		ret = print_trace_line(iter);
6121 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6122 			/* don't print partial lines */
6123 			iter->seq.seq.len = save_len;
6124 			break;
6125 		}
6126 		if (ret != TRACE_TYPE_NO_CONSUME)
6127 			trace_consume(iter);
6128 
6129 		if (trace_seq_used(&iter->seq) >= cnt)
6130 			break;
6131 
6132 		/*
6133 		 * Setting the full flag means we reached the trace_seq buffer
6134 		 * size, and we should have left via the partial-output condition
6135 		 * above. One of the trace_seq_* functions is not being used properly.
6136 		 */
6137 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6138 			  iter->ent->type);
6139 	}
6140 	trace_access_unlock(iter->cpu_file);
6141 	trace_event_read_unlock();
6142 
6143 	/* Now copy what we have to the user */
6144 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6145 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6146 		trace_seq_init(&iter->seq);
6147 
6148 	/*
6149 	 * If there was nothing to send to user, in spite of consuming trace
6150 	 * entries, go back to wait for more entries.
6151 	 */
6152 	if (sret == -EBUSY)
6153 		goto waitagain;
6154 
6155 out:
6156 	mutex_unlock(&iter->mutex);
6157 
6158 	return sret;
6159 }
6160 
6161 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6162 				     unsigned int idx)
6163 {
6164 	__free_page(spd->pages[idx]);
6165 }
6166 
6167 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6168 	.confirm		= generic_pipe_buf_confirm,
6169 	.release		= generic_pipe_buf_release,
6170 	.steal			= generic_pipe_buf_steal,
6171 	.get			= generic_pipe_buf_get,
6172 };
6173 
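/*
 * Format trace entries into iter->seq for one splice page, consuming them
 * until @rem bytes have been used or no entries remain.  Returns the byte
 * budget left for the following pages.
 */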
6174 static size_t
6175 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6176 {
6177 	size_t count;
6178 	int save_len;
6179 	int ret;
6180 
6181 	/* Seq buffer is page-sized, exactly what we need. */
6182 	for (;;) {
6183 		save_len = iter->seq.seq.len;
6184 		ret = print_trace_line(iter);
6185 
6186 		if (trace_seq_has_overflowed(&iter->seq)) {
6187 			iter->seq.seq.len = save_len;
6188 			break;
6189 		}
6190 
6191 		/*
6192 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6193 		 * only be returned when iter->seq has overflowed. But check it
6194 		 * anyway to be safe.
6195 		 */
6196 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6197 			iter->seq.seq.len = save_len;
6198 			break;
6199 		}
6200 
6201 		count = trace_seq_used(&iter->seq) - save_len;
6202 		if (rem < count) {
6203 			rem = 0;
6204 			iter->seq.seq.len = save_len;
6205 			break;
6206 		}
6207 
6208 		if (ret != TRACE_TYPE_NO_CONSUME)
6209 			trace_consume(iter);
6210 		rem -= count;
6211 		if (!trace_find_next_entry_inc(iter)) {
6212 			rem = 0;
6213 			iter->ent = NULL;
6214 			break;
6215 		}
6216 	}
6217 
6218 	return rem;
6219 }
6220 
6221 static ssize_t tracing_splice_read_pipe(struct file *filp,
6222 					loff_t *ppos,
6223 					struct pipe_inode_info *pipe,
6224 					size_t len,
6225 					unsigned int flags)
6226 {
6227 	struct page *pages_def[PIPE_DEF_BUFFERS];
6228 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6229 	struct trace_iterator *iter = filp->private_data;
6230 	struct splice_pipe_desc spd = {
6231 		.pages		= pages_def,
6232 		.partial	= partial_def,
6233 		.nr_pages	= 0, /* This gets updated below. */
6234 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6235 		.ops		= &tracing_pipe_buf_ops,
6236 		.spd_release	= tracing_spd_release_pipe,
6237 	};
6238 	ssize_t ret;
6239 	size_t rem;
6240 	unsigned int i;
6241 
6242 	if (splice_grow_spd(pipe, &spd))
6243 		return -ENOMEM;
6244 
6245 	mutex_lock(&iter->mutex);
6246 
6247 	if (iter->trace->splice_read) {
6248 		ret = iter->trace->splice_read(iter, filp,
6249 					       ppos, pipe, len, flags);
6250 		if (ret)
6251 			goto out_err;
6252 	}
6253 
6254 	ret = tracing_wait_pipe(filp);
6255 	if (ret <= 0)
6256 		goto out_err;
6257 
6258 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6259 		ret = -EFAULT;
6260 		goto out_err;
6261 	}
6262 
6263 	trace_event_read_lock();
6264 	trace_access_lock(iter->cpu_file);
6265 
6266 	/* Fill as many pages as possible. */
6267 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6268 		spd.pages[i] = alloc_page(GFP_KERNEL);
6269 		if (!spd.pages[i])
6270 			break;
6271 
6272 		rem = tracing_fill_pipe_page(rem, iter);
6273 
6274 		/* Copy the data into the page, so we can start over. */
6275 		ret = trace_seq_to_buffer(&iter->seq,
6276 					  page_address(spd.pages[i]),
6277 					  trace_seq_used(&iter->seq));
6278 		if (ret < 0) {
6279 			__free_page(spd.pages[i]);
6280 			break;
6281 		}
6282 		spd.partial[i].offset = 0;
6283 		spd.partial[i].len = trace_seq_used(&iter->seq);
6284 
6285 		trace_seq_init(&iter->seq);
6286 	}
6287 
6288 	trace_access_unlock(iter->cpu_file);
6289 	trace_event_read_unlock();
6290 	mutex_unlock(&iter->mutex);
6291 
6292 	spd.nr_pages = i;
6293 
6294 	if (i)
6295 		ret = splice_to_pipe(pipe, &spd);
6296 	else
6297 		ret = 0;
6298 out:
6299 	splice_shrink_spd(&spd);
6300 	return ret;
6301 
6302 out_err:
6303 	mutex_unlock(&iter->mutex);
6304 	goto out;
6305 }
6306 
6307 static ssize_t
6308 tracing_entries_read(struct file *filp, char __user *ubuf,
6309 		     size_t cnt, loff_t *ppos)
6310 {
6311 	struct inode *inode = file_inode(filp);
6312 	struct trace_array *tr = inode->i_private;
6313 	int cpu = tracing_get_cpu(inode);
6314 	char buf[64];
6315 	int r = 0;
6316 	ssize_t ret;
6317 
6318 	mutex_lock(&trace_types_lock);
6319 
6320 	if (cpu == RING_BUFFER_ALL_CPUS) {
6321 		int cpu, buf_size_same;
6322 		unsigned long size;
6323 
6324 		size = 0;
6325 		buf_size_same = 1;
6326 		/* check if all cpu sizes are same */
6327 		for_each_tracing_cpu(cpu) {
6328 			/* fill in the size from first enabled cpu */
6329 			if (size == 0)
6330 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6331 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6332 				buf_size_same = 0;
6333 				break;
6334 			}
6335 		}
6336 
6337 		if (buf_size_same) {
6338 			if (!ring_buffer_expanded)
6339 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6340 					    size >> 10,
6341 					    trace_buf_size >> 10);
6342 			else
6343 				r = sprintf(buf, "%lu\n", size >> 10);
6344 		} else
6345 			r = sprintf(buf, "X\n");
6346 	} else
6347 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6348 
6349 	mutex_unlock(&trace_types_lock);
6350 
6351 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6352 	return ret;
6353 }
6354 
6355 static ssize_t
6356 tracing_entries_write(struct file *filp, const char __user *ubuf,
6357 		      size_t cnt, loff_t *ppos)
6358 {
6359 	struct inode *inode = file_inode(filp);
6360 	struct trace_array *tr = inode->i_private;
6361 	unsigned long val;
6362 	int ret;
6363 
6364 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6365 	if (ret)
6366 		return ret;
6367 
6368 	/* must have at least 1 entry */
6369 	if (!val)
6370 		return -EINVAL;
6371 
6372 	/* value is in KB */
6373 	val <<= 10;
6374 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6375 	if (ret < 0)
6376 		return ret;
6377 
6378 	*ppos += cnt;
6379 
6380 	return cnt;
6381 }
6382 
6383 static ssize_t
6384 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6385 				size_t cnt, loff_t *ppos)
6386 {
6387 	struct trace_array *tr = filp->private_data;
6388 	char buf[64];
6389 	int r, cpu;
6390 	unsigned long size = 0, expanded_size = 0;
6391 
6392 	mutex_lock(&trace_types_lock);
6393 	for_each_tracing_cpu(cpu) {
6394 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6395 		if (!ring_buffer_expanded)
6396 			expanded_size += trace_buf_size >> 10;
6397 	}
6398 	if (ring_buffer_expanded)
6399 		r = sprintf(buf, "%lu\n", size);
6400 	else
6401 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6402 	mutex_unlock(&trace_types_lock);
6403 
6404 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6405 }
6406 
6407 static ssize_t
6408 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6409 			  size_t cnt, loff_t *ppos)
6410 {
6411 	/*
6412 	 * There is no need to read what the user has written; this function
6413 	 * exists only to make sure that there is no error when "echo" is used.
6414 	 */
6415 
6416 	*ppos += cnt;
6417 
6418 	return cnt;
6419 }
6420 
6421 static int
6422 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6423 {
6424 	struct trace_array *tr = inode->i_private;
6425 
6426 	/* disable tracing ? */
6427 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6428 		tracer_tracing_off(tr);
6429 	/* resize the ring buffer to 0 */
6430 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6431 
6432 	trace_array_put(tr);
6433 
6434 	return 0;
6435 }
6436 
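/*
 * Write handler for trace_marker: record the user supplied string as a
 * TRACE_PRINT event, firing any triggers attached to the marker file.
 */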
6437 static ssize_t
6438 tracing_mark_write(struct file *filp, const char __user *ubuf,
6439 					size_t cnt, loff_t *fpos)
6440 {
6441 	struct trace_array *tr = filp->private_data;
6442 	struct ring_buffer_event *event;
6443 	enum event_trigger_type tt = ETT_NONE;
6444 	struct ring_buffer *buffer;
6445 	struct print_entry *entry;
6446 	unsigned long irq_flags;
6447 	ssize_t written;
6448 	int size;
6449 	int len;
6450 
6451 /* Used in tracing_mark_raw_write() as well */
6452 #define FAULTED_STR "<faulted>"
6453 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6454 
6455 	if (tracing_disabled)
6456 		return -EINVAL;
6457 
6458 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6459 		return -EINVAL;
6460 
6461 	if (cnt > TRACE_BUF_SIZE)
6462 		cnt = TRACE_BUF_SIZE;
6463 
6464 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6465 
6466 	local_save_flags(irq_flags);
6467 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6468 
6469 	/* If less than "<faulted>", then make sure we can still add that */
6470 	if (cnt < FAULTED_SIZE)
6471 		size += FAULTED_SIZE - cnt;
6472 
6473 	buffer = tr->trace_buffer.buffer;
6474 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6475 					    irq_flags, preempt_count());
6476 	if (unlikely(!event))
6477 		/* Ring buffer disabled, return as if not open for write */
6478 		return -EBADF;
6479 
6480 	entry = ring_buffer_event_data(event);
6481 	entry->ip = _THIS_IP_;
6482 
6483 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6484 	if (len) {
6485 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6486 		cnt = FAULTED_SIZE;
6487 		written = -EFAULT;
6488 	} else
6489 		written = cnt;
6490 	len = cnt;
6491 
6492 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6493 		/* do not add \n before testing triggers, but add \0 */
6494 		entry->buf[cnt] = '\0';
6495 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6496 	}
6497 
6498 	if (entry->buf[cnt - 1] != '\n') {
6499 		entry->buf[cnt] = '\n';
6500 		entry->buf[cnt + 1] = '\0';
6501 	} else
6502 		entry->buf[cnt] = '\0';
6503 
6504 	__buffer_unlock_commit(buffer, event);
6505 
6506 	if (tt)
6507 		event_triggers_post_call(tr->trace_marker_file, tt);
6508 
6509 	if (written > 0)
6510 		*fpos += written;
6511 
6512 	return written;
6513 }
6514 
6515 /* Limit it for now to 3K (including tag) */
6516 #define RAW_DATA_MAX_SIZE (1024*3)
6517 
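/*
 * Write handler for trace_marker_raw: the leading int of the user buffer
 * is recorded as a tag id, the rest as raw payload of a TRACE_RAW_DATA event.
 */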
6518 static ssize_t
6519 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6520 					size_t cnt, loff_t *fpos)
6521 {
6522 	struct trace_array *tr = filp->private_data;
6523 	struct ring_buffer_event *event;
6524 	struct ring_buffer *buffer;
6525 	struct raw_data_entry *entry;
6526 	unsigned long irq_flags;
6527 	ssize_t written;
6528 	int size;
6529 	int len;
6530 
6531 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6532 
6533 	if (tracing_disabled)
6534 		return -EINVAL;
6535 
6536 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6537 		return -EINVAL;
6538 
6539 	/* The marker must at least have a tag id */
6540 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6541 		return -EINVAL;
6542 
6543 	if (cnt > TRACE_BUF_SIZE)
6544 		cnt = TRACE_BUF_SIZE;
6545 
6546 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6547 
6548 	local_save_flags(irq_flags);
6549 	size = sizeof(*entry) + cnt;
6550 	if (cnt < FAULT_SIZE_ID)
6551 		size += FAULT_SIZE_ID - cnt;
6552 
6553 	buffer = tr->trace_buffer.buffer;
6554 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6555 					    irq_flags, preempt_count());
6556 	if (!event)
6557 		/* Ring buffer disabled, return as if not open for write */
6558 		return -EBADF;
6559 
6560 	entry = ring_buffer_event_data(event);
6561 
6562 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6563 	if (len) {
6564 		entry->id = -1;
6565 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6566 		written = -EFAULT;
6567 	} else
6568 		written = cnt;
6569 
6570 	__buffer_unlock_commit(buffer, event);
6571 
6572 	if (written > 0)
6573 		*fpos += written;
6574 
6575 	return written;
6576 }
6577 
6578 static int tracing_clock_show(struct seq_file *m, void *v)
6579 {
6580 	struct trace_array *tr = m->private;
6581 	int i;
6582 
6583 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6584 		seq_printf(m,
6585 			"%s%s%s%s", i ? " " : "",
6586 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6587 			i == tr->clock_id ? "]" : "");
6588 	seq_putc(m, '\n');
6589 
6590 	return 0;
6591 }
6592 
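/*
 * Look up @clockstr in trace_clocks[] and switch @tr (and its max buffer,
 * if allocated) to that clock, resetting the buffers so that timestamps
 * from different clocks are never mixed.
 */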
6593 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6594 {
6595 	int i;
6596 
6597 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6598 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6599 			break;
6600 	}
6601 	if (i == ARRAY_SIZE(trace_clocks))
6602 		return -EINVAL;
6603 
6604 	mutex_lock(&trace_types_lock);
6605 
6606 	tr->clock_id = i;
6607 
6608 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6609 
6610 	/*
6611 	 * New clock may not be consistent with the previous clock.
6612 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6613 	 */
6614 	tracing_reset_online_cpus(&tr->trace_buffer);
6615 
6616 #ifdef CONFIG_TRACER_MAX_TRACE
6617 	if (tr->max_buffer.buffer)
6618 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6619 	tracing_reset_online_cpus(&tr->max_buffer);
6620 #endif
6621 
6622 	mutex_unlock(&trace_types_lock);
6623 
6624 	return 0;
6625 }
6626 
6627 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6628 				   size_t cnt, loff_t *fpos)
6629 {
6630 	struct seq_file *m = filp->private_data;
6631 	struct trace_array *tr = m->private;
6632 	char buf[64];
6633 	const char *clockstr;
6634 	int ret;
6635 
6636 	if (cnt >= sizeof(buf))
6637 		return -EINVAL;
6638 
6639 	if (copy_from_user(buf, ubuf, cnt))
6640 		return -EFAULT;
6641 
6642 	buf[cnt] = 0;
6643 
6644 	clockstr = strstrip(buf);
6645 
6646 	ret = tracing_set_clock(tr, clockstr);
6647 	if (ret)
6648 		return ret;
6649 
6650 	*fpos += cnt;
6651 
6652 	return cnt;
6653 }
6654 
6655 static int tracing_clock_open(struct inode *inode, struct file *file)
6656 {
6657 	struct trace_array *tr = inode->i_private;
6658 	int ret;
6659 
6660 	ret = tracing_check_open_get_tr(tr);
6661 	if (ret)
6662 		return ret;
6663 
6664 	ret = single_open(file, tracing_clock_show, inode->i_private);
6665 	if (ret < 0)
6666 		trace_array_put(tr);
6667 
6668 	return ret;
6669 }
6670 
6671 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6672 {
6673 	struct trace_array *tr = m->private;
6674 
6675 	mutex_lock(&trace_types_lock);
6676 
6677 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6678 		seq_puts(m, "delta [absolute]\n");
6679 	else
6680 		seq_puts(m, "[delta] absolute\n");
6681 
6682 	mutex_unlock(&trace_types_lock);
6683 
6684 	return 0;
6685 }
6686 
6687 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6688 {
6689 	struct trace_array *tr = inode->i_private;
6690 	int ret;
6691 
6692 	ret = tracing_check_open_get_tr(tr);
6693 	if (ret)
6694 		return ret;
6695 
6696 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6697 	if (ret < 0)
6698 		trace_array_put(tr);
6699 
6700 	return ret;
6701 }
6702 
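/*
 * Enable or disable absolute timestamps for @tr.  Enable requests are
 * reference counted, so delta mode is restored only when the last user
 * drops its reference.
 */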
6703 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6704 {
6705 	int ret = 0;
6706 
6707 	mutex_lock(&trace_types_lock);
6708 
6709 	if (abs && tr->time_stamp_abs_ref++)
6710 		goto out;
6711 
6712 	if (!abs) {
6713 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6714 			ret = -EINVAL;
6715 			goto out;
6716 		}
6717 
6718 		if (--tr->time_stamp_abs_ref)
6719 			goto out;
6720 	}
6721 
6722 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6723 
6724 #ifdef CONFIG_TRACER_MAX_TRACE
6725 	if (tr->max_buffer.buffer)
6726 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6727 #endif
6728  out:
6729 	mutex_unlock(&trace_types_lock);
6730 
6731 	return ret;
6732 }
6733 
6734 struct ftrace_buffer_info {
6735 	struct trace_iterator	iter;
6736 	void			*spare;
6737 	unsigned int		spare_cpu;
6738 	unsigned int		read;
6739 };
6740 
6741 #ifdef CONFIG_TRACER_SNAPSHOT
6742 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6743 {
6744 	struct trace_array *tr = inode->i_private;
6745 	struct trace_iterator *iter;
6746 	struct seq_file *m;
6747 	int ret;
6748 
6749 	ret = tracing_check_open_get_tr(tr);
6750 	if (ret)
6751 		return ret;
6752 
6753 	if (file->f_mode & FMODE_READ) {
6754 		iter = __tracing_open(inode, file, true);
6755 		if (IS_ERR(iter))
6756 			ret = PTR_ERR(iter);
6757 	} else {
6758 		/* Writes still need the seq_file to hold the private data */
6759 		ret = -ENOMEM;
6760 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6761 		if (!m)
6762 			goto out;
6763 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6764 		if (!iter) {
6765 			kfree(m);
6766 			goto out;
6767 		}
6768 		ret = 0;
6769 
6770 		iter->tr = tr;
6771 		iter->trace_buffer = &tr->max_buffer;
6772 		iter->cpu_file = tracing_get_cpu(inode);
6773 		m->private = iter;
6774 		file->private_data = m;
6775 	}
6776 out:
6777 	if (ret < 0)
6778 		trace_array_put(tr);
6779 
6780 	return ret;
6781 }
6782 
6783 static ssize_t
6784 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6785 		       loff_t *ppos)
6786 {
6787 	struct seq_file *m = filp->private_data;
6788 	struct trace_iterator *iter = m->private;
6789 	struct trace_array *tr = iter->tr;
6790 	unsigned long val;
6791 	int ret;
6792 
6793 	ret = tracing_update_buffers();
6794 	if (ret < 0)
6795 		return ret;
6796 
6797 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6798 	if (ret)
6799 		return ret;
6800 
6801 	mutex_lock(&trace_types_lock);
6802 
6803 	if (tr->current_trace->use_max_tr) {
6804 		ret = -EBUSY;
6805 		goto out;
6806 	}
6807 
6808 	arch_spin_lock(&tr->max_lock);
6809 	if (tr->cond_snapshot)
6810 		ret = -EBUSY;
6811 	arch_spin_unlock(&tr->max_lock);
6812 	if (ret)
6813 		goto out;
6814 
6815 	switch (val) {
6816 	case 0:
6817 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6818 			ret = -EINVAL;
6819 			break;
6820 		}
6821 		if (tr->allocated_snapshot)
6822 			free_snapshot(tr);
6823 		break;
6824 	case 1:
6825 /* Only allow per-cpu swap if the ring buffer supports it */
6826 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6827 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6828 			ret = -EINVAL;
6829 			break;
6830 		}
6831 #endif
6832 		if (tr->allocated_snapshot)
6833 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6834 					&tr->trace_buffer, iter->cpu_file);
6835 		else
6836 			ret = tracing_alloc_snapshot_instance(tr);
6837 		if (ret < 0)
6838 			break;
6839 		local_irq_disable();
6840 		/* Now, we're going to swap */
6841 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6842 			update_max_tr(tr, current, smp_processor_id(), NULL);
6843 		else
6844 			update_max_tr_single(tr, current, iter->cpu_file);
6845 		local_irq_enable();
6846 		break;
6847 	default:
6848 		if (tr->allocated_snapshot) {
6849 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6850 				tracing_reset_online_cpus(&tr->max_buffer);
6851 			else
6852 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6853 		}
6854 		break;
6855 	}
6856 
6857 	if (ret >= 0) {
6858 		*ppos += cnt;
6859 		ret = cnt;
6860 	}
6861 out:
6862 	mutex_unlock(&trace_types_lock);
6863 	return ret;
6864 }
6865 
6866 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6867 {
6868 	struct seq_file *m = file->private_data;
6869 	int ret;
6870 
6871 	ret = tracing_release(inode, file);
6872 
6873 	if (file->f_mode & FMODE_READ)
6874 		return ret;
6875 
6876 	/* If write only, the seq_file is just a stub */
6877 	if (m)
6878 		kfree(m->private);
6879 	kfree(m);
6880 
6881 	return 0;
6882 }
6883 
6884 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6885 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6886 				    size_t count, loff_t *ppos);
6887 static int tracing_buffers_release(struct inode *inode, struct file *file);
6888 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6889 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6890 
6891 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6892 {
6893 	struct ftrace_buffer_info *info;
6894 	int ret;
6895 
6896 	/* The following checks for tracefs lockdown */
6897 	ret = tracing_buffers_open(inode, filp);
6898 	if (ret < 0)
6899 		return ret;
6900 
6901 	info = filp->private_data;
6902 
6903 	if (info->iter.trace->use_max_tr) {
6904 		tracing_buffers_release(inode, filp);
6905 		return -EBUSY;
6906 	}
6907 
6908 	info->iter.snapshot = true;
6909 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6910 
6911 	return ret;
6912 }
6913 
6914 #endif /* CONFIG_TRACER_SNAPSHOT */
6915 
6916 
6917 static const struct file_operations tracing_thresh_fops = {
6918 	.open		= tracing_open_generic,
6919 	.read		= tracing_thresh_read,
6920 	.write		= tracing_thresh_write,
6921 	.llseek		= generic_file_llseek,
6922 };
6923 
6924 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6925 static const struct file_operations tracing_max_lat_fops = {
6926 	.open		= tracing_open_generic,
6927 	.read		= tracing_max_lat_read,
6928 	.write		= tracing_max_lat_write,
6929 	.llseek		= generic_file_llseek,
6930 };
6931 #endif
6932 
6933 static const struct file_operations set_tracer_fops = {
6934 	.open		= tracing_open_generic,
6935 	.read		= tracing_set_trace_read,
6936 	.write		= tracing_set_trace_write,
6937 	.llseek		= generic_file_llseek,
6938 };
6939 
6940 static const struct file_operations tracing_pipe_fops = {
6941 	.open		= tracing_open_pipe,
6942 	.poll		= tracing_poll_pipe,
6943 	.read		= tracing_read_pipe,
6944 	.splice_read	= tracing_splice_read_pipe,
6945 	.release	= tracing_release_pipe,
6946 	.llseek		= no_llseek,
6947 };
6948 
6949 static const struct file_operations tracing_entries_fops = {
6950 	.open		= tracing_open_generic_tr,
6951 	.read		= tracing_entries_read,
6952 	.write		= tracing_entries_write,
6953 	.llseek		= generic_file_llseek,
6954 	.release	= tracing_release_generic_tr,
6955 };
6956 
6957 static const struct file_operations tracing_total_entries_fops = {
6958 	.open		= tracing_open_generic_tr,
6959 	.read		= tracing_total_entries_read,
6960 	.llseek		= generic_file_llseek,
6961 	.release	= tracing_release_generic_tr,
6962 };
6963 
6964 static const struct file_operations tracing_free_buffer_fops = {
6965 	.open		= tracing_open_generic_tr,
6966 	.write		= tracing_free_buffer_write,
6967 	.release	= tracing_free_buffer_release,
6968 };
6969 
6970 static const struct file_operations tracing_mark_fops = {
6971 	.open		= tracing_open_generic_tr,
6972 	.write		= tracing_mark_write,
6973 	.llseek		= generic_file_llseek,
6974 	.release	= tracing_release_generic_tr,
6975 };
6976 
6977 static const struct file_operations tracing_mark_raw_fops = {
6978 	.open		= tracing_open_generic_tr,
6979 	.write		= tracing_mark_raw_write,
6980 	.llseek		= generic_file_llseek,
6981 	.release	= tracing_release_generic_tr,
6982 };
6983 
6984 static const struct file_operations trace_clock_fops = {
6985 	.open		= tracing_clock_open,
6986 	.read		= seq_read,
6987 	.llseek		= seq_lseek,
6988 	.release	= tracing_single_release_tr,
6989 	.write		= tracing_clock_write,
6990 };
6991 
6992 static const struct file_operations trace_time_stamp_mode_fops = {
6993 	.open		= tracing_time_stamp_mode_open,
6994 	.read		= seq_read,
6995 	.llseek		= seq_lseek,
6996 	.release	= tracing_single_release_tr,
6997 };
6998 
6999 #ifdef CONFIG_TRACER_SNAPSHOT
7000 static const struct file_operations snapshot_fops = {
7001 	.open		= tracing_snapshot_open,
7002 	.read		= seq_read,
7003 	.write		= tracing_snapshot_write,
7004 	.llseek		= tracing_lseek,
7005 	.release	= tracing_snapshot_release,
7006 };
7007 
7008 static const struct file_operations snapshot_raw_fops = {
7009 	.open		= snapshot_raw_open,
7010 	.read		= tracing_buffers_read,
7011 	.release	= tracing_buffers_release,
7012 	.splice_read	= tracing_buffers_splice_read,
7013 	.llseek		= no_llseek,
7014 };
7015 
7016 #endif /* CONFIG_TRACER_SNAPSHOT */
7017 
7018 #define TRACING_LOG_ERRS_MAX	8
7019 #define TRACING_LOG_LOC_MAX	128
7020 
7021 #define CMD_PREFIX "  Command: "
7022 
7023 struct err_info {
7024 	const char	**errs;	/* ptr to loc-specific array of err strings */
7025 	u8		type;	/* index into errs -> specific err string */
7026 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7027 	u64		ts;
7028 };
7029 
7030 struct tracing_log_err {
7031 	struct list_head	list;
7032 	struct err_info		info;
7033 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7034 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7035 };
7036 
7037 static DEFINE_MUTEX(tracing_err_log_lock);
7038 
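/*
 * Return an entry to log a new error into: allocate a fresh one until the
 * log holds TRACING_LOG_ERRS_MAX entries, then recycle the oldest entry.
 * Called with tracing_err_log_lock held.
 */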
7039 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7040 {
7041 	struct tracing_log_err *err;
7042 
7043 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7044 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7045 		if (!err)
7046 			err = ERR_PTR(-ENOMEM);
7047 		tr->n_err_log_entries++;
7048 
7049 		return err;
7050 	}
7051 
7052 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7053 	list_del(&err->list);
7054 
7055 	return err;
7056 }
7057 
7058 /**
7059  * err_pos - find the position of a string within a command for error careting
7060  * @cmd: The tracing command that caused the error
7061  * @str: The string to position the caret at within @cmd
7062  *
7063  * Finds the position of the first occurrence of @str within @cmd.  The
7064  * return value can be passed to tracing_log_err() for caret placement
7065  * within @cmd.
7066  *
7067  * Returns the index within @cmd of the first occurrence of @str or 0
7068  * if @str was not found.
7069  */
7070 unsigned int err_pos(char *cmd, const char *str)
7071 {
7072 	char *found;
7073 
7074 	if (WARN_ON(!strlen(cmd)))
7075 		return 0;
7076 
7077 	found = strstr(cmd, str);
7078 	if (found)
7079 		return found - cmd;
7080 
7081 	return 0;
7082 }
7083 
7084 /**
7085  * tracing_log_err - write an error to the tracing error log
7086  * @tr: The associated trace array for the error (NULL for top level array)
7087  * @loc: A string describing where the error occurred
7088  * @cmd: The tracing command that caused the error
7089  * @errs: The array of loc-specific static error strings
7090  * @type: The index into errs[], which produces the specific static err string
7091  * @pos: The position the caret should be placed in the cmd
7092  *
7093  * Writes an error into tracing/error_log of the form:
7094  *
7095  * <loc>: error: <text>
7096  *   Command: <cmd>
7097  *              ^
7098  *
7099  * tracing/error_log is a small log file containing the last
7100  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7101  * unless there has been a tracing error, and the error log can be
7102  * cleared and have its memory freed by writing the empty string in
7103  * truncation mode to it i.e. echo > tracing/error_log.
7104  *
7105  * NOTE: the @errs array along with the @type param are used to
7106  * produce a static error string - this string is not copied and saved
7107  * when the error is logged - only a pointer to it is saved.  See
7108  * existing callers for examples of how static strings are typically
7109  * defined for use with tracing_log_err().
7110  */
7111 void tracing_log_err(struct trace_array *tr,
7112 		     const char *loc, const char *cmd,
7113 		     const char **errs, u8 type, u8 pos)
7114 {
7115 	struct tracing_log_err *err;
7116 
7117 	if (!tr)
7118 		tr = &global_trace;
7119 
7120 	mutex_lock(&tracing_err_log_lock);
7121 	err = get_tracing_log_err(tr);
7122 	if (PTR_ERR(err) == -ENOMEM) {
7123 		mutex_unlock(&tracing_err_log_lock);
7124 		return;
7125 	}
7126 
7127 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7128 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7129 
7130 	err->info.errs = errs;
7131 	err->info.type = type;
7132 	err->info.pos = pos;
7133 	err->info.ts = local_clock();
7134 
7135 	list_add_tail(&err->list, &tr->err_log);
7136 	mutex_unlock(&tracing_err_log_lock);
7137 }
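
/*
 * Illustrative only (hypothetical caller, not taken from this file): a
 * command parser would typically keep a static array of error strings and
 * pass an index into it together with a caret position, e.g.:
 *
 *	static const char *parse_errs[] = { "Invalid argument" };
 *
 *	tracing_log_err(tr, "my_parser", cmd, parse_errs,
 *			0, err_pos(cmd, bad_token));
 */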
7138 
7139 static void clear_tracing_err_log(struct trace_array *tr)
7140 {
7141 	struct tracing_log_err *err, *next;
7142 
7143 	mutex_lock(&tracing_err_log_lock);
7144 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7145 		list_del(&err->list);
7146 		kfree(err);
7147 	}
7148 
7149 	tr->n_err_log_entries = 0;
7150 	mutex_unlock(&tracing_err_log_lock);
7151 }
7152 
7153 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7154 {
7155 	struct trace_array *tr = m->private;
7156 
7157 	mutex_lock(&tracing_err_log_lock);
7158 
7159 	return seq_list_start(&tr->err_log, *pos);
7160 }
7161 
7162 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7163 {
7164 	struct trace_array *tr = m->private;
7165 
7166 	return seq_list_next(v, &tr->err_log, pos);
7167 }
7168 
7169 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7170 {
7171 	mutex_unlock(&tracing_err_log_lock);
7172 }
7173 
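/* Print spaces so that the '^' caret lines up under column @pos of the command. */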
7174 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7175 {
7176 	u8 i;
7177 
7178 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7179 		seq_putc(m, ' ');
7180 	for (i = 0; i < pos; i++)
7181 		seq_putc(m, ' ');
7182 	seq_puts(m, "^\n");
7183 }
7184 
7185 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7186 {
7187 	struct tracing_log_err *err = v;
7188 
7189 	if (err) {
7190 		const char *err_text = err->info.errs[err->info.type];
7191 		u64 sec = err->info.ts;
7192 		u32 nsec;
7193 
7194 		nsec = do_div(sec, NSEC_PER_SEC);
7195 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7196 			   err->loc, err_text);
7197 		seq_printf(m, "%s", err->cmd);
7198 		tracing_err_log_show_pos(m, err->info.pos);
7199 	}
7200 
7201 	return 0;
7202 }
7203 
7204 static const struct seq_operations tracing_err_log_seq_ops = {
7205 	.start  = tracing_err_log_seq_start,
7206 	.next   = tracing_err_log_seq_next,
7207 	.stop   = tracing_err_log_seq_stop,
7208 	.show   = tracing_err_log_seq_show
7209 };
7210 
7211 static int tracing_err_log_open(struct inode *inode, struct file *file)
7212 {
7213 	struct trace_array *tr = inode->i_private;
7214 	int ret = 0;
7215 
7216 	ret = tracing_check_open_get_tr(tr);
7217 	if (ret)
7218 		return ret;
7219 
7220 	/* If this file was opened for write, then erase contents */
7221 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7222 		clear_tracing_err_log(tr);
7223 
7224 	if (file->f_mode & FMODE_READ) {
7225 		ret = seq_open(file, &tracing_err_log_seq_ops);
7226 		if (!ret) {
7227 			struct seq_file *m = file->private_data;
7228 			m->private = tr;
7229 		} else {
7230 			trace_array_put(tr);
7231 		}
7232 	}
7233 	return ret;
7234 }
7235 
7236 static ssize_t tracing_err_log_write(struct file *file,
7237 				     const char __user *buffer,
7238 				     size_t count, loff_t *ppos)
7239 {
7240 	return count;
7241 }
7242 
7243 static int tracing_err_log_release(struct inode *inode, struct file *file)
7244 {
7245 	struct trace_array *tr = inode->i_private;
7246 
7247 	trace_array_put(tr);
7248 
7249 	if (file->f_mode & FMODE_READ)
7250 		seq_release(inode, file);
7251 
7252 	return 0;
7253 }
7254 
7255 static const struct file_operations tracing_err_log_fops = {
7256 	.open           = tracing_err_log_open,
7257 	.write		= tracing_err_log_write,
7258 	.read           = seq_read,
7259 	.llseek         = seq_lseek,
7260 	.release        = tracing_err_log_release,
7261 };
7262 
7263 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7264 {
7265 	struct trace_array *tr = inode->i_private;
7266 	struct ftrace_buffer_info *info;
7267 	int ret;
7268 
7269 	ret = tracing_check_open_get_tr(tr);
7270 	if (ret)
7271 		return ret;
7272 
7273 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7274 	if (!info) {
7275 		trace_array_put(tr);
7276 		return -ENOMEM;
7277 	}
7278 
7279 	mutex_lock(&trace_types_lock);
7280 
7281 	info->iter.tr		= tr;
7282 	info->iter.cpu_file	= tracing_get_cpu(inode);
7283 	info->iter.trace	= tr->current_trace;
7284 	info->iter.trace_buffer = &tr->trace_buffer;
7285 	info->spare		= NULL;
7286 	/* Force reading ring buffer for first read */
7287 	info->read		= (unsigned int)-1;
7288 
7289 	filp->private_data = info;
7290 
7291 	tr->current_trace->ref++;
7292 
7293 	mutex_unlock(&trace_types_lock);
7294 
7295 	ret = nonseekable_open(inode, filp);
7296 	if (ret < 0)
7297 		trace_array_put(tr);
7298 
7299 	return ret;
7300 }
7301 
7302 static __poll_t
7303 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7304 {
7305 	struct ftrace_buffer_info *info = filp->private_data;
7306 	struct trace_iterator *iter = &info->iter;
7307 
7308 	return trace_poll(iter, filp, poll_table);
7309 }
7310 
7311 static ssize_t
7312 tracing_buffers_read(struct file *filp, char __user *ubuf,
7313 		     size_t count, loff_t *ppos)
7314 {
7315 	struct ftrace_buffer_info *info = filp->private_data;
7316 	struct trace_iterator *iter = &info->iter;
7317 	ssize_t ret = 0;
7318 	ssize_t size;
7319 
7320 	if (!count)
7321 		return 0;
7322 
7323 #ifdef CONFIG_TRACER_MAX_TRACE
7324 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7325 		return -EBUSY;
7326 #endif
7327 
7328 	if (!info->spare) {
7329 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7330 							  iter->cpu_file);
7331 		if (IS_ERR(info->spare)) {
7332 			ret = PTR_ERR(info->spare);
7333 			info->spare = NULL;
7334 		} else {
7335 			info->spare_cpu = iter->cpu_file;
7336 		}
7337 	}
7338 	if (!info->spare)
7339 		return ret;
7340 
7341 	/* Do we have previous read data to read? */
7342 	if (info->read < PAGE_SIZE)
7343 		goto read;
7344 
7345  again:
7346 	trace_access_lock(iter->cpu_file);
7347 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7348 				    &info->spare,
7349 				    count,
7350 				    iter->cpu_file, 0);
7351 	trace_access_unlock(iter->cpu_file);
7352 
7353 	if (ret < 0) {
7354 		if (trace_empty(iter)) {
7355 			if ((filp->f_flags & O_NONBLOCK))
7356 				return -EAGAIN;
7357 
7358 			ret = wait_on_pipe(iter, 0);
7359 			if (ret)
7360 				return ret;
7361 
7362 			goto again;
7363 		}
7364 		return 0;
7365 	}
7366 
7367 	info->read = 0;
7368  read:
7369 	size = PAGE_SIZE - info->read;
7370 	if (size > count)
7371 		size = count;
7372 
7373 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7374 	if (ret == size)
7375 		return -EFAULT;
7376 
7377 	size -= ret;
7378 
7379 	*ppos += size;
7380 	info->read += size;
7381 
7382 	return size;
7383 }
7384 
7385 static int tracing_buffers_release(struct inode *inode, struct file *file)
7386 {
7387 	struct ftrace_buffer_info *info = file->private_data;
7388 	struct trace_iterator *iter = &info->iter;
7389 
7390 	mutex_lock(&trace_types_lock);
7391 
7392 	iter->tr->current_trace->ref--;
7393 
7394 	__trace_array_put(iter->tr);
7395 
7396 	if (info->spare)
7397 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7398 					   info->spare_cpu, info->spare);
7399 	kfree(info);
7400 
7401 	mutex_unlock(&trace_types_lock);
7402 
7403 	return 0;
7404 }
7405 
7406 struct buffer_ref {
7407 	struct ring_buffer	*buffer;
7408 	void			*page;
7409 	int			cpu;
7410 	refcount_t		refcount;
7411 };
7412 
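/*
 * Drop one reference to @ref; on the final put, return the read page to
 * the ring buffer and free the ref itself.
 */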
7413 static void buffer_ref_release(struct buffer_ref *ref)
7414 {
7415 	if (!refcount_dec_and_test(&ref->refcount))
7416 		return;
7417 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7418 	kfree(ref);
7419 }
7420 
7421 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7422 				    struct pipe_buffer *buf)
7423 {
7424 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7425 
7426 	buffer_ref_release(ref);
7427 	buf->private = 0;
7428 }
7429 
7430 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7431 				struct pipe_buffer *buf)
7432 {
7433 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7434 
7435 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7436 		return false;
7437 
7438 	refcount_inc(&ref->refcount);
7439 	return true;
7440 }
7441 
7442 /* Pipe buffer operations for a buffer. */
7443 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7444 	.confirm		= generic_pipe_buf_confirm,
7445 	.release		= buffer_pipe_buf_release,
7446 	.steal			= generic_pipe_buf_nosteal,
7447 	.get			= buffer_pipe_buf_get,
7448 };
7449 
7450 /*
7451  * Callback from splice_to_pipe(), used if we need to release some pages
7452  * at the end of the spd in case we errored out while filling the pipe.
7453  */
7454 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7455 {
7456 	struct buffer_ref *ref =
7457 		(struct buffer_ref *)spd->partial[i].private;
7458 
7459 	buffer_ref_release(ref);
7460 	spd->partial[i].private = 0;
7461 }
7462 
7463 static ssize_t
7464 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7465 			    struct pipe_inode_info *pipe, size_t len,
7466 			    unsigned int flags)
7467 {
7468 	struct ftrace_buffer_info *info = file->private_data;
7469 	struct trace_iterator *iter = &info->iter;
7470 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7471 	struct page *pages_def[PIPE_DEF_BUFFERS];
7472 	struct splice_pipe_desc spd = {
7473 		.pages		= pages_def,
7474 		.partial	= partial_def,
7475 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7476 		.ops		= &buffer_pipe_buf_ops,
7477 		.spd_release	= buffer_spd_release,
7478 	};
7479 	struct buffer_ref *ref;
7480 	int entries, i;
7481 	ssize_t ret = 0;
7482 
7483 #ifdef CONFIG_TRACER_MAX_TRACE
7484 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7485 		return -EBUSY;
7486 #endif
7487 
7488 	if (*ppos & (PAGE_SIZE - 1))
7489 		return -EINVAL;
7490 
7491 	if (len & (PAGE_SIZE - 1)) {
7492 		if (len < PAGE_SIZE)
7493 			return -EINVAL;
7494 		len &= PAGE_MASK;
7495 	}
7496 
7497 	if (splice_grow_spd(pipe, &spd))
7498 		return -ENOMEM;
7499 
7500  again:
7501 	trace_access_lock(iter->cpu_file);
7502 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7503 
7504 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7505 		struct page *page;
7506 		int r;
7507 
7508 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7509 		if (!ref) {
7510 			ret = -ENOMEM;
7511 			break;
7512 		}
7513 
7514 		refcount_set(&ref->refcount, 1);
7515 		ref->buffer = iter->trace_buffer->buffer;
7516 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7517 		if (IS_ERR(ref->page)) {
7518 			ret = PTR_ERR(ref->page);
7519 			ref->page = NULL;
7520 			kfree(ref);
7521 			break;
7522 		}
7523 		ref->cpu = iter->cpu_file;
7524 
7525 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7526 					  len, iter->cpu_file, 1);
7527 		if (r < 0) {
7528 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7529 						   ref->page);
7530 			kfree(ref);
7531 			break;
7532 		}
7533 
7534 		page = virt_to_page(ref->page);
7535 
7536 		spd.pages[i] = page;
7537 		spd.partial[i].len = PAGE_SIZE;
7538 		spd.partial[i].offset = 0;
7539 		spd.partial[i].private = (unsigned long)ref;
7540 		spd.nr_pages++;
7541 		*ppos += PAGE_SIZE;
7542 
7543 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7544 	}
7545 
7546 	trace_access_unlock(iter->cpu_file);
7547 	spd.nr_pages = i;
7548 
7549 	/* did we read anything? */
7550 	if (!spd.nr_pages) {
7551 		if (ret)
7552 			goto out;
7553 
7554 		ret = -EAGAIN;
7555 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7556 			goto out;
7557 
7558 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7559 		if (ret)
7560 			goto out;
7561 
7562 		goto again;
7563 	}
7564 
7565 	ret = splice_to_pipe(pipe, &spd);
7566 out:
7567 	splice_shrink_spd(&spd);
7568 
7569 	return ret;
7570 }
7571 
7572 static const struct file_operations tracing_buffers_fops = {
7573 	.open		= tracing_buffers_open,
7574 	.read		= tracing_buffers_read,
7575 	.poll		= tracing_buffers_poll,
7576 	.release	= tracing_buffers_release,
7577 	.splice_read	= tracing_buffers_splice_read,
7578 	.llseek		= no_llseek,
7579 };
7580 
7581 static ssize_t
7582 tracing_stats_read(struct file *filp, char __user *ubuf,
7583 		   size_t count, loff_t *ppos)
7584 {
7585 	struct inode *inode = file_inode(filp);
7586 	struct trace_array *tr = inode->i_private;
7587 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7588 	int cpu = tracing_get_cpu(inode);
7589 	struct trace_seq *s;
7590 	unsigned long cnt;
7591 	unsigned long long t;
7592 	unsigned long usec_rem;
7593 
7594 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7595 	if (!s)
7596 		return -ENOMEM;
7597 
7598 	trace_seq_init(s);
7599 
7600 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7601 	trace_seq_printf(s, "entries: %ld\n", cnt);
7602 
7603 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7604 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7605 
7606 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7607 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7608 
7609 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7610 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7611 
7612 	if (trace_clocks[tr->clock_id].in_ns) {
7613 		/* local or global for trace_clock */
7614 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7615 		usec_rem = do_div(t, USEC_PER_SEC);
7616 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7617 								t, usec_rem);
7618 
7619 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7620 		usec_rem = do_div(t, USEC_PER_SEC);
7621 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7622 	} else {
7623 		/* counter or tsc mode for trace_clock */
7624 		trace_seq_printf(s, "oldest event ts: %llu\n",
7625 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7626 
7627 		trace_seq_printf(s, "now ts: %llu\n",
7628 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7629 	}
7630 
7631 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7632 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7633 
7634 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7635 	trace_seq_printf(s, "read events: %ld\n", cnt);
7636 
7637 	count = simple_read_from_buffer(ubuf, count, ppos,
7638 					s->buffer, trace_seq_used(s));
7639 
7640 	kfree(s);
7641 
7642 	return count;
7643 }
7644 
7645 static const struct file_operations tracing_stats_fops = {
7646 	.open		= tracing_open_generic_tr,
7647 	.read		= tracing_stats_read,
7648 	.llseek		= generic_file_llseek,
7649 	.release	= tracing_release_generic_tr,
7650 };
7651 
7652 #ifdef CONFIG_DYNAMIC_FTRACE
7653 
7654 static ssize_t
7655 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7656 		  size_t cnt, loff_t *ppos)
7657 {
7658 	ssize_t ret;
7659 	char *buf;
7660 	int r;
7661 
7662 	/* 256 should be plenty to hold the amount needed */
7663 	buf = kmalloc(256, GFP_KERNEL);
7664 	if (!buf)
7665 		return -ENOMEM;
7666 
7667 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7668 		      ftrace_update_tot_cnt,
7669 		      ftrace_number_of_pages,
7670 		      ftrace_number_of_groups);
7671 
7672 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7673 	kfree(buf);
7674 	return ret;
7675 }
7676 
7677 static const struct file_operations tracing_dyn_info_fops = {
7678 	.open		= tracing_open_generic,
7679 	.read		= tracing_read_dyn_info,
7680 	.llseek		= generic_file_llseek,
7681 };
7682 #endif /* CONFIG_DYNAMIC_FTRACE */
7683 
7684 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7685 static void
7686 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7687 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7688 		void *data)
7689 {
7690 	tracing_snapshot_instance(tr);
7691 }
7692 
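/*
 * Like ftrace_snapshot(), but honor an optional per-ip countdown kept in
 * the func mapper: once the count reaches zero, no further snapshots are
 * taken for that function.
 */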
7693 static void
7694 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7695 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7696 		      void *data)
7697 {
7698 	struct ftrace_func_mapper *mapper = data;
7699 	long *count = NULL;
7700 
7701 	if (mapper)
7702 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7703 
7704 	if (count) {
7705 
7706 		if (*count <= 0)
7707 			return;
7708 
7709 		(*count)--;
7710 	}
7711 
7712 	tracing_snapshot_instance(tr);
7713 }
7714 
7715 static int
7716 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7717 		      struct ftrace_probe_ops *ops, void *data)
7718 {
7719 	struct ftrace_func_mapper *mapper = data;
7720 	long *count = NULL;
7721 
7722 	seq_printf(m, "%ps:", (void *)ip);
7723 
7724 	seq_puts(m, "snapshot");
7725 
7726 	if (mapper)
7727 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7728 
7729 	if (count)
7730 		seq_printf(m, ":count=%ld\n", *count);
7731 	else
7732 		seq_puts(m, ":unlimited\n");
7733 
7734 	return 0;
7735 }
7736 
7737 static int
7738 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7739 		     unsigned long ip, void *init_data, void **data)
7740 {
7741 	struct ftrace_func_mapper *mapper = *data;
7742 
7743 	if (!mapper) {
7744 		mapper = allocate_ftrace_func_mapper();
7745 		if (!mapper)
7746 			return -ENOMEM;
7747 		*data = mapper;
7748 	}
7749 
7750 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7751 }
7752 
7753 static void
7754 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7755 		     unsigned long ip, void *data)
7756 {
7757 	struct ftrace_func_mapper *mapper = data;
7758 
7759 	if (!ip) {
7760 		if (!mapper)
7761 			return;
7762 		free_ftrace_func_mapper(mapper, NULL);
7763 		return;
7764 	}
7765 
7766 	ftrace_func_mapper_remove_ip(mapper, ip);
7767 }
7768 
7769 static struct ftrace_probe_ops snapshot_probe_ops = {
7770 	.func			= ftrace_snapshot,
7771 	.print			= ftrace_snapshot_print,
7772 };
7773 
7774 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7775 	.func			= ftrace_count_snapshot,
7776 	.print			= ftrace_snapshot_print,
7777 	.init			= ftrace_snapshot_init,
7778 	.free			= ftrace_snapshot_free,
7779 };
7780 
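/*
 * Handler for the "snapshot" command of set_ftrace_filter. The accepted
 * syntax is <function>:snapshot[:count], where the optional count limits
 * how many snapshots get taken and a leading '!' removes a previously
 * registered probe. Example (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   echo 'schedule:snapshot:3' > set_ftrace_filter
 */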
7781 static int
7782 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7783 			       char *glob, char *cmd, char *param, int enable)
7784 {
7785 	struct ftrace_probe_ops *ops;
7786 	void *count = (void *)-1;
7787 	char *number;
7788 	int ret;
7789 
7790 	if (!tr)
7791 		return -ENODEV;
7792 
7793 	/* hash funcs only work with set_ftrace_filter */
7794 	if (!enable)
7795 		return -EINVAL;
7796 
7797 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;

7798 
7799 	if (glob[0] == '!')
7800 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7801 
7802 	if (!param)
7803 		goto out_reg;
7804 
7805 	number = strsep(&param, ":");
7806 
7807 	if (!strlen(number))
7808 		goto out_reg;
7809 
7810 	/*
7811 	 * We use the callback data field (which is a pointer)
7812 	 * as our counter.
7813 	 */
7814 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7815 	if (ret)
7816 		return ret;
7817 
7818  out_reg:
7819 	ret = tracing_alloc_snapshot_instance(tr);
7820 	if (ret < 0)
7821 		goto out;
7822 
7823 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7824 
7825  out:
7826 	return ret < 0 ? ret : 0;
7827 }
7828 
7829 static struct ftrace_func_command ftrace_snapshot_cmd = {
7830 	.name			= "snapshot",
7831 	.func			= ftrace_trace_snapshot_callback,
7832 };
7833 
7834 static __init int register_snapshot_cmd(void)
7835 {
7836 	return register_ftrace_command(&ftrace_snapshot_cmd);
7837 }
7838 #else
7839 static inline __init int register_snapshot_cmd(void) { return 0; }
7840 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7841 
7842 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7843 {
7844 	if (WARN_ON(!tr->dir))
7845 		return ERR_PTR(-ENODEV);
7846 
7847 	/* Top directory uses NULL as the parent */
7848 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7849 		return NULL;
7850 
7851 	/* All sub buffers have a descriptor */
7852 	return tr->dir;
7853 }
7854 
7855 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7856 {
7857 	struct dentry *d_tracer;
7858 
7859 	if (tr->percpu_dir)
7860 		return tr->percpu_dir;
7861 
7862 	d_tracer = tracing_get_dentry(tr);
7863 	if (IS_ERR(d_tracer))
7864 		return NULL;
7865 
7866 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7867 
7868 	WARN_ONCE(!tr->percpu_dir,
7869 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7870 
7871 	return tr->percpu_dir;
7872 }
7873 
7874 static struct dentry *
7875 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7876 		      void *data, long cpu, const struct file_operations *fops)
7877 {
7878 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7879 
7880 	if (ret) /* See tracing_get_cpu() */
7881 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7882 	return ret;
7883 }
7884 
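/*
 * Create the per_cpu/cpu<N> directory for @tr and populate it with the
 * per-CPU files (trace_pipe, trace, trace_pipe_raw, stats, buffer_size_kb
 * and, when snapshots are configured, snapshot/snapshot_raw).
 */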
7885 static void
7886 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7887 {
7888 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7889 	struct dentry *d_cpu;
7890 	char cpu_dir[30]; /* 30 characters should be more than enough */
7891 
7892 	if (!d_percpu)
7893 		return;
7894 
7895 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7896 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7897 	if (!d_cpu) {
7898 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7899 		return;
7900 	}
7901 
7902 	/* per cpu trace_pipe */
7903 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7904 				tr, cpu, &tracing_pipe_fops);
7905 
7906 	/* per cpu trace */
7907 	trace_create_cpu_file("trace", 0644, d_cpu,
7908 				tr, cpu, &tracing_fops);
7909 
7910 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7911 				tr, cpu, &tracing_buffers_fops);
7912 
7913 	trace_create_cpu_file("stats", 0444, d_cpu,
7914 				tr, cpu, &tracing_stats_fops);
7915 
7916 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7917 				tr, cpu, &tracing_entries_fops);
7918 
7919 #ifdef CONFIG_TRACER_SNAPSHOT
7920 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7921 				tr, cpu, &snapshot_fops);
7922 
7923 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7924 				tr, cpu, &snapshot_raw_fops);
7925 #endif
7926 }
7927 
7928 #ifdef CONFIG_FTRACE_SELFTEST
7929 /* Let selftest have access to static functions in this file */
7930 #include "trace_selftest.c"
7931 #endif
7932 
7933 static ssize_t
7934 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7935 			loff_t *ppos)
7936 {
7937 	struct trace_option_dentry *topt = filp->private_data;
7938 	char *buf;
7939 
7940 	if (topt->flags->val & topt->opt->bit)
7941 		buf = "1\n";
7942 	else
7943 		buf = "0\n";
7944 
7945 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7946 }
7947 
7948 static ssize_t
7949 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7950 			 loff_t *ppos)
7951 {
7952 	struct trace_option_dentry *topt = filp->private_data;
7953 	unsigned long val;
7954 	int ret;
7955 
7956 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7957 	if (ret)
7958 		return ret;
7959 
7960 	if (val != 0 && val != 1)
7961 		return -EINVAL;
7962 
7963 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7964 		mutex_lock(&trace_types_lock);
7965 		ret = __set_tracer_option(topt->tr, topt->flags,
7966 					  topt->opt, !val);
7967 		mutex_unlock(&trace_types_lock);
7968 		if (ret)
7969 			return ret;
7970 	}
7971 
7972 	*ppos += cnt;
7973 
7974 	return cnt;
7975 }
7976 
7977 
7978 static const struct file_operations trace_options_fops = {
7979 	.open = tracing_open_generic,
7980 	.read = trace_options_read,
7981 	.write = trace_options_write,
7982 	.llseek	= generic_file_llseek,
7983 };
7984 
7985 /*
7986  * In order to pass in both the trace_array descriptor as well as the index
7987  * to the flag that the trace option file represents, the trace_array
7988  * has a character array of trace_flags_index[], which holds the index
7989  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7990  * The address of this character array is passed to the flag option file
7991  * read/write callbacks.
7992  *
7993  * In order to extract both the index and the trace_array descriptor,
7994  * get_tr_index() uses the following algorithm.
7995  *
7996  *   idx = *ptr;
7997  *
7998  * This works because the pointer points at its own entry in the
7999  * index array, so dereferencing it yields the index (index[1] == 1).
8000  *
8001  * To get the trace_array descriptor, subtract that index from the
8002  * pointer, which lands at the start of the array:
8003  *
8004  *   ptr - idx == &index[0]
8005  *
8006  * Then a simple container_of() from that pointer gets us to the
8007  * trace_array descriptor.
8008  */
8009 static void get_tr_index(void *data, struct trace_array **ptr,
8010 			 unsigned int *pindex)
8011 {
8012 	*pindex = *(unsigned char *)data;
8013 
8014 	*ptr = container_of(data - *pindex, struct trace_array,
8015 			    trace_flags_index);
8016 }
8017 
8018 static ssize_t
8019 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8020 			loff_t *ppos)
8021 {
8022 	void *tr_index = filp->private_data;
8023 	struct trace_array *tr;
8024 	unsigned int index;
8025 	char *buf;
8026 
8027 	get_tr_index(tr_index, &tr, &index);
8028 
8029 	if (tr->trace_flags & (1 << index))
8030 		buf = "1\n";
8031 	else
8032 		buf = "0\n";
8033 
8034 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8035 }
8036 
8037 static ssize_t
8038 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8039 			 loff_t *ppos)
8040 {
8041 	void *tr_index = filp->private_data;
8042 	struct trace_array *tr;
8043 	unsigned int index;
8044 	unsigned long val;
8045 	int ret;
8046 
8047 	get_tr_index(tr_index, &tr, &index);
8048 
8049 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8050 	if (ret)
8051 		return ret;
8052 
8053 	if (val != 0 && val != 1)
8054 		return -EINVAL;
8055 
8056 	mutex_lock(&trace_types_lock);
8057 	ret = set_tracer_flag(tr, 1 << index, val);
8058 	mutex_unlock(&trace_types_lock);
8059 
8060 	if (ret < 0)
8061 		return ret;
8062 
8063 	*ppos += cnt;
8064 
8065 	return cnt;
8066 }
8067 
8068 static const struct file_operations trace_options_core_fops = {
8069 	.open = tracing_open_generic,
8070 	.read = trace_options_core_read,
8071 	.write = trace_options_core_write,
8072 	.llseek = generic_file_llseek,
8073 };
8074 
8075 struct dentry *trace_create_file(const char *name,
8076 				 umode_t mode,
8077 				 struct dentry *parent,
8078 				 void *data,
8079 				 const struct file_operations *fops)
8080 {
8081 	struct dentry *ret;
8082 
8083 	ret = tracefs_create_file(name, mode, parent, data, fops);
8084 	if (!ret)
8085 		pr_warn("Could not create tracefs '%s' entry\n", name);
8086 
8087 	return ret;
8088 }
8089 
8090 
8091 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8092 {
8093 	struct dentry *d_tracer;
8094 
8095 	if (tr->options)
8096 		return tr->options;
8097 
8098 	d_tracer = tracing_get_dentry(tr);
8099 	if (IS_ERR(d_tracer))
8100 		return NULL;
8101 
8102 	tr->options = tracefs_create_dir("options", d_tracer);
8103 	if (!tr->options) {
8104 		pr_warn("Could not create tracefs directory 'options'\n");
8105 		return NULL;
8106 	}
8107 
8108 	return tr->options;
8109 }
8110 
8111 static void
8112 create_trace_option_file(struct trace_array *tr,
8113 			 struct trace_option_dentry *topt,
8114 			 struct tracer_flags *flags,
8115 			 struct tracer_opt *opt)
8116 {
8117 	struct dentry *t_options;
8118 
8119 	t_options = trace_options_init_dentry(tr);
8120 	if (!t_options)
8121 		return;
8122 
8123 	topt->flags = flags;
8124 	topt->opt = opt;
8125 	topt->tr = tr;
8126 
8127 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8128 				    &trace_options_fops);
8129 
8130 }
8131 
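/*
 * Create one options/<flag> file for each private flag of @tracer. The
 * resulting trace_option_dentry array is stashed in tr->topts so it can
 * be freed when the instance is removed.
 */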
8132 static void
8133 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8134 {
8135 	struct trace_option_dentry *topts;
8136 	struct trace_options *tr_topts;
8137 	struct tracer_flags *flags;
8138 	struct tracer_opt *opts;
8139 	int cnt;
8140 	int i;
8141 
8142 	if (!tracer)
8143 		return;
8144 
8145 	flags = tracer->flags;
8146 
8147 	if (!flags || !flags->opts)
8148 		return;
8149 
8150 	/*
8151 	 * If this is an instance, only create flags for tracers
8152 	 * the instance may have.
8153 	 */
8154 	if (!trace_ok_for_array(tracer, tr))
8155 		return;
8156 
8157 	for (i = 0; i < tr->nr_topts; i++) {
8158 		/* Make sure there are no duplicate flags. */
8159 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8160 			return;
8161 	}
8162 
8163 	opts = flags->opts;
8164 
8165 	for (cnt = 0; opts[cnt].name; cnt++)
8166 		;
8167 
8168 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8169 	if (!topts)
8170 		return;
8171 
8172 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8173 			    GFP_KERNEL);
8174 	if (!tr_topts) {
8175 		kfree(topts);
8176 		return;
8177 	}
8178 
8179 	tr->topts = tr_topts;
8180 	tr->topts[tr->nr_topts].tracer = tracer;
8181 	tr->topts[tr->nr_topts].topts = topts;
8182 	tr->nr_topts++;
8183 
8184 	for (cnt = 0; opts[cnt].name; cnt++) {
8185 		create_trace_option_file(tr, &topts[cnt], flags,
8186 					 &opts[cnt]);
8187 		WARN_ONCE(topts[cnt].entry == NULL,
8188 			  "Failed to create trace option: %s",
8189 			  opts[cnt].name);
8190 	}
8191 }
8192 
8193 static struct dentry *
8194 create_trace_option_core_file(struct trace_array *tr,
8195 			      const char *option, long index)
8196 {
8197 	struct dentry *t_options;
8198 
8199 	t_options = trace_options_init_dentry(tr);
8200 	if (!t_options)
8201 		return NULL;
8202 
8203 	return trace_create_file(option, 0644, t_options,
8204 				 (void *)&tr->trace_flags_index[index],
8205 				 &trace_options_core_fops);
8206 }
8207 
8208 static void create_trace_options_dir(struct trace_array *tr)
8209 {
8210 	struct dentry *t_options;
8211 	bool top_level = tr == &global_trace;
8212 	int i;
8213 
8214 	t_options = trace_options_init_dentry(tr);
8215 	if (!t_options)
8216 		return;
8217 
8218 	for (i = 0; trace_options[i]; i++) {
8219 		if (top_level ||
8220 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8221 			create_trace_option_core_file(tr, trace_options[i], i);
8222 	}
8223 }
8224 
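/*
 * Read/write handlers for the "tracing_on" file: reads report whether the
 * ring buffer is currently recording, and writes of 0/1 turn recording
 * off/on while also invoking the current tracer's stop/start callbacks.
 * Example (from the tracefs directory):
 *
 *   echo 0 > tracing_on
 */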
8225 static ssize_t
8226 rb_simple_read(struct file *filp, char __user *ubuf,
8227 	       size_t cnt, loff_t *ppos)
8228 {
8229 	struct trace_array *tr = filp->private_data;
8230 	char buf[64];
8231 	int r;
8232 
8233 	r = tracer_tracing_is_on(tr);
8234 	r = sprintf(buf, "%d\n", r);
8235 
8236 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8237 }
8238 
8239 static ssize_t
8240 rb_simple_write(struct file *filp, const char __user *ubuf,
8241 		size_t cnt, loff_t *ppos)
8242 {
8243 	struct trace_array *tr = filp->private_data;
8244 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8245 	unsigned long val;
8246 	int ret;
8247 
8248 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8249 	if (ret)
8250 		return ret;
8251 
8252 	if (buffer) {
8253 		mutex_lock(&trace_types_lock);
8254 		if (!!val == tracer_tracing_is_on(tr)) {
8255 			val = 0; /* do nothing */
8256 		} else if (val) {
8257 			tracer_tracing_on(tr);
8258 			if (tr->current_trace->start)
8259 				tr->current_trace->start(tr);
8260 		} else {
8261 			tracer_tracing_off(tr);
8262 			if (tr->current_trace->stop)
8263 				tr->current_trace->stop(tr);
8264 		}
8265 		mutex_unlock(&trace_types_lock);
8266 	}
8267 
8268 	(*ppos)++;
8269 
8270 	return cnt;
8271 }
8272 
8273 static const struct file_operations rb_simple_fops = {
8274 	.open		= tracing_open_generic_tr,
8275 	.read		= rb_simple_read,
8276 	.write		= rb_simple_write,
8277 	.release	= tracing_release_generic_tr,
8278 	.llseek		= default_llseek,
8279 };
8280 
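/*
 * Handlers for the "buffer_percent" file: the value is handed to
 * wait_on_pipe() as the "full" watermark, i.e. how full the ring buffer
 * should be before blocked readers are woken. Writes of 0 are bumped up
 * to 1.
 */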
8281 static ssize_t
8282 buffer_percent_read(struct file *filp, char __user *ubuf,
8283 		    size_t cnt, loff_t *ppos)
8284 {
8285 	struct trace_array *tr = filp->private_data;
8286 	char buf[64];
8287 	int r;
8288 
8289 	r = tr->buffer_percent;
8290 	r = sprintf(buf, "%d\n", r);
8291 
8292 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8293 }
8294 
8295 static ssize_t
8296 buffer_percent_write(struct file *filp, const char __user *ubuf,
8297 		     size_t cnt, loff_t *ppos)
8298 {
8299 	struct trace_array *tr = filp->private_data;
8300 	unsigned long val;
8301 	int ret;
8302 
8303 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8304 	if (ret)
8305 		return ret;
8306 
8307 	if (val > 100)
8308 		return -EINVAL;
8309 
8310 	if (!val)
8311 		val = 1;
8312 
8313 	tr->buffer_percent = val;
8314 
8315 	(*ppos)++;
8316 
8317 	return cnt;
8318 }
8319 
8320 static const struct file_operations buffer_percent_fops = {
8321 	.open		= tracing_open_generic_tr,
8322 	.read		= buffer_percent_read,
8323 	.write		= buffer_percent_write,
8324 	.release	= tracing_release_generic_tr,
8325 	.llseek		= default_llseek,
8326 };
8327 
8328 static struct dentry *trace_instance_dir;
8329 
8330 static void
8331 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8332 
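/*
 * Allocate the ring buffer and the per-CPU trace_array_cpu data for one
 * trace_buffer of @tr. Returns 0 on success or -ENOMEM on failure.
 */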
8333 static int
8334 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8335 {
8336 	enum ring_buffer_flags rb_flags;
8337 
8338 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8339 
8340 	buf->tr = tr;
8341 
8342 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8343 	if (!buf->buffer)
8344 		return -ENOMEM;
8345 
8346 	buf->data = alloc_percpu(struct trace_array_cpu);
8347 	if (!buf->data) {
8348 		ring_buffer_free(buf->buffer);
8349 		buf->buffer = NULL;
8350 		return -ENOMEM;
8351 	}
8352 
8353 	/* Allocate the first page for all buffers */
8354 	set_buffer_entries(&tr->trace_buffer,
8355 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8356 
8357 	return 0;
8358 }
8359 
8360 static int allocate_trace_buffers(struct trace_array *tr, int size)
8361 {
8362 	int ret;
8363 
8364 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8365 	if (ret)
8366 		return ret;
8367 
8368 #ifdef CONFIG_TRACER_MAX_TRACE
8369 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8370 				    allocate_snapshot ? size : 1);
8371 	if (WARN_ON(ret)) {
8372 		ring_buffer_free(tr->trace_buffer.buffer);
8373 		tr->trace_buffer.buffer = NULL;
8374 		free_percpu(tr->trace_buffer.data);
8375 		tr->trace_buffer.data = NULL;
8376 		return -ENOMEM;
8377 	}
8378 	tr->allocated_snapshot = allocate_snapshot;
8379 
8380 	/*
8381 	 * Only the top level trace array gets its snapshot allocated
8382 	 * from the kernel command line.
8383 	 */
8384 	allocate_snapshot = false;
8385 #endif
8386 	return 0;
8387 }
8388 
8389 static void free_trace_buffer(struct trace_buffer *buf)
8390 {
8391 	if (buf->buffer) {
8392 		ring_buffer_free(buf->buffer);
8393 		buf->buffer = NULL;
8394 		free_percpu(buf->data);
8395 		buf->data = NULL;
8396 	}
8397 }
8398 
8399 static void free_trace_buffers(struct trace_array *tr)
8400 {
8401 	if (!tr)
8402 		return;
8403 
8404 	free_trace_buffer(&tr->trace_buffer);
8405 
8406 #ifdef CONFIG_TRACER_MAX_TRACE
8407 	free_trace_buffer(&tr->max_buffer);
8408 #endif
8409 }
8410 
8411 static void init_trace_flags_index(struct trace_array *tr)
8412 {
8413 	int i;
8414 
8415 	/* Used by the trace options files */
8416 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8417 		tr->trace_flags_index[i] = i;
8418 }
8419 
8420 static void __update_tracer_options(struct trace_array *tr)
8421 {
8422 	struct tracer *t;
8423 
8424 	for (t = trace_types; t; t = t->next)
8425 		add_tracer_options(tr, t);
8426 }
8427 
8428 static void update_tracer_options(struct trace_array *tr)
8429 {
8430 	mutex_lock(&trace_types_lock);
8431 	__update_tracer_options(tr);
8432 	mutex_unlock(&trace_types_lock);
8433 }
8434 
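/*
 * Create a new trace instance named @name: allocate its buffers, create
 * its tracefs directory (under instances/) and add it to
 * ftrace_trace_arrays. Returns the new trace_array on success, or an
 * ERR_PTR() on failure (-EEXIST if the name is already taken).
 */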
8435 struct trace_array *trace_array_create(const char *name)
8436 {
8437 	struct trace_array *tr;
8438 	int ret;
8439 
8440 	mutex_lock(&event_mutex);
8441 	mutex_lock(&trace_types_lock);
8442 
8443 	ret = -EEXIST;
8444 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8445 		if (tr->name && strcmp(tr->name, name) == 0)
8446 			goto out_unlock;
8447 	}
8448 
8449 	ret = -ENOMEM;
8450 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8451 	if (!tr)
8452 		goto out_unlock;
8453 
8454 	tr->name = kstrdup(name, GFP_KERNEL);
8455 	if (!tr->name)
8456 		goto out_free_tr;
8457 
8458 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8459 		goto out_free_tr;
8460 
8461 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8462 
8463 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8464 
8465 	raw_spin_lock_init(&tr->start_lock);
8466 
8467 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8468 
8469 	tr->current_trace = &nop_trace;
8470 
8471 	INIT_LIST_HEAD(&tr->systems);
8472 	INIT_LIST_HEAD(&tr->events);
8473 	INIT_LIST_HEAD(&tr->hist_vars);
8474 	INIT_LIST_HEAD(&tr->err_log);
8475 
8476 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8477 		goto out_free_tr;
8478 
8479 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8480 	if (!tr->dir)
8481 		goto out_free_tr;
8482 
8483 	ret = event_trace_add_tracer(tr->dir, tr);
8484 	if (ret) {
8485 		tracefs_remove_recursive(tr->dir);
8486 		goto out_free_tr;
8487 	}
8488 
8489 	ftrace_init_trace_array(tr);
8490 
8491 	init_tracer_tracefs(tr, tr->dir);
8492 	init_trace_flags_index(tr);
8493 	__update_tracer_options(tr);
8494 
8495 	list_add(&tr->list, &ftrace_trace_arrays);
8496 
8497 	mutex_unlock(&trace_types_lock);
8498 	mutex_unlock(&event_mutex);
8499 
8500 	return tr;
8501 
8502  out_free_tr:
8503 	free_trace_buffers(tr);
8504 	free_cpumask_var(tr->tracing_cpumask);
8505 	kfree(tr->name);
8506 	kfree(tr);
8507 
8508  out_unlock:
8509 	mutex_unlock(&trace_types_lock);
8510 	mutex_unlock(&event_mutex);
8511 
8512 	return ERR_PTR(ret);
8513 }
8514 EXPORT_SYMBOL_GPL(trace_array_create);
8515 
8516 static int instance_mkdir(const char *name)
8517 {
8518 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8519 }
8520 
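/*
 * Tear down a trace instance: unhook its events and probes, remove its
 * tracefs directory and free its buffers. Returns -EBUSY while the
 * instance is still referenced. Callers must hold event_mutex and
 * trace_types_lock.
 */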
8521 static int __remove_instance(struct trace_array *tr)
8522 {
8523 	int i;
8524 
8525 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8526 		return -EBUSY;
8527 
8528 	list_del(&tr->list);
8529 
8530 	/* Disable all the flags that were enabled coming in */
8531 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8532 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8533 			set_tracer_flag(tr, 1 << i, 0);
8534 	}
8535 
8536 	tracing_set_nop(tr);
8537 	clear_ftrace_function_probes(tr);
8538 	event_trace_del_tracer(tr);
8539 	ftrace_clear_pids(tr);
8540 	ftrace_destroy_function_files(tr);
8541 	tracefs_remove_recursive(tr->dir);
8542 	free_trace_buffers(tr);
8543 
8544 	for (i = 0; i < tr->nr_topts; i++) {
8545 		kfree(tr->topts[i].topts);
8546 	}
8547 	kfree(tr->topts);
8548 
8549 	free_cpumask_var(tr->tracing_cpumask);
8550 	kfree(tr->name);
8551 	kfree(tr);
8552 	tr = NULL;
8553 
8554 	return 0;
8555 }
8556 
8557 int trace_array_destroy(struct trace_array *tr)
8558 {
8559 	int ret;
8560 
8561 	if (!tr)
8562 		return -EINVAL;
8563 
8564 	mutex_lock(&event_mutex);
8565 	mutex_lock(&trace_types_lock);
8566 
8567 	ret = __remove_instance(tr);
8568 
8569 	mutex_unlock(&trace_types_lock);
8570 	mutex_unlock(&event_mutex);
8571 
8572 	return ret;
8573 }
8574 EXPORT_SYMBOL_GPL(trace_array_destroy);
8575 
8576 static int instance_rmdir(const char *name)
8577 {
8578 	struct trace_array *tr;
8579 	int ret;
8580 
8581 	mutex_lock(&event_mutex);
8582 	mutex_lock(&trace_types_lock);
8583 
8584 	ret = -ENODEV;
8585 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8586 		if (tr->name && strcmp(tr->name, name) == 0) {
8587 			ret = __remove_instance(tr);
8588 			break;
8589 		}
8590 	}
8591 
8592 	mutex_unlock(&trace_types_lock);
8593 	mutex_unlock(&event_mutex);
8594 
8595 	return ret;
8596 }
8597 
8598 static __init void create_trace_instances(struct dentry *d_tracer)
8599 {
8600 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8601 							 instance_mkdir,
8602 							 instance_rmdir);
8603 	if (WARN_ON(!trace_instance_dir))
8604 		return;
8605 }
8606 
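/*
 * Populate the tracefs directory @d_tracer with the control files of @tr
 * (current_tracer, trace, trace_pipe, buffer_size_kb, trace_marker,
 * tracing_on, the options/ directory, the per-CPU directories, etc.).
 */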
8607 static void
8608 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8609 {
8610 	struct trace_event_file *file;
8611 	int cpu;
8612 
8613 	trace_create_file("available_tracers", 0444, d_tracer,
8614 			tr, &show_traces_fops);
8615 
8616 	trace_create_file("current_tracer", 0644, d_tracer,
8617 			tr, &set_tracer_fops);
8618 
8619 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8620 			  tr, &tracing_cpumask_fops);
8621 
8622 	trace_create_file("trace_options", 0644, d_tracer,
8623 			  tr, &tracing_iter_fops);
8624 
8625 	trace_create_file("trace", 0644, d_tracer,
8626 			  tr, &tracing_fops);
8627 
8628 	trace_create_file("trace_pipe", 0444, d_tracer,
8629 			  tr, &tracing_pipe_fops);
8630 
8631 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8632 			  tr, &tracing_entries_fops);
8633 
8634 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8635 			  tr, &tracing_total_entries_fops);
8636 
8637 	trace_create_file("free_buffer", 0200, d_tracer,
8638 			  tr, &tracing_free_buffer_fops);
8639 
8640 	trace_create_file("trace_marker", 0220, d_tracer,
8641 			  tr, &tracing_mark_fops);
8642 
8643 	file = __find_event_file(tr, "ftrace", "print");
8644 	if (file && file->dir)
8645 		trace_create_file("trigger", 0644, file->dir, file,
8646 				  &event_trigger_fops);
8647 	tr->trace_marker_file = file;
8648 
8649 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8650 			  tr, &tracing_mark_raw_fops);
8651 
8652 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8653 			  &trace_clock_fops);
8654 
8655 	trace_create_file("tracing_on", 0644, d_tracer,
8656 			  tr, &rb_simple_fops);
8657 
8658 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8659 			  &trace_time_stamp_mode_fops);
8660 
8661 	tr->buffer_percent = 50;
8662 
8663 	trace_create_file("buffer_percent", 0444, d_tracer,
8664 			tr, &buffer_percent_fops);
8665 
8666 	create_trace_options_dir(tr);
8667 
8668 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8669 	trace_create_maxlat_file(tr, d_tracer);
8670 #endif
8671 
8672 	if (ftrace_create_function_files(tr, d_tracer))
8673 		WARN(1, "Could not allocate function filter files");
8674 
8675 #ifdef CONFIG_TRACER_SNAPSHOT
8676 	trace_create_file("snapshot", 0644, d_tracer,
8677 			  tr, &snapshot_fops);
8678 #endif
8679 
8680 	trace_create_file("error_log", 0644, d_tracer,
8681 			  tr, &tracing_err_log_fops);
8682 
8683 	for_each_tracing_cpu(cpu)
8684 		tracing_init_tracefs_percpu(tr, cpu);
8685 
8686 	ftrace_init_tracefs(tr, d_tracer);
8687 }
8688 
8689 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8690 {
8691 	struct vfsmount *mnt;
8692 	struct file_system_type *type;
8693 
8694 	/*
8695 	 * To maintain backward compatibility for tools that mount
8696 	 * debugfs to get to the tracing facility, tracefs is automatically
8697 	 * mounted to the debugfs/tracing directory.
8698 	 */
8699 	type = get_fs_type("tracefs");
8700 	if (!type)
8701 		return NULL;
8702 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8703 	put_filesystem(type);
8704 	if (IS_ERR(mnt))
8705 		return NULL;
8706 	mntget(mnt);
8707 
8708 	return mnt;
8709 }
8710 
8711 /**
8712  * tracing_init_dentry - initialize top level trace array
8713  *
8714  * This is called when creating files or directories in the tracing
8715  * directory. It is called via fs_initcall() by any of the boot up code
8716  * and expects to return the dentry of the top level tracing directory.
8717  */
8718 struct dentry *tracing_init_dentry(void)
8719 {
8720 	struct trace_array *tr = &global_trace;
8721 
8722 	/* The top level trace array uses NULL as the parent */
8723 	if (tr->dir)
8724 		return NULL;
8725 
8726 	if (WARN_ON(!tracefs_initialized()) ||
8727 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8728 		 WARN_ON(!debugfs_initialized())))
8729 		return ERR_PTR(-ENODEV);
8730 
8731 	/*
8732 	 * As there may still be users that expect the tracing
8733 	 * files to exist in debugfs/tracing, we must automount
8734 	 * the tracefs file system there, so older tools still
8735 	 * work with the newer kernel.
8736 	 */
8737 	tr->dir = debugfs_create_automount("tracing", NULL,
8738 					   trace_automount, NULL);
8739 
8740 	return NULL;
8741 }
8742 
8743 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8744 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8745 
8746 static void __init trace_eval_init(void)
8747 {
8748 	int len;
8749 
8750 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8751 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8752 }
8753 
8754 #ifdef CONFIG_MODULES
8755 static void trace_module_add_evals(struct module *mod)
8756 {
8757 	if (!mod->num_trace_evals)
8758 		return;
8759 
8760 	/*
8761 	 * Modules with bad taint do not have events created, so do
8762 	 * not bother with enums either.
8763 	 */
8764 	if (trace_module_has_bad_taint(mod))
8765 		return;
8766 
8767 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8768 }
8769 
8770 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8771 static void trace_module_remove_evals(struct module *mod)
8772 {
8773 	union trace_eval_map_item *map;
8774 	union trace_eval_map_item **last = &trace_eval_maps;
8775 
8776 	if (!mod->num_trace_evals)
8777 		return;
8778 
8779 	mutex_lock(&trace_eval_mutex);
8780 
8781 	map = trace_eval_maps;
8782 
8783 	while (map) {
8784 		if (map->head.mod == mod)
8785 			break;
8786 		map = trace_eval_jmp_to_tail(map);
8787 		last = &map->tail.next;
8788 		map = map->tail.next;
8789 	}
8790 	if (!map)
8791 		goto out;
8792 
8793 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8794 	kfree(map);
8795  out:
8796 	mutex_unlock(&trace_eval_mutex);
8797 }
8798 #else
8799 static inline void trace_module_remove_evals(struct module *mod) { }
8800 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8801 
8802 static int trace_module_notify(struct notifier_block *self,
8803 			       unsigned long val, void *data)
8804 {
8805 	struct module *mod = data;
8806 
8807 	switch (val) {
8808 	case MODULE_STATE_COMING:
8809 		trace_module_add_evals(mod);
8810 		break;
8811 	case MODULE_STATE_GOING:
8812 		trace_module_remove_evals(mod);
8813 		break;
8814 	}
8815 
8816 	return 0;
8817 }
8818 
8819 static struct notifier_block trace_module_nb = {
8820 	.notifier_call = trace_module_notify,
8821 	.priority = 0,
8822 };
8823 #endif /* CONFIG_MODULES */
8824 
8825 static __init int tracer_init_tracefs(void)
8826 {
8827 	struct dentry *d_tracer;
8828 
8829 	trace_access_lock_init();
8830 
8831 	d_tracer = tracing_init_dentry();
8832 	if (IS_ERR(d_tracer))
8833 		return 0;
8834 
8835 	event_trace_init();
8836 
8837 	init_tracer_tracefs(&global_trace, d_tracer);
8838 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8839 
8840 	trace_create_file("tracing_thresh", 0644, d_tracer,
8841 			&global_trace, &tracing_thresh_fops);
8842 
8843 	trace_create_file("README", 0444, d_tracer,
8844 			NULL, &tracing_readme_fops);
8845 
8846 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8847 			NULL, &tracing_saved_cmdlines_fops);
8848 
8849 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8850 			  NULL, &tracing_saved_cmdlines_size_fops);
8851 
8852 	trace_create_file("saved_tgids", 0444, d_tracer,
8853 			NULL, &tracing_saved_tgids_fops);
8854 
8855 	trace_eval_init();
8856 
8857 	trace_create_eval_file(d_tracer);
8858 
8859 #ifdef CONFIG_MODULES
8860 	register_module_notifier(&trace_module_nb);
8861 #endif
8862 
8863 #ifdef CONFIG_DYNAMIC_FTRACE
8864 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8865 			NULL, &tracing_dyn_info_fops);
8866 #endif
8867 
8868 	create_trace_instances(d_tracer);
8869 
8870 	update_tracer_options(&global_trace);
8871 
8872 	return 0;
8873 }
8874 
8875 static int trace_panic_handler(struct notifier_block *this,
8876 			       unsigned long event, void *unused)
8877 {
8878 	if (ftrace_dump_on_oops)
8879 		ftrace_dump(ftrace_dump_on_oops);
8880 	return NOTIFY_OK;
8881 }
8882 
8883 static struct notifier_block trace_panic_notifier = {
8884 	.notifier_call  = trace_panic_handler,
8885 	.next           = NULL,
8886 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8887 };
8888 
8889 static int trace_die_handler(struct notifier_block *self,
8890 			     unsigned long val,
8891 			     void *data)
8892 {
8893 	switch (val) {
8894 	case DIE_OOPS:
8895 		if (ftrace_dump_on_oops)
8896 			ftrace_dump(ftrace_dump_on_oops);
8897 		break;
8898 	default:
8899 		break;
8900 	}
8901 	return NOTIFY_OK;
8902 }
8903 
8904 static struct notifier_block trace_die_notifier = {
8905 	.notifier_call = trace_die_handler,
8906 	.priority = 200
8907 };
8908 
8909 /*
8910  * printk is set to a max of 1024; we really don't need it that big.
8911  * Nothing should be printing 1000 characters anyway.
8912  */
8913 #define TRACE_MAX_PRINT		1000
8914 
8915 /*
8916  * Define here KERN_TRACE so that we have one place to modify
8917  * it if we decide to change what log level the ftrace dump
8918  * should be at.
8919  */
8920 #define KERN_TRACE		KERN_EMERG
8921 
8922 void
8923 trace_printk_seq(struct trace_seq *s)
8924 {
8925 	/* Probably should print a warning here. */
8926 	if (s->seq.len >= TRACE_MAX_PRINT)
8927 		s->seq.len = TRACE_MAX_PRINT;
8928 
8929 	/*
8930 	 * More paranoid code. Although the buffer size is set to
8931 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8932 	 * an extra layer of protection.
8933 	 */
8934 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8935 		s->seq.len = s->seq.size - 1;
8936 
8937 	/* should be zero ended, but we are paranoid. */
8938 	s->buffer[s->seq.len] = 0;
8939 
8940 	printk(KERN_TRACE "%s", s->buffer);
8941 
8942 	trace_seq_init(s);
8943 }
8944 
8945 void trace_init_global_iter(struct trace_iterator *iter)
8946 {
8947 	iter->tr = &global_trace;
8948 	iter->trace = iter->tr->current_trace;
8949 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8950 	iter->trace_buffer = &global_trace.trace_buffer;
8951 
8952 	if (iter->trace && iter->trace->open)
8953 		iter->trace->open(iter);
8954 
8955 	/* Annotate start of buffers if we had overruns */
8956 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8957 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8958 
8959 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8960 	if (trace_clocks[iter->tr->clock_id].in_ns)
8961 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8962 }
8963 
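/*
 * Dump the ring buffer to the console at KERN_TRACE (KERN_EMERG) level.
 * Only one dumper may run at a time; recording is switched off and the
 * per-CPU buffers are disabled while the entries are printed.
 */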
8964 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8965 {
8966 	/* use static because iter can be a bit big for the stack */
8967 	static struct trace_iterator iter;
8968 	static atomic_t dump_running;
8969 	struct trace_array *tr = &global_trace;
8970 	unsigned int old_userobj;
8971 	unsigned long flags;
8972 	int cnt = 0, cpu;
8973 
8974 	/* Only allow one dump user at a time. */
8975 	if (atomic_inc_return(&dump_running) != 1) {
8976 		atomic_dec(&dump_running);
8977 		return;
8978 	}
8979 
8980 	/*
8981 	 * Always turn off tracing when we dump.
8982 	 * We don't need to show trace output of what happens
8983 	 * between multiple crashes.
8984 	 *
8985 	 * If the user does a sysrq-z, then they can re-enable
8986 	 * tracing with echo 1 > tracing_on.
8987 	 */
8988 	tracing_off();
8989 
8990 	local_irq_save(flags);
8991 	printk_nmi_direct_enter();
8992 
8993 	/* Simulate the iterator */
8994 	trace_init_global_iter(&iter);
8995 
8996 	for_each_tracing_cpu(cpu) {
8997 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8998 	}
8999 
9000 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9001 
9002 	/* don't look at user memory in panic mode */
9003 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9004 
9005 	switch (oops_dump_mode) {
9006 	case DUMP_ALL:
9007 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9008 		break;
9009 	case DUMP_ORIG:
9010 		iter.cpu_file = raw_smp_processor_id();
9011 		break;
9012 	case DUMP_NONE:
9013 		goto out_enable;
9014 	default:
9015 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9016 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9017 	}
9018 
9019 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9020 
9021 	/* Did function tracer already get disabled? */
9022 	if (ftrace_is_dead()) {
9023 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9024 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9025 	}
9026 
9027 	/*
9028 	 * We need to stop all tracing on all CPUs to read
9029 	 * the next buffer. This is a bit expensive, but is
9030 	 * not done often. We fill all that we can read,
9031 	 * and then release the locks again.
9032 	 */
9033 
9034 	while (!trace_empty(&iter)) {
9035 
9036 		if (!cnt)
9037 			printk(KERN_TRACE "---------------------------------\n");
9038 
9039 		cnt++;
9040 
9041 		trace_iterator_reset(&iter);
9042 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9043 
9044 		if (trace_find_next_entry_inc(&iter) != NULL) {
9045 			int ret;
9046 
9047 			ret = print_trace_line(&iter);
9048 			if (ret != TRACE_TYPE_NO_CONSUME)
9049 				trace_consume(&iter);
9050 		}
9051 		touch_nmi_watchdog();
9052 
9053 		trace_printk_seq(&iter.seq);
9054 	}
9055 
9056 	if (!cnt)
9057 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9058 	else
9059 		printk(KERN_TRACE "---------------------------------\n");
9060 
9061  out_enable:
9062 	tr->trace_flags |= old_userobj;
9063 
9064 	for_each_tracing_cpu(cpu) {
9065 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9066 	}
9067 	atomic_dec(&dump_running);
9068 	printk_nmi_direct_exit();
9069 	local_irq_restore(flags);
9070 }
9071 EXPORT_SYMBOL_GPL(ftrace_dump);
9072 
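/*
 * Split @buf into an argv[] array and pass it to @createfn (lines with no
 * words are skipped). Returns 0 or the error from @createfn.
 */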
9073 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9074 {
9075 	char **argv;
9076 	int argc, ret;
9077 
9078 	argc = 0;
9079 	ret = 0;
9080 	argv = argv_split(GFP_KERNEL, buf, &argc);
9081 	if (!argv)
9082 		return -ENOMEM;
9083 
9084 	if (argc)
9085 		ret = createfn(argc, argv);
9086 
9087 	argv_free(argv);
9088 
9089 	return ret;
9090 }
9091 
9092 #define WRITE_BUFSIZE  4096
9093 
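/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split it
 * on newlines, strip '#' comments and run @createfn on every line. Lines
 * longer than WRITE_BUFSIZE - 2 are rejected with -EINVAL.
 */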
9094 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9095 				size_t count, loff_t *ppos,
9096 				int (*createfn)(int, char **))
9097 {
9098 	char *kbuf, *buf, *tmp;
9099 	int ret = 0;
9100 	size_t done = 0;
9101 	size_t size;
9102 
9103 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9104 	if (!kbuf)
9105 		return -ENOMEM;
9106 
9107 	while (done < count) {
9108 		size = count - done;
9109 
9110 		if (size >= WRITE_BUFSIZE)
9111 			size = WRITE_BUFSIZE - 1;
9112 
9113 		if (copy_from_user(kbuf, buffer + done, size)) {
9114 			ret = -EFAULT;
9115 			goto out;
9116 		}
9117 		kbuf[size] = '\0';
9118 		buf = kbuf;
9119 		do {
9120 			tmp = strchr(buf, '\n');
9121 			if (tmp) {
9122 				*tmp = '\0';
9123 				size = tmp - buf + 1;
9124 			} else {
9125 				size = strlen(buf);
9126 				if (done + size < count) {
9127 					if (buf != kbuf)
9128 						break;
9129 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9130 					pr_warn("Line length is too long: Should be less than %d\n",
9131 						WRITE_BUFSIZE - 2);
9132 					ret = -EINVAL;
9133 					goto out;
9134 				}
9135 			}
9136 			done += size;
9137 
9138 			/* Remove comments */
9139 			tmp = strchr(buf, '#');
9140 
9141 			if (tmp)
9142 				*tmp = '\0';
9143 
9144 			ret = trace_run_command(buf, createfn);
9145 			if (ret)
9146 				goto out;
9147 			buf += size;
9148 
9149 		} while (done < count);
9150 	}
9151 	ret = done;
9152 
9153 out:
9154 	kfree(kbuf);
9155 
9156 	return ret;
9157 }
9158 
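/*
 * Boot-time initialization of the global trace array: allocate the
 * cpumasks and ring buffers, register the nop tracer and the panic/die
 * notifiers, and add the array to ftrace_trace_arrays.
 */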
9159 __init static int tracer_alloc_buffers(void)
9160 {
9161 	int ring_buf_size;
9162 	int ret = -ENOMEM;
9163 
9164 	/*
9165 	 * Make sure we don't accidentally add more trace options
9166 	 * than we have bits for.
9167 	 */
9168 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9169 
9170 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9171 		goto out;
9172 
9173 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9174 		goto out_free_buffer_mask;
9175 
9176 	/* Only allocate trace_printk buffers if a trace_printk exists */
9177 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9178 		/* Must be called before global_trace.buffer is allocated */
9179 		trace_printk_init_buffers();
9180 
9181 	/* To save memory, keep the ring buffer size to its minimum */
9182 	if (ring_buffer_expanded)
9183 		ring_buf_size = trace_buf_size;
9184 	else
9185 		ring_buf_size = 1;
9186 
9187 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9188 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9189 
9190 	raw_spin_lock_init(&global_trace.start_lock);
9191 
9192 	/*
9193 	 * The prepare callback allocates some memory for the ring buffer. We
9194 	 * don't free the buffer if the CPU goes down. If we were to free
9195 	 * the buffer, then the user would lose any trace that was in the
9196 	 * buffer. The memory will be removed once the "instance" is removed.
9197 	 */
9198 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9199 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9200 				      NULL);
9201 	if (ret < 0)
9202 		goto out_free_cpumask;
9203 	/* Used for event triggers */
9204 	ret = -ENOMEM;
9205 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9206 	if (!temp_buffer)
9207 		goto out_rm_hp_state;
9208 
9209 	if (trace_create_savedcmd() < 0)
9210 		goto out_free_temp_buffer;
9211 
9212 	/* TODO: make the number of buffers hot pluggable with CPUs */
9213 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9214 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9215 		WARN_ON(1);
9216 		goto out_free_savedcmd;
9217 	}
9218 
9219 	if (global_trace.buffer_disabled)
9220 		tracing_off();
9221 
9222 	if (trace_boot_clock) {
9223 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9224 		if (ret < 0)
9225 			pr_warn("Trace clock %s not defined, going back to default\n",
9226 				trace_boot_clock);
9227 	}
9228 
9229 	/*
9230 	 * register_tracer() might reference current_trace, so it
9231 	 * needs to be set before we register anything. This is
9232 	 * just a bootstrap of current_trace anyway.
9233 	 */
9234 	global_trace.current_trace = &nop_trace;
9235 
9236 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9237 
9238 	ftrace_init_global_array_ops(&global_trace);
9239 
9240 	init_trace_flags_index(&global_trace);
9241 
9242 	register_tracer(&nop_trace);
9243 
9244 	/* Function tracing may start here (via kernel command line) */
9245 	init_function_trace();
9246 
9247 	/* All seems OK, enable tracing */
9248 	tracing_disabled = 0;
9249 
9250 	atomic_notifier_chain_register(&panic_notifier_list,
9251 				       &trace_panic_notifier);
9252 
9253 	register_die_notifier(&trace_die_notifier);
9254 
9255 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9256 
9257 	INIT_LIST_HEAD(&global_trace.systems);
9258 	INIT_LIST_HEAD(&global_trace.events);
9259 	INIT_LIST_HEAD(&global_trace.hist_vars);
9260 	INIT_LIST_HEAD(&global_trace.err_log);
9261 	list_add(&global_trace.list, &ftrace_trace_arrays);
9262 
9263 	apply_trace_boot_options();
9264 
9265 	register_snapshot_cmd();
9266 
9267 	return 0;
9268 
9269 out_free_savedcmd:
9270 	free_saved_cmdlines_buffer(savedcmd);
9271 out_free_temp_buffer:
9272 	ring_buffer_free(temp_buffer);
9273 out_rm_hp_state:
9274 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9275 out_free_cpumask:
9276 	free_cpumask_var(global_trace.tracing_cpumask);
9277 out_free_buffer_mask:
9278 	free_cpumask_var(tracing_buffer_mask);
9279 out:
9280 	return ret;
9281 }
9282 
9283 void __init early_trace_init(void)
9284 {
9285 	if (tracepoint_printk) {
9286 		tracepoint_print_iter =
9287 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9288 		if (WARN_ON(!tracepoint_print_iter))
9289 			tracepoint_printk = 0;
9290 		else
9291 			static_key_enable(&tracepoint_printk_key.key);
9292 	}
9293 	tracer_alloc_buffers();
9294 }
9295 
9296 void __init trace_init(void)
9297 {
9298 	trace_event_init();
9299 }
9300 
9301 __init static int clear_boot_tracer(void)
9302 {
9303 	/*
9304 	 * The default bootup tracer name is kept in an init section buffer.
9305 	 * This function is called at late_initcall time. If we did not
9306 	 * find the boot tracer, then clear it out, to prevent
9307 	 * later registration from accessing the buffer that is
9308 	 * about to be freed.
9309 	 */
9310 	if (!default_bootup_tracer)
9311 		return 0;
9312 
9313 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9314 	       default_bootup_tracer);
9315 	default_bootup_tracer = NULL;
9316 
9317 	return 0;
9318 }
9319 
9320 fs_initcall(tracer_init_tracefs);
9321 late_initcall_sync(clear_boot_tracer);
9322 
9323 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9324 __init static int tracing_set_default_clock(void)
9325 {
9326 	/* sched_clock_stable() is determined in late_initcall */
9327 	if (!trace_boot_clock && !sched_clock_stable()) {
9328 		printk(KERN_WARNING
9329 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9330 		       "If you want to keep using the local clock, then add:\n"
9331 		       "  \"trace_clock=local\"\n"
9332 		       "on the kernel command line\n");
9333 		tracing_set_clock(&global_trace, "global");
9334 	}
9335 
9336 	return 0;
9337 }
9338 late_initcall_sync(tracing_set_default_clock);
9339 #endif
9340