xref: /openbmc/linux/kernel/trace/trace.c (revision 98ddec80)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will lurk into the ring-buffer to count the
59  * entries inserted during the selftest although some concurrent
60  * insertions into the ring-buffer such as trace_printk could occurred
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * than "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189 
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid already is +1 of the actual prevous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always recreate a new array. The write is an all or nothing
493 	 * operation. Always create a new array when adding new pids by
494 	 * the user. If the operation fails, then the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. Anyway this can be
614  * boot time and run time configurable.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
635  * The content of events may become garbage if we allow other process consumes
636  * these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not reader page) in ring buffer, and this page will be rewrited
639  *      by events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races of where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer. instead, resize it because
974 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
975 	 * we want preserve it.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = tracing_alloc_snapshot_instance(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races of where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit postions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by  space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this tasks comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	if (tr->stop_count)
1364 		return;
1365 
1366 	WARN_ON_ONCE(!irqs_disabled());
1367 
1368 	if (!tr->allocated_snapshot) {
1369 		/* Only the nop tracer should hit this when disabling */
1370 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1371 		return;
1372 	}
1373 
1374 	arch_spin_lock(&tr->max_lock);
1375 
1376 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1377 
1378 	__update_max_tr(tr, tsk, cpu);
1379 	arch_spin_unlock(&tr->max_lock);
1380 }
1381 
1382 /**
1383  * update_max_tr_single - only copy one trace over, and reset the rest
1384  * @tr - tracer
1385  * @tsk - task with the latency
1386  * @cpu - the cpu of the buffer to copy.
1387  *
1388  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1389  */
1390 void
1391 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1392 {
1393 	int ret;
1394 
1395 	if (tr->stop_count)
1396 		return;
1397 
1398 	WARN_ON_ONCE(!irqs_disabled());
1399 	if (!tr->allocated_snapshot) {
1400 		/* Only the nop tracer should hit this when disabling */
1401 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1402 		return;
1403 	}
1404 
1405 	arch_spin_lock(&tr->max_lock);
1406 
1407 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1408 
1409 	if (ret == -EBUSY) {
1410 		/*
1411 		 * We failed to swap the buffer due to a commit taking
1412 		 * place on this CPU. We fail to record, but we reset
1413 		 * the max trace buffer (no one writes directly to it)
1414 		 * and flag that it failed.
1415 		 */
1416 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1417 			"Failed to swap buffers due to commit in progress\n");
1418 	}
1419 
1420 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1421 
1422 	__update_max_tr(tr, tsk, cpu);
1423 	arch_spin_unlock(&tr->max_lock);
1424 }
1425 #endif /* CONFIG_TRACER_MAX_TRACE */
1426 
1427 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1428 {
1429 	/* Iterators are static, they should be filled or empty */
1430 	if (trace_buffer_iter(iter, iter->cpu_file))
1431 		return 0;
1432 
1433 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1434 				full);
1435 }
1436 
1437 #ifdef CONFIG_FTRACE_STARTUP_TEST
1438 static bool selftests_can_run;
1439 
1440 struct trace_selftests {
1441 	struct list_head		list;
1442 	struct tracer			*type;
1443 };
1444 
1445 static LIST_HEAD(postponed_selftests);
1446 
1447 static int save_selftest(struct tracer *type)
1448 {
1449 	struct trace_selftests *selftest;
1450 
1451 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1452 	if (!selftest)
1453 		return -ENOMEM;
1454 
1455 	selftest->type = type;
1456 	list_add(&selftest->list, &postponed_selftests);
1457 	return 0;
1458 }
1459 
1460 static int run_tracer_selftest(struct tracer *type)
1461 {
1462 	struct trace_array *tr = &global_trace;
1463 	struct tracer *saved_tracer = tr->current_trace;
1464 	int ret;
1465 
1466 	if (!type->selftest || tracing_selftest_disabled)
1467 		return 0;
1468 
1469 	/*
1470 	 * If a tracer registers early in boot up (before scheduling is
1471 	 * initialized and such), then do not run its selftests yet.
1472 	 * Instead, run it a little later in the boot process.
1473 	 */
1474 	if (!selftests_can_run)
1475 		return save_selftest(type);
1476 
1477 	/*
1478 	 * Run a selftest on this tracer.
1479 	 * Here we reset the trace buffer, and set the current
1480 	 * tracer to be this tracer. The tracer can then run some
1481 	 * internal tracing to verify that everything is in order.
1482 	 * If we fail, we do not register this tracer.
1483 	 */
1484 	tracing_reset_online_cpus(&tr->trace_buffer);
1485 
1486 	tr->current_trace = type;
1487 
1488 #ifdef CONFIG_TRACER_MAX_TRACE
1489 	if (type->use_max_tr) {
1490 		/* If we expanded the buffers, make sure the max is expanded too */
1491 		if (ring_buffer_expanded)
1492 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1493 					   RING_BUFFER_ALL_CPUS);
1494 		tr->allocated_snapshot = true;
1495 	}
1496 #endif
1497 
1498 	/* the test is responsible for initializing and enabling */
1499 	pr_info("Testing tracer %s: ", type->name);
1500 	ret = type->selftest(type, tr);
1501 	/* the test is responsible for resetting too */
1502 	tr->current_trace = saved_tracer;
1503 	if (ret) {
1504 		printk(KERN_CONT "FAILED!\n");
1505 		/* Add the warning after printing 'FAILED' */
1506 		WARN_ON(1);
1507 		return -1;
1508 	}
1509 	/* Only reset on passing, to avoid touching corrupted buffers */
1510 	tracing_reset_online_cpus(&tr->trace_buffer);
1511 
1512 #ifdef CONFIG_TRACER_MAX_TRACE
1513 	if (type->use_max_tr) {
1514 		tr->allocated_snapshot = false;
1515 
1516 		/* Shrink the max buffer again */
1517 		if (ring_buffer_expanded)
1518 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1519 					   RING_BUFFER_ALL_CPUS);
1520 	}
1521 #endif
1522 
1523 	printk(KERN_CONT "PASSED\n");
1524 	return 0;
1525 }
1526 
1527 static __init int init_trace_selftests(void)
1528 {
1529 	struct trace_selftests *p, *n;
1530 	struct tracer *t, **last;
1531 	int ret;
1532 
1533 	selftests_can_run = true;
1534 
1535 	mutex_lock(&trace_types_lock);
1536 
1537 	if (list_empty(&postponed_selftests))
1538 		goto out;
1539 
1540 	pr_info("Running postponed tracer tests:\n");
1541 
1542 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1543 		ret = run_tracer_selftest(p->type);
1544 		/* If the test fails, then warn and remove from available_tracers */
1545 		if (ret < 0) {
1546 			WARN(1, "tracer: %s failed selftest, disabling\n",
1547 			     p->type->name);
1548 			last = &trace_types;
1549 			for (t = trace_types; t; t = t->next) {
1550 				if (t == p->type) {
1551 					*last = t->next;
1552 					break;
1553 				}
1554 				last = &t->next;
1555 			}
1556 		}
1557 		list_del(&p->list);
1558 		kfree(p);
1559 	}
1560 
1561  out:
1562 	mutex_unlock(&trace_types_lock);
1563 
1564 	return 0;
1565 }
1566 core_initcall(init_trace_selftests);
1567 #else
1568 static inline int run_tracer_selftest(struct tracer *type)
1569 {
1570 	return 0;
1571 }
1572 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1573 
1574 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1575 
1576 static void __init apply_trace_boot_options(void);
1577 
1578 /**
1579  * register_tracer - register a tracer with the ftrace system.
1580  * @type - the plugin for the tracer
1581  *
1582  * Register a new plugin tracer.
1583  */
1584 int __init register_tracer(struct tracer *type)
1585 {
1586 	struct tracer *t;
1587 	int ret = 0;
1588 
1589 	if (!type->name) {
1590 		pr_info("Tracer must have a name\n");
1591 		return -1;
1592 	}
1593 
1594 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1595 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1596 		return -1;
1597 	}
1598 
1599 	mutex_lock(&trace_types_lock);
1600 
1601 	tracing_selftest_running = true;
1602 
1603 	for (t = trace_types; t; t = t->next) {
1604 		if (strcmp(type->name, t->name) == 0) {
1605 			/* already found */
1606 			pr_info("Tracer %s already registered\n",
1607 				type->name);
1608 			ret = -1;
1609 			goto out;
1610 		}
1611 	}
1612 
1613 	if (!type->set_flag)
1614 		type->set_flag = &dummy_set_flag;
1615 	if (!type->flags) {
1616 		/*allocate a dummy tracer_flags*/
1617 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1618 		if (!type->flags) {
1619 			ret = -ENOMEM;
1620 			goto out;
1621 		}
1622 		type->flags->val = 0;
1623 		type->flags->opts = dummy_tracer_opt;
1624 	} else
1625 		if (!type->flags->opts)
1626 			type->flags->opts = dummy_tracer_opt;
1627 
1628 	/* store the tracer for __set_tracer_option */
1629 	type->flags->trace = type;
1630 
1631 	ret = run_tracer_selftest(type);
1632 	if (ret < 0)
1633 		goto out;
1634 
1635 	type->next = trace_types;
1636 	trace_types = type;
1637 	add_tracer_options(&global_trace, type);
1638 
1639  out:
1640 	tracing_selftest_running = false;
1641 	mutex_unlock(&trace_types_lock);
1642 
1643 	if (ret || !default_bootup_tracer)
1644 		goto out_unlock;
1645 
1646 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1647 		goto out_unlock;
1648 
1649 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1650 	/* Do we want this tracer to start on bootup? */
1651 	tracing_set_tracer(&global_trace, type->name);
1652 	default_bootup_tracer = NULL;
1653 
1654 	apply_trace_boot_options();
1655 
1656 	/* disable other selftests, since this will break it. */
1657 	tracing_selftest_disabled = true;
1658 #ifdef CONFIG_FTRACE_STARTUP_TEST
1659 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1660 	       type->name);
1661 #endif
1662 
1663  out_unlock:
1664 	return ret;
1665 }
1666 
1667 void tracing_reset(struct trace_buffer *buf, int cpu)
1668 {
1669 	struct ring_buffer *buffer = buf->buffer;
1670 
1671 	if (!buffer)
1672 		return;
1673 
1674 	ring_buffer_record_disable(buffer);
1675 
1676 	/* Make sure all commits have finished */
1677 	synchronize_sched();
1678 	ring_buffer_reset_cpu(buffer, cpu);
1679 
1680 	ring_buffer_record_enable(buffer);
1681 }
1682 
1683 void tracing_reset_online_cpus(struct trace_buffer *buf)
1684 {
1685 	struct ring_buffer *buffer = buf->buffer;
1686 	int cpu;
1687 
1688 	if (!buffer)
1689 		return;
1690 
1691 	ring_buffer_record_disable(buffer);
1692 
1693 	/* Make sure all commits have finished */
1694 	synchronize_sched();
1695 
1696 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1697 
1698 	for_each_online_cpu(cpu)
1699 		ring_buffer_reset_cpu(buffer, cpu);
1700 
1701 	ring_buffer_record_enable(buffer);
1702 }
1703 
1704 /* Must have trace_types_lock held */
1705 void tracing_reset_all_online_cpus(void)
1706 {
1707 	struct trace_array *tr;
1708 
1709 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1710 		if (!tr->clear_trace)
1711 			continue;
1712 		tr->clear_trace = false;
1713 		tracing_reset_online_cpus(&tr->trace_buffer);
1714 #ifdef CONFIG_TRACER_MAX_TRACE
1715 		tracing_reset_online_cpus(&tr->max_buffer);
1716 #endif
1717 	}
1718 }
1719 
1720 static int *tgid_map;
1721 
1722 #define SAVED_CMDLINES_DEFAULT 128
1723 #define NO_CMDLINE_MAP UINT_MAX
1724 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1725 struct saved_cmdlines_buffer {
1726 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1727 	unsigned *map_cmdline_to_pid;
1728 	unsigned cmdline_num;
1729 	int cmdline_idx;
1730 	char *saved_cmdlines;
1731 };
1732 static struct saved_cmdlines_buffer *savedcmd;
1733 
1734 /* temporary disable recording */
1735 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1736 
1737 static inline char *get_saved_cmdlines(int idx)
1738 {
1739 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1740 }
1741 
1742 static inline void set_cmdline(int idx, const char *cmdline)
1743 {
1744 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1745 }
1746 
1747 static int allocate_cmdlines_buffer(unsigned int val,
1748 				    struct saved_cmdlines_buffer *s)
1749 {
1750 	s->map_cmdline_to_pid = kmalloc_array(val,
1751 					      sizeof(*s->map_cmdline_to_pid),
1752 					      GFP_KERNEL);
1753 	if (!s->map_cmdline_to_pid)
1754 		return -ENOMEM;
1755 
1756 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1757 	if (!s->saved_cmdlines) {
1758 		kfree(s->map_cmdline_to_pid);
1759 		return -ENOMEM;
1760 	}
1761 
1762 	s->cmdline_idx = 0;
1763 	s->cmdline_num = val;
1764 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1765 	       sizeof(s->map_pid_to_cmdline));
1766 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1767 	       val * sizeof(*s->map_cmdline_to_pid));
1768 
1769 	return 0;
1770 }
1771 
1772 static int trace_create_savedcmd(void)
1773 {
1774 	int ret;
1775 
1776 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1777 	if (!savedcmd)
1778 		return -ENOMEM;
1779 
1780 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1781 	if (ret < 0) {
1782 		kfree(savedcmd);
1783 		savedcmd = NULL;
1784 		return -ENOMEM;
1785 	}
1786 
1787 	return 0;
1788 }
1789 
1790 int is_tracing_stopped(void)
1791 {
1792 	return global_trace.stop_count;
1793 }
1794 
1795 /**
1796  * tracing_start - quick start of the tracer
1797  *
1798  * If tracing is enabled but was stopped by tracing_stop,
1799  * this will start the tracer back up.
1800  */
1801 void tracing_start(void)
1802 {
1803 	struct ring_buffer *buffer;
1804 	unsigned long flags;
1805 
1806 	if (tracing_disabled)
1807 		return;
1808 
1809 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1810 	if (--global_trace.stop_count) {
1811 		if (global_trace.stop_count < 0) {
1812 			/* Someone screwed up their debugging */
1813 			WARN_ON_ONCE(1);
1814 			global_trace.stop_count = 0;
1815 		}
1816 		goto out;
1817 	}
1818 
1819 	/* Prevent the buffers from switching */
1820 	arch_spin_lock(&global_trace.max_lock);
1821 
1822 	buffer = global_trace.trace_buffer.buffer;
1823 	if (buffer)
1824 		ring_buffer_record_enable(buffer);
1825 
1826 #ifdef CONFIG_TRACER_MAX_TRACE
1827 	buffer = global_trace.max_buffer.buffer;
1828 	if (buffer)
1829 		ring_buffer_record_enable(buffer);
1830 #endif
1831 
1832 	arch_spin_unlock(&global_trace.max_lock);
1833 
1834  out:
1835 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1836 }
1837 
1838 static void tracing_start_tr(struct trace_array *tr)
1839 {
1840 	struct ring_buffer *buffer;
1841 	unsigned long flags;
1842 
1843 	if (tracing_disabled)
1844 		return;
1845 
1846 	/* If global, we need to also start the max tracer */
1847 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1848 		return tracing_start();
1849 
1850 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1851 
1852 	if (--tr->stop_count) {
1853 		if (tr->stop_count < 0) {
1854 			/* Someone screwed up their debugging */
1855 			WARN_ON_ONCE(1);
1856 			tr->stop_count = 0;
1857 		}
1858 		goto out;
1859 	}
1860 
1861 	buffer = tr->trace_buffer.buffer;
1862 	if (buffer)
1863 		ring_buffer_record_enable(buffer);
1864 
1865  out:
1866 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1867 }
1868 
1869 /**
1870  * tracing_stop - quick stop of the tracer
1871  *
1872  * Light weight way to stop tracing. Use in conjunction with
1873  * tracing_start.
1874  */
1875 void tracing_stop(void)
1876 {
1877 	struct ring_buffer *buffer;
1878 	unsigned long flags;
1879 
1880 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1881 	if (global_trace.stop_count++)
1882 		goto out;
1883 
1884 	/* Prevent the buffers from switching */
1885 	arch_spin_lock(&global_trace.max_lock);
1886 
1887 	buffer = global_trace.trace_buffer.buffer;
1888 	if (buffer)
1889 		ring_buffer_record_disable(buffer);
1890 
1891 #ifdef CONFIG_TRACER_MAX_TRACE
1892 	buffer = global_trace.max_buffer.buffer;
1893 	if (buffer)
1894 		ring_buffer_record_disable(buffer);
1895 #endif
1896 
1897 	arch_spin_unlock(&global_trace.max_lock);
1898 
1899  out:
1900 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1901 }
1902 
1903 static void tracing_stop_tr(struct trace_array *tr)
1904 {
1905 	struct ring_buffer *buffer;
1906 	unsigned long flags;
1907 
1908 	/* If global, we need to also stop the max tracer */
1909 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1910 		return tracing_stop();
1911 
1912 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1913 	if (tr->stop_count++)
1914 		goto out;
1915 
1916 	buffer = tr->trace_buffer.buffer;
1917 	if (buffer)
1918 		ring_buffer_record_disable(buffer);
1919 
1920  out:
1921 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1922 }
1923 
1924 static int trace_save_cmdline(struct task_struct *tsk)
1925 {
1926 	unsigned pid, idx;
1927 
1928 	/* treat recording of idle task as a success */
1929 	if (!tsk->pid)
1930 		return 1;
1931 
1932 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1933 		return 0;
1934 
1935 	/*
1936 	 * It's not the end of the world if we don't get
1937 	 * the lock, but we also don't want to spin
1938 	 * nor do we want to disable interrupts,
1939 	 * so if we miss here, then better luck next time.
1940 	 */
1941 	if (!arch_spin_trylock(&trace_cmdline_lock))
1942 		return 0;
1943 
1944 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1945 	if (idx == NO_CMDLINE_MAP) {
1946 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1947 
1948 		/*
1949 		 * Check whether the cmdline buffer at idx has a pid
1950 		 * mapped. We are going to overwrite that entry so we
1951 		 * need to clear the map_pid_to_cmdline. Otherwise we
1952 		 * would read the new comm for the old pid.
1953 		 */
1954 		pid = savedcmd->map_cmdline_to_pid[idx];
1955 		if (pid != NO_CMDLINE_MAP)
1956 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1957 
1958 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1959 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1960 
1961 		savedcmd->cmdline_idx = idx;
1962 	}
1963 
1964 	set_cmdline(idx, tsk->comm);
1965 
1966 	arch_spin_unlock(&trace_cmdline_lock);
1967 
1968 	return 1;
1969 }
1970 
1971 static void __trace_find_cmdline(int pid, char comm[])
1972 {
1973 	unsigned map;
1974 
1975 	if (!pid) {
1976 		strcpy(comm, "<idle>");
1977 		return;
1978 	}
1979 
1980 	if (WARN_ON_ONCE(pid < 0)) {
1981 		strcpy(comm, "<XXX>");
1982 		return;
1983 	}
1984 
1985 	if (pid > PID_MAX_DEFAULT) {
1986 		strcpy(comm, "<...>");
1987 		return;
1988 	}
1989 
1990 	map = savedcmd->map_pid_to_cmdline[pid];
1991 	if (map != NO_CMDLINE_MAP)
1992 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1993 	else
1994 		strcpy(comm, "<...>");
1995 }
1996 
1997 void trace_find_cmdline(int pid, char comm[])
1998 {
1999 	preempt_disable();
2000 	arch_spin_lock(&trace_cmdline_lock);
2001 
2002 	__trace_find_cmdline(pid, comm);
2003 
2004 	arch_spin_unlock(&trace_cmdline_lock);
2005 	preempt_enable();
2006 }
2007 
2008 int trace_find_tgid(int pid)
2009 {
2010 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2011 		return 0;
2012 
2013 	return tgid_map[pid];
2014 }
2015 
2016 static int trace_save_tgid(struct task_struct *tsk)
2017 {
2018 	/* treat recording of idle task as a success */
2019 	if (!tsk->pid)
2020 		return 1;
2021 
2022 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2023 		return 0;
2024 
2025 	tgid_map[tsk->pid] = tsk->tgid;
2026 	return 1;
2027 }
2028 
2029 static bool tracing_record_taskinfo_skip(int flags)
2030 {
2031 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2032 		return true;
2033 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2034 		return true;
2035 	if (!__this_cpu_read(trace_taskinfo_save))
2036 		return true;
2037 	return false;
2038 }
2039 
2040 /**
2041  * tracing_record_taskinfo - record the task info of a task
2042  *
2043  * @task  - task to record
2044  * @flags - TRACE_RECORD_CMDLINE for recording comm
2045  *        - TRACE_RECORD_TGID for recording tgid
2046  */
2047 void tracing_record_taskinfo(struct task_struct *task, int flags)
2048 {
2049 	bool done;
2050 
2051 	if (tracing_record_taskinfo_skip(flags))
2052 		return;
2053 
2054 	/*
2055 	 * Record as much task information as possible. If some fail, continue
2056 	 * to try to record the others.
2057 	 */
2058 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2059 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2060 
2061 	/* If recording any information failed, retry again soon. */
2062 	if (!done)
2063 		return;
2064 
2065 	__this_cpu_write(trace_taskinfo_save, false);
2066 }
2067 
2068 /**
2069  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2070  *
2071  * @prev - previous task during sched_switch
2072  * @next - next task during sched_switch
2073  * @flags - TRACE_RECORD_CMDLINE for recording comm
2074  *          TRACE_RECORD_TGID for recording tgid
2075  */
2076 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2077 					  struct task_struct *next, int flags)
2078 {
2079 	bool done;
2080 
2081 	if (tracing_record_taskinfo_skip(flags))
2082 		return;
2083 
2084 	/*
2085 	 * Record as much task information as possible. If some fail, continue
2086 	 * to try to record the others.
2087 	 */
2088 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2089 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2090 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2091 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2092 
2093 	/* If recording any information failed, retry again soon. */
2094 	if (!done)
2095 		return;
2096 
2097 	__this_cpu_write(trace_taskinfo_save, false);
2098 }
2099 
2100 /* Helpers to record a specific task information */
2101 void tracing_record_cmdline(struct task_struct *task)
2102 {
2103 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2104 }
2105 
2106 void tracing_record_tgid(struct task_struct *task)
2107 {
2108 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2109 }
2110 
2111 /*
2112  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2113  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2114  * simplifies those functions and keeps them in sync.
2115  */
2116 enum print_line_t trace_handle_return(struct trace_seq *s)
2117 {
2118 	return trace_seq_has_overflowed(s) ?
2119 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2120 }
2121 EXPORT_SYMBOL_GPL(trace_handle_return);
2122 
2123 void
2124 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2125 			     int pc)
2126 {
2127 	struct task_struct *tsk = current;
2128 
2129 	entry->preempt_count		= pc & 0xff;
2130 	entry->pid			= (tsk) ? tsk->pid : 0;
2131 	entry->flags =
2132 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2133 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2134 #else
2135 		TRACE_FLAG_IRQS_NOSUPPORT |
2136 #endif
2137 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2138 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2139 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2140 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2141 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2142 }
2143 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2144 
2145 struct ring_buffer_event *
2146 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2147 			  int type,
2148 			  unsigned long len,
2149 			  unsigned long flags, int pc)
2150 {
2151 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2152 }
2153 
2154 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2155 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2156 static int trace_buffered_event_ref;
2157 
2158 /**
2159  * trace_buffered_event_enable - enable buffering events
2160  *
2161  * When events are being filtered, it is quicker to use a temporary
2162  * buffer to write the event data into if there's a likely chance
2163  * that it will not be committed. The discard of the ring buffer
2164  * is not as fast as committing, and is much slower than copying
2165  * a commit.
2166  *
2167  * When an event is to be filtered, allocate per cpu buffers to
2168  * write the event data into, and if the event is filtered and discarded
2169  * it is simply dropped, otherwise, the entire data is to be committed
2170  * in one shot.
2171  */
2172 void trace_buffered_event_enable(void)
2173 {
2174 	struct ring_buffer_event *event;
2175 	struct page *page;
2176 	int cpu;
2177 
2178 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2179 
2180 	if (trace_buffered_event_ref++)
2181 		return;
2182 
2183 	for_each_tracing_cpu(cpu) {
2184 		page = alloc_pages_node(cpu_to_node(cpu),
2185 					GFP_KERNEL | __GFP_NORETRY, 0);
2186 		if (!page)
2187 			goto failed;
2188 
2189 		event = page_address(page);
2190 		memset(event, 0, sizeof(*event));
2191 
2192 		per_cpu(trace_buffered_event, cpu) = event;
2193 
2194 		preempt_disable();
2195 		if (cpu == smp_processor_id() &&
2196 		    this_cpu_read(trace_buffered_event) !=
2197 		    per_cpu(trace_buffered_event, cpu))
2198 			WARN_ON_ONCE(1);
2199 		preempt_enable();
2200 	}
2201 
2202 	return;
2203  failed:
2204 	trace_buffered_event_disable();
2205 }
2206 
2207 static void enable_trace_buffered_event(void *data)
2208 {
2209 	/* Probably not needed, but do it anyway */
2210 	smp_rmb();
2211 	this_cpu_dec(trace_buffered_event_cnt);
2212 }
2213 
2214 static void disable_trace_buffered_event(void *data)
2215 {
2216 	this_cpu_inc(trace_buffered_event_cnt);
2217 }
2218 
2219 /**
2220  * trace_buffered_event_disable - disable buffering events
2221  *
2222  * When a filter is removed, it is faster to not use the buffered
2223  * events, and to commit directly into the ring buffer. Free up
2224  * the temp buffers when there are no more users. This requires
2225  * special synchronization with current events.
2226  */
2227 void trace_buffered_event_disable(void)
2228 {
2229 	int cpu;
2230 
2231 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2232 
2233 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2234 		return;
2235 
2236 	if (--trace_buffered_event_ref)
2237 		return;
2238 
2239 	preempt_disable();
2240 	/* For each CPU, set the buffer as used. */
2241 	smp_call_function_many(tracing_buffer_mask,
2242 			       disable_trace_buffered_event, NULL, 1);
2243 	preempt_enable();
2244 
2245 	/* Wait for all current users to finish */
2246 	synchronize_sched();
2247 
2248 	for_each_tracing_cpu(cpu) {
2249 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2250 		per_cpu(trace_buffered_event, cpu) = NULL;
2251 	}
2252 	/*
2253 	 * Make sure trace_buffered_event is NULL before clearing
2254 	 * trace_buffered_event_cnt.
2255 	 */
2256 	smp_wmb();
2257 
2258 	preempt_disable();
2259 	/* Do the work on each cpu */
2260 	smp_call_function_many(tracing_buffer_mask,
2261 			       enable_trace_buffered_event, NULL, 1);
2262 	preempt_enable();
2263 }
2264 
2265 static struct ring_buffer *temp_buffer;
2266 
2267 struct ring_buffer_event *
2268 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2269 			  struct trace_event_file *trace_file,
2270 			  int type, unsigned long len,
2271 			  unsigned long flags, int pc)
2272 {
2273 	struct ring_buffer_event *entry;
2274 	int val;
2275 
2276 	*current_rb = trace_file->tr->trace_buffer.buffer;
2277 
2278 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2279 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2280 	    (entry = this_cpu_read(trace_buffered_event))) {
2281 		/* Try to use the per cpu buffer first */
2282 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2283 		if (val == 1) {
2284 			trace_event_setup(entry, type, flags, pc);
2285 			entry->array[0] = len;
2286 			return entry;
2287 		}
2288 		this_cpu_dec(trace_buffered_event_cnt);
2289 	}
2290 
2291 	entry = __trace_buffer_lock_reserve(*current_rb,
2292 					    type, len, flags, pc);
2293 	/*
2294 	 * If tracing is off, but we have triggers enabled
2295 	 * we still need to look at the event data. Use the temp_buffer
2296 	 * to store the trace event for the tigger to use. It's recusive
2297 	 * safe and will not be recorded anywhere.
2298 	 */
2299 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2300 		*current_rb = temp_buffer;
2301 		entry = __trace_buffer_lock_reserve(*current_rb,
2302 						    type, len, flags, pc);
2303 	}
2304 	return entry;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2307 
2308 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2309 static DEFINE_MUTEX(tracepoint_printk_mutex);
2310 
2311 static void output_printk(struct trace_event_buffer *fbuffer)
2312 {
2313 	struct trace_event_call *event_call;
2314 	struct trace_event *event;
2315 	unsigned long flags;
2316 	struct trace_iterator *iter = tracepoint_print_iter;
2317 
2318 	/* We should never get here if iter is NULL */
2319 	if (WARN_ON_ONCE(!iter))
2320 		return;
2321 
2322 	event_call = fbuffer->trace_file->event_call;
2323 	if (!event_call || !event_call->event.funcs ||
2324 	    !event_call->event.funcs->trace)
2325 		return;
2326 
2327 	event = &fbuffer->trace_file->event_call->event;
2328 
2329 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2330 	trace_seq_init(&iter->seq);
2331 	iter->ent = fbuffer->entry;
2332 	event_call->event.funcs->trace(iter, 0, event);
2333 	trace_seq_putc(&iter->seq, 0);
2334 	printk("%s", iter->seq.buffer);
2335 
2336 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2337 }
2338 
2339 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2340 			     void __user *buffer, size_t *lenp,
2341 			     loff_t *ppos)
2342 {
2343 	int save_tracepoint_printk;
2344 	int ret;
2345 
2346 	mutex_lock(&tracepoint_printk_mutex);
2347 	save_tracepoint_printk = tracepoint_printk;
2348 
2349 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2350 
2351 	/*
2352 	 * This will force exiting early, as tracepoint_printk
2353 	 * is always zero when tracepoint_printk_iter is not allocated
2354 	 */
2355 	if (!tracepoint_print_iter)
2356 		tracepoint_printk = 0;
2357 
2358 	if (save_tracepoint_printk == tracepoint_printk)
2359 		goto out;
2360 
2361 	if (tracepoint_printk)
2362 		static_key_enable(&tracepoint_printk_key.key);
2363 	else
2364 		static_key_disable(&tracepoint_printk_key.key);
2365 
2366  out:
2367 	mutex_unlock(&tracepoint_printk_mutex);
2368 
2369 	return ret;
2370 }
2371 
2372 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2373 {
2374 	if (static_key_false(&tracepoint_printk_key.key))
2375 		output_printk(fbuffer);
2376 
2377 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2378 				    fbuffer->event, fbuffer->entry,
2379 				    fbuffer->flags, fbuffer->pc);
2380 }
2381 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2382 
2383 /*
2384  * Skip 3:
2385  *
2386  *   trace_buffer_unlock_commit_regs()
2387  *   trace_event_buffer_commit()
2388  *   trace_event_raw_event_xxx()
2389  */
2390 # define STACK_SKIP 3
2391 
2392 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2393 				     struct ring_buffer *buffer,
2394 				     struct ring_buffer_event *event,
2395 				     unsigned long flags, int pc,
2396 				     struct pt_regs *regs)
2397 {
2398 	__buffer_unlock_commit(buffer, event);
2399 
2400 	/*
2401 	 * If regs is not set, then skip the necessary functions.
2402 	 * Note, we can still get here via blktrace, wakeup tracer
2403 	 * and mmiotrace, but that's ok if they lose a function or
2404 	 * two. They are not that meaningful.
2405 	 */
2406 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2407 	ftrace_trace_userstack(buffer, flags, pc);
2408 }
2409 
2410 /*
2411  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2412  */
2413 void
2414 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2415 				   struct ring_buffer_event *event)
2416 {
2417 	__buffer_unlock_commit(buffer, event);
2418 }
2419 
2420 static void
2421 trace_process_export(struct trace_export *export,
2422 	       struct ring_buffer_event *event)
2423 {
2424 	struct trace_entry *entry;
2425 	unsigned int size = 0;
2426 
2427 	entry = ring_buffer_event_data(event);
2428 	size = ring_buffer_event_length(event);
2429 	export->write(export, entry, size);
2430 }
2431 
2432 static DEFINE_MUTEX(ftrace_export_lock);
2433 
2434 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2435 
2436 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2437 
2438 static inline void ftrace_exports_enable(void)
2439 {
2440 	static_branch_enable(&ftrace_exports_enabled);
2441 }
2442 
2443 static inline void ftrace_exports_disable(void)
2444 {
2445 	static_branch_disable(&ftrace_exports_enabled);
2446 }
2447 
2448 void ftrace_exports(struct ring_buffer_event *event)
2449 {
2450 	struct trace_export *export;
2451 
2452 	preempt_disable_notrace();
2453 
2454 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2455 	while (export) {
2456 		trace_process_export(export, event);
2457 		export = rcu_dereference_raw_notrace(export->next);
2458 	}
2459 
2460 	preempt_enable_notrace();
2461 }
2462 
2463 static inline void
2464 add_trace_export(struct trace_export **list, struct trace_export *export)
2465 {
2466 	rcu_assign_pointer(export->next, *list);
2467 	/*
2468 	 * We are entering export into the list but another
2469 	 * CPU might be walking that list. We need to make sure
2470 	 * the export->next pointer is valid before another CPU sees
2471 	 * the export pointer included into the list.
2472 	 */
2473 	rcu_assign_pointer(*list, export);
2474 }
2475 
2476 static inline int
2477 rm_trace_export(struct trace_export **list, struct trace_export *export)
2478 {
2479 	struct trace_export **p;
2480 
2481 	for (p = list; *p != NULL; p = &(*p)->next)
2482 		if (*p == export)
2483 			break;
2484 
2485 	if (*p != export)
2486 		return -1;
2487 
2488 	rcu_assign_pointer(*p, (*p)->next);
2489 
2490 	return 0;
2491 }
2492 
2493 static inline void
2494 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2495 {
2496 	if (*list == NULL)
2497 		ftrace_exports_enable();
2498 
2499 	add_trace_export(list, export);
2500 }
2501 
2502 static inline int
2503 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505 	int ret;
2506 
2507 	ret = rm_trace_export(list, export);
2508 	if (*list == NULL)
2509 		ftrace_exports_disable();
2510 
2511 	return ret;
2512 }
2513 
2514 int register_ftrace_export(struct trace_export *export)
2515 {
2516 	if (WARN_ON_ONCE(!export->write))
2517 		return -1;
2518 
2519 	mutex_lock(&ftrace_export_lock);
2520 
2521 	add_ftrace_export(&ftrace_exports_list, export);
2522 
2523 	mutex_unlock(&ftrace_export_lock);
2524 
2525 	return 0;
2526 }
2527 EXPORT_SYMBOL_GPL(register_ftrace_export);
2528 
2529 int unregister_ftrace_export(struct trace_export *export)
2530 {
2531 	int ret;
2532 
2533 	mutex_lock(&ftrace_export_lock);
2534 
2535 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2536 
2537 	mutex_unlock(&ftrace_export_lock);
2538 
2539 	return ret;
2540 }
2541 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2542 
2543 void
2544 trace_function(struct trace_array *tr,
2545 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2546 	       int pc)
2547 {
2548 	struct trace_event_call *call = &event_function;
2549 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2550 	struct ring_buffer_event *event;
2551 	struct ftrace_entry *entry;
2552 
2553 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2554 					    flags, pc);
2555 	if (!event)
2556 		return;
2557 	entry	= ring_buffer_event_data(event);
2558 	entry->ip			= ip;
2559 	entry->parent_ip		= parent_ip;
2560 
2561 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2562 		if (static_branch_unlikely(&ftrace_exports_enabled))
2563 			ftrace_exports(event);
2564 		__buffer_unlock_commit(buffer, event);
2565 	}
2566 }
2567 
2568 #ifdef CONFIG_STACKTRACE
2569 
2570 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2571 struct ftrace_stack {
2572 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2573 };
2574 
2575 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2576 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2577 
2578 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2579 				 unsigned long flags,
2580 				 int skip, int pc, struct pt_regs *regs)
2581 {
2582 	struct trace_event_call *call = &event_kernel_stack;
2583 	struct ring_buffer_event *event;
2584 	struct stack_entry *entry;
2585 	struct stack_trace trace;
2586 	int use_stack;
2587 	int size = FTRACE_STACK_ENTRIES;
2588 
2589 	trace.nr_entries	= 0;
2590 	trace.skip		= skip;
2591 
2592 	/*
2593 	 * Add one, for this function and the call to save_stack_trace()
2594 	 * If regs is set, then these functions will not be in the way.
2595 	 */
2596 #ifndef CONFIG_UNWINDER_ORC
2597 	if (!regs)
2598 		trace.skip++;
2599 #endif
2600 
2601 	/*
2602 	 * Since events can happen in NMIs there's no safe way to
2603 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2604 	 * or NMI comes in, it will just have to use the default
2605 	 * FTRACE_STACK_SIZE.
2606 	 */
2607 	preempt_disable_notrace();
2608 
2609 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2610 	/*
2611 	 * We don't need any atomic variables, just a barrier.
2612 	 * If an interrupt comes in, we don't care, because it would
2613 	 * have exited and put the counter back to what we want.
2614 	 * We just need a barrier to keep gcc from moving things
2615 	 * around.
2616 	 */
2617 	barrier();
2618 	if (use_stack == 1) {
2619 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2620 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2621 
2622 		if (regs)
2623 			save_stack_trace_regs(regs, &trace);
2624 		else
2625 			save_stack_trace(&trace);
2626 
2627 		if (trace.nr_entries > size)
2628 			size = trace.nr_entries;
2629 	} else
2630 		/* From now on, use_stack is a boolean */
2631 		use_stack = 0;
2632 
2633 	size *= sizeof(unsigned long);
2634 
2635 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2636 					    sizeof(*entry) + size, flags, pc);
2637 	if (!event)
2638 		goto out;
2639 	entry = ring_buffer_event_data(event);
2640 
2641 	memset(&entry->caller, 0, size);
2642 
2643 	if (use_stack)
2644 		memcpy(&entry->caller, trace.entries,
2645 		       trace.nr_entries * sizeof(unsigned long));
2646 	else {
2647 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2648 		trace.entries		= entry->caller;
2649 		if (regs)
2650 			save_stack_trace_regs(regs, &trace);
2651 		else
2652 			save_stack_trace(&trace);
2653 	}
2654 
2655 	entry->size = trace.nr_entries;
2656 
2657 	if (!call_filter_check_discard(call, entry, buffer, event))
2658 		__buffer_unlock_commit(buffer, event);
2659 
2660  out:
2661 	/* Again, don't let gcc optimize things here */
2662 	barrier();
2663 	__this_cpu_dec(ftrace_stack_reserve);
2664 	preempt_enable_notrace();
2665 
2666 }
2667 
2668 static inline void ftrace_trace_stack(struct trace_array *tr,
2669 				      struct ring_buffer *buffer,
2670 				      unsigned long flags,
2671 				      int skip, int pc, struct pt_regs *regs)
2672 {
2673 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2674 		return;
2675 
2676 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2677 }
2678 
2679 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2680 		   int pc)
2681 {
2682 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2683 
2684 	if (rcu_is_watching()) {
2685 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2686 		return;
2687 	}
2688 
2689 	/*
2690 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2691 	 * but if the above rcu_is_watching() failed, then the NMI
2692 	 * triggered someplace critical, and rcu_irq_enter() should
2693 	 * not be called from NMI.
2694 	 */
2695 	if (unlikely(in_nmi()))
2696 		return;
2697 
2698 	rcu_irq_enter_irqson();
2699 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2700 	rcu_irq_exit_irqson();
2701 }
2702 
2703 /**
2704  * trace_dump_stack - record a stack back trace in the trace buffer
2705  * @skip: Number of functions to skip (helper handlers)
2706  */
2707 void trace_dump_stack(int skip)
2708 {
2709 	unsigned long flags;
2710 
2711 	if (tracing_disabled || tracing_selftest_running)
2712 		return;
2713 
2714 	local_save_flags(flags);
2715 
2716 #ifndef CONFIG_UNWINDER_ORC
2717 	/* Skip 1 to skip this function. */
2718 	skip++;
2719 #endif
2720 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2721 			     flags, skip, preempt_count(), NULL);
2722 }
2723 
2724 static DEFINE_PER_CPU(int, user_stack_count);
2725 
2726 void
2727 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2728 {
2729 	struct trace_event_call *call = &event_user_stack;
2730 	struct ring_buffer_event *event;
2731 	struct userstack_entry *entry;
2732 	struct stack_trace trace;
2733 
2734 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2735 		return;
2736 
2737 	/*
2738 	 * NMIs can not handle page faults, even with fix ups.
2739 	 * The save user stack can (and often does) fault.
2740 	 */
2741 	if (unlikely(in_nmi()))
2742 		return;
2743 
2744 	/*
2745 	 * prevent recursion, since the user stack tracing may
2746 	 * trigger other kernel events.
2747 	 */
2748 	preempt_disable();
2749 	if (__this_cpu_read(user_stack_count))
2750 		goto out;
2751 
2752 	__this_cpu_inc(user_stack_count);
2753 
2754 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2755 					    sizeof(*entry), flags, pc);
2756 	if (!event)
2757 		goto out_drop_count;
2758 	entry	= ring_buffer_event_data(event);
2759 
2760 	entry->tgid		= current->tgid;
2761 	memset(&entry->caller, 0, sizeof(entry->caller));
2762 
2763 	trace.nr_entries	= 0;
2764 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2765 	trace.skip		= 0;
2766 	trace.entries		= entry->caller;
2767 
2768 	save_stack_trace_user(&trace);
2769 	if (!call_filter_check_discard(call, entry, buffer, event))
2770 		__buffer_unlock_commit(buffer, event);
2771 
2772  out_drop_count:
2773 	__this_cpu_dec(user_stack_count);
2774  out:
2775 	preempt_enable();
2776 }
2777 
2778 #ifdef UNUSED
2779 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2780 {
2781 	ftrace_trace_userstack(tr, flags, preempt_count());
2782 }
2783 #endif /* UNUSED */
2784 
2785 #endif /* CONFIG_STACKTRACE */
2786 
2787 /* created for use with alloc_percpu */
2788 struct trace_buffer_struct {
2789 	int nesting;
2790 	char buffer[4][TRACE_BUF_SIZE];
2791 };
2792 
2793 static struct trace_buffer_struct *trace_percpu_buffer;
2794 
2795 /*
2796  * Thise allows for lockless recording.  If we're nested too deeply, then
2797  * this returns NULL.
2798  */
2799 static char *get_trace_buf(void)
2800 {
2801 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2802 
2803 	if (!buffer || buffer->nesting >= 4)
2804 		return NULL;
2805 
2806 	buffer->nesting++;
2807 
2808 	/* Interrupts must see nesting incremented before we use the buffer */
2809 	barrier();
2810 	return &buffer->buffer[buffer->nesting][0];
2811 }
2812 
2813 static void put_trace_buf(void)
2814 {
2815 	/* Don't let the decrement of nesting leak before this */
2816 	barrier();
2817 	this_cpu_dec(trace_percpu_buffer->nesting);
2818 }
2819 
2820 static int alloc_percpu_trace_buffer(void)
2821 {
2822 	struct trace_buffer_struct *buffers;
2823 
2824 	buffers = alloc_percpu(struct trace_buffer_struct);
2825 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2826 		return -ENOMEM;
2827 
2828 	trace_percpu_buffer = buffers;
2829 	return 0;
2830 }
2831 
2832 static int buffers_allocated;
2833 
2834 void trace_printk_init_buffers(void)
2835 {
2836 	if (buffers_allocated)
2837 		return;
2838 
2839 	if (alloc_percpu_trace_buffer())
2840 		return;
2841 
2842 	/* trace_printk() is for debug use only. Don't use it in production. */
2843 
2844 	pr_warn("\n");
2845 	pr_warn("**********************************************************\n");
2846 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2847 	pr_warn("**                                                      **\n");
2848 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2849 	pr_warn("**                                                      **\n");
2850 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2851 	pr_warn("** unsafe for production use.                           **\n");
2852 	pr_warn("**                                                      **\n");
2853 	pr_warn("** If you see this message and you are not debugging    **\n");
2854 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2857 	pr_warn("**********************************************************\n");
2858 
2859 	/* Expand the buffers to set size */
2860 	tracing_update_buffers();
2861 
2862 	buffers_allocated = 1;
2863 
2864 	/*
2865 	 * trace_printk_init_buffers() can be called by modules.
2866 	 * If that happens, then we need to start cmdline recording
2867 	 * directly here. If the global_trace.buffer is already
2868 	 * allocated here, then this was called by module code.
2869 	 */
2870 	if (global_trace.trace_buffer.buffer)
2871 		tracing_start_cmdline_record();
2872 }
2873 
2874 void trace_printk_start_comm(void)
2875 {
2876 	/* Start tracing comms if trace printk is set */
2877 	if (!buffers_allocated)
2878 		return;
2879 	tracing_start_cmdline_record();
2880 }
2881 
2882 static void trace_printk_start_stop_comm(int enabled)
2883 {
2884 	if (!buffers_allocated)
2885 		return;
2886 
2887 	if (enabled)
2888 		tracing_start_cmdline_record();
2889 	else
2890 		tracing_stop_cmdline_record();
2891 }
2892 
2893 /**
2894  * trace_vbprintk - write binary msg to tracing buffer
2895  *
2896  */
2897 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2898 {
2899 	struct trace_event_call *call = &event_bprint;
2900 	struct ring_buffer_event *event;
2901 	struct ring_buffer *buffer;
2902 	struct trace_array *tr = &global_trace;
2903 	struct bprint_entry *entry;
2904 	unsigned long flags;
2905 	char *tbuffer;
2906 	int len = 0, size, pc;
2907 
2908 	if (unlikely(tracing_selftest_running || tracing_disabled))
2909 		return 0;
2910 
2911 	/* Don't pollute graph traces with trace_vprintk internals */
2912 	pause_graph_tracing();
2913 
2914 	pc = preempt_count();
2915 	preempt_disable_notrace();
2916 
2917 	tbuffer = get_trace_buf();
2918 	if (!tbuffer) {
2919 		len = 0;
2920 		goto out_nobuffer;
2921 	}
2922 
2923 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2924 
2925 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2926 		goto out;
2927 
2928 	local_save_flags(flags);
2929 	size = sizeof(*entry) + sizeof(u32) * len;
2930 	buffer = tr->trace_buffer.buffer;
2931 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2932 					    flags, pc);
2933 	if (!event)
2934 		goto out;
2935 	entry = ring_buffer_event_data(event);
2936 	entry->ip			= ip;
2937 	entry->fmt			= fmt;
2938 
2939 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2940 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2941 		__buffer_unlock_commit(buffer, event);
2942 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2943 	}
2944 
2945 out:
2946 	put_trace_buf();
2947 
2948 out_nobuffer:
2949 	preempt_enable_notrace();
2950 	unpause_graph_tracing();
2951 
2952 	return len;
2953 }
2954 EXPORT_SYMBOL_GPL(trace_vbprintk);
2955 
2956 static int
2957 __trace_array_vprintk(struct ring_buffer *buffer,
2958 		      unsigned long ip, const char *fmt, va_list args)
2959 {
2960 	struct trace_event_call *call = &event_print;
2961 	struct ring_buffer_event *event;
2962 	int len = 0, size, pc;
2963 	struct print_entry *entry;
2964 	unsigned long flags;
2965 	char *tbuffer;
2966 
2967 	if (tracing_disabled || tracing_selftest_running)
2968 		return 0;
2969 
2970 	/* Don't pollute graph traces with trace_vprintk internals */
2971 	pause_graph_tracing();
2972 
2973 	pc = preempt_count();
2974 	preempt_disable_notrace();
2975 
2976 
2977 	tbuffer = get_trace_buf();
2978 	if (!tbuffer) {
2979 		len = 0;
2980 		goto out_nobuffer;
2981 	}
2982 
2983 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2984 
2985 	local_save_flags(flags);
2986 	size = sizeof(*entry) + len + 1;
2987 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2988 					    flags, pc);
2989 	if (!event)
2990 		goto out;
2991 	entry = ring_buffer_event_data(event);
2992 	entry->ip = ip;
2993 
2994 	memcpy(&entry->buf, tbuffer, len + 1);
2995 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2996 		__buffer_unlock_commit(buffer, event);
2997 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2998 	}
2999 
3000 out:
3001 	put_trace_buf();
3002 
3003 out_nobuffer:
3004 	preempt_enable_notrace();
3005 	unpause_graph_tracing();
3006 
3007 	return len;
3008 }
3009 
3010 int trace_array_vprintk(struct trace_array *tr,
3011 			unsigned long ip, const char *fmt, va_list args)
3012 {
3013 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3014 }
3015 
3016 int trace_array_printk(struct trace_array *tr,
3017 		       unsigned long ip, const char *fmt, ...)
3018 {
3019 	int ret;
3020 	va_list ap;
3021 
3022 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3023 		return 0;
3024 
3025 	va_start(ap, fmt);
3026 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3027 	va_end(ap);
3028 	return ret;
3029 }
3030 
3031 int trace_array_printk_buf(struct ring_buffer *buffer,
3032 			   unsigned long ip, const char *fmt, ...)
3033 {
3034 	int ret;
3035 	va_list ap;
3036 
3037 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3038 		return 0;
3039 
3040 	va_start(ap, fmt);
3041 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3042 	va_end(ap);
3043 	return ret;
3044 }
3045 
3046 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3047 {
3048 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3049 }
3050 EXPORT_SYMBOL_GPL(trace_vprintk);
3051 
3052 static void trace_iterator_increment(struct trace_iterator *iter)
3053 {
3054 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3055 
3056 	iter->idx++;
3057 	if (buf_iter)
3058 		ring_buffer_read(buf_iter, NULL);
3059 }
3060 
3061 static struct trace_entry *
3062 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3063 		unsigned long *lost_events)
3064 {
3065 	struct ring_buffer_event *event;
3066 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3067 
3068 	if (buf_iter)
3069 		event = ring_buffer_iter_peek(buf_iter, ts);
3070 	else
3071 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3072 					 lost_events);
3073 
3074 	if (event) {
3075 		iter->ent_size = ring_buffer_event_length(event);
3076 		return ring_buffer_event_data(event);
3077 	}
3078 	iter->ent_size = 0;
3079 	return NULL;
3080 }
3081 
3082 static struct trace_entry *
3083 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3084 		  unsigned long *missing_events, u64 *ent_ts)
3085 {
3086 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3087 	struct trace_entry *ent, *next = NULL;
3088 	unsigned long lost_events = 0, next_lost = 0;
3089 	int cpu_file = iter->cpu_file;
3090 	u64 next_ts = 0, ts;
3091 	int next_cpu = -1;
3092 	int next_size = 0;
3093 	int cpu;
3094 
3095 	/*
3096 	 * If we are in a per_cpu trace file, don't bother by iterating over
3097 	 * all cpu and peek directly.
3098 	 */
3099 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3100 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3101 			return NULL;
3102 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3103 		if (ent_cpu)
3104 			*ent_cpu = cpu_file;
3105 
3106 		return ent;
3107 	}
3108 
3109 	for_each_tracing_cpu(cpu) {
3110 
3111 		if (ring_buffer_empty_cpu(buffer, cpu))
3112 			continue;
3113 
3114 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3115 
3116 		/*
3117 		 * Pick the entry with the smallest timestamp:
3118 		 */
3119 		if (ent && (!next || ts < next_ts)) {
3120 			next = ent;
3121 			next_cpu = cpu;
3122 			next_ts = ts;
3123 			next_lost = lost_events;
3124 			next_size = iter->ent_size;
3125 		}
3126 	}
3127 
3128 	iter->ent_size = next_size;
3129 
3130 	if (ent_cpu)
3131 		*ent_cpu = next_cpu;
3132 
3133 	if (ent_ts)
3134 		*ent_ts = next_ts;
3135 
3136 	if (missing_events)
3137 		*missing_events = next_lost;
3138 
3139 	return next;
3140 }
3141 
3142 /* Find the next real entry, without updating the iterator itself */
3143 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3144 					  int *ent_cpu, u64 *ent_ts)
3145 {
3146 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3147 }
3148 
3149 /* Find the next real entry, and increment the iterator to the next entry */
3150 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3151 {
3152 	iter->ent = __find_next_entry(iter, &iter->cpu,
3153 				      &iter->lost_events, &iter->ts);
3154 
3155 	if (iter->ent)
3156 		trace_iterator_increment(iter);
3157 
3158 	return iter->ent ? iter : NULL;
3159 }
3160 
3161 static void trace_consume(struct trace_iterator *iter)
3162 {
3163 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3164 			    &iter->lost_events);
3165 }
3166 
3167 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3168 {
3169 	struct trace_iterator *iter = m->private;
3170 	int i = (int)*pos;
3171 	void *ent;
3172 
3173 	WARN_ON_ONCE(iter->leftover);
3174 
3175 	(*pos)++;
3176 
3177 	/* can't go backwards */
3178 	if (iter->idx > i)
3179 		return NULL;
3180 
3181 	if (iter->idx < 0)
3182 		ent = trace_find_next_entry_inc(iter);
3183 	else
3184 		ent = iter;
3185 
3186 	while (ent && iter->idx < i)
3187 		ent = trace_find_next_entry_inc(iter);
3188 
3189 	iter->pos = *pos;
3190 
3191 	return ent;
3192 }
3193 
3194 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3195 {
3196 	struct ring_buffer_event *event;
3197 	struct ring_buffer_iter *buf_iter;
3198 	unsigned long entries = 0;
3199 	u64 ts;
3200 
3201 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3202 
3203 	buf_iter = trace_buffer_iter(iter, cpu);
3204 	if (!buf_iter)
3205 		return;
3206 
3207 	ring_buffer_iter_reset(buf_iter);
3208 
3209 	/*
3210 	 * We could have the case with the max latency tracers
3211 	 * that a reset never took place on a cpu. This is evident
3212 	 * by the timestamp being before the start of the buffer.
3213 	 */
3214 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3215 		if (ts >= iter->trace_buffer->time_start)
3216 			break;
3217 		entries++;
3218 		ring_buffer_read(buf_iter, NULL);
3219 	}
3220 
3221 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3222 }
3223 
3224 /*
3225  * The current tracer is copied to avoid a global locking
3226  * all around.
3227  */
3228 static void *s_start(struct seq_file *m, loff_t *pos)
3229 {
3230 	struct trace_iterator *iter = m->private;
3231 	struct trace_array *tr = iter->tr;
3232 	int cpu_file = iter->cpu_file;
3233 	void *p = NULL;
3234 	loff_t l = 0;
3235 	int cpu;
3236 
3237 	/*
3238 	 * copy the tracer to avoid using a global lock all around.
3239 	 * iter->trace is a copy of current_trace, the pointer to the
3240 	 * name may be used instead of a strcmp(), as iter->trace->name
3241 	 * will point to the same string as current_trace->name.
3242 	 */
3243 	mutex_lock(&trace_types_lock);
3244 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3245 		*iter->trace = *tr->current_trace;
3246 	mutex_unlock(&trace_types_lock);
3247 
3248 #ifdef CONFIG_TRACER_MAX_TRACE
3249 	if (iter->snapshot && iter->trace->use_max_tr)
3250 		return ERR_PTR(-EBUSY);
3251 #endif
3252 
3253 	if (!iter->snapshot)
3254 		atomic_inc(&trace_record_taskinfo_disabled);
3255 
3256 	if (*pos != iter->pos) {
3257 		iter->ent = NULL;
3258 		iter->cpu = 0;
3259 		iter->idx = -1;
3260 
3261 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3262 			for_each_tracing_cpu(cpu)
3263 				tracing_iter_reset(iter, cpu);
3264 		} else
3265 			tracing_iter_reset(iter, cpu_file);
3266 
3267 		iter->leftover = 0;
3268 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3269 			;
3270 
3271 	} else {
3272 		/*
3273 		 * If we overflowed the seq_file before, then we want
3274 		 * to just reuse the trace_seq buffer again.
3275 		 */
3276 		if (iter->leftover)
3277 			p = iter;
3278 		else {
3279 			l = *pos - 1;
3280 			p = s_next(m, p, &l);
3281 		}
3282 	}
3283 
3284 	trace_event_read_lock();
3285 	trace_access_lock(cpu_file);
3286 	return p;
3287 }
3288 
3289 static void s_stop(struct seq_file *m, void *p)
3290 {
3291 	struct trace_iterator *iter = m->private;
3292 
3293 #ifdef CONFIG_TRACER_MAX_TRACE
3294 	if (iter->snapshot && iter->trace->use_max_tr)
3295 		return;
3296 #endif
3297 
3298 	if (!iter->snapshot)
3299 		atomic_dec(&trace_record_taskinfo_disabled);
3300 
3301 	trace_access_unlock(iter->cpu_file);
3302 	trace_event_read_unlock();
3303 }
3304 
3305 static void
3306 get_total_entries(struct trace_buffer *buf,
3307 		  unsigned long *total, unsigned long *entries)
3308 {
3309 	unsigned long count;
3310 	int cpu;
3311 
3312 	*total = 0;
3313 	*entries = 0;
3314 
3315 	for_each_tracing_cpu(cpu) {
3316 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3317 		/*
3318 		 * If this buffer has skipped entries, then we hold all
3319 		 * entries for the trace and we need to ignore the
3320 		 * ones before the time stamp.
3321 		 */
3322 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3323 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3324 			/* total is the same as the entries */
3325 			*total += count;
3326 		} else
3327 			*total += count +
3328 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3329 		*entries += count;
3330 	}
3331 }
3332 
3333 static void print_lat_help_header(struct seq_file *m)
3334 {
3335 	seq_puts(m, "#                  _------=> CPU#            \n"
3336 		    "#                 / _-----=> irqs-off        \n"
3337 		    "#                | / _----=> need-resched    \n"
3338 		    "#                || / _---=> hardirq/softirq \n"
3339 		    "#                ||| / _--=> preempt-depth   \n"
3340 		    "#                |||| /     delay            \n"
3341 		    "#  cmd     pid   ||||| time  |   caller      \n"
3342 		    "#     \\   /      |||||  \\    |   /         \n");
3343 }
3344 
3345 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3346 {
3347 	unsigned long total;
3348 	unsigned long entries;
3349 
3350 	get_total_entries(buf, &total, &entries);
3351 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3352 		   entries, total, num_online_cpus());
3353 	seq_puts(m, "#\n");
3354 }
3355 
3356 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3357 				   unsigned int flags)
3358 {
3359 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3360 
3361 	print_event_info(buf, m);
3362 
3363 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3364 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3365 }
3366 
3367 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3368 				       unsigned int flags)
3369 {
3370 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3371 	const char tgid_space[] = "          ";
3372 	const char space[] = "  ";
3373 
3374 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3375 		   tgid ? tgid_space : space);
3376 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3377 		   tgid ? tgid_space : space);
3378 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3379 		   tgid ? tgid_space : space);
3380 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3381 		   tgid ? tgid_space : space);
3382 	seq_printf(m, "#                          %s||| /     delay\n",
3383 		   tgid ? tgid_space : space);
3384 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3385 		   tgid ? "   TGID   " : space);
3386 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3387 		   tgid ? "     |    " : space);
3388 }
3389 
3390 void
3391 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3392 {
3393 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3394 	struct trace_buffer *buf = iter->trace_buffer;
3395 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3396 	struct tracer *type = iter->trace;
3397 	unsigned long entries;
3398 	unsigned long total;
3399 	const char *name = "preemption";
3400 
3401 	name = type->name;
3402 
3403 	get_total_entries(buf, &total, &entries);
3404 
3405 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3406 		   name, UTS_RELEASE);
3407 	seq_puts(m, "# -----------------------------------"
3408 		 "---------------------------------\n");
3409 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3410 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3411 		   nsecs_to_usecs(data->saved_latency),
3412 		   entries,
3413 		   total,
3414 		   buf->cpu,
3415 #if defined(CONFIG_PREEMPT_NONE)
3416 		   "server",
3417 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3418 		   "desktop",
3419 #elif defined(CONFIG_PREEMPT)
3420 		   "preempt",
3421 #else
3422 		   "unknown",
3423 #endif
3424 		   /* These are reserved for later use */
3425 		   0, 0, 0, 0);
3426 #ifdef CONFIG_SMP
3427 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3428 #else
3429 	seq_puts(m, ")\n");
3430 #endif
3431 	seq_puts(m, "#    -----------------\n");
3432 	seq_printf(m, "#    | task: %.16s-%d "
3433 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3434 		   data->comm, data->pid,
3435 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3436 		   data->policy, data->rt_priority);
3437 	seq_puts(m, "#    -----------------\n");
3438 
3439 	if (data->critical_start) {
3440 		seq_puts(m, "#  => started at: ");
3441 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3442 		trace_print_seq(m, &iter->seq);
3443 		seq_puts(m, "\n#  => ended at:   ");
3444 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3445 		trace_print_seq(m, &iter->seq);
3446 		seq_puts(m, "\n#\n");
3447 	}
3448 
3449 	seq_puts(m, "#\n");
3450 }
3451 
3452 static void test_cpu_buff_start(struct trace_iterator *iter)
3453 {
3454 	struct trace_seq *s = &iter->seq;
3455 	struct trace_array *tr = iter->tr;
3456 
3457 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3458 		return;
3459 
3460 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3461 		return;
3462 
3463 	if (cpumask_available(iter->started) &&
3464 	    cpumask_test_cpu(iter->cpu, iter->started))
3465 		return;
3466 
3467 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3468 		return;
3469 
3470 	if (cpumask_available(iter->started))
3471 		cpumask_set_cpu(iter->cpu, iter->started);
3472 
3473 	/* Don't print started cpu buffer for the first entry of the trace */
3474 	if (iter->idx > 1)
3475 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3476 				iter->cpu);
3477 }
3478 
3479 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3480 {
3481 	struct trace_array *tr = iter->tr;
3482 	struct trace_seq *s = &iter->seq;
3483 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3484 	struct trace_entry *entry;
3485 	struct trace_event *event;
3486 
3487 	entry = iter->ent;
3488 
3489 	test_cpu_buff_start(iter);
3490 
3491 	event = ftrace_find_event(entry->type);
3492 
3493 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3494 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3495 			trace_print_lat_context(iter);
3496 		else
3497 			trace_print_context(iter);
3498 	}
3499 
3500 	if (trace_seq_has_overflowed(s))
3501 		return TRACE_TYPE_PARTIAL_LINE;
3502 
3503 	if (event)
3504 		return event->funcs->trace(iter, sym_flags, event);
3505 
3506 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3507 
3508 	return trace_handle_return(s);
3509 }
3510 
3511 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3512 {
3513 	struct trace_array *tr = iter->tr;
3514 	struct trace_seq *s = &iter->seq;
3515 	struct trace_entry *entry;
3516 	struct trace_event *event;
3517 
3518 	entry = iter->ent;
3519 
3520 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3521 		trace_seq_printf(s, "%d %d %llu ",
3522 				 entry->pid, iter->cpu, iter->ts);
3523 
3524 	if (trace_seq_has_overflowed(s))
3525 		return TRACE_TYPE_PARTIAL_LINE;
3526 
3527 	event = ftrace_find_event(entry->type);
3528 	if (event)
3529 		return event->funcs->raw(iter, 0, event);
3530 
3531 	trace_seq_printf(s, "%d ?\n", entry->type);
3532 
3533 	return trace_handle_return(s);
3534 }
3535 
3536 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3537 {
3538 	struct trace_array *tr = iter->tr;
3539 	struct trace_seq *s = &iter->seq;
3540 	unsigned char newline = '\n';
3541 	struct trace_entry *entry;
3542 	struct trace_event *event;
3543 
3544 	entry = iter->ent;
3545 
3546 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3547 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3548 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3549 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3550 		if (trace_seq_has_overflowed(s))
3551 			return TRACE_TYPE_PARTIAL_LINE;
3552 	}
3553 
3554 	event = ftrace_find_event(entry->type);
3555 	if (event) {
3556 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3557 		if (ret != TRACE_TYPE_HANDLED)
3558 			return ret;
3559 	}
3560 
3561 	SEQ_PUT_FIELD(s, newline);
3562 
3563 	return trace_handle_return(s);
3564 }
3565 
3566 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3567 {
3568 	struct trace_array *tr = iter->tr;
3569 	struct trace_seq *s = &iter->seq;
3570 	struct trace_entry *entry;
3571 	struct trace_event *event;
3572 
3573 	entry = iter->ent;
3574 
3575 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3576 		SEQ_PUT_FIELD(s, entry->pid);
3577 		SEQ_PUT_FIELD(s, iter->cpu);
3578 		SEQ_PUT_FIELD(s, iter->ts);
3579 		if (trace_seq_has_overflowed(s))
3580 			return TRACE_TYPE_PARTIAL_LINE;
3581 	}
3582 
3583 	event = ftrace_find_event(entry->type);
3584 	return event ? event->funcs->binary(iter, 0, event) :
3585 		TRACE_TYPE_HANDLED;
3586 }
3587 
3588 int trace_empty(struct trace_iterator *iter)
3589 {
3590 	struct ring_buffer_iter *buf_iter;
3591 	int cpu;
3592 
3593 	/* If we are looking at one CPU buffer, only check that one */
3594 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3595 		cpu = iter->cpu_file;
3596 		buf_iter = trace_buffer_iter(iter, cpu);
3597 		if (buf_iter) {
3598 			if (!ring_buffer_iter_empty(buf_iter))
3599 				return 0;
3600 		} else {
3601 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3602 				return 0;
3603 		}
3604 		return 1;
3605 	}
3606 
3607 	for_each_tracing_cpu(cpu) {
3608 		buf_iter = trace_buffer_iter(iter, cpu);
3609 		if (buf_iter) {
3610 			if (!ring_buffer_iter_empty(buf_iter))
3611 				return 0;
3612 		} else {
3613 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614 				return 0;
3615 		}
3616 	}
3617 
3618 	return 1;
3619 }
3620 
3621 /*  Called with trace_event_read_lock() held. */
3622 enum print_line_t print_trace_line(struct trace_iterator *iter)
3623 {
3624 	struct trace_array *tr = iter->tr;
3625 	unsigned long trace_flags = tr->trace_flags;
3626 	enum print_line_t ret;
3627 
3628 	if (iter->lost_events) {
3629 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3630 				 iter->cpu, iter->lost_events);
3631 		if (trace_seq_has_overflowed(&iter->seq))
3632 			return TRACE_TYPE_PARTIAL_LINE;
3633 	}
3634 
3635 	if (iter->trace && iter->trace->print_line) {
3636 		ret = iter->trace->print_line(iter);
3637 		if (ret != TRACE_TYPE_UNHANDLED)
3638 			return ret;
3639 	}
3640 
3641 	if (iter->ent->type == TRACE_BPUTS &&
3642 			trace_flags & TRACE_ITER_PRINTK &&
3643 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3644 		return trace_print_bputs_msg_only(iter);
3645 
3646 	if (iter->ent->type == TRACE_BPRINT &&
3647 			trace_flags & TRACE_ITER_PRINTK &&
3648 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649 		return trace_print_bprintk_msg_only(iter);
3650 
3651 	if (iter->ent->type == TRACE_PRINT &&
3652 			trace_flags & TRACE_ITER_PRINTK &&
3653 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654 		return trace_print_printk_msg_only(iter);
3655 
3656 	if (trace_flags & TRACE_ITER_BIN)
3657 		return print_bin_fmt(iter);
3658 
3659 	if (trace_flags & TRACE_ITER_HEX)
3660 		return print_hex_fmt(iter);
3661 
3662 	if (trace_flags & TRACE_ITER_RAW)
3663 		return print_raw_fmt(iter);
3664 
3665 	return print_trace_fmt(iter);
3666 }
3667 
3668 void trace_latency_header(struct seq_file *m)
3669 {
3670 	struct trace_iterator *iter = m->private;
3671 	struct trace_array *tr = iter->tr;
3672 
3673 	/* print nothing if the buffers are empty */
3674 	if (trace_empty(iter))
3675 		return;
3676 
3677 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3678 		print_trace_header(m, iter);
3679 
3680 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3681 		print_lat_help_header(m);
3682 }
3683 
3684 void trace_default_header(struct seq_file *m)
3685 {
3686 	struct trace_iterator *iter = m->private;
3687 	struct trace_array *tr = iter->tr;
3688 	unsigned long trace_flags = tr->trace_flags;
3689 
3690 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3691 		return;
3692 
3693 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3694 		/* print nothing if the buffers are empty */
3695 		if (trace_empty(iter))
3696 			return;
3697 		print_trace_header(m, iter);
3698 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3699 			print_lat_help_header(m);
3700 	} else {
3701 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3702 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3703 				print_func_help_header_irq(iter->trace_buffer,
3704 							   m, trace_flags);
3705 			else
3706 				print_func_help_header(iter->trace_buffer, m,
3707 						       trace_flags);
3708 		}
3709 	}
3710 }
3711 
3712 static void test_ftrace_alive(struct seq_file *m)
3713 {
3714 	if (!ftrace_is_dead())
3715 		return;
3716 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3717 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3718 }
3719 
3720 #ifdef CONFIG_TRACER_MAX_TRACE
3721 static void show_snapshot_main_help(struct seq_file *m)
3722 {
3723 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3724 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3725 		    "#                      Takes a snapshot of the main buffer.\n"
3726 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3727 		    "#                      (Doesn't have to be '2' works with any number that\n"
3728 		    "#                       is not a '0' or '1')\n");
3729 }
3730 
3731 static void show_snapshot_percpu_help(struct seq_file *m)
3732 {
3733 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3734 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3735 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3736 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3737 #else
3738 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3739 		    "#                     Must use main snapshot file to allocate.\n");
3740 #endif
3741 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3742 		    "#                      (Doesn't have to be '2' works with any number that\n"
3743 		    "#                       is not a '0' or '1')\n");
3744 }
3745 
3746 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3747 {
3748 	if (iter->tr->allocated_snapshot)
3749 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3750 	else
3751 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3752 
3753 	seq_puts(m, "# Snapshot commands:\n");
3754 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3755 		show_snapshot_main_help(m);
3756 	else
3757 		show_snapshot_percpu_help(m);
3758 }
3759 #else
3760 /* Should never be called */
3761 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3762 #endif
3763 
3764 static int s_show(struct seq_file *m, void *v)
3765 {
3766 	struct trace_iterator *iter = v;
3767 	int ret;
3768 
3769 	if (iter->ent == NULL) {
3770 		if (iter->tr) {
3771 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3772 			seq_puts(m, "#\n");
3773 			test_ftrace_alive(m);
3774 		}
3775 		if (iter->snapshot && trace_empty(iter))
3776 			print_snapshot_help(m, iter);
3777 		else if (iter->trace && iter->trace->print_header)
3778 			iter->trace->print_header(m);
3779 		else
3780 			trace_default_header(m);
3781 
3782 	} else if (iter->leftover) {
3783 		/*
3784 		 * If we filled the seq_file buffer earlier, we
3785 		 * want to just show it now.
3786 		 */
3787 		ret = trace_print_seq(m, &iter->seq);
3788 
3789 		/* ret should this time be zero, but you never know */
3790 		iter->leftover = ret;
3791 
3792 	} else {
3793 		print_trace_line(iter);
3794 		ret = trace_print_seq(m, &iter->seq);
3795 		/*
3796 		 * If we overflow the seq_file buffer, then it will
3797 		 * ask us for this data again at start up.
3798 		 * Use that instead.
3799 		 *  ret is 0 if seq_file write succeeded.
3800 		 *        -1 otherwise.
3801 		 */
3802 		iter->leftover = ret;
3803 	}
3804 
3805 	return 0;
3806 }
3807 
3808 /*
3809  * Should be used after trace_array_get(), trace_types_lock
3810  * ensures that i_cdev was already initialized.
3811  */
3812 static inline int tracing_get_cpu(struct inode *inode)
3813 {
3814 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3815 		return (long)inode->i_cdev - 1;
3816 	return RING_BUFFER_ALL_CPUS;
3817 }
3818 
3819 static const struct seq_operations tracer_seq_ops = {
3820 	.start		= s_start,
3821 	.next		= s_next,
3822 	.stop		= s_stop,
3823 	.show		= s_show,
3824 };
3825 
3826 static struct trace_iterator *
3827 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3828 {
3829 	struct trace_array *tr = inode->i_private;
3830 	struct trace_iterator *iter;
3831 	int cpu;
3832 
3833 	if (tracing_disabled)
3834 		return ERR_PTR(-ENODEV);
3835 
3836 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3837 	if (!iter)
3838 		return ERR_PTR(-ENOMEM);
3839 
3840 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3841 				    GFP_KERNEL);
3842 	if (!iter->buffer_iter)
3843 		goto release;
3844 
3845 	/*
3846 	 * We make a copy of the current tracer to avoid concurrent
3847 	 * changes on it while we are reading.
3848 	 */
3849 	mutex_lock(&trace_types_lock);
3850 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3851 	if (!iter->trace)
3852 		goto fail;
3853 
3854 	*iter->trace = *tr->current_trace;
3855 
3856 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3857 		goto fail;
3858 
3859 	iter->tr = tr;
3860 
3861 #ifdef CONFIG_TRACER_MAX_TRACE
3862 	/* Currently only the top directory has a snapshot */
3863 	if (tr->current_trace->print_max || snapshot)
3864 		iter->trace_buffer = &tr->max_buffer;
3865 	else
3866 #endif
3867 		iter->trace_buffer = &tr->trace_buffer;
3868 	iter->snapshot = snapshot;
3869 	iter->pos = -1;
3870 	iter->cpu_file = tracing_get_cpu(inode);
3871 	mutex_init(&iter->mutex);
3872 
3873 	/* Notify the tracer early; before we stop tracing. */
3874 	if (iter->trace && iter->trace->open)
3875 		iter->trace->open(iter);
3876 
3877 	/* Annotate start of buffers if we had overruns */
3878 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3879 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3880 
3881 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3882 	if (trace_clocks[tr->clock_id].in_ns)
3883 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3884 
3885 	/* stop the trace while dumping if we are not opening "snapshot" */
3886 	if (!iter->snapshot)
3887 		tracing_stop_tr(tr);
3888 
3889 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3890 		for_each_tracing_cpu(cpu) {
3891 			iter->buffer_iter[cpu] =
3892 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3893 		}
3894 		ring_buffer_read_prepare_sync();
3895 		for_each_tracing_cpu(cpu) {
3896 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3897 			tracing_iter_reset(iter, cpu);
3898 		}
3899 	} else {
3900 		cpu = iter->cpu_file;
3901 		iter->buffer_iter[cpu] =
3902 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3903 		ring_buffer_read_prepare_sync();
3904 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3905 		tracing_iter_reset(iter, cpu);
3906 	}
3907 
3908 	mutex_unlock(&trace_types_lock);
3909 
3910 	return iter;
3911 
3912  fail:
3913 	mutex_unlock(&trace_types_lock);
3914 	kfree(iter->trace);
3915 	kfree(iter->buffer_iter);
3916 release:
3917 	seq_release_private(inode, file);
3918 	return ERR_PTR(-ENOMEM);
3919 }
3920 
3921 int tracing_open_generic(struct inode *inode, struct file *filp)
3922 {
3923 	if (tracing_disabled)
3924 		return -ENODEV;
3925 
3926 	filp->private_data = inode->i_private;
3927 	return 0;
3928 }
3929 
3930 bool tracing_is_disabled(void)
3931 {
3932 	return (tracing_disabled) ? true: false;
3933 }
3934 
3935 /*
3936  * Open and update trace_array ref count.
3937  * Must have the current trace_array passed to it.
3938  */
3939 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3940 {
3941 	struct trace_array *tr = inode->i_private;
3942 
3943 	if (tracing_disabled)
3944 		return -ENODEV;
3945 
3946 	if (trace_array_get(tr) < 0)
3947 		return -ENODEV;
3948 
3949 	filp->private_data = inode->i_private;
3950 
3951 	return 0;
3952 }
3953 
3954 static int tracing_release(struct inode *inode, struct file *file)
3955 {
3956 	struct trace_array *tr = inode->i_private;
3957 	struct seq_file *m = file->private_data;
3958 	struct trace_iterator *iter;
3959 	int cpu;
3960 
3961 	if (!(file->f_mode & FMODE_READ)) {
3962 		trace_array_put(tr);
3963 		return 0;
3964 	}
3965 
3966 	/* Writes do not use seq_file */
3967 	iter = m->private;
3968 	mutex_lock(&trace_types_lock);
3969 
3970 	for_each_tracing_cpu(cpu) {
3971 		if (iter->buffer_iter[cpu])
3972 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3973 	}
3974 
3975 	if (iter->trace && iter->trace->close)
3976 		iter->trace->close(iter);
3977 
3978 	if (!iter->snapshot)
3979 		/* reenable tracing if it was previously enabled */
3980 		tracing_start_tr(tr);
3981 
3982 	__trace_array_put(tr);
3983 
3984 	mutex_unlock(&trace_types_lock);
3985 
3986 	mutex_destroy(&iter->mutex);
3987 	free_cpumask_var(iter->started);
3988 	kfree(iter->trace);
3989 	kfree(iter->buffer_iter);
3990 	seq_release_private(inode, file);
3991 
3992 	return 0;
3993 }
3994 
3995 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3996 {
3997 	struct trace_array *tr = inode->i_private;
3998 
3999 	trace_array_put(tr);
4000 	return 0;
4001 }
4002 
4003 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4004 {
4005 	struct trace_array *tr = inode->i_private;
4006 
4007 	trace_array_put(tr);
4008 
4009 	return single_release(inode, file);
4010 }
4011 
4012 static int tracing_open(struct inode *inode, struct file *file)
4013 {
4014 	struct trace_array *tr = inode->i_private;
4015 	struct trace_iterator *iter;
4016 	int ret = 0;
4017 
4018 	if (trace_array_get(tr) < 0)
4019 		return -ENODEV;
4020 
4021 	/* If this file was open for write, then erase contents */
4022 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4023 		int cpu = tracing_get_cpu(inode);
4024 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4025 
4026 #ifdef CONFIG_TRACER_MAX_TRACE
4027 		if (tr->current_trace->print_max)
4028 			trace_buf = &tr->max_buffer;
4029 #endif
4030 
4031 		if (cpu == RING_BUFFER_ALL_CPUS)
4032 			tracing_reset_online_cpus(trace_buf);
4033 		else
4034 			tracing_reset(trace_buf, cpu);
4035 	}
4036 
4037 	if (file->f_mode & FMODE_READ) {
4038 		iter = __tracing_open(inode, file, false);
4039 		if (IS_ERR(iter))
4040 			ret = PTR_ERR(iter);
4041 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4042 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4043 	}
4044 
4045 	if (ret < 0)
4046 		trace_array_put(tr);
4047 
4048 	return ret;
4049 }
4050 
4051 /*
4052  * Some tracers are not suitable for instance buffers.
4053  * A tracer is always available for the global array (toplevel)
4054  * or if it explicitly states that it is.
4055  */
4056 static bool
4057 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4058 {
4059 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4060 }
4061 
4062 /* Find the next tracer that this trace array may use */
4063 static struct tracer *
4064 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4065 {
4066 	while (t && !trace_ok_for_array(t, tr))
4067 		t = t->next;
4068 
4069 	return t;
4070 }
4071 
4072 static void *
4073 t_next(struct seq_file *m, void *v, loff_t *pos)
4074 {
4075 	struct trace_array *tr = m->private;
4076 	struct tracer *t = v;
4077 
4078 	(*pos)++;
4079 
4080 	if (t)
4081 		t = get_tracer_for_array(tr, t->next);
4082 
4083 	return t;
4084 }
4085 
4086 static void *t_start(struct seq_file *m, loff_t *pos)
4087 {
4088 	struct trace_array *tr = m->private;
4089 	struct tracer *t;
4090 	loff_t l = 0;
4091 
4092 	mutex_lock(&trace_types_lock);
4093 
4094 	t = get_tracer_for_array(tr, trace_types);
4095 	for (; t && l < *pos; t = t_next(m, t, &l))
4096 			;
4097 
4098 	return t;
4099 }
4100 
4101 static void t_stop(struct seq_file *m, void *p)
4102 {
4103 	mutex_unlock(&trace_types_lock);
4104 }
4105 
4106 static int t_show(struct seq_file *m, void *v)
4107 {
4108 	struct tracer *t = v;
4109 
4110 	if (!t)
4111 		return 0;
4112 
4113 	seq_puts(m, t->name);
4114 	if (t->next)
4115 		seq_putc(m, ' ');
4116 	else
4117 		seq_putc(m, '\n');
4118 
4119 	return 0;
4120 }
4121 
4122 static const struct seq_operations show_traces_seq_ops = {
4123 	.start		= t_start,
4124 	.next		= t_next,
4125 	.stop		= t_stop,
4126 	.show		= t_show,
4127 };
4128 
4129 static int show_traces_open(struct inode *inode, struct file *file)
4130 {
4131 	struct trace_array *tr = inode->i_private;
4132 	struct seq_file *m;
4133 	int ret;
4134 
4135 	if (tracing_disabled)
4136 		return -ENODEV;
4137 
4138 	ret = seq_open(file, &show_traces_seq_ops);
4139 	if (ret)
4140 		return ret;
4141 
4142 	m = file->private_data;
4143 	m->private = tr;
4144 
4145 	return 0;
4146 }
4147 
4148 static ssize_t
4149 tracing_write_stub(struct file *filp, const char __user *ubuf,
4150 		   size_t count, loff_t *ppos)
4151 {
4152 	return count;
4153 }
4154 
4155 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4156 {
4157 	int ret;
4158 
4159 	if (file->f_mode & FMODE_READ)
4160 		ret = seq_lseek(file, offset, whence);
4161 	else
4162 		file->f_pos = ret = 0;
4163 
4164 	return ret;
4165 }
4166 
4167 static const struct file_operations tracing_fops = {
4168 	.open		= tracing_open,
4169 	.read		= seq_read,
4170 	.write		= tracing_write_stub,
4171 	.llseek		= tracing_lseek,
4172 	.release	= tracing_release,
4173 };
4174 
4175 static const struct file_operations show_traces_fops = {
4176 	.open		= show_traces_open,
4177 	.read		= seq_read,
4178 	.release	= seq_release,
4179 	.llseek		= seq_lseek,
4180 };
4181 
4182 static ssize_t
4183 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4184 		     size_t count, loff_t *ppos)
4185 {
4186 	struct trace_array *tr = file_inode(filp)->i_private;
4187 	char *mask_str;
4188 	int len;
4189 
4190 	len = snprintf(NULL, 0, "%*pb\n",
4191 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4192 	mask_str = kmalloc(len, GFP_KERNEL);
4193 	if (!mask_str)
4194 		return -ENOMEM;
4195 
4196 	len = snprintf(mask_str, len, "%*pb\n",
4197 		       cpumask_pr_args(tr->tracing_cpumask));
4198 	if (len >= count) {
4199 		count = -EINVAL;
4200 		goto out_err;
4201 	}
4202 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4203 
4204 out_err:
4205 	kfree(mask_str);
4206 
4207 	return count;
4208 }
4209 
4210 static ssize_t
4211 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4212 		      size_t count, loff_t *ppos)
4213 {
4214 	struct trace_array *tr = file_inode(filp)->i_private;
4215 	cpumask_var_t tracing_cpumask_new;
4216 	int err, cpu;
4217 
4218 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4219 		return -ENOMEM;
4220 
4221 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4222 	if (err)
4223 		goto err_unlock;
4224 
4225 	local_irq_disable();
4226 	arch_spin_lock(&tr->max_lock);
4227 	for_each_tracing_cpu(cpu) {
4228 		/*
4229 		 * Increase/decrease the disabled counter if we are
4230 		 * about to flip a bit in the cpumask:
4231 		 */
4232 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4233 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4234 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4235 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4236 		}
4237 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4241 		}
4242 	}
4243 	arch_spin_unlock(&tr->max_lock);
4244 	local_irq_enable();
4245 
4246 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4247 	free_cpumask_var(tracing_cpumask_new);
4248 
4249 	return count;
4250 
4251 err_unlock:
4252 	free_cpumask_var(tracing_cpumask_new);
4253 
4254 	return err;
4255 }
4256 
4257 static const struct file_operations tracing_cpumask_fops = {
4258 	.open		= tracing_open_generic_tr,
4259 	.read		= tracing_cpumask_read,
4260 	.write		= tracing_cpumask_write,
4261 	.release	= tracing_release_generic_tr,
4262 	.llseek		= generic_file_llseek,
4263 };
4264 
4265 static int tracing_trace_options_show(struct seq_file *m, void *v)
4266 {
4267 	struct tracer_opt *trace_opts;
4268 	struct trace_array *tr = m->private;
4269 	u32 tracer_flags;
4270 	int i;
4271 
4272 	mutex_lock(&trace_types_lock);
4273 	tracer_flags = tr->current_trace->flags->val;
4274 	trace_opts = tr->current_trace->flags->opts;
4275 
4276 	for (i = 0; trace_options[i]; i++) {
4277 		if (tr->trace_flags & (1 << i))
4278 			seq_printf(m, "%s\n", trace_options[i]);
4279 		else
4280 			seq_printf(m, "no%s\n", trace_options[i]);
4281 	}
4282 
4283 	for (i = 0; trace_opts[i].name; i++) {
4284 		if (tracer_flags & trace_opts[i].bit)
4285 			seq_printf(m, "%s\n", trace_opts[i].name);
4286 		else
4287 			seq_printf(m, "no%s\n", trace_opts[i].name);
4288 	}
4289 	mutex_unlock(&trace_types_lock);
4290 
4291 	return 0;
4292 }
4293 
4294 static int __set_tracer_option(struct trace_array *tr,
4295 			       struct tracer_flags *tracer_flags,
4296 			       struct tracer_opt *opts, int neg)
4297 {
4298 	struct tracer *trace = tracer_flags->trace;
4299 	int ret;
4300 
4301 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4302 	if (ret)
4303 		return ret;
4304 
4305 	if (neg)
4306 		tracer_flags->val &= ~opts->bit;
4307 	else
4308 		tracer_flags->val |= opts->bit;
4309 	return 0;
4310 }
4311 
4312 /* Try to assign a tracer specific option */
4313 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4314 {
4315 	struct tracer *trace = tr->current_trace;
4316 	struct tracer_flags *tracer_flags = trace->flags;
4317 	struct tracer_opt *opts = NULL;
4318 	int i;
4319 
4320 	for (i = 0; tracer_flags->opts[i].name; i++) {
4321 		opts = &tracer_flags->opts[i];
4322 
4323 		if (strcmp(cmp, opts->name) == 0)
4324 			return __set_tracer_option(tr, trace->flags, opts, neg);
4325 	}
4326 
4327 	return -EINVAL;
4328 }
4329 
4330 /* Some tracers require overwrite to stay enabled */
4331 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4332 {
4333 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4334 		return -1;
4335 
4336 	return 0;
4337 }
4338 
4339 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4340 {
4341 	/* do nothing if flag is already set */
4342 	if (!!(tr->trace_flags & mask) == !!enabled)
4343 		return 0;
4344 
4345 	/* Give the tracer a chance to approve the change */
4346 	if (tr->current_trace->flag_changed)
4347 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4348 			return -EINVAL;
4349 
4350 	if (enabled)
4351 		tr->trace_flags |= mask;
4352 	else
4353 		tr->trace_flags &= ~mask;
4354 
4355 	if (mask == TRACE_ITER_RECORD_CMD)
4356 		trace_event_enable_cmd_record(enabled);
4357 
4358 	if (mask == TRACE_ITER_RECORD_TGID) {
4359 		if (!tgid_map)
4360 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4361 					   sizeof(*tgid_map),
4362 					   GFP_KERNEL);
4363 		if (!tgid_map) {
4364 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4365 			return -ENOMEM;
4366 		}
4367 
4368 		trace_event_enable_tgid_record(enabled);
4369 	}
4370 
4371 	if (mask == TRACE_ITER_EVENT_FORK)
4372 		trace_event_follow_fork(tr, enabled);
4373 
4374 	if (mask == TRACE_ITER_FUNC_FORK)
4375 		ftrace_pid_follow_fork(tr, enabled);
4376 
4377 	if (mask == TRACE_ITER_OVERWRITE) {
4378 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4379 #ifdef CONFIG_TRACER_MAX_TRACE
4380 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4381 #endif
4382 	}
4383 
4384 	if (mask == TRACE_ITER_PRINTK) {
4385 		trace_printk_start_stop_comm(enabled);
4386 		trace_printk_control(enabled);
4387 	}
4388 
4389 	return 0;
4390 }
4391 
4392 static int trace_set_options(struct trace_array *tr, char *option)
4393 {
4394 	char *cmp;
4395 	int neg = 0;
4396 	int ret;
4397 	size_t orig_len = strlen(option);
4398 
4399 	cmp = strstrip(option);
4400 
4401 	if (strncmp(cmp, "no", 2) == 0) {
4402 		neg = 1;
4403 		cmp += 2;
4404 	}
4405 
4406 	mutex_lock(&trace_types_lock);
4407 
4408 	ret = match_string(trace_options, -1, cmp);
4409 	/* If no option could be set, test the specific tracer options */
4410 	if (ret < 0)
4411 		ret = set_tracer_option(tr, cmp, neg);
4412 	else
4413 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4414 
4415 	mutex_unlock(&trace_types_lock);
4416 
4417 	/*
4418 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4419 	 * turn it back into a space.
4420 	 */
4421 	if (orig_len > strlen(option))
4422 		option[strlen(option)] = ' ';
4423 
4424 	return ret;
4425 }
4426 
4427 static void __init apply_trace_boot_options(void)
4428 {
4429 	char *buf = trace_boot_options_buf;
4430 	char *option;
4431 
4432 	while (true) {
4433 		option = strsep(&buf, ",");
4434 
4435 		if (!option)
4436 			break;
4437 
4438 		if (*option)
4439 			trace_set_options(&global_trace, option);
4440 
4441 		/* Put back the comma to allow this to be called again */
4442 		if (buf)
4443 			*(buf - 1) = ',';
4444 	}
4445 }
4446 
4447 static ssize_t
4448 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4449 			size_t cnt, loff_t *ppos)
4450 {
4451 	struct seq_file *m = filp->private_data;
4452 	struct trace_array *tr = m->private;
4453 	char buf[64];
4454 	int ret;
4455 
4456 	if (cnt >= sizeof(buf))
4457 		return -EINVAL;
4458 
4459 	if (copy_from_user(buf, ubuf, cnt))
4460 		return -EFAULT;
4461 
4462 	buf[cnt] = 0;
4463 
4464 	ret = trace_set_options(tr, buf);
4465 	if (ret < 0)
4466 		return ret;
4467 
4468 	*ppos += cnt;
4469 
4470 	return cnt;
4471 }
4472 
4473 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4474 {
4475 	struct trace_array *tr = inode->i_private;
4476 	int ret;
4477 
4478 	if (tracing_disabled)
4479 		return -ENODEV;
4480 
4481 	if (trace_array_get(tr) < 0)
4482 		return -ENODEV;
4483 
4484 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4485 	if (ret < 0)
4486 		trace_array_put(tr);
4487 
4488 	return ret;
4489 }
4490 
4491 static const struct file_operations tracing_iter_fops = {
4492 	.open		= tracing_trace_options_open,
4493 	.read		= seq_read,
4494 	.llseek		= seq_lseek,
4495 	.release	= tracing_single_release_tr,
4496 	.write		= tracing_trace_options_write,
4497 };
4498 
4499 static const char readme_msg[] =
4500 	"tracing mini-HOWTO:\n\n"
4501 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4502 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4503 	" Important files:\n"
4504 	"  trace\t\t\t- The static contents of the buffer\n"
4505 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4506 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4507 	"  current_tracer\t- function and latency tracers\n"
4508 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4509 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4510 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4511 	"  trace_clock\t\t-change the clock used to order events\n"
4512 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4513 	"      global:   Synced across CPUs but slows tracing down.\n"
4514 	"     counter:   Not a clock, but just an increment\n"
4515 	"      uptime:   Jiffy counter from time of boot\n"
4516 	"        perf:   Same clock that perf events use\n"
4517 #ifdef CONFIG_X86_64
4518 	"     x86-tsc:   TSC cycle counter\n"
4519 #endif
4520 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4521 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4522 	"    absolute:   Absolute (standalone) timestamp\n"
4523 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4524 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4525 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4526 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4527 	"\t\t\t  Remove sub-buffer with rmdir\n"
4528 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4529 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4530 	"\t\t\t  option name\n"
4531 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4532 #ifdef CONFIG_DYNAMIC_FTRACE
4533 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4534 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4535 	"\t\t\t  functions\n"
4536 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4537 	"\t     modules: Can select a group via module\n"
4538 	"\t      Format: :mod:<module-name>\n"
4539 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4540 	"\t    triggers: a command to perform when function is hit\n"
4541 	"\t      Format: <function>:<trigger>[:count]\n"
4542 	"\t     trigger: traceon, traceoff\n"
4543 	"\t\t      enable_event:<system>:<event>\n"
4544 	"\t\t      disable_event:<system>:<event>\n"
4545 #ifdef CONFIG_STACKTRACE
4546 	"\t\t      stacktrace\n"
4547 #endif
4548 #ifdef CONFIG_TRACER_SNAPSHOT
4549 	"\t\t      snapshot\n"
4550 #endif
4551 	"\t\t      dump\n"
4552 	"\t\t      cpudump\n"
4553 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4554 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4555 	"\t     The first one will disable tracing every time do_fault is hit\n"
4556 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4557 	"\t       The first time do trap is hit and it disables tracing, the\n"
4558 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4559 	"\t       the counter will not decrement. It only decrements when the\n"
4560 	"\t       trigger did work\n"
4561 	"\t     To remove trigger without count:\n"
4562 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4563 	"\t     To remove trigger with a count:\n"
4564 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4565 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4566 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4567 	"\t    modules: Can select a group via module command :mod:\n"
4568 	"\t    Does not accept triggers\n"
4569 #endif /* CONFIG_DYNAMIC_FTRACE */
4570 #ifdef CONFIG_FUNCTION_TRACER
4571 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4572 	"\t\t    (function)\n"
4573 #endif
4574 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4575 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4576 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4577 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4578 #endif
4579 #ifdef CONFIG_TRACER_SNAPSHOT
4580 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4581 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4582 	"\t\t\t  information\n"
4583 #endif
4584 #ifdef CONFIG_STACK_TRACER
4585 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4586 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4587 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4588 	"\t\t\t  new trace)\n"
4589 #ifdef CONFIG_DYNAMIC_FTRACE
4590 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4591 	"\t\t\t  traces\n"
4592 #endif
4593 #endif /* CONFIG_STACK_TRACER */
4594 #ifdef CONFIG_KPROBE_EVENTS
4595 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4596 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4597 #endif
4598 #ifdef CONFIG_UPROBE_EVENTS
4599 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4600 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4601 #endif
4602 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4603 	"\t  accepts: event-definitions (one definition per line)\n"
4604 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4605 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4606 	"\t           -:[<group>/]<event>\n"
4607 #ifdef CONFIG_KPROBE_EVENTS
4608 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4609   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4610 #endif
4611 #ifdef CONFIG_UPROBE_EVENTS
4612 	"\t    place: <path>:<offset>\n"
4613 #endif
4614 	"\t     args: <name>=fetcharg[:type]\n"
4615 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4616 	"\t           $stack<index>, $stack, $retval, $comm\n"
4617 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4618 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4619 #endif
4620 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4621 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4622 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4623 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4624 	"\t\t\t  events\n"
4625 	"      filter\t\t- If set, only events passing filter are traced\n"
4626 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4627 	"\t\t\t  <event>:\n"
4628 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4629 	"      filter\t\t- If set, only events passing filter are traced\n"
4630 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4631 	"\t    Format: <trigger>[:count][if <filter>]\n"
4632 	"\t   trigger: traceon, traceoff\n"
4633 	"\t            enable_event:<system>:<event>\n"
4634 	"\t            disable_event:<system>:<event>\n"
4635 #ifdef CONFIG_HIST_TRIGGERS
4636 	"\t            enable_hist:<system>:<event>\n"
4637 	"\t            disable_hist:<system>:<event>\n"
4638 #endif
4639 #ifdef CONFIG_STACKTRACE
4640 	"\t\t    stacktrace\n"
4641 #endif
4642 #ifdef CONFIG_TRACER_SNAPSHOT
4643 	"\t\t    snapshot\n"
4644 #endif
4645 #ifdef CONFIG_HIST_TRIGGERS
4646 	"\t\t    hist (see below)\n"
4647 #endif
4648 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4649 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4650 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4651 	"\t                  events/block/block_unplug/trigger\n"
4652 	"\t   The first disables tracing every time block_unplug is hit.\n"
4653 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4654 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4655 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4656 	"\t   Like function triggers, the counter is only decremented if it\n"
4657 	"\t    enabled or disabled tracing.\n"
4658 	"\t   To remove a trigger without a count:\n"
4659 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4660 	"\t   To remove a trigger with a count:\n"
4661 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4662 	"\t   Filters can be ignored when removing a trigger.\n"
4663 #ifdef CONFIG_HIST_TRIGGERS
4664 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4665 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4666 	"\t            [:values=<field1[,field2,...]>]\n"
4667 	"\t            [:sort=<field1[,field2,...]>]\n"
4668 	"\t            [:size=#entries]\n"
4669 	"\t            [:pause][:continue][:clear]\n"
4670 	"\t            [:name=histname1]\n"
4671 	"\t            [if <filter>]\n\n"
4672 	"\t    When a matching event is hit, an entry is added to a hash\n"
4673 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4674 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4675 	"\t    correspond to fields in the event's format description.  Keys\n"
4676 	"\t    can be any field, or the special string 'stacktrace'.\n"
4677 	"\t    Compound keys consisting of up to two fields can be specified\n"
4678 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4679 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4680 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4681 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4682 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4683 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4684 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4685 	"\t    its histogram data will be shared with other triggers of the\n"
4686 	"\t    same name, and trigger hits will update this common data.\n\n"
4687 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4688 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4689 	"\t    triggers attached to an event, there will be a table for each\n"
4690 	"\t    trigger in the output.  The table displayed for a named\n"
4691 	"\t    trigger will be the same as any other instance having the\n"
4692 	"\t    same name.  The default format used to display a given field\n"
4693 	"\t    can be modified by appending any of the following modifiers\n"
4694 	"\t    to the field name, as applicable:\n\n"
4695 	"\t            .hex        display a number as a hex value\n"
4696 	"\t            .sym        display an address as a symbol\n"
4697 	"\t            .sym-offset display an address as a symbol and offset\n"
4698 	"\t            .execname   display a common_pid as a program name\n"
4699 	"\t            .syscall    display a syscall id as a syscall name\n"
4700 	"\t            .log2       display log2 value rather than raw number\n"
4701 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4702 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4703 	"\t    trigger or to start a hist trigger but not log any events\n"
4704 	"\t    until told to do so.  'continue' can be used to start or\n"
4705 	"\t    restart a paused hist trigger.\n\n"
4706 	"\t    The 'clear' parameter will clear the contents of a running\n"
4707 	"\t    hist trigger and leave its current paused/active state\n"
4708 	"\t    unchanged.\n\n"
4709 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4710 	"\t    have one event conditionally start and stop another event's\n"
4711 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4712 	"\t    the enable_event and disable_event triggers.\n"
4713 #endif
4714 ;
4715 
4716 static ssize_t
4717 tracing_readme_read(struct file *filp, char __user *ubuf,
4718 		       size_t cnt, loff_t *ppos)
4719 {
4720 	return simple_read_from_buffer(ubuf, cnt, ppos,
4721 					readme_msg, strlen(readme_msg));
4722 }
4723 
4724 static const struct file_operations tracing_readme_fops = {
4725 	.open		= tracing_open_generic,
4726 	.read		= tracing_readme_read,
4727 	.llseek		= generic_file_llseek,
4728 };
4729 
4730 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4731 {
4732 	int *ptr = v;
4733 
4734 	if (*pos || m->count)
4735 		ptr++;
4736 
4737 	(*pos)++;
4738 
4739 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4740 		if (trace_find_tgid(*ptr))
4741 			return ptr;
4742 	}
4743 
4744 	return NULL;
4745 }
4746 
4747 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4748 {
4749 	void *v;
4750 	loff_t l = 0;
4751 
4752 	if (!tgid_map)
4753 		return NULL;
4754 
4755 	v = &tgid_map[0];
4756 	while (l <= *pos) {
4757 		v = saved_tgids_next(m, v, &l);
4758 		if (!v)
4759 			return NULL;
4760 	}
4761 
4762 	return v;
4763 }
4764 
4765 static void saved_tgids_stop(struct seq_file *m, void *v)
4766 {
4767 }
4768 
4769 static int saved_tgids_show(struct seq_file *m, void *v)
4770 {
4771 	int pid = (int *)v - tgid_map;
4772 
4773 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4774 	return 0;
4775 }
4776 
4777 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4778 	.start		= saved_tgids_start,
4779 	.stop		= saved_tgids_stop,
4780 	.next		= saved_tgids_next,
4781 	.show		= saved_tgids_show,
4782 };
4783 
4784 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4785 {
4786 	if (tracing_disabled)
4787 		return -ENODEV;
4788 
4789 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4790 }
4791 
4792 
4793 static const struct file_operations tracing_saved_tgids_fops = {
4794 	.open		= tracing_saved_tgids_open,
4795 	.read		= seq_read,
4796 	.llseek		= seq_lseek,
4797 	.release	= seq_release,
4798 };
4799 
4800 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4801 {
4802 	unsigned int *ptr = v;
4803 
4804 	if (*pos || m->count)
4805 		ptr++;
4806 
4807 	(*pos)++;
4808 
4809 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4810 	     ptr++) {
4811 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4812 			continue;
4813 
4814 		return ptr;
4815 	}
4816 
4817 	return NULL;
4818 }
4819 
4820 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4821 {
4822 	void *v;
4823 	loff_t l = 0;
4824 
4825 	preempt_disable();
4826 	arch_spin_lock(&trace_cmdline_lock);
4827 
4828 	v = &savedcmd->map_cmdline_to_pid[0];
4829 	while (l <= *pos) {
4830 		v = saved_cmdlines_next(m, v, &l);
4831 		if (!v)
4832 			return NULL;
4833 	}
4834 
4835 	return v;
4836 }
4837 
4838 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4839 {
4840 	arch_spin_unlock(&trace_cmdline_lock);
4841 	preempt_enable();
4842 }
4843 
4844 static int saved_cmdlines_show(struct seq_file *m, void *v)
4845 {
4846 	char buf[TASK_COMM_LEN];
4847 	unsigned int *pid = v;
4848 
4849 	__trace_find_cmdline(*pid, buf);
4850 	seq_printf(m, "%d %s\n", *pid, buf);
4851 	return 0;
4852 }
4853 
4854 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4855 	.start		= saved_cmdlines_start,
4856 	.next		= saved_cmdlines_next,
4857 	.stop		= saved_cmdlines_stop,
4858 	.show		= saved_cmdlines_show,
4859 };
4860 
4861 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4862 {
4863 	if (tracing_disabled)
4864 		return -ENODEV;
4865 
4866 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4867 }
4868 
4869 static const struct file_operations tracing_saved_cmdlines_fops = {
4870 	.open		= tracing_saved_cmdlines_open,
4871 	.read		= seq_read,
4872 	.llseek		= seq_lseek,
4873 	.release	= seq_release,
4874 };
4875 
4876 static ssize_t
4877 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4878 				 size_t cnt, loff_t *ppos)
4879 {
4880 	char buf[64];
4881 	int r;
4882 
4883 	arch_spin_lock(&trace_cmdline_lock);
4884 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4885 	arch_spin_unlock(&trace_cmdline_lock);
4886 
4887 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4888 }
4889 
4890 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4891 {
4892 	kfree(s->saved_cmdlines);
4893 	kfree(s->map_cmdline_to_pid);
4894 	kfree(s);
4895 }
4896 
4897 static int tracing_resize_saved_cmdlines(unsigned int val)
4898 {
4899 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4900 
4901 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4902 	if (!s)
4903 		return -ENOMEM;
4904 
4905 	if (allocate_cmdlines_buffer(val, s) < 0) {
4906 		kfree(s);
4907 		return -ENOMEM;
4908 	}
4909 
4910 	arch_spin_lock(&trace_cmdline_lock);
4911 	savedcmd_temp = savedcmd;
4912 	savedcmd = s;
4913 	arch_spin_unlock(&trace_cmdline_lock);
4914 	free_saved_cmdlines_buffer(savedcmd_temp);
4915 
4916 	return 0;
4917 }
4918 
4919 static ssize_t
4920 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4921 				  size_t cnt, loff_t *ppos)
4922 {
4923 	unsigned long val;
4924 	int ret;
4925 
4926 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4927 	if (ret)
4928 		return ret;
4929 
4930 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4931 	if (!val || val > PID_MAX_DEFAULT)
4932 		return -EINVAL;
4933 
4934 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4935 	if (ret < 0)
4936 		return ret;
4937 
4938 	*ppos += cnt;
4939 
4940 	return cnt;
4941 }
4942 
4943 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4944 	.open		= tracing_open_generic,
4945 	.read		= tracing_saved_cmdlines_size_read,
4946 	.write		= tracing_saved_cmdlines_size_write,
4947 };
4948 
4949 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4950 static union trace_eval_map_item *
4951 update_eval_map(union trace_eval_map_item *ptr)
4952 {
4953 	if (!ptr->map.eval_string) {
4954 		if (ptr->tail.next) {
4955 			ptr = ptr->tail.next;
4956 			/* Set ptr to the next real item (skip head) */
4957 			ptr++;
4958 		} else
4959 			return NULL;
4960 	}
4961 	return ptr;
4962 }
4963 
4964 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4965 {
4966 	union trace_eval_map_item *ptr = v;
4967 
4968 	/*
4969 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4970 	 * This really should never happen.
4971 	 */
4972 	ptr = update_eval_map(ptr);
4973 	if (WARN_ON_ONCE(!ptr))
4974 		return NULL;
4975 
4976 	ptr++;
4977 
4978 	(*pos)++;
4979 
4980 	ptr = update_eval_map(ptr);
4981 
4982 	return ptr;
4983 }
4984 
4985 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4986 {
4987 	union trace_eval_map_item *v;
4988 	loff_t l = 0;
4989 
4990 	mutex_lock(&trace_eval_mutex);
4991 
4992 	v = trace_eval_maps;
4993 	if (v)
4994 		v++;
4995 
4996 	while (v && l < *pos) {
4997 		v = eval_map_next(m, v, &l);
4998 	}
4999 
5000 	return v;
5001 }
5002 
5003 static void eval_map_stop(struct seq_file *m, void *v)
5004 {
5005 	mutex_unlock(&trace_eval_mutex);
5006 }
5007 
5008 static int eval_map_show(struct seq_file *m, void *v)
5009 {
5010 	union trace_eval_map_item *ptr = v;
5011 
5012 	seq_printf(m, "%s %ld (%s)\n",
5013 		   ptr->map.eval_string, ptr->map.eval_value,
5014 		   ptr->map.system);
5015 
5016 	return 0;
5017 }
5018 
5019 static const struct seq_operations tracing_eval_map_seq_ops = {
5020 	.start		= eval_map_start,
5021 	.next		= eval_map_next,
5022 	.stop		= eval_map_stop,
5023 	.show		= eval_map_show,
5024 };
5025 
5026 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5027 {
5028 	if (tracing_disabled)
5029 		return -ENODEV;
5030 
5031 	return seq_open(filp, &tracing_eval_map_seq_ops);
5032 }
5033 
5034 static const struct file_operations tracing_eval_map_fops = {
5035 	.open		= tracing_eval_map_open,
5036 	.read		= seq_read,
5037 	.llseek		= seq_lseek,
5038 	.release	= seq_release,
5039 };
5040 
5041 static inline union trace_eval_map_item *
5042 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5043 {
5044 	/* Return tail of array given the head */
5045 	return ptr + ptr->head.length + 1;
5046 }
5047 
5048 static void
5049 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5050 			   int len)
5051 {
5052 	struct trace_eval_map **stop;
5053 	struct trace_eval_map **map;
5054 	union trace_eval_map_item *map_array;
5055 	union trace_eval_map_item *ptr;
5056 
5057 	stop = start + len;
5058 
5059 	/*
5060 	 * The trace_eval_maps contains the map plus a head and tail item,
5061 	 * where the head holds the module and length of array, and the
5062 	 * tail holds a pointer to the next list.
5063 	 */
5064 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5065 	if (!map_array) {
5066 		pr_warn("Unable to allocate trace eval mapping\n");
5067 		return;
5068 	}
5069 
5070 	mutex_lock(&trace_eval_mutex);
5071 
5072 	if (!trace_eval_maps)
5073 		trace_eval_maps = map_array;
5074 	else {
5075 		ptr = trace_eval_maps;
5076 		for (;;) {
5077 			ptr = trace_eval_jmp_to_tail(ptr);
5078 			if (!ptr->tail.next)
5079 				break;
5080 			ptr = ptr->tail.next;
5081 
5082 		}
5083 		ptr->tail.next = map_array;
5084 	}
5085 	map_array->head.mod = mod;
5086 	map_array->head.length = len;
5087 	map_array++;
5088 
5089 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5090 		map_array->map = **map;
5091 		map_array++;
5092 	}
5093 	memset(map_array, 0, sizeof(*map_array));
5094 
5095 	mutex_unlock(&trace_eval_mutex);
5096 }
5097 
5098 static void trace_create_eval_file(struct dentry *d_tracer)
5099 {
5100 	trace_create_file("eval_map", 0444, d_tracer,
5101 			  NULL, &tracing_eval_map_fops);
5102 }
5103 
5104 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5105 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5106 static inline void trace_insert_eval_map_file(struct module *mod,
5107 			      struct trace_eval_map **start, int len) { }
5108 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5109 
5110 static void trace_insert_eval_map(struct module *mod,
5111 				  struct trace_eval_map **start, int len)
5112 {
5113 	struct trace_eval_map **map;
5114 
5115 	if (len <= 0)
5116 		return;
5117 
5118 	map = start;
5119 
5120 	trace_event_eval_update(map, len);
5121 
5122 	trace_insert_eval_map_file(mod, start, len);
5123 }
5124 
5125 static ssize_t
5126 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5127 		       size_t cnt, loff_t *ppos)
5128 {
5129 	struct trace_array *tr = filp->private_data;
5130 	char buf[MAX_TRACER_SIZE+2];
5131 	int r;
5132 
5133 	mutex_lock(&trace_types_lock);
5134 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5135 	mutex_unlock(&trace_types_lock);
5136 
5137 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5138 }
5139 
5140 int tracer_init(struct tracer *t, struct trace_array *tr)
5141 {
5142 	tracing_reset_online_cpus(&tr->trace_buffer);
5143 	return t->init(tr);
5144 }
5145 
5146 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5147 {
5148 	int cpu;
5149 
5150 	for_each_tracing_cpu(cpu)
5151 		per_cpu_ptr(buf->data, cpu)->entries = val;
5152 }
5153 
5154 #ifdef CONFIG_TRACER_MAX_TRACE
5155 /* resize @tr's buffer to the size of @size_tr's entries */
5156 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5157 					struct trace_buffer *size_buf, int cpu_id)
5158 {
5159 	int cpu, ret = 0;
5160 
5161 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5162 		for_each_tracing_cpu(cpu) {
5163 			ret = ring_buffer_resize(trace_buf->buffer,
5164 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5165 			if (ret < 0)
5166 				break;
5167 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5168 				per_cpu_ptr(size_buf->data, cpu)->entries;
5169 		}
5170 	} else {
5171 		ret = ring_buffer_resize(trace_buf->buffer,
5172 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5173 		if (ret == 0)
5174 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5175 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5176 	}
5177 
5178 	return ret;
5179 }
5180 #endif /* CONFIG_TRACER_MAX_TRACE */
5181 
5182 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5183 					unsigned long size, int cpu)
5184 {
5185 	int ret;
5186 
5187 	/*
5188 	 * If kernel or user changes the size of the ring buffer
5189 	 * we use the size that was given, and we can forget about
5190 	 * expanding it later.
5191 	 */
5192 	ring_buffer_expanded = true;
5193 
5194 	/* May be called before buffers are initialized */
5195 	if (!tr->trace_buffer.buffer)
5196 		return 0;
5197 
5198 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5199 	if (ret < 0)
5200 		return ret;
5201 
5202 #ifdef CONFIG_TRACER_MAX_TRACE
5203 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5204 	    !tr->current_trace->use_max_tr)
5205 		goto out;
5206 
5207 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5208 	if (ret < 0) {
5209 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5210 						     &tr->trace_buffer, cpu);
5211 		if (r < 0) {
5212 			/*
5213 			 * AARGH! We are left with different
5214 			 * size max buffer!!!!
5215 			 * The max buffer is our "snapshot" buffer.
5216 			 * When a tracer needs a snapshot (one of the
5217 			 * latency tracers), it swaps the max buffer
5218 			 * with the saved snap shot. We succeeded to
5219 			 * update the size of the main buffer, but failed to
5220 			 * update the size of the max buffer. But when we tried
5221 			 * to reset the main buffer to the original size, we
5222 			 * failed there too. This is very unlikely to
5223 			 * happen, but if it does, warn and kill all
5224 			 * tracing.
5225 			 */
5226 			WARN_ON(1);
5227 			tracing_disabled = 1;
5228 		}
5229 		return ret;
5230 	}
5231 
5232 	if (cpu == RING_BUFFER_ALL_CPUS)
5233 		set_buffer_entries(&tr->max_buffer, size);
5234 	else
5235 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5236 
5237  out:
5238 #endif /* CONFIG_TRACER_MAX_TRACE */
5239 
5240 	if (cpu == RING_BUFFER_ALL_CPUS)
5241 		set_buffer_entries(&tr->trace_buffer, size);
5242 	else
5243 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5244 
5245 	return ret;
5246 }
5247 
5248 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5249 					  unsigned long size, int cpu_id)
5250 {
5251 	int ret = size;
5252 
5253 	mutex_lock(&trace_types_lock);
5254 
5255 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5256 		/* make sure, this cpu is enabled in the mask */
5257 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5258 			ret = -EINVAL;
5259 			goto out;
5260 		}
5261 	}
5262 
5263 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5264 	if (ret < 0)
5265 		ret = -ENOMEM;
5266 
5267 out:
5268 	mutex_unlock(&trace_types_lock);
5269 
5270 	return ret;
5271 }
5272 
5273 
5274 /**
5275  * tracing_update_buffers - used by tracing facility to expand ring buffers
5276  *
5277  * To save on memory when the tracing is never used on a system with it
5278  * configured in. The ring buffers are set to a minimum size. But once
5279  * a user starts to use the tracing facility, then they need to grow
5280  * to their default size.
5281  *
5282  * This function is to be called when a tracer is about to be used.
5283  */
5284 int tracing_update_buffers(void)
5285 {
5286 	int ret = 0;
5287 
5288 	mutex_lock(&trace_types_lock);
5289 	if (!ring_buffer_expanded)
5290 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5291 						RING_BUFFER_ALL_CPUS);
5292 	mutex_unlock(&trace_types_lock);
5293 
5294 	return ret;
5295 }
5296 
5297 struct trace_option_dentry;
5298 
5299 static void
5300 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5301 
5302 /*
5303  * Used to clear out the tracer before deletion of an instance.
5304  * Must have trace_types_lock held.
5305  */
5306 static void tracing_set_nop(struct trace_array *tr)
5307 {
5308 	if (tr->current_trace == &nop_trace)
5309 		return;
5310 
5311 	tr->current_trace->enabled--;
5312 
5313 	if (tr->current_trace->reset)
5314 		tr->current_trace->reset(tr);
5315 
5316 	tr->current_trace = &nop_trace;
5317 }
5318 
5319 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5320 {
5321 	/* Only enable if the directory has been created already. */
5322 	if (!tr->dir)
5323 		return;
5324 
5325 	create_trace_option_files(tr, t);
5326 }
5327 
5328 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5329 {
5330 	struct tracer *t;
5331 #ifdef CONFIG_TRACER_MAX_TRACE
5332 	bool had_max_tr;
5333 #endif
5334 	int ret = 0;
5335 
5336 	mutex_lock(&trace_types_lock);
5337 
5338 	if (!ring_buffer_expanded) {
5339 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5340 						RING_BUFFER_ALL_CPUS);
5341 		if (ret < 0)
5342 			goto out;
5343 		ret = 0;
5344 	}
5345 
5346 	for (t = trace_types; t; t = t->next) {
5347 		if (strcmp(t->name, buf) == 0)
5348 			break;
5349 	}
5350 	if (!t) {
5351 		ret = -EINVAL;
5352 		goto out;
5353 	}
5354 	if (t == tr->current_trace)
5355 		goto out;
5356 
5357 	/* Some tracers won't work on kernel command line */
5358 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5359 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5360 			t->name);
5361 		goto out;
5362 	}
5363 
5364 	/* Some tracers are only allowed for the top level buffer */
5365 	if (!trace_ok_for_array(t, tr)) {
5366 		ret = -EINVAL;
5367 		goto out;
5368 	}
5369 
5370 	/* If trace pipe files are being read, we can't change the tracer */
5371 	if (tr->current_trace->ref) {
5372 		ret = -EBUSY;
5373 		goto out;
5374 	}
5375 
5376 	trace_branch_disable();
5377 
5378 	tr->current_trace->enabled--;
5379 
5380 	if (tr->current_trace->reset)
5381 		tr->current_trace->reset(tr);
5382 
5383 	/* Current trace needs to be nop_trace before synchronize_sched */
5384 	tr->current_trace = &nop_trace;
5385 
5386 #ifdef CONFIG_TRACER_MAX_TRACE
5387 	had_max_tr = tr->allocated_snapshot;
5388 
5389 	if (had_max_tr && !t->use_max_tr) {
5390 		/*
5391 		 * We need to make sure that the update_max_tr sees that
5392 		 * current_trace changed to nop_trace to keep it from
5393 		 * swapping the buffers after we resize it.
5394 		 * The update_max_tr is called from interrupts disabled
5395 		 * so a synchronized_sched() is sufficient.
5396 		 */
5397 		synchronize_sched();
5398 		free_snapshot(tr);
5399 	}
5400 #endif
5401 
5402 #ifdef CONFIG_TRACER_MAX_TRACE
5403 	if (t->use_max_tr && !had_max_tr) {
5404 		ret = tracing_alloc_snapshot_instance(tr);
5405 		if (ret < 0)
5406 			goto out;
5407 	}
5408 #endif
5409 
5410 	if (t->init) {
5411 		ret = tracer_init(t, tr);
5412 		if (ret)
5413 			goto out;
5414 	}
5415 
5416 	tr->current_trace = t;
5417 	tr->current_trace->enabled++;
5418 	trace_branch_enable(tr);
5419  out:
5420 	mutex_unlock(&trace_types_lock);
5421 
5422 	return ret;
5423 }
5424 
5425 static ssize_t
5426 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5427 			size_t cnt, loff_t *ppos)
5428 {
5429 	struct trace_array *tr = filp->private_data;
5430 	char buf[MAX_TRACER_SIZE+1];
5431 	int i;
5432 	size_t ret;
5433 	int err;
5434 
5435 	ret = cnt;
5436 
5437 	if (cnt > MAX_TRACER_SIZE)
5438 		cnt = MAX_TRACER_SIZE;
5439 
5440 	if (copy_from_user(buf, ubuf, cnt))
5441 		return -EFAULT;
5442 
5443 	buf[cnt] = 0;
5444 
5445 	/* strip ending whitespace. */
5446 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5447 		buf[i] = 0;
5448 
5449 	err = tracing_set_tracer(tr, buf);
5450 	if (err)
5451 		return err;
5452 
5453 	*ppos += ret;
5454 
5455 	return ret;
5456 }
5457 
5458 static ssize_t
5459 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5460 		   size_t cnt, loff_t *ppos)
5461 {
5462 	char buf[64];
5463 	int r;
5464 
5465 	r = snprintf(buf, sizeof(buf), "%ld\n",
5466 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5467 	if (r > sizeof(buf))
5468 		r = sizeof(buf);
5469 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5470 }
5471 
5472 static ssize_t
5473 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5474 		    size_t cnt, loff_t *ppos)
5475 {
5476 	unsigned long val;
5477 	int ret;
5478 
5479 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5480 	if (ret)
5481 		return ret;
5482 
5483 	*ptr = val * 1000;
5484 
5485 	return cnt;
5486 }
5487 
5488 static ssize_t
5489 tracing_thresh_read(struct file *filp, char __user *ubuf,
5490 		    size_t cnt, loff_t *ppos)
5491 {
5492 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5493 }
5494 
5495 static ssize_t
5496 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5497 		     size_t cnt, loff_t *ppos)
5498 {
5499 	struct trace_array *tr = filp->private_data;
5500 	int ret;
5501 
5502 	mutex_lock(&trace_types_lock);
5503 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5504 	if (ret < 0)
5505 		goto out;
5506 
5507 	if (tr->current_trace->update_thresh) {
5508 		ret = tr->current_trace->update_thresh(tr);
5509 		if (ret < 0)
5510 			goto out;
5511 	}
5512 
5513 	ret = cnt;
5514 out:
5515 	mutex_unlock(&trace_types_lock);
5516 
5517 	return ret;
5518 }
5519 
5520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5521 
5522 static ssize_t
5523 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5524 		     size_t cnt, loff_t *ppos)
5525 {
5526 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5527 }
5528 
5529 static ssize_t
5530 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5531 		      size_t cnt, loff_t *ppos)
5532 {
5533 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5534 }
5535 
5536 #endif
5537 
5538 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5539 {
5540 	struct trace_array *tr = inode->i_private;
5541 	struct trace_iterator *iter;
5542 	int ret = 0;
5543 
5544 	if (tracing_disabled)
5545 		return -ENODEV;
5546 
5547 	if (trace_array_get(tr) < 0)
5548 		return -ENODEV;
5549 
5550 	mutex_lock(&trace_types_lock);
5551 
5552 	/* create a buffer to store the information to pass to userspace */
5553 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5554 	if (!iter) {
5555 		ret = -ENOMEM;
5556 		__trace_array_put(tr);
5557 		goto out;
5558 	}
5559 
5560 	trace_seq_init(&iter->seq);
5561 	iter->trace = tr->current_trace;
5562 
5563 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5564 		ret = -ENOMEM;
5565 		goto fail;
5566 	}
5567 
5568 	/* trace pipe does not show start of buffer */
5569 	cpumask_setall(iter->started);
5570 
5571 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5572 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5573 
5574 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5575 	if (trace_clocks[tr->clock_id].in_ns)
5576 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5577 
5578 	iter->tr = tr;
5579 	iter->trace_buffer = &tr->trace_buffer;
5580 	iter->cpu_file = tracing_get_cpu(inode);
5581 	mutex_init(&iter->mutex);
5582 	filp->private_data = iter;
5583 
5584 	if (iter->trace->pipe_open)
5585 		iter->trace->pipe_open(iter);
5586 
5587 	nonseekable_open(inode, filp);
5588 
5589 	tr->current_trace->ref++;
5590 out:
5591 	mutex_unlock(&trace_types_lock);
5592 	return ret;
5593 
5594 fail:
5595 	kfree(iter->trace);
5596 	kfree(iter);
5597 	__trace_array_put(tr);
5598 	mutex_unlock(&trace_types_lock);
5599 	return ret;
5600 }
5601 
5602 static int tracing_release_pipe(struct inode *inode, struct file *file)
5603 {
5604 	struct trace_iterator *iter = file->private_data;
5605 	struct trace_array *tr = inode->i_private;
5606 
5607 	mutex_lock(&trace_types_lock);
5608 
5609 	tr->current_trace->ref--;
5610 
5611 	if (iter->trace->pipe_close)
5612 		iter->trace->pipe_close(iter);
5613 
5614 	mutex_unlock(&trace_types_lock);
5615 
5616 	free_cpumask_var(iter->started);
5617 	mutex_destroy(&iter->mutex);
5618 	kfree(iter);
5619 
5620 	trace_array_put(tr);
5621 
5622 	return 0;
5623 }
5624 
5625 static __poll_t
5626 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5627 {
5628 	struct trace_array *tr = iter->tr;
5629 
5630 	/* Iterators are static, they should be filled or empty */
5631 	if (trace_buffer_iter(iter, iter->cpu_file))
5632 		return EPOLLIN | EPOLLRDNORM;
5633 
5634 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5635 		/*
5636 		 * Always select as readable when in blocking mode
5637 		 */
5638 		return EPOLLIN | EPOLLRDNORM;
5639 	else
5640 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5641 					     filp, poll_table);
5642 }
5643 
5644 static __poll_t
5645 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5646 {
5647 	struct trace_iterator *iter = filp->private_data;
5648 
5649 	return trace_poll(iter, filp, poll_table);
5650 }
5651 
5652 /* Must be called with iter->mutex held. */
5653 static int tracing_wait_pipe(struct file *filp)
5654 {
5655 	struct trace_iterator *iter = filp->private_data;
5656 	int ret;
5657 
5658 	while (trace_empty(iter)) {
5659 
5660 		if ((filp->f_flags & O_NONBLOCK)) {
5661 			return -EAGAIN;
5662 		}
5663 
5664 		/*
5665 		 * We block until we read something and tracing is disabled.
5666 		 * We still block if tracing is disabled, but we have never
5667 		 * read anything. This allows a user to cat this file, and
5668 		 * then enable tracing. But after we have read something,
5669 		 * we give an EOF when tracing is again disabled.
5670 		 *
5671 		 * iter->pos will be 0 if we haven't read anything.
5672 		 */
5673 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5674 			break;
5675 
5676 		mutex_unlock(&iter->mutex);
5677 
5678 		ret = wait_on_pipe(iter, false);
5679 
5680 		mutex_lock(&iter->mutex);
5681 
5682 		if (ret)
5683 			return ret;
5684 	}
5685 
5686 	return 1;
5687 }
5688 
5689 /*
5690  * Consumer reader.
5691  */
5692 static ssize_t
5693 tracing_read_pipe(struct file *filp, char __user *ubuf,
5694 		  size_t cnt, loff_t *ppos)
5695 {
5696 	struct trace_iterator *iter = filp->private_data;
5697 	ssize_t sret;
5698 
5699 	/*
5700 	 * Avoid more than one consumer on a single file descriptor
5701 	 * This is just a matter of traces coherency, the ring buffer itself
5702 	 * is protected.
5703 	 */
5704 	mutex_lock(&iter->mutex);
5705 
5706 	/* return any leftover data */
5707 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5708 	if (sret != -EBUSY)
5709 		goto out;
5710 
5711 	trace_seq_init(&iter->seq);
5712 
5713 	if (iter->trace->read) {
5714 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5715 		if (sret)
5716 			goto out;
5717 	}
5718 
5719 waitagain:
5720 	sret = tracing_wait_pipe(filp);
5721 	if (sret <= 0)
5722 		goto out;
5723 
5724 	/* stop when tracing is finished */
5725 	if (trace_empty(iter)) {
5726 		sret = 0;
5727 		goto out;
5728 	}
5729 
5730 	if (cnt >= PAGE_SIZE)
5731 		cnt = PAGE_SIZE - 1;
5732 
5733 	/* reset all but tr, trace, and overruns */
5734 	memset(&iter->seq, 0,
5735 	       sizeof(struct trace_iterator) -
5736 	       offsetof(struct trace_iterator, seq));
5737 	cpumask_clear(iter->started);
5738 	iter->pos = -1;
5739 
5740 	trace_event_read_lock();
5741 	trace_access_lock(iter->cpu_file);
5742 	while (trace_find_next_entry_inc(iter) != NULL) {
5743 		enum print_line_t ret;
5744 		int save_len = iter->seq.seq.len;
5745 
5746 		ret = print_trace_line(iter);
5747 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5748 			/* don't print partial lines */
5749 			iter->seq.seq.len = save_len;
5750 			break;
5751 		}
5752 		if (ret != TRACE_TYPE_NO_CONSUME)
5753 			trace_consume(iter);
5754 
5755 		if (trace_seq_used(&iter->seq) >= cnt)
5756 			break;
5757 
5758 		/*
5759 		 * Setting the full flag means we reached the trace_seq buffer
5760 		 * size and we should leave by partial output condition above.
5761 		 * One of the trace_seq_* functions is not used properly.
5762 		 */
5763 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5764 			  iter->ent->type);
5765 	}
5766 	trace_access_unlock(iter->cpu_file);
5767 	trace_event_read_unlock();
5768 
5769 	/* Now copy what we have to the user */
5770 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5771 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5772 		trace_seq_init(&iter->seq);
5773 
5774 	/*
5775 	 * If there was nothing to send to user, in spite of consuming trace
5776 	 * entries, go back to wait for more entries.
5777 	 */
5778 	if (sret == -EBUSY)
5779 		goto waitagain;
5780 
5781 out:
5782 	mutex_unlock(&iter->mutex);
5783 
5784 	return sret;
5785 }
5786 
5787 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5788 				     unsigned int idx)
5789 {
5790 	__free_page(spd->pages[idx]);
5791 }
5792 
5793 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5794 	.can_merge		= 0,
5795 	.confirm		= generic_pipe_buf_confirm,
5796 	.release		= generic_pipe_buf_release,
5797 	.steal			= generic_pipe_buf_steal,
5798 	.get			= generic_pipe_buf_get,
5799 };
5800 
5801 static size_t
5802 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5803 {
5804 	size_t count;
5805 	int save_len;
5806 	int ret;
5807 
5808 	/* Seq buffer is page-sized, exactly what we need. */
5809 	for (;;) {
5810 		save_len = iter->seq.seq.len;
5811 		ret = print_trace_line(iter);
5812 
5813 		if (trace_seq_has_overflowed(&iter->seq)) {
5814 			iter->seq.seq.len = save_len;
5815 			break;
5816 		}
5817 
5818 		/*
5819 		 * This should not be hit, because it should only
5820 		 * be set if the iter->seq overflowed. But check it
5821 		 * anyway to be safe.
5822 		 */
5823 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5824 			iter->seq.seq.len = save_len;
5825 			break;
5826 		}
5827 
5828 		count = trace_seq_used(&iter->seq) - save_len;
5829 		if (rem < count) {
5830 			rem = 0;
5831 			iter->seq.seq.len = save_len;
5832 			break;
5833 		}
5834 
5835 		if (ret != TRACE_TYPE_NO_CONSUME)
5836 			trace_consume(iter);
5837 		rem -= count;
5838 		if (!trace_find_next_entry_inc(iter))	{
5839 			rem = 0;
5840 			iter->ent = NULL;
5841 			break;
5842 		}
5843 	}
5844 
5845 	return rem;
5846 }
5847 
5848 static ssize_t tracing_splice_read_pipe(struct file *filp,
5849 					loff_t *ppos,
5850 					struct pipe_inode_info *pipe,
5851 					size_t len,
5852 					unsigned int flags)
5853 {
5854 	struct page *pages_def[PIPE_DEF_BUFFERS];
5855 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5856 	struct trace_iterator *iter = filp->private_data;
5857 	struct splice_pipe_desc spd = {
5858 		.pages		= pages_def,
5859 		.partial	= partial_def,
5860 		.nr_pages	= 0, /* This gets updated below. */
5861 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5862 		.ops		= &tracing_pipe_buf_ops,
5863 		.spd_release	= tracing_spd_release_pipe,
5864 	};
5865 	ssize_t ret;
5866 	size_t rem;
5867 	unsigned int i;
5868 
5869 	if (splice_grow_spd(pipe, &spd))
5870 		return -ENOMEM;
5871 
5872 	mutex_lock(&iter->mutex);
5873 
5874 	if (iter->trace->splice_read) {
5875 		ret = iter->trace->splice_read(iter, filp,
5876 					       ppos, pipe, len, flags);
5877 		if (ret)
5878 			goto out_err;
5879 	}
5880 
5881 	ret = tracing_wait_pipe(filp);
5882 	if (ret <= 0)
5883 		goto out_err;
5884 
5885 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5886 		ret = -EFAULT;
5887 		goto out_err;
5888 	}
5889 
5890 	trace_event_read_lock();
5891 	trace_access_lock(iter->cpu_file);
5892 
5893 	/* Fill as many pages as possible. */
5894 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5895 		spd.pages[i] = alloc_page(GFP_KERNEL);
5896 		if (!spd.pages[i])
5897 			break;
5898 
5899 		rem = tracing_fill_pipe_page(rem, iter);
5900 
5901 		/* Copy the data into the page, so we can start over. */
5902 		ret = trace_seq_to_buffer(&iter->seq,
5903 					  page_address(spd.pages[i]),
5904 					  trace_seq_used(&iter->seq));
5905 		if (ret < 0) {
5906 			__free_page(spd.pages[i]);
5907 			break;
5908 		}
5909 		spd.partial[i].offset = 0;
5910 		spd.partial[i].len = trace_seq_used(&iter->seq);
5911 
5912 		trace_seq_init(&iter->seq);
5913 	}
5914 
5915 	trace_access_unlock(iter->cpu_file);
5916 	trace_event_read_unlock();
5917 	mutex_unlock(&iter->mutex);
5918 
5919 	spd.nr_pages = i;
5920 
5921 	if (i)
5922 		ret = splice_to_pipe(pipe, &spd);
5923 	else
5924 		ret = 0;
5925 out:
5926 	splice_shrink_spd(&spd);
5927 	return ret;
5928 
5929 out_err:
5930 	mutex_unlock(&iter->mutex);
5931 	goto out;
5932 }
5933 
5934 static ssize_t
5935 tracing_entries_read(struct file *filp, char __user *ubuf,
5936 		     size_t cnt, loff_t *ppos)
5937 {
5938 	struct inode *inode = file_inode(filp);
5939 	struct trace_array *tr = inode->i_private;
5940 	int cpu = tracing_get_cpu(inode);
5941 	char buf[64];
5942 	int r = 0;
5943 	ssize_t ret;
5944 
5945 	mutex_lock(&trace_types_lock);
5946 
5947 	if (cpu == RING_BUFFER_ALL_CPUS) {
5948 		int cpu, buf_size_same;
5949 		unsigned long size;
5950 
5951 		size = 0;
5952 		buf_size_same = 1;
5953 		/* check if all cpu sizes are same */
5954 		for_each_tracing_cpu(cpu) {
5955 			/* fill in the size from first enabled cpu */
5956 			if (size == 0)
5957 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5958 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5959 				buf_size_same = 0;
5960 				break;
5961 			}
5962 		}
5963 
5964 		if (buf_size_same) {
5965 			if (!ring_buffer_expanded)
5966 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5967 					    size >> 10,
5968 					    trace_buf_size >> 10);
5969 			else
5970 				r = sprintf(buf, "%lu\n", size >> 10);
5971 		} else
5972 			r = sprintf(buf, "X\n");
5973 	} else
5974 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5975 
5976 	mutex_unlock(&trace_types_lock);
5977 
5978 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5979 	return ret;
5980 }
5981 
5982 static ssize_t
5983 tracing_entries_write(struct file *filp, const char __user *ubuf,
5984 		      size_t cnt, loff_t *ppos)
5985 {
5986 	struct inode *inode = file_inode(filp);
5987 	struct trace_array *tr = inode->i_private;
5988 	unsigned long val;
5989 	int ret;
5990 
5991 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5992 	if (ret)
5993 		return ret;
5994 
5995 	/* must have at least 1 entry */
5996 	if (!val)
5997 		return -EINVAL;
5998 
5999 	/* value is in KB */
6000 	val <<= 10;
6001 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6002 	if (ret < 0)
6003 		return ret;
6004 
6005 	*ppos += cnt;
6006 
6007 	return cnt;
6008 }
6009 
6010 static ssize_t
6011 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6012 				size_t cnt, loff_t *ppos)
6013 {
6014 	struct trace_array *tr = filp->private_data;
6015 	char buf[64];
6016 	int r, cpu;
6017 	unsigned long size = 0, expanded_size = 0;
6018 
6019 	mutex_lock(&trace_types_lock);
6020 	for_each_tracing_cpu(cpu) {
6021 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6022 		if (!ring_buffer_expanded)
6023 			expanded_size += trace_buf_size >> 10;
6024 	}
6025 	if (ring_buffer_expanded)
6026 		r = sprintf(buf, "%lu\n", size);
6027 	else
6028 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6029 	mutex_unlock(&trace_types_lock);
6030 
6031 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6032 }
6033 
6034 static ssize_t
6035 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6036 			  size_t cnt, loff_t *ppos)
6037 {
6038 	/*
6039 	 * There is no need to read what the user has written, this function
6040 	 * is just to make sure that there is no error when "echo" is used
6041 	 */
6042 
6043 	*ppos += cnt;
6044 
6045 	return cnt;
6046 }
6047 
6048 static int
6049 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6050 {
6051 	struct trace_array *tr = inode->i_private;
6052 
6053 	/* disable tracing ? */
6054 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6055 		tracer_tracing_off(tr);
6056 	/* resize the ring buffer to 0 */
6057 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6058 
6059 	trace_array_put(tr);
6060 
6061 	return 0;
6062 }
6063 
6064 static ssize_t
6065 tracing_mark_write(struct file *filp, const char __user *ubuf,
6066 					size_t cnt, loff_t *fpos)
6067 {
6068 	struct trace_array *tr = filp->private_data;
6069 	struct ring_buffer_event *event;
6070 	enum event_trigger_type tt = ETT_NONE;
6071 	struct ring_buffer *buffer;
6072 	struct print_entry *entry;
6073 	unsigned long irq_flags;
6074 	const char faulted[] = "<faulted>";
6075 	ssize_t written;
6076 	int size;
6077 	int len;
6078 
6079 /* Used in tracing_mark_raw_write() as well */
6080 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6081 
6082 	if (tracing_disabled)
6083 		return -EINVAL;
6084 
6085 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6086 		return -EINVAL;
6087 
6088 	if (cnt > TRACE_BUF_SIZE)
6089 		cnt = TRACE_BUF_SIZE;
6090 
6091 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6092 
6093 	local_save_flags(irq_flags);
6094 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6095 
6096 	/* If less than "<faulted>", then make sure we can still add that */
6097 	if (cnt < FAULTED_SIZE)
6098 		size += FAULTED_SIZE - cnt;
6099 
6100 	buffer = tr->trace_buffer.buffer;
6101 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6102 					    irq_flags, preempt_count());
6103 	if (unlikely(!event))
6104 		/* Ring buffer disabled, return as if not open for write */
6105 		return -EBADF;
6106 
6107 	entry = ring_buffer_event_data(event);
6108 	entry->ip = _THIS_IP_;
6109 
6110 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6111 	if (len) {
6112 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6113 		cnt = FAULTED_SIZE;
6114 		written = -EFAULT;
6115 	} else
6116 		written = cnt;
6117 	len = cnt;
6118 
6119 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6120 		/* do not add \n before testing triggers, but add \0 */
6121 		entry->buf[cnt] = '\0';
6122 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6123 	}
6124 
6125 	if (entry->buf[cnt - 1] != '\n') {
6126 		entry->buf[cnt] = '\n';
6127 		entry->buf[cnt + 1] = '\0';
6128 	} else
6129 		entry->buf[cnt] = '\0';
6130 
6131 	__buffer_unlock_commit(buffer, event);
6132 
6133 	if (tt)
6134 		event_triggers_post_call(tr->trace_marker_file, tt);
6135 
6136 	if (written > 0)
6137 		*fpos += written;
6138 
6139 	return written;
6140 }
6141 
6142 /* Limit it for now to 3K (including tag) */
6143 #define RAW_DATA_MAX_SIZE (1024*3)
6144 
6145 static ssize_t
6146 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6147 					size_t cnt, loff_t *fpos)
6148 {
6149 	struct trace_array *tr = filp->private_data;
6150 	struct ring_buffer_event *event;
6151 	struct ring_buffer *buffer;
6152 	struct raw_data_entry *entry;
6153 	const char faulted[] = "<faulted>";
6154 	unsigned long irq_flags;
6155 	ssize_t written;
6156 	int size;
6157 	int len;
6158 
6159 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6160 
6161 	if (tracing_disabled)
6162 		return -EINVAL;
6163 
6164 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6165 		return -EINVAL;
6166 
6167 	/* The marker must at least have a tag id */
6168 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6169 		return -EINVAL;
6170 
6171 	if (cnt > TRACE_BUF_SIZE)
6172 		cnt = TRACE_BUF_SIZE;
6173 
6174 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6175 
6176 	local_save_flags(irq_flags);
6177 	size = sizeof(*entry) + cnt;
6178 	if (cnt < FAULT_SIZE_ID)
6179 		size += FAULT_SIZE_ID - cnt;
6180 
6181 	buffer = tr->trace_buffer.buffer;
6182 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6183 					    irq_flags, preempt_count());
6184 	if (!event)
6185 		/* Ring buffer disabled, return as if not open for write */
6186 		return -EBADF;
6187 
6188 	entry = ring_buffer_event_data(event);
6189 
6190 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6191 	if (len) {
6192 		entry->id = -1;
6193 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6194 		written = -EFAULT;
6195 	} else
6196 		written = cnt;
6197 
6198 	__buffer_unlock_commit(buffer, event);
6199 
6200 	if (written > 0)
6201 		*fpos += written;
6202 
6203 	return written;
6204 }
6205 
6206 static int tracing_clock_show(struct seq_file *m, void *v)
6207 {
6208 	struct trace_array *tr = m->private;
6209 	int i;
6210 
6211 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6212 		seq_printf(m,
6213 			"%s%s%s%s", i ? " " : "",
6214 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6215 			i == tr->clock_id ? "]" : "");
6216 	seq_putc(m, '\n');
6217 
6218 	return 0;
6219 }
6220 
6221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6222 {
6223 	int i;
6224 
6225 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6226 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6227 			break;
6228 	}
6229 	if (i == ARRAY_SIZE(trace_clocks))
6230 		return -EINVAL;
6231 
6232 	mutex_lock(&trace_types_lock);
6233 
6234 	tr->clock_id = i;
6235 
6236 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6237 
6238 	/*
6239 	 * New clock may not be consistent with the previous clock.
6240 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6241 	 */
6242 	tracing_reset_online_cpus(&tr->trace_buffer);
6243 
6244 #ifdef CONFIG_TRACER_MAX_TRACE
6245 	if (tr->max_buffer.buffer)
6246 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6247 	tracing_reset_online_cpus(&tr->max_buffer);
6248 #endif
6249 
6250 	mutex_unlock(&trace_types_lock);
6251 
6252 	return 0;
6253 }
6254 
6255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6256 				   size_t cnt, loff_t *fpos)
6257 {
6258 	struct seq_file *m = filp->private_data;
6259 	struct trace_array *tr = m->private;
6260 	char buf[64];
6261 	const char *clockstr;
6262 	int ret;
6263 
6264 	if (cnt >= sizeof(buf))
6265 		return -EINVAL;
6266 
6267 	if (copy_from_user(buf, ubuf, cnt))
6268 		return -EFAULT;
6269 
6270 	buf[cnt] = 0;
6271 
6272 	clockstr = strstrip(buf);
6273 
6274 	ret = tracing_set_clock(tr, clockstr);
6275 	if (ret)
6276 		return ret;
6277 
6278 	*fpos += cnt;
6279 
6280 	return cnt;
6281 }
6282 
6283 static int tracing_clock_open(struct inode *inode, struct file *file)
6284 {
6285 	struct trace_array *tr = inode->i_private;
6286 	int ret;
6287 
6288 	if (tracing_disabled)
6289 		return -ENODEV;
6290 
6291 	if (trace_array_get(tr))
6292 		return -ENODEV;
6293 
6294 	ret = single_open(file, tracing_clock_show, inode->i_private);
6295 	if (ret < 0)
6296 		trace_array_put(tr);
6297 
6298 	return ret;
6299 }
6300 
6301 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6302 {
6303 	struct trace_array *tr = m->private;
6304 
6305 	mutex_lock(&trace_types_lock);
6306 
6307 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6308 		seq_puts(m, "delta [absolute]\n");
6309 	else
6310 		seq_puts(m, "[delta] absolute\n");
6311 
6312 	mutex_unlock(&trace_types_lock);
6313 
6314 	return 0;
6315 }
6316 
6317 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6318 {
6319 	struct trace_array *tr = inode->i_private;
6320 	int ret;
6321 
6322 	if (tracing_disabled)
6323 		return -ENODEV;
6324 
6325 	if (trace_array_get(tr))
6326 		return -ENODEV;
6327 
6328 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6329 	if (ret < 0)
6330 		trace_array_put(tr);
6331 
6332 	return ret;
6333 }
6334 
6335 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6336 {
6337 	int ret = 0;
6338 
6339 	mutex_lock(&trace_types_lock);
6340 
6341 	if (abs && tr->time_stamp_abs_ref++)
6342 		goto out;
6343 
6344 	if (!abs) {
6345 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6346 			ret = -EINVAL;
6347 			goto out;
6348 		}
6349 
6350 		if (--tr->time_stamp_abs_ref)
6351 			goto out;
6352 	}
6353 
6354 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6355 
6356 #ifdef CONFIG_TRACER_MAX_TRACE
6357 	if (tr->max_buffer.buffer)
6358 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6359 #endif
6360  out:
6361 	mutex_unlock(&trace_types_lock);
6362 
6363 	return ret;
6364 }
6365 
6366 struct ftrace_buffer_info {
6367 	struct trace_iterator	iter;
6368 	void			*spare;
6369 	unsigned int		spare_cpu;
6370 	unsigned int		read;
6371 };
6372 
6373 #ifdef CONFIG_TRACER_SNAPSHOT
6374 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6375 {
6376 	struct trace_array *tr = inode->i_private;
6377 	struct trace_iterator *iter;
6378 	struct seq_file *m;
6379 	int ret = 0;
6380 
6381 	if (trace_array_get(tr) < 0)
6382 		return -ENODEV;
6383 
6384 	if (file->f_mode & FMODE_READ) {
6385 		iter = __tracing_open(inode, file, true);
6386 		if (IS_ERR(iter))
6387 			ret = PTR_ERR(iter);
6388 	} else {
6389 		/* Writes still need the seq_file to hold the private data */
6390 		ret = -ENOMEM;
6391 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6392 		if (!m)
6393 			goto out;
6394 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6395 		if (!iter) {
6396 			kfree(m);
6397 			goto out;
6398 		}
6399 		ret = 0;
6400 
6401 		iter->tr = tr;
6402 		iter->trace_buffer = &tr->max_buffer;
6403 		iter->cpu_file = tracing_get_cpu(inode);
6404 		m->private = iter;
6405 		file->private_data = m;
6406 	}
6407 out:
6408 	if (ret < 0)
6409 		trace_array_put(tr);
6410 
6411 	return ret;
6412 }
6413 
6414 static ssize_t
6415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6416 		       loff_t *ppos)
6417 {
6418 	struct seq_file *m = filp->private_data;
6419 	struct trace_iterator *iter = m->private;
6420 	struct trace_array *tr = iter->tr;
6421 	unsigned long val;
6422 	int ret;
6423 
6424 	ret = tracing_update_buffers();
6425 	if (ret < 0)
6426 		return ret;
6427 
6428 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6429 	if (ret)
6430 		return ret;
6431 
6432 	mutex_lock(&trace_types_lock);
6433 
6434 	if (tr->current_trace->use_max_tr) {
6435 		ret = -EBUSY;
6436 		goto out;
6437 	}
6438 
6439 	switch (val) {
6440 	case 0:
6441 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6442 			ret = -EINVAL;
6443 			break;
6444 		}
6445 		if (tr->allocated_snapshot)
6446 			free_snapshot(tr);
6447 		break;
6448 	case 1:
6449 /* Only allow per-cpu swap if the ring buffer supports it */
6450 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6451 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6452 			ret = -EINVAL;
6453 			break;
6454 		}
6455 #endif
6456 		if (!tr->allocated_snapshot) {
6457 			ret = tracing_alloc_snapshot_instance(tr);
6458 			if (ret < 0)
6459 				break;
6460 		}
6461 		local_irq_disable();
6462 		/* Now, we're going to swap */
6463 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6464 			update_max_tr(tr, current, smp_processor_id());
6465 		else
6466 			update_max_tr_single(tr, current, iter->cpu_file);
6467 		local_irq_enable();
6468 		break;
6469 	default:
6470 		if (tr->allocated_snapshot) {
6471 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6472 				tracing_reset_online_cpus(&tr->max_buffer);
6473 			else
6474 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6475 		}
6476 		break;
6477 	}
6478 
6479 	if (ret >= 0) {
6480 		*ppos += cnt;
6481 		ret = cnt;
6482 	}
6483 out:
6484 	mutex_unlock(&trace_types_lock);
6485 	return ret;
6486 }
6487 
6488 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6489 {
6490 	struct seq_file *m = file->private_data;
6491 	int ret;
6492 
6493 	ret = tracing_release(inode, file);
6494 
6495 	if (file->f_mode & FMODE_READ)
6496 		return ret;
6497 
6498 	/* If write only, the seq_file is just a stub */
6499 	if (m)
6500 		kfree(m->private);
6501 	kfree(m);
6502 
6503 	return 0;
6504 }
6505 
6506 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6507 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6508 				    size_t count, loff_t *ppos);
6509 static int tracing_buffers_release(struct inode *inode, struct file *file);
6510 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6511 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6512 
6513 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6514 {
6515 	struct ftrace_buffer_info *info;
6516 	int ret;
6517 
6518 	ret = tracing_buffers_open(inode, filp);
6519 	if (ret < 0)
6520 		return ret;
6521 
6522 	info = filp->private_data;
6523 
6524 	if (info->iter.trace->use_max_tr) {
6525 		tracing_buffers_release(inode, filp);
6526 		return -EBUSY;
6527 	}
6528 
6529 	info->iter.snapshot = true;
6530 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6531 
6532 	return ret;
6533 }
6534 
6535 #endif /* CONFIG_TRACER_SNAPSHOT */
6536 
6537 
6538 static const struct file_operations tracing_thresh_fops = {
6539 	.open		= tracing_open_generic,
6540 	.read		= tracing_thresh_read,
6541 	.write		= tracing_thresh_write,
6542 	.llseek		= generic_file_llseek,
6543 };
6544 
6545 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6546 static const struct file_operations tracing_max_lat_fops = {
6547 	.open		= tracing_open_generic,
6548 	.read		= tracing_max_lat_read,
6549 	.write		= tracing_max_lat_write,
6550 	.llseek		= generic_file_llseek,
6551 };
6552 #endif
6553 
6554 static const struct file_operations set_tracer_fops = {
6555 	.open		= tracing_open_generic,
6556 	.read		= tracing_set_trace_read,
6557 	.write		= tracing_set_trace_write,
6558 	.llseek		= generic_file_llseek,
6559 };
6560 
6561 static const struct file_operations tracing_pipe_fops = {
6562 	.open		= tracing_open_pipe,
6563 	.poll		= tracing_poll_pipe,
6564 	.read		= tracing_read_pipe,
6565 	.splice_read	= tracing_splice_read_pipe,
6566 	.release	= tracing_release_pipe,
6567 	.llseek		= no_llseek,
6568 };
6569 
6570 static const struct file_operations tracing_entries_fops = {
6571 	.open		= tracing_open_generic_tr,
6572 	.read		= tracing_entries_read,
6573 	.write		= tracing_entries_write,
6574 	.llseek		= generic_file_llseek,
6575 	.release	= tracing_release_generic_tr,
6576 };
6577 
6578 static const struct file_operations tracing_total_entries_fops = {
6579 	.open		= tracing_open_generic_tr,
6580 	.read		= tracing_total_entries_read,
6581 	.llseek		= generic_file_llseek,
6582 	.release	= tracing_release_generic_tr,
6583 };
6584 
6585 static const struct file_operations tracing_free_buffer_fops = {
6586 	.open		= tracing_open_generic_tr,
6587 	.write		= tracing_free_buffer_write,
6588 	.release	= tracing_free_buffer_release,
6589 };
6590 
6591 static const struct file_operations tracing_mark_fops = {
6592 	.open		= tracing_open_generic_tr,
6593 	.write		= tracing_mark_write,
6594 	.llseek		= generic_file_llseek,
6595 	.release	= tracing_release_generic_tr,
6596 };
6597 
6598 static const struct file_operations tracing_mark_raw_fops = {
6599 	.open		= tracing_open_generic_tr,
6600 	.write		= tracing_mark_raw_write,
6601 	.llseek		= generic_file_llseek,
6602 	.release	= tracing_release_generic_tr,
6603 };
6604 
6605 static const struct file_operations trace_clock_fops = {
6606 	.open		= tracing_clock_open,
6607 	.read		= seq_read,
6608 	.llseek		= seq_lseek,
6609 	.release	= tracing_single_release_tr,
6610 	.write		= tracing_clock_write,
6611 };
6612 
6613 static const struct file_operations trace_time_stamp_mode_fops = {
6614 	.open		= tracing_time_stamp_mode_open,
6615 	.read		= seq_read,
6616 	.llseek		= seq_lseek,
6617 	.release	= tracing_single_release_tr,
6618 };
6619 
6620 #ifdef CONFIG_TRACER_SNAPSHOT
6621 static const struct file_operations snapshot_fops = {
6622 	.open		= tracing_snapshot_open,
6623 	.read		= seq_read,
6624 	.write		= tracing_snapshot_write,
6625 	.llseek		= tracing_lseek,
6626 	.release	= tracing_snapshot_release,
6627 };
6628 
6629 static const struct file_operations snapshot_raw_fops = {
6630 	.open		= snapshot_raw_open,
6631 	.read		= tracing_buffers_read,
6632 	.release	= tracing_buffers_release,
6633 	.splice_read	= tracing_buffers_splice_read,
6634 	.llseek		= no_llseek,
6635 };
6636 
6637 #endif /* CONFIG_TRACER_SNAPSHOT */
6638 
6639 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6640 {
6641 	struct trace_array *tr = inode->i_private;
6642 	struct ftrace_buffer_info *info;
6643 	int ret;
6644 
6645 	if (tracing_disabled)
6646 		return -ENODEV;
6647 
6648 	if (trace_array_get(tr) < 0)
6649 		return -ENODEV;
6650 
6651 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6652 	if (!info) {
6653 		trace_array_put(tr);
6654 		return -ENOMEM;
6655 	}
6656 
6657 	mutex_lock(&trace_types_lock);
6658 
6659 	info->iter.tr		= tr;
6660 	info->iter.cpu_file	= tracing_get_cpu(inode);
6661 	info->iter.trace	= tr->current_trace;
6662 	info->iter.trace_buffer = &tr->trace_buffer;
6663 	info->spare		= NULL;
6664 	/* Force reading ring buffer for first read */
6665 	info->read		= (unsigned int)-1;
6666 
6667 	filp->private_data = info;
6668 
6669 	tr->current_trace->ref++;
6670 
6671 	mutex_unlock(&trace_types_lock);
6672 
6673 	ret = nonseekable_open(inode, filp);
6674 	if (ret < 0)
6675 		trace_array_put(tr);
6676 
6677 	return ret;
6678 }
6679 
6680 static __poll_t
6681 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6682 {
6683 	struct ftrace_buffer_info *info = filp->private_data;
6684 	struct trace_iterator *iter = &info->iter;
6685 
6686 	return trace_poll(iter, filp, poll_table);
6687 }
6688 
6689 static ssize_t
6690 tracing_buffers_read(struct file *filp, char __user *ubuf,
6691 		     size_t count, loff_t *ppos)
6692 {
6693 	struct ftrace_buffer_info *info = filp->private_data;
6694 	struct trace_iterator *iter = &info->iter;
6695 	ssize_t ret = 0;
6696 	ssize_t size;
6697 
6698 	if (!count)
6699 		return 0;
6700 
6701 #ifdef CONFIG_TRACER_MAX_TRACE
6702 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6703 		return -EBUSY;
6704 #endif
6705 
6706 	if (!info->spare) {
6707 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6708 							  iter->cpu_file);
6709 		if (IS_ERR(info->spare)) {
6710 			ret = PTR_ERR(info->spare);
6711 			info->spare = NULL;
6712 		} else {
6713 			info->spare_cpu = iter->cpu_file;
6714 		}
6715 	}
6716 	if (!info->spare)
6717 		return ret;
6718 
6719 	/* Do we have previous read data to read? */
6720 	if (info->read < PAGE_SIZE)
6721 		goto read;
6722 
6723  again:
6724 	trace_access_lock(iter->cpu_file);
6725 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6726 				    &info->spare,
6727 				    count,
6728 				    iter->cpu_file, 0);
6729 	trace_access_unlock(iter->cpu_file);
6730 
6731 	if (ret < 0) {
6732 		if (trace_empty(iter)) {
6733 			if ((filp->f_flags & O_NONBLOCK))
6734 				return -EAGAIN;
6735 
6736 			ret = wait_on_pipe(iter, false);
6737 			if (ret)
6738 				return ret;
6739 
6740 			goto again;
6741 		}
6742 		return 0;
6743 	}
6744 
6745 	info->read = 0;
6746  read:
6747 	size = PAGE_SIZE - info->read;
6748 	if (size > count)
6749 		size = count;
6750 
6751 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6752 	if (ret == size)
6753 		return -EFAULT;
6754 
6755 	size -= ret;
6756 
6757 	*ppos += size;
6758 	info->read += size;
6759 
6760 	return size;
6761 }
6762 
6763 static int tracing_buffers_release(struct inode *inode, struct file *file)
6764 {
6765 	struct ftrace_buffer_info *info = file->private_data;
6766 	struct trace_iterator *iter = &info->iter;
6767 
6768 	mutex_lock(&trace_types_lock);
6769 
6770 	iter->tr->current_trace->ref--;
6771 
6772 	__trace_array_put(iter->tr);
6773 
6774 	if (info->spare)
6775 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6776 					   info->spare_cpu, info->spare);
6777 	kfree(info);
6778 
6779 	mutex_unlock(&trace_types_lock);
6780 
6781 	return 0;
6782 }
6783 
6784 struct buffer_ref {
6785 	struct ring_buffer	*buffer;
6786 	void			*page;
6787 	int			cpu;
6788 	int			ref;
6789 };
6790 
6791 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6792 				    struct pipe_buffer *buf)
6793 {
6794 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6795 
6796 	if (--ref->ref)
6797 		return;
6798 
6799 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6800 	kfree(ref);
6801 	buf->private = 0;
6802 }
6803 
6804 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6805 				struct pipe_buffer *buf)
6806 {
6807 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6808 
6809 	ref->ref++;
6810 }
6811 
6812 /* Pipe buffer operations for a buffer. */
6813 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6814 	.can_merge		= 0,
6815 	.confirm		= generic_pipe_buf_confirm,
6816 	.release		= buffer_pipe_buf_release,
6817 	.steal			= generic_pipe_buf_steal,
6818 	.get			= buffer_pipe_buf_get,
6819 };
6820 
6821 /*
6822  * Callback from splice_to_pipe(), if we need to release some pages
6823  * at the end of the spd in case we error'ed out in filling the pipe.
6824  */
6825 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6826 {
6827 	struct buffer_ref *ref =
6828 		(struct buffer_ref *)spd->partial[i].private;
6829 
6830 	if (--ref->ref)
6831 		return;
6832 
6833 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6834 	kfree(ref);
6835 	spd->partial[i].private = 0;
6836 }
6837 
6838 static ssize_t
6839 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6840 			    struct pipe_inode_info *pipe, size_t len,
6841 			    unsigned int flags)
6842 {
6843 	struct ftrace_buffer_info *info = file->private_data;
6844 	struct trace_iterator *iter = &info->iter;
6845 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6846 	struct page *pages_def[PIPE_DEF_BUFFERS];
6847 	struct splice_pipe_desc spd = {
6848 		.pages		= pages_def,
6849 		.partial	= partial_def,
6850 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6851 		.ops		= &buffer_pipe_buf_ops,
6852 		.spd_release	= buffer_spd_release,
6853 	};
6854 	struct buffer_ref *ref;
6855 	int entries, i;
6856 	ssize_t ret = 0;
6857 
6858 #ifdef CONFIG_TRACER_MAX_TRACE
6859 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6860 		return -EBUSY;
6861 #endif
6862 
6863 	if (*ppos & (PAGE_SIZE - 1))
6864 		return -EINVAL;
6865 
6866 	if (len & (PAGE_SIZE - 1)) {
6867 		if (len < PAGE_SIZE)
6868 			return -EINVAL;
6869 		len &= PAGE_MASK;
6870 	}
6871 
6872 	if (splice_grow_spd(pipe, &spd))
6873 		return -ENOMEM;
6874 
6875  again:
6876 	trace_access_lock(iter->cpu_file);
6877 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6878 
6879 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6880 		struct page *page;
6881 		int r;
6882 
6883 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6884 		if (!ref) {
6885 			ret = -ENOMEM;
6886 			break;
6887 		}
6888 
6889 		ref->ref = 1;
6890 		ref->buffer = iter->trace_buffer->buffer;
6891 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6892 		if (IS_ERR(ref->page)) {
6893 			ret = PTR_ERR(ref->page);
6894 			ref->page = NULL;
6895 			kfree(ref);
6896 			break;
6897 		}
6898 		ref->cpu = iter->cpu_file;
6899 
6900 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6901 					  len, iter->cpu_file, 1);
6902 		if (r < 0) {
6903 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6904 						   ref->page);
6905 			kfree(ref);
6906 			break;
6907 		}
6908 
6909 		page = virt_to_page(ref->page);
6910 
6911 		spd.pages[i] = page;
6912 		spd.partial[i].len = PAGE_SIZE;
6913 		spd.partial[i].offset = 0;
6914 		spd.partial[i].private = (unsigned long)ref;
6915 		spd.nr_pages++;
6916 		*ppos += PAGE_SIZE;
6917 
6918 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6919 	}
6920 
6921 	trace_access_unlock(iter->cpu_file);
6922 	spd.nr_pages = i;
6923 
6924 	/* did we read anything? */
6925 	if (!spd.nr_pages) {
6926 		if (ret)
6927 			goto out;
6928 
6929 		ret = -EAGAIN;
6930 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6931 			goto out;
6932 
6933 		ret = wait_on_pipe(iter, true);
6934 		if (ret)
6935 			goto out;
6936 
6937 		goto again;
6938 	}
6939 
6940 	ret = splice_to_pipe(pipe, &spd);
6941 out:
6942 	splice_shrink_spd(&spd);
6943 
6944 	return ret;
6945 }
6946 
6947 static const struct file_operations tracing_buffers_fops = {
6948 	.open		= tracing_buffers_open,
6949 	.read		= tracing_buffers_read,
6950 	.poll		= tracing_buffers_poll,
6951 	.release	= tracing_buffers_release,
6952 	.splice_read	= tracing_buffers_splice_read,
6953 	.llseek		= no_llseek,
6954 };
6955 
6956 static ssize_t
6957 tracing_stats_read(struct file *filp, char __user *ubuf,
6958 		   size_t count, loff_t *ppos)
6959 {
6960 	struct inode *inode = file_inode(filp);
6961 	struct trace_array *tr = inode->i_private;
6962 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6963 	int cpu = tracing_get_cpu(inode);
6964 	struct trace_seq *s;
6965 	unsigned long cnt;
6966 	unsigned long long t;
6967 	unsigned long usec_rem;
6968 
6969 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6970 	if (!s)
6971 		return -ENOMEM;
6972 
6973 	trace_seq_init(s);
6974 
6975 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6976 	trace_seq_printf(s, "entries: %ld\n", cnt);
6977 
6978 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6979 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6980 
6981 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6982 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6983 
6984 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6985 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6986 
6987 	if (trace_clocks[tr->clock_id].in_ns) {
6988 		/* local or global for trace_clock */
6989 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6990 		usec_rem = do_div(t, USEC_PER_SEC);
6991 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6992 								t, usec_rem);
6993 
6994 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6995 		usec_rem = do_div(t, USEC_PER_SEC);
6996 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6997 	} else {
6998 		/* counter or tsc mode for trace_clock */
6999 		trace_seq_printf(s, "oldest event ts: %llu\n",
7000 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7001 
7002 		trace_seq_printf(s, "now ts: %llu\n",
7003 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7004 	}
7005 
7006 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7007 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7008 
7009 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7010 	trace_seq_printf(s, "read events: %ld\n", cnt);
7011 
7012 	count = simple_read_from_buffer(ubuf, count, ppos,
7013 					s->buffer, trace_seq_used(s));
7014 
7015 	kfree(s);
7016 
7017 	return count;
7018 }
7019 
7020 static const struct file_operations tracing_stats_fops = {
7021 	.open		= tracing_open_generic_tr,
7022 	.read		= tracing_stats_read,
7023 	.llseek		= generic_file_llseek,
7024 	.release	= tracing_release_generic_tr,
7025 };
7026 
7027 #ifdef CONFIG_DYNAMIC_FTRACE
7028 
7029 static ssize_t
7030 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7031 		  size_t cnt, loff_t *ppos)
7032 {
7033 	unsigned long *p = filp->private_data;
7034 	char buf[64]; /* Not too big for a shallow stack */
7035 	int r;
7036 
7037 	r = scnprintf(buf, 63, "%ld", *p);
7038 	buf[r++] = '\n';
7039 
7040 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7041 }
7042 
7043 static const struct file_operations tracing_dyn_info_fops = {
7044 	.open		= tracing_open_generic,
7045 	.read		= tracing_read_dyn_info,
7046 	.llseek		= generic_file_llseek,
7047 };
7048 #endif /* CONFIG_DYNAMIC_FTRACE */
7049 
7050 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7051 static void
7052 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7053 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7054 		void *data)
7055 {
7056 	tracing_snapshot_instance(tr);
7057 }
7058 
7059 static void
7060 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7061 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7062 		      void *data)
7063 {
7064 	struct ftrace_func_mapper *mapper = data;
7065 	long *count = NULL;
7066 
7067 	if (mapper)
7068 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7069 
7070 	if (count) {
7071 
7072 		if (*count <= 0)
7073 			return;
7074 
7075 		(*count)--;
7076 	}
7077 
7078 	tracing_snapshot_instance(tr);
7079 }
7080 
7081 static int
7082 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7083 		      struct ftrace_probe_ops *ops, void *data)
7084 {
7085 	struct ftrace_func_mapper *mapper = data;
7086 	long *count = NULL;
7087 
7088 	seq_printf(m, "%ps:", (void *)ip);
7089 
7090 	seq_puts(m, "snapshot");
7091 
7092 	if (mapper)
7093 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7094 
7095 	if (count)
7096 		seq_printf(m, ":count=%ld\n", *count);
7097 	else
7098 		seq_puts(m, ":unlimited\n");
7099 
7100 	return 0;
7101 }
7102 
7103 static int
7104 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7105 		     unsigned long ip, void *init_data, void **data)
7106 {
7107 	struct ftrace_func_mapper *mapper = *data;
7108 
7109 	if (!mapper) {
7110 		mapper = allocate_ftrace_func_mapper();
7111 		if (!mapper)
7112 			return -ENOMEM;
7113 		*data = mapper;
7114 	}
7115 
7116 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7117 }
7118 
7119 static void
7120 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7121 		     unsigned long ip, void *data)
7122 {
7123 	struct ftrace_func_mapper *mapper = data;
7124 
7125 	if (!ip) {
7126 		if (!mapper)
7127 			return;
7128 		free_ftrace_func_mapper(mapper, NULL);
7129 		return;
7130 	}
7131 
7132 	ftrace_func_mapper_remove_ip(mapper, ip);
7133 }
7134 
7135 static struct ftrace_probe_ops snapshot_probe_ops = {
7136 	.func			= ftrace_snapshot,
7137 	.print			= ftrace_snapshot_print,
7138 };
7139 
7140 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7141 	.func			= ftrace_count_snapshot,
7142 	.print			= ftrace_snapshot_print,
7143 	.init			= ftrace_snapshot_init,
7144 	.free			= ftrace_snapshot_free,
7145 };
7146 
7147 static int
7148 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7149 			       char *glob, char *cmd, char *param, int enable)
7150 {
7151 	struct ftrace_probe_ops *ops;
7152 	void *count = (void *)-1;
7153 	char *number;
7154 	int ret;
7155 
7156 	if (!tr)
7157 		return -ENODEV;
7158 
7159 	/* hash funcs only work with set_ftrace_filter */
7160 	if (!enable)
7161 		return -EINVAL;
7162 
7163 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7164 
7165 	if (glob[0] == '!')
7166 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7167 
7168 	if (!param)
7169 		goto out_reg;
7170 
7171 	number = strsep(&param, ":");
7172 
7173 	if (!strlen(number))
7174 		goto out_reg;
7175 
7176 	/*
7177 	 * We use the callback data field (which is a pointer)
7178 	 * as our counter.
7179 	 */
7180 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7181 	if (ret)
7182 		return ret;
7183 
7184  out_reg:
7185 	ret = tracing_alloc_snapshot_instance(tr);
7186 	if (ret < 0)
7187 		goto out;
7188 
7189 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7190 
7191  out:
7192 	return ret < 0 ? ret : 0;
7193 }
7194 
7195 static struct ftrace_func_command ftrace_snapshot_cmd = {
7196 	.name			= "snapshot",
7197 	.func			= ftrace_trace_snapshot_callback,
7198 };
7199 
7200 static __init int register_snapshot_cmd(void)
7201 {
7202 	return register_ftrace_command(&ftrace_snapshot_cmd);
7203 }
7204 #else
7205 static inline __init int register_snapshot_cmd(void) { return 0; }
7206 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7207 
7208 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7209 {
7210 	if (WARN_ON(!tr->dir))
7211 		return ERR_PTR(-ENODEV);
7212 
7213 	/* Top directory uses NULL as the parent */
7214 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7215 		return NULL;
7216 
7217 	/* All sub buffers have a descriptor */
7218 	return tr->dir;
7219 }
7220 
7221 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7222 {
7223 	struct dentry *d_tracer;
7224 
7225 	if (tr->percpu_dir)
7226 		return tr->percpu_dir;
7227 
7228 	d_tracer = tracing_get_dentry(tr);
7229 	if (IS_ERR(d_tracer))
7230 		return NULL;
7231 
7232 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7233 
7234 	WARN_ONCE(!tr->percpu_dir,
7235 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7236 
7237 	return tr->percpu_dir;
7238 }
7239 
7240 static struct dentry *
7241 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7242 		      void *data, long cpu, const struct file_operations *fops)
7243 {
7244 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7245 
7246 	if (ret) /* See tracing_get_cpu() */
7247 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7248 	return ret;
7249 }
7250 
7251 static void
7252 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7253 {
7254 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7255 	struct dentry *d_cpu;
7256 	char cpu_dir[30]; /* 30 characters should be more than enough */
7257 
7258 	if (!d_percpu)
7259 		return;
7260 
7261 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7262 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7263 	if (!d_cpu) {
7264 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7265 		return;
7266 	}
7267 
7268 	/* per cpu trace_pipe */
7269 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7270 				tr, cpu, &tracing_pipe_fops);
7271 
7272 	/* per cpu trace */
7273 	trace_create_cpu_file("trace", 0644, d_cpu,
7274 				tr, cpu, &tracing_fops);
7275 
7276 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7277 				tr, cpu, &tracing_buffers_fops);
7278 
7279 	trace_create_cpu_file("stats", 0444, d_cpu,
7280 				tr, cpu, &tracing_stats_fops);
7281 
7282 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7283 				tr, cpu, &tracing_entries_fops);
7284 
7285 #ifdef CONFIG_TRACER_SNAPSHOT
7286 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7287 				tr, cpu, &snapshot_fops);
7288 
7289 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7290 				tr, cpu, &snapshot_raw_fops);
7291 #endif
7292 }
7293 
7294 #ifdef CONFIG_FTRACE_SELFTEST
7295 /* Let selftest have access to static functions in this file */
7296 #include "trace_selftest.c"
7297 #endif
7298 
7299 static ssize_t
7300 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7301 			loff_t *ppos)
7302 {
7303 	struct trace_option_dentry *topt = filp->private_data;
7304 	char *buf;
7305 
7306 	if (topt->flags->val & topt->opt->bit)
7307 		buf = "1\n";
7308 	else
7309 		buf = "0\n";
7310 
7311 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7312 }
7313 
7314 static ssize_t
7315 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7316 			 loff_t *ppos)
7317 {
7318 	struct trace_option_dentry *topt = filp->private_data;
7319 	unsigned long val;
7320 	int ret;
7321 
7322 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7323 	if (ret)
7324 		return ret;
7325 
7326 	if (val != 0 && val != 1)
7327 		return -EINVAL;
7328 
7329 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7330 		mutex_lock(&trace_types_lock);
7331 		ret = __set_tracer_option(topt->tr, topt->flags,
7332 					  topt->opt, !val);
7333 		mutex_unlock(&trace_types_lock);
7334 		if (ret)
7335 			return ret;
7336 	}
7337 
7338 	*ppos += cnt;
7339 
7340 	return cnt;
7341 }
7342 
7343 
7344 static const struct file_operations trace_options_fops = {
7345 	.open = tracing_open_generic,
7346 	.read = trace_options_read,
7347 	.write = trace_options_write,
7348 	.llseek	= generic_file_llseek,
7349 };
7350 
7351 /*
7352  * In order to pass in both the trace_array descriptor as well as the index
7353  * to the flag that the trace option file represents, the trace_array
7354  * has a character array of trace_flags_index[], which holds the index
7355  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7356  * The address of this character array is passed to the flag option file
7357  * read/write callbacks.
7358  *
7359  * In order to extract both the index and the trace_array descriptor,
7360  * get_tr_index() uses the following algorithm.
7361  *
7362  *   idx = *ptr;
7363  *
7364  * As the pointer itself contains the address of the index (remember
7365  * index[1] == 1).
7366  *
7367  * Then to get the trace_array descriptor, by subtracting that index
7368  * from the ptr, we get to the start of the index itself.
7369  *
7370  *   ptr - idx == &index[0]
7371  *
7372  * Then a simple container_of() from that pointer gets us to the
7373  * trace_array descriptor.
7374  */
7375 static void get_tr_index(void *data, struct trace_array **ptr,
7376 			 unsigned int *pindex)
7377 {
7378 	*pindex = *(unsigned char *)data;
7379 
7380 	*ptr = container_of(data - *pindex, struct trace_array,
7381 			    trace_flags_index);
7382 }
7383 
7384 static ssize_t
7385 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7386 			loff_t *ppos)
7387 {
7388 	void *tr_index = filp->private_data;
7389 	struct trace_array *tr;
7390 	unsigned int index;
7391 	char *buf;
7392 
7393 	get_tr_index(tr_index, &tr, &index);
7394 
7395 	if (tr->trace_flags & (1 << index))
7396 		buf = "1\n";
7397 	else
7398 		buf = "0\n";
7399 
7400 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7401 }
7402 
7403 static ssize_t
7404 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7405 			 loff_t *ppos)
7406 {
7407 	void *tr_index = filp->private_data;
7408 	struct trace_array *tr;
7409 	unsigned int index;
7410 	unsigned long val;
7411 	int ret;
7412 
7413 	get_tr_index(tr_index, &tr, &index);
7414 
7415 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7416 	if (ret)
7417 		return ret;
7418 
7419 	if (val != 0 && val != 1)
7420 		return -EINVAL;
7421 
7422 	mutex_lock(&trace_types_lock);
7423 	ret = set_tracer_flag(tr, 1 << index, val);
7424 	mutex_unlock(&trace_types_lock);
7425 
7426 	if (ret < 0)
7427 		return ret;
7428 
7429 	*ppos += cnt;
7430 
7431 	return cnt;
7432 }
7433 
7434 static const struct file_operations trace_options_core_fops = {
7435 	.open = tracing_open_generic,
7436 	.read = trace_options_core_read,
7437 	.write = trace_options_core_write,
7438 	.llseek = generic_file_llseek,
7439 };
7440 
7441 struct dentry *trace_create_file(const char *name,
7442 				 umode_t mode,
7443 				 struct dentry *parent,
7444 				 void *data,
7445 				 const struct file_operations *fops)
7446 {
7447 	struct dentry *ret;
7448 
7449 	ret = tracefs_create_file(name, mode, parent, data, fops);
7450 	if (!ret)
7451 		pr_warn("Could not create tracefs '%s' entry\n", name);
7452 
7453 	return ret;
7454 }
7455 
7456 
7457 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7458 {
7459 	struct dentry *d_tracer;
7460 
7461 	if (tr->options)
7462 		return tr->options;
7463 
7464 	d_tracer = tracing_get_dentry(tr);
7465 	if (IS_ERR(d_tracer))
7466 		return NULL;
7467 
7468 	tr->options = tracefs_create_dir("options", d_tracer);
7469 	if (!tr->options) {
7470 		pr_warn("Could not create tracefs directory 'options'\n");
7471 		return NULL;
7472 	}
7473 
7474 	return tr->options;
7475 }
7476 
7477 static void
7478 create_trace_option_file(struct trace_array *tr,
7479 			 struct trace_option_dentry *topt,
7480 			 struct tracer_flags *flags,
7481 			 struct tracer_opt *opt)
7482 {
7483 	struct dentry *t_options;
7484 
7485 	t_options = trace_options_init_dentry(tr);
7486 	if (!t_options)
7487 		return;
7488 
7489 	topt->flags = flags;
7490 	topt->opt = opt;
7491 	topt->tr = tr;
7492 
7493 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7494 				    &trace_options_fops);
7495 
7496 }
7497 
7498 static void
7499 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7500 {
7501 	struct trace_option_dentry *topts;
7502 	struct trace_options *tr_topts;
7503 	struct tracer_flags *flags;
7504 	struct tracer_opt *opts;
7505 	int cnt;
7506 	int i;
7507 
7508 	if (!tracer)
7509 		return;
7510 
7511 	flags = tracer->flags;
7512 
7513 	if (!flags || !flags->opts)
7514 		return;
7515 
7516 	/*
7517 	 * If this is an instance, only create flags for tracers
7518 	 * the instance may have.
7519 	 */
7520 	if (!trace_ok_for_array(tracer, tr))
7521 		return;
7522 
7523 	for (i = 0; i < tr->nr_topts; i++) {
7524 		/* Make sure there's no duplicate flags. */
7525 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7526 			return;
7527 	}
7528 
7529 	opts = flags->opts;
7530 
7531 	for (cnt = 0; opts[cnt].name; cnt++)
7532 		;
7533 
7534 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7535 	if (!topts)
7536 		return;
7537 
7538 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7539 			    GFP_KERNEL);
7540 	if (!tr_topts) {
7541 		kfree(topts);
7542 		return;
7543 	}
7544 
7545 	tr->topts = tr_topts;
7546 	tr->topts[tr->nr_topts].tracer = tracer;
7547 	tr->topts[tr->nr_topts].topts = topts;
7548 	tr->nr_topts++;
7549 
7550 	for (cnt = 0; opts[cnt].name; cnt++) {
7551 		create_trace_option_file(tr, &topts[cnt], flags,
7552 					 &opts[cnt]);
7553 		WARN_ONCE(topts[cnt].entry == NULL,
7554 			  "Failed to create trace option: %s",
7555 			  opts[cnt].name);
7556 	}
7557 }
7558 
7559 static struct dentry *
7560 create_trace_option_core_file(struct trace_array *tr,
7561 			      const char *option, long index)
7562 {
7563 	struct dentry *t_options;
7564 
7565 	t_options = trace_options_init_dentry(tr);
7566 	if (!t_options)
7567 		return NULL;
7568 
7569 	return trace_create_file(option, 0644, t_options,
7570 				 (void *)&tr->trace_flags_index[index],
7571 				 &trace_options_core_fops);
7572 }
7573 
7574 static void create_trace_options_dir(struct trace_array *tr)
7575 {
7576 	struct dentry *t_options;
7577 	bool top_level = tr == &global_trace;
7578 	int i;
7579 
7580 	t_options = trace_options_init_dentry(tr);
7581 	if (!t_options)
7582 		return;
7583 
7584 	for (i = 0; trace_options[i]; i++) {
7585 		if (top_level ||
7586 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7587 			create_trace_option_core_file(tr, trace_options[i], i);
7588 	}
7589 }
7590 
7591 static ssize_t
7592 rb_simple_read(struct file *filp, char __user *ubuf,
7593 	       size_t cnt, loff_t *ppos)
7594 {
7595 	struct trace_array *tr = filp->private_data;
7596 	char buf[64];
7597 	int r;
7598 
7599 	r = tracer_tracing_is_on(tr);
7600 	r = sprintf(buf, "%d\n", r);
7601 
7602 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7603 }
7604 
7605 static ssize_t
7606 rb_simple_write(struct file *filp, const char __user *ubuf,
7607 		size_t cnt, loff_t *ppos)
7608 {
7609 	struct trace_array *tr = filp->private_data;
7610 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7611 	unsigned long val;
7612 	int ret;
7613 
7614 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7615 	if (ret)
7616 		return ret;
7617 
7618 	if (buffer) {
7619 		mutex_lock(&trace_types_lock);
7620 		if (val) {
7621 			tracer_tracing_on(tr);
7622 			if (tr->current_trace->start)
7623 				tr->current_trace->start(tr);
7624 		} else {
7625 			tracer_tracing_off(tr);
7626 			if (tr->current_trace->stop)
7627 				tr->current_trace->stop(tr);
7628 		}
7629 		mutex_unlock(&trace_types_lock);
7630 	}
7631 
7632 	(*ppos)++;
7633 
7634 	return cnt;
7635 }
7636 
7637 static const struct file_operations rb_simple_fops = {
7638 	.open		= tracing_open_generic_tr,
7639 	.read		= rb_simple_read,
7640 	.write		= rb_simple_write,
7641 	.release	= tracing_release_generic_tr,
7642 	.llseek		= default_llseek,
7643 };
7644 
7645 struct dentry *trace_instance_dir;
7646 
7647 static void
7648 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7649 
7650 static int
7651 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7652 {
7653 	enum ring_buffer_flags rb_flags;
7654 
7655 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7656 
7657 	buf->tr = tr;
7658 
7659 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7660 	if (!buf->buffer)
7661 		return -ENOMEM;
7662 
7663 	buf->data = alloc_percpu(struct trace_array_cpu);
7664 	if (!buf->data) {
7665 		ring_buffer_free(buf->buffer);
7666 		buf->buffer = NULL;
7667 		return -ENOMEM;
7668 	}
7669 
7670 	/* Allocate the first page for all buffers */
7671 	set_buffer_entries(&tr->trace_buffer,
7672 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7673 
7674 	return 0;
7675 }
7676 
7677 static int allocate_trace_buffers(struct trace_array *tr, int size)
7678 {
7679 	int ret;
7680 
7681 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7682 	if (ret)
7683 		return ret;
7684 
7685 #ifdef CONFIG_TRACER_MAX_TRACE
7686 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7687 				    allocate_snapshot ? size : 1);
7688 	if (WARN_ON(ret)) {
7689 		ring_buffer_free(tr->trace_buffer.buffer);
7690 		tr->trace_buffer.buffer = NULL;
7691 		free_percpu(tr->trace_buffer.data);
7692 		tr->trace_buffer.data = NULL;
7693 		return -ENOMEM;
7694 	}
7695 	tr->allocated_snapshot = allocate_snapshot;
7696 
7697 	/*
7698 	 * Only the top level trace array gets its snapshot allocated
7699 	 * from the kernel command line.
7700 	 */
7701 	allocate_snapshot = false;
7702 #endif
7703 	return 0;
7704 }
7705 
7706 static void free_trace_buffer(struct trace_buffer *buf)
7707 {
7708 	if (buf->buffer) {
7709 		ring_buffer_free(buf->buffer);
7710 		buf->buffer = NULL;
7711 		free_percpu(buf->data);
7712 		buf->data = NULL;
7713 	}
7714 }
7715 
7716 static void free_trace_buffers(struct trace_array *tr)
7717 {
7718 	if (!tr)
7719 		return;
7720 
7721 	free_trace_buffer(&tr->trace_buffer);
7722 
7723 #ifdef CONFIG_TRACER_MAX_TRACE
7724 	free_trace_buffer(&tr->max_buffer);
7725 #endif
7726 }
7727 
7728 static void init_trace_flags_index(struct trace_array *tr)
7729 {
7730 	int i;
7731 
7732 	/* Used by the trace options files */
7733 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7734 		tr->trace_flags_index[i] = i;
7735 }
7736 
7737 static void __update_tracer_options(struct trace_array *tr)
7738 {
7739 	struct tracer *t;
7740 
7741 	for (t = trace_types; t; t = t->next)
7742 		add_tracer_options(tr, t);
7743 }
7744 
7745 static void update_tracer_options(struct trace_array *tr)
7746 {
7747 	mutex_lock(&trace_types_lock);
7748 	__update_tracer_options(tr);
7749 	mutex_unlock(&trace_types_lock);
7750 }
7751 
7752 static int instance_mkdir(const char *name)
7753 {
7754 	struct trace_array *tr;
7755 	int ret;
7756 
7757 	mutex_lock(&event_mutex);
7758 	mutex_lock(&trace_types_lock);
7759 
7760 	ret = -EEXIST;
7761 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7762 		if (tr->name && strcmp(tr->name, name) == 0)
7763 			goto out_unlock;
7764 	}
7765 
7766 	ret = -ENOMEM;
7767 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7768 	if (!tr)
7769 		goto out_unlock;
7770 
7771 	tr->name = kstrdup(name, GFP_KERNEL);
7772 	if (!tr->name)
7773 		goto out_free_tr;
7774 
7775 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7776 		goto out_free_tr;
7777 
7778 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7779 
7780 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7781 
7782 	raw_spin_lock_init(&tr->start_lock);
7783 
7784 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7785 
7786 	tr->current_trace = &nop_trace;
7787 
7788 	INIT_LIST_HEAD(&tr->systems);
7789 	INIT_LIST_HEAD(&tr->events);
7790 	INIT_LIST_HEAD(&tr->hist_vars);
7791 
7792 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7793 		goto out_free_tr;
7794 
7795 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7796 	if (!tr->dir)
7797 		goto out_free_tr;
7798 
7799 	ret = event_trace_add_tracer(tr->dir, tr);
7800 	if (ret) {
7801 		tracefs_remove_recursive(tr->dir);
7802 		goto out_free_tr;
7803 	}
7804 
7805 	ftrace_init_trace_array(tr);
7806 
7807 	init_tracer_tracefs(tr, tr->dir);
7808 	init_trace_flags_index(tr);
7809 	__update_tracer_options(tr);
7810 
7811 	list_add(&tr->list, &ftrace_trace_arrays);
7812 
7813 	mutex_unlock(&trace_types_lock);
7814 	mutex_unlock(&event_mutex);
7815 
7816 	return 0;
7817 
7818  out_free_tr:
7819 	free_trace_buffers(tr);
7820 	free_cpumask_var(tr->tracing_cpumask);
7821 	kfree(tr->name);
7822 	kfree(tr);
7823 
7824  out_unlock:
7825 	mutex_unlock(&trace_types_lock);
7826 	mutex_unlock(&event_mutex);
7827 
7828 	return ret;
7829 
7830 }
7831 
7832 static int instance_rmdir(const char *name)
7833 {
7834 	struct trace_array *tr;
7835 	int found = 0;
7836 	int ret;
7837 	int i;
7838 
7839 	mutex_lock(&event_mutex);
7840 	mutex_lock(&trace_types_lock);
7841 
7842 	ret = -ENODEV;
7843 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7844 		if (tr->name && strcmp(tr->name, name) == 0) {
7845 			found = 1;
7846 			break;
7847 		}
7848 	}
7849 	if (!found)
7850 		goto out_unlock;
7851 
7852 	ret = -EBUSY;
7853 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7854 		goto out_unlock;
7855 
7856 	list_del(&tr->list);
7857 
7858 	/* Disable all the flags that were enabled coming in */
7859 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7860 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7861 			set_tracer_flag(tr, 1 << i, 0);
7862 	}
7863 
7864 	tracing_set_nop(tr);
7865 	clear_ftrace_function_probes(tr);
7866 	event_trace_del_tracer(tr);
7867 	ftrace_clear_pids(tr);
7868 	ftrace_destroy_function_files(tr);
7869 	tracefs_remove_recursive(tr->dir);
7870 	free_trace_buffers(tr);
7871 
7872 	for (i = 0; i < tr->nr_topts; i++) {
7873 		kfree(tr->topts[i].topts);
7874 	}
7875 	kfree(tr->topts);
7876 
7877 	free_cpumask_var(tr->tracing_cpumask);
7878 	kfree(tr->name);
7879 	kfree(tr);
7880 
7881 	ret = 0;
7882 
7883  out_unlock:
7884 	mutex_unlock(&trace_types_lock);
7885 	mutex_unlock(&event_mutex);
7886 
7887 	return ret;
7888 }
7889 
7890 static __init void create_trace_instances(struct dentry *d_tracer)
7891 {
7892 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7893 							 instance_mkdir,
7894 							 instance_rmdir);
7895 	if (WARN_ON(!trace_instance_dir))
7896 		return;
7897 }
7898 
7899 static void
7900 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7901 {
7902 	struct trace_event_file *file;
7903 	int cpu;
7904 
7905 	trace_create_file("available_tracers", 0444, d_tracer,
7906 			tr, &show_traces_fops);
7907 
7908 	trace_create_file("current_tracer", 0644, d_tracer,
7909 			tr, &set_tracer_fops);
7910 
7911 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7912 			  tr, &tracing_cpumask_fops);
7913 
7914 	trace_create_file("trace_options", 0644, d_tracer,
7915 			  tr, &tracing_iter_fops);
7916 
7917 	trace_create_file("trace", 0644, d_tracer,
7918 			  tr, &tracing_fops);
7919 
7920 	trace_create_file("trace_pipe", 0444, d_tracer,
7921 			  tr, &tracing_pipe_fops);
7922 
7923 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7924 			  tr, &tracing_entries_fops);
7925 
7926 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7927 			  tr, &tracing_total_entries_fops);
7928 
7929 	trace_create_file("free_buffer", 0200, d_tracer,
7930 			  tr, &tracing_free_buffer_fops);
7931 
7932 	trace_create_file("trace_marker", 0220, d_tracer,
7933 			  tr, &tracing_mark_fops);
7934 
7935 	file = __find_event_file(tr, "ftrace", "print");
7936 	if (file && file->dir)
7937 		trace_create_file("trigger", 0644, file->dir, file,
7938 				  &event_trigger_fops);
7939 	tr->trace_marker_file = file;
7940 
7941 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7942 			  tr, &tracing_mark_raw_fops);
7943 
7944 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7945 			  &trace_clock_fops);
7946 
7947 	trace_create_file("tracing_on", 0644, d_tracer,
7948 			  tr, &rb_simple_fops);
7949 
7950 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7951 			  &trace_time_stamp_mode_fops);
7952 
7953 	create_trace_options_dir(tr);
7954 
7955 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7956 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7957 			&tr->max_latency, &tracing_max_lat_fops);
7958 #endif
7959 
7960 	if (ftrace_create_function_files(tr, d_tracer))
7961 		WARN(1, "Could not allocate function filter files");
7962 
7963 #ifdef CONFIG_TRACER_SNAPSHOT
7964 	trace_create_file("snapshot", 0644, d_tracer,
7965 			  tr, &snapshot_fops);
7966 #endif
7967 
7968 	for_each_tracing_cpu(cpu)
7969 		tracing_init_tracefs_percpu(tr, cpu);
7970 
7971 	ftrace_init_tracefs(tr, d_tracer);
7972 }
7973 
7974 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7975 {
7976 	struct vfsmount *mnt;
7977 	struct file_system_type *type;
7978 
7979 	/*
7980 	 * To maintain backward compatibility for tools that mount
7981 	 * debugfs to get to the tracing facility, tracefs is automatically
7982 	 * mounted to the debugfs/tracing directory.
7983 	 */
7984 	type = get_fs_type("tracefs");
7985 	if (!type)
7986 		return NULL;
7987 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7988 	put_filesystem(type);
7989 	if (IS_ERR(mnt))
7990 		return NULL;
7991 	mntget(mnt);
7992 
7993 	return mnt;
7994 }
7995 
7996 /**
7997  * tracing_init_dentry - initialize top level trace array
7998  *
7999  * This is called when creating files or directories in the tracing
8000  * directory. It is called via fs_initcall() by any of the boot up code
8001  * and expects to return the dentry of the top level tracing directory.
8002  */
8003 struct dentry *tracing_init_dentry(void)
8004 {
8005 	struct trace_array *tr = &global_trace;
8006 
8007 	/* The top level trace array uses  NULL as parent */
8008 	if (tr->dir)
8009 		return NULL;
8010 
8011 	if (WARN_ON(!tracefs_initialized()) ||
8012 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8013 		 WARN_ON(!debugfs_initialized())))
8014 		return ERR_PTR(-ENODEV);
8015 
8016 	/*
8017 	 * As there may still be users that expect the tracing
8018 	 * files to exist in debugfs/tracing, we must automount
8019 	 * the tracefs file system there, so older tools still
8020 	 * work with the newer kerenl.
8021 	 */
8022 	tr->dir = debugfs_create_automount("tracing", NULL,
8023 					   trace_automount, NULL);
8024 	if (!tr->dir) {
8025 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8026 		return ERR_PTR(-ENOMEM);
8027 	}
8028 
8029 	return NULL;
8030 }
8031 
8032 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8033 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8034 
8035 static void __init trace_eval_init(void)
8036 {
8037 	int len;
8038 
8039 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8040 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8041 }
8042 
8043 #ifdef CONFIG_MODULES
8044 static void trace_module_add_evals(struct module *mod)
8045 {
8046 	if (!mod->num_trace_evals)
8047 		return;
8048 
8049 	/*
8050 	 * Modules with bad taint do not have events created, do
8051 	 * not bother with enums either.
8052 	 */
8053 	if (trace_module_has_bad_taint(mod))
8054 		return;
8055 
8056 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8057 }
8058 
8059 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8060 static void trace_module_remove_evals(struct module *mod)
8061 {
8062 	union trace_eval_map_item *map;
8063 	union trace_eval_map_item **last = &trace_eval_maps;
8064 
8065 	if (!mod->num_trace_evals)
8066 		return;
8067 
8068 	mutex_lock(&trace_eval_mutex);
8069 
8070 	map = trace_eval_maps;
8071 
8072 	while (map) {
8073 		if (map->head.mod == mod)
8074 			break;
8075 		map = trace_eval_jmp_to_tail(map);
8076 		last = &map->tail.next;
8077 		map = map->tail.next;
8078 	}
8079 	if (!map)
8080 		goto out;
8081 
8082 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8083 	kfree(map);
8084  out:
8085 	mutex_unlock(&trace_eval_mutex);
8086 }
8087 #else
8088 static inline void trace_module_remove_evals(struct module *mod) { }
8089 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8090 
8091 static int trace_module_notify(struct notifier_block *self,
8092 			       unsigned long val, void *data)
8093 {
8094 	struct module *mod = data;
8095 
8096 	switch (val) {
8097 	case MODULE_STATE_COMING:
8098 		trace_module_add_evals(mod);
8099 		break;
8100 	case MODULE_STATE_GOING:
8101 		trace_module_remove_evals(mod);
8102 		break;
8103 	}
8104 
8105 	return 0;
8106 }
8107 
8108 static struct notifier_block trace_module_nb = {
8109 	.notifier_call = trace_module_notify,
8110 	.priority = 0,
8111 };
8112 #endif /* CONFIG_MODULES */
8113 
8114 static __init int tracer_init_tracefs(void)
8115 {
8116 	struct dentry *d_tracer;
8117 
8118 	trace_access_lock_init();
8119 
8120 	d_tracer = tracing_init_dentry();
8121 	if (IS_ERR(d_tracer))
8122 		return 0;
8123 
8124 	event_trace_init();
8125 
8126 	init_tracer_tracefs(&global_trace, d_tracer);
8127 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8128 
8129 	trace_create_file("tracing_thresh", 0644, d_tracer,
8130 			&global_trace, &tracing_thresh_fops);
8131 
8132 	trace_create_file("README", 0444, d_tracer,
8133 			NULL, &tracing_readme_fops);
8134 
8135 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8136 			NULL, &tracing_saved_cmdlines_fops);
8137 
8138 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8139 			  NULL, &tracing_saved_cmdlines_size_fops);
8140 
8141 	trace_create_file("saved_tgids", 0444, d_tracer,
8142 			NULL, &tracing_saved_tgids_fops);
8143 
8144 	trace_eval_init();
8145 
8146 	trace_create_eval_file(d_tracer);
8147 
8148 #ifdef CONFIG_MODULES
8149 	register_module_notifier(&trace_module_nb);
8150 #endif
8151 
8152 #ifdef CONFIG_DYNAMIC_FTRACE
8153 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8154 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8155 #endif
8156 
8157 	create_trace_instances(d_tracer);
8158 
8159 	update_tracer_options(&global_trace);
8160 
8161 	return 0;
8162 }
8163 
8164 static int trace_panic_handler(struct notifier_block *this,
8165 			       unsigned long event, void *unused)
8166 {
8167 	if (ftrace_dump_on_oops)
8168 		ftrace_dump(ftrace_dump_on_oops);
8169 	return NOTIFY_OK;
8170 }
8171 
8172 static struct notifier_block trace_panic_notifier = {
8173 	.notifier_call  = trace_panic_handler,
8174 	.next           = NULL,
8175 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8176 };
8177 
8178 static int trace_die_handler(struct notifier_block *self,
8179 			     unsigned long val,
8180 			     void *data)
8181 {
8182 	switch (val) {
8183 	case DIE_OOPS:
8184 		if (ftrace_dump_on_oops)
8185 			ftrace_dump(ftrace_dump_on_oops);
8186 		break;
8187 	default:
8188 		break;
8189 	}
8190 	return NOTIFY_OK;
8191 }
8192 
8193 static struct notifier_block trace_die_notifier = {
8194 	.notifier_call = trace_die_handler,
8195 	.priority = 200
8196 };
8197 
8198 /*
8199  * printk is set to max of 1024, we really don't need it that big.
8200  * Nothing should be printing 1000 characters anyway.
8201  */
8202 #define TRACE_MAX_PRINT		1000
8203 
8204 /*
8205  * Define here KERN_TRACE so that we have one place to modify
8206  * it if we decide to change what log level the ftrace dump
8207  * should be at.
8208  */
8209 #define KERN_TRACE		KERN_EMERG
8210 
8211 void
8212 trace_printk_seq(struct trace_seq *s)
8213 {
8214 	/* Probably should print a warning here. */
8215 	if (s->seq.len >= TRACE_MAX_PRINT)
8216 		s->seq.len = TRACE_MAX_PRINT;
8217 
8218 	/*
8219 	 * More paranoid code. Although the buffer size is set to
8220 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8221 	 * an extra layer of protection.
8222 	 */
8223 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8224 		s->seq.len = s->seq.size - 1;
8225 
8226 	/* should be zero ended, but we are paranoid. */
8227 	s->buffer[s->seq.len] = 0;
8228 
8229 	printk(KERN_TRACE "%s", s->buffer);
8230 
8231 	trace_seq_init(s);
8232 }
8233 
8234 void trace_init_global_iter(struct trace_iterator *iter)
8235 {
8236 	iter->tr = &global_trace;
8237 	iter->trace = iter->tr->current_trace;
8238 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8239 	iter->trace_buffer = &global_trace.trace_buffer;
8240 
8241 	if (iter->trace && iter->trace->open)
8242 		iter->trace->open(iter);
8243 
8244 	/* Annotate start of buffers if we had overruns */
8245 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8246 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8247 
8248 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8249 	if (trace_clocks[iter->tr->clock_id].in_ns)
8250 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8251 }
8252 
8253 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8254 {
8255 	/* use static because iter can be a bit big for the stack */
8256 	static struct trace_iterator iter;
8257 	static atomic_t dump_running;
8258 	struct trace_array *tr = &global_trace;
8259 	unsigned int old_userobj;
8260 	unsigned long flags;
8261 	int cnt = 0, cpu;
8262 
8263 	/* Only allow one dump user at a time. */
8264 	if (atomic_inc_return(&dump_running) != 1) {
8265 		atomic_dec(&dump_running);
8266 		return;
8267 	}
8268 
8269 	/*
8270 	 * Always turn off tracing when we dump.
8271 	 * We don't need to show trace output of what happens
8272 	 * between multiple crashes.
8273 	 *
8274 	 * If the user does a sysrq-z, then they can re-enable
8275 	 * tracing with echo 1 > tracing_on.
8276 	 */
8277 	tracing_off();
8278 
8279 	local_irq_save(flags);
8280 
8281 	/* Simulate the iterator */
8282 	trace_init_global_iter(&iter);
8283 
8284 	for_each_tracing_cpu(cpu) {
8285 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8286 	}
8287 
8288 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8289 
8290 	/* don't look at user memory in panic mode */
8291 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8292 
8293 	switch (oops_dump_mode) {
8294 	case DUMP_ALL:
8295 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8296 		break;
8297 	case DUMP_ORIG:
8298 		iter.cpu_file = raw_smp_processor_id();
8299 		break;
8300 	case DUMP_NONE:
8301 		goto out_enable;
8302 	default:
8303 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8304 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8305 	}
8306 
8307 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8308 
8309 	/* Did function tracer already get disabled? */
8310 	if (ftrace_is_dead()) {
8311 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8312 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8313 	}
8314 
8315 	/*
8316 	 * We need to stop all tracing on all CPUS to read the
8317 	 * the next buffer. This is a bit expensive, but is
8318 	 * not done often. We fill all what we can read,
8319 	 * and then release the locks again.
8320 	 */
8321 
8322 	while (!trace_empty(&iter)) {
8323 
8324 		if (!cnt)
8325 			printk(KERN_TRACE "---------------------------------\n");
8326 
8327 		cnt++;
8328 
8329 		/* reset all but tr, trace, and overruns */
8330 		memset(&iter.seq, 0,
8331 		       sizeof(struct trace_iterator) -
8332 		       offsetof(struct trace_iterator, seq));
8333 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8334 		iter.pos = -1;
8335 
8336 		if (trace_find_next_entry_inc(&iter) != NULL) {
8337 			int ret;
8338 
8339 			ret = print_trace_line(&iter);
8340 			if (ret != TRACE_TYPE_NO_CONSUME)
8341 				trace_consume(&iter);
8342 		}
8343 		touch_nmi_watchdog();
8344 
8345 		trace_printk_seq(&iter.seq);
8346 	}
8347 
8348 	if (!cnt)
8349 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8350 	else
8351 		printk(KERN_TRACE "---------------------------------\n");
8352 
8353  out_enable:
8354 	tr->trace_flags |= old_userobj;
8355 
8356 	for_each_tracing_cpu(cpu) {
8357 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8358 	}
8359  	atomic_dec(&dump_running);
8360 	local_irq_restore(flags);
8361 }
8362 EXPORT_SYMBOL_GPL(ftrace_dump);
8363 
8364 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8365 {
8366 	char **argv;
8367 	int argc, ret;
8368 
8369 	argc = 0;
8370 	ret = 0;
8371 	argv = argv_split(GFP_KERNEL, buf, &argc);
8372 	if (!argv)
8373 		return -ENOMEM;
8374 
8375 	if (argc)
8376 		ret = createfn(argc, argv);
8377 
8378 	argv_free(argv);
8379 
8380 	return ret;
8381 }
8382 
8383 #define WRITE_BUFSIZE  4096
8384 
8385 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8386 				size_t count, loff_t *ppos,
8387 				int (*createfn)(int, char **))
8388 {
8389 	char *kbuf, *buf, *tmp;
8390 	int ret = 0;
8391 	size_t done = 0;
8392 	size_t size;
8393 
8394 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8395 	if (!kbuf)
8396 		return -ENOMEM;
8397 
8398 	while (done < count) {
8399 		size = count - done;
8400 
8401 		if (size >= WRITE_BUFSIZE)
8402 			size = WRITE_BUFSIZE - 1;
8403 
8404 		if (copy_from_user(kbuf, buffer + done, size)) {
8405 			ret = -EFAULT;
8406 			goto out;
8407 		}
8408 		kbuf[size] = '\0';
8409 		buf = kbuf;
8410 		do {
8411 			tmp = strchr(buf, '\n');
8412 			if (tmp) {
8413 				*tmp = '\0';
8414 				size = tmp - buf + 1;
8415 			} else {
8416 				size = strlen(buf);
8417 				if (done + size < count) {
8418 					if (buf != kbuf)
8419 						break;
8420 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8421 					pr_warn("Line length is too long: Should be less than %d\n",
8422 						WRITE_BUFSIZE - 2);
8423 					ret = -EINVAL;
8424 					goto out;
8425 				}
8426 			}
8427 			done += size;
8428 
8429 			/* Remove comments */
8430 			tmp = strchr(buf, '#');
8431 
8432 			if (tmp)
8433 				*tmp = '\0';
8434 
8435 			ret = trace_run_command(buf, createfn);
8436 			if (ret)
8437 				goto out;
8438 			buf += size;
8439 
8440 		} while (done < count);
8441 	}
8442 	ret = done;
8443 
8444 out:
8445 	kfree(kbuf);
8446 
8447 	return ret;
8448 }
8449 
8450 __init static int tracer_alloc_buffers(void)
8451 {
8452 	int ring_buf_size;
8453 	int ret = -ENOMEM;
8454 
8455 	/*
8456 	 * Make sure we don't accidently add more trace options
8457 	 * than we have bits for.
8458 	 */
8459 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8460 
8461 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8462 		goto out;
8463 
8464 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8465 		goto out_free_buffer_mask;
8466 
8467 	/* Only allocate trace_printk buffers if a trace_printk exists */
8468 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8469 		/* Must be called before global_trace.buffer is allocated */
8470 		trace_printk_init_buffers();
8471 
8472 	/* To save memory, keep the ring buffer size to its minimum */
8473 	if (ring_buffer_expanded)
8474 		ring_buf_size = trace_buf_size;
8475 	else
8476 		ring_buf_size = 1;
8477 
8478 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8479 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8480 
8481 	raw_spin_lock_init(&global_trace.start_lock);
8482 
8483 	/*
8484 	 * The prepare callbacks allocates some memory for the ring buffer. We
8485 	 * don't free the buffer if the if the CPU goes down. If we were to free
8486 	 * the buffer, then the user would lose any trace that was in the
8487 	 * buffer. The memory will be removed once the "instance" is removed.
8488 	 */
8489 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8490 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8491 				      NULL);
8492 	if (ret < 0)
8493 		goto out_free_cpumask;
8494 	/* Used for event triggers */
8495 	ret = -ENOMEM;
8496 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8497 	if (!temp_buffer)
8498 		goto out_rm_hp_state;
8499 
8500 	if (trace_create_savedcmd() < 0)
8501 		goto out_free_temp_buffer;
8502 
8503 	/* TODO: make the number of buffers hot pluggable with CPUS */
8504 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8505 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8506 		WARN_ON(1);
8507 		goto out_free_savedcmd;
8508 	}
8509 
8510 	if (global_trace.buffer_disabled)
8511 		tracing_off();
8512 
8513 	if (trace_boot_clock) {
8514 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8515 		if (ret < 0)
8516 			pr_warn("Trace clock %s not defined, going back to default\n",
8517 				trace_boot_clock);
8518 	}
8519 
8520 	/*
8521 	 * register_tracer() might reference current_trace, so it
8522 	 * needs to be set before we register anything. This is
8523 	 * just a bootstrap of current_trace anyway.
8524 	 */
8525 	global_trace.current_trace = &nop_trace;
8526 
8527 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8528 
8529 	ftrace_init_global_array_ops(&global_trace);
8530 
8531 	init_trace_flags_index(&global_trace);
8532 
8533 	register_tracer(&nop_trace);
8534 
8535 	/* Function tracing may start here (via kernel command line) */
8536 	init_function_trace();
8537 
8538 	/* All seems OK, enable tracing */
8539 	tracing_disabled = 0;
8540 
8541 	atomic_notifier_chain_register(&panic_notifier_list,
8542 				       &trace_panic_notifier);
8543 
8544 	register_die_notifier(&trace_die_notifier);
8545 
8546 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8547 
8548 	INIT_LIST_HEAD(&global_trace.systems);
8549 	INIT_LIST_HEAD(&global_trace.events);
8550 	INIT_LIST_HEAD(&global_trace.hist_vars);
8551 	list_add(&global_trace.list, &ftrace_trace_arrays);
8552 
8553 	apply_trace_boot_options();
8554 
8555 	register_snapshot_cmd();
8556 
8557 	return 0;
8558 
8559 out_free_savedcmd:
8560 	free_saved_cmdlines_buffer(savedcmd);
8561 out_free_temp_buffer:
8562 	ring_buffer_free(temp_buffer);
8563 out_rm_hp_state:
8564 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8565 out_free_cpumask:
8566 	free_cpumask_var(global_trace.tracing_cpumask);
8567 out_free_buffer_mask:
8568 	free_cpumask_var(tracing_buffer_mask);
8569 out:
8570 	return ret;
8571 }
8572 
8573 void __init early_trace_init(void)
8574 {
8575 	if (tracepoint_printk) {
8576 		tracepoint_print_iter =
8577 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8578 		if (WARN_ON(!tracepoint_print_iter))
8579 			tracepoint_printk = 0;
8580 		else
8581 			static_key_enable(&tracepoint_printk_key.key);
8582 	}
8583 	tracer_alloc_buffers();
8584 }
8585 
8586 void __init trace_init(void)
8587 {
8588 	trace_event_init();
8589 }
8590 
8591 __init static int clear_boot_tracer(void)
8592 {
8593 	/*
8594 	 * The default tracer at boot buffer is an init section.
8595 	 * This function is called in lateinit. If we did not
8596 	 * find the boot tracer, then clear it out, to prevent
8597 	 * later registration from accessing the buffer that is
8598 	 * about to be freed.
8599 	 */
8600 	if (!default_bootup_tracer)
8601 		return 0;
8602 
8603 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8604 	       default_bootup_tracer);
8605 	default_bootup_tracer = NULL;
8606 
8607 	return 0;
8608 }
8609 
8610 fs_initcall(tracer_init_tracefs);
8611 late_initcall_sync(clear_boot_tracer);
8612 
8613 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8614 __init static int tracing_set_default_clock(void)
8615 {
8616 	/* sched_clock_stable() is determined in late_initcall */
8617 	if (!trace_boot_clock && !sched_clock_stable()) {
8618 		printk(KERN_WARNING
8619 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8620 		       "If you want to keep using the local clock, then add:\n"
8621 		       "  \"trace_clock=local\"\n"
8622 		       "on the kernel command line\n");
8623 		tracing_set_clock(&global_trace, "global");
8624 	}
8625 
8626 	return 0;
8627 }
8628 late_initcall_sync(tracing_set_default_clock);
8629 #endif
8630