xref: /openbmc/linux/kernel/trace/trace.c (revision d623f60d)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring-buffer to count the
59  * entries inserted during the selftest, although concurrent
60  * insertions into the ring-buffer, such as trace_printk, could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 and is set back to zero only when the
96  * initialization of the tracer succeeds. That is the only place
97  * that clears it.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputting them to a
110  * serial console.
111  *
112  * It is off by default; you can enable it either by specifying
113  * "ftrace_dump_on_oops" on the kernel command line, or by setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set it to 1 to dump the buffers of all CPUs
116  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * than "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
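
/*
 * Illustrative sketch (not part of the original file) of walking the layout
 * described above: element 0 of each saved array is the head, elements
 * 1..head.length are the maps themselves, and the element after the last map
 * is the tail that links to the next saved array.  The function name is
 * hypothetical.
 */
static void __maybe_unused trace_eval_maps_walk_example(void)
{
	union trace_eval_map_item *ptr = trace_eval_maps;
	unsigned long i, len;

	while (ptr) {
		len = ptr->head.length;

		for (i = 1; i <= len; i++)
			pr_debug("eval %s = %lu\n",
				 ptr[i].map.eval_string, ptr[i].map.eval_value);

		/* the tail follows the last map and points to the next array */
		ptr = ptr[len + 1].tail.next;
	}
}
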
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187 		return 1;
188 	}
189 
190 	return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * When adding a task and @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork, and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (pid + 1, so that a pid of zero can be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid is already +1 of the actual previous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
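
/*
 * Illustrative sketch (not part of the original file): how the three helpers
 * above are typically wired into seq_file operations.  Obtaining the
 * trace_pid_list from m->private is an assumption made for the example; real
 * users (e.g. the set_event_pid file) look it up from their trace_array under
 * RCU in their own start/stop callbacks.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumed */

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumed */

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops __maybe_unused = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};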
471 
472 /* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always create a new array; the write is an all-or-nothing
493 	 * operation. A new array is built whenever the user adds new
494 	 * pids, so if the operation fails, the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it is much nicer not to have
613  * to wait for all that output. In any case, it is configurable
614  * at both boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
635  * The content of events may become garbage if we allow other process consumes
636  * these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not reader page) in ring buffer, and this page will be rewrited
639  *      by events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
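
/*
 * Illustrative sketch (not part of the original file) of the pattern the
 * comment above describes: a reader of a single cpu buffer takes the shared
 * lock plus that cpu's mutex, while RING_BUFFER_ALL_CPUS takes the whole
 * lock exclusively.  The function name is hypothetical.
 */
static void __maybe_unused trace_access_example(int cpu)
{
	trace_access_lock(cpu);
	/* ... consume events from the @cpu ring buffer here ... */
	trace_access_unlock(cpu);
}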
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string whose address will be written into the buffer
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
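
/*
 * Illustrative sketch (not part of the original file): callers normally use
 * the trace_puts() macro (see linux/kernel.h), which picks __trace_bputs()
 * when the argument is a true compile-time constant and __trace_puts()
 * otherwise.  The function name below is hypothetical.
 */
static void __maybe_unused trace_puts_example(void)
{
	/* a string literal, so this resolves to __trace_bputs() */
	trace_puts("example: reached checkpoint\n");
}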
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot beforehand, either
934  * with tracing_snapshot_alloc(), or manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, this will stop tracing,
938  * basically making the snapshot permanent.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer; instead, we resize it because
974 	 * the max_tr ring buffer has some state (e.g. ring->clock) that
975 	 * we want to preserve.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = tracing_alloc_snapshot_instance(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
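
/*
 * Illustrative sketch (not part of the original file) of the split described
 * above: allocate the snapshot buffer from a context that may sleep, then
 * trigger snapshots later from contexts that must not sleep.  The function
 * names are hypothetical.
 */
static int __maybe_unused example_snapshot_setup(void)
{
	/* may sleep: do this once during setup */
	return tracing_alloc_snapshot();
}

static void __maybe_unused example_snapshot_trigger(void)
{
	/* atomic-safe: only swaps the already-allocated buffers */
	tracing_snapshot();
}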
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr: the trace array to check if the ring buffer is enabled
1087  *
1088  * Shows the real state of the ring buffer: whether it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
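
/*
 * Illustrative sketch (not part of the original file): a typical use of the
 * above is to freeze the ring buffers when a problem is detected, so the
 * events leading up to it are preserved for inspection.  The function name
 * is hypothetical.
 */
static void __maybe_unused example_stop_trace_on_problem(bool problem)
{
	if (problem && tracing_is_on())
		tracing_off();
}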
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
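
/*
 * For illustration (not part of the original file): with TRACE_FLAGS defined
 * in trace.h as a list like
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"), ...
 * the "#define C(a, b) b" above expands trace_options[] to
 *	{ "print-parent", "sym-offset", ..., NULL };
 * while trace.h redefines C() to build the matching TRACE_ITER_* bit enums,
 * keeping the strings and the bit positions in sync.
 */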
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this task's comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	if (tr->stop_count)
1364 		return;
1365 
1366 	WARN_ON_ONCE(!irqs_disabled());
1367 
1368 	if (!tr->allocated_snapshot) {
1369 		/* Only the nop tracer should hit this when disabling */
1370 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1371 		return;
1372 	}
1373 
1374 	arch_spin_lock(&tr->max_lock);
1375 
1376 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1377 
1378 	__update_max_tr(tr, tsk, cpu);
1379 	arch_spin_unlock(&tr->max_lock);
1380 }
1381 
1382 /**
1383  * update_max_tr_single - only copy one trace over, and reset the rest
1384  * @tr: tracer
1385  * @tsk: task with the latency
1386  * @cpu: the cpu of the buffer to copy.
1387  *
1388  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1389  */
1390 void
1391 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1392 {
1393 	int ret;
1394 
1395 	if (tr->stop_count)
1396 		return;
1397 
1398 	WARN_ON_ONCE(!irqs_disabled());
1399 	if (!tr->allocated_snapshot) {
1400 		/* Only the nop tracer should hit this when disabling */
1401 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1402 		return;
1403 	}
1404 
1405 	arch_spin_lock(&tr->max_lock);
1406 
1407 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1408 
1409 	if (ret == -EBUSY) {
1410 		/*
1411 		 * We failed to swap the buffer due to a commit taking
1412 		 * place on this CPU. We fail to record, but we reset
1413 		 * the max trace buffer (no one writes directly to it)
1414 		 * and flag that it failed.
1415 		 */
1416 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1417 			"Failed to swap buffers due to commit in progress\n");
1418 	}
1419 
1420 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1421 
1422 	__update_max_tr(tr, tsk, cpu);
1423 	arch_spin_unlock(&tr->max_lock);
1424 }
1425 #endif /* CONFIG_TRACER_MAX_TRACE */
1426 
1427 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1428 {
1429 	/* Iterators are static, they should be filled or empty */
1430 	if (trace_buffer_iter(iter, iter->cpu_file))
1431 		return 0;
1432 
1433 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1434 				full);
1435 }
1436 
1437 #ifdef CONFIG_FTRACE_STARTUP_TEST
1438 static bool selftests_can_run;
1439 
1440 struct trace_selftests {
1441 	struct list_head		list;
1442 	struct tracer			*type;
1443 };
1444 
1445 static LIST_HEAD(postponed_selftests);
1446 
1447 static int save_selftest(struct tracer *type)
1448 {
1449 	struct trace_selftests *selftest;
1450 
1451 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1452 	if (!selftest)
1453 		return -ENOMEM;
1454 
1455 	selftest->type = type;
1456 	list_add(&selftest->list, &postponed_selftests);
1457 	return 0;
1458 }
1459 
1460 static int run_tracer_selftest(struct tracer *type)
1461 {
1462 	struct trace_array *tr = &global_trace;
1463 	struct tracer *saved_tracer = tr->current_trace;
1464 	int ret;
1465 
1466 	if (!type->selftest || tracing_selftest_disabled)
1467 		return 0;
1468 
1469 	/*
1470 	 * If a tracer registers early in boot up (before scheduling is
1471 	 * initialized and such), then do not run its selftests yet.
1472 	 * Instead, run it a little later in the boot process.
1473 	 */
1474 	if (!selftests_can_run)
1475 		return save_selftest(type);
1476 
1477 	/*
1478 	 * Run a selftest on this tracer.
1479 	 * Here we reset the trace buffer, and set the current
1480 	 * tracer to be this tracer. The tracer can then run some
1481 	 * internal tracing to verify that everything is in order.
1482 	 * If we fail, we do not register this tracer.
1483 	 */
1484 	tracing_reset_online_cpus(&tr->trace_buffer);
1485 
1486 	tr->current_trace = type;
1487 
1488 #ifdef CONFIG_TRACER_MAX_TRACE
1489 	if (type->use_max_tr) {
1490 		/* If we expanded the buffers, make sure the max is expanded too */
1491 		if (ring_buffer_expanded)
1492 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1493 					   RING_BUFFER_ALL_CPUS);
1494 		tr->allocated_snapshot = true;
1495 	}
1496 #endif
1497 
1498 	/* the test is responsible for initializing and enabling */
1499 	pr_info("Testing tracer %s: ", type->name);
1500 	ret = type->selftest(type, tr);
1501 	/* the test is responsible for resetting too */
1502 	tr->current_trace = saved_tracer;
1503 	if (ret) {
1504 		printk(KERN_CONT "FAILED!\n");
1505 		/* Add the warning after printing 'FAILED' */
1506 		WARN_ON(1);
1507 		return -1;
1508 	}
1509 	/* Only reset on passing, to avoid touching corrupted buffers */
1510 	tracing_reset_online_cpus(&tr->trace_buffer);
1511 
1512 #ifdef CONFIG_TRACER_MAX_TRACE
1513 	if (type->use_max_tr) {
1514 		tr->allocated_snapshot = false;
1515 
1516 		/* Shrink the max buffer again */
1517 		if (ring_buffer_expanded)
1518 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1519 					   RING_BUFFER_ALL_CPUS);
1520 	}
1521 #endif
1522 
1523 	printk(KERN_CONT "PASSED\n");
1524 	return 0;
1525 }
1526 
1527 static __init int init_trace_selftests(void)
1528 {
1529 	struct trace_selftests *p, *n;
1530 	struct tracer *t, **last;
1531 	int ret;
1532 
1533 	selftests_can_run = true;
1534 
1535 	mutex_lock(&trace_types_lock);
1536 
1537 	if (list_empty(&postponed_selftests))
1538 		goto out;
1539 
1540 	pr_info("Running postponed tracer tests:\n");
1541 
1542 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1543 		ret = run_tracer_selftest(p->type);
1544 		/* If the test fails, then warn and remove from available_tracers */
1545 		if (ret < 0) {
1546 			WARN(1, "tracer: %s failed selftest, disabling\n",
1547 			     p->type->name);
1548 			last = &trace_types;
1549 			for (t = trace_types; t; t = t->next) {
1550 				if (t == p->type) {
1551 					*last = t->next;
1552 					break;
1553 				}
1554 				last = &t->next;
1555 			}
1556 		}
1557 		list_del(&p->list);
1558 		kfree(p);
1559 	}
1560 
1561  out:
1562 	mutex_unlock(&trace_types_lock);
1563 
1564 	return 0;
1565 }
1566 core_initcall(init_trace_selftests);
1567 #else
1568 static inline int run_tracer_selftest(struct tracer *type)
1569 {
1570 	return 0;
1571 }
1572 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1573 
1574 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1575 
1576 static void __init apply_trace_boot_options(void);
1577 
1578 /**
1579  * register_tracer - register a tracer with the ftrace system.
1580  * @type: the plugin for the tracer
1581  *
1582  * Register a new plugin tracer.
1583  */
1584 int __init register_tracer(struct tracer *type)
1585 {
1586 	struct tracer *t;
1587 	int ret = 0;
1588 
1589 	if (!type->name) {
1590 		pr_info("Tracer must have a name\n");
1591 		return -1;
1592 	}
1593 
1594 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1595 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1596 		return -1;
1597 	}
1598 
1599 	mutex_lock(&trace_types_lock);
1600 
1601 	tracing_selftest_running = true;
1602 
1603 	for (t = trace_types; t; t = t->next) {
1604 		if (strcmp(type->name, t->name) == 0) {
1605 			/* already found */
1606 			pr_info("Tracer %s already registered\n",
1607 				type->name);
1608 			ret = -1;
1609 			goto out;
1610 		}
1611 	}
1612 
1613 	if (!type->set_flag)
1614 		type->set_flag = &dummy_set_flag;
1615 	if (!type->flags) {
1616 		/* allocate a dummy tracer_flags */
1617 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1618 		if (!type->flags) {
1619 			ret = -ENOMEM;
1620 			goto out;
1621 		}
1622 		type->flags->val = 0;
1623 		type->flags->opts = dummy_tracer_opt;
1624 	} else
1625 		if (!type->flags->opts)
1626 			type->flags->opts = dummy_tracer_opt;
1627 
1628 	/* store the tracer for __set_tracer_option */
1629 	type->flags->trace = type;
1630 
1631 	ret = run_tracer_selftest(type);
1632 	if (ret < 0)
1633 		goto out;
1634 
1635 	type->next = trace_types;
1636 	trace_types = type;
1637 	add_tracer_options(&global_trace, type);
1638 
1639  out:
1640 	tracing_selftest_running = false;
1641 	mutex_unlock(&trace_types_lock);
1642 
1643 	if (ret || !default_bootup_tracer)
1644 		goto out_unlock;
1645 
1646 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1647 		goto out_unlock;
1648 
1649 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1650 	/* Do we want this tracer to start on bootup? */
1651 	tracing_set_tracer(&global_trace, type->name);
1652 	default_bootup_tracer = NULL;
1653 
1654 	apply_trace_boot_options();
1655 
1656 	/* disable other selftests, since they would break this running tracer. */
1657 	tracing_selftest_disabled = true;
1658 #ifdef CONFIG_FTRACE_STARTUP_TEST
1659 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1660 	       type->name);
1661 #endif
1662 
1663  out_unlock:
1664 	return ret;
1665 }
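
/*
 * Illustrative sketch (not part of the original file): the minimal shape of a
 * tracer plugin that register_tracer() accepts.  The "example" tracer, its
 * callbacks and the initcall are hypothetical; real tracers live in their own
 * files and usually also provide a selftest.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_initcall(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_initcall);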
1666 
1667 void tracing_reset(struct trace_buffer *buf, int cpu)
1668 {
1669 	struct ring_buffer *buffer = buf->buffer;
1670 
1671 	if (!buffer)
1672 		return;
1673 
1674 	ring_buffer_record_disable(buffer);
1675 
1676 	/* Make sure all commits have finished */
1677 	synchronize_sched();
1678 	ring_buffer_reset_cpu(buffer, cpu);
1679 
1680 	ring_buffer_record_enable(buffer);
1681 }
1682 
1683 void tracing_reset_online_cpus(struct trace_buffer *buf)
1684 {
1685 	struct ring_buffer *buffer = buf->buffer;
1686 	int cpu;
1687 
1688 	if (!buffer)
1689 		return;
1690 
1691 	ring_buffer_record_disable(buffer);
1692 
1693 	/* Make sure all commits have finished */
1694 	synchronize_sched();
1695 
1696 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1697 
1698 	for_each_online_cpu(cpu)
1699 		ring_buffer_reset_cpu(buffer, cpu);
1700 
1701 	ring_buffer_record_enable(buffer);
1702 }
1703 
1704 /* Must have trace_types_lock held */
1705 void tracing_reset_all_online_cpus(void)
1706 {
1707 	struct trace_array *tr;
1708 
1709 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1710 		if (!tr->clear_trace)
1711 			continue;
1712 		tr->clear_trace = false;
1713 		tracing_reset_online_cpus(&tr->trace_buffer);
1714 #ifdef CONFIG_TRACER_MAX_TRACE
1715 		tracing_reset_online_cpus(&tr->max_buffer);
1716 #endif
1717 	}
1718 }
1719 
1720 static int *tgid_map;
1721 
1722 #define SAVED_CMDLINES_DEFAULT 128
1723 #define NO_CMDLINE_MAP UINT_MAX
1724 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1725 struct saved_cmdlines_buffer {
1726 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1727 	unsigned *map_cmdline_to_pid;
1728 	unsigned cmdline_num;
1729 	int cmdline_idx;
1730 	char *saved_cmdlines;
1731 };
1732 static struct saved_cmdlines_buffer *savedcmd;
1733 
1734 /* temporarily disable recording */
1735 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1736 
1737 static inline char *get_saved_cmdlines(int idx)
1738 {
1739 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1740 }
1741 
1742 static inline void set_cmdline(int idx, const char *cmdline)
1743 {
1744 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1745 }
1746 
1747 static int allocate_cmdlines_buffer(unsigned int val,
1748 				    struct saved_cmdlines_buffer *s)
1749 {
1750 	s->map_cmdline_to_pid = kmalloc_array(val,
1751 					      sizeof(*s->map_cmdline_to_pid),
1752 					      GFP_KERNEL);
1753 	if (!s->map_cmdline_to_pid)
1754 		return -ENOMEM;
1755 
1756 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1757 	if (!s->saved_cmdlines) {
1758 		kfree(s->map_cmdline_to_pid);
1759 		return -ENOMEM;
1760 	}
1761 
1762 	s->cmdline_idx = 0;
1763 	s->cmdline_num = val;
1764 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1765 	       sizeof(s->map_pid_to_cmdline));
1766 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1767 	       val * sizeof(*s->map_cmdline_to_pid));
1768 
1769 	return 0;
1770 }
1771 
1772 static int trace_create_savedcmd(void)
1773 {
1774 	int ret;
1775 
1776 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1777 	if (!savedcmd)
1778 		return -ENOMEM;
1779 
1780 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1781 	if (ret < 0) {
1782 		kfree(savedcmd);
1783 		savedcmd = NULL;
1784 		return -ENOMEM;
1785 	}
1786 
1787 	return 0;
1788 }
1789 
1790 int is_tracing_stopped(void)
1791 {
1792 	return global_trace.stop_count;
1793 }
1794 
1795 /**
1796  * tracing_start - quick start of the tracer
1797  *
1798  * If tracing is enabled but was stopped by tracing_stop,
1799  * this will start the tracer back up.
1800  */
1801 void tracing_start(void)
1802 {
1803 	struct ring_buffer *buffer;
1804 	unsigned long flags;
1805 
1806 	if (tracing_disabled)
1807 		return;
1808 
1809 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1810 	if (--global_trace.stop_count) {
1811 		if (global_trace.stop_count < 0) {
1812 			/* Someone screwed up their debugging */
1813 			WARN_ON_ONCE(1);
1814 			global_trace.stop_count = 0;
1815 		}
1816 		goto out;
1817 	}
1818 
1819 	/* Prevent the buffers from switching */
1820 	arch_spin_lock(&global_trace.max_lock);
1821 
1822 	buffer = global_trace.trace_buffer.buffer;
1823 	if (buffer)
1824 		ring_buffer_record_enable(buffer);
1825 
1826 #ifdef CONFIG_TRACER_MAX_TRACE
1827 	buffer = global_trace.max_buffer.buffer;
1828 	if (buffer)
1829 		ring_buffer_record_enable(buffer);
1830 #endif
1831 
1832 	arch_spin_unlock(&global_trace.max_lock);
1833 
1834  out:
1835 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1836 }
1837 
1838 static void tracing_start_tr(struct trace_array *tr)
1839 {
1840 	struct ring_buffer *buffer;
1841 	unsigned long flags;
1842 
1843 	if (tracing_disabled)
1844 		return;
1845 
1846 	/* If global, we need to also start the max tracer */
1847 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1848 		return tracing_start();
1849 
1850 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1851 
1852 	if (--tr->stop_count) {
1853 		if (tr->stop_count < 0) {
1854 			/* Someone screwed up their debugging */
1855 			WARN_ON_ONCE(1);
1856 			tr->stop_count = 0;
1857 		}
1858 		goto out;
1859 	}
1860 
1861 	buffer = tr->trace_buffer.buffer;
1862 	if (buffer)
1863 		ring_buffer_record_enable(buffer);
1864 
1865  out:
1866 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1867 }
1868 
1869 /**
1870  * tracing_stop - quick stop of the tracer
1871  *
1872  * Lightweight way to stop tracing. Use in conjunction with
1873  * tracing_start.
1874  */
1875 void tracing_stop(void)
1876 {
1877 	struct ring_buffer *buffer;
1878 	unsigned long flags;
1879 
1880 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1881 	if (global_trace.stop_count++)
1882 		goto out;
1883 
1884 	/* Prevent the buffers from switching */
1885 	arch_spin_lock(&global_trace.max_lock);
1886 
1887 	buffer = global_trace.trace_buffer.buffer;
1888 	if (buffer)
1889 		ring_buffer_record_disable(buffer);
1890 
1891 #ifdef CONFIG_TRACER_MAX_TRACE
1892 	buffer = global_trace.max_buffer.buffer;
1893 	if (buffer)
1894 		ring_buffer_record_disable(buffer);
1895 #endif
1896 
1897 	arch_spin_unlock(&global_trace.max_lock);
1898 
1899  out:
1900 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1901 }
1902 
1903 static void tracing_stop_tr(struct trace_array *tr)
1904 {
1905 	struct ring_buffer *buffer;
1906 	unsigned long flags;
1907 
1908 	/* If global, we need to also stop the max tracer */
1909 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1910 		return tracing_stop();
1911 
1912 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1913 	if (tr->stop_count++)
1914 		goto out;
1915 
1916 	buffer = tr->trace_buffer.buffer;
1917 	if (buffer)
1918 		ring_buffer_record_disable(buffer);
1919 
1920  out:
1921 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1922 }
1923 
1924 static int trace_save_cmdline(struct task_struct *tsk)
1925 {
1926 	unsigned pid, idx;
1927 
1928 	/* treat recording of idle task as a success */
1929 	if (!tsk->pid)
1930 		return 1;
1931 
1932 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1933 		return 0;
1934 
1935 	/*
1936 	 * It's not the end of the world if we don't get
1937 	 * the lock, but we also don't want to spin
1938 	 * nor do we want to disable interrupts,
1939 	 * so if we miss here, then better luck next time.
1940 	 */
1941 	if (!arch_spin_trylock(&trace_cmdline_lock))
1942 		return 0;
1943 
1944 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1945 	if (idx == NO_CMDLINE_MAP) {
1946 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1947 
1948 		/*
1949 		 * Check whether the cmdline buffer at idx has a pid
1950 		 * mapped. We are going to overwrite that entry so we
1951 		 * need to clear the map_pid_to_cmdline. Otherwise we
1952 		 * would read the new comm for the old pid.
1953 		 */
1954 		pid = savedcmd->map_cmdline_to_pid[idx];
1955 		if (pid != NO_CMDLINE_MAP)
1956 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1957 
1958 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1959 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1960 
1961 		savedcmd->cmdline_idx = idx;
1962 	}
1963 
1964 	set_cmdline(idx, tsk->comm);
1965 
1966 	arch_spin_unlock(&trace_cmdline_lock);
1967 
1968 	return 1;
1969 }
1970 
1971 static void __trace_find_cmdline(int pid, char comm[])
1972 {
1973 	unsigned map;
1974 
1975 	if (!pid) {
1976 		strcpy(comm, "<idle>");
1977 		return;
1978 	}
1979 
1980 	if (WARN_ON_ONCE(pid < 0)) {
1981 		strcpy(comm, "<XXX>");
1982 		return;
1983 	}
1984 
1985 	if (pid > PID_MAX_DEFAULT) {
1986 		strcpy(comm, "<...>");
1987 		return;
1988 	}
1989 
1990 	map = savedcmd->map_pid_to_cmdline[pid];
1991 	if (map != NO_CMDLINE_MAP)
1992 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1993 	else
1994 		strcpy(comm, "<...>");
1995 }
1996 
1997 void trace_find_cmdline(int pid, char comm[])
1998 {
1999 	preempt_disable();
2000 	arch_spin_lock(&trace_cmdline_lock);
2001 
2002 	__trace_find_cmdline(pid, comm);
2003 
2004 	arch_spin_unlock(&trace_cmdline_lock);
2005 	preempt_enable();
2006 }
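
/*
 * Illustrative sketch (hypothetical caller, not part of the original file):
 * resolving a recorded pid back to the saved comm.  The buffer must be at
 * least TASK_COMM_LEN bytes, since __trace_find_cmdline() above copies up
 * to that many.
 */
static __maybe_unused void example_resolve_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	/* comm now holds the task name, or "<...>" if nothing was saved */
}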
2007 
2008 int trace_find_tgid(int pid)
2009 {
2010 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2011 		return 0;
2012 
2013 	return tgid_map[pid];
2014 }
2015 
2016 static int trace_save_tgid(struct task_struct *tsk)
2017 {
2018 	/* treat recording of idle task as a success */
2019 	if (!tsk->pid)
2020 		return 1;
2021 
2022 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2023 		return 0;
2024 
2025 	tgid_map[tsk->pid] = tsk->tgid;
2026 	return 1;
2027 }
2028 
2029 static bool tracing_record_taskinfo_skip(int flags)
2030 {
2031 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2032 		return true;
2033 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2034 		return true;
2035 	if (!__this_cpu_read(trace_taskinfo_save))
2036 		return true;
2037 	return false;
2038 }
2039 
2040 /**
2041  * tracing_record_taskinfo - record the task info of a task
2042  *
2043  * @task:  task to record
2044  * @flags: TRACE_RECORD_CMDLINE for recording comm
2045  *         TRACE_RECORD_TGID for recording tgid
2046  */
2047 void tracing_record_taskinfo(struct task_struct *task, int flags)
2048 {
2049 	bool done;
2050 
2051 	if (tracing_record_taskinfo_skip(flags))
2052 		return;
2053 
2054 	/*
2055 	 * Record as much task information as possible. If some fail, continue
2056 	 * to try to record the others.
2057 	 */
2058 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2059 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2060 
2061 	/* If recording any information failed, retry again soon. */
2062 	if (!done)
2063 		return;
2064 
2065 	__this_cpu_write(trace_taskinfo_save, false);
2066 }
2067 
2068 /**
2069  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2070  *
2071  * @prev:  previous task during sched_switch
2072  * @next:  next task during sched_switch
2073  * @flags: TRACE_RECORD_CMDLINE for recording comm
2074  *         TRACE_RECORD_TGID for recording tgid
2075  */
2076 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2077 					  struct task_struct *next, int flags)
2078 {
2079 	bool done;
2080 
2081 	if (tracing_record_taskinfo_skip(flags))
2082 		return;
2083 
2084 	/*
2085 	 * Record as much task information as possible. If some fail, continue
2086 	 * to try to record the others.
2087 	 */
2088 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2089 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2090 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2091 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2092 
2093 	/* If recording any information failed, retry again soon. */
2094 	if (!done)
2095 		return;
2096 
2097 	__this_cpu_write(trace_taskinfo_save, false);
2098 }
2099 
2100 /* Helpers to record a specific task information */
2101 void tracing_record_cmdline(struct task_struct *task)
2102 {
2103 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2104 }
2105 
2106 void tracing_record_tgid(struct task_struct *task)
2107 {
2108 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2109 }
2110 
2111 /*
2112  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2113  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2114  * simplifies those functions and keeps them in sync.
2115  */
2116 enum print_line_t trace_handle_return(struct trace_seq *s)
2117 {
2118 	return trace_seq_has_overflowed(s) ?
2119 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2120 }
2121 EXPORT_SYMBOL_GPL(trace_handle_return);
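
/*
 * Illustrative sketch (the function name is hypothetical and not part of
 * the original file): output callbacks typically end with
 * trace_handle_return() so that an overflowed trace_seq is reported as
 * TRACE_TYPE_PARTIAL_LINE.
 */
static __maybe_unused enum print_line_t
example_print_line(struct trace_iterator *iter)
{
	trace_seq_printf(&iter->seq, "cpu=%d ts=%llu\n", iter->cpu, iter->ts);
	return trace_handle_return(&iter->seq);
}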
2122 
2123 void
2124 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2125 			     int pc)
2126 {
2127 	struct task_struct *tsk = current;
2128 
2129 	entry->preempt_count		= pc & 0xff;
2130 	entry->pid			= (tsk) ? tsk->pid : 0;
2131 	entry->flags =
2132 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2133 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2134 #else
2135 		TRACE_FLAG_IRQS_NOSUPPORT |
2136 #endif
2137 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2138 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2139 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2140 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2141 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2142 }
2143 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
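
/*
 * Illustrative sketch (hypothetical helper): callers normally sample the
 * irq flags and the preempt count at the tracing site and pass them down,
 * so the entry records the context in which the event happened.
 */
static __maybe_unused void example_fill_entry(struct trace_entry *ent)
{
	unsigned long flags;

	local_save_flags(flags);
	tracing_generic_entry_update(ent, flags, preempt_count());
}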
2144 
2145 struct ring_buffer_event *
2146 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2147 			  int type,
2148 			  unsigned long len,
2149 			  unsigned long flags, int pc)
2150 {
2151 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2152 }
2153 
2154 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2155 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2156 static int trace_buffered_event_ref;
2157 
2158 /**
2159  * trace_buffered_event_enable - enable buffering events
2160  *
2161  * When events are being filtered, it is quicker to use a temporary
2162  * buffer to write the event data into if there's a likely chance
2163  * that it will not be committed. Discarding an event from the ring
2164  * buffer is not as fast as committing it, and is much slower than
2165  * copying the data and then committing.
2166  *
2167  * When an event is to be filtered, allocate per cpu buffers to
2168  * write the event data into, and if the event is filtered and discarded
2169  * it is simply dropped; otherwise, the entire data is committed
2170  * in one shot.
2171  */
2172 void trace_buffered_event_enable(void)
2173 {
2174 	struct ring_buffer_event *event;
2175 	struct page *page;
2176 	int cpu;
2177 
2178 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2179 
2180 	if (trace_buffered_event_ref++)
2181 		return;
2182 
2183 	for_each_tracing_cpu(cpu) {
2184 		page = alloc_pages_node(cpu_to_node(cpu),
2185 					GFP_KERNEL | __GFP_NORETRY, 0);
2186 		if (!page)
2187 			goto failed;
2188 
2189 		event = page_address(page);
2190 		memset(event, 0, sizeof(*event));
2191 
2192 		per_cpu(trace_buffered_event, cpu) = event;
2193 
2194 		preempt_disable();
2195 		if (cpu == smp_processor_id() &&
2196 		    this_cpu_read(trace_buffered_event) !=
2197 		    per_cpu(trace_buffered_event, cpu))
2198 			WARN_ON_ONCE(1);
2199 		preempt_enable();
2200 	}
2201 
2202 	return;
2203  failed:
2204 	trace_buffered_event_disable();
2205 }
2206 
2207 static void enable_trace_buffered_event(void *data)
2208 {
2209 	/* Probably not needed, but do it anyway */
2210 	smp_rmb();
2211 	this_cpu_dec(trace_buffered_event_cnt);
2212 }
2213 
2214 static void disable_trace_buffered_event(void *data)
2215 {
2216 	this_cpu_inc(trace_buffered_event_cnt);
2217 }
2218 
2219 /**
2220  * trace_buffered_event_disable - disable buffering events
2221  *
2222  * When a filter is removed, it is faster to not use the buffered
2223  * events, and to commit directly into the ring buffer. Free up
2224  * the temp buffers when there are no more users. This requires
2225  * special synchronization with current events.
2226  */
2227 void trace_buffered_event_disable(void)
2228 {
2229 	int cpu;
2230 
2231 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2232 
2233 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2234 		return;
2235 
2236 	if (--trace_buffered_event_ref)
2237 		return;
2238 
2239 	preempt_disable();
2240 	/* For each CPU, set the buffer as used. */
2241 	smp_call_function_many(tracing_buffer_mask,
2242 			       disable_trace_buffered_event, NULL, 1);
2243 	preempt_enable();
2244 
2245 	/* Wait for all current users to finish */
2246 	synchronize_sched();
2247 
2248 	for_each_tracing_cpu(cpu) {
2249 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2250 		per_cpu(trace_buffered_event, cpu) = NULL;
2251 	}
2252 	/*
2253 	 * Make sure trace_buffered_event is NULL before clearing
2254 	 * trace_buffered_event_cnt.
2255 	 */
2256 	smp_wmb();
2257 
2258 	preempt_disable();
2259 	/* Do the work on each cpu */
2260 	smp_call_function_many(tracing_buffer_mask,
2261 			       enable_trace_buffered_event, NULL, 1);
2262 	preempt_enable();
2263 }
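
/*
 * Illustrative sketch of the expected calling pattern (the function name is
 * hypothetical): enable/disable are refcounted and, as the WARN_ON_ONCE()
 * checks above show, must be called with event_mutex held, so a filter
 * setup path would bracket its work like this.
 */
static __maybe_unused void example_filter_setup(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... install or update event filters here ... */
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}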
2264 
2265 static struct ring_buffer *temp_buffer;
2266 
2267 struct ring_buffer_event *
2268 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2269 			  struct trace_event_file *trace_file,
2270 			  int type, unsigned long len,
2271 			  unsigned long flags, int pc)
2272 {
2273 	struct ring_buffer_event *entry;
2274 	int val;
2275 
2276 	*current_rb = trace_file->tr->trace_buffer.buffer;
2277 
2278 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2279 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2280 	    (entry = this_cpu_read(trace_buffered_event))) {
2281 		/* Try to use the per cpu buffer first */
2282 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2283 		if (val == 1) {
2284 			trace_event_setup(entry, type, flags, pc);
2285 			entry->array[0] = len;
2286 			return entry;
2287 		}
2288 		this_cpu_dec(trace_buffered_event_cnt);
2289 	}
2290 
2291 	entry = __trace_buffer_lock_reserve(*current_rb,
2292 					    type, len, flags, pc);
2293 	/*
2294 	 * If tracing is off, but we have triggers enabled
2295 	 * we still need to look at the event data. Use the temp_buffer
2296 	 * to store the trace event for the trigger to use. It's recursion
2297 	 * safe and will not be recorded anywhere.
2298 	 */
2299 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2300 		*current_rb = temp_buffer;
2301 		entry = __trace_buffer_lock_reserve(*current_rb,
2302 						    type, len, flags, pc);
2303 	}
2304 	return entry;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2307 
2308 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2309 static DEFINE_MUTEX(tracepoint_printk_mutex);
2310 
2311 static void output_printk(struct trace_event_buffer *fbuffer)
2312 {
2313 	struct trace_event_call *event_call;
2314 	struct trace_event *event;
2315 	unsigned long flags;
2316 	struct trace_iterator *iter = tracepoint_print_iter;
2317 
2318 	/* We should never get here if iter is NULL */
2319 	if (WARN_ON_ONCE(!iter))
2320 		return;
2321 
2322 	event_call = fbuffer->trace_file->event_call;
2323 	if (!event_call || !event_call->event.funcs ||
2324 	    !event_call->event.funcs->trace)
2325 		return;
2326 
2327 	event = &fbuffer->trace_file->event_call->event;
2328 
2329 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2330 	trace_seq_init(&iter->seq);
2331 	iter->ent = fbuffer->entry;
2332 	event_call->event.funcs->trace(iter, 0, event);
2333 	trace_seq_putc(&iter->seq, 0);
2334 	printk("%s", iter->seq.buffer);
2335 
2336 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2337 }
2338 
2339 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2340 			     void __user *buffer, size_t *lenp,
2341 			     loff_t *ppos)
2342 {
2343 	int save_tracepoint_printk;
2344 	int ret;
2345 
2346 	mutex_lock(&tracepoint_printk_mutex);
2347 	save_tracepoint_printk = tracepoint_printk;
2348 
2349 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2350 
2351 	/*
2352 	 * This will force exiting early, as tracepoint_printk
2353 	 * is always zero when tracepoint_print_iter is not allocated.
2354 	 */
2355 	if (!tracepoint_print_iter)
2356 		tracepoint_printk = 0;
2357 
2358 	if (save_tracepoint_printk == tracepoint_printk)
2359 		goto out;
2360 
2361 	if (tracepoint_printk)
2362 		static_key_enable(&tracepoint_printk_key.key);
2363 	else
2364 		static_key_disable(&tracepoint_printk_key.key);
2365 
2366  out:
2367 	mutex_unlock(&tracepoint_printk_mutex);
2368 
2369 	return ret;
2370 }
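
/*
 * Illustrative note (assuming the usual sysctl wiring for this knob): the
 * handler above is normally reached from user space via, e.g.,
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * which goes through proc_dointvec() and flips the static key so that
 * trace_event_buffer_commit() below starts mirroring events to printk().
 */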
2371 
2372 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2373 {
2374 	if (static_key_false(&tracepoint_printk_key.key))
2375 		output_printk(fbuffer);
2376 
2377 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2378 				    fbuffer->event, fbuffer->entry,
2379 				    fbuffer->flags, fbuffer->pc);
2380 }
2381 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2382 
2383 /*
2384  * Skip 3:
2385  *
2386  *   trace_buffer_unlock_commit_regs()
2387  *   trace_event_buffer_commit()
2388  *   trace_event_raw_event_xxx()
2389  */
2390 # define STACK_SKIP 3
2391 
2392 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2393 				     struct ring_buffer *buffer,
2394 				     struct ring_buffer_event *event,
2395 				     unsigned long flags, int pc,
2396 				     struct pt_regs *regs)
2397 {
2398 	__buffer_unlock_commit(buffer, event);
2399 
2400 	/*
2401 	 * If regs is not set, then skip the necessary functions.
2402 	 * Note, we can still get here via blktrace, wakeup tracer
2403 	 * and mmiotrace, but that's ok if they lose a function or
2404 	 * two. They are not that meaningful.
2405 	 */
2406 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2407 	ftrace_trace_userstack(buffer, flags, pc);
2408 }
2409 
2410 /*
2411  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2412  */
2413 void
2414 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2415 				   struct ring_buffer_event *event)
2416 {
2417 	__buffer_unlock_commit(buffer, event);
2418 }
2419 
2420 static void
2421 trace_process_export(struct trace_export *export,
2422 	       struct ring_buffer_event *event)
2423 {
2424 	struct trace_entry *entry;
2425 	unsigned int size = 0;
2426 
2427 	entry = ring_buffer_event_data(event);
2428 	size = ring_buffer_event_length(event);
2429 	export->write(export, entry, size);
2430 }
2431 
2432 static DEFINE_MUTEX(ftrace_export_lock);
2433 
2434 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2435 
2436 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2437 
2438 static inline void ftrace_exports_enable(void)
2439 {
2440 	static_branch_enable(&ftrace_exports_enabled);
2441 }
2442 
2443 static inline void ftrace_exports_disable(void)
2444 {
2445 	static_branch_disable(&ftrace_exports_enabled);
2446 }
2447 
2448 void ftrace_exports(struct ring_buffer_event *event)
2449 {
2450 	struct trace_export *export;
2451 
2452 	preempt_disable_notrace();
2453 
2454 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2455 	while (export) {
2456 		trace_process_export(export, event);
2457 		export = rcu_dereference_raw_notrace(export->next);
2458 	}
2459 
2460 	preempt_enable_notrace();
2461 }
2462 
2463 static inline void
2464 add_trace_export(struct trace_export **list, struct trace_export *export)
2465 {
2466 	rcu_assign_pointer(export->next, *list);
2467 	/*
2468 	 * We are adding export to the list but another
2469 	 * CPU might be walking that list. We need to make sure
2470 	 * the export->next pointer is valid before another CPU sees
2471 	 * the export pointer inserted into the list.
2472 	 */
2473 	rcu_assign_pointer(*list, export);
2474 }
2475 
2476 static inline int
2477 rm_trace_export(struct trace_export **list, struct trace_export *export)
2478 {
2479 	struct trace_export **p;
2480 
2481 	for (p = list; *p != NULL; p = &(*p)->next)
2482 		if (*p == export)
2483 			break;
2484 
2485 	if (*p != export)
2486 		return -1;
2487 
2488 	rcu_assign_pointer(*p, (*p)->next);
2489 
2490 	return 0;
2491 }
2492 
2493 static inline void
2494 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2495 {
2496 	if (*list == NULL)
2497 		ftrace_exports_enable();
2498 
2499 	add_trace_export(list, export);
2500 }
2501 
2502 static inline int
2503 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505 	int ret;
2506 
2507 	ret = rm_trace_export(list, export);
2508 	if (*list == NULL)
2509 		ftrace_exports_disable();
2510 
2511 	return ret;
2512 }
2513 
2514 int register_ftrace_export(struct trace_export *export)
2515 {
2516 	if (WARN_ON_ONCE(!export->write))
2517 		return -1;
2518 
2519 	mutex_lock(&ftrace_export_lock);
2520 
2521 	add_ftrace_export(&ftrace_exports_list, export);
2522 
2523 	mutex_unlock(&ftrace_export_lock);
2524 
2525 	return 0;
2526 }
2527 EXPORT_SYMBOL_GPL(register_ftrace_export);
2528 
2529 int unregister_ftrace_export(struct trace_export *export)
2530 {
2531 	int ret;
2532 
2533 	mutex_lock(&ftrace_export_lock);
2534 
2535 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2536 
2537 	mutex_unlock(&ftrace_export_lock);
2538 
2539 	return ret;
2540 }
2541 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
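
/*
 * Illustrative sketch (hypothetical module code): a trace_export only needs
 * a ->write() callback; the callback signature below is assumed from the
 * call in trace_process_export() above.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* push the raw trace entry to some out-of-band sink */
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
};

/*
 * A module would then pair register_ftrace_export(&example_export) in its
 * init path with unregister_ftrace_export(&example_export) on exit.
 */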
2542 
2543 void
2544 trace_function(struct trace_array *tr,
2545 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2546 	       int pc)
2547 {
2548 	struct trace_event_call *call = &event_function;
2549 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2550 	struct ring_buffer_event *event;
2551 	struct ftrace_entry *entry;
2552 
2553 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2554 					    flags, pc);
2555 	if (!event)
2556 		return;
2557 	entry	= ring_buffer_event_data(event);
2558 	entry->ip			= ip;
2559 	entry->parent_ip		= parent_ip;
2560 
2561 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2562 		if (static_branch_unlikely(&ftrace_exports_enabled))
2563 			ftrace_exports(event);
2564 		__buffer_unlock_commit(buffer, event);
2565 	}
2566 }
2567 
2568 #ifdef CONFIG_STACKTRACE
2569 
2570 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2571 struct ftrace_stack {
2572 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2573 };
2574 
2575 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2576 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2577 
2578 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2579 				 unsigned long flags,
2580 				 int skip, int pc, struct pt_regs *regs)
2581 {
2582 	struct trace_event_call *call = &event_kernel_stack;
2583 	struct ring_buffer_event *event;
2584 	struct stack_entry *entry;
2585 	struct stack_trace trace;
2586 	int use_stack;
2587 	int size = FTRACE_STACK_ENTRIES;
2588 
2589 	trace.nr_entries	= 0;
2590 	trace.skip		= skip;
2591 
2592 	/*
2593 	 * Add one for this function and the call to save_stack_trace().
2594 	 * If regs is set, then these functions will not be in the way.
2595 	 */
2596 #ifndef CONFIG_UNWINDER_ORC
2597 	if (!regs)
2598 		trace.skip++;
2599 #endif
2600 
2601 	/*
2602 	 * Since events can happen in NMIs, there's no safe way to
2603 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2604 	 * or NMI comes in, it will just have to use the default
2605 	 * FTRACE_STACK_SIZE.
2606 	 */
2607 	preempt_disable_notrace();
2608 
2609 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2610 	/*
2611 	 * We don't need any atomic variables, just a barrier.
2612 	 * If an interrupt comes in, we don't care, because it would
2613 	 * have exited and put the counter back to what we want.
2614 	 * We just need a barrier to keep gcc from moving things
2615 	 * around.
2616 	 */
2617 	barrier();
2618 	if (use_stack == 1) {
2619 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2620 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2621 
2622 		if (regs)
2623 			save_stack_trace_regs(regs, &trace);
2624 		else
2625 			save_stack_trace(&trace);
2626 
2627 		if (trace.nr_entries > size)
2628 			size = trace.nr_entries;
2629 	} else
2630 		/* From now on, use_stack is a boolean */
2631 		use_stack = 0;
2632 
2633 	size *= sizeof(unsigned long);
2634 
2635 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2636 					    sizeof(*entry) + size, flags, pc);
2637 	if (!event)
2638 		goto out;
2639 	entry = ring_buffer_event_data(event);
2640 
2641 	memset(&entry->caller, 0, size);
2642 
2643 	if (use_stack)
2644 		memcpy(&entry->caller, trace.entries,
2645 		       trace.nr_entries * sizeof(unsigned long));
2646 	else {
2647 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2648 		trace.entries		= entry->caller;
2649 		if (regs)
2650 			save_stack_trace_regs(regs, &trace);
2651 		else
2652 			save_stack_trace(&trace);
2653 	}
2654 
2655 	entry->size = trace.nr_entries;
2656 
2657 	if (!call_filter_check_discard(call, entry, buffer, event))
2658 		__buffer_unlock_commit(buffer, event);
2659 
2660  out:
2661 	/* Again, don't let gcc optimize things here */
2662 	barrier();
2663 	__this_cpu_dec(ftrace_stack_reserve);
2664 	preempt_enable_notrace();
2665 
2666 }
2667 
2668 static inline void ftrace_trace_stack(struct trace_array *tr,
2669 				      struct ring_buffer *buffer,
2670 				      unsigned long flags,
2671 				      int skip, int pc, struct pt_regs *regs)
2672 {
2673 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2674 		return;
2675 
2676 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2677 }
2678 
2679 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2680 		   int pc)
2681 {
2682 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2683 
2684 	if (rcu_is_watching()) {
2685 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2686 		return;
2687 	}
2688 
2689 	/*
2690 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2691 	 * but if the above rcu_is_watching() failed, then the NMI
2692 	 * triggered someplace critical, and rcu_irq_enter() should
2693 	 * not be called from NMI.
2694 	 */
2695 	if (unlikely(in_nmi()))
2696 		return;
2697 
2698 	rcu_irq_enter_irqson();
2699 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2700 	rcu_irq_exit_irqson();
2701 }
2702 
2703 /**
2704  * trace_dump_stack - record a stack back trace in the trace buffer
2705  * @skip: Number of functions to skip (helper handlers)
2706  */
2707 void trace_dump_stack(int skip)
2708 {
2709 	unsigned long flags;
2710 
2711 	if (tracing_disabled || tracing_selftest_running)
2712 		return;
2713 
2714 	local_save_flags(flags);
2715 
2716 #ifndef CONFIG_UNWINDER_ORC
2717 	/* Skip 1 to skip this function. */
2718 	skip++;
2719 #endif
2720 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2721 			     flags, skip, preempt_count(), NULL);
2722 }
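
/*
 * Illustrative sketch (hypothetical call site): dumping the current kernel
 * stack into the trace buffer from an arbitrary debug point.  A skip of 0
 * means no extra callers are skipped beyond trace_dump_stack() itself.
 */
static __maybe_unused void example_dump_here(void)
{
	trace_dump_stack(0);
}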
2723 
2724 static DEFINE_PER_CPU(int, user_stack_count);
2725 
2726 void
2727 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2728 {
2729 	struct trace_event_call *call = &event_user_stack;
2730 	struct ring_buffer_event *event;
2731 	struct userstack_entry *entry;
2732 	struct stack_trace trace;
2733 
2734 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2735 		return;
2736 
2737 	/*
2738 	 * NMIs cannot handle page faults, even with fixups.
2739 	 * Saving the user stack can (and often does) fault.
2740 	 */
2741 	if (unlikely(in_nmi()))
2742 		return;
2743 
2744 	/*
2745 	 * prevent recursion, since the user stack tracing may
2746 	 * trigger other kernel events.
2747 	 */
2748 	preempt_disable();
2749 	if (__this_cpu_read(user_stack_count))
2750 		goto out;
2751 
2752 	__this_cpu_inc(user_stack_count);
2753 
2754 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2755 					    sizeof(*entry), flags, pc);
2756 	if (!event)
2757 		goto out_drop_count;
2758 	entry	= ring_buffer_event_data(event);
2759 
2760 	entry->tgid		= current->tgid;
2761 	memset(&entry->caller, 0, sizeof(entry->caller));
2762 
2763 	trace.nr_entries	= 0;
2764 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2765 	trace.skip		= 0;
2766 	trace.entries		= entry->caller;
2767 
2768 	save_stack_trace_user(&trace);
2769 	if (!call_filter_check_discard(call, entry, buffer, event))
2770 		__buffer_unlock_commit(buffer, event);
2771 
2772  out_drop_count:
2773 	__this_cpu_dec(user_stack_count);
2774  out:
2775 	preempt_enable();
2776 }
2777 
2778 #ifdef UNUSED
2779 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2780 {
2781 	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2782 }
2783 #endif /* UNUSED */
2784 
2785 #endif /* CONFIG_STACKTRACE */
2786 
2787 /* created for use with alloc_percpu */
2788 struct trace_buffer_struct {
2789 	int nesting;
2790 	char buffer[4][TRACE_BUF_SIZE];
2791 };
2792 
2793 static struct trace_buffer_struct *trace_percpu_buffer;
2794 
2795 /*
2796  * This allows for lockless recording.  If we're nested too deeply, then
2797  * this returns NULL.
2798  */
2799 static char *get_trace_buf(void)
2800 {
2801 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2802 
2803 	if (!buffer || buffer->nesting >= 4)
2804 		return NULL;
2805 
2806 	buffer->nesting++;
2807 
2808 	/* Interrupts must see nesting incremented before we use the buffer */
2809 	barrier();
2810 	return &buffer->buffer[buffer->nesting][0];
2811 }
2812 
2813 static void put_trace_buf(void)
2814 {
2815 	/* Don't let the decrement of nesting leak before this */
2816 	barrier();
2817 	this_cpu_dec(trace_percpu_buffer->nesting);
2818 }
2819 
2820 static int alloc_percpu_trace_buffer(void)
2821 {
2822 	struct trace_buffer_struct *buffers;
2823 
2824 	buffers = alloc_percpu(struct trace_buffer_struct);
2825 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2826 		return -ENOMEM;
2827 
2828 	trace_percpu_buffer = buffers;
2829 	return 0;
2830 }
2831 
2832 static int buffers_allocated;
2833 
2834 void trace_printk_init_buffers(void)
2835 {
2836 	if (buffers_allocated)
2837 		return;
2838 
2839 	if (alloc_percpu_trace_buffer())
2840 		return;
2841 
2842 	/* trace_printk() is for debug use only. Don't use it in production. */
2843 
2844 	pr_warn("\n");
2845 	pr_warn("**********************************************************\n");
2846 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2847 	pr_warn("**                                                      **\n");
2848 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2849 	pr_warn("**                                                      **\n");
2850 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2851 	pr_warn("** unsafe for production use.                           **\n");
2852 	pr_warn("**                                                      **\n");
2853 	pr_warn("** If you see this message and you are not debugging    **\n");
2854 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2857 	pr_warn("**********************************************************\n");
2858 
2859 	/* Expand the buffers to set size */
2860 	tracing_update_buffers();
2861 
2862 	buffers_allocated = 1;
2863 
2864 	/*
2865 	 * trace_printk_init_buffers() can be called by modules.
2866 	 * If that happens, then we need to start cmdline recording
2867 	 * directly here. If the global_trace.buffer is already
2868 	 * allocated here, then this was called by module code.
2869 	 */
2870 	if (global_trace.trace_buffer.buffer)
2871 		tracing_start_cmdline_record();
2872 }
2873 
2874 void trace_printk_start_comm(void)
2875 {
2876 	/* Start tracing comms if trace printk is set */
2877 	if (!buffers_allocated)
2878 		return;
2879 	tracing_start_cmdline_record();
2880 }
2881 
2882 static void trace_printk_start_stop_comm(int enabled)
2883 {
2884 	if (!buffers_allocated)
2885 		return;
2886 
2887 	if (enabled)
2888 		tracing_start_cmdline_record();
2889 	else
2890 		tracing_stop_cmdline_record();
2891 }
2892 
2893 /**
2894  * trace_vbprintk - write binary msg to tracing buffer
2895  *
2896  */
2897 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2898 {
2899 	struct trace_event_call *call = &event_bprint;
2900 	struct ring_buffer_event *event;
2901 	struct ring_buffer *buffer;
2902 	struct trace_array *tr = &global_trace;
2903 	struct bprint_entry *entry;
2904 	unsigned long flags;
2905 	char *tbuffer;
2906 	int len = 0, size, pc;
2907 
2908 	if (unlikely(tracing_selftest_running || tracing_disabled))
2909 		return 0;
2910 
2911 	/* Don't pollute graph traces with trace_vprintk internals */
2912 	pause_graph_tracing();
2913 
2914 	pc = preempt_count();
2915 	preempt_disable_notrace();
2916 
2917 	tbuffer = get_trace_buf();
2918 	if (!tbuffer) {
2919 		len = 0;
2920 		goto out_nobuffer;
2921 	}
2922 
2923 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2924 
2925 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2926 		goto out;
2927 
2928 	local_save_flags(flags);
2929 	size = sizeof(*entry) + sizeof(u32) * len;
2930 	buffer = tr->trace_buffer.buffer;
2931 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2932 					    flags, pc);
2933 	if (!event)
2934 		goto out;
2935 	entry = ring_buffer_event_data(event);
2936 	entry->ip			= ip;
2937 	entry->fmt			= fmt;
2938 
2939 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2940 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2941 		__buffer_unlock_commit(buffer, event);
2942 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2943 	}
2944 
2945 out:
2946 	put_trace_buf();
2947 
2948 out_nobuffer:
2949 	preempt_enable_notrace();
2950 	unpause_graph_tracing();
2951 
2952 	return len;
2953 }
2954 EXPORT_SYMBOL_GPL(trace_vbprintk);
2955 
2956 __printf(3, 0)
2957 static int
2958 __trace_array_vprintk(struct ring_buffer *buffer,
2959 		      unsigned long ip, const char *fmt, va_list args)
2960 {
2961 	struct trace_event_call *call = &event_print;
2962 	struct ring_buffer_event *event;
2963 	int len = 0, size, pc;
2964 	struct print_entry *entry;
2965 	unsigned long flags;
2966 	char *tbuffer;
2967 
2968 	if (tracing_disabled || tracing_selftest_running)
2969 		return 0;
2970 
2971 	/* Don't pollute graph traces with trace_vprintk internals */
2972 	pause_graph_tracing();
2973 
2974 	pc = preempt_count();
2975 	preempt_disable_notrace();
2976 
2977 
2978 	tbuffer = get_trace_buf();
2979 	if (!tbuffer) {
2980 		len = 0;
2981 		goto out_nobuffer;
2982 	}
2983 
2984 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2985 
2986 	local_save_flags(flags);
2987 	size = sizeof(*entry) + len + 1;
2988 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2989 					    flags, pc);
2990 	if (!event)
2991 		goto out;
2992 	entry = ring_buffer_event_data(event);
2993 	entry->ip = ip;
2994 
2995 	memcpy(&entry->buf, tbuffer, len + 1);
2996 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2997 		__buffer_unlock_commit(buffer, event);
2998 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2999 	}
3000 
3001 out:
3002 	put_trace_buf();
3003 
3004 out_nobuffer:
3005 	preempt_enable_notrace();
3006 	unpause_graph_tracing();
3007 
3008 	return len;
3009 }
3010 
3011 __printf(3, 0)
3012 int trace_array_vprintk(struct trace_array *tr,
3013 			unsigned long ip, const char *fmt, va_list args)
3014 {
3015 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3016 }
3017 
3018 __printf(3, 0)
3019 int trace_array_printk(struct trace_array *tr,
3020 		       unsigned long ip, const char *fmt, ...)
3021 {
3022 	int ret;
3023 	va_list ap;
3024 
3025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3026 		return 0;
3027 
3028 	va_start(ap, fmt);
3029 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3030 	va_end(ap);
3031 	return ret;
3032 }
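
/*
 * Illustrative sketch (hypothetical caller): writing a formatted message
 * into a specific trace_array instance.  "tr" would be an instance obtained
 * elsewhere; _THIS_IP_ records the address of the call site.
 */
static __maybe_unused void example_instance_log(struct trace_array *tr, int depth)
{
	trace_array_printk(tr, _THIS_IP_, "queue depth %d\n", depth);
}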
3033 
3034 __printf(3, 4)
3035 int trace_array_printk_buf(struct ring_buffer *buffer,
3036 			   unsigned long ip, const char *fmt, ...)
3037 {
3038 	int ret;
3039 	va_list ap;
3040 
3041 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3042 		return 0;
3043 
3044 	va_start(ap, fmt);
3045 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3046 	va_end(ap);
3047 	return ret;
3048 }
3049 
3050 __printf(2, 0)
3051 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3052 {
3053 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3054 }
3055 EXPORT_SYMBOL_GPL(trace_vprintk);
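
/*
 * Illustrative sketch (hypothetical wrapper): a varargs helper that logs
 * into the global trace buffer through trace_vprintk(), mirroring what
 * trace_array_printk() does above for per-instance buffers.
 */
static __maybe_unused __printf(2, 3)
int example_global_log(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vprintk(ip, fmt, ap);
	va_end(ap);
	return ret;
}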
3056 
3057 static void trace_iterator_increment(struct trace_iterator *iter)
3058 {
3059 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3060 
3061 	iter->idx++;
3062 	if (buf_iter)
3063 		ring_buffer_read(buf_iter, NULL);
3064 }
3065 
3066 static struct trace_entry *
3067 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3068 		unsigned long *lost_events)
3069 {
3070 	struct ring_buffer_event *event;
3071 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3072 
3073 	if (buf_iter)
3074 		event = ring_buffer_iter_peek(buf_iter, ts);
3075 	else
3076 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3077 					 lost_events);
3078 
3079 	if (event) {
3080 		iter->ent_size = ring_buffer_event_length(event);
3081 		return ring_buffer_event_data(event);
3082 	}
3083 	iter->ent_size = 0;
3084 	return NULL;
3085 }
3086 
3087 static struct trace_entry *
3088 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3089 		  unsigned long *missing_events, u64 *ent_ts)
3090 {
3091 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3092 	struct trace_entry *ent, *next = NULL;
3093 	unsigned long lost_events = 0, next_lost = 0;
3094 	int cpu_file = iter->cpu_file;
3095 	u64 next_ts = 0, ts;
3096 	int next_cpu = -1;
3097 	int next_size = 0;
3098 	int cpu;
3099 
3100 	/*
3101 	 * If we are in a per_cpu trace file, don't bother iterating over
3102 	 * all CPUs and just peek at that CPU directly.
3103 	 */
3104 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3105 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3106 			return NULL;
3107 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3108 		if (ent_cpu)
3109 			*ent_cpu = cpu_file;
3110 
3111 		return ent;
3112 	}
3113 
3114 	for_each_tracing_cpu(cpu) {
3115 
3116 		if (ring_buffer_empty_cpu(buffer, cpu))
3117 			continue;
3118 
3119 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3120 
3121 		/*
3122 		 * Pick the entry with the smallest timestamp:
3123 		 */
3124 		if (ent && (!next || ts < next_ts)) {
3125 			next = ent;
3126 			next_cpu = cpu;
3127 			next_ts = ts;
3128 			next_lost = lost_events;
3129 			next_size = iter->ent_size;
3130 		}
3131 	}
3132 
3133 	iter->ent_size = next_size;
3134 
3135 	if (ent_cpu)
3136 		*ent_cpu = next_cpu;
3137 
3138 	if (ent_ts)
3139 		*ent_ts = next_ts;
3140 
3141 	if (missing_events)
3142 		*missing_events = next_lost;
3143 
3144 	return next;
3145 }
3146 
3147 /* Find the next real entry, without updating the iterator itself */
3148 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3149 					  int *ent_cpu, u64 *ent_ts)
3150 {
3151 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3152 }
3153 
3154 /* Find the next real entry, and increment the iterator to the next entry */
3155 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3156 {
3157 	iter->ent = __find_next_entry(iter, &iter->cpu,
3158 				      &iter->lost_events, &iter->ts);
3159 
3160 	if (iter->ent)
3161 		trace_iterator_increment(iter);
3162 
3163 	return iter->ent ? iter : NULL;
3164 }
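
/*
 * Illustrative sketch (hypothetical consumer): trace_find_next_entry_inc()
 * returns the iterator while entries remain and NULL when the buffers are
 * exhausted, with iter->ent left pointing at the current entry.
 */
static __maybe_unused void example_walk_entries(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* a real consumer would inspect or print iter->ent here */
	}
}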
3165 
3166 static void trace_consume(struct trace_iterator *iter)
3167 {
3168 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3169 			    &iter->lost_events);
3170 }
3171 
3172 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3173 {
3174 	struct trace_iterator *iter = m->private;
3175 	int i = (int)*pos;
3176 	void *ent;
3177 
3178 	WARN_ON_ONCE(iter->leftover);
3179 
3180 	(*pos)++;
3181 
3182 	/* can't go backwards */
3183 	if (iter->idx > i)
3184 		return NULL;
3185 
3186 	if (iter->idx < 0)
3187 		ent = trace_find_next_entry_inc(iter);
3188 	else
3189 		ent = iter;
3190 
3191 	while (ent && iter->idx < i)
3192 		ent = trace_find_next_entry_inc(iter);
3193 
3194 	iter->pos = *pos;
3195 
3196 	return ent;
3197 }
3198 
3199 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3200 {
3201 	struct ring_buffer_event *event;
3202 	struct ring_buffer_iter *buf_iter;
3203 	unsigned long entries = 0;
3204 	u64 ts;
3205 
3206 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3207 
3208 	buf_iter = trace_buffer_iter(iter, cpu);
3209 	if (!buf_iter)
3210 		return;
3211 
3212 	ring_buffer_iter_reset(buf_iter);
3213 
3214 	/*
3215 	 * With the max latency tracers, it is possible that a reset
3216 	 * never took place on a cpu. This is evident by the timestamp
3217 	 * being before the start of the buffer.
3218 	 */
3219 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3220 		if (ts >= iter->trace_buffer->time_start)
3221 			break;
3222 		entries++;
3223 		ring_buffer_read(buf_iter, NULL);
3224 	}
3225 
3226 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3227 }
3228 
3229 /*
3230  * The current tracer is copied to avoid global locking
3231  * all around.
3232  */
3233 static void *s_start(struct seq_file *m, loff_t *pos)
3234 {
3235 	struct trace_iterator *iter = m->private;
3236 	struct trace_array *tr = iter->tr;
3237 	int cpu_file = iter->cpu_file;
3238 	void *p = NULL;
3239 	loff_t l = 0;
3240 	int cpu;
3241 
3242 	/*
3243 	 * copy the tracer to avoid using a global lock all around.
3244 	 * iter->trace is a copy of current_trace; the pointer to the
3245 	 * name may be used instead of a strcmp(), as iter->trace->name
3246 	 * will point to the same string as current_trace->name.
3247 	 */
3248 	mutex_lock(&trace_types_lock);
3249 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3250 		*iter->trace = *tr->current_trace;
3251 	mutex_unlock(&trace_types_lock);
3252 
3253 #ifdef CONFIG_TRACER_MAX_TRACE
3254 	if (iter->snapshot && iter->trace->use_max_tr)
3255 		return ERR_PTR(-EBUSY);
3256 #endif
3257 
3258 	if (!iter->snapshot)
3259 		atomic_inc(&trace_record_taskinfo_disabled);
3260 
3261 	if (*pos != iter->pos) {
3262 		iter->ent = NULL;
3263 		iter->cpu = 0;
3264 		iter->idx = -1;
3265 
3266 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3267 			for_each_tracing_cpu(cpu)
3268 				tracing_iter_reset(iter, cpu);
3269 		} else
3270 			tracing_iter_reset(iter, cpu_file);
3271 
3272 		iter->leftover = 0;
3273 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3274 			;
3275 
3276 	} else {
3277 		/*
3278 		 * If we overflowed the seq_file before, then we want
3279 		 * to just reuse the trace_seq buffer again.
3280 		 */
3281 		if (iter->leftover)
3282 			p = iter;
3283 		else {
3284 			l = *pos - 1;
3285 			p = s_next(m, p, &l);
3286 		}
3287 	}
3288 
3289 	trace_event_read_lock();
3290 	trace_access_lock(cpu_file);
3291 	return p;
3292 }
3293 
3294 static void s_stop(struct seq_file *m, void *p)
3295 {
3296 	struct trace_iterator *iter = m->private;
3297 
3298 #ifdef CONFIG_TRACER_MAX_TRACE
3299 	if (iter->snapshot && iter->trace->use_max_tr)
3300 		return;
3301 #endif
3302 
3303 	if (!iter->snapshot)
3304 		atomic_dec(&trace_record_taskinfo_disabled);
3305 
3306 	trace_access_unlock(iter->cpu_file);
3307 	trace_event_read_unlock();
3308 }
3309 
3310 static void
3311 get_total_entries(struct trace_buffer *buf,
3312 		  unsigned long *total, unsigned long *entries)
3313 {
3314 	unsigned long count;
3315 	int cpu;
3316 
3317 	*total = 0;
3318 	*entries = 0;
3319 
3320 	for_each_tracing_cpu(cpu) {
3321 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3322 		/*
3323 		 * If this buffer has skipped entries, then we hold all
3324 		 * entries for the trace and we need to ignore the
3325 		 * ones before the time stamp.
3326 		 */
3327 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3328 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3329 			/* total is the same as the entries */
3330 			*total += count;
3331 		} else
3332 			*total += count +
3333 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3334 		*entries += count;
3335 	}
3336 }
3337 
3338 static void print_lat_help_header(struct seq_file *m)
3339 {
3340 	seq_puts(m, "#                  _------=> CPU#            \n"
3341 		    "#                 / _-----=> irqs-off        \n"
3342 		    "#                | / _----=> need-resched    \n"
3343 		    "#                || / _---=> hardirq/softirq \n"
3344 		    "#                ||| / _--=> preempt-depth   \n"
3345 		    "#                |||| /     delay            \n"
3346 		    "#  cmd     pid   ||||| time  |   caller      \n"
3347 		    "#     \\   /      |||||  \\    |   /         \n");
3348 }
3349 
3350 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3351 {
3352 	unsigned long total;
3353 	unsigned long entries;
3354 
3355 	get_total_entries(buf, &total, &entries);
3356 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3357 		   entries, total, num_online_cpus());
3358 	seq_puts(m, "#\n");
3359 }
3360 
3361 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3362 				   unsigned int flags)
3363 {
3364 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3365 
3366 	print_event_info(buf, m);
3367 
3368 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3369 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3370 }
3371 
3372 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3373 				       unsigned int flags)
3374 {
3375 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3376 	const char tgid_space[] = "          ";
3377 	const char space[] = "  ";
3378 
3379 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3380 		   tgid ? tgid_space : space);
3381 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3382 		   tgid ? tgid_space : space);
3383 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3384 		   tgid ? tgid_space : space);
3385 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3386 		   tgid ? tgid_space : space);
3387 	seq_printf(m, "#                          %s||| /     delay\n",
3388 		   tgid ? tgid_space : space);
3389 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3390 		   tgid ? "   TGID   " : space);
3391 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3392 		   tgid ? "     |    " : space);
3393 }
3394 
3395 void
3396 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3397 {
3398 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3399 	struct trace_buffer *buf = iter->trace_buffer;
3400 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3401 	struct tracer *type = iter->trace;
3402 	unsigned long entries;
3403 	unsigned long total;
3404 	const char *name = "preemption";
3405 
3406 	name = type->name;
3407 
3408 	get_total_entries(buf, &total, &entries);
3409 
3410 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3411 		   name, UTS_RELEASE);
3412 	seq_puts(m, "# -----------------------------------"
3413 		 "---------------------------------\n");
3414 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3415 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3416 		   nsecs_to_usecs(data->saved_latency),
3417 		   entries,
3418 		   total,
3419 		   buf->cpu,
3420 #if defined(CONFIG_PREEMPT_NONE)
3421 		   "server",
3422 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3423 		   "desktop",
3424 #elif defined(CONFIG_PREEMPT)
3425 		   "preempt",
3426 #else
3427 		   "unknown",
3428 #endif
3429 		   /* These are reserved for later use */
3430 		   0, 0, 0, 0);
3431 #ifdef CONFIG_SMP
3432 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3433 #else
3434 	seq_puts(m, ")\n");
3435 #endif
3436 	seq_puts(m, "#    -----------------\n");
3437 	seq_printf(m, "#    | task: %.16s-%d "
3438 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3439 		   data->comm, data->pid,
3440 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3441 		   data->policy, data->rt_priority);
3442 	seq_puts(m, "#    -----------------\n");
3443 
3444 	if (data->critical_start) {
3445 		seq_puts(m, "#  => started at: ");
3446 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3447 		trace_print_seq(m, &iter->seq);
3448 		seq_puts(m, "\n#  => ended at:   ");
3449 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3450 		trace_print_seq(m, &iter->seq);
3451 		seq_puts(m, "\n#\n");
3452 	}
3453 
3454 	seq_puts(m, "#\n");
3455 }
3456 
3457 static void test_cpu_buff_start(struct trace_iterator *iter)
3458 {
3459 	struct trace_seq *s = &iter->seq;
3460 	struct trace_array *tr = iter->tr;
3461 
3462 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3463 		return;
3464 
3465 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3466 		return;
3467 
3468 	if (cpumask_available(iter->started) &&
3469 	    cpumask_test_cpu(iter->cpu, iter->started))
3470 		return;
3471 
3472 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3473 		return;
3474 
3475 	if (cpumask_available(iter->started))
3476 		cpumask_set_cpu(iter->cpu, iter->started);
3477 
3478 	/* Don't print started cpu buffer for the first entry of the trace */
3479 	if (iter->idx > 1)
3480 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3481 				iter->cpu);
3482 }
3483 
3484 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3485 {
3486 	struct trace_array *tr = iter->tr;
3487 	struct trace_seq *s = &iter->seq;
3488 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3489 	struct trace_entry *entry;
3490 	struct trace_event *event;
3491 
3492 	entry = iter->ent;
3493 
3494 	test_cpu_buff_start(iter);
3495 
3496 	event = ftrace_find_event(entry->type);
3497 
3498 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3499 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3500 			trace_print_lat_context(iter);
3501 		else
3502 			trace_print_context(iter);
3503 	}
3504 
3505 	if (trace_seq_has_overflowed(s))
3506 		return TRACE_TYPE_PARTIAL_LINE;
3507 
3508 	if (event)
3509 		return event->funcs->trace(iter, sym_flags, event);
3510 
3511 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3512 
3513 	return trace_handle_return(s);
3514 }
3515 
3516 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3517 {
3518 	struct trace_array *tr = iter->tr;
3519 	struct trace_seq *s = &iter->seq;
3520 	struct trace_entry *entry;
3521 	struct trace_event *event;
3522 
3523 	entry = iter->ent;
3524 
3525 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3526 		trace_seq_printf(s, "%d %d %llu ",
3527 				 entry->pid, iter->cpu, iter->ts);
3528 
3529 	if (trace_seq_has_overflowed(s))
3530 		return TRACE_TYPE_PARTIAL_LINE;
3531 
3532 	event = ftrace_find_event(entry->type);
3533 	if (event)
3534 		return event->funcs->raw(iter, 0, event);
3535 
3536 	trace_seq_printf(s, "%d ?\n", entry->type);
3537 
3538 	return trace_handle_return(s);
3539 }
3540 
3541 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3542 {
3543 	struct trace_array *tr = iter->tr;
3544 	struct trace_seq *s = &iter->seq;
3545 	unsigned char newline = '\n';
3546 	struct trace_entry *entry;
3547 	struct trace_event *event;
3548 
3549 	entry = iter->ent;
3550 
3551 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3552 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3553 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3554 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3555 		if (trace_seq_has_overflowed(s))
3556 			return TRACE_TYPE_PARTIAL_LINE;
3557 	}
3558 
3559 	event = ftrace_find_event(entry->type);
3560 	if (event) {
3561 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3562 		if (ret != TRACE_TYPE_HANDLED)
3563 			return ret;
3564 	}
3565 
3566 	SEQ_PUT_FIELD(s, newline);
3567 
3568 	return trace_handle_return(s);
3569 }
3570 
3571 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3572 {
3573 	struct trace_array *tr = iter->tr;
3574 	struct trace_seq *s = &iter->seq;
3575 	struct trace_entry *entry;
3576 	struct trace_event *event;
3577 
3578 	entry = iter->ent;
3579 
3580 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3581 		SEQ_PUT_FIELD(s, entry->pid);
3582 		SEQ_PUT_FIELD(s, iter->cpu);
3583 		SEQ_PUT_FIELD(s, iter->ts);
3584 		if (trace_seq_has_overflowed(s))
3585 			return TRACE_TYPE_PARTIAL_LINE;
3586 	}
3587 
3588 	event = ftrace_find_event(entry->type);
3589 	return event ? event->funcs->binary(iter, 0, event) :
3590 		TRACE_TYPE_HANDLED;
3591 }
3592 
3593 int trace_empty(struct trace_iterator *iter)
3594 {
3595 	struct ring_buffer_iter *buf_iter;
3596 	int cpu;
3597 
3598 	/* If we are looking at one CPU buffer, only check that one */
3599 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3600 		cpu = iter->cpu_file;
3601 		buf_iter = trace_buffer_iter(iter, cpu);
3602 		if (buf_iter) {
3603 			if (!ring_buffer_iter_empty(buf_iter))
3604 				return 0;
3605 		} else {
3606 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3607 				return 0;
3608 		}
3609 		return 1;
3610 	}
3611 
3612 	for_each_tracing_cpu(cpu) {
3613 		buf_iter = trace_buffer_iter(iter, cpu);
3614 		if (buf_iter) {
3615 			if (!ring_buffer_iter_empty(buf_iter))
3616 				return 0;
3617 		} else {
3618 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619 				return 0;
3620 		}
3621 	}
3622 
3623 	return 1;
3624 }
3625 
3626 /*  Called with trace_event_read_lock() held. */
3627 enum print_line_t print_trace_line(struct trace_iterator *iter)
3628 {
3629 	struct trace_array *tr = iter->tr;
3630 	unsigned long trace_flags = tr->trace_flags;
3631 	enum print_line_t ret;
3632 
3633 	if (iter->lost_events) {
3634 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3635 				 iter->cpu, iter->lost_events);
3636 		if (trace_seq_has_overflowed(&iter->seq))
3637 			return TRACE_TYPE_PARTIAL_LINE;
3638 	}
3639 
3640 	if (iter->trace && iter->trace->print_line) {
3641 		ret = iter->trace->print_line(iter);
3642 		if (ret != TRACE_TYPE_UNHANDLED)
3643 			return ret;
3644 	}
3645 
3646 	if (iter->ent->type == TRACE_BPUTS &&
3647 			trace_flags & TRACE_ITER_PRINTK &&
3648 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649 		return trace_print_bputs_msg_only(iter);
3650 
3651 	if (iter->ent->type == TRACE_BPRINT &&
3652 			trace_flags & TRACE_ITER_PRINTK &&
3653 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654 		return trace_print_bprintk_msg_only(iter);
3655 
3656 	if (iter->ent->type == TRACE_PRINT &&
3657 			trace_flags & TRACE_ITER_PRINTK &&
3658 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659 		return trace_print_printk_msg_only(iter);
3660 
3661 	if (trace_flags & TRACE_ITER_BIN)
3662 		return print_bin_fmt(iter);
3663 
3664 	if (trace_flags & TRACE_ITER_HEX)
3665 		return print_hex_fmt(iter);
3666 
3667 	if (trace_flags & TRACE_ITER_RAW)
3668 		return print_raw_fmt(iter);
3669 
3670 	return print_trace_fmt(iter);
3671 }
3672 
3673 void trace_latency_header(struct seq_file *m)
3674 {
3675 	struct trace_iterator *iter = m->private;
3676 	struct trace_array *tr = iter->tr;
3677 
3678 	/* print nothing if the buffers are empty */
3679 	if (trace_empty(iter))
3680 		return;
3681 
3682 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3683 		print_trace_header(m, iter);
3684 
3685 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3686 		print_lat_help_header(m);
3687 }
3688 
3689 void trace_default_header(struct seq_file *m)
3690 {
3691 	struct trace_iterator *iter = m->private;
3692 	struct trace_array *tr = iter->tr;
3693 	unsigned long trace_flags = tr->trace_flags;
3694 
3695 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3696 		return;
3697 
3698 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3699 		/* print nothing if the buffers are empty */
3700 		if (trace_empty(iter))
3701 			return;
3702 		print_trace_header(m, iter);
3703 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3704 			print_lat_help_header(m);
3705 	} else {
3706 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3707 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3708 				print_func_help_header_irq(iter->trace_buffer,
3709 							   m, trace_flags);
3710 			else
3711 				print_func_help_header(iter->trace_buffer, m,
3712 						       trace_flags);
3713 		}
3714 	}
3715 }
3716 
3717 static void test_ftrace_alive(struct seq_file *m)
3718 {
3719 	if (!ftrace_is_dead())
3720 		return;
3721 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3722 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3723 }
3724 
3725 #ifdef CONFIG_TRACER_MAX_TRACE
3726 static void show_snapshot_main_help(struct seq_file *m)
3727 {
3728 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3729 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3730 		    "#                      Takes a snapshot of the main buffer.\n"
3731 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3732 		    "#                      (Doesn't have to be '2', works with any number that\n"
3733 		    "#                       is not a '0' or '1')\n");
3734 }
3735 
3736 static void show_snapshot_percpu_help(struct seq_file *m)
3737 {
3738 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3739 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3740 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3741 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3742 #else
3743 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3744 		    "#                     Must use main snapshot file to allocate.\n");
3745 #endif
3746 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3747 		    "#                      (Doesn't have to be '2', works with any number that\n"
3748 		    "#                       is not a '0' or '1')\n");
3749 }
3750 
3751 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3752 {
3753 	if (iter->tr->allocated_snapshot)
3754 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3755 	else
3756 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3757 
3758 	seq_puts(m, "# Snapshot commands:\n");
3759 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3760 		show_snapshot_main_help(m);
3761 	else
3762 		show_snapshot_percpu_help(m);
3763 }
3764 #else
3765 /* Should never be called */
3766 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3767 #endif
3768 
3769 static int s_show(struct seq_file *m, void *v)
3770 {
3771 	struct trace_iterator *iter = v;
3772 	int ret;
3773 
3774 	if (iter->ent == NULL) {
3775 		if (iter->tr) {
3776 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3777 			seq_puts(m, "#\n");
3778 			test_ftrace_alive(m);
3779 		}
3780 		if (iter->snapshot && trace_empty(iter))
3781 			print_snapshot_help(m, iter);
3782 		else if (iter->trace && iter->trace->print_header)
3783 			iter->trace->print_header(m);
3784 		else
3785 			trace_default_header(m);
3786 
3787 	} else if (iter->leftover) {
3788 		/*
3789 		 * If we filled the seq_file buffer earlier, we
3790 		 * want to just show it now.
3791 		 */
3792 		ret = trace_print_seq(m, &iter->seq);
3793 
3794 		/* ret should this time be zero, but you never know */
3795 		iter->leftover = ret;
3796 
3797 	} else {
3798 		print_trace_line(iter);
3799 		ret = trace_print_seq(m, &iter->seq);
3800 		/*
3801 		 * If we overflow the seq_file buffer, then it will
3802 		 * ask us for this data again at start up.
3803 		 * Use that instead.
3804 		 *  ret is 0 if seq_file write succeeded.
3805 		 *        -1 otherwise.
3806 		 */
3807 		iter->leftover = ret;
3808 	}
3809 
3810 	return 0;
3811 }
3812 
3813 /*
3814  * Should be used after trace_array_get(); trace_types_lock
3815  * ensures that i_cdev was already initialized.
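 *
 * i_cdev stores the cpu number biased by one (see trace_create_cpu_file()),
 * so a NULL i_cdev means "no per-cpu file" and is mapped to
 * RING_BUFFER_ALL_CPUS below.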
3816  */
3817 static inline int tracing_get_cpu(struct inode *inode)
3818 {
3819 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3820 		return (long)inode->i_cdev - 1;
3821 	return RING_BUFFER_ALL_CPUS;
3822 }
3823 
3824 static const struct seq_operations tracer_seq_ops = {
3825 	.start		= s_start,
3826 	.next		= s_next,
3827 	.stop		= s_stop,
3828 	.show		= s_show,
3829 };
3830 
3831 static struct trace_iterator *
3832 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3833 {
3834 	struct trace_array *tr = inode->i_private;
3835 	struct trace_iterator *iter;
3836 	int cpu;
3837 
3838 	if (tracing_disabled)
3839 		return ERR_PTR(-ENODEV);
3840 
3841 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3842 	if (!iter)
3843 		return ERR_PTR(-ENOMEM);
3844 
3845 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3846 				    GFP_KERNEL);
3847 	if (!iter->buffer_iter)
3848 		goto release;
3849 
3850 	/*
3851 	 * We make a copy of the current tracer to avoid concurrent
3852 	 * changes on it while we are reading.
3853 	 */
3854 	mutex_lock(&trace_types_lock);
3855 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3856 	if (!iter->trace)
3857 		goto fail;
3858 
3859 	*iter->trace = *tr->current_trace;
3860 
3861 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3862 		goto fail;
3863 
3864 	iter->tr = tr;
3865 
3866 #ifdef CONFIG_TRACER_MAX_TRACE
3867 	/* Currently only the top directory has a snapshot */
3868 	if (tr->current_trace->print_max || snapshot)
3869 		iter->trace_buffer = &tr->max_buffer;
3870 	else
3871 #endif
3872 		iter->trace_buffer = &tr->trace_buffer;
3873 	iter->snapshot = snapshot;
3874 	iter->pos = -1;
3875 	iter->cpu_file = tracing_get_cpu(inode);
3876 	mutex_init(&iter->mutex);
3877 
3878 	/* Notify the tracer early; before we stop tracing. */
3879 	if (iter->trace && iter->trace->open)
3880 		iter->trace->open(iter);
3881 
3882 	/* Annotate start of buffers if we had overruns */
3883 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3884 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3885 
3886 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3887 	if (trace_clocks[tr->clock_id].in_ns)
3888 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3889 
3890 	/* stop the trace while dumping if we are not opening "snapshot" */
3891 	if (!iter->snapshot)
3892 		tracing_stop_tr(tr);
3893 
3894 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3895 		for_each_tracing_cpu(cpu) {
3896 			iter->buffer_iter[cpu] =
3897 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3898 		}
3899 		ring_buffer_read_prepare_sync();
3900 		for_each_tracing_cpu(cpu) {
3901 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3902 			tracing_iter_reset(iter, cpu);
3903 		}
3904 	} else {
3905 		cpu = iter->cpu_file;
3906 		iter->buffer_iter[cpu] =
3907 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908 		ring_buffer_read_prepare_sync();
3909 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3910 		tracing_iter_reset(iter, cpu);
3911 	}
3912 
3913 	mutex_unlock(&trace_types_lock);
3914 
3915 	return iter;
3916 
3917  fail:
3918 	mutex_unlock(&trace_types_lock);
3919 	kfree(iter->trace);
3920 	kfree(iter->buffer_iter);
3921 release:
3922 	seq_release_private(inode, file);
3923 	return ERR_PTR(-ENOMEM);
3924 }
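
/*
 * Note: unless the "snapshot" file was opened, __tracing_open() stops
 * tracing via tracing_stop_tr(); the matching tracing_start_tr() is
 * done from tracing_release() below.
 */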
3925 
3926 int tracing_open_generic(struct inode *inode, struct file *filp)
3927 {
3928 	if (tracing_disabled)
3929 		return -ENODEV;
3930 
3931 	filp->private_data = inode->i_private;
3932 	return 0;
3933 }
3934 
3935 bool tracing_is_disabled(void)
3936 {
3937 	return (tracing_disabled) ? true : false;
3938 }
3939 
3940 /*
3941  * Open and update trace_array ref count.
3942  * Must have the current trace_array passed to it.
3943  */
3944 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3945 {
3946 	struct trace_array *tr = inode->i_private;
3947 
3948 	if (tracing_disabled)
3949 		return -ENODEV;
3950 
3951 	if (trace_array_get(tr) < 0)
3952 		return -ENODEV;
3953 
3954 	filp->private_data = inode->i_private;
3955 
3956 	return 0;
3957 }
3958 
3959 static int tracing_release(struct inode *inode, struct file *file)
3960 {
3961 	struct trace_array *tr = inode->i_private;
3962 	struct seq_file *m = file->private_data;
3963 	struct trace_iterator *iter;
3964 	int cpu;
3965 
3966 	if (!(file->f_mode & FMODE_READ)) {
3967 		trace_array_put(tr);
3968 		return 0;
3969 	}
3970 
3971 	/* Writes do not use seq_file */
3972 	iter = m->private;
3973 	mutex_lock(&trace_types_lock);
3974 
3975 	for_each_tracing_cpu(cpu) {
3976 		if (iter->buffer_iter[cpu])
3977 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3978 	}
3979 
3980 	if (iter->trace && iter->trace->close)
3981 		iter->trace->close(iter);
3982 
3983 	if (!iter->snapshot)
3984 		/* reenable tracing if it was previously enabled */
3985 		tracing_start_tr(tr);
3986 
3987 	__trace_array_put(tr);
3988 
3989 	mutex_unlock(&trace_types_lock);
3990 
3991 	mutex_destroy(&iter->mutex);
3992 	free_cpumask_var(iter->started);
3993 	kfree(iter->trace);
3994 	kfree(iter->buffer_iter);
3995 	seq_release_private(inode, file);
3996 
3997 	return 0;
3998 }
3999 
4000 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4001 {
4002 	struct trace_array *tr = inode->i_private;
4003 
4004 	trace_array_put(tr);
4005 	return 0;
4006 }
4007 
4008 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4009 {
4010 	struct trace_array *tr = inode->i_private;
4011 
4012 	trace_array_put(tr);
4013 
4014 	return single_release(inode, file);
4015 }
4016 
4017 static int tracing_open(struct inode *inode, struct file *file)
4018 {
4019 	struct trace_array *tr = inode->i_private;
4020 	struct trace_iterator *iter;
4021 	int ret = 0;
4022 
4023 	if (trace_array_get(tr) < 0)
4024 		return -ENODEV;
4025 
4026 	/* If this file was open for write, then erase contents */
4027 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4028 		int cpu = tracing_get_cpu(inode);
4029 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4030 
4031 #ifdef CONFIG_TRACER_MAX_TRACE
4032 		if (tr->current_trace->print_max)
4033 			trace_buf = &tr->max_buffer;
4034 #endif
4035 
4036 		if (cpu == RING_BUFFER_ALL_CPUS)
4037 			tracing_reset_online_cpus(trace_buf);
4038 		else
4039 			tracing_reset(trace_buf, cpu);
4040 	}
4041 
4042 	if (file->f_mode & FMODE_READ) {
4043 		iter = __tracing_open(inode, file, false);
4044 		if (IS_ERR(iter))
4045 			ret = PTR_ERR(iter);
4046 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4047 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4048 	}
4049 
4050 	if (ret < 0)
4051 		trace_array_put(tr);
4052 
4053 	return ret;
4054 }
4055 
4056 /*
4057  * Some tracers are not suitable for instance buffers.
4058  * A tracer is always available for the global array (toplevel)
4059  * or if it explicitly states that it is.
4060  */
4061 static bool
4062 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4063 {
4064 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4065 }
4066 
4067 /* Find the next tracer that this trace array may use */
4068 static struct tracer *
4069 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4070 {
4071 	while (t && !trace_ok_for_array(t, tr))
4072 		t = t->next;
4073 
4074 	return t;
4075 }
4076 
4077 static void *
4078 t_next(struct seq_file *m, void *v, loff_t *pos)
4079 {
4080 	struct trace_array *tr = m->private;
4081 	struct tracer *t = v;
4082 
4083 	(*pos)++;
4084 
4085 	if (t)
4086 		t = get_tracer_for_array(tr, t->next);
4087 
4088 	return t;
4089 }
4090 
4091 static void *t_start(struct seq_file *m, loff_t *pos)
4092 {
4093 	struct trace_array *tr = m->private;
4094 	struct tracer *t;
4095 	loff_t l = 0;
4096 
4097 	mutex_lock(&trace_types_lock);
4098 
4099 	t = get_tracer_for_array(tr, trace_types);
4100 	for (; t && l < *pos; t = t_next(m, t, &l))
4101 		;
4102 
4103 	return t;
4104 }
4105 
4106 static void t_stop(struct seq_file *m, void *p)
4107 {
4108 	mutex_unlock(&trace_types_lock);
4109 }
4110 
4111 static int t_show(struct seq_file *m, void *v)
4112 {
4113 	struct tracer *t = v;
4114 
4115 	if (!t)
4116 		return 0;
4117 
4118 	seq_puts(m, t->name);
4119 	if (t->next)
4120 		seq_putc(m, ' ');
4121 	else
4122 		seq_putc(m, '\n');
4123 
4124 	return 0;
4125 }
4126 
4127 static const struct seq_operations show_traces_seq_ops = {
4128 	.start		= t_start,
4129 	.next		= t_next,
4130 	.stop		= t_stop,
4131 	.show		= t_show,
4132 };
4133 
4134 static int show_traces_open(struct inode *inode, struct file *file)
4135 {
4136 	struct trace_array *tr = inode->i_private;
4137 	struct seq_file *m;
4138 	int ret;
4139 
4140 	if (tracing_disabled)
4141 		return -ENODEV;
4142 
4143 	ret = seq_open(file, &show_traces_seq_ops);
4144 	if (ret)
4145 		return ret;
4146 
4147 	m = file->private_data;
4148 	m->private = tr;
4149 
4150 	return 0;
4151 }
4152 
4153 static ssize_t
4154 tracing_write_stub(struct file *filp, const char __user *ubuf,
4155 		   size_t count, loff_t *ppos)
4156 {
4157 	return count;
4158 }
4159 
4160 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4161 {
4162 	int ret;
4163 
4164 	if (file->f_mode & FMODE_READ)
4165 		ret = seq_lseek(file, offset, whence);
4166 	else
4167 		file->f_pos = ret = 0;
4168 
4169 	return ret;
4170 }
4171 
4172 static const struct file_operations tracing_fops = {
4173 	.open		= tracing_open,
4174 	.read		= seq_read,
4175 	.write		= tracing_write_stub,
4176 	.llseek		= tracing_lseek,
4177 	.release	= tracing_release,
4178 };
4179 
4180 static const struct file_operations show_traces_fops = {
4181 	.open		= show_traces_open,
4182 	.read		= seq_read,
4183 	.release	= seq_release,
4184 	.llseek		= seq_lseek,
4185 };
4186 
4187 static ssize_t
4188 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4189 		     size_t count, loff_t *ppos)
4190 {
4191 	struct trace_array *tr = file_inode(filp)->i_private;
4192 	char *mask_str;
4193 	int len;
4194 
4195 	len = snprintf(NULL, 0, "%*pb\n",
4196 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4197 	mask_str = kmalloc(len, GFP_KERNEL);
4198 	if (!mask_str)
4199 		return -ENOMEM;
4200 
4201 	len = snprintf(mask_str, len, "%*pb\n",
4202 		       cpumask_pr_args(tr->tracing_cpumask));
4203 	if (len >= count) {
4204 		count = -EINVAL;
4205 		goto out_err;
4206 	}
4207 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4208 
4209 out_err:
4210 	kfree(mask_str);
4211 
4212 	return count;
4213 }
4214 
4215 static ssize_t
4216 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4217 		      size_t count, loff_t *ppos)
4218 {
4219 	struct trace_array *tr = file_inode(filp)->i_private;
4220 	cpumask_var_t tracing_cpumask_new;
4221 	int err, cpu;
4222 
4223 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4224 		return -ENOMEM;
4225 
4226 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4227 	if (err)
4228 		goto err_unlock;
4229 
4230 	local_irq_disable();
4231 	arch_spin_lock(&tr->max_lock);
4232 	for_each_tracing_cpu(cpu) {
4233 		/*
4234 		 * Increase/decrease the disabled counter if we are
4235 		 * about to flip a bit in the cpumask:
4236 		 */
4237 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4241 		}
4242 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4243 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4244 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4245 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4246 		}
4247 	}
4248 	arch_spin_unlock(&tr->max_lock);
4249 	local_irq_enable();
4250 
4251 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4252 	free_cpumask_var(tracing_cpumask_new);
4253 
4254 	return count;
4255 
4256 err_unlock:
4257 	free_cpumask_var(tracing_cpumask_new);
4258 
4259 	return err;
4260 }
4261 
4262 static const struct file_operations tracing_cpumask_fops = {
4263 	.open		= tracing_open_generic_tr,
4264 	.read		= tracing_cpumask_read,
4265 	.write		= tracing_cpumask_write,
4266 	.release	= tracing_release_generic_tr,
4267 	.llseek		= generic_file_llseek,
4268 };
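
/*
 * Usage sketch (not part of this file): tracing_cpumask takes the usual
 * comma-separated hex cpumask format parsed by cpumask_parse_user(),
 * e.g. to limit tracing to CPUs 0 and 1 (assuming tracefs is mounted
 * at /sys/kernel/tracing):
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 */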
4269 
4270 static int tracing_trace_options_show(struct seq_file *m, void *v)
4271 {
4272 	struct tracer_opt *trace_opts;
4273 	struct trace_array *tr = m->private;
4274 	u32 tracer_flags;
4275 	int i;
4276 
4277 	mutex_lock(&trace_types_lock);
4278 	tracer_flags = tr->current_trace->flags->val;
4279 	trace_opts = tr->current_trace->flags->opts;
4280 
4281 	for (i = 0; trace_options[i]; i++) {
4282 		if (tr->trace_flags & (1 << i))
4283 			seq_printf(m, "%s\n", trace_options[i]);
4284 		else
4285 			seq_printf(m, "no%s\n", trace_options[i]);
4286 	}
4287 
4288 	for (i = 0; trace_opts[i].name; i++) {
4289 		if (tracer_flags & trace_opts[i].bit)
4290 			seq_printf(m, "%s\n", trace_opts[i].name);
4291 		else
4292 			seq_printf(m, "no%s\n", trace_opts[i].name);
4293 	}
4294 	mutex_unlock(&trace_types_lock);
4295 
4296 	return 0;
4297 }
4298 
4299 static int __set_tracer_option(struct trace_array *tr,
4300 			       struct tracer_flags *tracer_flags,
4301 			       struct tracer_opt *opts, int neg)
4302 {
4303 	struct tracer *trace = tracer_flags->trace;
4304 	int ret;
4305 
4306 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4307 	if (ret)
4308 		return ret;
4309 
4310 	if (neg)
4311 		tracer_flags->val &= ~opts->bit;
4312 	else
4313 		tracer_flags->val |= opts->bit;
4314 	return 0;
4315 }
4316 
4317 /* Try to assign a tracer specific option */
4318 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4319 {
4320 	struct tracer *trace = tr->current_trace;
4321 	struct tracer_flags *tracer_flags = trace->flags;
4322 	struct tracer_opt *opts = NULL;
4323 	int i;
4324 
4325 	for (i = 0; tracer_flags->opts[i].name; i++) {
4326 		opts = &tracer_flags->opts[i];
4327 
4328 		if (strcmp(cmp, opts->name) == 0)
4329 			return __set_tracer_option(tr, trace->flags, opts, neg);
4330 	}
4331 
4332 	return -EINVAL;
4333 }
4334 
4335 /* Some tracers require overwrite to stay enabled */
4336 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4337 {
4338 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4339 		return -1;
4340 
4341 	return 0;
4342 }
4343 
4344 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4345 {
4346 	/* do nothing if flag is already set */
4347 	if (!!(tr->trace_flags & mask) == !!enabled)
4348 		return 0;
4349 
4350 	/* Give the tracer a chance to approve the change */
4351 	if (tr->current_trace->flag_changed)
4352 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4353 			return -EINVAL;
4354 
4355 	if (enabled)
4356 		tr->trace_flags |= mask;
4357 	else
4358 		tr->trace_flags &= ~mask;
4359 
4360 	if (mask == TRACE_ITER_RECORD_CMD)
4361 		trace_event_enable_cmd_record(enabled);
4362 
4363 	if (mask == TRACE_ITER_RECORD_TGID) {
4364 		if (!tgid_map)
4365 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4366 					   sizeof(*tgid_map),
4367 					   GFP_KERNEL);
4368 		if (!tgid_map) {
4369 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4370 			return -ENOMEM;
4371 		}
4372 
4373 		trace_event_enable_tgid_record(enabled);
4374 	}
4375 
4376 	if (mask == TRACE_ITER_EVENT_FORK)
4377 		trace_event_follow_fork(tr, enabled);
4378 
4379 	if (mask == TRACE_ITER_FUNC_FORK)
4380 		ftrace_pid_follow_fork(tr, enabled);
4381 
4382 	if (mask == TRACE_ITER_OVERWRITE) {
4383 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4384 #ifdef CONFIG_TRACER_MAX_TRACE
4385 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4386 #endif
4387 	}
4388 
4389 	if (mask == TRACE_ITER_PRINTK) {
4390 		trace_printk_start_stop_comm(enabled);
4391 		trace_printk_control(enabled);
4392 	}
4393 
4394 	return 0;
4395 }
4396 
4397 static int trace_set_options(struct trace_array *tr, char *option)
4398 {
4399 	char *cmp;
4400 	int neg = 0;
4401 	int ret;
4402 	size_t orig_len = strlen(option);
4403 
4404 	cmp = strstrip(option);
4405 
4406 	if (strncmp(cmp, "no", 2) == 0) {
4407 		neg = 1;
4408 		cmp += 2;
4409 	}
4410 
4411 	mutex_lock(&trace_types_lock);
4412 
4413 	ret = match_string(trace_options, -1, cmp);
4414 	/* If no option could be set, test the specific tracer options */
4415 	if (ret < 0)
4416 		ret = set_tracer_option(tr, cmp, neg);
4417 	else
4418 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4419 
4420 	mutex_unlock(&trace_types_lock);
4421 
4422 	/*
4423 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4424 	 * turn it back into a space.
4425 	 */
4426 	if (orig_len > strlen(option))
4427 		option[strlen(option)] = ' ';
4428 
4429 	return ret;
4430 }
4431 
4432 static void __init apply_trace_boot_options(void)
4433 {
4434 	char *buf = trace_boot_options_buf;
4435 	char *option;
4436 
4437 	while (true) {
4438 		option = strsep(&buf, ",");
4439 
4440 		if (!option)
4441 			break;
4442 
4443 		if (*option)
4444 			trace_set_options(&global_trace, option);
4445 
4446 		/* Put back the comma to allow this to be called again */
4447 		if (buf)
4448 			*(buf - 1) = ',';
4449 	}
4450 }
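
/*
 * The buffer walked above is filled from the "trace_options=" kernel
 * command line parameter. An illustrative sketch, using the same option
 * strings accepted by the trace_options file:
 *
 *   trace_options=stacktrace,noirq-info
 */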
4451 
4452 static ssize_t
4453 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4454 			size_t cnt, loff_t *ppos)
4455 {
4456 	struct seq_file *m = filp->private_data;
4457 	struct trace_array *tr = m->private;
4458 	char buf[64];
4459 	int ret;
4460 
4461 	if (cnt >= sizeof(buf))
4462 		return -EINVAL;
4463 
4464 	if (copy_from_user(buf, ubuf, cnt))
4465 		return -EFAULT;
4466 
4467 	buf[cnt] = 0;
4468 
4469 	ret = trace_set_options(tr, buf);
4470 	if (ret < 0)
4471 		return ret;
4472 
4473 	*ppos += cnt;
4474 
4475 	return cnt;
4476 }
4477 
4478 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4479 {
4480 	struct trace_array *tr = inode->i_private;
4481 	int ret;
4482 
4483 	if (tracing_disabled)
4484 		return -ENODEV;
4485 
4486 	if (trace_array_get(tr) < 0)
4487 		return -ENODEV;
4488 
4489 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4490 	if (ret < 0)
4491 		trace_array_put(tr);
4492 
4493 	return ret;
4494 }
4495 
4496 static const struct file_operations tracing_iter_fops = {
4497 	.open		= tracing_trace_options_open,
4498 	.read		= seq_read,
4499 	.llseek		= seq_lseek,
4500 	.release	= tracing_single_release_tr,
4501 	.write		= tracing_trace_options_write,
4502 };
4503 
4504 static const char readme_msg[] =
4505 	"tracing mini-HOWTO:\n\n"
4506 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4507 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4508 	" Important files:\n"
4509 	"  trace\t\t\t- The static contents of the buffer\n"
4510 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4511 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4512 	"  current_tracer\t- function and latency tracers\n"
4513 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4514 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4515 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4516 	"  trace_clock\t\t- change the clock used to order events\n"
4517 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4518 	"      global:   Synced across CPUs but slows tracing down.\n"
4519 	"     counter:   Not a clock, but just an increment\n"
4520 	"      uptime:   Jiffy counter from time of boot\n"
4521 	"        perf:   Same clock that perf events use\n"
4522 #ifdef CONFIG_X86_64
4523 	"     x86-tsc:   TSC cycle counter\n"
4524 #endif
4525 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4526 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4527 	"    absolute:   Absolute (standalone) timestamp\n"
4528 	"\n  trace_marker\t\t- Writes into this file write into the kernel buffer\n"
4529 	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
4530 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4531 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4532 	"\t\t\t  Remove sub-buffer with rmdir\n"
4533 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4534 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4535 	"\t\t\t  option name\n"
4536 	"  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
4537 #ifdef CONFIG_DYNAMIC_FTRACE
4538 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4539 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4540 	"\t\t\t  functions\n"
4541 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4542 	"\t     modules: Can select a group via module\n"
4543 	"\t      Format: :mod:<module-name>\n"
4544 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4545 	"\t    triggers: a command to perform when function is hit\n"
4546 	"\t      Format: <function>:<trigger>[:count]\n"
4547 	"\t     trigger: traceon, traceoff\n"
4548 	"\t\t      enable_event:<system>:<event>\n"
4549 	"\t\t      disable_event:<system>:<event>\n"
4550 #ifdef CONFIG_STACKTRACE
4551 	"\t\t      stacktrace\n"
4552 #endif
4553 #ifdef CONFIG_TRACER_SNAPSHOT
4554 	"\t\t      snapshot\n"
4555 #endif
4556 	"\t\t      dump\n"
4557 	"\t\t      cpudump\n"
4558 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4559 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4560 	"\t     The first one will disable tracing every time do_fault is hit\n"
4561 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4562 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4563 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4564 	"\t       the counter will not decrement. It only decrements when the\n"
4565 	"\t       trigger did work\n"
4566 	"\t     To remove trigger without count:\n"
4567 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4568 	"\t     To remove trigger with a count:\n"
4569 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4570 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4571 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4572 	"\t    modules: Can select a group via module command :mod:\n"
4573 	"\t    Does not accept triggers\n"
4574 #endif /* CONFIG_DYNAMIC_FTRACE */
4575 #ifdef CONFIG_FUNCTION_TRACER
4576 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4577 	"\t\t    (function)\n"
4578 #endif
4579 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4580 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4581 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4582 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4583 #endif
4584 #ifdef CONFIG_TRACER_SNAPSHOT
4585 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4586 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4587 	"\t\t\t  information\n"
4588 #endif
4589 #ifdef CONFIG_STACK_TRACER
4590 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4591 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4592 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4593 	"\t\t\t  new trace)\n"
4594 #ifdef CONFIG_DYNAMIC_FTRACE
4595 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4596 	"\t\t\t  traces\n"
4597 #endif
4598 #endif /* CONFIG_STACK_TRACER */
4599 #ifdef CONFIG_KPROBE_EVENTS
4600 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4601 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4602 #endif
4603 #ifdef CONFIG_UPROBE_EVENTS
4604 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4605 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4606 #endif
4607 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4608 	"\t  accepts: event-definitions (one definition per line)\n"
4609 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4610 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4611 	"\t           -:[<group>/]<event>\n"
4612 #ifdef CONFIG_KPROBE_EVENTS
4613 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4614 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4615 #endif
4616 #ifdef CONFIG_UPROBE_EVENTS
4617 	"\t    place: <path>:<offset>\n"
4618 #endif
4619 	"\t     args: <name>=fetcharg[:type]\n"
4620 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4621 	"\t           $stack<index>, $stack, $retval, $comm\n"
4622 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4623 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4624 #endif
4625 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4626 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4627 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4628 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4629 	"\t\t\t  events\n"
4630 	"      filter\t\t- If set, only events passing filter are traced\n"
4631 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4632 	"\t\t\t  <event>:\n"
4633 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4634 	"      filter\t\t- If set, only events passing filter are traced\n"
4635 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4636 	"\t    Format: <trigger>[:count][if <filter>]\n"
4637 	"\t   trigger: traceon, traceoff\n"
4638 	"\t            enable_event:<system>:<event>\n"
4639 	"\t            disable_event:<system>:<event>\n"
4640 #ifdef CONFIG_HIST_TRIGGERS
4641 	"\t            enable_hist:<system>:<event>\n"
4642 	"\t            disable_hist:<system>:<event>\n"
4643 #endif
4644 #ifdef CONFIG_STACKTRACE
4645 	"\t\t    stacktrace\n"
4646 #endif
4647 #ifdef CONFIG_TRACER_SNAPSHOT
4648 	"\t\t    snapshot\n"
4649 #endif
4650 #ifdef CONFIG_HIST_TRIGGERS
4651 	"\t\t    hist (see below)\n"
4652 #endif
4653 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4654 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4655 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4656 	"\t                  events/block/block_unplug/trigger\n"
4657 	"\t   The first disables tracing every time block_unplug is hit.\n"
4658 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4659 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4660 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4661 	"\t   Like function triggers, the counter is only decremented if it\n"
4662 	"\t    enabled or disabled tracing.\n"
4663 	"\t   To remove a trigger without a count:\n"
4664 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4665 	"\t   To remove a trigger with a count:\n"
4666 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4667 	"\t   Filters can be ignored when removing a trigger.\n"
4668 #ifdef CONFIG_HIST_TRIGGERS
4669 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4670 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4671 	"\t            [:values=<field1[,field2,...]>]\n"
4672 	"\t            [:sort=<field1[,field2,...]>]\n"
4673 	"\t            [:size=#entries]\n"
4674 	"\t            [:pause][:continue][:clear]\n"
4675 	"\t            [:name=histname1]\n"
4676 	"\t            [if <filter>]\n\n"
4677 	"\t    When a matching event is hit, an entry is added to a hash\n"
4678 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4679 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4680 	"\t    correspond to fields in the event's format description.  Keys\n"
4681 	"\t    can be any field, or the special string 'stacktrace'.\n"
4682 	"\t    Compound keys consisting of up to two fields can be specified\n"
4683 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4684 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4685 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4686 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4687 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4688 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4689 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4690 	"\t    its histogram data will be shared with other triggers of the\n"
4691 	"\t    same name, and trigger hits will update this common data.\n\n"
4692 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4693 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4694 	"\t    triggers attached to an event, there will be a table for each\n"
4695 	"\t    trigger in the output.  The table displayed for a named\n"
4696 	"\t    trigger will be the same as any other instance having the\n"
4697 	"\t    same name.  The default format used to display a given field\n"
4698 	"\t    can be modified by appending any of the following modifiers\n"
4699 	"\t    to the field name, as applicable:\n\n"
4700 	"\t            .hex        display a number as a hex value\n"
4701 	"\t            .sym        display an address as a symbol\n"
4702 	"\t            .sym-offset display an address as a symbol and offset\n"
4703 	"\t            .execname   display a common_pid as a program name\n"
4704 	"\t            .syscall    display a syscall id as a syscall name\n"
4705 	"\t            .log2       display log2 value rather than raw number\n"
4706 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4707 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4708 	"\t    trigger or to start a hist trigger but not log any events\n"
4709 	"\t    until told to do so.  'continue' can be used to start or\n"
4710 	"\t    restart a paused hist trigger.\n\n"
4711 	"\t    The 'clear' parameter will clear the contents of a running\n"
4712 	"\t    hist trigger and leave its current paused/active state\n"
4713 	"\t    unchanged.\n\n"
4714 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4715 	"\t    have one event conditionally start and stop another event's\n"
4716 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4717 	"\t    the enable_event and disable_event triggers.\n"
4718 #endif
4719 ;
4720 
4721 static ssize_t
4722 tracing_readme_read(struct file *filp, char __user *ubuf,
4723 		       size_t cnt, loff_t *ppos)
4724 {
4725 	return simple_read_from_buffer(ubuf, cnt, ppos,
4726 					readme_msg, strlen(readme_msg));
4727 }
4728 
4729 static const struct file_operations tracing_readme_fops = {
4730 	.open		= tracing_open_generic,
4731 	.read		= tracing_readme_read,
4732 	.llseek		= generic_file_llseek,
4733 };
4734 
4735 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4736 {
4737 	int *ptr = v;
4738 
4739 	if (*pos || m->count)
4740 		ptr++;
4741 
4742 	(*pos)++;
4743 
4744 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4745 		if (trace_find_tgid(*ptr))
4746 			return ptr;
4747 	}
4748 
4749 	return NULL;
4750 }
4751 
4752 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4753 {
4754 	void *v;
4755 	loff_t l = 0;
4756 
4757 	if (!tgid_map)
4758 		return NULL;
4759 
4760 	v = &tgid_map[0];
4761 	while (l <= *pos) {
4762 		v = saved_tgids_next(m, v, &l);
4763 		if (!v)
4764 			return NULL;
4765 	}
4766 
4767 	return v;
4768 }
4769 
4770 static void saved_tgids_stop(struct seq_file *m, void *v)
4771 {
4772 }
4773 
4774 static int saved_tgids_show(struct seq_file *m, void *v)
4775 {
4776 	int pid = (int *)v - tgid_map;
4777 
4778 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4779 	return 0;
4780 }
4781 
4782 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4783 	.start		= saved_tgids_start,
4784 	.stop		= saved_tgids_stop,
4785 	.next		= saved_tgids_next,
4786 	.show		= saved_tgids_show,
4787 };
4788 
4789 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4790 {
4791 	if (tracing_disabled)
4792 		return -ENODEV;
4793 
4794 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4795 }
4796 
4797 
4798 static const struct file_operations tracing_saved_tgids_fops = {
4799 	.open		= tracing_saved_tgids_open,
4800 	.read		= seq_read,
4801 	.llseek		= seq_lseek,
4802 	.release	= seq_release,
4803 };
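
/*
 * Usage sketch (not part of this file): saved_tgids only has entries
 * once the record-tgid option is set, since that is what allocates and
 * fills tgid_map (see set_tracer_flag() above):
 *
 *   # echo 1 > /sys/kernel/tracing/options/record-tgid
 *   # cat /sys/kernel/tracing/saved_tgids
 */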
4804 
4805 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4806 {
4807 	unsigned int *ptr = v;
4808 
4809 	if (*pos || m->count)
4810 		ptr++;
4811 
4812 	(*pos)++;
4813 
4814 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4815 	     ptr++) {
4816 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4817 			continue;
4818 
4819 		return ptr;
4820 	}
4821 
4822 	return NULL;
4823 }
4824 
4825 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4826 {
4827 	void *v;
4828 	loff_t l = 0;
4829 
4830 	preempt_disable();
4831 	arch_spin_lock(&trace_cmdline_lock);
4832 
4833 	v = &savedcmd->map_cmdline_to_pid[0];
4834 	while (l <= *pos) {
4835 		v = saved_cmdlines_next(m, v, &l);
4836 		if (!v)
4837 			return NULL;
4838 	}
4839 
4840 	return v;
4841 }
4842 
4843 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4844 {
4845 	arch_spin_unlock(&trace_cmdline_lock);
4846 	preempt_enable();
4847 }
4848 
4849 static int saved_cmdlines_show(struct seq_file *m, void *v)
4850 {
4851 	char buf[TASK_COMM_LEN];
4852 	unsigned int *pid = v;
4853 
4854 	__trace_find_cmdline(*pid, buf);
4855 	seq_printf(m, "%d %s\n", *pid, buf);
4856 	return 0;
4857 }
4858 
4859 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4860 	.start		= saved_cmdlines_start,
4861 	.next		= saved_cmdlines_next,
4862 	.stop		= saved_cmdlines_stop,
4863 	.show		= saved_cmdlines_show,
4864 };
4865 
4866 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4867 {
4868 	if (tracing_disabled)
4869 		return -ENODEV;
4870 
4871 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4872 }
4873 
4874 static const struct file_operations tracing_saved_cmdlines_fops = {
4875 	.open		= tracing_saved_cmdlines_open,
4876 	.read		= seq_read,
4877 	.llseek		= seq_lseek,
4878 	.release	= seq_release,
4879 };
4880 
4881 static ssize_t
4882 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4883 				 size_t cnt, loff_t *ppos)
4884 {
4885 	char buf[64];
4886 	int r;
4887 
4888 	arch_spin_lock(&trace_cmdline_lock);
4889 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4890 	arch_spin_unlock(&trace_cmdline_lock);
4891 
4892 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4893 }
4894 
4895 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4896 {
4897 	kfree(s->saved_cmdlines);
4898 	kfree(s->map_cmdline_to_pid);
4899 	kfree(s);
4900 }
4901 
4902 static int tracing_resize_saved_cmdlines(unsigned int val)
4903 {
4904 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4905 
4906 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4907 	if (!s)
4908 		return -ENOMEM;
4909 
4910 	if (allocate_cmdlines_buffer(val, s) < 0) {
4911 		kfree(s);
4912 		return -ENOMEM;
4913 	}
4914 
4915 	arch_spin_lock(&trace_cmdline_lock);
4916 	savedcmd_temp = savedcmd;
4917 	savedcmd = s;
4918 	arch_spin_unlock(&trace_cmdline_lock);
4919 	free_saved_cmdlines_buffer(savedcmd_temp);
4920 
4921 	return 0;
4922 }
4923 
4924 static ssize_t
4925 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4926 				  size_t cnt, loff_t *ppos)
4927 {
4928 	unsigned long val;
4929 	int ret;
4930 
4931 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4932 	if (ret)
4933 		return ret;
4934 
4935 	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
4936 	if (!val || val > PID_MAX_DEFAULT)
4937 		return -EINVAL;
4938 
4939 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4940 	if (ret < 0)
4941 		return ret;
4942 
4943 	*ppos += cnt;
4944 
4945 	return cnt;
4946 }
4947 
4948 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4949 	.open		= tracing_open_generic,
4950 	.read		= tracing_saved_cmdlines_size_read,
4951 	.write		= tracing_saved_cmdlines_size_write,
4952 };
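
/*
 * Usage sketch (not part of this file): writing a new count reallocates
 * the comm cache via tracing_resize_saved_cmdlines() above, e.g.:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */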
4953 
4954 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4955 static union trace_eval_map_item *
4956 update_eval_map(union trace_eval_map_item *ptr)
4957 {
4958 	if (!ptr->map.eval_string) {
4959 		if (ptr->tail.next) {
4960 			ptr = ptr->tail.next;
4961 			/* Set ptr to the next real item (skip head) */
4962 			ptr++;
4963 		} else
4964 			return NULL;
4965 	}
4966 	return ptr;
4967 }
4968 
4969 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4970 {
4971 	union trace_eval_map_item *ptr = v;
4972 
4973 	/*
4974 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4975 	 * This really should never happen.
4976 	 */
4977 	ptr = update_eval_map(ptr);
4978 	if (WARN_ON_ONCE(!ptr))
4979 		return NULL;
4980 
4981 	ptr++;
4982 
4983 	(*pos)++;
4984 
4985 	ptr = update_eval_map(ptr);
4986 
4987 	return ptr;
4988 }
4989 
4990 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4991 {
4992 	union trace_eval_map_item *v;
4993 	loff_t l = 0;
4994 
4995 	mutex_lock(&trace_eval_mutex);
4996 
4997 	v = trace_eval_maps;
4998 	if (v)
4999 		v++;
5000 
5001 	while (v && l < *pos) {
5002 		v = eval_map_next(m, v, &l);
5003 	}
5004 
5005 	return v;
5006 }
5007 
5008 static void eval_map_stop(struct seq_file *m, void *v)
5009 {
5010 	mutex_unlock(&trace_eval_mutex);
5011 }
5012 
5013 static int eval_map_show(struct seq_file *m, void *v)
5014 {
5015 	union trace_eval_map_item *ptr = v;
5016 
5017 	seq_printf(m, "%s %ld (%s)\n",
5018 		   ptr->map.eval_string, ptr->map.eval_value,
5019 		   ptr->map.system);
5020 
5021 	return 0;
5022 }
5023 
5024 static const struct seq_operations tracing_eval_map_seq_ops = {
5025 	.start		= eval_map_start,
5026 	.next		= eval_map_next,
5027 	.stop		= eval_map_stop,
5028 	.show		= eval_map_show,
5029 };
5030 
5031 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5032 {
5033 	if (tracing_disabled)
5034 		return -ENODEV;
5035 
5036 	return seq_open(filp, &tracing_eval_map_seq_ops);
5037 }
5038 
5039 static const struct file_operations tracing_eval_map_fops = {
5040 	.open		= tracing_eval_map_open,
5041 	.read		= seq_read,
5042 	.llseek		= seq_lseek,
5043 	.release	= seq_release,
5044 };
5045 
5046 static inline union trace_eval_map_item *
5047 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5048 {
5049 	/* Return tail of array given the head */
5050 	return ptr + ptr->head.length + 1;
5051 }
5052 
5053 static void
5054 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5055 			   int len)
5056 {
5057 	struct trace_eval_map **stop;
5058 	struct trace_eval_map **map;
5059 	union trace_eval_map_item *map_array;
5060 	union trace_eval_map_item *ptr;
5061 
5062 	stop = start + len;
5063 
5064 	/*
5065 	 * The trace_eval_maps contains the map plus a head and tail item,
5066 	 * where the head holds the module and length of array, and the
5067 	 * tail holds a pointer to the next list.
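	 *
	 * A rough layout sketch for one inserted block (len == 3):
	 *
	 *   [ head(mod, length) | map 0 | map 1 | map 2 | tail(next) ]
	 *
	 * trace_eval_jmp_to_tail() uses head.length to skip from the head
	 * straight to the tail entry.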
5068 	 */
5069 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5070 	if (!map_array) {
5071 		pr_warn("Unable to allocate trace eval mapping\n");
5072 		return;
5073 	}
5074 
5075 	mutex_lock(&trace_eval_mutex);
5076 
5077 	if (!trace_eval_maps)
5078 		trace_eval_maps = map_array;
5079 	else {
5080 		ptr = trace_eval_maps;
5081 		for (;;) {
5082 			ptr = trace_eval_jmp_to_tail(ptr);
5083 			if (!ptr->tail.next)
5084 				break;
5085 			ptr = ptr->tail.next;
5086 
5087 		}
5088 		ptr->tail.next = map_array;
5089 	}
5090 	map_array->head.mod = mod;
5091 	map_array->head.length = len;
5092 	map_array++;
5093 
5094 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5095 		map_array->map = **map;
5096 		map_array++;
5097 	}
5098 	memset(map_array, 0, sizeof(*map_array));
5099 
5100 	mutex_unlock(&trace_eval_mutex);
5101 }
5102 
5103 static void trace_create_eval_file(struct dentry *d_tracer)
5104 {
5105 	trace_create_file("eval_map", 0444, d_tracer,
5106 			  NULL, &tracing_eval_map_fops);
5107 }
5108 
5109 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5110 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5111 static inline void trace_insert_eval_map_file(struct module *mod,
5112 			      struct trace_eval_map **start, int len) { }
5113 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5114 
5115 static void trace_insert_eval_map(struct module *mod,
5116 				  struct trace_eval_map **start, int len)
5117 {
5118 	struct trace_eval_map **map;
5119 
5120 	if (len <= 0)
5121 		return;
5122 
5123 	map = start;
5124 
5125 	trace_event_eval_update(map, len);
5126 
5127 	trace_insert_eval_map_file(mod, start, len);
5128 }
5129 
5130 static ssize_t
5131 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5132 		       size_t cnt, loff_t *ppos)
5133 {
5134 	struct trace_array *tr = filp->private_data;
5135 	char buf[MAX_TRACER_SIZE+2];
5136 	int r;
5137 
5138 	mutex_lock(&trace_types_lock);
5139 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5140 	mutex_unlock(&trace_types_lock);
5141 
5142 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5143 }
5144 
5145 int tracer_init(struct tracer *t, struct trace_array *tr)
5146 {
5147 	tracing_reset_online_cpus(&tr->trace_buffer);
5148 	return t->init(tr);
5149 }
5150 
5151 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5152 {
5153 	int cpu;
5154 
5155 	for_each_tracing_cpu(cpu)
5156 		per_cpu_ptr(buf->data, cpu)->entries = val;
5157 }
5158 
5159 #ifdef CONFIG_TRACER_MAX_TRACE
5160 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5161 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5162 					struct trace_buffer *size_buf, int cpu_id)
5163 {
5164 	int cpu, ret = 0;
5165 
5166 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5167 		for_each_tracing_cpu(cpu) {
5168 			ret = ring_buffer_resize(trace_buf->buffer,
5169 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5170 			if (ret < 0)
5171 				break;
5172 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5173 				per_cpu_ptr(size_buf->data, cpu)->entries;
5174 		}
5175 	} else {
5176 		ret = ring_buffer_resize(trace_buf->buffer,
5177 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5178 		if (ret == 0)
5179 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5180 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5181 	}
5182 
5183 	return ret;
5184 }
5185 #endif /* CONFIG_TRACER_MAX_TRACE */
5186 
5187 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5188 					unsigned long size, int cpu)
5189 {
5190 	int ret;
5191 
5192 	/*
5193 	 * If kernel or user changes the size of the ring buffer
5194 	 * we use the size that was given, and we can forget about
5195 	 * expanding it later.
5196 	 */
5197 	ring_buffer_expanded = true;
5198 
5199 	/* May be called before buffers are initialized */
5200 	if (!tr->trace_buffer.buffer)
5201 		return 0;
5202 
5203 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5204 	if (ret < 0)
5205 		return ret;
5206 
5207 #ifdef CONFIG_TRACER_MAX_TRACE
5208 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5209 	    !tr->current_trace->use_max_tr)
5210 		goto out;
5211 
5212 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5213 	if (ret < 0) {
5214 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5215 						     &tr->trace_buffer, cpu);
5216 		if (r < 0) {
5217 			/*
5218 			 * AARGH! We are left with a differently
5219 			 * sized max buffer!
5220 			 * The max buffer is our "snapshot" buffer.
5221 			 * When a tracer needs a snapshot (one of the
5222 			 * latency tracers), it swaps the max buffer
5223 			 * with the saved snapshot. We succeeded in
5224 			 * updating the size of the main buffer, but failed
5225 			 * to update the size of the max buffer. And when we
5226 			 * tried to reset the main buffer to its original
5227 			 * size, we failed there too. This is very unlikely
5228 			 * to happen, but if it does, warn and kill all
5229 			 * tracing.
5230 			 */
5231 			WARN_ON(1);
5232 			tracing_disabled = 1;
5233 		}
5234 		return ret;
5235 	}
5236 
5237 	if (cpu == RING_BUFFER_ALL_CPUS)
5238 		set_buffer_entries(&tr->max_buffer, size);
5239 	else
5240 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5241 
5242  out:
5243 #endif /* CONFIG_TRACER_MAX_TRACE */
5244 
5245 	if (cpu == RING_BUFFER_ALL_CPUS)
5246 		set_buffer_entries(&tr->trace_buffer, size);
5247 	else
5248 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5249 
5250 	return ret;
5251 }
5252 
5253 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5254 					  unsigned long size, int cpu_id)
5255 {
5256 	int ret = size;
5257 
5258 	mutex_lock(&trace_types_lock);
5259 
5260 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5261 		/* make sure, this cpu is enabled in the mask */
5262 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5263 			ret = -EINVAL;
5264 			goto out;
5265 		}
5266 	}
5267 
5268 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5269 	if (ret < 0)
5270 		ret = -ENOMEM;
5271 
5272 out:
5273 	mutex_unlock(&trace_types_lock);
5274 
5275 	return ret;
5276 }
5277 
5278 
5279 /**
5280  * tracing_update_buffers - used by tracing facility to expand ring buffers
5281  *
5282  * To save memory when tracing is never used on a system that has it
5283  * configured in, the ring buffers are set to a minimum size. But once
5284  * a user starts to use the tracing facility, they need to grow
5285  * to their default size.
5286  *
5287  * This function is to be called when a tracer is about to be used.
5288  */
5289 int tracing_update_buffers(void)
5290 {
5291 	int ret = 0;
5292 
5293 	mutex_lock(&trace_types_lock);
5294 	if (!ring_buffer_expanded)
5295 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5296 						RING_BUFFER_ALL_CPUS);
5297 	mutex_unlock(&trace_types_lock);
5298 
5299 	return ret;
5300 }
5301 
5302 struct trace_option_dentry;
5303 
5304 static void
5305 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5306 
5307 /*
5308  * Used to clear out the tracer before deletion of an instance.
5309  * Must have trace_types_lock held.
5310  */
5311 static void tracing_set_nop(struct trace_array *tr)
5312 {
5313 	if (tr->current_trace == &nop_trace)
5314 		return;
5315 
5316 	tr->current_trace->enabled--;
5317 
5318 	if (tr->current_trace->reset)
5319 		tr->current_trace->reset(tr);
5320 
5321 	tr->current_trace = &nop_trace;
5322 }
5323 
5324 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5325 {
5326 	/* Only enable if the directory has been created already. */
5327 	if (!tr->dir)
5328 		return;
5329 
5330 	create_trace_option_files(tr, t);
5331 }
5332 
5333 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5334 {
5335 	struct tracer *t;
5336 #ifdef CONFIG_TRACER_MAX_TRACE
5337 	bool had_max_tr;
5338 #endif
5339 	int ret = 0;
5340 
5341 	mutex_lock(&trace_types_lock);
5342 
5343 	if (!ring_buffer_expanded) {
5344 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5345 						RING_BUFFER_ALL_CPUS);
5346 		if (ret < 0)
5347 			goto out;
5348 		ret = 0;
5349 	}
5350 
5351 	for (t = trace_types; t; t = t->next) {
5352 		if (strcmp(t->name, buf) == 0)
5353 			break;
5354 	}
5355 	if (!t) {
5356 		ret = -EINVAL;
5357 		goto out;
5358 	}
5359 	if (t == tr->current_trace)
5360 		goto out;
5361 
5362 	/* Some tracers won't work on kernel command line */
5363 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5364 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5365 			t->name);
5366 		goto out;
5367 	}
5368 
5369 	/* Some tracers are only allowed for the top level buffer */
5370 	if (!trace_ok_for_array(t, tr)) {
5371 		ret = -EINVAL;
5372 		goto out;
5373 	}
5374 
5375 	/* If trace pipe files are being read, we can't change the tracer */
5376 	if (tr->current_trace->ref) {
5377 		ret = -EBUSY;
5378 		goto out;
5379 	}
5380 
5381 	trace_branch_disable();
5382 
5383 	tr->current_trace->enabled--;
5384 
5385 	if (tr->current_trace->reset)
5386 		tr->current_trace->reset(tr);
5387 
5388 	/* Current trace needs to be nop_trace before synchronize_sched */
5389 	tr->current_trace = &nop_trace;
5390 
5391 #ifdef CONFIG_TRACER_MAX_TRACE
5392 	had_max_tr = tr->allocated_snapshot;
5393 
5394 	if (had_max_tr && !t->use_max_tr) {
5395 		/*
5396 		 * We need to make sure that the update_max_tr sees that
5397 		 * current_trace changed to nop_trace to keep it from
5398 		 * swapping the buffers after we resize it.
5399 		 * The update_max_tr is called with interrupts disabled,
5400 		 * so a synchronize_sched() is sufficient.
5401 		 */
5402 		synchronize_sched();
5403 		free_snapshot(tr);
5404 	}
5405 #endif
5406 
5407 #ifdef CONFIG_TRACER_MAX_TRACE
5408 	if (t->use_max_tr && !had_max_tr) {
5409 		ret = tracing_alloc_snapshot_instance(tr);
5410 		if (ret < 0)
5411 			goto out;
5412 	}
5413 #endif
5414 
5415 	if (t->init) {
5416 		ret = tracer_init(t, tr);
5417 		if (ret)
5418 			goto out;
5419 	}
5420 
5421 	tr->current_trace = t;
5422 	tr->current_trace->enabled++;
5423 	trace_branch_enable(tr);
5424  out:
5425 	mutex_unlock(&trace_types_lock);
5426 
5427 	return ret;
5428 }
5429 
5430 static ssize_t
5431 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5432 			size_t cnt, loff_t *ppos)
5433 {
5434 	struct trace_array *tr = filp->private_data;
5435 	char buf[MAX_TRACER_SIZE+1];
5436 	int i;
5437 	size_t ret;
5438 	int err;
5439 
5440 	ret = cnt;
5441 
5442 	if (cnt > MAX_TRACER_SIZE)
5443 		cnt = MAX_TRACER_SIZE;
5444 
5445 	if (copy_from_user(buf, ubuf, cnt))
5446 		return -EFAULT;
5447 
5448 	buf[cnt] = 0;
5449 
5450 	/* strip ending whitespace. */
5451 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5452 		buf[i] = 0;
5453 
5454 	err = tracing_set_tracer(tr, buf);
5455 	if (err)
5456 		return err;
5457 
5458 	*ppos += ret;
5459 
5460 	return ret;
5461 }
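
/*
 * Usage sketch (not part of this file): current_tracer is switched by
 * writing one of the names listed in available_tracers, e.g.:
 *
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 */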
5462 
5463 static ssize_t
5464 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5465 		   size_t cnt, loff_t *ppos)
5466 {
5467 	char buf[64];
5468 	int r;
5469 
5470 	r = snprintf(buf, sizeof(buf), "%ld\n",
5471 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5472 	if (r > sizeof(buf))
5473 		r = sizeof(buf);
5474 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5475 }
5476 
5477 static ssize_t
5478 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5479 		    size_t cnt, loff_t *ppos)
5480 {
5481 	unsigned long val;
5482 	int ret;
5483 
5484 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5485 	if (ret)
5486 		return ret;
5487 
5488 	*ptr = val * 1000;
5489 
5490 	return cnt;
5491 }
5492 
5493 static ssize_t
5494 tracing_thresh_read(struct file *filp, char __user *ubuf,
5495 		    size_t cnt, loff_t *ppos)
5496 {
5497 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5498 }
5499 
5500 static ssize_t
5501 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5502 		     size_t cnt, loff_t *ppos)
5503 {
5504 	struct trace_array *tr = filp->private_data;
5505 	int ret;
5506 
5507 	mutex_lock(&trace_types_lock);
5508 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5509 	if (ret < 0)
5510 		goto out;
5511 
5512 	if (tr->current_trace->update_thresh) {
5513 		ret = tr->current_trace->update_thresh(tr);
5514 		if (ret < 0)
5515 			goto out;
5516 	}
5517 
5518 	ret = cnt;
5519 out:
5520 	mutex_unlock(&trace_types_lock);
5521 
5522 	return ret;
5523 }
5524 
5525 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5526 
5527 static ssize_t
5528 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5529 		     size_t cnt, loff_t *ppos)
5530 {
5531 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5532 }
5533 
5534 static ssize_t
5535 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5536 		      size_t cnt, loff_t *ppos)
5537 {
5538 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5539 }
5540 
5541 #endif
5542 
5543 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5544 {
5545 	struct trace_array *tr = inode->i_private;
5546 	struct trace_iterator *iter;
5547 	int ret = 0;
5548 
5549 	if (tracing_disabled)
5550 		return -ENODEV;
5551 
5552 	if (trace_array_get(tr) < 0)
5553 		return -ENODEV;
5554 
5555 	mutex_lock(&trace_types_lock);
5556 
5557 	/* create a buffer to store the information to pass to userspace */
5558 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5559 	if (!iter) {
5560 		ret = -ENOMEM;
5561 		__trace_array_put(tr);
5562 		goto out;
5563 	}
5564 
5565 	trace_seq_init(&iter->seq);
5566 	iter->trace = tr->current_trace;
5567 
5568 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5569 		ret = -ENOMEM;
5570 		goto fail;
5571 	}
5572 
5573 	/* trace pipe does not show start of buffer */
5574 	cpumask_setall(iter->started);
5575 
5576 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5577 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5578 
5579 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5580 	if (trace_clocks[tr->clock_id].in_ns)
5581 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5582 
5583 	iter->tr = tr;
5584 	iter->trace_buffer = &tr->trace_buffer;
5585 	iter->cpu_file = tracing_get_cpu(inode);
5586 	mutex_init(&iter->mutex);
5587 	filp->private_data = iter;
5588 
5589 	if (iter->trace->pipe_open)
5590 		iter->trace->pipe_open(iter);
5591 
5592 	nonseekable_open(inode, filp);
5593 
5594 	tr->current_trace->ref++;
5595 out:
5596 	mutex_unlock(&trace_types_lock);
5597 	return ret;
5598 
5599 fail:
5600 	kfree(iter->trace);
5601 	kfree(iter);
5602 	__trace_array_put(tr);
5603 	mutex_unlock(&trace_types_lock);
5604 	return ret;
5605 }
5606 
5607 static int tracing_release_pipe(struct inode *inode, struct file *file)
5608 {
5609 	struct trace_iterator *iter = file->private_data;
5610 	struct trace_array *tr = inode->i_private;
5611 
5612 	mutex_lock(&trace_types_lock);
5613 
5614 	tr->current_trace->ref--;
5615 
5616 	if (iter->trace->pipe_close)
5617 		iter->trace->pipe_close(iter);
5618 
5619 	mutex_unlock(&trace_types_lock);
5620 
5621 	free_cpumask_var(iter->started);
5622 	mutex_destroy(&iter->mutex);
5623 	kfree(iter);
5624 
5625 	trace_array_put(tr);
5626 
5627 	return 0;
5628 }
5629 
5630 static __poll_t
5631 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5632 {
5633 	struct trace_array *tr = iter->tr;
5634 
5635 	/* Iterators are static, they should be filled or empty */
5636 	if (trace_buffer_iter(iter, iter->cpu_file))
5637 		return EPOLLIN | EPOLLRDNORM;
5638 
5639 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5640 		/*
5641 		 * Always select as readable when in blocking mode
5642 		 */
5643 		return EPOLLIN | EPOLLRDNORM;
5644 	else
5645 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5646 					     filp, poll_table);
5647 }
5648 
5649 static __poll_t
5650 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5651 {
5652 	struct trace_iterator *iter = filp->private_data;
5653 
5654 	return trace_poll(iter, filp, poll_table);
5655 }
5656 
5657 /* Must be called with iter->mutex held. */
5658 static int tracing_wait_pipe(struct file *filp)
5659 {
5660 	struct trace_iterator *iter = filp->private_data;
5661 	int ret;
5662 
5663 	while (trace_empty(iter)) {
5664 
5665 		if ((filp->f_flags & O_NONBLOCK)) {
5666 			return -EAGAIN;
5667 		}
5668 
5669 		/*
5670 		 * We block until we read something and tracing is disabled.
5671 		 * We still block if tracing is disabled, but we have never
5672 		 * read anything. This allows a user to cat this file, and
5673 		 * then enable tracing. But after we have read something,
5674 		 * we give an EOF when tracing is again disabled.
5675 		 *
5676 		 * iter->pos will be 0 if we haven't read anything.
5677 		 */
5678 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5679 			break;
5680 
5681 		mutex_unlock(&iter->mutex);
5682 
5683 		ret = wait_on_pipe(iter, false);
5684 
5685 		mutex_lock(&iter->mutex);
5686 
5687 		if (ret)
5688 			return ret;
5689 	}
5690 
5691 	return 1;
5692 }
5693 
5694 /*
5695  * Consumer reader.
5696  */
5697 static ssize_t
5698 tracing_read_pipe(struct file *filp, char __user *ubuf,
5699 		  size_t cnt, loff_t *ppos)
5700 {
5701 	struct trace_iterator *iter = filp->private_data;
5702 	ssize_t sret;
5703 
5704 	/*
5705 	 * Avoid more than one consumer on a single file descriptor.
5706 	 * This is only a matter of trace coherency; the ring buffer itself
5707 	 * is protected.
5708 	 */
5709 	mutex_lock(&iter->mutex);
5710 
5711 	/* return any leftover data */
5712 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5713 	if (sret != -EBUSY)
5714 		goto out;
5715 
5716 	trace_seq_init(&iter->seq);
5717 
5718 	if (iter->trace->read) {
5719 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5720 		if (sret)
5721 			goto out;
5722 	}
5723 
5724 waitagain:
5725 	sret = tracing_wait_pipe(filp);
5726 	if (sret <= 0)
5727 		goto out;
5728 
5729 	/* stop when tracing is finished */
5730 	if (trace_empty(iter)) {
5731 		sret = 0;
5732 		goto out;
5733 	}
5734 
5735 	if (cnt >= PAGE_SIZE)
5736 		cnt = PAGE_SIZE - 1;
5737 
5738 	/* reset all but tr, trace, and overruns */
5739 	memset(&iter->seq, 0,
5740 	       sizeof(struct trace_iterator) -
5741 	       offsetof(struct trace_iterator, seq));
5742 	cpumask_clear(iter->started);
5743 	iter->pos = -1;
5744 
5745 	trace_event_read_lock();
5746 	trace_access_lock(iter->cpu_file);
5747 	while (trace_find_next_entry_inc(iter) != NULL) {
5748 		enum print_line_t ret;
5749 		int save_len = iter->seq.seq.len;
5750 
5751 		ret = print_trace_line(iter);
5752 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5753 			/* don't print partial lines */
5754 			iter->seq.seq.len = save_len;
5755 			break;
5756 		}
5757 		if (ret != TRACE_TYPE_NO_CONSUME)
5758 			trace_consume(iter);
5759 
5760 		if (trace_seq_used(&iter->seq) >= cnt)
5761 			break;
5762 
5763 		/*
5764 		 * If the full flag is set, we hit the end of the trace_seq buffer
5765 		 * and should have left via the partial-line condition above;
5766 		 * one of the trace_seq_* functions was not used properly.
5767 		 */
5768 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5769 			  iter->ent->type);
5770 	}
5771 	trace_access_unlock(iter->cpu_file);
5772 	trace_event_read_unlock();
5773 
5774 	/* Now copy what we have to the user */
5775 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5776 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5777 		trace_seq_init(&iter->seq);
5778 
5779 	/*
5780 	 * If there was nothing to send to user, in spite of consuming trace
5781 	 * entries, go back to wait for more entries.
5782 	 */
5783 	if (sret == -EBUSY)
5784 		goto waitagain;
5785 
5786 out:
5787 	mutex_unlock(&iter->mutex);
5788 
5789 	return sret;
5790 }
5791 
5792 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5793 				     unsigned int idx)
5794 {
5795 	__free_page(spd->pages[idx]);
5796 }
5797 
5798 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5799 	.can_merge		= 0,
5800 	.confirm		= generic_pipe_buf_confirm,
5801 	.release		= generic_pipe_buf_release,
5802 	.steal			= generic_pipe_buf_steal,
5803 	.get			= generic_pipe_buf_get,
5804 };
5805 
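/*
 * Render trace entries into iter->seq until either the page-sized seq
 * buffer fills up or the remaining byte budget "rem" is used up.
 * Entries are consumed from the ring buffer as they are rendered, and
 * the unused remainder of "rem" is returned.
 */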
5806 static size_t
5807 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5808 {
5809 	size_t count;
5810 	int save_len;
5811 	int ret;
5812 
5813 	/* Seq buffer is page-sized, exactly what we need. */
5814 	for (;;) {
5815 		save_len = iter->seq.seq.len;
5816 		ret = print_trace_line(iter);
5817 
5818 		if (trace_seq_has_overflowed(&iter->seq)) {
5819 			iter->seq.seq.len = save_len;
5820 			break;
5821 		}
5822 
5823 		/*
5824 		 * This should not be hit, because it should only
5825 		 * be set if the iter->seq overflowed. But check it
5826 		 * anyway to be safe.
5827 		 */
5828 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5829 			iter->seq.seq.len = save_len;
5830 			break;
5831 		}
5832 
5833 		count = trace_seq_used(&iter->seq) - save_len;
5834 		if (rem < count) {
5835 			rem = 0;
5836 			iter->seq.seq.len = save_len;
5837 			break;
5838 		}
5839 
5840 		if (ret != TRACE_TYPE_NO_CONSUME)
5841 			trace_consume(iter);
5842 		rem -= count;
5843 		if (!trace_find_next_entry_inc(iter))	{
5844 			rem = 0;
5845 			iter->ent = NULL;
5846 			break;
5847 		}
5848 	}
5849 
5850 	return rem;
5851 }
5852 
5853 static ssize_t tracing_splice_read_pipe(struct file *filp,
5854 					loff_t *ppos,
5855 					struct pipe_inode_info *pipe,
5856 					size_t len,
5857 					unsigned int flags)
5858 {
5859 	struct page *pages_def[PIPE_DEF_BUFFERS];
5860 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5861 	struct trace_iterator *iter = filp->private_data;
5862 	struct splice_pipe_desc spd = {
5863 		.pages		= pages_def,
5864 		.partial	= partial_def,
5865 		.nr_pages	= 0, /* This gets updated below. */
5866 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5867 		.ops		= &tracing_pipe_buf_ops,
5868 		.spd_release	= tracing_spd_release_pipe,
5869 	};
5870 	ssize_t ret;
5871 	size_t rem;
5872 	unsigned int i;
5873 
5874 	if (splice_grow_spd(pipe, &spd))
5875 		return -ENOMEM;
5876 
5877 	mutex_lock(&iter->mutex);
5878 
5879 	if (iter->trace->splice_read) {
5880 		ret = iter->trace->splice_read(iter, filp,
5881 					       ppos, pipe, len, flags);
5882 		if (ret)
5883 			goto out_err;
5884 	}
5885 
5886 	ret = tracing_wait_pipe(filp);
5887 	if (ret <= 0)
5888 		goto out_err;
5889 
5890 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5891 		ret = -EFAULT;
5892 		goto out_err;
5893 	}
5894 
5895 	trace_event_read_lock();
5896 	trace_access_lock(iter->cpu_file);
5897 
5898 	/* Fill as many pages as possible. */
5899 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5900 		spd.pages[i] = alloc_page(GFP_KERNEL);
5901 		if (!spd.pages[i])
5902 			break;
5903 
5904 		rem = tracing_fill_pipe_page(rem, iter);
5905 
5906 		/* Copy the data into the page, so we can start over. */
5907 		ret = trace_seq_to_buffer(&iter->seq,
5908 					  page_address(spd.pages[i]),
5909 					  trace_seq_used(&iter->seq));
5910 		if (ret < 0) {
5911 			__free_page(spd.pages[i]);
5912 			break;
5913 		}
5914 		spd.partial[i].offset = 0;
5915 		spd.partial[i].len = trace_seq_used(&iter->seq);
5916 
5917 		trace_seq_init(&iter->seq);
5918 	}
5919 
5920 	trace_access_unlock(iter->cpu_file);
5921 	trace_event_read_unlock();
5922 	mutex_unlock(&iter->mutex);
5923 
5924 	spd.nr_pages = i;
5925 
5926 	if (i)
5927 		ret = splice_to_pipe(pipe, &spd);
5928 	else
5929 		ret = 0;
5930 out:
5931 	splice_shrink_spd(&spd);
5932 	return ret;
5933 
5934 out_err:
5935 	mutex_unlock(&iter->mutex);
5936 	goto out;
5937 }
5938 
5939 static ssize_t
5940 tracing_entries_read(struct file *filp, char __user *ubuf,
5941 		     size_t cnt, loff_t *ppos)
5942 {
5943 	struct inode *inode = file_inode(filp);
5944 	struct trace_array *tr = inode->i_private;
5945 	int cpu = tracing_get_cpu(inode);
5946 	char buf[64];
5947 	int r = 0;
5948 	ssize_t ret;
5949 
5950 	mutex_lock(&trace_types_lock);
5951 
5952 	if (cpu == RING_BUFFER_ALL_CPUS) {
5953 		int cpu, buf_size_same;
5954 		unsigned long size;
5955 
5956 		size = 0;
5957 		buf_size_same = 1;
5958 		/* check if all cpu sizes are same */
5959 		for_each_tracing_cpu(cpu) {
5960 			/* fill in the size from first enabled cpu */
5961 			if (size == 0)
5962 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5963 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5964 				buf_size_same = 0;
5965 				break;
5966 			}
5967 		}
5968 
5969 		if (buf_size_same) {
5970 			if (!ring_buffer_expanded)
5971 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5972 					    size >> 10,
5973 					    trace_buf_size >> 10);
5974 			else
5975 				r = sprintf(buf, "%lu\n", size >> 10);
5976 		} else
5977 			r = sprintf(buf, "X\n");
5978 	} else
5979 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5980 
5981 	mutex_unlock(&trace_types_lock);
5982 
5983 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5984 	return ret;
5985 }
5986 
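/*
 * Write handler for buffer_size_kb.  The value is taken in kilobytes,
 * e.g. "echo 4096 > buffer_size_kb" resizes the buffer of every CPU
 * (or of one CPU, via per_cpu/cpuN/buffer_size_kb) to 4 MB.
 */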
5987 static ssize_t
5988 tracing_entries_write(struct file *filp, const char __user *ubuf,
5989 		      size_t cnt, loff_t *ppos)
5990 {
5991 	struct inode *inode = file_inode(filp);
5992 	struct trace_array *tr = inode->i_private;
5993 	unsigned long val;
5994 	int ret;
5995 
5996 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5997 	if (ret)
5998 		return ret;
5999 
6000 	/* must have at least 1 entry */
6001 	if (!val)
6002 		return -EINVAL;
6003 
6004 	/* value is in KB */
6005 	val <<= 10;
6006 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6007 	if (ret < 0)
6008 		return ret;
6009 
6010 	*ppos += cnt;
6011 
6012 	return cnt;
6013 }
6014 
6015 static ssize_t
6016 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6017 				size_t cnt, loff_t *ppos)
6018 {
6019 	struct trace_array *tr = filp->private_data;
6020 	char buf[64];
6021 	int r, cpu;
6022 	unsigned long size = 0, expanded_size = 0;
6023 
6024 	mutex_lock(&trace_types_lock);
6025 	for_each_tracing_cpu(cpu) {
6026 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6027 		if (!ring_buffer_expanded)
6028 			expanded_size += trace_buf_size >> 10;
6029 	}
6030 	if (ring_buffer_expanded)
6031 		r = sprintf(buf, "%lu\n", size);
6032 	else
6033 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6034 	mutex_unlock(&trace_types_lock);
6035 
6036 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6037 }
6038 
6039 static ssize_t
6040 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6041 			  size_t cnt, loff_t *ppos)
6042 {
6043 	/*
6044 	 * There is no need to read what the user has written; this function
6045 	 * exists only so that "echo" into this file does not return an error.
6046 	 */
6047 
6048 	*ppos += cnt;
6049 
6050 	return cnt;
6051 }
6052 
6053 static int
6054 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6055 {
6056 	struct trace_array *tr = inode->i_private;
6057 
6058 	/* disable tracing? */
6059 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6060 		tracer_tracing_off(tr);
6061 	/* resize the ring buffer to 0 */
6062 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6063 
6064 	trace_array_put(tr);
6065 
6066 	return 0;
6067 }
6068 
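/*
 * Write handler for trace_marker.  The user string is copied into a
 * TRACE_PRINT event in the ring buffer, so "echo hello > trace_marker"
 * injects "hello" into the trace.  Triggers attached to the trace_marker
 * event are run before the event is committed.
 */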
6069 static ssize_t
6070 tracing_mark_write(struct file *filp, const char __user *ubuf,
6071 					size_t cnt, loff_t *fpos)
6072 {
6073 	struct trace_array *tr = filp->private_data;
6074 	struct ring_buffer_event *event;
6075 	enum event_trigger_type tt = ETT_NONE;
6076 	struct ring_buffer *buffer;
6077 	struct print_entry *entry;
6078 	unsigned long irq_flags;
6079 	const char faulted[] = "<faulted>";
6080 	ssize_t written;
6081 	int size;
6082 	int len;
6083 
6084 /* Used in tracing_mark_raw_write() as well */
6085 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6086 
6087 	if (tracing_disabled)
6088 		return -EINVAL;
6089 
6090 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6091 		return -EINVAL;
6092 
6093 	if (cnt > TRACE_BUF_SIZE)
6094 		cnt = TRACE_BUF_SIZE;
6095 
6096 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6097 
6098 	local_save_flags(irq_flags);
6099 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6100 
6101 	/* If less than "<faulted>", then make sure we can still add that */
6102 	if (cnt < FAULTED_SIZE)
6103 		size += FAULTED_SIZE - cnt;
6104 
6105 	buffer = tr->trace_buffer.buffer;
6106 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6107 					    irq_flags, preempt_count());
6108 	if (unlikely(!event))
6109 		/* Ring buffer disabled, return as if not open for write */
6110 		return -EBADF;
6111 
6112 	entry = ring_buffer_event_data(event);
6113 	entry->ip = _THIS_IP_;
6114 
6115 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6116 	if (len) {
6117 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6118 		cnt = FAULTED_SIZE;
6119 		written = -EFAULT;
6120 	} else
6121 		written = cnt;
6122 	len = cnt;
6123 
6124 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6125 		/* do not add \n before testing triggers, but add \0 */
6126 		entry->buf[cnt] = '\0';
6127 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6128 	}
6129 
6130 	if (entry->buf[cnt - 1] != '\n') {
6131 		entry->buf[cnt] = '\n';
6132 		entry->buf[cnt + 1] = '\0';
6133 	} else
6134 		entry->buf[cnt] = '\0';
6135 
6136 	__buffer_unlock_commit(buffer, event);
6137 
6138 	if (tt)
6139 		event_triggers_post_call(tr->trace_marker_file, tt);
6140 
6141 	if (written > 0)
6142 		*fpos += written;
6143 
6144 	return written;
6145 }
6146 
6147 /* Limit it for now to 3K (including tag) */
6148 #define RAW_DATA_MAX_SIZE (1024*3)
6149 
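/*
 * Write handler for trace_marker_raw.  The payload must start with a
 * sizeof(int) tag id; the id and the raw bytes that follow it are stored
 * as a single TRACE_RAW_DATA event.
 */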
6150 static ssize_t
6151 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6152 					size_t cnt, loff_t *fpos)
6153 {
6154 	struct trace_array *tr = filp->private_data;
6155 	struct ring_buffer_event *event;
6156 	struct ring_buffer *buffer;
6157 	struct raw_data_entry *entry;
6158 	const char faulted[] = "<faulted>";
6159 	unsigned long irq_flags;
6160 	ssize_t written;
6161 	int size;
6162 	int len;
6163 
6164 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6165 
6166 	if (tracing_disabled)
6167 		return -EINVAL;
6168 
6169 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6170 		return -EINVAL;
6171 
6172 	/* The marker must at least have a tag id */
6173 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6174 		return -EINVAL;
6175 
6176 	if (cnt > TRACE_BUF_SIZE)
6177 		cnt = TRACE_BUF_SIZE;
6178 
6179 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6180 
6181 	local_save_flags(irq_flags);
6182 	size = sizeof(*entry) + cnt;
6183 	if (cnt < FAULT_SIZE_ID)
6184 		size += FAULT_SIZE_ID - cnt;
6185 
6186 	buffer = tr->trace_buffer.buffer;
6187 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6188 					    irq_flags, preempt_count());
6189 	if (!event)
6190 		/* Ring buffer disabled, return as if not open for write */
6191 		return -EBADF;
6192 
6193 	entry = ring_buffer_event_data(event);
6194 
6195 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6196 	if (len) {
6197 		entry->id = -1;
6198 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6199 		written = -EFAULT;
6200 	} else
6201 		written = cnt;
6202 
6203 	__buffer_unlock_commit(buffer, event);
6204 
6205 	if (written > 0)
6206 		*fpos += written;
6207 
6208 	return written;
6209 }
6210 
6211 static int tracing_clock_show(struct seq_file *m, void *v)
6212 {
6213 	struct trace_array *tr = m->private;
6214 	int i;
6215 
6216 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6217 		seq_printf(m,
6218 			"%s%s%s%s", i ? " " : "",
6219 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6220 			i == tr->clock_id ? "]" : "");
6221 	seq_putc(m, '\n');
6222 
6223 	return 0;
6224 }
6225 
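/*
 * Switch the clock used to timestamp trace events, e.g.
 * "echo global > trace_clock".  Both the main buffer and the max
 * (snapshot) buffer are reset, since timestamps taken with different
 * clocks are not comparable.
 */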
6226 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6227 {
6228 	int i;
6229 
6230 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6231 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6232 			break;
6233 	}
6234 	if (i == ARRAY_SIZE(trace_clocks))
6235 		return -EINVAL;
6236 
6237 	mutex_lock(&trace_types_lock);
6238 
6239 	tr->clock_id = i;
6240 
6241 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6242 
6243 	/*
6244 	 * New clock may not be consistent with the previous clock.
6245 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6246 	 */
6247 	tracing_reset_online_cpus(&tr->trace_buffer);
6248 
6249 #ifdef CONFIG_TRACER_MAX_TRACE
6250 	if (tr->max_buffer.buffer)
6251 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6252 	tracing_reset_online_cpus(&tr->max_buffer);
6253 #endif
6254 
6255 	mutex_unlock(&trace_types_lock);
6256 
6257 	return 0;
6258 }
6259 
6260 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6261 				   size_t cnt, loff_t *fpos)
6262 {
6263 	struct seq_file *m = filp->private_data;
6264 	struct trace_array *tr = m->private;
6265 	char buf[64];
6266 	const char *clockstr;
6267 	int ret;
6268 
6269 	if (cnt >= sizeof(buf))
6270 		return -EINVAL;
6271 
6272 	if (copy_from_user(buf, ubuf, cnt))
6273 		return -EFAULT;
6274 
6275 	buf[cnt] = 0;
6276 
6277 	clockstr = strstrip(buf);
6278 
6279 	ret = tracing_set_clock(tr, clockstr);
6280 	if (ret)
6281 		return ret;
6282 
6283 	*fpos += cnt;
6284 
6285 	return cnt;
6286 }
6287 
6288 static int tracing_clock_open(struct inode *inode, struct file *file)
6289 {
6290 	struct trace_array *tr = inode->i_private;
6291 	int ret;
6292 
6293 	if (tracing_disabled)
6294 		return -ENODEV;
6295 
6296 	if (trace_array_get(tr))
6297 		return -ENODEV;
6298 
6299 	ret = single_open(file, tracing_clock_show, inode->i_private);
6300 	if (ret < 0)
6301 		trace_array_put(tr);
6302 
6303 	return ret;
6304 }
6305 
6306 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6307 {
6308 	struct trace_array *tr = m->private;
6309 
6310 	mutex_lock(&trace_types_lock);
6311 
6312 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6313 		seq_puts(m, "delta [absolute]\n");
6314 	else
6315 		seq_puts(m, "[delta] absolute\n");
6316 
6317 	mutex_unlock(&trace_types_lock);
6318 
6319 	return 0;
6320 }
6321 
6322 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6323 {
6324 	struct trace_array *tr = inode->i_private;
6325 	int ret;
6326 
6327 	if (tracing_disabled)
6328 		return -ENODEV;
6329 
6330 	if (trace_array_get(tr))
6331 		return -ENODEV;
6332 
6333 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6334 	if (ret < 0)
6335 		trace_array_put(tr);
6336 
6337 	return ret;
6338 }
6339 
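/*
 * Enable or disable absolute (rather than delta) timestamps in the ring
 * buffer.  Requests are reference counted, so absolute timestamps stay
 * enabled as long as at least one user still needs them.
 */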
6340 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6341 {
6342 	int ret = 0;
6343 
6344 	mutex_lock(&trace_types_lock);
6345 
6346 	if (abs && tr->time_stamp_abs_ref++)
6347 		goto out;
6348 
6349 	if (!abs) {
6350 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6351 			ret = -EINVAL;
6352 			goto out;
6353 		}
6354 
6355 		if (--tr->time_stamp_abs_ref)
6356 			goto out;
6357 	}
6358 
6359 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6360 
6361 #ifdef CONFIG_TRACER_MAX_TRACE
6362 	if (tr->max_buffer.buffer)
6363 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6364 #endif
6365  out:
6366 	mutex_unlock(&trace_types_lock);
6367 
6368 	return ret;
6369 }
6370 
6371 struct ftrace_buffer_info {
6372 	struct trace_iterator	iter;
6373 	void			*spare;
6374 	unsigned int		spare_cpu;
6375 	unsigned int		read;
6376 };
6377 
6378 #ifdef CONFIG_TRACER_SNAPSHOT
6379 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6380 {
6381 	struct trace_array *tr = inode->i_private;
6382 	struct trace_iterator *iter;
6383 	struct seq_file *m;
6384 	int ret = 0;
6385 
6386 	if (trace_array_get(tr) < 0)
6387 		return -ENODEV;
6388 
6389 	if (file->f_mode & FMODE_READ) {
6390 		iter = __tracing_open(inode, file, true);
6391 		if (IS_ERR(iter))
6392 			ret = PTR_ERR(iter);
6393 	} else {
6394 		/* Writes still need the seq_file to hold the private data */
6395 		ret = -ENOMEM;
6396 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6397 		if (!m)
6398 			goto out;
6399 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6400 		if (!iter) {
6401 			kfree(m);
6402 			goto out;
6403 		}
6404 		ret = 0;
6405 
6406 		iter->tr = tr;
6407 		iter->trace_buffer = &tr->max_buffer;
6408 		iter->cpu_file = tracing_get_cpu(inode);
6409 		m->private = iter;
6410 		file->private_data = m;
6411 	}
6412 out:
6413 	if (ret < 0)
6414 		trace_array_put(tr);
6415 
6416 	return ret;
6417 }
6418 
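/*
 * Write handler for the snapshot file.  The value written selects the
 * action: 0 frees the snapshot buffer, 1 allocates it if needed and
 * swaps it with the live buffer (e.g. "echo 1 > snapshot"), and any
 * other value clears the snapshot buffer's contents.
 */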
6419 static ssize_t
6420 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6421 		       loff_t *ppos)
6422 {
6423 	struct seq_file *m = filp->private_data;
6424 	struct trace_iterator *iter = m->private;
6425 	struct trace_array *tr = iter->tr;
6426 	unsigned long val;
6427 	int ret;
6428 
6429 	ret = tracing_update_buffers();
6430 	if (ret < 0)
6431 		return ret;
6432 
6433 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6434 	if (ret)
6435 		return ret;
6436 
6437 	mutex_lock(&trace_types_lock);
6438 
6439 	if (tr->current_trace->use_max_tr) {
6440 		ret = -EBUSY;
6441 		goto out;
6442 	}
6443 
6444 	switch (val) {
6445 	case 0:
6446 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6447 			ret = -EINVAL;
6448 			break;
6449 		}
6450 		if (tr->allocated_snapshot)
6451 			free_snapshot(tr);
6452 		break;
6453 	case 1:
6454 /* Only allow per-cpu swap if the ring buffer supports it */
6455 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6456 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6457 			ret = -EINVAL;
6458 			break;
6459 		}
6460 #endif
6461 		if (!tr->allocated_snapshot) {
6462 			ret = tracing_alloc_snapshot_instance(tr);
6463 			if (ret < 0)
6464 				break;
6465 		}
6466 		local_irq_disable();
6467 		/* Now, we're going to swap */
6468 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6469 			update_max_tr(tr, current, smp_processor_id());
6470 		else
6471 			update_max_tr_single(tr, current, iter->cpu_file);
6472 		local_irq_enable();
6473 		break;
6474 	default:
6475 		if (tr->allocated_snapshot) {
6476 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6477 				tracing_reset_online_cpus(&tr->max_buffer);
6478 			else
6479 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6480 		}
6481 		break;
6482 	}
6483 
6484 	if (ret >= 0) {
6485 		*ppos += cnt;
6486 		ret = cnt;
6487 	}
6488 out:
6489 	mutex_unlock(&trace_types_lock);
6490 	return ret;
6491 }
6492 
6493 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6494 {
6495 	struct seq_file *m = file->private_data;
6496 	int ret;
6497 
6498 	ret = tracing_release(inode, file);
6499 
6500 	if (file->f_mode & FMODE_READ)
6501 		return ret;
6502 
6503 	/* If write only, the seq_file is just a stub */
6504 	if (m)
6505 		kfree(m->private);
6506 	kfree(m);
6507 
6508 	return 0;
6509 }
6510 
6511 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6512 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6513 				    size_t count, loff_t *ppos);
6514 static int tracing_buffers_release(struct inode *inode, struct file *file);
6515 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6516 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6517 
6518 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6519 {
6520 	struct ftrace_buffer_info *info;
6521 	int ret;
6522 
6523 	ret = tracing_buffers_open(inode, filp);
6524 	if (ret < 0)
6525 		return ret;
6526 
6527 	info = filp->private_data;
6528 
6529 	if (info->iter.trace->use_max_tr) {
6530 		tracing_buffers_release(inode, filp);
6531 		return -EBUSY;
6532 	}
6533 
6534 	info->iter.snapshot = true;
6535 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6536 
6537 	return ret;
6538 }
6539 
6540 #endif /* CONFIG_TRACER_SNAPSHOT */
6541 
6542 
6543 static const struct file_operations tracing_thresh_fops = {
6544 	.open		= tracing_open_generic,
6545 	.read		= tracing_thresh_read,
6546 	.write		= tracing_thresh_write,
6547 	.llseek		= generic_file_llseek,
6548 };
6549 
6550 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6551 static const struct file_operations tracing_max_lat_fops = {
6552 	.open		= tracing_open_generic,
6553 	.read		= tracing_max_lat_read,
6554 	.write		= tracing_max_lat_write,
6555 	.llseek		= generic_file_llseek,
6556 };
6557 #endif
6558 
6559 static const struct file_operations set_tracer_fops = {
6560 	.open		= tracing_open_generic,
6561 	.read		= tracing_set_trace_read,
6562 	.write		= tracing_set_trace_write,
6563 	.llseek		= generic_file_llseek,
6564 };
6565 
6566 static const struct file_operations tracing_pipe_fops = {
6567 	.open		= tracing_open_pipe,
6568 	.poll		= tracing_poll_pipe,
6569 	.read		= tracing_read_pipe,
6570 	.splice_read	= tracing_splice_read_pipe,
6571 	.release	= tracing_release_pipe,
6572 	.llseek		= no_llseek,
6573 };
6574 
6575 static const struct file_operations tracing_entries_fops = {
6576 	.open		= tracing_open_generic_tr,
6577 	.read		= tracing_entries_read,
6578 	.write		= tracing_entries_write,
6579 	.llseek		= generic_file_llseek,
6580 	.release	= tracing_release_generic_tr,
6581 };
6582 
6583 static const struct file_operations tracing_total_entries_fops = {
6584 	.open		= tracing_open_generic_tr,
6585 	.read		= tracing_total_entries_read,
6586 	.llseek		= generic_file_llseek,
6587 	.release	= tracing_release_generic_tr,
6588 };
6589 
6590 static const struct file_operations tracing_free_buffer_fops = {
6591 	.open		= tracing_open_generic_tr,
6592 	.write		= tracing_free_buffer_write,
6593 	.release	= tracing_free_buffer_release,
6594 };
6595 
6596 static const struct file_operations tracing_mark_fops = {
6597 	.open		= tracing_open_generic_tr,
6598 	.write		= tracing_mark_write,
6599 	.llseek		= generic_file_llseek,
6600 	.release	= tracing_release_generic_tr,
6601 };
6602 
6603 static const struct file_operations tracing_mark_raw_fops = {
6604 	.open		= tracing_open_generic_tr,
6605 	.write		= tracing_mark_raw_write,
6606 	.llseek		= generic_file_llseek,
6607 	.release	= tracing_release_generic_tr,
6608 };
6609 
6610 static const struct file_operations trace_clock_fops = {
6611 	.open		= tracing_clock_open,
6612 	.read		= seq_read,
6613 	.llseek		= seq_lseek,
6614 	.release	= tracing_single_release_tr,
6615 	.write		= tracing_clock_write,
6616 };
6617 
6618 static const struct file_operations trace_time_stamp_mode_fops = {
6619 	.open		= tracing_time_stamp_mode_open,
6620 	.read		= seq_read,
6621 	.llseek		= seq_lseek,
6622 	.release	= tracing_single_release_tr,
6623 };
6624 
6625 #ifdef CONFIG_TRACER_SNAPSHOT
6626 static const struct file_operations snapshot_fops = {
6627 	.open		= tracing_snapshot_open,
6628 	.read		= seq_read,
6629 	.write		= tracing_snapshot_write,
6630 	.llseek		= tracing_lseek,
6631 	.release	= tracing_snapshot_release,
6632 };
6633 
6634 static const struct file_operations snapshot_raw_fops = {
6635 	.open		= snapshot_raw_open,
6636 	.read		= tracing_buffers_read,
6637 	.release	= tracing_buffers_release,
6638 	.splice_read	= tracing_buffers_splice_read,
6639 	.llseek		= no_llseek,
6640 };
6641 
6642 #endif /* CONFIG_TRACER_SNAPSHOT */
6643 
6644 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6645 {
6646 	struct trace_array *tr = inode->i_private;
6647 	struct ftrace_buffer_info *info;
6648 	int ret;
6649 
6650 	if (tracing_disabled)
6651 		return -ENODEV;
6652 
6653 	if (trace_array_get(tr) < 0)
6654 		return -ENODEV;
6655 
6656 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6657 	if (!info) {
6658 		trace_array_put(tr);
6659 		return -ENOMEM;
6660 	}
6661 
6662 	mutex_lock(&trace_types_lock);
6663 
6664 	info->iter.tr		= tr;
6665 	info->iter.cpu_file	= tracing_get_cpu(inode);
6666 	info->iter.trace	= tr->current_trace;
6667 	info->iter.trace_buffer = &tr->trace_buffer;
6668 	info->spare		= NULL;
6669 	/* Force reading ring buffer for first read */
6670 	info->read		= (unsigned int)-1;
6671 
6672 	filp->private_data = info;
6673 
6674 	tr->current_trace->ref++;
6675 
6676 	mutex_unlock(&trace_types_lock);
6677 
6678 	ret = nonseekable_open(inode, filp);
6679 	if (ret < 0)
6680 		trace_array_put(tr);
6681 
6682 	return ret;
6683 }
6684 
6685 static __poll_t
6686 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6687 {
6688 	struct ftrace_buffer_info *info = filp->private_data;
6689 	struct trace_iterator *iter = &info->iter;
6690 
6691 	return trace_poll(iter, filp, poll_table);
6692 }
6693 
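/*
 * Read handler for trace_pipe_raw.  A "spare" page kept in the
 * ftrace_buffer_info is filled via ring_buffer_read_page(), and any
 * data left over from a previous read is returned before a new page
 * is pulled from the ring buffer.
 */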
6694 static ssize_t
6695 tracing_buffers_read(struct file *filp, char __user *ubuf,
6696 		     size_t count, loff_t *ppos)
6697 {
6698 	struct ftrace_buffer_info *info = filp->private_data;
6699 	struct trace_iterator *iter = &info->iter;
6700 	ssize_t ret = 0;
6701 	ssize_t size;
6702 
6703 	if (!count)
6704 		return 0;
6705 
6706 #ifdef CONFIG_TRACER_MAX_TRACE
6707 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6708 		return -EBUSY;
6709 #endif
6710 
6711 	if (!info->spare) {
6712 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6713 							  iter->cpu_file);
6714 		if (IS_ERR(info->spare)) {
6715 			ret = PTR_ERR(info->spare);
6716 			info->spare = NULL;
6717 		} else {
6718 			info->spare_cpu = iter->cpu_file;
6719 		}
6720 	}
6721 	if (!info->spare)
6722 		return ret;
6723 
6724 	/* Do we have previous read data to read? */
6725 	if (info->read < PAGE_SIZE)
6726 		goto read;
6727 
6728  again:
6729 	trace_access_lock(iter->cpu_file);
6730 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6731 				    &info->spare,
6732 				    count,
6733 				    iter->cpu_file, 0);
6734 	trace_access_unlock(iter->cpu_file);
6735 
6736 	if (ret < 0) {
6737 		if (trace_empty(iter)) {
6738 			if ((filp->f_flags & O_NONBLOCK))
6739 				return -EAGAIN;
6740 
6741 			ret = wait_on_pipe(iter, false);
6742 			if (ret)
6743 				return ret;
6744 
6745 			goto again;
6746 		}
6747 		return 0;
6748 	}
6749 
6750 	info->read = 0;
6751  read:
6752 	size = PAGE_SIZE - info->read;
6753 	if (size > count)
6754 		size = count;
6755 
6756 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6757 	if (ret == size)
6758 		return -EFAULT;
6759 
6760 	size -= ret;
6761 
6762 	*ppos += size;
6763 	info->read += size;
6764 
6765 	return size;
6766 }
6767 
6768 static int tracing_buffers_release(struct inode *inode, struct file *file)
6769 {
6770 	struct ftrace_buffer_info *info = file->private_data;
6771 	struct trace_iterator *iter = &info->iter;
6772 
6773 	mutex_lock(&trace_types_lock);
6774 
6775 	iter->tr->current_trace->ref--;
6776 
6777 	__trace_array_put(iter->tr);
6778 
6779 	if (info->spare)
6780 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6781 					   info->spare_cpu, info->spare);
6782 	kfree(info);
6783 
6784 	mutex_unlock(&trace_types_lock);
6785 
6786 	return 0;
6787 }
6788 
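/*
 * A reference-counted handle to a ring buffer page that has been handed
 * to a pipe by tracing_buffers_splice_read().  The page is returned to
 * the ring buffer when the last reference is dropped.
 */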
6789 struct buffer_ref {
6790 	struct ring_buffer	*buffer;
6791 	void			*page;
6792 	int			cpu;
6793 	int			ref;
6794 };
6795 
6796 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6797 				    struct pipe_buffer *buf)
6798 {
6799 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6800 
6801 	if (--ref->ref)
6802 		return;
6803 
6804 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6805 	kfree(ref);
6806 	buf->private = 0;
6807 }
6808 
6809 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6810 				struct pipe_buffer *buf)
6811 {
6812 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6813 
6814 	ref->ref++;
6815 }
6816 
6817 /* Pipe buffer operations for a buffer. */
6818 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6819 	.can_merge		= 0,
6820 	.confirm		= generic_pipe_buf_confirm,
6821 	.release		= buffer_pipe_buf_release,
6822 	.steal			= generic_pipe_buf_steal,
6823 	.get			= buffer_pipe_buf_get,
6824 };
6825 
6826 /*
6827  * Callback from splice_to_pipe(), used to release any pages left in
6828  * the spd if we errored out while filling the pipe.
6829  */
6830 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6831 {
6832 	struct buffer_ref *ref =
6833 		(struct buffer_ref *)spd->partial[i].private;
6834 
6835 	if (--ref->ref)
6836 		return;
6837 
6838 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6839 	kfree(ref);
6840 	spd->partial[i].private = 0;
6841 }
6842 
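/*
 * Splice whole ring buffer pages into a pipe without copying them.
 * *ppos must be page aligned and len is rounded down to whole pages;
 * each spliced page carries a buffer_ref so it can be handed back to
 * the ring buffer once the pipe consumer is done with it.
 */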
6843 static ssize_t
6844 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6845 			    struct pipe_inode_info *pipe, size_t len,
6846 			    unsigned int flags)
6847 {
6848 	struct ftrace_buffer_info *info = file->private_data;
6849 	struct trace_iterator *iter = &info->iter;
6850 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6851 	struct page *pages_def[PIPE_DEF_BUFFERS];
6852 	struct splice_pipe_desc spd = {
6853 		.pages		= pages_def,
6854 		.partial	= partial_def,
6855 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6856 		.ops		= &buffer_pipe_buf_ops,
6857 		.spd_release	= buffer_spd_release,
6858 	};
6859 	struct buffer_ref *ref;
6860 	int entries, i;
6861 	ssize_t ret = 0;
6862 
6863 #ifdef CONFIG_TRACER_MAX_TRACE
6864 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6865 		return -EBUSY;
6866 #endif
6867 
6868 	if (*ppos & (PAGE_SIZE - 1))
6869 		return -EINVAL;
6870 
6871 	if (len & (PAGE_SIZE - 1)) {
6872 		if (len < PAGE_SIZE)
6873 			return -EINVAL;
6874 		len &= PAGE_MASK;
6875 	}
6876 
6877 	if (splice_grow_spd(pipe, &spd))
6878 		return -ENOMEM;
6879 
6880  again:
6881 	trace_access_lock(iter->cpu_file);
6882 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6883 
6884 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6885 		struct page *page;
6886 		int r;
6887 
6888 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6889 		if (!ref) {
6890 			ret = -ENOMEM;
6891 			break;
6892 		}
6893 
6894 		ref->ref = 1;
6895 		ref->buffer = iter->trace_buffer->buffer;
6896 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6897 		if (IS_ERR(ref->page)) {
6898 			ret = PTR_ERR(ref->page);
6899 			ref->page = NULL;
6900 			kfree(ref);
6901 			break;
6902 		}
6903 		ref->cpu = iter->cpu_file;
6904 
6905 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6906 					  len, iter->cpu_file, 1);
6907 		if (r < 0) {
6908 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6909 						   ref->page);
6910 			kfree(ref);
6911 			break;
6912 		}
6913 
6914 		page = virt_to_page(ref->page);
6915 
6916 		spd.pages[i] = page;
6917 		spd.partial[i].len = PAGE_SIZE;
6918 		spd.partial[i].offset = 0;
6919 		spd.partial[i].private = (unsigned long)ref;
6920 		spd.nr_pages++;
6921 		*ppos += PAGE_SIZE;
6922 
6923 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6924 	}
6925 
6926 	trace_access_unlock(iter->cpu_file);
6927 	spd.nr_pages = i;
6928 
6929 	/* did we read anything? */
6930 	if (!spd.nr_pages) {
6931 		if (ret)
6932 			goto out;
6933 
6934 		ret = -EAGAIN;
6935 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6936 			goto out;
6937 
6938 		ret = wait_on_pipe(iter, true);
6939 		if (ret)
6940 			goto out;
6941 
6942 		goto again;
6943 	}
6944 
6945 	ret = splice_to_pipe(pipe, &spd);
6946 out:
6947 	splice_shrink_spd(&spd);
6948 
6949 	return ret;
6950 }
6951 
6952 static const struct file_operations tracing_buffers_fops = {
6953 	.open		= tracing_buffers_open,
6954 	.read		= tracing_buffers_read,
6955 	.poll		= tracing_buffers_poll,
6956 	.release	= tracing_buffers_release,
6957 	.splice_read	= tracing_buffers_splice_read,
6958 	.llseek		= no_llseek,
6959 };
6960 
6961 static ssize_t
6962 tracing_stats_read(struct file *filp, char __user *ubuf,
6963 		   size_t count, loff_t *ppos)
6964 {
6965 	struct inode *inode = file_inode(filp);
6966 	struct trace_array *tr = inode->i_private;
6967 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6968 	int cpu = tracing_get_cpu(inode);
6969 	struct trace_seq *s;
6970 	unsigned long cnt;
6971 	unsigned long long t;
6972 	unsigned long usec_rem;
6973 
6974 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6975 	if (!s)
6976 		return -ENOMEM;
6977 
6978 	trace_seq_init(s);
6979 
6980 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6981 	trace_seq_printf(s, "entries: %ld\n", cnt);
6982 
6983 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6984 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6985 
6986 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6987 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6988 
6989 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6990 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6991 
6992 	if (trace_clocks[tr->clock_id].in_ns) {
6993 		/* local or global for trace_clock */
6994 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6995 		usec_rem = do_div(t, USEC_PER_SEC);
6996 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6997 								t, usec_rem);
6998 
6999 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7000 		usec_rem = do_div(t, USEC_PER_SEC);
7001 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7002 	} else {
7003 		/* counter or tsc mode for trace_clock */
7004 		trace_seq_printf(s, "oldest event ts: %llu\n",
7005 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7006 
7007 		trace_seq_printf(s, "now ts: %llu\n",
7008 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7009 	}
7010 
7011 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7012 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7013 
7014 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7015 	trace_seq_printf(s, "read events: %ld\n", cnt);
7016 
7017 	count = simple_read_from_buffer(ubuf, count, ppos,
7018 					s->buffer, trace_seq_used(s));
7019 
7020 	kfree(s);
7021 
7022 	return count;
7023 }
7024 
7025 static const struct file_operations tracing_stats_fops = {
7026 	.open		= tracing_open_generic_tr,
7027 	.read		= tracing_stats_read,
7028 	.llseek		= generic_file_llseek,
7029 	.release	= tracing_release_generic_tr,
7030 };
7031 
7032 #ifdef CONFIG_DYNAMIC_FTRACE
7033 
7034 static ssize_t
7035 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7036 		  size_t cnt, loff_t *ppos)
7037 {
7038 	unsigned long *p = filp->private_data;
7039 	char buf[64]; /* Not too big for a shallow stack */
7040 	int r;
7041 
7042 	r = scnprintf(buf, 63, "%ld", *p);
7043 	buf[r++] = '\n';
7044 
7045 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7046 }
7047 
7048 static const struct file_operations tracing_dyn_info_fops = {
7049 	.open		= tracing_open_generic,
7050 	.read		= tracing_read_dyn_info,
7051 	.llseek		= generic_file_llseek,
7052 };
7053 #endif /* CONFIG_DYNAMIC_FTRACE */
7054 
7055 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7056 static void
7057 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7058 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7059 		void *data)
7060 {
7061 	tracing_snapshot_instance(tr);
7062 }
7063 
7064 static void
7065 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7066 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7067 		      void *data)
7068 {
7069 	struct ftrace_func_mapper *mapper = data;
7070 	long *count = NULL;
7071 
7072 	if (mapper)
7073 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7074 
7075 	if (count) {
7076 
7077 		if (*count <= 0)
7078 			return;
7079 
7080 		(*count)--;
7081 	}
7082 
7083 	tracing_snapshot_instance(tr);
7084 }
7085 
7086 static int
7087 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7088 		      struct ftrace_probe_ops *ops, void *data)
7089 {
7090 	struct ftrace_func_mapper *mapper = data;
7091 	long *count = NULL;
7092 
7093 	seq_printf(m, "%ps:", (void *)ip);
7094 
7095 	seq_puts(m, "snapshot");
7096 
7097 	if (mapper)
7098 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7099 
7100 	if (count)
7101 		seq_printf(m, ":count=%ld\n", *count);
7102 	else
7103 		seq_puts(m, ":unlimited\n");
7104 
7105 	return 0;
7106 }
7107 
7108 static int
7109 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7110 		     unsigned long ip, void *init_data, void **data)
7111 {
7112 	struct ftrace_func_mapper *mapper = *data;
7113 
7114 	if (!mapper) {
7115 		mapper = allocate_ftrace_func_mapper();
7116 		if (!mapper)
7117 			return -ENOMEM;
7118 		*data = mapper;
7119 	}
7120 
7121 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7122 }
7123 
7124 static void
7125 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7126 		     unsigned long ip, void *data)
7127 {
7128 	struct ftrace_func_mapper *mapper = data;
7129 
7130 	if (!ip) {
7131 		if (!mapper)
7132 			return;
7133 		free_ftrace_func_mapper(mapper, NULL);
7134 		return;
7135 	}
7136 
7137 	ftrace_func_mapper_remove_ip(mapper, ip);
7138 }
7139 
7140 static struct ftrace_probe_ops snapshot_probe_ops = {
7141 	.func			= ftrace_snapshot,
7142 	.print			= ftrace_snapshot_print,
7143 };
7144 
7145 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7146 	.func			= ftrace_count_snapshot,
7147 	.print			= ftrace_snapshot_print,
7148 	.init			= ftrace_snapshot_init,
7149 	.free			= ftrace_snapshot_free,
7150 };
7151 
7152 static int
7153 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7154 			       char *glob, char *cmd, char *param, int enable)
7155 {
7156 	struct ftrace_probe_ops *ops;
7157 	void *count = (void *)-1;
7158 	char *number;
7159 	int ret;
7160 
7161 	if (!tr)
7162 		return -ENODEV;
7163 
7164 	/* hash funcs only work with set_ftrace_filter */
7165 	if (!enable)
7166 		return -EINVAL;
7167 
7168 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7169 
7170 	if (glob[0] == '!')
7171 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7172 
7173 	if (!param)
7174 		goto out_reg;
7175 
7176 	number = strsep(&param, ":");
7177 
7178 	if (!strlen(number))
7179 		goto out_reg;
7180 
7181 	/*
7182 	 * We use the callback data field (which is a pointer)
7183 	 * as our counter.
7184 	 */
7185 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7186 	if (ret)
7187 		return ret;
7188 
7189  out_reg:
7190 	ret = tracing_alloc_snapshot_instance(tr);
7191 	if (ret < 0)
7192 		goto out;
7193 
7194 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7195 
7196  out:
7197 	return ret < 0 ? ret : 0;
7198 }
7199 
7200 static struct ftrace_func_command ftrace_snapshot_cmd = {
7201 	.name			= "snapshot",
7202 	.func			= ftrace_trace_snapshot_callback,
7203 };
7204 
7205 static __init int register_snapshot_cmd(void)
7206 {
7207 	return register_ftrace_command(&ftrace_snapshot_cmd);
7208 }
7209 #else
7210 static inline __init int register_snapshot_cmd(void) { return 0; }
7211 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7212 
7213 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7214 {
7215 	if (WARN_ON(!tr->dir))
7216 		return ERR_PTR(-ENODEV);
7217 
7218 	/* Top directory uses NULL as the parent */
7219 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7220 		return NULL;
7221 
7222 	/* All sub buffers have a descriptor */
7223 	return tr->dir;
7224 }
7225 
7226 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7227 {
7228 	struct dentry *d_tracer;
7229 
7230 	if (tr->percpu_dir)
7231 		return tr->percpu_dir;
7232 
7233 	d_tracer = tracing_get_dentry(tr);
7234 	if (IS_ERR(d_tracer))
7235 		return NULL;
7236 
7237 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7238 
7239 	WARN_ONCE(!tr->percpu_dir,
7240 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7241 
7242 	return tr->percpu_dir;
7243 }
7244 
7245 static struct dentry *
7246 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7247 		      void *data, long cpu, const struct file_operations *fops)
7248 {
7249 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7250 
7251 	if (ret) /* See tracing_get_cpu() */
7252 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7253 	return ret;
7254 }
7255 
7256 static void
7257 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7258 {
7259 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7260 	struct dentry *d_cpu;
7261 	char cpu_dir[30]; /* 30 characters should be more than enough */
7262 
7263 	if (!d_percpu)
7264 		return;
7265 
7266 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7267 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7268 	if (!d_cpu) {
7269 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7270 		return;
7271 	}
7272 
7273 	/* per cpu trace_pipe */
7274 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7275 				tr, cpu, &tracing_pipe_fops);
7276 
7277 	/* per cpu trace */
7278 	trace_create_cpu_file("trace", 0644, d_cpu,
7279 				tr, cpu, &tracing_fops);
7280 
7281 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7282 				tr, cpu, &tracing_buffers_fops);
7283 
7284 	trace_create_cpu_file("stats", 0444, d_cpu,
7285 				tr, cpu, &tracing_stats_fops);
7286 
7287 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7288 				tr, cpu, &tracing_entries_fops);
7289 
7290 #ifdef CONFIG_TRACER_SNAPSHOT
7291 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7292 				tr, cpu, &snapshot_fops);
7293 
7294 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7295 				tr, cpu, &snapshot_raw_fops);
7296 #endif
7297 }
7298 
7299 #ifdef CONFIG_FTRACE_SELFTEST
7300 /* Let selftest have access to static functions in this file */
7301 #include "trace_selftest.c"
7302 #endif
7303 
7304 static ssize_t
7305 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7306 			loff_t *ppos)
7307 {
7308 	struct trace_option_dentry *topt = filp->private_data;
7309 	char *buf;
7310 
7311 	if (topt->flags->val & topt->opt->bit)
7312 		buf = "1\n";
7313 	else
7314 		buf = "0\n";
7315 
7316 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7317 }
7318 
7319 static ssize_t
7320 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7321 			 loff_t *ppos)
7322 {
7323 	struct trace_option_dentry *topt = filp->private_data;
7324 	unsigned long val;
7325 	int ret;
7326 
7327 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7328 	if (ret)
7329 		return ret;
7330 
7331 	if (val != 0 && val != 1)
7332 		return -EINVAL;
7333 
7334 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7335 		mutex_lock(&trace_types_lock);
7336 		ret = __set_tracer_option(topt->tr, topt->flags,
7337 					  topt->opt, !val);
7338 		mutex_unlock(&trace_types_lock);
7339 		if (ret)
7340 			return ret;
7341 	}
7342 
7343 	*ppos += cnt;
7344 
7345 	return cnt;
7346 }
7347 
7348 
7349 static const struct file_operations trace_options_fops = {
7350 	.open = tracing_open_generic,
7351 	.read = trace_options_read,
7352 	.write = trace_options_write,
7353 	.llseek	= generic_file_llseek,
7354 };
7355 
7356 /*
7357  * In order to pass in both the trace_array descriptor as well as the index
7358  * to the flag that the trace option file represents, the trace_array
7359  * has a character array of trace_flags_index[], which holds the index
7360  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7361  * The address of the element for the flag a file represents is passed to
7362  * that flag option file's read/write callbacks.
7363  *
7364  * In order to extract both the index and the trace_array descriptor,
7365  * get_tr_index() uses the following algorithm.
7366  *
7367  *   idx = *ptr;
7368  *
7369  * This works because ptr is the address of the index byte, and the value
7370  * stored there is the index itself (remember index[1] == 1).
7371  *
7372  * Then, to get the trace_array descriptor, subtract that index from ptr
7373  * to reach the start of the index array:
7374  *
7375  *   ptr - idx == &index[0]
7376  *
7377  * Then a simple container_of() from that pointer gets us to the
7378  * trace_array descriptor.
7379  */
7380 static void get_tr_index(void *data, struct trace_array **ptr,
7381 			 unsigned int *pindex)
7382 {
7383 	*pindex = *(unsigned char *)data;
7384 
7385 	*ptr = container_of(data - *pindex, struct trace_array,
7386 			    trace_flags_index);
7387 }
7388 
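/*
 * For example, each option file created by create_trace_option_core_file()
 * below gets &tr->trace_flags_index[idx] as its private data, so the
 * read/write handlers here can recover both idx and tr from that pointer.
 */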
7389 static ssize_t
7390 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7391 			loff_t *ppos)
7392 {
7393 	void *tr_index = filp->private_data;
7394 	struct trace_array *tr;
7395 	unsigned int index;
7396 	char *buf;
7397 
7398 	get_tr_index(tr_index, &tr, &index);
7399 
7400 	if (tr->trace_flags & (1 << index))
7401 		buf = "1\n";
7402 	else
7403 		buf = "0\n";
7404 
7405 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7406 }
7407 
7408 static ssize_t
7409 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7410 			 loff_t *ppos)
7411 {
7412 	void *tr_index = filp->private_data;
7413 	struct trace_array *tr;
7414 	unsigned int index;
7415 	unsigned long val;
7416 	int ret;
7417 
7418 	get_tr_index(tr_index, &tr, &index);
7419 
7420 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7421 	if (ret)
7422 		return ret;
7423 
7424 	if (val != 0 && val != 1)
7425 		return -EINVAL;
7426 
7427 	mutex_lock(&trace_types_lock);
7428 	ret = set_tracer_flag(tr, 1 << index, val);
7429 	mutex_unlock(&trace_types_lock);
7430 
7431 	if (ret < 0)
7432 		return ret;
7433 
7434 	*ppos += cnt;
7435 
7436 	return cnt;
7437 }
7438 
7439 static const struct file_operations trace_options_core_fops = {
7440 	.open = tracing_open_generic,
7441 	.read = trace_options_core_read,
7442 	.write = trace_options_core_write,
7443 	.llseek = generic_file_llseek,
7444 };
7445 
7446 struct dentry *trace_create_file(const char *name,
7447 				 umode_t mode,
7448 				 struct dentry *parent,
7449 				 void *data,
7450 				 const struct file_operations *fops)
7451 {
7452 	struct dentry *ret;
7453 
7454 	ret = tracefs_create_file(name, mode, parent, data, fops);
7455 	if (!ret)
7456 		pr_warn("Could not create tracefs '%s' entry\n", name);
7457 
7458 	return ret;
7459 }
7460 
7461 
7462 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7463 {
7464 	struct dentry *d_tracer;
7465 
7466 	if (tr->options)
7467 		return tr->options;
7468 
7469 	d_tracer = tracing_get_dentry(tr);
7470 	if (IS_ERR(d_tracer))
7471 		return NULL;
7472 
7473 	tr->options = tracefs_create_dir("options", d_tracer);
7474 	if (!tr->options) {
7475 		pr_warn("Could not create tracefs directory 'options'\n");
7476 		return NULL;
7477 	}
7478 
7479 	return tr->options;
7480 }
7481 
7482 static void
7483 create_trace_option_file(struct trace_array *tr,
7484 			 struct trace_option_dentry *topt,
7485 			 struct tracer_flags *flags,
7486 			 struct tracer_opt *opt)
7487 {
7488 	struct dentry *t_options;
7489 
7490 	t_options = trace_options_init_dentry(tr);
7491 	if (!t_options)
7492 		return;
7493 
7494 	topt->flags = flags;
7495 	topt->opt = opt;
7496 	topt->tr = tr;
7497 
7498 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7499 				    &trace_options_fops);
7500 
7501 }
7502 
7503 static void
7504 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7505 {
7506 	struct trace_option_dentry *topts;
7507 	struct trace_options *tr_topts;
7508 	struct tracer_flags *flags;
7509 	struct tracer_opt *opts;
7510 	int cnt;
7511 	int i;
7512 
7513 	if (!tracer)
7514 		return;
7515 
7516 	flags = tracer->flags;
7517 
7518 	if (!flags || !flags->opts)
7519 		return;
7520 
7521 	/*
7522 	 * If this is an instance, only create flags for tracers
7523 	 * the instance may have.
7524 	 */
7525 	if (!trace_ok_for_array(tracer, tr))
7526 		return;
7527 
7528 	for (i = 0; i < tr->nr_topts; i++) {
7529 		/* Make sure there are no duplicate flags. */
7530 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7531 			return;
7532 	}
7533 
7534 	opts = flags->opts;
7535 
7536 	for (cnt = 0; opts[cnt].name; cnt++)
7537 		;
7538 
7539 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7540 	if (!topts)
7541 		return;
7542 
7543 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7544 			    GFP_KERNEL);
7545 	if (!tr_topts) {
7546 		kfree(topts);
7547 		return;
7548 	}
7549 
7550 	tr->topts = tr_topts;
7551 	tr->topts[tr->nr_topts].tracer = tracer;
7552 	tr->topts[tr->nr_topts].topts = topts;
7553 	tr->nr_topts++;
7554 
7555 	for (cnt = 0; opts[cnt].name; cnt++) {
7556 		create_trace_option_file(tr, &topts[cnt], flags,
7557 					 &opts[cnt]);
7558 		WARN_ONCE(topts[cnt].entry == NULL,
7559 			  "Failed to create trace option: %s",
7560 			  opts[cnt].name);
7561 	}
7562 }
7563 
7564 static struct dentry *
7565 create_trace_option_core_file(struct trace_array *tr,
7566 			      const char *option, long index)
7567 {
7568 	struct dentry *t_options;
7569 
7570 	t_options = trace_options_init_dentry(tr);
7571 	if (!t_options)
7572 		return NULL;
7573 
7574 	return trace_create_file(option, 0644, t_options,
7575 				 (void *)&tr->trace_flags_index[index],
7576 				 &trace_options_core_fops);
7577 }
7578 
7579 static void create_trace_options_dir(struct trace_array *tr)
7580 {
7581 	struct dentry *t_options;
7582 	bool top_level = tr == &global_trace;
7583 	int i;
7584 
7585 	t_options = trace_options_init_dentry(tr);
7586 	if (!t_options)
7587 		return;
7588 
7589 	for (i = 0; trace_options[i]; i++) {
7590 		if (top_level ||
7591 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7592 			create_trace_option_core_file(tr, trace_options[i], i);
7593 	}
7594 }
7595 
7596 static ssize_t
7597 rb_simple_read(struct file *filp, char __user *ubuf,
7598 	       size_t cnt, loff_t *ppos)
7599 {
7600 	struct trace_array *tr = filp->private_data;
7601 	char buf[64];
7602 	int r;
7603 
7604 	r = tracer_tracing_is_on(tr);
7605 	r = sprintf(buf, "%d\n", r);
7606 
7607 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7608 }
7609 
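/*
 * Write handler for tracing_on.  "echo 0 > tracing_on" stops recording
 * into the ring buffer and calls the current tracer's stop hook, while
 * "echo 1 > tracing_on" turns recording back on.
 */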
7610 static ssize_t
7611 rb_simple_write(struct file *filp, const char __user *ubuf,
7612 		size_t cnt, loff_t *ppos)
7613 {
7614 	struct trace_array *tr = filp->private_data;
7615 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7616 	unsigned long val;
7617 	int ret;
7618 
7619 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7620 	if (ret)
7621 		return ret;
7622 
7623 	if (buffer) {
7624 		mutex_lock(&trace_types_lock);
7625 		if (val) {
7626 			tracer_tracing_on(tr);
7627 			if (tr->current_trace->start)
7628 				tr->current_trace->start(tr);
7629 		} else {
7630 			tracer_tracing_off(tr);
7631 			if (tr->current_trace->stop)
7632 				tr->current_trace->stop(tr);
7633 		}
7634 		mutex_unlock(&trace_types_lock);
7635 	}
7636 
7637 	(*ppos)++;
7638 
7639 	return cnt;
7640 }
7641 
7642 static const struct file_operations rb_simple_fops = {
7643 	.open		= tracing_open_generic_tr,
7644 	.read		= rb_simple_read,
7645 	.write		= rb_simple_write,
7646 	.release	= tracing_release_generic_tr,
7647 	.llseek		= default_llseek,
7648 };
7649 
7650 struct dentry *trace_instance_dir;
7651 
7652 static void
7653 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7654 
7655 static int
7656 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7657 {
7658 	enum ring_buffer_flags rb_flags;
7659 
7660 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7661 
7662 	buf->tr = tr;
7663 
7664 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7665 	if (!buf->buffer)
7666 		return -ENOMEM;
7667 
7668 	buf->data = alloc_percpu(struct trace_array_cpu);
7669 	if (!buf->data) {
7670 		ring_buffer_free(buf->buffer);
7671 		buf->buffer = NULL;
7672 		return -ENOMEM;
7673 	}
7674 
7675 	/* Allocate the first page for all buffers */
7676 	set_buffer_entries(&tr->trace_buffer,
7677 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7678 
7679 	return 0;
7680 }
7681 
7682 static int allocate_trace_buffers(struct trace_array *tr, int size)
7683 {
7684 	int ret;
7685 
7686 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7687 	if (ret)
7688 		return ret;
7689 
7690 #ifdef CONFIG_TRACER_MAX_TRACE
7691 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7692 				    allocate_snapshot ? size : 1);
7693 	if (WARN_ON(ret)) {
7694 		ring_buffer_free(tr->trace_buffer.buffer);
7695 		tr->trace_buffer.buffer = NULL;
7696 		free_percpu(tr->trace_buffer.data);
7697 		tr->trace_buffer.data = NULL;
7698 		return -ENOMEM;
7699 	}
7700 	tr->allocated_snapshot = allocate_snapshot;
7701 
7702 	/*
7703 	 * Only the top level trace array gets its snapshot allocated
7704 	 * from the kernel command line.
7705 	 */
7706 	allocate_snapshot = false;
7707 #endif
7708 	return 0;
7709 }
7710 
7711 static void free_trace_buffer(struct trace_buffer *buf)
7712 {
7713 	if (buf->buffer) {
7714 		ring_buffer_free(buf->buffer);
7715 		buf->buffer = NULL;
7716 		free_percpu(buf->data);
7717 		buf->data = NULL;
7718 	}
7719 }
7720 
7721 static void free_trace_buffers(struct trace_array *tr)
7722 {
7723 	if (!tr)
7724 		return;
7725 
7726 	free_trace_buffer(&tr->trace_buffer);
7727 
7728 #ifdef CONFIG_TRACER_MAX_TRACE
7729 	free_trace_buffer(&tr->max_buffer);
7730 #endif
7731 }
7732 
7733 static void init_trace_flags_index(struct trace_array *tr)
7734 {
7735 	int i;
7736 
7737 	/* Used by the trace options files */
7738 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7739 		tr->trace_flags_index[i] = i;
7740 }
7741 
7742 static void __update_tracer_options(struct trace_array *tr)
7743 {
7744 	struct tracer *t;
7745 
7746 	for (t = trace_types; t; t = t->next)
7747 		add_tracer_options(tr, t);
7748 }
7749 
7750 static void update_tracer_options(struct trace_array *tr)
7751 {
7752 	mutex_lock(&trace_types_lock);
7753 	__update_tracer_options(tr);
7754 	mutex_unlock(&trace_types_lock);
7755 }
7756 
7757 static int instance_mkdir(const char *name)
7758 {
7759 	struct trace_array *tr;
7760 	int ret;
7761 
7762 	mutex_lock(&event_mutex);
7763 	mutex_lock(&trace_types_lock);
7764 
7765 	ret = -EEXIST;
7766 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7767 		if (tr->name && strcmp(tr->name, name) == 0)
7768 			goto out_unlock;
7769 	}
7770 
7771 	ret = -ENOMEM;
7772 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7773 	if (!tr)
7774 		goto out_unlock;
7775 
7776 	tr->name = kstrdup(name, GFP_KERNEL);
7777 	if (!tr->name)
7778 		goto out_free_tr;
7779 
7780 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7781 		goto out_free_tr;
7782 
7783 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7784 
7785 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7786 
7787 	raw_spin_lock_init(&tr->start_lock);
7788 
7789 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7790 
7791 	tr->current_trace = &nop_trace;
7792 
7793 	INIT_LIST_HEAD(&tr->systems);
7794 	INIT_LIST_HEAD(&tr->events);
7795 	INIT_LIST_HEAD(&tr->hist_vars);
7796 
7797 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7798 		goto out_free_tr;
7799 
7800 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7801 	if (!tr->dir)
7802 		goto out_free_tr;
7803 
7804 	ret = event_trace_add_tracer(tr->dir, tr);
7805 	if (ret) {
7806 		tracefs_remove_recursive(tr->dir);
7807 		goto out_free_tr;
7808 	}
7809 
7810 	ftrace_init_trace_array(tr);
7811 
7812 	init_tracer_tracefs(tr, tr->dir);
7813 	init_trace_flags_index(tr);
7814 	__update_tracer_options(tr);
7815 
7816 	list_add(&tr->list, &ftrace_trace_arrays);
7817 
7818 	mutex_unlock(&trace_types_lock);
7819 	mutex_unlock(&event_mutex);
7820 
7821 	return 0;
7822 
7823  out_free_tr:
7824 	free_trace_buffers(tr);
7825 	free_cpumask_var(tr->tracing_cpumask);
7826 	kfree(tr->name);
7827 	kfree(tr);
7828 
7829  out_unlock:
7830 	mutex_unlock(&trace_types_lock);
7831 	mutex_unlock(&event_mutex);
7832 
7833 	return ret;
7834 
7835 }
7836 
7837 static int instance_rmdir(const char *name)
7838 {
7839 	struct trace_array *tr;
7840 	int found = 0;
7841 	int ret;
7842 	int i;
7843 
7844 	mutex_lock(&event_mutex);
7845 	mutex_lock(&trace_types_lock);
7846 
7847 	ret = -ENODEV;
7848 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7849 		if (tr->name && strcmp(tr->name, name) == 0) {
7850 			found = 1;
7851 			break;
7852 		}
7853 	}
7854 	if (!found)
7855 		goto out_unlock;
7856 
7857 	ret = -EBUSY;
7858 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7859 		goto out_unlock;
7860 
7861 	list_del(&tr->list);
7862 
7863 	/* Disable all the flags that were enabled coming in */
7864 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7865 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7866 			set_tracer_flag(tr, 1 << i, 0);
7867 	}
7868 
7869 	tracing_set_nop(tr);
7870 	clear_ftrace_function_probes(tr);
7871 	event_trace_del_tracer(tr);
7872 	ftrace_clear_pids(tr);
7873 	ftrace_destroy_function_files(tr);
7874 	tracefs_remove_recursive(tr->dir);
7875 	free_trace_buffers(tr);
7876 
7877 	for (i = 0; i < tr->nr_topts; i++) {
7878 		kfree(tr->topts[i].topts);
7879 	}
7880 	kfree(tr->topts);
7881 
7882 	free_cpumask_var(tr->tracing_cpumask);
7883 	kfree(tr->name);
7884 	kfree(tr);
7885 
7886 	ret = 0;
7887 
7888  out_unlock:
7889 	mutex_unlock(&trace_types_lock);
7890 	mutex_unlock(&event_mutex);
7891 
7892 	return ret;
7893 }
7894 
7895 static __init void create_trace_instances(struct dentry *d_tracer)
7896 {
7897 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7898 							 instance_mkdir,
7899 							 instance_rmdir);
7900 	if (WARN_ON(!trace_instance_dir))
7901 		return;
7902 }
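
/*
 * Example: instance_mkdir() and instance_rmdir() above are wired to the
 * "instances" directory, so creating and destroying a trace instance from
 * user space is a plain mkdir()/rmdir(); rmdir() returns -EBUSY while the
 * instance is still referenced.  A sketch (the tracefs path is an
 * assumption):
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	static int make_and_drop_instance(void)
 *	{
 *		const char *dir = "/sys/kernel/tracing/instances/example";
 *
 *		if (mkdir(dir, 0755) < 0)
 *			return -1;
 *		return rmdir(dir);
 *	}
 */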
7903 
7904 static void
7905 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7906 {
7907 	struct trace_event_file *file;
7908 	int cpu;
7909 
7910 	trace_create_file("available_tracers", 0444, d_tracer,
7911 			tr, &show_traces_fops);
7912 
7913 	trace_create_file("current_tracer", 0644, d_tracer,
7914 			tr, &set_tracer_fops);
7915 
7916 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7917 			  tr, &tracing_cpumask_fops);
7918 
7919 	trace_create_file("trace_options", 0644, d_tracer,
7920 			  tr, &tracing_iter_fops);
7921 
7922 	trace_create_file("trace", 0644, d_tracer,
7923 			  tr, &tracing_fops);
7924 
7925 	trace_create_file("trace_pipe", 0444, d_tracer,
7926 			  tr, &tracing_pipe_fops);
7927 
7928 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7929 			  tr, &tracing_entries_fops);
7930 
7931 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7932 			  tr, &tracing_total_entries_fops);
7933 
7934 	trace_create_file("free_buffer", 0200, d_tracer,
7935 			  tr, &tracing_free_buffer_fops);
7936 
7937 	trace_create_file("trace_marker", 0220, d_tracer,
7938 			  tr, &tracing_mark_fops);
7939 
7940 	file = __find_event_file(tr, "ftrace", "print");
7941 	if (file && file->dir)
7942 		trace_create_file("trigger", 0644, file->dir, file,
7943 				  &event_trigger_fops);
7944 	tr->trace_marker_file = file;
7945 
7946 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7947 			  tr, &tracing_mark_raw_fops);
7948 
7949 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7950 			  &trace_clock_fops);
7951 
7952 	trace_create_file("tracing_on", 0644, d_tracer,
7953 			  tr, &rb_simple_fops);
7954 
7955 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7956 			  &trace_time_stamp_mode_fops);
7957 
7958 	create_trace_options_dir(tr);
7959 
7960 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7961 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7962 			&tr->max_latency, &tracing_max_lat_fops);
7963 #endif
7964 
7965 	if (ftrace_create_function_files(tr, d_tracer))
7966 		WARN(1, "Could not allocate function filter files");
7967 
7968 #ifdef CONFIG_TRACER_SNAPSHOT
7969 	trace_create_file("snapshot", 0644, d_tracer,
7970 			  tr, &snapshot_fops);
7971 #endif
7972 
7973 	for_each_tracing_cpu(cpu)
7974 		tracing_init_tracefs_percpu(tr, cpu);
7975 
7976 	ftrace_init_tracefs(tr, d_tracer);
7977 }
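
/*
 * Example: "current_tracer" is one of the control files created by
 * init_tracer_tracefs() above, so switching tracers from user space is a
 * plain write of the tracer name.  A sketch (the path and the availability
 * of the "function" tracer are assumptions):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int use_function_tracer(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "function", strlen("function"));
 *		close(fd);
 *		return 0;
 *	}
 */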
7978 
7979 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7980 {
7981 	struct vfsmount *mnt;
7982 	struct file_system_type *type;
7983 
7984 	/*
7985 	 * To maintain backward compatibility for tools that mount
7986 	 * debugfs to get to the tracing facility, tracefs is automatically
7987 	 * mounted to the debugfs/tracing directory.
7988 	 */
7989 	type = get_fs_type("tracefs");
7990 	if (!type)
7991 		return NULL;
7992 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7993 	put_filesystem(type);
7994 	if (IS_ERR(mnt))
7995 		return NULL;
7996 	mntget(mnt);
7997 
7998 	return mnt;
7999 }
8000 
8001 /**
8002  * tracing_init_dentry - initialize top level trace array
8003  *
8004  * This is called when creating files or directories in the tracing
8005  * directory. It is called via fs_initcall() by any of the boot up code
8006  * and expects to return the dentry of the top level tracing directory.
8007  */
8008 struct dentry *tracing_init_dentry(void)
8009 {
8010 	struct trace_array *tr = &global_trace;
8011 
8012 	/* The top level trace array uses NULL as parent */
8013 	if (tr->dir)
8014 		return NULL;
8015 
8016 	if (WARN_ON(!tracefs_initialized()) ||
8017 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8018 		 WARN_ON(!debugfs_initialized())))
8019 		return ERR_PTR(-ENODEV);
8020 
8021 	/*
8022 	 * As there may still be users that expect the tracing
8023 	 * files to exist in debugfs/tracing, we must automount
8024 	 * the tracefs file system there, so older tools still
8025 	 * work with the newer kernel.
8026 	 */
8027 	tr->dir = debugfs_create_automount("tracing", NULL,
8028 					   trace_automount, NULL);
8029 	if (!tr->dir) {
8030 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8031 		return ERR_PTR(-ENOMEM);
8032 	}
8033 
8034 	return NULL;
8035 }
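
/*
 * Example: because of the automount set up above, a tool that only knows the
 * old debugfs location keeps working; the first lookup under debugfs/tracing
 * triggers the tracefs submount.  A sketch (the debugfs mount point is an
 * assumption):
 *
 *	#include <fcntl.h>
 *
 *	static int open_trace_via_legacy_path(void)
 *	{
 *		return open("/sys/kernel/debug/tracing/trace", O_RDONLY);
 *	}
 */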
8036 
8037 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8038 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8039 
8040 static void __init trace_eval_init(void)
8041 {
8042 	int len;
8043 
8044 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8045 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8046 }
8047 
8048 #ifdef CONFIG_MODULES
8049 static void trace_module_add_evals(struct module *mod)
8050 {
8051 	if (!mod->num_trace_evals)
8052 		return;
8053 
8054 	/*
8055 	 * Modules with bad taint do not have events created; do
8056 	 * not bother with their eval maps (enums) either.
8057 	 */
8058 	if (trace_module_has_bad_taint(mod))
8059 		return;
8060 
8061 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8062 }
8063 
8064 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8065 static void trace_module_remove_evals(struct module *mod)
8066 {
8067 	union trace_eval_map_item *map;
8068 	union trace_eval_map_item **last = &trace_eval_maps;
8069 
8070 	if (!mod->num_trace_evals)
8071 		return;
8072 
8073 	mutex_lock(&trace_eval_mutex);
8074 
8075 	map = trace_eval_maps;
8076 
8077 	while (map) {
8078 		if (map->head.mod == mod)
8079 			break;
8080 		map = trace_eval_jmp_to_tail(map);
8081 		last = &map->tail.next;
8082 		map = map->tail.next;
8083 	}
8084 	if (!map)
8085 		goto out;
8086 
8087 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8088 	kfree(map);
8089  out:
8090 	mutex_unlock(&trace_eval_mutex);
8091 }
8092 #else
8093 static inline void trace_module_remove_evals(struct module *mod) { }
8094 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8095 
8096 static int trace_module_notify(struct notifier_block *self,
8097 			       unsigned long val, void *data)
8098 {
8099 	struct module *mod = data;
8100 
8101 	switch (val) {
8102 	case MODULE_STATE_COMING:
8103 		trace_module_add_evals(mod);
8104 		break;
8105 	case MODULE_STATE_GOING:
8106 		trace_module_remove_evals(mod);
8107 		break;
8108 	}
8109 
8110 	return 0;
8111 }
8112 
8113 static struct notifier_block trace_module_nb = {
8114 	.notifier_call = trace_module_notify,
8115 	.priority = 0,
8116 };
8117 #endif /* CONFIG_MODULES */
8118 
8119 static __init int tracer_init_tracefs(void)
8120 {
8121 	struct dentry *d_tracer;
8122 
8123 	trace_access_lock_init();
8124 
8125 	d_tracer = tracing_init_dentry();
8126 	if (IS_ERR(d_tracer))
8127 		return 0;
8128 
8129 	event_trace_init();
8130 
8131 	init_tracer_tracefs(&global_trace, d_tracer);
8132 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8133 
8134 	trace_create_file("tracing_thresh", 0644, d_tracer,
8135 			&global_trace, &tracing_thresh_fops);
8136 
8137 	trace_create_file("README", 0444, d_tracer,
8138 			NULL, &tracing_readme_fops);
8139 
8140 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8141 			NULL, &tracing_saved_cmdlines_fops);
8142 
8143 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8144 			  NULL, &tracing_saved_cmdlines_size_fops);
8145 
8146 	trace_create_file("saved_tgids", 0444, d_tracer,
8147 			NULL, &tracing_saved_tgids_fops);
8148 
8149 	trace_eval_init();
8150 
8151 	trace_create_eval_file(d_tracer);
8152 
8153 #ifdef CONFIG_MODULES
8154 	register_module_notifier(&trace_module_nb);
8155 #endif
8156 
8157 #ifdef CONFIG_DYNAMIC_FTRACE
8158 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8159 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8160 #endif
8161 
8162 	create_trace_instances(d_tracer);
8163 
8164 	update_tracer_options(&global_trace);
8165 
8166 	return 0;
8167 }
8168 
8169 static int trace_panic_handler(struct notifier_block *this,
8170 			       unsigned long event, void *unused)
8171 {
8172 	if (ftrace_dump_on_oops)
8173 		ftrace_dump(ftrace_dump_on_oops);
8174 	return NOTIFY_OK;
8175 }
8176 
8177 static struct notifier_block trace_panic_notifier = {
8178 	.notifier_call  = trace_panic_handler,
8179 	.next           = NULL,
8180 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8181 };
8182 
8183 static int trace_die_handler(struct notifier_block *self,
8184 			     unsigned long val,
8185 			     void *data)
8186 {
8187 	switch (val) {
8188 	case DIE_OOPS:
8189 		if (ftrace_dump_on_oops)
8190 			ftrace_dump(ftrace_dump_on_oops);
8191 		break;
8192 	default:
8193 		break;
8194 	}
8195 	return NOTIFY_OK;
8196 }
8197 
8198 static struct notifier_block trace_die_notifier = {
8199 	.notifier_call = trace_die_handler,
8200 	.priority = 200
8201 };
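
/*
 * Example: both notifiers above only act when ftrace_dump_on_oops is
 * non-zero.  It can be set with the "ftrace_dump_on_oops" kernel command-line
 * parameter, or at run time through the corresponding sysctl.  A user-space
 * sketch (the proc path is an assumption):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void enable_dump_on_oops(void)
 *	{
 *		int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */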
8202 
8203 /*
8204  * printk is limited to a max of 1024 characters; we really don't need
8205  * it that big. Nothing should be printing 1000 characters anyway.
8206  */
8207 #define TRACE_MAX_PRINT		1000
8208 
8209 /*
8210  * Define here KERN_TRACE so that we have one place to modify
8211  * it if we decide to change what log level the ftrace dump
8212  * should be at.
8213  */
8214 #define KERN_TRACE		KERN_EMERG
8215 
8216 void
8217 trace_printk_seq(struct trace_seq *s)
8218 {
8219 	/* Probably should print a warning here. */
8220 	if (s->seq.len >= TRACE_MAX_PRINT)
8221 		s->seq.len = TRACE_MAX_PRINT;
8222 
8223 	/*
8224 	 * More paranoid code. Although the buffer size is set to
8225 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8226 	 * an extra layer of protection.
8227 	 */
8228 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8229 		s->seq.len = s->seq.size - 1;
8230 
8231 	/* Should be NUL terminated, but we are paranoid. */
8232 	s->buffer[s->seq.len] = 0;
8233 
8234 	printk(KERN_TRACE "%s", s->buffer);
8235 
8236 	trace_seq_init(s);
8237 }
8238 
8239 void trace_init_global_iter(struct trace_iterator *iter)
8240 {
8241 	iter->tr = &global_trace;
8242 	iter->trace = iter->tr->current_trace;
8243 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8244 	iter->trace_buffer = &global_trace.trace_buffer;
8245 
8246 	if (iter->trace && iter->trace->open)
8247 		iter->trace->open(iter);
8248 
8249 	/* Annotate start of buffers if we had overruns */
8250 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8251 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8252 
8253 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8254 	if (trace_clocks[iter->tr->clock_id].in_ns)
8255 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8256 }
8257 
8258 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8259 {
8260 	/* use static because iter can be a bit big for the stack */
8261 	static struct trace_iterator iter;
8262 	static atomic_t dump_running;
8263 	struct trace_array *tr = &global_trace;
8264 	unsigned int old_userobj;
8265 	unsigned long flags;
8266 	int cnt = 0, cpu;
8267 
8268 	/* Only allow one dump user at a time. */
8269 	if (atomic_inc_return(&dump_running) != 1) {
8270 		atomic_dec(&dump_running);
8271 		return;
8272 	}
8273 
8274 	/*
8275 	 * Always turn off tracing when we dump.
8276 	 * We don't need to show trace output of what happens
8277 	 * between multiple crashes.
8278 	 *
8279 	 * If the user does a sysrq-z, then they can re-enable
8280 	 * tracing with echo 1 > tracing_on.
8281 	 */
8282 	tracing_off();
8283 
8284 	local_irq_save(flags);
8285 
8286 	/* Simulate the iterator */
8287 	trace_init_global_iter(&iter);
8288 
8289 	for_each_tracing_cpu(cpu) {
8290 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8291 	}
8292 
8293 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8294 
8295 	/* don't look at user memory in panic mode */
8296 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8297 
8298 	switch (oops_dump_mode) {
8299 	case DUMP_ALL:
8300 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8301 		break;
8302 	case DUMP_ORIG:
8303 		iter.cpu_file = raw_smp_processor_id();
8304 		break;
8305 	case DUMP_NONE:
8306 		goto out_enable;
8307 	default:
8308 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8309 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8310 	}
8311 
8312 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8313 
8314 	/* Did function tracer already get disabled? */
8315 	if (ftrace_is_dead()) {
8316 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8317 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8318 	}
8319 
8320 	/*
8321 	 * We need to stop all tracing on all CPUs to read
8322 	 * the next buffer. This is a bit expensive, but is
8323 	 * not done often. We print all that we can read,
8324 	 * and then release the locks again.
8325 	 */
8326 
8327 	while (!trace_empty(&iter)) {
8328 
8329 		if (!cnt)
8330 			printk(KERN_TRACE "---------------------------------\n");
8331 
8332 		cnt++;
8333 
8334 		/* reset all but tr, trace, and overruns */
8335 		memset(&iter.seq, 0,
8336 		       sizeof(struct trace_iterator) -
8337 		       offsetof(struct trace_iterator, seq));
8338 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8339 		iter.pos = -1;
8340 
8341 		if (trace_find_next_entry_inc(&iter) != NULL) {
8342 			int ret;
8343 
8344 			ret = print_trace_line(&iter);
8345 			if (ret != TRACE_TYPE_NO_CONSUME)
8346 				trace_consume(&iter);
8347 		}
8348 		touch_nmi_watchdog();
8349 
8350 		trace_printk_seq(&iter.seq);
8351 	}
8352 
8353 	if (!cnt)
8354 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8355 	else
8356 		printk(KERN_TRACE "---------------------------------\n");
8357 
8358  out_enable:
8359 	tr->trace_flags |= old_userobj;
8360 
8361 	for_each_tracing_cpu(cpu) {
8362 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8363 	}
8364 	atomic_dec(&dump_running);
8365 	local_irq_restore(flags);
8366 }
8367 EXPORT_SYMBOL_GPL(ftrace_dump);
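
/*
 * Example: ftrace_dump() is exported, so other kernel code (an out-of-tree
 * module is assumed in this sketch) can spill the trace buffers to the
 * console when it hits an unrecoverable condition, much like sysrq-z does:
 *
 *	static void example_fatal_error(void)
 *	{
 *		pr_emerg("example: fatal state reached, dumping trace\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 */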
8368 
8369 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8370 {
8371 	char **argv;
8372 	int argc, ret;
8373 
8374 	argc = 0;
8375 	ret = 0;
8376 	argv = argv_split(GFP_KERNEL, buf, &argc);
8377 	if (!argv)
8378 		return -ENOMEM;
8379 
8380 	if (argc)
8381 		ret = createfn(argc, argv);
8382 
8383 	argv_free(argv);
8384 
8385 	return ret;
8386 }
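
/*
 * Example: trace_run_command() hands the already-split words to the createfn
 * callback.  A hypothetical callback would see "myevent arg1 arg2" as
 * argc == 3 with argv[0] == "myevent":
 *
 *	static int example_createfn(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("cmd '%s' with %d argument(s)\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	trace_run_command("myevent arg1 arg2", example_createfn);
 */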
8387 
8388 #define WRITE_BUFSIZE  4096
8389 
8390 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8391 				size_t count, loff_t *ppos,
8392 				int (*createfn)(int, char **))
8393 {
8394 	char *kbuf, *buf, *tmp;
8395 	int ret = 0;
8396 	size_t done = 0;
8397 	size_t size;
8398 
8399 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8400 	if (!kbuf)
8401 		return -ENOMEM;
8402 
8403 	while (done < count) {
8404 		size = count - done;
8405 
8406 		if (size >= WRITE_BUFSIZE)
8407 			size = WRITE_BUFSIZE - 1;
8408 
8409 		if (copy_from_user(kbuf, buffer + done, size)) {
8410 			ret = -EFAULT;
8411 			goto out;
8412 		}
8413 		kbuf[size] = '\0';
8414 		buf = kbuf;
8415 		do {
8416 			tmp = strchr(buf, '\n');
8417 			if (tmp) {
8418 				*tmp = '\0';
8419 				size = tmp - buf + 1;
8420 			} else {
8421 				size = strlen(buf);
8422 				if (done + size < count) {
8423 					if (buf != kbuf)
8424 						break;
8425 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8426 					pr_warn("Line length is too long: Should be less than %d\n",
8427 						WRITE_BUFSIZE - 2);
8428 					ret = -EINVAL;
8429 					goto out;
8430 				}
8431 			}
8432 			done += size;
8433 
8434 			/* Remove comments */
8435 			tmp = strchr(buf, '#');
8436 
8437 			if (tmp)
8438 				*tmp = '\0';
8439 
8440 			ret = trace_run_command(buf, createfn);
8441 			if (ret)
8442 				goto out;
8443 			buf += size;
8444 
8445 		} while (done < count);
8446 	}
8447 	ret = done;
8448 
8449 out:
8450 	kfree(kbuf);
8451 
8452 	return ret;
8453 }
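
/*
 * Example: trace_parse_run_command() is meant to sit behind a tracefs .write
 * handler, the way the probe definition files use it.  Each '\n'-terminated
 * line, minus any '#' comment, is handed to the createfn callback; a
 * hypothetical handler (reusing example_createfn from the sketch above) can
 * be as small as:
 *
 *	static ssize_t example_probe_write(struct file *file,
 *					   const char __user *buffer,
 *					   size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_createfn);
 *	}
 */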
8454 
8455 __init static int tracer_alloc_buffers(void)
8456 {
8457 	int ring_buf_size;
8458 	int ret = -ENOMEM;
8459 
8460 	/*
8461 	 * Make sure we don't accidentally add more trace options
8462 	 * than we have bits for.
8463 	 */
8464 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8465 
8466 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8467 		goto out;
8468 
8469 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8470 		goto out_free_buffer_mask;
8471 
8472 	/* Only allocate trace_printk buffers if a trace_printk exists */
8473 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8474 		/* Must be called before global_trace.buffer is allocated */
8475 		trace_printk_init_buffers();
8476 
8477 	/* To save memory, keep the ring buffer size to its minimum */
8478 	if (ring_buffer_expanded)
8479 		ring_buf_size = trace_buf_size;
8480 	else
8481 		ring_buf_size = 1;
8482 
8483 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8484 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8485 
8486 	raw_spin_lock_init(&global_trace.start_lock);
8487 
8488 	/*
8489 	 * The prepare callback allocates some memory for the ring buffer. We
8490 	 * don't free the buffer if the CPU goes down. If we were to free
8491 	 * the buffer, then the user would lose any trace that was in the
8492 	 * buffer. The memory will be removed once the "instance" is removed.
8493 	 */
8494 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8495 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8496 				      NULL);
8497 	if (ret < 0)
8498 		goto out_free_cpumask;
8499 	/* Used for event triggers */
8500 	ret = -ENOMEM;
8501 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8502 	if (!temp_buffer)
8503 		goto out_rm_hp_state;
8504 
8505 	if (trace_create_savedcmd() < 0)
8506 		goto out_free_temp_buffer;
8507 
8508 	/* TODO: make the number of buffers hot pluggable with CPUS */
8509 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8510 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8511 		WARN_ON(1);
8512 		goto out_free_savedcmd;
8513 	}
8514 
8515 	if (global_trace.buffer_disabled)
8516 		tracing_off();
8517 
8518 	if (trace_boot_clock) {
8519 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8520 		if (ret < 0)
8521 			pr_warn("Trace clock %s not defined, going back to default\n",
8522 				trace_boot_clock);
8523 	}
8524 
8525 	/*
8526 	 * register_tracer() might reference current_trace, so it
8527 	 * needs to be set before we register anything. This is
8528 	 * just a bootstrap of current_trace anyway.
8529 	 */
8530 	global_trace.current_trace = &nop_trace;
8531 
8532 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8533 
8534 	ftrace_init_global_array_ops(&global_trace);
8535 
8536 	init_trace_flags_index(&global_trace);
8537 
8538 	register_tracer(&nop_trace);
8539 
8540 	/* Function tracing may start here (via kernel command line) */
8541 	init_function_trace();
8542 
8543 	/* All seems OK, enable tracing */
8544 	tracing_disabled = 0;
8545 
8546 	atomic_notifier_chain_register(&panic_notifier_list,
8547 				       &trace_panic_notifier);
8548 
8549 	register_die_notifier(&trace_die_notifier);
8550 
8551 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8552 
8553 	INIT_LIST_HEAD(&global_trace.systems);
8554 	INIT_LIST_HEAD(&global_trace.events);
8555 	INIT_LIST_HEAD(&global_trace.hist_vars);
8556 	list_add(&global_trace.list, &ftrace_trace_arrays);
8557 
8558 	apply_trace_boot_options();
8559 
8560 	register_snapshot_cmd();
8561 
8562 	return 0;
8563 
8564 out_free_savedcmd:
8565 	free_saved_cmdlines_buffer(savedcmd);
8566 out_free_temp_buffer:
8567 	ring_buffer_free(temp_buffer);
8568 out_rm_hp_state:
8569 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8570 out_free_cpumask:
8571 	free_cpumask_var(global_trace.tracing_cpumask);
8572 out_free_buffer_mask:
8573 	free_cpumask_var(tracing_buffer_mask);
8574 out:
8575 	return ret;
8576 }
8577 
8578 void __init early_trace_init(void)
8579 {
8580 	if (tracepoint_printk) {
8581 		tracepoint_print_iter =
8582 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8583 		if (WARN_ON(!tracepoint_print_iter))
8584 			tracepoint_printk = 0;
8585 		else
8586 			static_key_enable(&tracepoint_printk_key.key);
8587 	}
8588 	tracer_alloc_buffers();
8589 }
8590 
8591 void __init trace_init(void)
8592 {
8593 	trace_event_init();
8594 }
8595 
8596 __init static int clear_boot_tracer(void)
8597 {
8598 	/*
8599 	 * The default bootup tracer name points into a boot buffer
8600 	 * that lives in an init section. This function is called at
8601 	 * late_initcall time; if the boot tracer was never registered,
8602 	 * clear the pointer out to prevent a later registration from
8603 	 * accessing the buffer that is about to be freed.
8604 	 */
8605 	if (!default_bootup_tracer)
8606 		return 0;
8607 
8608 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8609 	       default_bootup_tracer);
8610 	default_bootup_tracer = NULL;
8611 
8612 	return 0;
8613 }
8614 
8615 fs_initcall(tracer_init_tracefs);
8616 late_initcall_sync(clear_boot_tracer);
8617 
8618 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8619 __init static int tracing_set_default_clock(void)
8620 {
8621 	/* sched_clock_stable() is determined in late_initcall */
8622 	if (!trace_boot_clock && !sched_clock_stable()) {
8623 		printk(KERN_WARNING
8624 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8625 		       "If you want to keep using the local clock, then add:\n"
8626 		       "  \"trace_clock=local\"\n"
8627 		       "on the kernel command line\n");
8628 		tracing_set_clock(&global_trace, "global");
8629 	}
8630 
8631 	return 0;
8632 }
8633 late_initcall_sync(tracing_set_default_clock);
8634 #endif
8635