xref: /openbmc/linux/kernel/trace/trace.c (revision 42bc47b3)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will lurk into the ring-buffer to count the
59  * entries inserted during the selftest although some concurrent
60  * insertions into the ring-buffer such as trace_printk could occurred
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * than "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189 
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid already is +1 of the actual prevous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always recreate a new array. The write is an all or nothing
493 	 * operation. Always create a new array when adding new pids by
494 	 * the user. If the operation fails, then the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. Anyway this can be
614  * boot time and run time configurable.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
635  * The content of events may become garbage if we allow other process consumes
636  * these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not reader page) in ring buffer, and this page will be rewrited
639  *      by events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races of where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer. instead, resize it because
974 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
975 	 * we want preserve it.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = tracing_alloc_snapshot_instance(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races of where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit postions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by  space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this tasks comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	struct ring_buffer *buf;
1364 
1365 	if (tr->stop_count)
1366 		return;
1367 
1368 	WARN_ON_ONCE(!irqs_disabled());
1369 
1370 	if (!tr->allocated_snapshot) {
1371 		/* Only the nop tracer should hit this when disabling */
1372 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373 		return;
1374 	}
1375 
1376 	arch_spin_lock(&tr->max_lock);
1377 
1378 	buf = tr->trace_buffer.buffer;
1379 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 	tr->max_buffer.buffer = buf;
1381 
1382 	__update_max_tr(tr, tsk, cpu);
1383 	arch_spin_unlock(&tr->max_lock);
1384 }
1385 
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr - tracer
1389  * @tsk - task with the latency
1390  * @cpu - the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 	int ret;
1398 
1399 	if (tr->stop_count)
1400 		return;
1401 
1402 	WARN_ON_ONCE(!irqs_disabled());
1403 	if (!tr->allocated_snapshot) {
1404 		/* Only the nop tracer should hit this when disabling */
1405 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 		return;
1407 	}
1408 
1409 	arch_spin_lock(&tr->max_lock);
1410 
1411 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412 
1413 	if (ret == -EBUSY) {
1414 		/*
1415 		 * We failed to swap the buffer due to a commit taking
1416 		 * place on this CPU. We fail to record, but we reset
1417 		 * the max trace buffer (no one writes directly to it)
1418 		 * and flag that it failed.
1419 		 */
1420 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 			"Failed to swap buffers due to commit in progress\n");
1422 	}
1423 
1424 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425 
1426 	__update_max_tr(tr, tsk, cpu);
1427 	arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 	/* Iterators are static, they should be filled or empty */
1434 	if (trace_buffer_iter(iter, iter->cpu_file))
1435 		return 0;
1436 
1437 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 				full);
1439 }
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445 	struct list_head		list;
1446 	struct tracer			*type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
1451 static int save_selftest(struct tracer *type)
1452 {
1453 	struct trace_selftests *selftest;
1454 
1455 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 	if (!selftest)
1457 		return -ENOMEM;
1458 
1459 	selftest->type = type;
1460 	list_add(&selftest->list, &postponed_selftests);
1461 	return 0;
1462 }
1463 
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	struct tracer *saved_tracer = tr->current_trace;
1468 	int ret;
1469 
1470 	if (!type->selftest || tracing_selftest_disabled)
1471 		return 0;
1472 
1473 	/*
1474 	 * If a tracer registers early in boot up (before scheduling is
1475 	 * initialized and such), then do not run its selftests yet.
1476 	 * Instead, run it a little later in the boot process.
1477 	 */
1478 	if (!selftests_can_run)
1479 		return save_selftest(type);
1480 
1481 	/*
1482 	 * Run a selftest on this tracer.
1483 	 * Here we reset the trace buffer, and set the current
1484 	 * tracer to be this tracer. The tracer can then run some
1485 	 * internal tracing to verify that everything is in order.
1486 	 * If we fail, we do not register this tracer.
1487 	 */
1488 	tracing_reset_online_cpus(&tr->trace_buffer);
1489 
1490 	tr->current_trace = type;
1491 
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 	if (type->use_max_tr) {
1494 		/* If we expanded the buffers, make sure the max is expanded too */
1495 		if (ring_buffer_expanded)
1496 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 					   RING_BUFFER_ALL_CPUS);
1498 		tr->allocated_snapshot = true;
1499 	}
1500 #endif
1501 
1502 	/* the test is responsible for initializing and enabling */
1503 	pr_info("Testing tracer %s: ", type->name);
1504 	ret = type->selftest(type, tr);
1505 	/* the test is responsible for resetting too */
1506 	tr->current_trace = saved_tracer;
1507 	if (ret) {
1508 		printk(KERN_CONT "FAILED!\n");
1509 		/* Add the warning after printing 'FAILED' */
1510 		WARN_ON(1);
1511 		return -1;
1512 	}
1513 	/* Only reset on passing, to avoid touching corrupted buffers */
1514 	tracing_reset_online_cpus(&tr->trace_buffer);
1515 
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 	if (type->use_max_tr) {
1518 		tr->allocated_snapshot = false;
1519 
1520 		/* Shrink the max buffer again */
1521 		if (ring_buffer_expanded)
1522 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 					   RING_BUFFER_ALL_CPUS);
1524 	}
1525 #endif
1526 
1527 	printk(KERN_CONT "PASSED\n");
1528 	return 0;
1529 }
1530 
1531 static __init int init_trace_selftests(void)
1532 {
1533 	struct trace_selftests *p, *n;
1534 	struct tracer *t, **last;
1535 	int ret;
1536 
1537 	selftests_can_run = true;
1538 
1539 	mutex_lock(&trace_types_lock);
1540 
1541 	if (list_empty(&postponed_selftests))
1542 		goto out;
1543 
1544 	pr_info("Running postponed tracer tests:\n");
1545 
1546 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547 		ret = run_tracer_selftest(p->type);
1548 		/* If the test fails, then warn and remove from available_tracers */
1549 		if (ret < 0) {
1550 			WARN(1, "tracer: %s failed selftest, disabling\n",
1551 			     p->type->name);
1552 			last = &trace_types;
1553 			for (t = trace_types; t; t = t->next) {
1554 				if (t == p->type) {
1555 					*last = t->next;
1556 					break;
1557 				}
1558 				last = &t->next;
1559 			}
1560 		}
1561 		list_del(&p->list);
1562 		kfree(p);
1563 	}
1564 
1565  out:
1566 	mutex_unlock(&trace_types_lock);
1567 
1568 	return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574 	return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577 
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579 
1580 static void __init apply_trace_boot_options(void);
1581 
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type - the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590 	struct tracer *t;
1591 	int ret = 0;
1592 
1593 	if (!type->name) {
1594 		pr_info("Tracer must have a name\n");
1595 		return -1;
1596 	}
1597 
1598 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600 		return -1;
1601 	}
1602 
1603 	mutex_lock(&trace_types_lock);
1604 
1605 	tracing_selftest_running = true;
1606 
1607 	for (t = trace_types; t; t = t->next) {
1608 		if (strcmp(type->name, t->name) == 0) {
1609 			/* already found */
1610 			pr_info("Tracer %s already registered\n",
1611 				type->name);
1612 			ret = -1;
1613 			goto out;
1614 		}
1615 	}
1616 
1617 	if (!type->set_flag)
1618 		type->set_flag = &dummy_set_flag;
1619 	if (!type->flags) {
1620 		/*allocate a dummy tracer_flags*/
1621 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622 		if (!type->flags) {
1623 			ret = -ENOMEM;
1624 			goto out;
1625 		}
1626 		type->flags->val = 0;
1627 		type->flags->opts = dummy_tracer_opt;
1628 	} else
1629 		if (!type->flags->opts)
1630 			type->flags->opts = dummy_tracer_opt;
1631 
1632 	/* store the tracer for __set_tracer_option */
1633 	type->flags->trace = type;
1634 
1635 	ret = run_tracer_selftest(type);
1636 	if (ret < 0)
1637 		goto out;
1638 
1639 	type->next = trace_types;
1640 	trace_types = type;
1641 	add_tracer_options(&global_trace, type);
1642 
1643  out:
1644 	tracing_selftest_running = false;
1645 	mutex_unlock(&trace_types_lock);
1646 
1647 	if (ret || !default_bootup_tracer)
1648 		goto out_unlock;
1649 
1650 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651 		goto out_unlock;
1652 
1653 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654 	/* Do we want this tracer to start on bootup? */
1655 	tracing_set_tracer(&global_trace, type->name);
1656 	default_bootup_tracer = NULL;
1657 
1658 	apply_trace_boot_options();
1659 
1660 	/* disable other selftests, since this will break it. */
1661 	tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664 	       type->name);
1665 #endif
1666 
1667  out_unlock:
1668 	return ret;
1669 }
1670 
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673 	struct ring_buffer *buffer = buf->buffer;
1674 
1675 	if (!buffer)
1676 		return;
1677 
1678 	ring_buffer_record_disable(buffer);
1679 
1680 	/* Make sure all commits have finished */
1681 	synchronize_sched();
1682 	ring_buffer_reset_cpu(buffer, cpu);
1683 
1684 	ring_buffer_record_enable(buffer);
1685 }
1686 
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689 	struct ring_buffer *buffer = buf->buffer;
1690 	int cpu;
1691 
1692 	if (!buffer)
1693 		return;
1694 
1695 	ring_buffer_record_disable(buffer);
1696 
1697 	/* Make sure all commits have finished */
1698 	synchronize_sched();
1699 
1700 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701 
1702 	for_each_online_cpu(cpu)
1703 		ring_buffer_reset_cpu(buffer, cpu);
1704 
1705 	ring_buffer_record_enable(buffer);
1706 }
1707 
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711 	struct trace_array *tr;
1712 
1713 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714 		if (!tr->clear_trace)
1715 			continue;
1716 		tr->clear_trace = false;
1717 		tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 		tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721 	}
1722 }
1723 
1724 static int *tgid_map;
1725 
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731 	unsigned *map_cmdline_to_pid;
1732 	unsigned cmdline_num;
1733 	int cmdline_idx;
1734 	char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737 
1738 /* temporary disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740 
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745 
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750 
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752 				    struct saved_cmdlines_buffer *s)
1753 {
1754 	s->map_cmdline_to_pid = kmalloc_array(val,
1755 					      sizeof(*s->map_cmdline_to_pid),
1756 					      GFP_KERNEL);
1757 	if (!s->map_cmdline_to_pid)
1758 		return -ENOMEM;
1759 
1760 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1761 	if (!s->saved_cmdlines) {
1762 		kfree(s->map_cmdline_to_pid);
1763 		return -ENOMEM;
1764 	}
1765 
1766 	s->cmdline_idx = 0;
1767 	s->cmdline_num = val;
1768 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1769 	       sizeof(s->map_pid_to_cmdline));
1770 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1771 	       val * sizeof(*s->map_cmdline_to_pid));
1772 
1773 	return 0;
1774 }
1775 
1776 static int trace_create_savedcmd(void)
1777 {
1778 	int ret;
1779 
1780 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1781 	if (!savedcmd)
1782 		return -ENOMEM;
1783 
1784 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1785 	if (ret < 0) {
1786 		kfree(savedcmd);
1787 		savedcmd = NULL;
1788 		return -ENOMEM;
1789 	}
1790 
1791 	return 0;
1792 }
1793 
1794 int is_tracing_stopped(void)
1795 {
1796 	return global_trace.stop_count;
1797 }
1798 
1799 /**
1800  * tracing_start - quick start of the tracer
1801  *
1802  * If tracing is enabled but was stopped by tracing_stop,
1803  * this will start the tracer back up.
1804  */
1805 void tracing_start(void)
1806 {
1807 	struct ring_buffer *buffer;
1808 	unsigned long flags;
1809 
1810 	if (tracing_disabled)
1811 		return;
1812 
1813 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1814 	if (--global_trace.stop_count) {
1815 		if (global_trace.stop_count < 0) {
1816 			/* Someone screwed up their debugging */
1817 			WARN_ON_ONCE(1);
1818 			global_trace.stop_count = 0;
1819 		}
1820 		goto out;
1821 	}
1822 
1823 	/* Prevent the buffers from switching */
1824 	arch_spin_lock(&global_trace.max_lock);
1825 
1826 	buffer = global_trace.trace_buffer.buffer;
1827 	if (buffer)
1828 		ring_buffer_record_enable(buffer);
1829 
1830 #ifdef CONFIG_TRACER_MAX_TRACE
1831 	buffer = global_trace.max_buffer.buffer;
1832 	if (buffer)
1833 		ring_buffer_record_enable(buffer);
1834 #endif
1835 
1836 	arch_spin_unlock(&global_trace.max_lock);
1837 
1838  out:
1839 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1840 }
1841 
1842 static void tracing_start_tr(struct trace_array *tr)
1843 {
1844 	struct ring_buffer *buffer;
1845 	unsigned long flags;
1846 
1847 	if (tracing_disabled)
1848 		return;
1849 
1850 	/* If global, we need to also start the max tracer */
1851 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1852 		return tracing_start();
1853 
1854 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1855 
1856 	if (--tr->stop_count) {
1857 		if (tr->stop_count < 0) {
1858 			/* Someone screwed up their debugging */
1859 			WARN_ON_ONCE(1);
1860 			tr->stop_count = 0;
1861 		}
1862 		goto out;
1863 	}
1864 
1865 	buffer = tr->trace_buffer.buffer;
1866 	if (buffer)
1867 		ring_buffer_record_enable(buffer);
1868 
1869  out:
1870 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1871 }
1872 
1873 /**
1874  * tracing_stop - quick stop of the tracer
1875  *
1876  * Light weight way to stop tracing. Use in conjunction with
1877  * tracing_start.
1878  */
1879 void tracing_stop(void)
1880 {
1881 	struct ring_buffer *buffer;
1882 	unsigned long flags;
1883 
1884 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1885 	if (global_trace.stop_count++)
1886 		goto out;
1887 
1888 	/* Prevent the buffers from switching */
1889 	arch_spin_lock(&global_trace.max_lock);
1890 
1891 	buffer = global_trace.trace_buffer.buffer;
1892 	if (buffer)
1893 		ring_buffer_record_disable(buffer);
1894 
1895 #ifdef CONFIG_TRACER_MAX_TRACE
1896 	buffer = global_trace.max_buffer.buffer;
1897 	if (buffer)
1898 		ring_buffer_record_disable(buffer);
1899 #endif
1900 
1901 	arch_spin_unlock(&global_trace.max_lock);
1902 
1903  out:
1904 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1905 }
1906 
1907 static void tracing_stop_tr(struct trace_array *tr)
1908 {
1909 	struct ring_buffer *buffer;
1910 	unsigned long flags;
1911 
1912 	/* If global, we need to also stop the max tracer */
1913 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1914 		return tracing_stop();
1915 
1916 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1917 	if (tr->stop_count++)
1918 		goto out;
1919 
1920 	buffer = tr->trace_buffer.buffer;
1921 	if (buffer)
1922 		ring_buffer_record_disable(buffer);
1923 
1924  out:
1925 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1926 }
1927 
1928 static int trace_save_cmdline(struct task_struct *tsk)
1929 {
1930 	unsigned pid, idx;
1931 
1932 	/* treat recording of idle task as a success */
1933 	if (!tsk->pid)
1934 		return 1;
1935 
1936 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1937 		return 0;
1938 
1939 	/*
1940 	 * It's not the end of the world if we don't get
1941 	 * the lock, but we also don't want to spin
1942 	 * nor do we want to disable interrupts,
1943 	 * so if we miss here, then better luck next time.
1944 	 */
1945 	if (!arch_spin_trylock(&trace_cmdline_lock))
1946 		return 0;
1947 
1948 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1949 	if (idx == NO_CMDLINE_MAP) {
1950 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1951 
1952 		/*
1953 		 * Check whether the cmdline buffer at idx has a pid
1954 		 * mapped. We are going to overwrite that entry so we
1955 		 * need to clear the map_pid_to_cmdline. Otherwise we
1956 		 * would read the new comm for the old pid.
1957 		 */
1958 		pid = savedcmd->map_cmdline_to_pid[idx];
1959 		if (pid != NO_CMDLINE_MAP)
1960 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1961 
1962 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1963 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1964 
1965 		savedcmd->cmdline_idx = idx;
1966 	}
1967 
1968 	set_cmdline(idx, tsk->comm);
1969 
1970 	arch_spin_unlock(&trace_cmdline_lock);
1971 
1972 	return 1;
1973 }
1974 
1975 static void __trace_find_cmdline(int pid, char comm[])
1976 {
1977 	unsigned map;
1978 
1979 	if (!pid) {
1980 		strcpy(comm, "<idle>");
1981 		return;
1982 	}
1983 
1984 	if (WARN_ON_ONCE(pid < 0)) {
1985 		strcpy(comm, "<XXX>");
1986 		return;
1987 	}
1988 
1989 	if (pid > PID_MAX_DEFAULT) {
1990 		strcpy(comm, "<...>");
1991 		return;
1992 	}
1993 
1994 	map = savedcmd->map_pid_to_cmdline[pid];
1995 	if (map != NO_CMDLINE_MAP)
1996 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1997 	else
1998 		strcpy(comm, "<...>");
1999 }
2000 
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003 	preempt_disable();
2004 	arch_spin_lock(&trace_cmdline_lock);
2005 
2006 	__trace_find_cmdline(pid, comm);
2007 
2008 	arch_spin_unlock(&trace_cmdline_lock);
2009 	preempt_enable();
2010 }
2011 
2012 int trace_find_tgid(int pid)
2013 {
2014 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2015 		return 0;
2016 
2017 	return tgid_map[pid];
2018 }
2019 
2020 static int trace_save_tgid(struct task_struct *tsk)
2021 {
2022 	/* treat recording of idle task as a success */
2023 	if (!tsk->pid)
2024 		return 1;
2025 
2026 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2027 		return 0;
2028 
2029 	tgid_map[tsk->pid] = tsk->tgid;
2030 	return 1;
2031 }
2032 
2033 static bool tracing_record_taskinfo_skip(int flags)
2034 {
2035 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2036 		return true;
2037 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2038 		return true;
2039 	if (!__this_cpu_read(trace_taskinfo_save))
2040 		return true;
2041 	return false;
2042 }
2043 
2044 /**
2045  * tracing_record_taskinfo - record the task info of a task
2046  *
2047  * @task  - task to record
2048  * @flags - TRACE_RECORD_CMDLINE for recording comm
2049  *        - TRACE_RECORD_TGID for recording tgid
2050  */
2051 void tracing_record_taskinfo(struct task_struct *task, int flags)
2052 {
2053 	bool done;
2054 
2055 	if (tracing_record_taskinfo_skip(flags))
2056 		return;
2057 
2058 	/*
2059 	 * Record as much task information as possible. If some fail, continue
2060 	 * to try to record the others.
2061 	 */
2062 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2063 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2064 
2065 	/* If recording any information failed, retry again soon. */
2066 	if (!done)
2067 		return;
2068 
2069 	__this_cpu_write(trace_taskinfo_save, false);
2070 }
2071 
2072 /**
2073  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2074  *
2075  * @prev - previous task during sched_switch
2076  * @next - next task during sched_switch
2077  * @flags - TRACE_RECORD_CMDLINE for recording comm
2078  *          TRACE_RECORD_TGID for recording tgid
2079  */
2080 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2081 					  struct task_struct *next, int flags)
2082 {
2083 	bool done;
2084 
2085 	if (tracing_record_taskinfo_skip(flags))
2086 		return;
2087 
2088 	/*
2089 	 * Record as much task information as possible. If some fail, continue
2090 	 * to try to record the others.
2091 	 */
2092 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2093 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2094 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2095 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2096 
2097 	/* If recording any information failed, retry again soon. */
2098 	if (!done)
2099 		return;
2100 
2101 	__this_cpu_write(trace_taskinfo_save, false);
2102 }
2103 
2104 /* Helpers to record a specific task information */
2105 void tracing_record_cmdline(struct task_struct *task)
2106 {
2107 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2108 }
2109 
2110 void tracing_record_tgid(struct task_struct *task)
2111 {
2112 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2113 }
2114 
2115 /*
2116  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2117  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2118  * simplifies those functions and keeps them in sync.
2119  */
2120 enum print_line_t trace_handle_return(struct trace_seq *s)
2121 {
2122 	return trace_seq_has_overflowed(s) ?
2123 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2124 }
2125 EXPORT_SYMBOL_GPL(trace_handle_return);
2126 
2127 void
2128 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2129 			     int pc)
2130 {
2131 	struct task_struct *tsk = current;
2132 
2133 	entry->preempt_count		= pc & 0xff;
2134 	entry->pid			= (tsk) ? tsk->pid : 0;
2135 	entry->flags =
2136 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2137 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2138 #else
2139 		TRACE_FLAG_IRQS_NOSUPPORT |
2140 #endif
2141 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2142 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2143 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2144 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2145 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2146 }
2147 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2148 
2149 struct ring_buffer_event *
2150 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2151 			  int type,
2152 			  unsigned long len,
2153 			  unsigned long flags, int pc)
2154 {
2155 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2156 }
2157 
2158 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2159 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2160 static int trace_buffered_event_ref;
2161 
2162 /**
2163  * trace_buffered_event_enable - enable buffering events
2164  *
2165  * When events are being filtered, it is quicker to use a temporary
2166  * buffer to write the event data into if there's a likely chance
2167  * that it will not be committed. The discard of the ring buffer
2168  * is not as fast as committing, and is much slower than copying
2169  * a commit.
2170  *
2171  * When an event is to be filtered, allocate per cpu buffers to
2172  * write the event data into, and if the event is filtered and discarded
2173  * it is simply dropped, otherwise, the entire data is to be committed
2174  * in one shot.
2175  */
2176 void trace_buffered_event_enable(void)
2177 {
2178 	struct ring_buffer_event *event;
2179 	struct page *page;
2180 	int cpu;
2181 
2182 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2183 
2184 	if (trace_buffered_event_ref++)
2185 		return;
2186 
2187 	for_each_tracing_cpu(cpu) {
2188 		page = alloc_pages_node(cpu_to_node(cpu),
2189 					GFP_KERNEL | __GFP_NORETRY, 0);
2190 		if (!page)
2191 			goto failed;
2192 
2193 		event = page_address(page);
2194 		memset(event, 0, sizeof(*event));
2195 
2196 		per_cpu(trace_buffered_event, cpu) = event;
2197 
2198 		preempt_disable();
2199 		if (cpu == smp_processor_id() &&
2200 		    this_cpu_read(trace_buffered_event) !=
2201 		    per_cpu(trace_buffered_event, cpu))
2202 			WARN_ON_ONCE(1);
2203 		preempt_enable();
2204 	}
2205 
2206 	return;
2207  failed:
2208 	trace_buffered_event_disable();
2209 }
2210 
2211 static void enable_trace_buffered_event(void *data)
2212 {
2213 	/* Probably not needed, but do it anyway */
2214 	smp_rmb();
2215 	this_cpu_dec(trace_buffered_event_cnt);
2216 }
2217 
2218 static void disable_trace_buffered_event(void *data)
2219 {
2220 	this_cpu_inc(trace_buffered_event_cnt);
2221 }
2222 
2223 /**
2224  * trace_buffered_event_disable - disable buffering events
2225  *
2226  * When a filter is removed, it is faster to not use the buffered
2227  * events, and to commit directly into the ring buffer. Free up
2228  * the temp buffers when there are no more users. This requires
2229  * special synchronization with current events.
2230  */
2231 void trace_buffered_event_disable(void)
2232 {
2233 	int cpu;
2234 
2235 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2236 
2237 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2238 		return;
2239 
2240 	if (--trace_buffered_event_ref)
2241 		return;
2242 
2243 	preempt_disable();
2244 	/* For each CPU, set the buffer as used. */
2245 	smp_call_function_many(tracing_buffer_mask,
2246 			       disable_trace_buffered_event, NULL, 1);
2247 	preempt_enable();
2248 
2249 	/* Wait for all current users to finish */
2250 	synchronize_sched();
2251 
2252 	for_each_tracing_cpu(cpu) {
2253 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2254 		per_cpu(trace_buffered_event, cpu) = NULL;
2255 	}
2256 	/*
2257 	 * Make sure trace_buffered_event is NULL before clearing
2258 	 * trace_buffered_event_cnt.
2259 	 */
2260 	smp_wmb();
2261 
2262 	preempt_disable();
2263 	/* Do the work on each cpu */
2264 	smp_call_function_many(tracing_buffer_mask,
2265 			       enable_trace_buffered_event, NULL, 1);
2266 	preempt_enable();
2267 }
2268 
2269 static struct ring_buffer *temp_buffer;
2270 
2271 struct ring_buffer_event *
2272 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2273 			  struct trace_event_file *trace_file,
2274 			  int type, unsigned long len,
2275 			  unsigned long flags, int pc)
2276 {
2277 	struct ring_buffer_event *entry;
2278 	int val;
2279 
2280 	*current_rb = trace_file->tr->trace_buffer.buffer;
2281 
2282 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2283 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2284 	    (entry = this_cpu_read(trace_buffered_event))) {
2285 		/* Try to use the per cpu buffer first */
2286 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2287 		if (val == 1) {
2288 			trace_event_setup(entry, type, flags, pc);
2289 			entry->array[0] = len;
2290 			return entry;
2291 		}
2292 		this_cpu_dec(trace_buffered_event_cnt);
2293 	}
2294 
2295 	entry = __trace_buffer_lock_reserve(*current_rb,
2296 					    type, len, flags, pc);
2297 	/*
2298 	 * If tracing is off, but we have triggers enabled
2299 	 * we still need to look at the event data. Use the temp_buffer
2300 	 * to store the trace event for the tigger to use. It's recusive
2301 	 * safe and will not be recorded anywhere.
2302 	 */
2303 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2304 		*current_rb = temp_buffer;
2305 		entry = __trace_buffer_lock_reserve(*current_rb,
2306 						    type, len, flags, pc);
2307 	}
2308 	return entry;
2309 }
2310 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2311 
2312 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2313 static DEFINE_MUTEX(tracepoint_printk_mutex);
2314 
2315 static void output_printk(struct trace_event_buffer *fbuffer)
2316 {
2317 	struct trace_event_call *event_call;
2318 	struct trace_event *event;
2319 	unsigned long flags;
2320 	struct trace_iterator *iter = tracepoint_print_iter;
2321 
2322 	/* We should never get here if iter is NULL */
2323 	if (WARN_ON_ONCE(!iter))
2324 		return;
2325 
2326 	event_call = fbuffer->trace_file->event_call;
2327 	if (!event_call || !event_call->event.funcs ||
2328 	    !event_call->event.funcs->trace)
2329 		return;
2330 
2331 	event = &fbuffer->trace_file->event_call->event;
2332 
2333 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2334 	trace_seq_init(&iter->seq);
2335 	iter->ent = fbuffer->entry;
2336 	event_call->event.funcs->trace(iter, 0, event);
2337 	trace_seq_putc(&iter->seq, 0);
2338 	printk("%s", iter->seq.buffer);
2339 
2340 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2341 }
2342 
2343 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2344 			     void __user *buffer, size_t *lenp,
2345 			     loff_t *ppos)
2346 {
2347 	int save_tracepoint_printk;
2348 	int ret;
2349 
2350 	mutex_lock(&tracepoint_printk_mutex);
2351 	save_tracepoint_printk = tracepoint_printk;
2352 
2353 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2354 
2355 	/*
2356 	 * This will force exiting early, as tracepoint_printk
2357 	 * is always zero when tracepoint_printk_iter is not allocated
2358 	 */
2359 	if (!tracepoint_print_iter)
2360 		tracepoint_printk = 0;
2361 
2362 	if (save_tracepoint_printk == tracepoint_printk)
2363 		goto out;
2364 
2365 	if (tracepoint_printk)
2366 		static_key_enable(&tracepoint_printk_key.key);
2367 	else
2368 		static_key_disable(&tracepoint_printk_key.key);
2369 
2370  out:
2371 	mutex_unlock(&tracepoint_printk_mutex);
2372 
2373 	return ret;
2374 }
2375 
2376 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2377 {
2378 	if (static_key_false(&tracepoint_printk_key.key))
2379 		output_printk(fbuffer);
2380 
2381 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2382 				    fbuffer->event, fbuffer->entry,
2383 				    fbuffer->flags, fbuffer->pc);
2384 }
2385 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2386 
2387 /*
2388  * Skip 3:
2389  *
2390  *   trace_buffer_unlock_commit_regs()
2391  *   trace_event_buffer_commit()
2392  *   trace_event_raw_event_xxx()
2393  */
2394 # define STACK_SKIP 3
2395 
2396 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2397 				     struct ring_buffer *buffer,
2398 				     struct ring_buffer_event *event,
2399 				     unsigned long flags, int pc,
2400 				     struct pt_regs *regs)
2401 {
2402 	__buffer_unlock_commit(buffer, event);
2403 
2404 	/*
2405 	 * If regs is not set, then skip the necessary functions.
2406 	 * Note, we can still get here via blktrace, wakeup tracer
2407 	 * and mmiotrace, but that's ok if they lose a function or
2408 	 * two. They are not that meaningful.
2409 	 */
2410 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2411 	ftrace_trace_userstack(buffer, flags, pc);
2412 }
2413 
2414 /*
2415  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2416  */
2417 void
2418 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2419 				   struct ring_buffer_event *event)
2420 {
2421 	__buffer_unlock_commit(buffer, event);
2422 }
2423 
2424 static void
2425 trace_process_export(struct trace_export *export,
2426 	       struct ring_buffer_event *event)
2427 {
2428 	struct trace_entry *entry;
2429 	unsigned int size = 0;
2430 
2431 	entry = ring_buffer_event_data(event);
2432 	size = ring_buffer_event_length(event);
2433 	export->write(export, entry, size);
2434 }
2435 
2436 static DEFINE_MUTEX(ftrace_export_lock);
2437 
2438 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2439 
2440 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2441 
2442 static inline void ftrace_exports_enable(void)
2443 {
2444 	static_branch_enable(&ftrace_exports_enabled);
2445 }
2446 
2447 static inline void ftrace_exports_disable(void)
2448 {
2449 	static_branch_disable(&ftrace_exports_enabled);
2450 }
2451 
2452 void ftrace_exports(struct ring_buffer_event *event)
2453 {
2454 	struct trace_export *export;
2455 
2456 	preempt_disable_notrace();
2457 
2458 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2459 	while (export) {
2460 		trace_process_export(export, event);
2461 		export = rcu_dereference_raw_notrace(export->next);
2462 	}
2463 
2464 	preempt_enable_notrace();
2465 }
2466 
2467 static inline void
2468 add_trace_export(struct trace_export **list, struct trace_export *export)
2469 {
2470 	rcu_assign_pointer(export->next, *list);
2471 	/*
2472 	 * We are entering export into the list but another
2473 	 * CPU might be walking that list. We need to make sure
2474 	 * the export->next pointer is valid before another CPU sees
2475 	 * the export pointer included into the list.
2476 	 */
2477 	rcu_assign_pointer(*list, export);
2478 }
2479 
2480 static inline int
2481 rm_trace_export(struct trace_export **list, struct trace_export *export)
2482 {
2483 	struct trace_export **p;
2484 
2485 	for (p = list; *p != NULL; p = &(*p)->next)
2486 		if (*p == export)
2487 			break;
2488 
2489 	if (*p != export)
2490 		return -1;
2491 
2492 	rcu_assign_pointer(*p, (*p)->next);
2493 
2494 	return 0;
2495 }
2496 
2497 static inline void
2498 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2499 {
2500 	if (*list == NULL)
2501 		ftrace_exports_enable();
2502 
2503 	add_trace_export(list, export);
2504 }
2505 
2506 static inline int
2507 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2508 {
2509 	int ret;
2510 
2511 	ret = rm_trace_export(list, export);
2512 	if (*list == NULL)
2513 		ftrace_exports_disable();
2514 
2515 	return ret;
2516 }
2517 
2518 int register_ftrace_export(struct trace_export *export)
2519 {
2520 	if (WARN_ON_ONCE(!export->write))
2521 		return -1;
2522 
2523 	mutex_lock(&ftrace_export_lock);
2524 
2525 	add_ftrace_export(&ftrace_exports_list, export);
2526 
2527 	mutex_unlock(&ftrace_export_lock);
2528 
2529 	return 0;
2530 }
2531 EXPORT_SYMBOL_GPL(register_ftrace_export);
2532 
2533 int unregister_ftrace_export(struct trace_export *export)
2534 {
2535 	int ret;
2536 
2537 	mutex_lock(&ftrace_export_lock);
2538 
2539 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2540 
2541 	mutex_unlock(&ftrace_export_lock);
2542 
2543 	return ret;
2544 }
2545 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2546 
2547 void
2548 trace_function(struct trace_array *tr,
2549 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2550 	       int pc)
2551 {
2552 	struct trace_event_call *call = &event_function;
2553 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2554 	struct ring_buffer_event *event;
2555 	struct ftrace_entry *entry;
2556 
2557 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2558 					    flags, pc);
2559 	if (!event)
2560 		return;
2561 	entry	= ring_buffer_event_data(event);
2562 	entry->ip			= ip;
2563 	entry->parent_ip		= parent_ip;
2564 
2565 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2566 		if (static_branch_unlikely(&ftrace_exports_enabled))
2567 			ftrace_exports(event);
2568 		__buffer_unlock_commit(buffer, event);
2569 	}
2570 }
2571 
2572 #ifdef CONFIG_STACKTRACE
2573 
2574 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2575 struct ftrace_stack {
2576 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2577 };
2578 
2579 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2580 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2581 
2582 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2583 				 unsigned long flags,
2584 				 int skip, int pc, struct pt_regs *regs)
2585 {
2586 	struct trace_event_call *call = &event_kernel_stack;
2587 	struct ring_buffer_event *event;
2588 	struct stack_entry *entry;
2589 	struct stack_trace trace;
2590 	int use_stack;
2591 	int size = FTRACE_STACK_ENTRIES;
2592 
2593 	trace.nr_entries	= 0;
2594 	trace.skip		= skip;
2595 
2596 	/*
2597 	 * Add one, for this function and the call to save_stack_trace()
2598 	 * If regs is set, then these functions will not be in the way.
2599 	 */
2600 #ifndef CONFIG_UNWINDER_ORC
2601 	if (!regs)
2602 		trace.skip++;
2603 #endif
2604 
2605 	/*
2606 	 * Since events can happen in NMIs there's no safe way to
2607 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2608 	 * or NMI comes in, it will just have to use the default
2609 	 * FTRACE_STACK_SIZE.
2610 	 */
2611 	preempt_disable_notrace();
2612 
2613 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2614 	/*
2615 	 * We don't need any atomic variables, just a barrier.
2616 	 * If an interrupt comes in, we don't care, because it would
2617 	 * have exited and put the counter back to what we want.
2618 	 * We just need a barrier to keep gcc from moving things
2619 	 * around.
2620 	 */
2621 	barrier();
2622 	if (use_stack == 1) {
2623 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2624 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2625 
2626 		if (regs)
2627 			save_stack_trace_regs(regs, &trace);
2628 		else
2629 			save_stack_trace(&trace);
2630 
2631 		if (trace.nr_entries > size)
2632 			size = trace.nr_entries;
2633 	} else
2634 		/* From now on, use_stack is a boolean */
2635 		use_stack = 0;
2636 
2637 	size *= sizeof(unsigned long);
2638 
2639 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2640 					    sizeof(*entry) + size, flags, pc);
2641 	if (!event)
2642 		goto out;
2643 	entry = ring_buffer_event_data(event);
2644 
2645 	memset(&entry->caller, 0, size);
2646 
2647 	if (use_stack)
2648 		memcpy(&entry->caller, trace.entries,
2649 		       trace.nr_entries * sizeof(unsigned long));
2650 	else {
2651 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2652 		trace.entries		= entry->caller;
2653 		if (regs)
2654 			save_stack_trace_regs(regs, &trace);
2655 		else
2656 			save_stack_trace(&trace);
2657 	}
2658 
2659 	entry->size = trace.nr_entries;
2660 
2661 	if (!call_filter_check_discard(call, entry, buffer, event))
2662 		__buffer_unlock_commit(buffer, event);
2663 
2664  out:
2665 	/* Again, don't let gcc optimize things here */
2666 	barrier();
2667 	__this_cpu_dec(ftrace_stack_reserve);
2668 	preempt_enable_notrace();
2669 
2670 }
2671 
2672 static inline void ftrace_trace_stack(struct trace_array *tr,
2673 				      struct ring_buffer *buffer,
2674 				      unsigned long flags,
2675 				      int skip, int pc, struct pt_regs *regs)
2676 {
2677 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2678 		return;
2679 
2680 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2681 }
2682 
2683 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2684 		   int pc)
2685 {
2686 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2687 
2688 	if (rcu_is_watching()) {
2689 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2690 		return;
2691 	}
2692 
2693 	/*
2694 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2695 	 * but if the above rcu_is_watching() failed, then the NMI
2696 	 * triggered someplace critical, and rcu_irq_enter() should
2697 	 * not be called from NMI.
2698 	 */
2699 	if (unlikely(in_nmi()))
2700 		return;
2701 
2702 	rcu_irq_enter_irqson();
2703 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2704 	rcu_irq_exit_irqson();
2705 }
2706 
2707 /**
2708  * trace_dump_stack - record a stack back trace in the trace buffer
2709  * @skip: Number of functions to skip (helper handlers)
2710  */
2711 void trace_dump_stack(int skip)
2712 {
2713 	unsigned long flags;
2714 
2715 	if (tracing_disabled || tracing_selftest_running)
2716 		return;
2717 
2718 	local_save_flags(flags);
2719 
2720 #ifndef CONFIG_UNWINDER_ORC
2721 	/* Skip 1 to skip this function. */
2722 	skip++;
2723 #endif
2724 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2725 			     flags, skip, preempt_count(), NULL);
2726 }
2727 
2728 static DEFINE_PER_CPU(int, user_stack_count);
2729 
2730 void
2731 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2732 {
2733 	struct trace_event_call *call = &event_user_stack;
2734 	struct ring_buffer_event *event;
2735 	struct userstack_entry *entry;
2736 	struct stack_trace trace;
2737 
2738 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2739 		return;
2740 
2741 	/*
2742 	 * NMIs can not handle page faults, even with fix ups.
2743 	 * The save user stack can (and often does) fault.
2744 	 */
2745 	if (unlikely(in_nmi()))
2746 		return;
2747 
2748 	/*
2749 	 * prevent recursion, since the user stack tracing may
2750 	 * trigger other kernel events.
2751 	 */
2752 	preempt_disable();
2753 	if (__this_cpu_read(user_stack_count))
2754 		goto out;
2755 
2756 	__this_cpu_inc(user_stack_count);
2757 
2758 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2759 					    sizeof(*entry), flags, pc);
2760 	if (!event)
2761 		goto out_drop_count;
2762 	entry	= ring_buffer_event_data(event);
2763 
2764 	entry->tgid		= current->tgid;
2765 	memset(&entry->caller, 0, sizeof(entry->caller));
2766 
2767 	trace.nr_entries	= 0;
2768 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2769 	trace.skip		= 0;
2770 	trace.entries		= entry->caller;
2771 
2772 	save_stack_trace_user(&trace);
2773 	if (!call_filter_check_discard(call, entry, buffer, event))
2774 		__buffer_unlock_commit(buffer, event);
2775 
2776  out_drop_count:
2777 	__this_cpu_dec(user_stack_count);
2778  out:
2779 	preempt_enable();
2780 }
2781 
2782 #ifdef UNUSED
2783 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2784 {
2785 	ftrace_trace_userstack(tr, flags, preempt_count());
2786 }
2787 #endif /* UNUSED */
2788 
2789 #endif /* CONFIG_STACKTRACE */
2790 
2791 /* created for use with alloc_percpu */
2792 struct trace_buffer_struct {
2793 	int nesting;
2794 	char buffer[4][TRACE_BUF_SIZE];
2795 };
2796 
2797 static struct trace_buffer_struct *trace_percpu_buffer;
2798 
2799 /*
2800  * Thise allows for lockless recording.  If we're nested too deeply, then
2801  * this returns NULL.
2802  */
2803 static char *get_trace_buf(void)
2804 {
2805 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2806 
2807 	if (!buffer || buffer->nesting >= 4)
2808 		return NULL;
2809 
2810 	buffer->nesting++;
2811 
2812 	/* Interrupts must see nesting incremented before we use the buffer */
2813 	barrier();
2814 	return &buffer->buffer[buffer->nesting][0];
2815 }
2816 
2817 static void put_trace_buf(void)
2818 {
2819 	/* Don't let the decrement of nesting leak before this */
2820 	barrier();
2821 	this_cpu_dec(trace_percpu_buffer->nesting);
2822 }
2823 
2824 static int alloc_percpu_trace_buffer(void)
2825 {
2826 	struct trace_buffer_struct *buffers;
2827 
2828 	buffers = alloc_percpu(struct trace_buffer_struct);
2829 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2830 		return -ENOMEM;
2831 
2832 	trace_percpu_buffer = buffers;
2833 	return 0;
2834 }
2835 
2836 static int buffers_allocated;
2837 
2838 void trace_printk_init_buffers(void)
2839 {
2840 	if (buffers_allocated)
2841 		return;
2842 
2843 	if (alloc_percpu_trace_buffer())
2844 		return;
2845 
2846 	/* trace_printk() is for debug use only. Don't use it in production. */
2847 
2848 	pr_warn("\n");
2849 	pr_warn("**********************************************************\n");
2850 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2851 	pr_warn("**                                                      **\n");
2852 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2853 	pr_warn("**                                                      **\n");
2854 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2855 	pr_warn("** unsafe for production use.                           **\n");
2856 	pr_warn("**                                                      **\n");
2857 	pr_warn("** If you see this message and you are not debugging    **\n");
2858 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2859 	pr_warn("**                                                      **\n");
2860 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2861 	pr_warn("**********************************************************\n");
2862 
2863 	/* Expand the buffers to set size */
2864 	tracing_update_buffers();
2865 
2866 	buffers_allocated = 1;
2867 
2868 	/*
2869 	 * trace_printk_init_buffers() can be called by modules.
2870 	 * If that happens, then we need to start cmdline recording
2871 	 * directly here. If the global_trace.buffer is already
2872 	 * allocated here, then this was called by module code.
2873 	 */
2874 	if (global_trace.trace_buffer.buffer)
2875 		tracing_start_cmdline_record();
2876 }
2877 
2878 void trace_printk_start_comm(void)
2879 {
2880 	/* Start tracing comms if trace printk is set */
2881 	if (!buffers_allocated)
2882 		return;
2883 	tracing_start_cmdline_record();
2884 }
2885 
2886 static void trace_printk_start_stop_comm(int enabled)
2887 {
2888 	if (!buffers_allocated)
2889 		return;
2890 
2891 	if (enabled)
2892 		tracing_start_cmdline_record();
2893 	else
2894 		tracing_stop_cmdline_record();
2895 }
2896 
2897 /**
2898  * trace_vbprintk - write binary msg to tracing buffer
2899  *
2900  */
2901 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2902 {
2903 	struct trace_event_call *call = &event_bprint;
2904 	struct ring_buffer_event *event;
2905 	struct ring_buffer *buffer;
2906 	struct trace_array *tr = &global_trace;
2907 	struct bprint_entry *entry;
2908 	unsigned long flags;
2909 	char *tbuffer;
2910 	int len = 0, size, pc;
2911 
2912 	if (unlikely(tracing_selftest_running || tracing_disabled))
2913 		return 0;
2914 
2915 	/* Don't pollute graph traces with trace_vprintk internals */
2916 	pause_graph_tracing();
2917 
2918 	pc = preempt_count();
2919 	preempt_disable_notrace();
2920 
2921 	tbuffer = get_trace_buf();
2922 	if (!tbuffer) {
2923 		len = 0;
2924 		goto out_nobuffer;
2925 	}
2926 
2927 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2928 
2929 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2930 		goto out;
2931 
2932 	local_save_flags(flags);
2933 	size = sizeof(*entry) + sizeof(u32) * len;
2934 	buffer = tr->trace_buffer.buffer;
2935 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2936 					    flags, pc);
2937 	if (!event)
2938 		goto out;
2939 	entry = ring_buffer_event_data(event);
2940 	entry->ip			= ip;
2941 	entry->fmt			= fmt;
2942 
2943 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2944 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2945 		__buffer_unlock_commit(buffer, event);
2946 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2947 	}
2948 
2949 out:
2950 	put_trace_buf();
2951 
2952 out_nobuffer:
2953 	preempt_enable_notrace();
2954 	unpause_graph_tracing();
2955 
2956 	return len;
2957 }
2958 EXPORT_SYMBOL_GPL(trace_vbprintk);
2959 
2960 static int
2961 __trace_array_vprintk(struct ring_buffer *buffer,
2962 		      unsigned long ip, const char *fmt, va_list args)
2963 {
2964 	struct trace_event_call *call = &event_print;
2965 	struct ring_buffer_event *event;
2966 	int len = 0, size, pc;
2967 	struct print_entry *entry;
2968 	unsigned long flags;
2969 	char *tbuffer;
2970 
2971 	if (tracing_disabled || tracing_selftest_running)
2972 		return 0;
2973 
2974 	/* Don't pollute graph traces with trace_vprintk internals */
2975 	pause_graph_tracing();
2976 
2977 	pc = preempt_count();
2978 	preempt_disable_notrace();
2979 
2980 
2981 	tbuffer = get_trace_buf();
2982 	if (!tbuffer) {
2983 		len = 0;
2984 		goto out_nobuffer;
2985 	}
2986 
2987 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2988 
2989 	local_save_flags(flags);
2990 	size = sizeof(*entry) + len + 1;
2991 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2992 					    flags, pc);
2993 	if (!event)
2994 		goto out;
2995 	entry = ring_buffer_event_data(event);
2996 	entry->ip = ip;
2997 
2998 	memcpy(&entry->buf, tbuffer, len + 1);
2999 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3000 		__buffer_unlock_commit(buffer, event);
3001 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3002 	}
3003 
3004 out:
3005 	put_trace_buf();
3006 
3007 out_nobuffer:
3008 	preempt_enable_notrace();
3009 	unpause_graph_tracing();
3010 
3011 	return len;
3012 }
3013 
3014 int trace_array_vprintk(struct trace_array *tr,
3015 			unsigned long ip, const char *fmt, va_list args)
3016 {
3017 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3018 }
3019 
3020 int trace_array_printk(struct trace_array *tr,
3021 		       unsigned long ip, const char *fmt, ...)
3022 {
3023 	int ret;
3024 	va_list ap;
3025 
3026 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3027 		return 0;
3028 
3029 	va_start(ap, fmt);
3030 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3031 	va_end(ap);
3032 	return ret;
3033 }
3034 
3035 int trace_array_printk_buf(struct ring_buffer *buffer,
3036 			   unsigned long ip, const char *fmt, ...)
3037 {
3038 	int ret;
3039 	va_list ap;
3040 
3041 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3042 		return 0;
3043 
3044 	va_start(ap, fmt);
3045 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3046 	va_end(ap);
3047 	return ret;
3048 }
3049 
3050 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3051 {
3052 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_vprintk);
3055 
3056 static void trace_iterator_increment(struct trace_iterator *iter)
3057 {
3058 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3059 
3060 	iter->idx++;
3061 	if (buf_iter)
3062 		ring_buffer_read(buf_iter, NULL);
3063 }
3064 
3065 static struct trace_entry *
3066 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3067 		unsigned long *lost_events)
3068 {
3069 	struct ring_buffer_event *event;
3070 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3071 
3072 	if (buf_iter)
3073 		event = ring_buffer_iter_peek(buf_iter, ts);
3074 	else
3075 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3076 					 lost_events);
3077 
3078 	if (event) {
3079 		iter->ent_size = ring_buffer_event_length(event);
3080 		return ring_buffer_event_data(event);
3081 	}
3082 	iter->ent_size = 0;
3083 	return NULL;
3084 }
3085 
3086 static struct trace_entry *
3087 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3088 		  unsigned long *missing_events, u64 *ent_ts)
3089 {
3090 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3091 	struct trace_entry *ent, *next = NULL;
3092 	unsigned long lost_events = 0, next_lost = 0;
3093 	int cpu_file = iter->cpu_file;
3094 	u64 next_ts = 0, ts;
3095 	int next_cpu = -1;
3096 	int next_size = 0;
3097 	int cpu;
3098 
3099 	/*
3100 	 * If we are in a per_cpu trace file, don't bother by iterating over
3101 	 * all cpu and peek directly.
3102 	 */
3103 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3104 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3105 			return NULL;
3106 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3107 		if (ent_cpu)
3108 			*ent_cpu = cpu_file;
3109 
3110 		return ent;
3111 	}
3112 
3113 	for_each_tracing_cpu(cpu) {
3114 
3115 		if (ring_buffer_empty_cpu(buffer, cpu))
3116 			continue;
3117 
3118 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3119 
3120 		/*
3121 		 * Pick the entry with the smallest timestamp:
3122 		 */
3123 		if (ent && (!next || ts < next_ts)) {
3124 			next = ent;
3125 			next_cpu = cpu;
3126 			next_ts = ts;
3127 			next_lost = lost_events;
3128 			next_size = iter->ent_size;
3129 		}
3130 	}
3131 
3132 	iter->ent_size = next_size;
3133 
3134 	if (ent_cpu)
3135 		*ent_cpu = next_cpu;
3136 
3137 	if (ent_ts)
3138 		*ent_ts = next_ts;
3139 
3140 	if (missing_events)
3141 		*missing_events = next_lost;
3142 
3143 	return next;
3144 }
3145 
3146 /* Find the next real entry, without updating the iterator itself */
3147 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3148 					  int *ent_cpu, u64 *ent_ts)
3149 {
3150 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3151 }
3152 
3153 /* Find the next real entry, and increment the iterator to the next entry */
3154 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3155 {
3156 	iter->ent = __find_next_entry(iter, &iter->cpu,
3157 				      &iter->lost_events, &iter->ts);
3158 
3159 	if (iter->ent)
3160 		trace_iterator_increment(iter);
3161 
3162 	return iter->ent ? iter : NULL;
3163 }
3164 
3165 static void trace_consume(struct trace_iterator *iter)
3166 {
3167 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3168 			    &iter->lost_events);
3169 }
3170 
3171 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3172 {
3173 	struct trace_iterator *iter = m->private;
3174 	int i = (int)*pos;
3175 	void *ent;
3176 
3177 	WARN_ON_ONCE(iter->leftover);
3178 
3179 	(*pos)++;
3180 
3181 	/* can't go backwards */
3182 	if (iter->idx > i)
3183 		return NULL;
3184 
3185 	if (iter->idx < 0)
3186 		ent = trace_find_next_entry_inc(iter);
3187 	else
3188 		ent = iter;
3189 
3190 	while (ent && iter->idx < i)
3191 		ent = trace_find_next_entry_inc(iter);
3192 
3193 	iter->pos = *pos;
3194 
3195 	return ent;
3196 }
3197 
3198 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3199 {
3200 	struct ring_buffer_event *event;
3201 	struct ring_buffer_iter *buf_iter;
3202 	unsigned long entries = 0;
3203 	u64 ts;
3204 
3205 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3206 
3207 	buf_iter = trace_buffer_iter(iter, cpu);
3208 	if (!buf_iter)
3209 		return;
3210 
3211 	ring_buffer_iter_reset(buf_iter);
3212 
3213 	/*
3214 	 * We could have the case with the max latency tracers
3215 	 * that a reset never took place on a cpu. This is evident
3216 	 * by the timestamp being before the start of the buffer.
3217 	 */
3218 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3219 		if (ts >= iter->trace_buffer->time_start)
3220 			break;
3221 		entries++;
3222 		ring_buffer_read(buf_iter, NULL);
3223 	}
3224 
3225 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3226 }
3227 
3228 /*
3229  * The current tracer is copied to avoid a global locking
3230  * all around.
3231  */
3232 static void *s_start(struct seq_file *m, loff_t *pos)
3233 {
3234 	struct trace_iterator *iter = m->private;
3235 	struct trace_array *tr = iter->tr;
3236 	int cpu_file = iter->cpu_file;
3237 	void *p = NULL;
3238 	loff_t l = 0;
3239 	int cpu;
3240 
3241 	/*
3242 	 * copy the tracer to avoid using a global lock all around.
3243 	 * iter->trace is a copy of current_trace, the pointer to the
3244 	 * name may be used instead of a strcmp(), as iter->trace->name
3245 	 * will point to the same string as current_trace->name.
3246 	 */
3247 	mutex_lock(&trace_types_lock);
3248 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3249 		*iter->trace = *tr->current_trace;
3250 	mutex_unlock(&trace_types_lock);
3251 
3252 #ifdef CONFIG_TRACER_MAX_TRACE
3253 	if (iter->snapshot && iter->trace->use_max_tr)
3254 		return ERR_PTR(-EBUSY);
3255 #endif
3256 
3257 	if (!iter->snapshot)
3258 		atomic_inc(&trace_record_taskinfo_disabled);
3259 
3260 	if (*pos != iter->pos) {
3261 		iter->ent = NULL;
3262 		iter->cpu = 0;
3263 		iter->idx = -1;
3264 
3265 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3266 			for_each_tracing_cpu(cpu)
3267 				tracing_iter_reset(iter, cpu);
3268 		} else
3269 			tracing_iter_reset(iter, cpu_file);
3270 
3271 		iter->leftover = 0;
3272 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3273 			;
3274 
3275 	} else {
3276 		/*
3277 		 * If we overflowed the seq_file before, then we want
3278 		 * to just reuse the trace_seq buffer again.
3279 		 */
3280 		if (iter->leftover)
3281 			p = iter;
3282 		else {
3283 			l = *pos - 1;
3284 			p = s_next(m, p, &l);
3285 		}
3286 	}
3287 
3288 	trace_event_read_lock();
3289 	trace_access_lock(cpu_file);
3290 	return p;
3291 }
3292 
3293 static void s_stop(struct seq_file *m, void *p)
3294 {
3295 	struct trace_iterator *iter = m->private;
3296 
3297 #ifdef CONFIG_TRACER_MAX_TRACE
3298 	if (iter->snapshot && iter->trace->use_max_tr)
3299 		return;
3300 #endif
3301 
3302 	if (!iter->snapshot)
3303 		atomic_dec(&trace_record_taskinfo_disabled);
3304 
3305 	trace_access_unlock(iter->cpu_file);
3306 	trace_event_read_unlock();
3307 }
3308 
3309 static void
3310 get_total_entries(struct trace_buffer *buf,
3311 		  unsigned long *total, unsigned long *entries)
3312 {
3313 	unsigned long count;
3314 	int cpu;
3315 
3316 	*total = 0;
3317 	*entries = 0;
3318 
3319 	for_each_tracing_cpu(cpu) {
3320 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3321 		/*
3322 		 * If this buffer has skipped entries, then we hold all
3323 		 * entries for the trace and we need to ignore the
3324 		 * ones before the time stamp.
3325 		 */
3326 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3327 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3328 			/* total is the same as the entries */
3329 			*total += count;
3330 		} else
3331 			*total += count +
3332 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3333 		*entries += count;
3334 	}
3335 }
3336 
3337 static void print_lat_help_header(struct seq_file *m)
3338 {
3339 	seq_puts(m, "#                  _------=> CPU#            \n"
3340 		    "#                 / _-----=> irqs-off        \n"
3341 		    "#                | / _----=> need-resched    \n"
3342 		    "#                || / _---=> hardirq/softirq \n"
3343 		    "#                ||| / _--=> preempt-depth   \n"
3344 		    "#                |||| /     delay            \n"
3345 		    "#  cmd     pid   ||||| time  |   caller      \n"
3346 		    "#     \\   /      |||||  \\    |   /         \n");
3347 }
3348 
3349 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3350 {
3351 	unsigned long total;
3352 	unsigned long entries;
3353 
3354 	get_total_entries(buf, &total, &entries);
3355 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3356 		   entries, total, num_online_cpus());
3357 	seq_puts(m, "#\n");
3358 }
3359 
3360 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3361 				   unsigned int flags)
3362 {
3363 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3364 
3365 	print_event_info(buf, m);
3366 
3367 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3368 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3369 }
3370 
3371 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3372 				       unsigned int flags)
3373 {
3374 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375 	const char tgid_space[] = "          ";
3376 	const char space[] = "  ";
3377 
3378 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3379 		   tgid ? tgid_space : space);
3380 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3381 		   tgid ? tgid_space : space);
3382 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3383 		   tgid ? tgid_space : space);
3384 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3385 		   tgid ? tgid_space : space);
3386 	seq_printf(m, "#                          %s||| /     delay\n",
3387 		   tgid ? tgid_space : space);
3388 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3389 		   tgid ? "   TGID   " : space);
3390 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3391 		   tgid ? "     |    " : space);
3392 }
3393 
3394 void
3395 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3396 {
3397 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3398 	struct trace_buffer *buf = iter->trace_buffer;
3399 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3400 	struct tracer *type = iter->trace;
3401 	unsigned long entries;
3402 	unsigned long total;
3403 	const char *name = "preemption";
3404 
3405 	name = type->name;
3406 
3407 	get_total_entries(buf, &total, &entries);
3408 
3409 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3410 		   name, UTS_RELEASE);
3411 	seq_puts(m, "# -----------------------------------"
3412 		 "---------------------------------\n");
3413 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3414 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3415 		   nsecs_to_usecs(data->saved_latency),
3416 		   entries,
3417 		   total,
3418 		   buf->cpu,
3419 #if defined(CONFIG_PREEMPT_NONE)
3420 		   "server",
3421 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3422 		   "desktop",
3423 #elif defined(CONFIG_PREEMPT)
3424 		   "preempt",
3425 #else
3426 		   "unknown",
3427 #endif
3428 		   /* These are reserved for later use */
3429 		   0, 0, 0, 0);
3430 #ifdef CONFIG_SMP
3431 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3432 #else
3433 	seq_puts(m, ")\n");
3434 #endif
3435 	seq_puts(m, "#    -----------------\n");
3436 	seq_printf(m, "#    | task: %.16s-%d "
3437 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3438 		   data->comm, data->pid,
3439 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3440 		   data->policy, data->rt_priority);
3441 	seq_puts(m, "#    -----------------\n");
3442 
3443 	if (data->critical_start) {
3444 		seq_puts(m, "#  => started at: ");
3445 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3446 		trace_print_seq(m, &iter->seq);
3447 		seq_puts(m, "\n#  => ended at:   ");
3448 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3449 		trace_print_seq(m, &iter->seq);
3450 		seq_puts(m, "\n#\n");
3451 	}
3452 
3453 	seq_puts(m, "#\n");
3454 }
3455 
3456 static void test_cpu_buff_start(struct trace_iterator *iter)
3457 {
3458 	struct trace_seq *s = &iter->seq;
3459 	struct trace_array *tr = iter->tr;
3460 
3461 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3462 		return;
3463 
3464 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3465 		return;
3466 
3467 	if (cpumask_available(iter->started) &&
3468 	    cpumask_test_cpu(iter->cpu, iter->started))
3469 		return;
3470 
3471 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3472 		return;
3473 
3474 	if (cpumask_available(iter->started))
3475 		cpumask_set_cpu(iter->cpu, iter->started);
3476 
3477 	/* Don't print started cpu buffer for the first entry of the trace */
3478 	if (iter->idx > 1)
3479 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3480 				iter->cpu);
3481 }
3482 
3483 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3484 {
3485 	struct trace_array *tr = iter->tr;
3486 	struct trace_seq *s = &iter->seq;
3487 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3488 	struct trace_entry *entry;
3489 	struct trace_event *event;
3490 
3491 	entry = iter->ent;
3492 
3493 	test_cpu_buff_start(iter);
3494 
3495 	event = ftrace_find_event(entry->type);
3496 
3497 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3498 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3499 			trace_print_lat_context(iter);
3500 		else
3501 			trace_print_context(iter);
3502 	}
3503 
3504 	if (trace_seq_has_overflowed(s))
3505 		return TRACE_TYPE_PARTIAL_LINE;
3506 
3507 	if (event)
3508 		return event->funcs->trace(iter, sym_flags, event);
3509 
3510 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3511 
3512 	return trace_handle_return(s);
3513 }
3514 
3515 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3516 {
3517 	struct trace_array *tr = iter->tr;
3518 	struct trace_seq *s = &iter->seq;
3519 	struct trace_entry *entry;
3520 	struct trace_event *event;
3521 
3522 	entry = iter->ent;
3523 
3524 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3525 		trace_seq_printf(s, "%d %d %llu ",
3526 				 entry->pid, iter->cpu, iter->ts);
3527 
3528 	if (trace_seq_has_overflowed(s))
3529 		return TRACE_TYPE_PARTIAL_LINE;
3530 
3531 	event = ftrace_find_event(entry->type);
3532 	if (event)
3533 		return event->funcs->raw(iter, 0, event);
3534 
3535 	trace_seq_printf(s, "%d ?\n", entry->type);
3536 
3537 	return trace_handle_return(s);
3538 }
3539 
3540 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3541 {
3542 	struct trace_array *tr = iter->tr;
3543 	struct trace_seq *s = &iter->seq;
3544 	unsigned char newline = '\n';
3545 	struct trace_entry *entry;
3546 	struct trace_event *event;
3547 
3548 	entry = iter->ent;
3549 
3550 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3551 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3552 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3553 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3554 		if (trace_seq_has_overflowed(s))
3555 			return TRACE_TYPE_PARTIAL_LINE;
3556 	}
3557 
3558 	event = ftrace_find_event(entry->type);
3559 	if (event) {
3560 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3561 		if (ret != TRACE_TYPE_HANDLED)
3562 			return ret;
3563 	}
3564 
3565 	SEQ_PUT_FIELD(s, newline);
3566 
3567 	return trace_handle_return(s);
3568 }
3569 
3570 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3571 {
3572 	struct trace_array *tr = iter->tr;
3573 	struct trace_seq *s = &iter->seq;
3574 	struct trace_entry *entry;
3575 	struct trace_event *event;
3576 
3577 	entry = iter->ent;
3578 
3579 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3580 		SEQ_PUT_FIELD(s, entry->pid);
3581 		SEQ_PUT_FIELD(s, iter->cpu);
3582 		SEQ_PUT_FIELD(s, iter->ts);
3583 		if (trace_seq_has_overflowed(s))
3584 			return TRACE_TYPE_PARTIAL_LINE;
3585 	}
3586 
3587 	event = ftrace_find_event(entry->type);
3588 	return event ? event->funcs->binary(iter, 0, event) :
3589 		TRACE_TYPE_HANDLED;
3590 }
3591 
3592 int trace_empty(struct trace_iterator *iter)
3593 {
3594 	struct ring_buffer_iter *buf_iter;
3595 	int cpu;
3596 
3597 	/* If we are looking at one CPU buffer, only check that one */
3598 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3599 		cpu = iter->cpu_file;
3600 		buf_iter = trace_buffer_iter(iter, cpu);
3601 		if (buf_iter) {
3602 			if (!ring_buffer_iter_empty(buf_iter))
3603 				return 0;
3604 		} else {
3605 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3606 				return 0;
3607 		}
3608 		return 1;
3609 	}
3610 
3611 	for_each_tracing_cpu(cpu) {
3612 		buf_iter = trace_buffer_iter(iter, cpu);
3613 		if (buf_iter) {
3614 			if (!ring_buffer_iter_empty(buf_iter))
3615 				return 0;
3616 		} else {
3617 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3618 				return 0;
3619 		}
3620 	}
3621 
3622 	return 1;
3623 }
3624 
3625 /*  Called with trace_event_read_lock() held. */
3626 enum print_line_t print_trace_line(struct trace_iterator *iter)
3627 {
3628 	struct trace_array *tr = iter->tr;
3629 	unsigned long trace_flags = tr->trace_flags;
3630 	enum print_line_t ret;
3631 
3632 	if (iter->lost_events) {
3633 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3634 				 iter->cpu, iter->lost_events);
3635 		if (trace_seq_has_overflowed(&iter->seq))
3636 			return TRACE_TYPE_PARTIAL_LINE;
3637 	}
3638 
3639 	if (iter->trace && iter->trace->print_line) {
3640 		ret = iter->trace->print_line(iter);
3641 		if (ret != TRACE_TYPE_UNHANDLED)
3642 			return ret;
3643 	}
3644 
3645 	if (iter->ent->type == TRACE_BPUTS &&
3646 			trace_flags & TRACE_ITER_PRINTK &&
3647 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3648 		return trace_print_bputs_msg_only(iter);
3649 
3650 	if (iter->ent->type == TRACE_BPRINT &&
3651 			trace_flags & TRACE_ITER_PRINTK &&
3652 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3653 		return trace_print_bprintk_msg_only(iter);
3654 
3655 	if (iter->ent->type == TRACE_PRINT &&
3656 			trace_flags & TRACE_ITER_PRINTK &&
3657 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3658 		return trace_print_printk_msg_only(iter);
3659 
3660 	if (trace_flags & TRACE_ITER_BIN)
3661 		return print_bin_fmt(iter);
3662 
3663 	if (trace_flags & TRACE_ITER_HEX)
3664 		return print_hex_fmt(iter);
3665 
3666 	if (trace_flags & TRACE_ITER_RAW)
3667 		return print_raw_fmt(iter);
3668 
3669 	return print_trace_fmt(iter);
3670 }
3671 
3672 void trace_latency_header(struct seq_file *m)
3673 {
3674 	struct trace_iterator *iter = m->private;
3675 	struct trace_array *tr = iter->tr;
3676 
3677 	/* print nothing if the buffers are empty */
3678 	if (trace_empty(iter))
3679 		return;
3680 
3681 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3682 		print_trace_header(m, iter);
3683 
3684 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3685 		print_lat_help_header(m);
3686 }
3687 
3688 void trace_default_header(struct seq_file *m)
3689 {
3690 	struct trace_iterator *iter = m->private;
3691 	struct trace_array *tr = iter->tr;
3692 	unsigned long trace_flags = tr->trace_flags;
3693 
3694 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3695 		return;
3696 
3697 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3698 		/* print nothing if the buffers are empty */
3699 		if (trace_empty(iter))
3700 			return;
3701 		print_trace_header(m, iter);
3702 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3703 			print_lat_help_header(m);
3704 	} else {
3705 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3706 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3707 				print_func_help_header_irq(iter->trace_buffer,
3708 							   m, trace_flags);
3709 			else
3710 				print_func_help_header(iter->trace_buffer, m,
3711 						       trace_flags);
3712 		}
3713 	}
3714 }
3715 
3716 static void test_ftrace_alive(struct seq_file *m)
3717 {
3718 	if (!ftrace_is_dead())
3719 		return;
3720 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3721 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3722 }
3723 
3724 #ifdef CONFIG_TRACER_MAX_TRACE
3725 static void show_snapshot_main_help(struct seq_file *m)
3726 {
3727 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3728 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3729 		    "#                      Takes a snapshot of the main buffer.\n"
3730 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3731 		    "#                      (Doesn't have to be '2' works with any number that\n"
3732 		    "#                       is not a '0' or '1')\n");
3733 }
3734 
3735 static void show_snapshot_percpu_help(struct seq_file *m)
3736 {
3737 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3738 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3739 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3741 #else
3742 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3743 		    "#                     Must use main snapshot file to allocate.\n");
3744 #endif
3745 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3746 		    "#                      (Doesn't have to be '2' works with any number that\n"
3747 		    "#                       is not a '0' or '1')\n");
3748 }
3749 
3750 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3751 {
3752 	if (iter->tr->allocated_snapshot)
3753 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3754 	else
3755 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3756 
3757 	seq_puts(m, "# Snapshot commands:\n");
3758 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3759 		show_snapshot_main_help(m);
3760 	else
3761 		show_snapshot_percpu_help(m);
3762 }
3763 #else
3764 /* Should never be called */
3765 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3766 #endif
3767 
3768 static int s_show(struct seq_file *m, void *v)
3769 {
3770 	struct trace_iterator *iter = v;
3771 	int ret;
3772 
3773 	if (iter->ent == NULL) {
3774 		if (iter->tr) {
3775 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3776 			seq_puts(m, "#\n");
3777 			test_ftrace_alive(m);
3778 		}
3779 		if (iter->snapshot && trace_empty(iter))
3780 			print_snapshot_help(m, iter);
3781 		else if (iter->trace && iter->trace->print_header)
3782 			iter->trace->print_header(m);
3783 		else
3784 			trace_default_header(m);
3785 
3786 	} else if (iter->leftover) {
3787 		/*
3788 		 * If we filled the seq_file buffer earlier, we
3789 		 * want to just show it now.
3790 		 */
3791 		ret = trace_print_seq(m, &iter->seq);
3792 
3793 		/* ret should this time be zero, but you never know */
3794 		iter->leftover = ret;
3795 
3796 	} else {
3797 		print_trace_line(iter);
3798 		ret = trace_print_seq(m, &iter->seq);
3799 		/*
3800 		 * If we overflow the seq_file buffer, then it will
3801 		 * ask us for this data again at start up.
3802 		 * Use that instead.
3803 		 *  ret is 0 if seq_file write succeeded.
3804 		 *        -1 otherwise.
3805 		 */
3806 		iter->leftover = ret;
3807 	}
3808 
3809 	return 0;
3810 }
3811 
3812 /*
3813  * Should be used after trace_array_get(), trace_types_lock
3814  * ensures that i_cdev was already initialized.
3815  */
3816 static inline int tracing_get_cpu(struct inode *inode)
3817 {
3818 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3819 		return (long)inode->i_cdev - 1;
3820 	return RING_BUFFER_ALL_CPUS;
3821 }
3822 
3823 static const struct seq_operations tracer_seq_ops = {
3824 	.start		= s_start,
3825 	.next		= s_next,
3826 	.stop		= s_stop,
3827 	.show		= s_show,
3828 };
3829 
3830 static struct trace_iterator *
3831 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3832 {
3833 	struct trace_array *tr = inode->i_private;
3834 	struct trace_iterator *iter;
3835 	int cpu;
3836 
3837 	if (tracing_disabled)
3838 		return ERR_PTR(-ENODEV);
3839 
3840 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3841 	if (!iter)
3842 		return ERR_PTR(-ENOMEM);
3843 
3844 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3845 				    GFP_KERNEL);
3846 	if (!iter->buffer_iter)
3847 		goto release;
3848 
3849 	/*
3850 	 * We make a copy of the current tracer to avoid concurrent
3851 	 * changes on it while we are reading.
3852 	 */
3853 	mutex_lock(&trace_types_lock);
3854 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3855 	if (!iter->trace)
3856 		goto fail;
3857 
3858 	*iter->trace = *tr->current_trace;
3859 
3860 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3861 		goto fail;
3862 
3863 	iter->tr = tr;
3864 
3865 #ifdef CONFIG_TRACER_MAX_TRACE
3866 	/* Currently only the top directory has a snapshot */
3867 	if (tr->current_trace->print_max || snapshot)
3868 		iter->trace_buffer = &tr->max_buffer;
3869 	else
3870 #endif
3871 		iter->trace_buffer = &tr->trace_buffer;
3872 	iter->snapshot = snapshot;
3873 	iter->pos = -1;
3874 	iter->cpu_file = tracing_get_cpu(inode);
3875 	mutex_init(&iter->mutex);
3876 
3877 	/* Notify the tracer early; before we stop tracing. */
3878 	if (iter->trace && iter->trace->open)
3879 		iter->trace->open(iter);
3880 
3881 	/* Annotate start of buffers if we had overruns */
3882 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3883 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3884 
3885 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3886 	if (trace_clocks[tr->clock_id].in_ns)
3887 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3888 
3889 	/* stop the trace while dumping if we are not opening "snapshot" */
3890 	if (!iter->snapshot)
3891 		tracing_stop_tr(tr);
3892 
3893 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3894 		for_each_tracing_cpu(cpu) {
3895 			iter->buffer_iter[cpu] =
3896 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3897 		}
3898 		ring_buffer_read_prepare_sync();
3899 		for_each_tracing_cpu(cpu) {
3900 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3901 			tracing_iter_reset(iter, cpu);
3902 		}
3903 	} else {
3904 		cpu = iter->cpu_file;
3905 		iter->buffer_iter[cpu] =
3906 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3907 		ring_buffer_read_prepare_sync();
3908 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3909 		tracing_iter_reset(iter, cpu);
3910 	}
3911 
3912 	mutex_unlock(&trace_types_lock);
3913 
3914 	return iter;
3915 
3916  fail:
3917 	mutex_unlock(&trace_types_lock);
3918 	kfree(iter->trace);
3919 	kfree(iter->buffer_iter);
3920 release:
3921 	seq_release_private(inode, file);
3922 	return ERR_PTR(-ENOMEM);
3923 }
3924 
3925 int tracing_open_generic(struct inode *inode, struct file *filp)
3926 {
3927 	if (tracing_disabled)
3928 		return -ENODEV;
3929 
3930 	filp->private_data = inode->i_private;
3931 	return 0;
3932 }
3933 
3934 bool tracing_is_disabled(void)
3935 {
3936 	return (tracing_disabled) ? true: false;
3937 }
3938 
3939 /*
3940  * Open and update trace_array ref count.
3941  * Must have the current trace_array passed to it.
3942  */
3943 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3944 {
3945 	struct trace_array *tr = inode->i_private;
3946 
3947 	if (tracing_disabled)
3948 		return -ENODEV;
3949 
3950 	if (trace_array_get(tr) < 0)
3951 		return -ENODEV;
3952 
3953 	filp->private_data = inode->i_private;
3954 
3955 	return 0;
3956 }
3957 
3958 static int tracing_release(struct inode *inode, struct file *file)
3959 {
3960 	struct trace_array *tr = inode->i_private;
3961 	struct seq_file *m = file->private_data;
3962 	struct trace_iterator *iter;
3963 	int cpu;
3964 
3965 	if (!(file->f_mode & FMODE_READ)) {
3966 		trace_array_put(tr);
3967 		return 0;
3968 	}
3969 
3970 	/* Writes do not use seq_file */
3971 	iter = m->private;
3972 	mutex_lock(&trace_types_lock);
3973 
3974 	for_each_tracing_cpu(cpu) {
3975 		if (iter->buffer_iter[cpu])
3976 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3977 	}
3978 
3979 	if (iter->trace && iter->trace->close)
3980 		iter->trace->close(iter);
3981 
3982 	if (!iter->snapshot)
3983 		/* reenable tracing if it was previously enabled */
3984 		tracing_start_tr(tr);
3985 
3986 	__trace_array_put(tr);
3987 
3988 	mutex_unlock(&trace_types_lock);
3989 
3990 	mutex_destroy(&iter->mutex);
3991 	free_cpumask_var(iter->started);
3992 	kfree(iter->trace);
3993 	kfree(iter->buffer_iter);
3994 	seq_release_private(inode, file);
3995 
3996 	return 0;
3997 }
3998 
3999 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4000 {
4001 	struct trace_array *tr = inode->i_private;
4002 
4003 	trace_array_put(tr);
4004 	return 0;
4005 }
4006 
4007 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4008 {
4009 	struct trace_array *tr = inode->i_private;
4010 
4011 	trace_array_put(tr);
4012 
4013 	return single_release(inode, file);
4014 }
4015 
4016 static int tracing_open(struct inode *inode, struct file *file)
4017 {
4018 	struct trace_array *tr = inode->i_private;
4019 	struct trace_iterator *iter;
4020 	int ret = 0;
4021 
4022 	if (trace_array_get(tr) < 0)
4023 		return -ENODEV;
4024 
4025 	/* If this file was open for write, then erase contents */
4026 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4027 		int cpu = tracing_get_cpu(inode);
4028 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4029 
4030 #ifdef CONFIG_TRACER_MAX_TRACE
4031 		if (tr->current_trace->print_max)
4032 			trace_buf = &tr->max_buffer;
4033 #endif
4034 
4035 		if (cpu == RING_BUFFER_ALL_CPUS)
4036 			tracing_reset_online_cpus(trace_buf);
4037 		else
4038 			tracing_reset(trace_buf, cpu);
4039 	}
4040 
4041 	if (file->f_mode & FMODE_READ) {
4042 		iter = __tracing_open(inode, file, false);
4043 		if (IS_ERR(iter))
4044 			ret = PTR_ERR(iter);
4045 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4046 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4047 	}
4048 
4049 	if (ret < 0)
4050 		trace_array_put(tr);
4051 
4052 	return ret;
4053 }
4054 
4055 /*
4056  * Some tracers are not suitable for instance buffers.
4057  * A tracer is always available for the global array (toplevel)
4058  * or if it explicitly states that it is.
4059  */
4060 static bool
4061 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4062 {
4063 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4064 }
4065 
4066 /* Find the next tracer that this trace array may use */
4067 static struct tracer *
4068 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4069 {
4070 	while (t && !trace_ok_for_array(t, tr))
4071 		t = t->next;
4072 
4073 	return t;
4074 }
4075 
4076 static void *
4077 t_next(struct seq_file *m, void *v, loff_t *pos)
4078 {
4079 	struct trace_array *tr = m->private;
4080 	struct tracer *t = v;
4081 
4082 	(*pos)++;
4083 
4084 	if (t)
4085 		t = get_tracer_for_array(tr, t->next);
4086 
4087 	return t;
4088 }
4089 
4090 static void *t_start(struct seq_file *m, loff_t *pos)
4091 {
4092 	struct trace_array *tr = m->private;
4093 	struct tracer *t;
4094 	loff_t l = 0;
4095 
4096 	mutex_lock(&trace_types_lock);
4097 
4098 	t = get_tracer_for_array(tr, trace_types);
4099 	for (; t && l < *pos; t = t_next(m, t, &l))
4100 			;
4101 
4102 	return t;
4103 }
4104 
4105 static void t_stop(struct seq_file *m, void *p)
4106 {
4107 	mutex_unlock(&trace_types_lock);
4108 }
4109 
4110 static int t_show(struct seq_file *m, void *v)
4111 {
4112 	struct tracer *t = v;
4113 
4114 	if (!t)
4115 		return 0;
4116 
4117 	seq_puts(m, t->name);
4118 	if (t->next)
4119 		seq_putc(m, ' ');
4120 	else
4121 		seq_putc(m, '\n');
4122 
4123 	return 0;
4124 }
4125 
4126 static const struct seq_operations show_traces_seq_ops = {
4127 	.start		= t_start,
4128 	.next		= t_next,
4129 	.stop		= t_stop,
4130 	.show		= t_show,
4131 };
4132 
4133 static int show_traces_open(struct inode *inode, struct file *file)
4134 {
4135 	struct trace_array *tr = inode->i_private;
4136 	struct seq_file *m;
4137 	int ret;
4138 
4139 	if (tracing_disabled)
4140 		return -ENODEV;
4141 
4142 	ret = seq_open(file, &show_traces_seq_ops);
4143 	if (ret)
4144 		return ret;
4145 
4146 	m = file->private_data;
4147 	m->private = tr;
4148 
4149 	return 0;
4150 }
4151 
4152 static ssize_t
4153 tracing_write_stub(struct file *filp, const char __user *ubuf,
4154 		   size_t count, loff_t *ppos)
4155 {
4156 	return count;
4157 }
4158 
4159 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4160 {
4161 	int ret;
4162 
4163 	if (file->f_mode & FMODE_READ)
4164 		ret = seq_lseek(file, offset, whence);
4165 	else
4166 		file->f_pos = ret = 0;
4167 
4168 	return ret;
4169 }
4170 
4171 static const struct file_operations tracing_fops = {
4172 	.open		= tracing_open,
4173 	.read		= seq_read,
4174 	.write		= tracing_write_stub,
4175 	.llseek		= tracing_lseek,
4176 	.release	= tracing_release,
4177 };
4178 
4179 static const struct file_operations show_traces_fops = {
4180 	.open		= show_traces_open,
4181 	.read		= seq_read,
4182 	.release	= seq_release,
4183 	.llseek		= seq_lseek,
4184 };
4185 
4186 static ssize_t
4187 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4188 		     size_t count, loff_t *ppos)
4189 {
4190 	struct trace_array *tr = file_inode(filp)->i_private;
4191 	char *mask_str;
4192 	int len;
4193 
4194 	len = snprintf(NULL, 0, "%*pb\n",
4195 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4196 	mask_str = kmalloc(len, GFP_KERNEL);
4197 	if (!mask_str)
4198 		return -ENOMEM;
4199 
4200 	len = snprintf(mask_str, len, "%*pb\n",
4201 		       cpumask_pr_args(tr->tracing_cpumask));
4202 	if (len >= count) {
4203 		count = -EINVAL;
4204 		goto out_err;
4205 	}
4206 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4207 
4208 out_err:
4209 	kfree(mask_str);
4210 
4211 	return count;
4212 }
4213 
4214 static ssize_t
4215 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4216 		      size_t count, loff_t *ppos)
4217 {
4218 	struct trace_array *tr = file_inode(filp)->i_private;
4219 	cpumask_var_t tracing_cpumask_new;
4220 	int err, cpu;
4221 
4222 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4223 		return -ENOMEM;
4224 
4225 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4226 	if (err)
4227 		goto err_unlock;
4228 
4229 	local_irq_disable();
4230 	arch_spin_lock(&tr->max_lock);
4231 	for_each_tracing_cpu(cpu) {
4232 		/*
4233 		 * Increase/decrease the disabled counter if we are
4234 		 * about to flip a bit in the cpumask:
4235 		 */
4236 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4237 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4238 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4239 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4240 		}
4241 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4242 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4243 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4244 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4245 		}
4246 	}
4247 	arch_spin_unlock(&tr->max_lock);
4248 	local_irq_enable();
4249 
4250 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4251 	free_cpumask_var(tracing_cpumask_new);
4252 
4253 	return count;
4254 
4255 err_unlock:
4256 	free_cpumask_var(tracing_cpumask_new);
4257 
4258 	return err;
4259 }
4260 
4261 static const struct file_operations tracing_cpumask_fops = {
4262 	.open		= tracing_open_generic_tr,
4263 	.read		= tracing_cpumask_read,
4264 	.write		= tracing_cpumask_write,
4265 	.release	= tracing_release_generic_tr,
4266 	.llseek		= generic_file_llseek,
4267 };
4268 
4269 static int tracing_trace_options_show(struct seq_file *m, void *v)
4270 {
4271 	struct tracer_opt *trace_opts;
4272 	struct trace_array *tr = m->private;
4273 	u32 tracer_flags;
4274 	int i;
4275 
4276 	mutex_lock(&trace_types_lock);
4277 	tracer_flags = tr->current_trace->flags->val;
4278 	trace_opts = tr->current_trace->flags->opts;
4279 
4280 	for (i = 0; trace_options[i]; i++) {
4281 		if (tr->trace_flags & (1 << i))
4282 			seq_printf(m, "%s\n", trace_options[i]);
4283 		else
4284 			seq_printf(m, "no%s\n", trace_options[i]);
4285 	}
4286 
4287 	for (i = 0; trace_opts[i].name; i++) {
4288 		if (tracer_flags & trace_opts[i].bit)
4289 			seq_printf(m, "%s\n", trace_opts[i].name);
4290 		else
4291 			seq_printf(m, "no%s\n", trace_opts[i].name);
4292 	}
4293 	mutex_unlock(&trace_types_lock);
4294 
4295 	return 0;
4296 }
4297 
4298 static int __set_tracer_option(struct trace_array *tr,
4299 			       struct tracer_flags *tracer_flags,
4300 			       struct tracer_opt *opts, int neg)
4301 {
4302 	struct tracer *trace = tracer_flags->trace;
4303 	int ret;
4304 
4305 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4306 	if (ret)
4307 		return ret;
4308 
4309 	if (neg)
4310 		tracer_flags->val &= ~opts->bit;
4311 	else
4312 		tracer_flags->val |= opts->bit;
4313 	return 0;
4314 }
4315 
4316 /* Try to assign a tracer specific option */
4317 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4318 {
4319 	struct tracer *trace = tr->current_trace;
4320 	struct tracer_flags *tracer_flags = trace->flags;
4321 	struct tracer_opt *opts = NULL;
4322 	int i;
4323 
4324 	for (i = 0; tracer_flags->opts[i].name; i++) {
4325 		opts = &tracer_flags->opts[i];
4326 
4327 		if (strcmp(cmp, opts->name) == 0)
4328 			return __set_tracer_option(tr, trace->flags, opts, neg);
4329 	}
4330 
4331 	return -EINVAL;
4332 }
4333 
4334 /* Some tracers require overwrite to stay enabled */
4335 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4336 {
4337 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4338 		return -1;
4339 
4340 	return 0;
4341 }
4342 
4343 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4344 {
4345 	/* do nothing if flag is already set */
4346 	if (!!(tr->trace_flags & mask) == !!enabled)
4347 		return 0;
4348 
4349 	/* Give the tracer a chance to approve the change */
4350 	if (tr->current_trace->flag_changed)
4351 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4352 			return -EINVAL;
4353 
4354 	if (enabled)
4355 		tr->trace_flags |= mask;
4356 	else
4357 		tr->trace_flags &= ~mask;
4358 
4359 	if (mask == TRACE_ITER_RECORD_CMD)
4360 		trace_event_enable_cmd_record(enabled);
4361 
4362 	if (mask == TRACE_ITER_RECORD_TGID) {
4363 		if (!tgid_map)
4364 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4365 					   sizeof(*tgid_map),
4366 					   GFP_KERNEL);
4367 		if (!tgid_map) {
4368 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4369 			return -ENOMEM;
4370 		}
4371 
4372 		trace_event_enable_tgid_record(enabled);
4373 	}
4374 
4375 	if (mask == TRACE_ITER_EVENT_FORK)
4376 		trace_event_follow_fork(tr, enabled);
4377 
4378 	if (mask == TRACE_ITER_FUNC_FORK)
4379 		ftrace_pid_follow_fork(tr, enabled);
4380 
4381 	if (mask == TRACE_ITER_OVERWRITE) {
4382 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4383 #ifdef CONFIG_TRACER_MAX_TRACE
4384 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4385 #endif
4386 	}
4387 
4388 	if (mask == TRACE_ITER_PRINTK) {
4389 		trace_printk_start_stop_comm(enabled);
4390 		trace_printk_control(enabled);
4391 	}
4392 
4393 	return 0;
4394 }
4395 
4396 static int trace_set_options(struct trace_array *tr, char *option)
4397 {
4398 	char *cmp;
4399 	int neg = 0;
4400 	int ret;
4401 	size_t orig_len = strlen(option);
4402 
4403 	cmp = strstrip(option);
4404 
4405 	if (strncmp(cmp, "no", 2) == 0) {
4406 		neg = 1;
4407 		cmp += 2;
4408 	}
4409 
4410 	mutex_lock(&trace_types_lock);
4411 
4412 	ret = match_string(trace_options, -1, cmp);
4413 	/* If no option could be set, test the specific tracer options */
4414 	if (ret < 0)
4415 		ret = set_tracer_option(tr, cmp, neg);
4416 	else
4417 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4418 
4419 	mutex_unlock(&trace_types_lock);
4420 
4421 	/*
4422 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4423 	 * turn it back into a space.
4424 	 */
4425 	if (orig_len > strlen(option))
4426 		option[strlen(option)] = ' ';
4427 
4428 	return ret;
4429 }
4430 
4431 static void __init apply_trace_boot_options(void)
4432 {
4433 	char *buf = trace_boot_options_buf;
4434 	char *option;
4435 
4436 	while (true) {
4437 		option = strsep(&buf, ",");
4438 
4439 		if (!option)
4440 			break;
4441 
4442 		if (*option)
4443 			trace_set_options(&global_trace, option);
4444 
4445 		/* Put back the comma to allow this to be called again */
4446 		if (buf)
4447 			*(buf - 1) = ',';
4448 	}
4449 }
4450 
4451 static ssize_t
4452 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4453 			size_t cnt, loff_t *ppos)
4454 {
4455 	struct seq_file *m = filp->private_data;
4456 	struct trace_array *tr = m->private;
4457 	char buf[64];
4458 	int ret;
4459 
4460 	if (cnt >= sizeof(buf))
4461 		return -EINVAL;
4462 
4463 	if (copy_from_user(buf, ubuf, cnt))
4464 		return -EFAULT;
4465 
4466 	buf[cnt] = 0;
4467 
4468 	ret = trace_set_options(tr, buf);
4469 	if (ret < 0)
4470 		return ret;
4471 
4472 	*ppos += cnt;
4473 
4474 	return cnt;
4475 }
4476 
4477 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4478 {
4479 	struct trace_array *tr = inode->i_private;
4480 	int ret;
4481 
4482 	if (tracing_disabled)
4483 		return -ENODEV;
4484 
4485 	if (trace_array_get(tr) < 0)
4486 		return -ENODEV;
4487 
4488 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4489 	if (ret < 0)
4490 		trace_array_put(tr);
4491 
4492 	return ret;
4493 }
4494 
4495 static const struct file_operations tracing_iter_fops = {
4496 	.open		= tracing_trace_options_open,
4497 	.read		= seq_read,
4498 	.llseek		= seq_lseek,
4499 	.release	= tracing_single_release_tr,
4500 	.write		= tracing_trace_options_write,
4501 };
4502 
4503 static const char readme_msg[] =
4504 	"tracing mini-HOWTO:\n\n"
4505 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4506 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4507 	" Important files:\n"
4508 	"  trace\t\t\t- The static contents of the buffer\n"
4509 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4510 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4511 	"  current_tracer\t- function and latency tracers\n"
4512 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4513 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4514 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4515 	"  trace_clock\t\t-change the clock used to order events\n"
4516 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4517 	"      global:   Synced across CPUs but slows tracing down.\n"
4518 	"     counter:   Not a clock, but just an increment\n"
4519 	"      uptime:   Jiffy counter from time of boot\n"
4520 	"        perf:   Same clock that perf events use\n"
4521 #ifdef CONFIG_X86_64
4522 	"     x86-tsc:   TSC cycle counter\n"
4523 #endif
4524 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4525 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4526 	"    absolute:   Absolute (standalone) timestamp\n"
4527 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4528 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4529 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4530 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4531 	"\t\t\t  Remove sub-buffer with rmdir\n"
4532 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4533 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4534 	"\t\t\t  option name\n"
4535 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4536 #ifdef CONFIG_DYNAMIC_FTRACE
4537 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4538 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4539 	"\t\t\t  functions\n"
4540 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4541 	"\t     modules: Can select a group via module\n"
4542 	"\t      Format: :mod:<module-name>\n"
4543 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4544 	"\t    triggers: a command to perform when function is hit\n"
4545 	"\t      Format: <function>:<trigger>[:count]\n"
4546 	"\t     trigger: traceon, traceoff\n"
4547 	"\t\t      enable_event:<system>:<event>\n"
4548 	"\t\t      disable_event:<system>:<event>\n"
4549 #ifdef CONFIG_STACKTRACE
4550 	"\t\t      stacktrace\n"
4551 #endif
4552 #ifdef CONFIG_TRACER_SNAPSHOT
4553 	"\t\t      snapshot\n"
4554 #endif
4555 	"\t\t      dump\n"
4556 	"\t\t      cpudump\n"
4557 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4558 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4559 	"\t     The first one will disable tracing every time do_fault is hit\n"
4560 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4561 	"\t       The first time do trap is hit and it disables tracing, the\n"
4562 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4563 	"\t       the counter will not decrement. It only decrements when the\n"
4564 	"\t       trigger did work\n"
4565 	"\t     To remove trigger without count:\n"
4566 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4567 	"\t     To remove trigger with a count:\n"
4568 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4569 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4570 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4571 	"\t    modules: Can select a group via module command :mod:\n"
4572 	"\t    Does not accept triggers\n"
4573 #endif /* CONFIG_DYNAMIC_FTRACE */
4574 #ifdef CONFIG_FUNCTION_TRACER
4575 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4576 	"\t\t    (function)\n"
4577 #endif
4578 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4579 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4580 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4581 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4582 #endif
4583 #ifdef CONFIG_TRACER_SNAPSHOT
4584 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4585 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4586 	"\t\t\t  information\n"
4587 #endif
4588 #ifdef CONFIG_STACK_TRACER
4589 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4590 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4591 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4592 	"\t\t\t  new trace)\n"
4593 #ifdef CONFIG_DYNAMIC_FTRACE
4594 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4595 	"\t\t\t  traces\n"
4596 #endif
4597 #endif /* CONFIG_STACK_TRACER */
4598 #ifdef CONFIG_KPROBE_EVENTS
4599 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4600 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4601 #endif
4602 #ifdef CONFIG_UPROBE_EVENTS
4603 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4604 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4605 #endif
4606 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4607 	"\t  accepts: event-definitions (one definition per line)\n"
4608 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4609 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4610 	"\t           -:[<group>/]<event>\n"
4611 #ifdef CONFIG_KPROBE_EVENTS
4612 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4613   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4614 #endif
4615 #ifdef CONFIG_UPROBE_EVENTS
4616 	"\t    place: <path>:<offset>\n"
4617 #endif
4618 	"\t     args: <name>=fetcharg[:type]\n"
4619 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4620 	"\t           $stack<index>, $stack, $retval, $comm\n"
4621 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4622 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4623 #endif
4624 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4625 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4626 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4627 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4628 	"\t\t\t  events\n"
4629 	"      filter\t\t- If set, only events passing filter are traced\n"
4630 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4631 	"\t\t\t  <event>:\n"
4632 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4633 	"      filter\t\t- If set, only events passing filter are traced\n"
4634 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4635 	"\t    Format: <trigger>[:count][if <filter>]\n"
4636 	"\t   trigger: traceon, traceoff\n"
4637 	"\t            enable_event:<system>:<event>\n"
4638 	"\t            disable_event:<system>:<event>\n"
4639 #ifdef CONFIG_HIST_TRIGGERS
4640 	"\t            enable_hist:<system>:<event>\n"
4641 	"\t            disable_hist:<system>:<event>\n"
4642 #endif
4643 #ifdef CONFIG_STACKTRACE
4644 	"\t\t    stacktrace\n"
4645 #endif
4646 #ifdef CONFIG_TRACER_SNAPSHOT
4647 	"\t\t    snapshot\n"
4648 #endif
4649 #ifdef CONFIG_HIST_TRIGGERS
4650 	"\t\t    hist (see below)\n"
4651 #endif
4652 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4653 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4654 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4655 	"\t                  events/block/block_unplug/trigger\n"
4656 	"\t   The first disables tracing every time block_unplug is hit.\n"
4657 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4658 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4659 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4660 	"\t   Like function triggers, the counter is only decremented if it\n"
4661 	"\t    enabled or disabled tracing.\n"
4662 	"\t   To remove a trigger without a count:\n"
4663 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4664 	"\t   To remove a trigger with a count:\n"
4665 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4666 	"\t   Filters can be ignored when removing a trigger.\n"
4667 #ifdef CONFIG_HIST_TRIGGERS
4668 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4669 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4670 	"\t            [:values=<field1[,field2,...]>]\n"
4671 	"\t            [:sort=<field1[,field2,...]>]\n"
4672 	"\t            [:size=#entries]\n"
4673 	"\t            [:pause][:continue][:clear]\n"
4674 	"\t            [:name=histname1]\n"
4675 	"\t            [if <filter>]\n\n"
4676 	"\t    When a matching event is hit, an entry is added to a hash\n"
4677 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4678 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4679 	"\t    correspond to fields in the event's format description.  Keys\n"
4680 	"\t    can be any field, or the special string 'stacktrace'.\n"
4681 	"\t    Compound keys consisting of up to two fields can be specified\n"
4682 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4683 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4684 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4685 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4686 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4687 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4688 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4689 	"\t    its histogram data will be shared with other triggers of the\n"
4690 	"\t    same name, and trigger hits will update this common data.\n\n"
4691 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4692 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4693 	"\t    triggers attached to an event, there will be a table for each\n"
4694 	"\t    trigger in the output.  The table displayed for a named\n"
4695 	"\t    trigger will be the same as any other instance having the\n"
4696 	"\t    same name.  The default format used to display a given field\n"
4697 	"\t    can be modified by appending any of the following modifiers\n"
4698 	"\t    to the field name, as applicable:\n\n"
4699 	"\t            .hex        display a number as a hex value\n"
4700 	"\t            .sym        display an address as a symbol\n"
4701 	"\t            .sym-offset display an address as a symbol and offset\n"
4702 	"\t            .execname   display a common_pid as a program name\n"
4703 	"\t            .syscall    display a syscall id as a syscall name\n"
4704 	"\t            .log2       display log2 value rather than raw number\n"
4705 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4706 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4707 	"\t    trigger or to start a hist trigger but not log any events\n"
4708 	"\t    until told to do so.  'continue' can be used to start or\n"
4709 	"\t    restart a paused hist trigger.\n\n"
4710 	"\t    The 'clear' parameter will clear the contents of a running\n"
4711 	"\t    hist trigger and leave its current paused/active state\n"
4712 	"\t    unchanged.\n\n"
4713 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4714 	"\t    have one event conditionally start and stop another event's\n"
4715 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4716 	"\t    the enable_event and disable_event triggers.\n"
4717 #endif
4718 ;
4719 
4720 static ssize_t
4721 tracing_readme_read(struct file *filp, char __user *ubuf,
4722 		       size_t cnt, loff_t *ppos)
4723 {
4724 	return simple_read_from_buffer(ubuf, cnt, ppos,
4725 					readme_msg, strlen(readme_msg));
4726 }
4727 
4728 static const struct file_operations tracing_readme_fops = {
4729 	.open		= tracing_open_generic,
4730 	.read		= tracing_readme_read,
4731 	.llseek		= generic_file_llseek,
4732 };
4733 
4734 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4735 {
4736 	int *ptr = v;
4737 
4738 	if (*pos || m->count)
4739 		ptr++;
4740 
4741 	(*pos)++;
4742 
4743 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4744 		if (trace_find_tgid(*ptr))
4745 			return ptr;
4746 	}
4747 
4748 	return NULL;
4749 }
4750 
4751 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4752 {
4753 	void *v;
4754 	loff_t l = 0;
4755 
4756 	if (!tgid_map)
4757 		return NULL;
4758 
4759 	v = &tgid_map[0];
4760 	while (l <= *pos) {
4761 		v = saved_tgids_next(m, v, &l);
4762 		if (!v)
4763 			return NULL;
4764 	}
4765 
4766 	return v;
4767 }
4768 
4769 static void saved_tgids_stop(struct seq_file *m, void *v)
4770 {
4771 }
4772 
4773 static int saved_tgids_show(struct seq_file *m, void *v)
4774 {
4775 	int pid = (int *)v - tgid_map;
4776 
4777 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4778 	return 0;
4779 }
4780 
4781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4782 	.start		= saved_tgids_start,
4783 	.stop		= saved_tgids_stop,
4784 	.next		= saved_tgids_next,
4785 	.show		= saved_tgids_show,
4786 };
4787 
4788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4789 {
4790 	if (tracing_disabled)
4791 		return -ENODEV;
4792 
4793 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4794 }
4795 
4796 
4797 static const struct file_operations tracing_saved_tgids_fops = {
4798 	.open		= tracing_saved_tgids_open,
4799 	.read		= seq_read,
4800 	.llseek		= seq_lseek,
4801 	.release	= seq_release,
4802 };
4803 
4804 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4805 {
4806 	unsigned int *ptr = v;
4807 
4808 	if (*pos || m->count)
4809 		ptr++;
4810 
4811 	(*pos)++;
4812 
4813 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4814 	     ptr++) {
4815 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4816 			continue;
4817 
4818 		return ptr;
4819 	}
4820 
4821 	return NULL;
4822 }
4823 
4824 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4825 {
4826 	void *v;
4827 	loff_t l = 0;
4828 
4829 	preempt_disable();
4830 	arch_spin_lock(&trace_cmdline_lock);
4831 
4832 	v = &savedcmd->map_cmdline_to_pid[0];
4833 	while (l <= *pos) {
4834 		v = saved_cmdlines_next(m, v, &l);
4835 		if (!v)
4836 			return NULL;
4837 	}
4838 
4839 	return v;
4840 }
4841 
4842 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4843 {
4844 	arch_spin_unlock(&trace_cmdline_lock);
4845 	preempt_enable();
4846 }
4847 
4848 static int saved_cmdlines_show(struct seq_file *m, void *v)
4849 {
4850 	char buf[TASK_COMM_LEN];
4851 	unsigned int *pid = v;
4852 
4853 	__trace_find_cmdline(*pid, buf);
4854 	seq_printf(m, "%d %s\n", *pid, buf);
4855 	return 0;
4856 }
4857 
4858 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4859 	.start		= saved_cmdlines_start,
4860 	.next		= saved_cmdlines_next,
4861 	.stop		= saved_cmdlines_stop,
4862 	.show		= saved_cmdlines_show,
4863 };
4864 
4865 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4866 {
4867 	if (tracing_disabled)
4868 		return -ENODEV;
4869 
4870 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4871 }
4872 
4873 static const struct file_operations tracing_saved_cmdlines_fops = {
4874 	.open		= tracing_saved_cmdlines_open,
4875 	.read		= seq_read,
4876 	.llseek		= seq_lseek,
4877 	.release	= seq_release,
4878 };
4879 
4880 static ssize_t
4881 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4882 				 size_t cnt, loff_t *ppos)
4883 {
4884 	char buf[64];
4885 	int r;
4886 
4887 	arch_spin_lock(&trace_cmdline_lock);
4888 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4889 	arch_spin_unlock(&trace_cmdline_lock);
4890 
4891 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4892 }
4893 
4894 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4895 {
4896 	kfree(s->saved_cmdlines);
4897 	kfree(s->map_cmdline_to_pid);
4898 	kfree(s);
4899 }
4900 
4901 static int tracing_resize_saved_cmdlines(unsigned int val)
4902 {
4903 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4904 
4905 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4906 	if (!s)
4907 		return -ENOMEM;
4908 
4909 	if (allocate_cmdlines_buffer(val, s) < 0) {
4910 		kfree(s);
4911 		return -ENOMEM;
4912 	}
4913 
4914 	arch_spin_lock(&trace_cmdline_lock);
4915 	savedcmd_temp = savedcmd;
4916 	savedcmd = s;
4917 	arch_spin_unlock(&trace_cmdline_lock);
4918 	free_saved_cmdlines_buffer(savedcmd_temp);
4919 
4920 	return 0;
4921 }
4922 
4923 static ssize_t
4924 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4925 				  size_t cnt, loff_t *ppos)
4926 {
4927 	unsigned long val;
4928 	int ret;
4929 
4930 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4931 	if (ret)
4932 		return ret;
4933 
4934 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4935 	if (!val || val > PID_MAX_DEFAULT)
4936 		return -EINVAL;
4937 
4938 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4939 	if (ret < 0)
4940 		return ret;
4941 
4942 	*ppos += cnt;
4943 
4944 	return cnt;
4945 }
4946 
4947 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4948 	.open		= tracing_open_generic,
4949 	.read		= tracing_saved_cmdlines_size_read,
4950 	.write		= tracing_saved_cmdlines_size_write,
4951 };
4952 
4953 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4954 static union trace_eval_map_item *
4955 update_eval_map(union trace_eval_map_item *ptr)
4956 {
4957 	if (!ptr->map.eval_string) {
4958 		if (ptr->tail.next) {
4959 			ptr = ptr->tail.next;
4960 			/* Set ptr to the next real item (skip head) */
4961 			ptr++;
4962 		} else
4963 			return NULL;
4964 	}
4965 	return ptr;
4966 }
4967 
4968 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4969 {
4970 	union trace_eval_map_item *ptr = v;
4971 
4972 	/*
4973 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4974 	 * This really should never happen.
4975 	 */
4976 	ptr = update_eval_map(ptr);
4977 	if (WARN_ON_ONCE(!ptr))
4978 		return NULL;
4979 
4980 	ptr++;
4981 
4982 	(*pos)++;
4983 
4984 	ptr = update_eval_map(ptr);
4985 
4986 	return ptr;
4987 }
4988 
4989 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4990 {
4991 	union trace_eval_map_item *v;
4992 	loff_t l = 0;
4993 
4994 	mutex_lock(&trace_eval_mutex);
4995 
4996 	v = trace_eval_maps;
4997 	if (v)
4998 		v++;
4999 
5000 	while (v && l < *pos) {
5001 		v = eval_map_next(m, v, &l);
5002 	}
5003 
5004 	return v;
5005 }
5006 
5007 static void eval_map_stop(struct seq_file *m, void *v)
5008 {
5009 	mutex_unlock(&trace_eval_mutex);
5010 }
5011 
5012 static int eval_map_show(struct seq_file *m, void *v)
5013 {
5014 	union trace_eval_map_item *ptr = v;
5015 
5016 	seq_printf(m, "%s %ld (%s)\n",
5017 		   ptr->map.eval_string, ptr->map.eval_value,
5018 		   ptr->map.system);
5019 
5020 	return 0;
5021 }
5022 
5023 static const struct seq_operations tracing_eval_map_seq_ops = {
5024 	.start		= eval_map_start,
5025 	.next		= eval_map_next,
5026 	.stop		= eval_map_stop,
5027 	.show		= eval_map_show,
5028 };
5029 
5030 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5031 {
5032 	if (tracing_disabled)
5033 		return -ENODEV;
5034 
5035 	return seq_open(filp, &tracing_eval_map_seq_ops);
5036 }
5037 
5038 static const struct file_operations tracing_eval_map_fops = {
5039 	.open		= tracing_eval_map_open,
5040 	.read		= seq_read,
5041 	.llseek		= seq_lseek,
5042 	.release	= seq_release,
5043 };
5044 
5045 static inline union trace_eval_map_item *
5046 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5047 {
5048 	/* Return tail of array given the head */
5049 	return ptr + ptr->head.length + 1;
5050 }
5051 
5052 static void
5053 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5054 			   int len)
5055 {
5056 	struct trace_eval_map **stop;
5057 	struct trace_eval_map **map;
5058 	union trace_eval_map_item *map_array;
5059 	union trace_eval_map_item *ptr;
5060 
5061 	stop = start + len;
5062 
5063 	/*
5064 	 * The trace_eval_maps contains the map plus a head and tail item,
5065 	 * where the head holds the module and length of array, and the
5066 	 * tail holds a pointer to the next list.
5067 	 */
5068 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5069 	if (!map_array) {
5070 		pr_warn("Unable to allocate trace eval mapping\n");
5071 		return;
5072 	}
5073 
5074 	mutex_lock(&trace_eval_mutex);
5075 
5076 	if (!trace_eval_maps)
5077 		trace_eval_maps = map_array;
5078 	else {
5079 		ptr = trace_eval_maps;
5080 		for (;;) {
5081 			ptr = trace_eval_jmp_to_tail(ptr);
5082 			if (!ptr->tail.next)
5083 				break;
5084 			ptr = ptr->tail.next;
5085 
5086 		}
5087 		ptr->tail.next = map_array;
5088 	}
5089 	map_array->head.mod = mod;
5090 	map_array->head.length = len;
5091 	map_array++;
5092 
5093 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5094 		map_array->map = **map;
5095 		map_array++;
5096 	}
5097 	memset(map_array, 0, sizeof(*map_array));
5098 
5099 	mutex_unlock(&trace_eval_mutex);
5100 }
5101 
5102 static void trace_create_eval_file(struct dentry *d_tracer)
5103 {
5104 	trace_create_file("eval_map", 0444, d_tracer,
5105 			  NULL, &tracing_eval_map_fops);
5106 }
5107 
5108 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5109 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5110 static inline void trace_insert_eval_map_file(struct module *mod,
5111 			      struct trace_eval_map **start, int len) { }
5112 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5113 
5114 static void trace_insert_eval_map(struct module *mod,
5115 				  struct trace_eval_map **start, int len)
5116 {
5117 	struct trace_eval_map **map;
5118 
5119 	if (len <= 0)
5120 		return;
5121 
5122 	map = start;
5123 
5124 	trace_event_eval_update(map, len);
5125 
5126 	trace_insert_eval_map_file(mod, start, len);
5127 }
5128 
5129 static ssize_t
5130 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5131 		       size_t cnt, loff_t *ppos)
5132 {
5133 	struct trace_array *tr = filp->private_data;
5134 	char buf[MAX_TRACER_SIZE+2];
5135 	int r;
5136 
5137 	mutex_lock(&trace_types_lock);
5138 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5139 	mutex_unlock(&trace_types_lock);
5140 
5141 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5142 }
5143 
5144 int tracer_init(struct tracer *t, struct trace_array *tr)
5145 {
5146 	tracing_reset_online_cpus(&tr->trace_buffer);
5147 	return t->init(tr);
5148 }
5149 
5150 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5151 {
5152 	int cpu;
5153 
5154 	for_each_tracing_cpu(cpu)
5155 		per_cpu_ptr(buf->data, cpu)->entries = val;
5156 }
5157 
5158 #ifdef CONFIG_TRACER_MAX_TRACE
5159 /* resize @tr's buffer to the size of @size_tr's entries */
5160 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5161 					struct trace_buffer *size_buf, int cpu_id)
5162 {
5163 	int cpu, ret = 0;
5164 
5165 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5166 		for_each_tracing_cpu(cpu) {
5167 			ret = ring_buffer_resize(trace_buf->buffer,
5168 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5169 			if (ret < 0)
5170 				break;
5171 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5172 				per_cpu_ptr(size_buf->data, cpu)->entries;
5173 		}
5174 	} else {
5175 		ret = ring_buffer_resize(trace_buf->buffer,
5176 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5177 		if (ret == 0)
5178 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5179 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5180 	}
5181 
5182 	return ret;
5183 }
5184 #endif /* CONFIG_TRACER_MAX_TRACE */
5185 
5186 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5187 					unsigned long size, int cpu)
5188 {
5189 	int ret;
5190 
5191 	/*
5192 	 * If kernel or user changes the size of the ring buffer
5193 	 * we use the size that was given, and we can forget about
5194 	 * expanding it later.
5195 	 */
5196 	ring_buffer_expanded = true;
5197 
5198 	/* May be called before buffers are initialized */
5199 	if (!tr->trace_buffer.buffer)
5200 		return 0;
5201 
5202 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5203 	if (ret < 0)
5204 		return ret;
5205 
5206 #ifdef CONFIG_TRACER_MAX_TRACE
5207 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5208 	    !tr->current_trace->use_max_tr)
5209 		goto out;
5210 
5211 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5212 	if (ret < 0) {
5213 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5214 						     &tr->trace_buffer, cpu);
5215 		if (r < 0) {
5216 			/*
5217 			 * AARGH! We are left with different
5218 			 * size max buffer!!!!
5219 			 * The max buffer is our "snapshot" buffer.
5220 			 * When a tracer needs a snapshot (one of the
5221 			 * latency tracers), it swaps the max buffer
5222 			 * with the saved snap shot. We succeeded to
5223 			 * update the size of the main buffer, but failed to
5224 			 * update the size of the max buffer. But when we tried
5225 			 * to reset the main buffer to the original size, we
5226 			 * failed there too. This is very unlikely to
5227 			 * happen, but if it does, warn and kill all
5228 			 * tracing.
5229 			 */
5230 			WARN_ON(1);
5231 			tracing_disabled = 1;
5232 		}
5233 		return ret;
5234 	}
5235 
5236 	if (cpu == RING_BUFFER_ALL_CPUS)
5237 		set_buffer_entries(&tr->max_buffer, size);
5238 	else
5239 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5240 
5241  out:
5242 #endif /* CONFIG_TRACER_MAX_TRACE */
5243 
5244 	if (cpu == RING_BUFFER_ALL_CPUS)
5245 		set_buffer_entries(&tr->trace_buffer, size);
5246 	else
5247 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5248 
5249 	return ret;
5250 }
5251 
5252 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5253 					  unsigned long size, int cpu_id)
5254 {
5255 	int ret = size;
5256 
5257 	mutex_lock(&trace_types_lock);
5258 
5259 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5260 		/* make sure, this cpu is enabled in the mask */
5261 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5262 			ret = -EINVAL;
5263 			goto out;
5264 		}
5265 	}
5266 
5267 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5268 	if (ret < 0)
5269 		ret = -ENOMEM;
5270 
5271 out:
5272 	mutex_unlock(&trace_types_lock);
5273 
5274 	return ret;
5275 }
5276 
5277 
5278 /**
5279  * tracing_update_buffers - used by tracing facility to expand ring buffers
5280  *
5281  * To save on memory when the tracing is never used on a system with it
5282  * configured in. The ring buffers are set to a minimum size. But once
5283  * a user starts to use the tracing facility, then they need to grow
5284  * to their default size.
5285  *
5286  * This function is to be called when a tracer is about to be used.
5287  */
5288 int tracing_update_buffers(void)
5289 {
5290 	int ret = 0;
5291 
5292 	mutex_lock(&trace_types_lock);
5293 	if (!ring_buffer_expanded)
5294 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5295 						RING_BUFFER_ALL_CPUS);
5296 	mutex_unlock(&trace_types_lock);
5297 
5298 	return ret;
5299 }
5300 
5301 struct trace_option_dentry;
5302 
5303 static void
5304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5305 
5306 /*
5307  * Used to clear out the tracer before deletion of an instance.
5308  * Must have trace_types_lock held.
5309  */
5310 static void tracing_set_nop(struct trace_array *tr)
5311 {
5312 	if (tr->current_trace == &nop_trace)
5313 		return;
5314 
5315 	tr->current_trace->enabled--;
5316 
5317 	if (tr->current_trace->reset)
5318 		tr->current_trace->reset(tr);
5319 
5320 	tr->current_trace = &nop_trace;
5321 }
5322 
5323 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5324 {
5325 	/* Only enable if the directory has been created already. */
5326 	if (!tr->dir)
5327 		return;
5328 
5329 	create_trace_option_files(tr, t);
5330 }
5331 
5332 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5333 {
5334 	struct tracer *t;
5335 #ifdef CONFIG_TRACER_MAX_TRACE
5336 	bool had_max_tr;
5337 #endif
5338 	int ret = 0;
5339 
5340 	mutex_lock(&trace_types_lock);
5341 
5342 	if (!ring_buffer_expanded) {
5343 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5344 						RING_BUFFER_ALL_CPUS);
5345 		if (ret < 0)
5346 			goto out;
5347 		ret = 0;
5348 	}
5349 
5350 	for (t = trace_types; t; t = t->next) {
5351 		if (strcmp(t->name, buf) == 0)
5352 			break;
5353 	}
5354 	if (!t) {
5355 		ret = -EINVAL;
5356 		goto out;
5357 	}
5358 	if (t == tr->current_trace)
5359 		goto out;
5360 
5361 	/* Some tracers won't work on kernel command line */
5362 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5363 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5364 			t->name);
5365 		goto out;
5366 	}
5367 
5368 	/* Some tracers are only allowed for the top level buffer */
5369 	if (!trace_ok_for_array(t, tr)) {
5370 		ret = -EINVAL;
5371 		goto out;
5372 	}
5373 
5374 	/* If trace pipe files are being read, we can't change the tracer */
5375 	if (tr->current_trace->ref) {
5376 		ret = -EBUSY;
5377 		goto out;
5378 	}
5379 
5380 	trace_branch_disable();
5381 
5382 	tr->current_trace->enabled--;
5383 
5384 	if (tr->current_trace->reset)
5385 		tr->current_trace->reset(tr);
5386 
5387 	/* Current trace needs to be nop_trace before synchronize_sched */
5388 	tr->current_trace = &nop_trace;
5389 
5390 #ifdef CONFIG_TRACER_MAX_TRACE
5391 	had_max_tr = tr->allocated_snapshot;
5392 
5393 	if (had_max_tr && !t->use_max_tr) {
5394 		/*
5395 		 * We need to make sure that the update_max_tr sees that
5396 		 * current_trace changed to nop_trace to keep it from
5397 		 * swapping the buffers after we resize it.
5398 		 * The update_max_tr is called from interrupts disabled
5399 		 * so a synchronized_sched() is sufficient.
5400 		 */
5401 		synchronize_sched();
5402 		free_snapshot(tr);
5403 	}
5404 #endif
5405 
5406 #ifdef CONFIG_TRACER_MAX_TRACE
5407 	if (t->use_max_tr && !had_max_tr) {
5408 		ret = tracing_alloc_snapshot_instance(tr);
5409 		if (ret < 0)
5410 			goto out;
5411 	}
5412 #endif
5413 
5414 	if (t->init) {
5415 		ret = tracer_init(t, tr);
5416 		if (ret)
5417 			goto out;
5418 	}
5419 
5420 	tr->current_trace = t;
5421 	tr->current_trace->enabled++;
5422 	trace_branch_enable(tr);
5423  out:
5424 	mutex_unlock(&trace_types_lock);
5425 
5426 	return ret;
5427 }
5428 
5429 static ssize_t
5430 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5431 			size_t cnt, loff_t *ppos)
5432 {
5433 	struct trace_array *tr = filp->private_data;
5434 	char buf[MAX_TRACER_SIZE+1];
5435 	int i;
5436 	size_t ret;
5437 	int err;
5438 
5439 	ret = cnt;
5440 
5441 	if (cnt > MAX_TRACER_SIZE)
5442 		cnt = MAX_TRACER_SIZE;
5443 
5444 	if (copy_from_user(buf, ubuf, cnt))
5445 		return -EFAULT;
5446 
5447 	buf[cnt] = 0;
5448 
5449 	/* strip ending whitespace. */
5450 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5451 		buf[i] = 0;
5452 
5453 	err = tracing_set_tracer(tr, buf);
5454 	if (err)
5455 		return err;
5456 
5457 	*ppos += ret;
5458 
5459 	return ret;
5460 }
5461 
5462 static ssize_t
5463 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5464 		   size_t cnt, loff_t *ppos)
5465 {
5466 	char buf[64];
5467 	int r;
5468 
5469 	r = snprintf(buf, sizeof(buf), "%ld\n",
5470 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5471 	if (r > sizeof(buf))
5472 		r = sizeof(buf);
5473 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5474 }
5475 
5476 static ssize_t
5477 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5478 		    size_t cnt, loff_t *ppos)
5479 {
5480 	unsigned long val;
5481 	int ret;
5482 
5483 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5484 	if (ret)
5485 		return ret;
5486 
5487 	*ptr = val * 1000;
5488 
5489 	return cnt;
5490 }
5491 
5492 static ssize_t
5493 tracing_thresh_read(struct file *filp, char __user *ubuf,
5494 		    size_t cnt, loff_t *ppos)
5495 {
5496 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5497 }
5498 
5499 static ssize_t
5500 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5501 		     size_t cnt, loff_t *ppos)
5502 {
5503 	struct trace_array *tr = filp->private_data;
5504 	int ret;
5505 
5506 	mutex_lock(&trace_types_lock);
5507 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5508 	if (ret < 0)
5509 		goto out;
5510 
5511 	if (tr->current_trace->update_thresh) {
5512 		ret = tr->current_trace->update_thresh(tr);
5513 		if (ret < 0)
5514 			goto out;
5515 	}
5516 
5517 	ret = cnt;
5518 out:
5519 	mutex_unlock(&trace_types_lock);
5520 
5521 	return ret;
5522 }
5523 
5524 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5525 
5526 static ssize_t
5527 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5528 		     size_t cnt, loff_t *ppos)
5529 {
5530 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5531 }
5532 
5533 static ssize_t
5534 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5535 		      size_t cnt, loff_t *ppos)
5536 {
5537 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5538 }
5539 
5540 #endif
5541 
5542 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5543 {
5544 	struct trace_array *tr = inode->i_private;
5545 	struct trace_iterator *iter;
5546 	int ret = 0;
5547 
5548 	if (tracing_disabled)
5549 		return -ENODEV;
5550 
5551 	if (trace_array_get(tr) < 0)
5552 		return -ENODEV;
5553 
5554 	mutex_lock(&trace_types_lock);
5555 
5556 	/* create a buffer to store the information to pass to userspace */
5557 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5558 	if (!iter) {
5559 		ret = -ENOMEM;
5560 		__trace_array_put(tr);
5561 		goto out;
5562 	}
5563 
5564 	trace_seq_init(&iter->seq);
5565 	iter->trace = tr->current_trace;
5566 
5567 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5568 		ret = -ENOMEM;
5569 		goto fail;
5570 	}
5571 
5572 	/* trace pipe does not show start of buffer */
5573 	cpumask_setall(iter->started);
5574 
5575 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5576 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5577 
5578 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5579 	if (trace_clocks[tr->clock_id].in_ns)
5580 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5581 
5582 	iter->tr = tr;
5583 	iter->trace_buffer = &tr->trace_buffer;
5584 	iter->cpu_file = tracing_get_cpu(inode);
5585 	mutex_init(&iter->mutex);
5586 	filp->private_data = iter;
5587 
5588 	if (iter->trace->pipe_open)
5589 		iter->trace->pipe_open(iter);
5590 
5591 	nonseekable_open(inode, filp);
5592 
5593 	tr->current_trace->ref++;
5594 out:
5595 	mutex_unlock(&trace_types_lock);
5596 	return ret;
5597 
5598 fail:
5599 	kfree(iter->trace);
5600 	kfree(iter);
5601 	__trace_array_put(tr);
5602 	mutex_unlock(&trace_types_lock);
5603 	return ret;
5604 }
5605 
5606 static int tracing_release_pipe(struct inode *inode, struct file *file)
5607 {
5608 	struct trace_iterator *iter = file->private_data;
5609 	struct trace_array *tr = inode->i_private;
5610 
5611 	mutex_lock(&trace_types_lock);
5612 
5613 	tr->current_trace->ref--;
5614 
5615 	if (iter->trace->pipe_close)
5616 		iter->trace->pipe_close(iter);
5617 
5618 	mutex_unlock(&trace_types_lock);
5619 
5620 	free_cpumask_var(iter->started);
5621 	mutex_destroy(&iter->mutex);
5622 	kfree(iter);
5623 
5624 	trace_array_put(tr);
5625 
5626 	return 0;
5627 }
5628 
5629 static __poll_t
5630 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5631 {
5632 	struct trace_array *tr = iter->tr;
5633 
5634 	/* Iterators are static, they should be filled or empty */
5635 	if (trace_buffer_iter(iter, iter->cpu_file))
5636 		return EPOLLIN | EPOLLRDNORM;
5637 
5638 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5639 		/*
5640 		 * Always select as readable when in blocking mode
5641 		 */
5642 		return EPOLLIN | EPOLLRDNORM;
5643 	else
5644 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5645 					     filp, poll_table);
5646 }
5647 
5648 static __poll_t
5649 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5650 {
5651 	struct trace_iterator *iter = filp->private_data;
5652 
5653 	return trace_poll(iter, filp, poll_table);
5654 }
5655 
5656 /* Must be called with iter->mutex held. */
5657 static int tracing_wait_pipe(struct file *filp)
5658 {
5659 	struct trace_iterator *iter = filp->private_data;
5660 	int ret;
5661 
5662 	while (trace_empty(iter)) {
5663 
5664 		if ((filp->f_flags & O_NONBLOCK)) {
5665 			return -EAGAIN;
5666 		}
5667 
5668 		/*
5669 		 * We block until we read something and tracing is disabled.
5670 		 * We still block if tracing is disabled, but we have never
5671 		 * read anything. This allows a user to cat this file, and
5672 		 * then enable tracing. But after we have read something,
5673 		 * we give an EOF when tracing is again disabled.
5674 		 *
5675 		 * iter->pos will be 0 if we haven't read anything.
5676 		 */
5677 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5678 			break;
5679 
5680 		mutex_unlock(&iter->mutex);
5681 
5682 		ret = wait_on_pipe(iter, false);
5683 
5684 		mutex_lock(&iter->mutex);
5685 
5686 		if (ret)
5687 			return ret;
5688 	}
5689 
5690 	return 1;
5691 }
5692 
5693 /*
5694  * Consumer reader.
5695  */
5696 static ssize_t
5697 tracing_read_pipe(struct file *filp, char __user *ubuf,
5698 		  size_t cnt, loff_t *ppos)
5699 {
5700 	struct trace_iterator *iter = filp->private_data;
5701 	ssize_t sret;
5702 
5703 	/*
5704 	 * Avoid more than one consumer on a single file descriptor
5705 	 * This is just a matter of traces coherency, the ring buffer itself
5706 	 * is protected.
5707 	 */
5708 	mutex_lock(&iter->mutex);
5709 
5710 	/* return any leftover data */
5711 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5712 	if (sret != -EBUSY)
5713 		goto out;
5714 
5715 	trace_seq_init(&iter->seq);
5716 
5717 	if (iter->trace->read) {
5718 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5719 		if (sret)
5720 			goto out;
5721 	}
5722 
5723 waitagain:
5724 	sret = tracing_wait_pipe(filp);
5725 	if (sret <= 0)
5726 		goto out;
5727 
5728 	/* stop when tracing is finished */
5729 	if (trace_empty(iter)) {
5730 		sret = 0;
5731 		goto out;
5732 	}
5733 
5734 	if (cnt >= PAGE_SIZE)
5735 		cnt = PAGE_SIZE - 1;
5736 
5737 	/* reset all but tr, trace, and overruns */
5738 	memset(&iter->seq, 0,
5739 	       sizeof(struct trace_iterator) -
5740 	       offsetof(struct trace_iterator, seq));
5741 	cpumask_clear(iter->started);
5742 	iter->pos = -1;
5743 
5744 	trace_event_read_lock();
5745 	trace_access_lock(iter->cpu_file);
5746 	while (trace_find_next_entry_inc(iter) != NULL) {
5747 		enum print_line_t ret;
5748 		int save_len = iter->seq.seq.len;
5749 
5750 		ret = print_trace_line(iter);
5751 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5752 			/* don't print partial lines */
5753 			iter->seq.seq.len = save_len;
5754 			break;
5755 		}
5756 		if (ret != TRACE_TYPE_NO_CONSUME)
5757 			trace_consume(iter);
5758 
5759 		if (trace_seq_used(&iter->seq) >= cnt)
5760 			break;
5761 
5762 		/*
5763 		 * Setting the full flag means we reached the trace_seq buffer
5764 		 * size and we should leave by partial output condition above.
5765 		 * One of the trace_seq_* functions is not used properly.
5766 		 */
5767 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5768 			  iter->ent->type);
5769 	}
5770 	trace_access_unlock(iter->cpu_file);
5771 	trace_event_read_unlock();
5772 
5773 	/* Now copy what we have to the user */
5774 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5775 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5776 		trace_seq_init(&iter->seq);
5777 
5778 	/*
5779 	 * If there was nothing to send to user, in spite of consuming trace
5780 	 * entries, go back to wait for more entries.
5781 	 */
5782 	if (sret == -EBUSY)
5783 		goto waitagain;
5784 
5785 out:
5786 	mutex_unlock(&iter->mutex);
5787 
5788 	return sret;
5789 }
5790 
5791 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5792 				     unsigned int idx)
5793 {
5794 	__free_page(spd->pages[idx]);
5795 }
5796 
5797 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5798 	.can_merge		= 0,
5799 	.confirm		= generic_pipe_buf_confirm,
5800 	.release		= generic_pipe_buf_release,
5801 	.steal			= generic_pipe_buf_steal,
5802 	.get			= generic_pipe_buf_get,
5803 };
5804 
5805 static size_t
5806 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5807 {
5808 	size_t count;
5809 	int save_len;
5810 	int ret;
5811 
5812 	/* Seq buffer is page-sized, exactly what we need. */
5813 	for (;;) {
5814 		save_len = iter->seq.seq.len;
5815 		ret = print_trace_line(iter);
5816 
5817 		if (trace_seq_has_overflowed(&iter->seq)) {
5818 			iter->seq.seq.len = save_len;
5819 			break;
5820 		}
5821 
5822 		/*
5823 		 * This should not be hit, because it should only
5824 		 * be set if the iter->seq overflowed. But check it
5825 		 * anyway to be safe.
5826 		 */
5827 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5828 			iter->seq.seq.len = save_len;
5829 			break;
5830 		}
5831 
5832 		count = trace_seq_used(&iter->seq) - save_len;
5833 		if (rem < count) {
5834 			rem = 0;
5835 			iter->seq.seq.len = save_len;
5836 			break;
5837 		}
5838 
5839 		if (ret != TRACE_TYPE_NO_CONSUME)
5840 			trace_consume(iter);
5841 		rem -= count;
5842 		if (!trace_find_next_entry_inc(iter))	{
5843 			rem = 0;
5844 			iter->ent = NULL;
5845 			break;
5846 		}
5847 	}
5848 
5849 	return rem;
5850 }
5851 
5852 static ssize_t tracing_splice_read_pipe(struct file *filp,
5853 					loff_t *ppos,
5854 					struct pipe_inode_info *pipe,
5855 					size_t len,
5856 					unsigned int flags)
5857 {
5858 	struct page *pages_def[PIPE_DEF_BUFFERS];
5859 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5860 	struct trace_iterator *iter = filp->private_data;
5861 	struct splice_pipe_desc spd = {
5862 		.pages		= pages_def,
5863 		.partial	= partial_def,
5864 		.nr_pages	= 0, /* This gets updated below. */
5865 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5866 		.ops		= &tracing_pipe_buf_ops,
5867 		.spd_release	= tracing_spd_release_pipe,
5868 	};
5869 	ssize_t ret;
5870 	size_t rem;
5871 	unsigned int i;
5872 
5873 	if (splice_grow_spd(pipe, &spd))
5874 		return -ENOMEM;
5875 
5876 	mutex_lock(&iter->mutex);
5877 
5878 	if (iter->trace->splice_read) {
5879 		ret = iter->trace->splice_read(iter, filp,
5880 					       ppos, pipe, len, flags);
5881 		if (ret)
5882 			goto out_err;
5883 	}
5884 
5885 	ret = tracing_wait_pipe(filp);
5886 	if (ret <= 0)
5887 		goto out_err;
5888 
5889 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5890 		ret = -EFAULT;
5891 		goto out_err;
5892 	}
5893 
5894 	trace_event_read_lock();
5895 	trace_access_lock(iter->cpu_file);
5896 
5897 	/* Fill as many pages as possible. */
5898 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5899 		spd.pages[i] = alloc_page(GFP_KERNEL);
5900 		if (!spd.pages[i])
5901 			break;
5902 
5903 		rem = tracing_fill_pipe_page(rem, iter);
5904 
5905 		/* Copy the data into the page, so we can start over. */
5906 		ret = trace_seq_to_buffer(&iter->seq,
5907 					  page_address(spd.pages[i]),
5908 					  trace_seq_used(&iter->seq));
5909 		if (ret < 0) {
5910 			__free_page(spd.pages[i]);
5911 			break;
5912 		}
5913 		spd.partial[i].offset = 0;
5914 		spd.partial[i].len = trace_seq_used(&iter->seq);
5915 
5916 		trace_seq_init(&iter->seq);
5917 	}
5918 
5919 	trace_access_unlock(iter->cpu_file);
5920 	trace_event_read_unlock();
5921 	mutex_unlock(&iter->mutex);
5922 
5923 	spd.nr_pages = i;
5924 
5925 	if (i)
5926 		ret = splice_to_pipe(pipe, &spd);
5927 	else
5928 		ret = 0;
5929 out:
5930 	splice_shrink_spd(&spd);
5931 	return ret;
5932 
5933 out_err:
5934 	mutex_unlock(&iter->mutex);
5935 	goto out;
5936 }
5937 
5938 static ssize_t
5939 tracing_entries_read(struct file *filp, char __user *ubuf,
5940 		     size_t cnt, loff_t *ppos)
5941 {
5942 	struct inode *inode = file_inode(filp);
5943 	struct trace_array *tr = inode->i_private;
5944 	int cpu = tracing_get_cpu(inode);
5945 	char buf[64];
5946 	int r = 0;
5947 	ssize_t ret;
5948 
5949 	mutex_lock(&trace_types_lock);
5950 
5951 	if (cpu == RING_BUFFER_ALL_CPUS) {
5952 		int cpu, buf_size_same;
5953 		unsigned long size;
5954 
5955 		size = 0;
5956 		buf_size_same = 1;
5957 		/* check if all cpu sizes are same */
5958 		for_each_tracing_cpu(cpu) {
5959 			/* fill in the size from first enabled cpu */
5960 			if (size == 0)
5961 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5962 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5963 				buf_size_same = 0;
5964 				break;
5965 			}
5966 		}
5967 
5968 		if (buf_size_same) {
5969 			if (!ring_buffer_expanded)
5970 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5971 					    size >> 10,
5972 					    trace_buf_size >> 10);
5973 			else
5974 				r = sprintf(buf, "%lu\n", size >> 10);
5975 		} else
5976 			r = sprintf(buf, "X\n");
5977 	} else
5978 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5979 
5980 	mutex_unlock(&trace_types_lock);
5981 
5982 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5983 	return ret;
5984 }
5985 
5986 static ssize_t
5987 tracing_entries_write(struct file *filp, const char __user *ubuf,
5988 		      size_t cnt, loff_t *ppos)
5989 {
5990 	struct inode *inode = file_inode(filp);
5991 	struct trace_array *tr = inode->i_private;
5992 	unsigned long val;
5993 	int ret;
5994 
5995 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5996 	if (ret)
5997 		return ret;
5998 
5999 	/* must have at least 1 entry */
6000 	if (!val)
6001 		return -EINVAL;
6002 
6003 	/* value is in KB */
6004 	val <<= 10;
6005 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6006 	if (ret < 0)
6007 		return ret;
6008 
6009 	*ppos += cnt;
6010 
6011 	return cnt;
6012 }
6013 
6014 static ssize_t
6015 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6016 				size_t cnt, loff_t *ppos)
6017 {
6018 	struct trace_array *tr = filp->private_data;
6019 	char buf[64];
6020 	int r, cpu;
6021 	unsigned long size = 0, expanded_size = 0;
6022 
6023 	mutex_lock(&trace_types_lock);
6024 	for_each_tracing_cpu(cpu) {
6025 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6026 		if (!ring_buffer_expanded)
6027 			expanded_size += trace_buf_size >> 10;
6028 	}
6029 	if (ring_buffer_expanded)
6030 		r = sprintf(buf, "%lu\n", size);
6031 	else
6032 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6033 	mutex_unlock(&trace_types_lock);
6034 
6035 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6036 }
6037 
6038 static ssize_t
6039 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6040 			  size_t cnt, loff_t *ppos)
6041 {
6042 	/*
6043 	 * There is no need to read what the user has written, this function
6044 	 * is just to make sure that there is no error when "echo" is used
6045 	 */
6046 
6047 	*ppos += cnt;
6048 
6049 	return cnt;
6050 }
6051 
6052 static int
6053 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6054 {
6055 	struct trace_array *tr = inode->i_private;
6056 
6057 	/* disable tracing ? */
6058 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6059 		tracer_tracing_off(tr);
6060 	/* resize the ring buffer to 0 */
6061 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6062 
6063 	trace_array_put(tr);
6064 
6065 	return 0;
6066 }
6067 
6068 static ssize_t
6069 tracing_mark_write(struct file *filp, const char __user *ubuf,
6070 					size_t cnt, loff_t *fpos)
6071 {
6072 	struct trace_array *tr = filp->private_data;
6073 	struct ring_buffer_event *event;
6074 	enum event_trigger_type tt = ETT_NONE;
6075 	struct ring_buffer *buffer;
6076 	struct print_entry *entry;
6077 	unsigned long irq_flags;
6078 	const char faulted[] = "<faulted>";
6079 	ssize_t written;
6080 	int size;
6081 	int len;
6082 
6083 /* Used in tracing_mark_raw_write() as well */
6084 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6085 
6086 	if (tracing_disabled)
6087 		return -EINVAL;
6088 
6089 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6090 		return -EINVAL;
6091 
6092 	if (cnt > TRACE_BUF_SIZE)
6093 		cnt = TRACE_BUF_SIZE;
6094 
6095 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6096 
6097 	local_save_flags(irq_flags);
6098 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6099 
6100 	/* If less than "<faulted>", then make sure we can still add that */
6101 	if (cnt < FAULTED_SIZE)
6102 		size += FAULTED_SIZE - cnt;
6103 
6104 	buffer = tr->trace_buffer.buffer;
6105 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6106 					    irq_flags, preempt_count());
6107 	if (unlikely(!event))
6108 		/* Ring buffer disabled, return as if not open for write */
6109 		return -EBADF;
6110 
6111 	entry = ring_buffer_event_data(event);
6112 	entry->ip = _THIS_IP_;
6113 
6114 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6115 	if (len) {
6116 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6117 		cnt = FAULTED_SIZE;
6118 		written = -EFAULT;
6119 	} else
6120 		written = cnt;
6121 	len = cnt;
6122 
6123 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6124 		/* do not add \n before testing triggers, but add \0 */
6125 		entry->buf[cnt] = '\0';
6126 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6127 	}
6128 
6129 	if (entry->buf[cnt - 1] != '\n') {
6130 		entry->buf[cnt] = '\n';
6131 		entry->buf[cnt + 1] = '\0';
6132 	} else
6133 		entry->buf[cnt] = '\0';
6134 
6135 	__buffer_unlock_commit(buffer, event);
6136 
6137 	if (tt)
6138 		event_triggers_post_call(tr->trace_marker_file, tt);
6139 
6140 	if (written > 0)
6141 		*fpos += written;
6142 
6143 	return written;
6144 }
6145 
6146 /* Limit it for now to 3K (including tag) */
6147 #define RAW_DATA_MAX_SIZE (1024*3)
6148 
6149 static ssize_t
6150 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6151 					size_t cnt, loff_t *fpos)
6152 {
6153 	struct trace_array *tr = filp->private_data;
6154 	struct ring_buffer_event *event;
6155 	struct ring_buffer *buffer;
6156 	struct raw_data_entry *entry;
6157 	const char faulted[] = "<faulted>";
6158 	unsigned long irq_flags;
6159 	ssize_t written;
6160 	int size;
6161 	int len;
6162 
6163 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6164 
6165 	if (tracing_disabled)
6166 		return -EINVAL;
6167 
6168 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6169 		return -EINVAL;
6170 
6171 	/* The marker must at least have a tag id */
6172 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6173 		return -EINVAL;
6174 
6175 	if (cnt > TRACE_BUF_SIZE)
6176 		cnt = TRACE_BUF_SIZE;
6177 
6178 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6179 
6180 	local_save_flags(irq_flags);
6181 	size = sizeof(*entry) + cnt;
6182 	if (cnt < FAULT_SIZE_ID)
6183 		size += FAULT_SIZE_ID - cnt;
6184 
6185 	buffer = tr->trace_buffer.buffer;
6186 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6187 					    irq_flags, preempt_count());
6188 	if (!event)
6189 		/* Ring buffer disabled, return as if not open for write */
6190 		return -EBADF;
6191 
6192 	entry = ring_buffer_event_data(event);
6193 
6194 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6195 	if (len) {
6196 		entry->id = -1;
6197 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6198 		written = -EFAULT;
6199 	} else
6200 		written = cnt;
6201 
6202 	__buffer_unlock_commit(buffer, event);
6203 
6204 	if (written > 0)
6205 		*fpos += written;
6206 
6207 	return written;
6208 }
6209 
6210 static int tracing_clock_show(struct seq_file *m, void *v)
6211 {
6212 	struct trace_array *tr = m->private;
6213 	int i;
6214 
6215 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6216 		seq_printf(m,
6217 			"%s%s%s%s", i ? " " : "",
6218 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6219 			i == tr->clock_id ? "]" : "");
6220 	seq_putc(m, '\n');
6221 
6222 	return 0;
6223 }
6224 
6225 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6226 {
6227 	int i;
6228 
6229 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6230 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6231 			break;
6232 	}
6233 	if (i == ARRAY_SIZE(trace_clocks))
6234 		return -EINVAL;
6235 
6236 	mutex_lock(&trace_types_lock);
6237 
6238 	tr->clock_id = i;
6239 
6240 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6241 
6242 	/*
6243 	 * New clock may not be consistent with the previous clock.
6244 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6245 	 */
6246 	tracing_reset_online_cpus(&tr->trace_buffer);
6247 
6248 #ifdef CONFIG_TRACER_MAX_TRACE
6249 	if (tr->max_buffer.buffer)
6250 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6251 	tracing_reset_online_cpus(&tr->max_buffer);
6252 #endif
6253 
6254 	mutex_unlock(&trace_types_lock);
6255 
6256 	return 0;
6257 }
6258 
6259 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6260 				   size_t cnt, loff_t *fpos)
6261 {
6262 	struct seq_file *m = filp->private_data;
6263 	struct trace_array *tr = m->private;
6264 	char buf[64];
6265 	const char *clockstr;
6266 	int ret;
6267 
6268 	if (cnt >= sizeof(buf))
6269 		return -EINVAL;
6270 
6271 	if (copy_from_user(buf, ubuf, cnt))
6272 		return -EFAULT;
6273 
6274 	buf[cnt] = 0;
6275 
6276 	clockstr = strstrip(buf);
6277 
6278 	ret = tracing_set_clock(tr, clockstr);
6279 	if (ret)
6280 		return ret;
6281 
6282 	*fpos += cnt;
6283 
6284 	return cnt;
6285 }
6286 
6287 static int tracing_clock_open(struct inode *inode, struct file *file)
6288 {
6289 	struct trace_array *tr = inode->i_private;
6290 	int ret;
6291 
6292 	if (tracing_disabled)
6293 		return -ENODEV;
6294 
6295 	if (trace_array_get(tr))
6296 		return -ENODEV;
6297 
6298 	ret = single_open(file, tracing_clock_show, inode->i_private);
6299 	if (ret < 0)
6300 		trace_array_put(tr);
6301 
6302 	return ret;
6303 }
6304 
6305 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6306 {
6307 	struct trace_array *tr = m->private;
6308 
6309 	mutex_lock(&trace_types_lock);
6310 
6311 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6312 		seq_puts(m, "delta [absolute]\n");
6313 	else
6314 		seq_puts(m, "[delta] absolute\n");
6315 
6316 	mutex_unlock(&trace_types_lock);
6317 
6318 	return 0;
6319 }
6320 
6321 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6322 {
6323 	struct trace_array *tr = inode->i_private;
6324 	int ret;
6325 
6326 	if (tracing_disabled)
6327 		return -ENODEV;
6328 
6329 	if (trace_array_get(tr))
6330 		return -ENODEV;
6331 
6332 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6333 	if (ret < 0)
6334 		trace_array_put(tr);
6335 
6336 	return ret;
6337 }
6338 
6339 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6340 {
6341 	int ret = 0;
6342 
6343 	mutex_lock(&trace_types_lock);
6344 
6345 	if (abs && tr->time_stamp_abs_ref++)
6346 		goto out;
6347 
6348 	if (!abs) {
6349 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6350 			ret = -EINVAL;
6351 			goto out;
6352 		}
6353 
6354 		if (--tr->time_stamp_abs_ref)
6355 			goto out;
6356 	}
6357 
6358 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6359 
6360 #ifdef CONFIG_TRACER_MAX_TRACE
6361 	if (tr->max_buffer.buffer)
6362 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6363 #endif
6364  out:
6365 	mutex_unlock(&trace_types_lock);
6366 
6367 	return ret;
6368 }
6369 
6370 struct ftrace_buffer_info {
6371 	struct trace_iterator	iter;
6372 	void			*spare;
6373 	unsigned int		spare_cpu;
6374 	unsigned int		read;
6375 };
6376 
6377 #ifdef CONFIG_TRACER_SNAPSHOT
6378 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6379 {
6380 	struct trace_array *tr = inode->i_private;
6381 	struct trace_iterator *iter;
6382 	struct seq_file *m;
6383 	int ret = 0;
6384 
6385 	if (trace_array_get(tr) < 0)
6386 		return -ENODEV;
6387 
6388 	if (file->f_mode & FMODE_READ) {
6389 		iter = __tracing_open(inode, file, true);
6390 		if (IS_ERR(iter))
6391 			ret = PTR_ERR(iter);
6392 	} else {
6393 		/* Writes still need the seq_file to hold the private data */
6394 		ret = -ENOMEM;
6395 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6396 		if (!m)
6397 			goto out;
6398 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6399 		if (!iter) {
6400 			kfree(m);
6401 			goto out;
6402 		}
6403 		ret = 0;
6404 
6405 		iter->tr = tr;
6406 		iter->trace_buffer = &tr->max_buffer;
6407 		iter->cpu_file = tracing_get_cpu(inode);
6408 		m->private = iter;
6409 		file->private_data = m;
6410 	}
6411 out:
6412 	if (ret < 0)
6413 		trace_array_put(tr);
6414 
6415 	return ret;
6416 }
6417 
6418 static ssize_t
6419 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6420 		       loff_t *ppos)
6421 {
6422 	struct seq_file *m = filp->private_data;
6423 	struct trace_iterator *iter = m->private;
6424 	struct trace_array *tr = iter->tr;
6425 	unsigned long val;
6426 	int ret;
6427 
6428 	ret = tracing_update_buffers();
6429 	if (ret < 0)
6430 		return ret;
6431 
6432 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6433 	if (ret)
6434 		return ret;
6435 
6436 	mutex_lock(&trace_types_lock);
6437 
6438 	if (tr->current_trace->use_max_tr) {
6439 		ret = -EBUSY;
6440 		goto out;
6441 	}
6442 
6443 	switch (val) {
6444 	case 0:
6445 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6446 			ret = -EINVAL;
6447 			break;
6448 		}
6449 		if (tr->allocated_snapshot)
6450 			free_snapshot(tr);
6451 		break;
6452 	case 1:
6453 /* Only allow per-cpu swap if the ring buffer supports it */
6454 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6455 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6456 			ret = -EINVAL;
6457 			break;
6458 		}
6459 #endif
6460 		if (!tr->allocated_snapshot) {
6461 			ret = tracing_alloc_snapshot_instance(tr);
6462 			if (ret < 0)
6463 				break;
6464 		}
6465 		local_irq_disable();
6466 		/* Now, we're going to swap */
6467 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6468 			update_max_tr(tr, current, smp_processor_id());
6469 		else
6470 			update_max_tr_single(tr, current, iter->cpu_file);
6471 		local_irq_enable();
6472 		break;
6473 	default:
6474 		if (tr->allocated_snapshot) {
6475 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6476 				tracing_reset_online_cpus(&tr->max_buffer);
6477 			else
6478 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6479 		}
6480 		break;
6481 	}
6482 
6483 	if (ret >= 0) {
6484 		*ppos += cnt;
6485 		ret = cnt;
6486 	}
6487 out:
6488 	mutex_unlock(&trace_types_lock);
6489 	return ret;
6490 }
6491 
6492 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6493 {
6494 	struct seq_file *m = file->private_data;
6495 	int ret;
6496 
6497 	ret = tracing_release(inode, file);
6498 
6499 	if (file->f_mode & FMODE_READ)
6500 		return ret;
6501 
6502 	/* If write only, the seq_file is just a stub */
6503 	if (m)
6504 		kfree(m->private);
6505 	kfree(m);
6506 
6507 	return 0;
6508 }
6509 
6510 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6511 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6512 				    size_t count, loff_t *ppos);
6513 static int tracing_buffers_release(struct inode *inode, struct file *file);
6514 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6515 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6516 
6517 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6518 {
6519 	struct ftrace_buffer_info *info;
6520 	int ret;
6521 
6522 	ret = tracing_buffers_open(inode, filp);
6523 	if (ret < 0)
6524 		return ret;
6525 
6526 	info = filp->private_data;
6527 
6528 	if (info->iter.trace->use_max_tr) {
6529 		tracing_buffers_release(inode, filp);
6530 		return -EBUSY;
6531 	}
6532 
6533 	info->iter.snapshot = true;
6534 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6535 
6536 	return ret;
6537 }
6538 
6539 #endif /* CONFIG_TRACER_SNAPSHOT */
6540 
6541 
6542 static const struct file_operations tracing_thresh_fops = {
6543 	.open		= tracing_open_generic,
6544 	.read		= tracing_thresh_read,
6545 	.write		= tracing_thresh_write,
6546 	.llseek		= generic_file_llseek,
6547 };
6548 
6549 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6550 static const struct file_operations tracing_max_lat_fops = {
6551 	.open		= tracing_open_generic,
6552 	.read		= tracing_max_lat_read,
6553 	.write		= tracing_max_lat_write,
6554 	.llseek		= generic_file_llseek,
6555 };
6556 #endif
6557 
6558 static const struct file_operations set_tracer_fops = {
6559 	.open		= tracing_open_generic,
6560 	.read		= tracing_set_trace_read,
6561 	.write		= tracing_set_trace_write,
6562 	.llseek		= generic_file_llseek,
6563 };
6564 
6565 static const struct file_operations tracing_pipe_fops = {
6566 	.open		= tracing_open_pipe,
6567 	.poll		= tracing_poll_pipe,
6568 	.read		= tracing_read_pipe,
6569 	.splice_read	= tracing_splice_read_pipe,
6570 	.release	= tracing_release_pipe,
6571 	.llseek		= no_llseek,
6572 };
6573 
6574 static const struct file_operations tracing_entries_fops = {
6575 	.open		= tracing_open_generic_tr,
6576 	.read		= tracing_entries_read,
6577 	.write		= tracing_entries_write,
6578 	.llseek		= generic_file_llseek,
6579 	.release	= tracing_release_generic_tr,
6580 };
6581 
6582 static const struct file_operations tracing_total_entries_fops = {
6583 	.open		= tracing_open_generic_tr,
6584 	.read		= tracing_total_entries_read,
6585 	.llseek		= generic_file_llseek,
6586 	.release	= tracing_release_generic_tr,
6587 };
6588 
6589 static const struct file_operations tracing_free_buffer_fops = {
6590 	.open		= tracing_open_generic_tr,
6591 	.write		= tracing_free_buffer_write,
6592 	.release	= tracing_free_buffer_release,
6593 };
6594 
6595 static const struct file_operations tracing_mark_fops = {
6596 	.open		= tracing_open_generic_tr,
6597 	.write		= tracing_mark_write,
6598 	.llseek		= generic_file_llseek,
6599 	.release	= tracing_release_generic_tr,
6600 };
6601 
6602 static const struct file_operations tracing_mark_raw_fops = {
6603 	.open		= tracing_open_generic_tr,
6604 	.write		= tracing_mark_raw_write,
6605 	.llseek		= generic_file_llseek,
6606 	.release	= tracing_release_generic_tr,
6607 };
6608 
6609 static const struct file_operations trace_clock_fops = {
6610 	.open		= tracing_clock_open,
6611 	.read		= seq_read,
6612 	.llseek		= seq_lseek,
6613 	.release	= tracing_single_release_tr,
6614 	.write		= tracing_clock_write,
6615 };
6616 
6617 static const struct file_operations trace_time_stamp_mode_fops = {
6618 	.open		= tracing_time_stamp_mode_open,
6619 	.read		= seq_read,
6620 	.llseek		= seq_lseek,
6621 	.release	= tracing_single_release_tr,
6622 };
6623 
6624 #ifdef CONFIG_TRACER_SNAPSHOT
6625 static const struct file_operations snapshot_fops = {
6626 	.open		= tracing_snapshot_open,
6627 	.read		= seq_read,
6628 	.write		= tracing_snapshot_write,
6629 	.llseek		= tracing_lseek,
6630 	.release	= tracing_snapshot_release,
6631 };
6632 
6633 static const struct file_operations snapshot_raw_fops = {
6634 	.open		= snapshot_raw_open,
6635 	.read		= tracing_buffers_read,
6636 	.release	= tracing_buffers_release,
6637 	.splice_read	= tracing_buffers_splice_read,
6638 	.llseek		= no_llseek,
6639 };
6640 
6641 #endif /* CONFIG_TRACER_SNAPSHOT */
6642 
6643 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6644 {
6645 	struct trace_array *tr = inode->i_private;
6646 	struct ftrace_buffer_info *info;
6647 	int ret;
6648 
6649 	if (tracing_disabled)
6650 		return -ENODEV;
6651 
6652 	if (trace_array_get(tr) < 0)
6653 		return -ENODEV;
6654 
6655 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6656 	if (!info) {
6657 		trace_array_put(tr);
6658 		return -ENOMEM;
6659 	}
6660 
6661 	mutex_lock(&trace_types_lock);
6662 
6663 	info->iter.tr		= tr;
6664 	info->iter.cpu_file	= tracing_get_cpu(inode);
6665 	info->iter.trace	= tr->current_trace;
6666 	info->iter.trace_buffer = &tr->trace_buffer;
6667 	info->spare		= NULL;
6668 	/* Force reading ring buffer for first read */
6669 	info->read		= (unsigned int)-1;
6670 
6671 	filp->private_data = info;
6672 
6673 	tr->current_trace->ref++;
6674 
6675 	mutex_unlock(&trace_types_lock);
6676 
6677 	ret = nonseekable_open(inode, filp);
6678 	if (ret < 0)
6679 		trace_array_put(tr);
6680 
6681 	return ret;
6682 }
6683 
6684 static __poll_t
6685 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6686 {
6687 	struct ftrace_buffer_info *info = filp->private_data;
6688 	struct trace_iterator *iter = &info->iter;
6689 
6690 	return trace_poll(iter, filp, poll_table);
6691 }
6692 
6693 static ssize_t
6694 tracing_buffers_read(struct file *filp, char __user *ubuf,
6695 		     size_t count, loff_t *ppos)
6696 {
6697 	struct ftrace_buffer_info *info = filp->private_data;
6698 	struct trace_iterator *iter = &info->iter;
6699 	ssize_t ret = 0;
6700 	ssize_t size;
6701 
6702 	if (!count)
6703 		return 0;
6704 
6705 #ifdef CONFIG_TRACER_MAX_TRACE
6706 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6707 		return -EBUSY;
6708 #endif
6709 
6710 	if (!info->spare) {
6711 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6712 							  iter->cpu_file);
6713 		if (IS_ERR(info->spare)) {
6714 			ret = PTR_ERR(info->spare);
6715 			info->spare = NULL;
6716 		} else {
6717 			info->spare_cpu = iter->cpu_file;
6718 		}
6719 	}
6720 	if (!info->spare)
6721 		return ret;
6722 
6723 	/* Do we have previous read data to read? */
6724 	if (info->read < PAGE_SIZE)
6725 		goto read;
6726 
6727  again:
6728 	trace_access_lock(iter->cpu_file);
6729 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6730 				    &info->spare,
6731 				    count,
6732 				    iter->cpu_file, 0);
6733 	trace_access_unlock(iter->cpu_file);
6734 
6735 	if (ret < 0) {
6736 		if (trace_empty(iter)) {
6737 			if ((filp->f_flags & O_NONBLOCK))
6738 				return -EAGAIN;
6739 
6740 			ret = wait_on_pipe(iter, false);
6741 			if (ret)
6742 				return ret;
6743 
6744 			goto again;
6745 		}
6746 		return 0;
6747 	}
6748 
6749 	info->read = 0;
6750  read:
6751 	size = PAGE_SIZE - info->read;
6752 	if (size > count)
6753 		size = count;
6754 
6755 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6756 	if (ret == size)
6757 		return -EFAULT;
6758 
6759 	size -= ret;
6760 
6761 	*ppos += size;
6762 	info->read += size;
6763 
6764 	return size;
6765 }
6766 
6767 static int tracing_buffers_release(struct inode *inode, struct file *file)
6768 {
6769 	struct ftrace_buffer_info *info = file->private_data;
6770 	struct trace_iterator *iter = &info->iter;
6771 
6772 	mutex_lock(&trace_types_lock);
6773 
6774 	iter->tr->current_trace->ref--;
6775 
6776 	__trace_array_put(iter->tr);
6777 
6778 	if (info->spare)
6779 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6780 					   info->spare_cpu, info->spare);
6781 	kfree(info);
6782 
6783 	mutex_unlock(&trace_types_lock);
6784 
6785 	return 0;
6786 }
6787 
6788 struct buffer_ref {
6789 	struct ring_buffer	*buffer;
6790 	void			*page;
6791 	int			cpu;
6792 	int			ref;
6793 };
6794 
6795 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6796 				    struct pipe_buffer *buf)
6797 {
6798 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6799 
6800 	if (--ref->ref)
6801 		return;
6802 
6803 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6804 	kfree(ref);
6805 	buf->private = 0;
6806 }
6807 
6808 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6809 				struct pipe_buffer *buf)
6810 {
6811 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6812 
6813 	ref->ref++;
6814 }
6815 
6816 /* Pipe buffer operations for a buffer. */
6817 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6818 	.can_merge		= 0,
6819 	.confirm		= generic_pipe_buf_confirm,
6820 	.release		= buffer_pipe_buf_release,
6821 	.steal			= generic_pipe_buf_steal,
6822 	.get			= buffer_pipe_buf_get,
6823 };
6824 
6825 /*
6826  * Callback from splice_to_pipe(), if we need to release some pages
6827  * at the end of the spd in case we error'ed out in filling the pipe.
6828  */
6829 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6830 {
6831 	struct buffer_ref *ref =
6832 		(struct buffer_ref *)spd->partial[i].private;
6833 
6834 	if (--ref->ref)
6835 		return;
6836 
6837 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6838 	kfree(ref);
6839 	spd->partial[i].private = 0;
6840 }
6841 
6842 static ssize_t
6843 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6844 			    struct pipe_inode_info *pipe, size_t len,
6845 			    unsigned int flags)
6846 {
6847 	struct ftrace_buffer_info *info = file->private_data;
6848 	struct trace_iterator *iter = &info->iter;
6849 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6850 	struct page *pages_def[PIPE_DEF_BUFFERS];
6851 	struct splice_pipe_desc spd = {
6852 		.pages		= pages_def,
6853 		.partial	= partial_def,
6854 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6855 		.ops		= &buffer_pipe_buf_ops,
6856 		.spd_release	= buffer_spd_release,
6857 	};
6858 	struct buffer_ref *ref;
6859 	int entries, i;
6860 	ssize_t ret = 0;
6861 
6862 #ifdef CONFIG_TRACER_MAX_TRACE
6863 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6864 		return -EBUSY;
6865 #endif
6866 
6867 	if (*ppos & (PAGE_SIZE - 1))
6868 		return -EINVAL;
6869 
6870 	if (len & (PAGE_SIZE - 1)) {
6871 		if (len < PAGE_SIZE)
6872 			return -EINVAL;
6873 		len &= PAGE_MASK;
6874 	}
6875 
6876 	if (splice_grow_spd(pipe, &spd))
6877 		return -ENOMEM;
6878 
6879  again:
6880 	trace_access_lock(iter->cpu_file);
6881 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6882 
6883 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6884 		struct page *page;
6885 		int r;
6886 
6887 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6888 		if (!ref) {
6889 			ret = -ENOMEM;
6890 			break;
6891 		}
6892 
6893 		ref->ref = 1;
6894 		ref->buffer = iter->trace_buffer->buffer;
6895 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6896 		if (IS_ERR(ref->page)) {
6897 			ret = PTR_ERR(ref->page);
6898 			ref->page = NULL;
6899 			kfree(ref);
6900 			break;
6901 		}
6902 		ref->cpu = iter->cpu_file;
6903 
6904 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6905 					  len, iter->cpu_file, 1);
6906 		if (r < 0) {
6907 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6908 						   ref->page);
6909 			kfree(ref);
6910 			break;
6911 		}
6912 
6913 		page = virt_to_page(ref->page);
6914 
6915 		spd.pages[i] = page;
6916 		spd.partial[i].len = PAGE_SIZE;
6917 		spd.partial[i].offset = 0;
6918 		spd.partial[i].private = (unsigned long)ref;
6919 		spd.nr_pages++;
6920 		*ppos += PAGE_SIZE;
6921 
6922 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6923 	}
6924 
6925 	trace_access_unlock(iter->cpu_file);
6926 	spd.nr_pages = i;
6927 
6928 	/* did we read anything? */
6929 	if (!spd.nr_pages) {
6930 		if (ret)
6931 			goto out;
6932 
6933 		ret = -EAGAIN;
6934 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6935 			goto out;
6936 
6937 		ret = wait_on_pipe(iter, true);
6938 		if (ret)
6939 			goto out;
6940 
6941 		goto again;
6942 	}
6943 
6944 	ret = splice_to_pipe(pipe, &spd);
6945 out:
6946 	splice_shrink_spd(&spd);
6947 
6948 	return ret;
6949 }
6950 
6951 static const struct file_operations tracing_buffers_fops = {
6952 	.open		= tracing_buffers_open,
6953 	.read		= tracing_buffers_read,
6954 	.poll		= tracing_buffers_poll,
6955 	.release	= tracing_buffers_release,
6956 	.splice_read	= tracing_buffers_splice_read,
6957 	.llseek		= no_llseek,
6958 };
6959 
6960 static ssize_t
6961 tracing_stats_read(struct file *filp, char __user *ubuf,
6962 		   size_t count, loff_t *ppos)
6963 {
6964 	struct inode *inode = file_inode(filp);
6965 	struct trace_array *tr = inode->i_private;
6966 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6967 	int cpu = tracing_get_cpu(inode);
6968 	struct trace_seq *s;
6969 	unsigned long cnt;
6970 	unsigned long long t;
6971 	unsigned long usec_rem;
6972 
6973 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6974 	if (!s)
6975 		return -ENOMEM;
6976 
6977 	trace_seq_init(s);
6978 
6979 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6980 	trace_seq_printf(s, "entries: %ld\n", cnt);
6981 
6982 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6983 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6984 
6985 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6986 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6987 
6988 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6989 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6990 
6991 	if (trace_clocks[tr->clock_id].in_ns) {
6992 		/* local or global for trace_clock */
6993 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6994 		usec_rem = do_div(t, USEC_PER_SEC);
6995 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6996 								t, usec_rem);
6997 
6998 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6999 		usec_rem = do_div(t, USEC_PER_SEC);
7000 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7001 	} else {
7002 		/* counter or tsc mode for trace_clock */
7003 		trace_seq_printf(s, "oldest event ts: %llu\n",
7004 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7005 
7006 		trace_seq_printf(s, "now ts: %llu\n",
7007 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7008 	}
7009 
7010 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7011 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7012 
7013 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7014 	trace_seq_printf(s, "read events: %ld\n", cnt);
7015 
7016 	count = simple_read_from_buffer(ubuf, count, ppos,
7017 					s->buffer, trace_seq_used(s));
7018 
7019 	kfree(s);
7020 
7021 	return count;
7022 }
7023 
7024 static const struct file_operations tracing_stats_fops = {
7025 	.open		= tracing_open_generic_tr,
7026 	.read		= tracing_stats_read,
7027 	.llseek		= generic_file_llseek,
7028 	.release	= tracing_release_generic_tr,
7029 };
7030 
7031 #ifdef CONFIG_DYNAMIC_FTRACE
7032 
7033 static ssize_t
7034 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7035 		  size_t cnt, loff_t *ppos)
7036 {
7037 	unsigned long *p = filp->private_data;
7038 	char buf[64]; /* Not too big for a shallow stack */
7039 	int r;
7040 
7041 	r = scnprintf(buf, 63, "%ld", *p);
7042 	buf[r++] = '\n';
7043 
7044 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7045 }
7046 
7047 static const struct file_operations tracing_dyn_info_fops = {
7048 	.open		= tracing_open_generic,
7049 	.read		= tracing_read_dyn_info,
7050 	.llseek		= generic_file_llseek,
7051 };
7052 #endif /* CONFIG_DYNAMIC_FTRACE */
7053 
7054 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7055 static void
7056 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7057 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7058 		void *data)
7059 {
7060 	tracing_snapshot_instance(tr);
7061 }
7062 
7063 static void
7064 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7065 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7066 		      void *data)
7067 {
7068 	struct ftrace_func_mapper *mapper = data;
7069 	long *count = NULL;
7070 
7071 	if (mapper)
7072 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7073 
7074 	if (count) {
7075 
7076 		if (*count <= 0)
7077 			return;
7078 
7079 		(*count)--;
7080 	}
7081 
7082 	tracing_snapshot_instance(tr);
7083 }
7084 
7085 static int
7086 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7087 		      struct ftrace_probe_ops *ops, void *data)
7088 {
7089 	struct ftrace_func_mapper *mapper = data;
7090 	long *count = NULL;
7091 
7092 	seq_printf(m, "%ps:", (void *)ip);
7093 
7094 	seq_puts(m, "snapshot");
7095 
7096 	if (mapper)
7097 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7098 
7099 	if (count)
7100 		seq_printf(m, ":count=%ld\n", *count);
7101 	else
7102 		seq_puts(m, ":unlimited\n");
7103 
7104 	return 0;
7105 }
7106 
7107 static int
7108 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7109 		     unsigned long ip, void *init_data, void **data)
7110 {
7111 	struct ftrace_func_mapper *mapper = *data;
7112 
7113 	if (!mapper) {
7114 		mapper = allocate_ftrace_func_mapper();
7115 		if (!mapper)
7116 			return -ENOMEM;
7117 		*data = mapper;
7118 	}
7119 
7120 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7121 }
7122 
7123 static void
7124 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7125 		     unsigned long ip, void *data)
7126 {
7127 	struct ftrace_func_mapper *mapper = data;
7128 
7129 	if (!ip) {
7130 		if (!mapper)
7131 			return;
7132 		free_ftrace_func_mapper(mapper, NULL);
7133 		return;
7134 	}
7135 
7136 	ftrace_func_mapper_remove_ip(mapper, ip);
7137 }
7138 
7139 static struct ftrace_probe_ops snapshot_probe_ops = {
7140 	.func			= ftrace_snapshot,
7141 	.print			= ftrace_snapshot_print,
7142 };
7143 
7144 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7145 	.func			= ftrace_count_snapshot,
7146 	.print			= ftrace_snapshot_print,
7147 	.init			= ftrace_snapshot_init,
7148 	.free			= ftrace_snapshot_free,
7149 };
7150 
7151 static int
7152 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7153 			       char *glob, char *cmd, char *param, int enable)
7154 {
7155 	struct ftrace_probe_ops *ops;
7156 	void *count = (void *)-1;
7157 	char *number;
7158 	int ret;
7159 
7160 	if (!tr)
7161 		return -ENODEV;
7162 
7163 	/* hash funcs only work with set_ftrace_filter */
7164 	if (!enable)
7165 		return -EINVAL;
7166 
7167 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7168 
7169 	if (glob[0] == '!')
7170 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7171 
7172 	if (!param)
7173 		goto out_reg;
7174 
7175 	number = strsep(&param, ":");
7176 
7177 	if (!strlen(number))
7178 		goto out_reg;
7179 
7180 	/*
7181 	 * We use the callback data field (which is a pointer)
7182 	 * as our counter.
7183 	 */
7184 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7185 	if (ret)
7186 		return ret;
7187 
7188  out_reg:
7189 	ret = tracing_alloc_snapshot_instance(tr);
7190 	if (ret < 0)
7191 		goto out;
7192 
7193 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7194 
7195  out:
7196 	return ret < 0 ? ret : 0;
7197 }
7198 
7199 static struct ftrace_func_command ftrace_snapshot_cmd = {
7200 	.name			= "snapshot",
7201 	.func			= ftrace_trace_snapshot_callback,
7202 };
7203 
7204 static __init int register_snapshot_cmd(void)
7205 {
7206 	return register_ftrace_command(&ftrace_snapshot_cmd);
7207 }
7208 #else
7209 static inline __init int register_snapshot_cmd(void) { return 0; }
7210 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7211 
7212 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7213 {
7214 	if (WARN_ON(!tr->dir))
7215 		return ERR_PTR(-ENODEV);
7216 
7217 	/* Top directory uses NULL as the parent */
7218 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7219 		return NULL;
7220 
7221 	/* All sub buffers have a descriptor */
7222 	return tr->dir;
7223 }
7224 
7225 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7226 {
7227 	struct dentry *d_tracer;
7228 
7229 	if (tr->percpu_dir)
7230 		return tr->percpu_dir;
7231 
7232 	d_tracer = tracing_get_dentry(tr);
7233 	if (IS_ERR(d_tracer))
7234 		return NULL;
7235 
7236 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7237 
7238 	WARN_ONCE(!tr->percpu_dir,
7239 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7240 
7241 	return tr->percpu_dir;
7242 }
7243 
7244 static struct dentry *
7245 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7246 		      void *data, long cpu, const struct file_operations *fops)
7247 {
7248 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7249 
7250 	if (ret) /* See tracing_get_cpu() */
7251 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7252 	return ret;
7253 }
7254 
7255 static void
7256 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7257 {
7258 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7259 	struct dentry *d_cpu;
7260 	char cpu_dir[30]; /* 30 characters should be more than enough */
7261 
7262 	if (!d_percpu)
7263 		return;
7264 
7265 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7266 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7267 	if (!d_cpu) {
7268 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7269 		return;
7270 	}
7271 
7272 	/* per cpu trace_pipe */
7273 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7274 				tr, cpu, &tracing_pipe_fops);
7275 
7276 	/* per cpu trace */
7277 	trace_create_cpu_file("trace", 0644, d_cpu,
7278 				tr, cpu, &tracing_fops);
7279 
7280 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7281 				tr, cpu, &tracing_buffers_fops);
7282 
7283 	trace_create_cpu_file("stats", 0444, d_cpu,
7284 				tr, cpu, &tracing_stats_fops);
7285 
7286 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7287 				tr, cpu, &tracing_entries_fops);
7288 
7289 #ifdef CONFIG_TRACER_SNAPSHOT
7290 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7291 				tr, cpu, &snapshot_fops);
7292 
7293 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7294 				tr, cpu, &snapshot_raw_fops);
7295 #endif
7296 }
7297 
7298 #ifdef CONFIG_FTRACE_SELFTEST
7299 /* Let selftest have access to static functions in this file */
7300 #include "trace_selftest.c"
7301 #endif
7302 
7303 static ssize_t
7304 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7305 			loff_t *ppos)
7306 {
7307 	struct trace_option_dentry *topt = filp->private_data;
7308 	char *buf;
7309 
7310 	if (topt->flags->val & topt->opt->bit)
7311 		buf = "1\n";
7312 	else
7313 		buf = "0\n";
7314 
7315 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7316 }
7317 
7318 static ssize_t
7319 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7320 			 loff_t *ppos)
7321 {
7322 	struct trace_option_dentry *topt = filp->private_data;
7323 	unsigned long val;
7324 	int ret;
7325 
7326 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7327 	if (ret)
7328 		return ret;
7329 
7330 	if (val != 0 && val != 1)
7331 		return -EINVAL;
7332 
7333 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7334 		mutex_lock(&trace_types_lock);
7335 		ret = __set_tracer_option(topt->tr, topt->flags,
7336 					  topt->opt, !val);
7337 		mutex_unlock(&trace_types_lock);
7338 		if (ret)
7339 			return ret;
7340 	}
7341 
7342 	*ppos += cnt;
7343 
7344 	return cnt;
7345 }
7346 
7347 
7348 static const struct file_operations trace_options_fops = {
7349 	.open = tracing_open_generic,
7350 	.read = trace_options_read,
7351 	.write = trace_options_write,
7352 	.llseek	= generic_file_llseek,
7353 };
7354 
7355 /*
7356  * In order to pass in both the trace_array descriptor as well as the index
7357  * to the flag that the trace option file represents, the trace_array
7358  * has a character array of trace_flags_index[], which holds the index
7359  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7360  * The address of this character array is passed to the flag option file
7361  * read/write callbacks.
7362  *
7363  * In order to extract both the index and the trace_array descriptor,
7364  * get_tr_index() uses the following algorithm.
7365  *
7366  *   idx = *ptr;
7367  *
7368  * As the pointer itself contains the address of the index (remember
7369  * index[1] == 1).
7370  *
7371  * Then to get the trace_array descriptor, by subtracting that index
7372  * from the ptr, we get to the start of the index itself.
7373  *
7374  *   ptr - idx == &index[0]
7375  *
7376  * Then a simple container_of() from that pointer gets us to the
7377  * trace_array descriptor.
7378  */
7379 static void get_tr_index(void *data, struct trace_array **ptr,
7380 			 unsigned int *pindex)
7381 {
7382 	*pindex = *(unsigned char *)data;
7383 
7384 	*ptr = container_of(data - *pindex, struct trace_array,
7385 			    trace_flags_index);
7386 }
7387 
7388 static ssize_t
7389 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7390 			loff_t *ppos)
7391 {
7392 	void *tr_index = filp->private_data;
7393 	struct trace_array *tr;
7394 	unsigned int index;
7395 	char *buf;
7396 
7397 	get_tr_index(tr_index, &tr, &index);
7398 
7399 	if (tr->trace_flags & (1 << index))
7400 		buf = "1\n";
7401 	else
7402 		buf = "0\n";
7403 
7404 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7405 }
7406 
7407 static ssize_t
7408 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7409 			 loff_t *ppos)
7410 {
7411 	void *tr_index = filp->private_data;
7412 	struct trace_array *tr;
7413 	unsigned int index;
7414 	unsigned long val;
7415 	int ret;
7416 
7417 	get_tr_index(tr_index, &tr, &index);
7418 
7419 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7420 	if (ret)
7421 		return ret;
7422 
7423 	if (val != 0 && val != 1)
7424 		return -EINVAL;
7425 
7426 	mutex_lock(&trace_types_lock);
7427 	ret = set_tracer_flag(tr, 1 << index, val);
7428 	mutex_unlock(&trace_types_lock);
7429 
7430 	if (ret < 0)
7431 		return ret;
7432 
7433 	*ppos += cnt;
7434 
7435 	return cnt;
7436 }
7437 
7438 static const struct file_operations trace_options_core_fops = {
7439 	.open = tracing_open_generic,
7440 	.read = trace_options_core_read,
7441 	.write = trace_options_core_write,
7442 	.llseek = generic_file_llseek,
7443 };
7444 
7445 struct dentry *trace_create_file(const char *name,
7446 				 umode_t mode,
7447 				 struct dentry *parent,
7448 				 void *data,
7449 				 const struct file_operations *fops)
7450 {
7451 	struct dentry *ret;
7452 
7453 	ret = tracefs_create_file(name, mode, parent, data, fops);
7454 	if (!ret)
7455 		pr_warn("Could not create tracefs '%s' entry\n", name);
7456 
7457 	return ret;
7458 }
7459 
7460 
7461 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7462 {
7463 	struct dentry *d_tracer;
7464 
7465 	if (tr->options)
7466 		return tr->options;
7467 
7468 	d_tracer = tracing_get_dentry(tr);
7469 	if (IS_ERR(d_tracer))
7470 		return NULL;
7471 
7472 	tr->options = tracefs_create_dir("options", d_tracer);
7473 	if (!tr->options) {
7474 		pr_warn("Could not create tracefs directory 'options'\n");
7475 		return NULL;
7476 	}
7477 
7478 	return tr->options;
7479 }
7480 
7481 static void
7482 create_trace_option_file(struct trace_array *tr,
7483 			 struct trace_option_dentry *topt,
7484 			 struct tracer_flags *flags,
7485 			 struct tracer_opt *opt)
7486 {
7487 	struct dentry *t_options;
7488 
7489 	t_options = trace_options_init_dentry(tr);
7490 	if (!t_options)
7491 		return;
7492 
7493 	topt->flags = flags;
7494 	topt->opt = opt;
7495 	topt->tr = tr;
7496 
7497 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7498 				    &trace_options_fops);
7499 
7500 }
7501 
7502 static void
7503 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7504 {
7505 	struct trace_option_dentry *topts;
7506 	struct trace_options *tr_topts;
7507 	struct tracer_flags *flags;
7508 	struct tracer_opt *opts;
7509 	int cnt;
7510 	int i;
7511 
7512 	if (!tracer)
7513 		return;
7514 
7515 	flags = tracer->flags;
7516 
7517 	if (!flags || !flags->opts)
7518 		return;
7519 
7520 	/*
7521 	 * If this is an instance, only create flags for tracers
7522 	 * the instance may have.
7523 	 */
7524 	if (!trace_ok_for_array(tracer, tr))
7525 		return;
7526 
7527 	for (i = 0; i < tr->nr_topts; i++) {
7528 		/* Make sure there's no duplicate flags. */
7529 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7530 			return;
7531 	}
7532 
7533 	opts = flags->opts;
7534 
7535 	for (cnt = 0; opts[cnt].name; cnt++)
7536 		;
7537 
7538 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7539 	if (!topts)
7540 		return;
7541 
7542 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7543 			    GFP_KERNEL);
7544 	if (!tr_topts) {
7545 		kfree(topts);
7546 		return;
7547 	}
7548 
7549 	tr->topts = tr_topts;
7550 	tr->topts[tr->nr_topts].tracer = tracer;
7551 	tr->topts[tr->nr_topts].topts = topts;
7552 	tr->nr_topts++;
7553 
7554 	for (cnt = 0; opts[cnt].name; cnt++) {
7555 		create_trace_option_file(tr, &topts[cnt], flags,
7556 					 &opts[cnt]);
7557 		WARN_ONCE(topts[cnt].entry == NULL,
7558 			  "Failed to create trace option: %s",
7559 			  opts[cnt].name);
7560 	}
7561 }
7562 
7563 static struct dentry *
7564 create_trace_option_core_file(struct trace_array *tr,
7565 			      const char *option, long index)
7566 {
7567 	struct dentry *t_options;
7568 
7569 	t_options = trace_options_init_dentry(tr);
7570 	if (!t_options)
7571 		return NULL;
7572 
7573 	return trace_create_file(option, 0644, t_options,
7574 				 (void *)&tr->trace_flags_index[index],
7575 				 &trace_options_core_fops);
7576 }
7577 
7578 static void create_trace_options_dir(struct trace_array *tr)
7579 {
7580 	struct dentry *t_options;
7581 	bool top_level = tr == &global_trace;
7582 	int i;
7583 
7584 	t_options = trace_options_init_dentry(tr);
7585 	if (!t_options)
7586 		return;
7587 
7588 	for (i = 0; trace_options[i]; i++) {
7589 		if (top_level ||
7590 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7591 			create_trace_option_core_file(tr, trace_options[i], i);
7592 	}
7593 }
7594 
7595 static ssize_t
7596 rb_simple_read(struct file *filp, char __user *ubuf,
7597 	       size_t cnt, loff_t *ppos)
7598 {
7599 	struct trace_array *tr = filp->private_data;
7600 	char buf[64];
7601 	int r;
7602 
7603 	r = tracer_tracing_is_on(tr);
7604 	r = sprintf(buf, "%d\n", r);
7605 
7606 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7607 }
7608 
7609 static ssize_t
7610 rb_simple_write(struct file *filp, const char __user *ubuf,
7611 		size_t cnt, loff_t *ppos)
7612 {
7613 	struct trace_array *tr = filp->private_data;
7614 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7615 	unsigned long val;
7616 	int ret;
7617 
7618 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7619 	if (ret)
7620 		return ret;
7621 
7622 	if (buffer) {
7623 		mutex_lock(&trace_types_lock);
7624 		if (val) {
7625 			tracer_tracing_on(tr);
7626 			if (tr->current_trace->start)
7627 				tr->current_trace->start(tr);
7628 		} else {
7629 			tracer_tracing_off(tr);
7630 			if (tr->current_trace->stop)
7631 				tr->current_trace->stop(tr);
7632 		}
7633 		mutex_unlock(&trace_types_lock);
7634 	}
7635 
7636 	(*ppos)++;
7637 
7638 	return cnt;
7639 }
7640 
7641 static const struct file_operations rb_simple_fops = {
7642 	.open		= tracing_open_generic_tr,
7643 	.read		= rb_simple_read,
7644 	.write		= rb_simple_write,
7645 	.release	= tracing_release_generic_tr,
7646 	.llseek		= default_llseek,
7647 };
7648 
7649 struct dentry *trace_instance_dir;
7650 
7651 static void
7652 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7653 
7654 static int
7655 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7656 {
7657 	enum ring_buffer_flags rb_flags;
7658 
7659 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7660 
7661 	buf->tr = tr;
7662 
7663 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7664 	if (!buf->buffer)
7665 		return -ENOMEM;
7666 
7667 	buf->data = alloc_percpu(struct trace_array_cpu);
7668 	if (!buf->data) {
7669 		ring_buffer_free(buf->buffer);
7670 		buf->buffer = NULL;
7671 		return -ENOMEM;
7672 	}
7673 
7674 	/* Allocate the first page for all buffers */
7675 	set_buffer_entries(&tr->trace_buffer,
7676 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7677 
7678 	return 0;
7679 }
7680 
7681 static int allocate_trace_buffers(struct trace_array *tr, int size)
7682 {
7683 	int ret;
7684 
7685 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7686 	if (ret)
7687 		return ret;
7688 
7689 #ifdef CONFIG_TRACER_MAX_TRACE
7690 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7691 				    allocate_snapshot ? size : 1);
7692 	if (WARN_ON(ret)) {
7693 		ring_buffer_free(tr->trace_buffer.buffer);
7694 		tr->trace_buffer.buffer = NULL;
7695 		free_percpu(tr->trace_buffer.data);
7696 		tr->trace_buffer.data = NULL;
7697 		return -ENOMEM;
7698 	}
7699 	tr->allocated_snapshot = allocate_snapshot;
7700 
7701 	/*
7702 	 * Only the top level trace array gets its snapshot allocated
7703 	 * from the kernel command line.
7704 	 */
7705 	allocate_snapshot = false;
7706 #endif
7707 	return 0;
7708 }
7709 
7710 static void free_trace_buffer(struct trace_buffer *buf)
7711 {
7712 	if (buf->buffer) {
7713 		ring_buffer_free(buf->buffer);
7714 		buf->buffer = NULL;
7715 		free_percpu(buf->data);
7716 		buf->data = NULL;
7717 	}
7718 }
7719 
7720 static void free_trace_buffers(struct trace_array *tr)
7721 {
7722 	if (!tr)
7723 		return;
7724 
7725 	free_trace_buffer(&tr->trace_buffer);
7726 
7727 #ifdef CONFIG_TRACER_MAX_TRACE
7728 	free_trace_buffer(&tr->max_buffer);
7729 #endif
7730 }
7731 
7732 static void init_trace_flags_index(struct trace_array *tr)
7733 {
7734 	int i;
7735 
7736 	/* Used by the trace options files */
7737 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7738 		tr->trace_flags_index[i] = i;
7739 }
7740 
7741 static void __update_tracer_options(struct trace_array *tr)
7742 {
7743 	struct tracer *t;
7744 
7745 	for (t = trace_types; t; t = t->next)
7746 		add_tracer_options(tr, t);
7747 }
7748 
7749 static void update_tracer_options(struct trace_array *tr)
7750 {
7751 	mutex_lock(&trace_types_lock);
7752 	__update_tracer_options(tr);
7753 	mutex_unlock(&trace_types_lock);
7754 }
7755 
7756 static int instance_mkdir(const char *name)
7757 {
7758 	struct trace_array *tr;
7759 	int ret;
7760 
7761 	mutex_lock(&event_mutex);
7762 	mutex_lock(&trace_types_lock);
7763 
7764 	ret = -EEXIST;
7765 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7766 		if (tr->name && strcmp(tr->name, name) == 0)
7767 			goto out_unlock;
7768 	}
7769 
7770 	ret = -ENOMEM;
7771 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7772 	if (!tr)
7773 		goto out_unlock;
7774 
7775 	tr->name = kstrdup(name, GFP_KERNEL);
7776 	if (!tr->name)
7777 		goto out_free_tr;
7778 
7779 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7780 		goto out_free_tr;
7781 
7782 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7783 
7784 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7785 
7786 	raw_spin_lock_init(&tr->start_lock);
7787 
7788 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7789 
7790 	tr->current_trace = &nop_trace;
7791 
7792 	INIT_LIST_HEAD(&tr->systems);
7793 	INIT_LIST_HEAD(&tr->events);
7794 	INIT_LIST_HEAD(&tr->hist_vars);
7795 
7796 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7797 		goto out_free_tr;
7798 
7799 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7800 	if (!tr->dir)
7801 		goto out_free_tr;
7802 
7803 	ret = event_trace_add_tracer(tr->dir, tr);
7804 	if (ret) {
7805 		tracefs_remove_recursive(tr->dir);
7806 		goto out_free_tr;
7807 	}
7808 
7809 	ftrace_init_trace_array(tr);
7810 
7811 	init_tracer_tracefs(tr, tr->dir);
7812 	init_trace_flags_index(tr);
7813 	__update_tracer_options(tr);
7814 
7815 	list_add(&tr->list, &ftrace_trace_arrays);
7816 
7817 	mutex_unlock(&trace_types_lock);
7818 	mutex_unlock(&event_mutex);
7819 
7820 	return 0;
7821 
7822  out_free_tr:
7823 	free_trace_buffers(tr);
7824 	free_cpumask_var(tr->tracing_cpumask);
7825 	kfree(tr->name);
7826 	kfree(tr);
7827 
7828  out_unlock:
7829 	mutex_unlock(&trace_types_lock);
7830 	mutex_unlock(&event_mutex);
7831 
7832 	return ret;
7833 
7834 }
7835 
7836 static int instance_rmdir(const char *name)
7837 {
7838 	struct trace_array *tr;
7839 	int found = 0;
7840 	int ret;
7841 	int i;
7842 
7843 	mutex_lock(&event_mutex);
7844 	mutex_lock(&trace_types_lock);
7845 
7846 	ret = -ENODEV;
7847 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7848 		if (tr->name && strcmp(tr->name, name) == 0) {
7849 			found = 1;
7850 			break;
7851 		}
7852 	}
7853 	if (!found)
7854 		goto out_unlock;
7855 
7856 	ret = -EBUSY;
7857 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7858 		goto out_unlock;
7859 
7860 	list_del(&tr->list);
7861 
7862 	/* Disable all the flags that were enabled coming in */
7863 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7864 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7865 			set_tracer_flag(tr, 1 << i, 0);
7866 	}
7867 
7868 	tracing_set_nop(tr);
7869 	clear_ftrace_function_probes(tr);
7870 	event_trace_del_tracer(tr);
7871 	ftrace_clear_pids(tr);
7872 	ftrace_destroy_function_files(tr);
7873 	tracefs_remove_recursive(tr->dir);
7874 	free_trace_buffers(tr);
7875 
7876 	for (i = 0; i < tr->nr_topts; i++) {
7877 		kfree(tr->topts[i].topts);
7878 	}
7879 	kfree(tr->topts);
7880 
7881 	free_cpumask_var(tr->tracing_cpumask);
7882 	kfree(tr->name);
7883 	kfree(tr);
7884 
7885 	ret = 0;
7886 
7887  out_unlock:
7888 	mutex_unlock(&trace_types_lock);
7889 	mutex_unlock(&event_mutex);
7890 
7891 	return ret;
7892 }
7893 
7894 static __init void create_trace_instances(struct dentry *d_tracer)
7895 {
7896 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7897 							 instance_mkdir,
7898 							 instance_rmdir);
7899 	if (WARN_ON(!trace_instance_dir))
7900 		return;
7901 }
7902 
7903 static void
7904 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7905 {
7906 	struct trace_event_file *file;
7907 	int cpu;
7908 
7909 	trace_create_file("available_tracers", 0444, d_tracer,
7910 			tr, &show_traces_fops);
7911 
7912 	trace_create_file("current_tracer", 0644, d_tracer,
7913 			tr, &set_tracer_fops);
7914 
7915 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7916 			  tr, &tracing_cpumask_fops);
7917 
7918 	trace_create_file("trace_options", 0644, d_tracer,
7919 			  tr, &tracing_iter_fops);
7920 
7921 	trace_create_file("trace", 0644, d_tracer,
7922 			  tr, &tracing_fops);
7923 
7924 	trace_create_file("trace_pipe", 0444, d_tracer,
7925 			  tr, &tracing_pipe_fops);
7926 
7927 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7928 			  tr, &tracing_entries_fops);
7929 
7930 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7931 			  tr, &tracing_total_entries_fops);
7932 
7933 	trace_create_file("free_buffer", 0200, d_tracer,
7934 			  tr, &tracing_free_buffer_fops);
7935 
7936 	trace_create_file("trace_marker", 0220, d_tracer,
7937 			  tr, &tracing_mark_fops);
7938 
7939 	file = __find_event_file(tr, "ftrace", "print");
7940 	if (file && file->dir)
7941 		trace_create_file("trigger", 0644, file->dir, file,
7942 				  &event_trigger_fops);
7943 	tr->trace_marker_file = file;
7944 
7945 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7946 			  tr, &tracing_mark_raw_fops);
7947 
7948 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7949 			  &trace_clock_fops);
7950 
7951 	trace_create_file("tracing_on", 0644, d_tracer,
7952 			  tr, &rb_simple_fops);
7953 
7954 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7955 			  &trace_time_stamp_mode_fops);
7956 
7957 	create_trace_options_dir(tr);
7958 
7959 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7960 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7961 			&tr->max_latency, &tracing_max_lat_fops);
7962 #endif
7963 
7964 	if (ftrace_create_function_files(tr, d_tracer))
7965 		WARN(1, "Could not allocate function filter files");
7966 
7967 #ifdef CONFIG_TRACER_SNAPSHOT
7968 	trace_create_file("snapshot", 0644, d_tracer,
7969 			  tr, &snapshot_fops);
7970 #endif
7971 
7972 	for_each_tracing_cpu(cpu)
7973 		tracing_init_tracefs_percpu(tr, cpu);
7974 
7975 	ftrace_init_tracefs(tr, d_tracer);
7976 }
7977 
7978 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7979 {
7980 	struct vfsmount *mnt;
7981 	struct file_system_type *type;
7982 
7983 	/*
7984 	 * To maintain backward compatibility for tools that mount
7985 	 * debugfs to get to the tracing facility, tracefs is automatically
7986 	 * mounted to the debugfs/tracing directory.
7987 	 */
7988 	type = get_fs_type("tracefs");
7989 	if (!type)
7990 		return NULL;
7991 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7992 	put_filesystem(type);
7993 	if (IS_ERR(mnt))
7994 		return NULL;
7995 	mntget(mnt);
7996 
7997 	return mnt;
7998 }
7999 
8000 /**
8001  * tracing_init_dentry - initialize top level trace array
8002  *
8003  * This is called when creating files or directories in the tracing
8004  * directory. It is called via fs_initcall() by any of the boot up code
8005  * and expects to return the dentry of the top level tracing directory.
8006  */
8007 struct dentry *tracing_init_dentry(void)
8008 {
8009 	struct trace_array *tr = &global_trace;
8010 
8011 	/* The top level trace array uses  NULL as parent */
8012 	if (tr->dir)
8013 		return NULL;
8014 
8015 	if (WARN_ON(!tracefs_initialized()) ||
8016 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8017 		 WARN_ON(!debugfs_initialized())))
8018 		return ERR_PTR(-ENODEV);
8019 
8020 	/*
8021 	 * As there may still be users that expect the tracing
8022 	 * files to exist in debugfs/tracing, we must automount
8023 	 * the tracefs file system there, so older tools still
8024 	 * work with the newer kerenl.
8025 	 */
8026 	tr->dir = debugfs_create_automount("tracing", NULL,
8027 					   trace_automount, NULL);
8028 	if (!tr->dir) {
8029 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8030 		return ERR_PTR(-ENOMEM);
8031 	}
8032 
8033 	return NULL;
8034 }
8035 
8036 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8037 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8038 
8039 static void __init trace_eval_init(void)
8040 {
8041 	int len;
8042 
8043 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8044 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8045 }
8046 
8047 #ifdef CONFIG_MODULES
8048 static void trace_module_add_evals(struct module *mod)
8049 {
8050 	if (!mod->num_trace_evals)
8051 		return;
8052 
8053 	/*
8054 	 * Modules with bad taint do not have events created, do
8055 	 * not bother with enums either.
8056 	 */
8057 	if (trace_module_has_bad_taint(mod))
8058 		return;
8059 
8060 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8061 }
8062 
8063 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8064 static void trace_module_remove_evals(struct module *mod)
8065 {
8066 	union trace_eval_map_item *map;
8067 	union trace_eval_map_item **last = &trace_eval_maps;
8068 
8069 	if (!mod->num_trace_evals)
8070 		return;
8071 
8072 	mutex_lock(&trace_eval_mutex);
8073 
8074 	map = trace_eval_maps;
8075 
8076 	while (map) {
8077 		if (map->head.mod == mod)
8078 			break;
8079 		map = trace_eval_jmp_to_tail(map);
8080 		last = &map->tail.next;
8081 		map = map->tail.next;
8082 	}
8083 	if (!map)
8084 		goto out;
8085 
8086 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8087 	kfree(map);
8088  out:
8089 	mutex_unlock(&trace_eval_mutex);
8090 }
8091 #else
8092 static inline void trace_module_remove_evals(struct module *mod) { }
8093 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8094 
8095 static int trace_module_notify(struct notifier_block *self,
8096 			       unsigned long val, void *data)
8097 {
8098 	struct module *mod = data;
8099 
8100 	switch (val) {
8101 	case MODULE_STATE_COMING:
8102 		trace_module_add_evals(mod);
8103 		break;
8104 	case MODULE_STATE_GOING:
8105 		trace_module_remove_evals(mod);
8106 		break;
8107 	}
8108 
8109 	return 0;
8110 }
8111 
8112 static struct notifier_block trace_module_nb = {
8113 	.notifier_call = trace_module_notify,
8114 	.priority = 0,
8115 };
8116 #endif /* CONFIG_MODULES */
8117 
8118 static __init int tracer_init_tracefs(void)
8119 {
8120 	struct dentry *d_tracer;
8121 
8122 	trace_access_lock_init();
8123 
8124 	d_tracer = tracing_init_dentry();
8125 	if (IS_ERR(d_tracer))
8126 		return 0;
8127 
8128 	event_trace_init();
8129 
8130 	init_tracer_tracefs(&global_trace, d_tracer);
8131 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8132 
8133 	trace_create_file("tracing_thresh", 0644, d_tracer,
8134 			&global_trace, &tracing_thresh_fops);
8135 
8136 	trace_create_file("README", 0444, d_tracer,
8137 			NULL, &tracing_readme_fops);
8138 
8139 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8140 			NULL, &tracing_saved_cmdlines_fops);
8141 
8142 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8143 			  NULL, &tracing_saved_cmdlines_size_fops);
8144 
8145 	trace_create_file("saved_tgids", 0444, d_tracer,
8146 			NULL, &tracing_saved_tgids_fops);
8147 
8148 	trace_eval_init();
8149 
8150 	trace_create_eval_file(d_tracer);
8151 
8152 #ifdef CONFIG_MODULES
8153 	register_module_notifier(&trace_module_nb);
8154 #endif
8155 
8156 #ifdef CONFIG_DYNAMIC_FTRACE
8157 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8158 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8159 #endif
8160 
8161 	create_trace_instances(d_tracer);
8162 
8163 	update_tracer_options(&global_trace);
8164 
8165 	return 0;
8166 }
8167 
8168 static int trace_panic_handler(struct notifier_block *this,
8169 			       unsigned long event, void *unused)
8170 {
8171 	if (ftrace_dump_on_oops)
8172 		ftrace_dump(ftrace_dump_on_oops);
8173 	return NOTIFY_OK;
8174 }
8175 
8176 static struct notifier_block trace_panic_notifier = {
8177 	.notifier_call  = trace_panic_handler,
8178 	.next           = NULL,
8179 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8180 };
8181 
8182 static int trace_die_handler(struct notifier_block *self,
8183 			     unsigned long val,
8184 			     void *data)
8185 {
8186 	switch (val) {
8187 	case DIE_OOPS:
8188 		if (ftrace_dump_on_oops)
8189 			ftrace_dump(ftrace_dump_on_oops);
8190 		break;
8191 	default:
8192 		break;
8193 	}
8194 	return NOTIFY_OK;
8195 }
8196 
8197 static struct notifier_block trace_die_notifier = {
8198 	.notifier_call = trace_die_handler,
8199 	.priority = 200
8200 };
8201 
8202 /*
8203  * printk is set to max of 1024, we really don't need it that big.
8204  * Nothing should be printing 1000 characters anyway.
8205  */
8206 #define TRACE_MAX_PRINT		1000
8207 
8208 /*
8209  * Define here KERN_TRACE so that we have one place to modify
8210  * it if we decide to change what log level the ftrace dump
8211  * should be at.
8212  */
8213 #define KERN_TRACE		KERN_EMERG
8214 
8215 void
8216 trace_printk_seq(struct trace_seq *s)
8217 {
8218 	/* Probably should print a warning here. */
8219 	if (s->seq.len >= TRACE_MAX_PRINT)
8220 		s->seq.len = TRACE_MAX_PRINT;
8221 
8222 	/*
8223 	 * More paranoid code. Although the buffer size is set to
8224 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8225 	 * an extra layer of protection.
8226 	 */
8227 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8228 		s->seq.len = s->seq.size - 1;
8229 
8230 	/* should be zero ended, but we are paranoid. */
8231 	s->buffer[s->seq.len] = 0;
8232 
8233 	printk(KERN_TRACE "%s", s->buffer);
8234 
8235 	trace_seq_init(s);
8236 }
8237 
8238 void trace_init_global_iter(struct trace_iterator *iter)
8239 {
8240 	iter->tr = &global_trace;
8241 	iter->trace = iter->tr->current_trace;
8242 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8243 	iter->trace_buffer = &global_trace.trace_buffer;
8244 
8245 	if (iter->trace && iter->trace->open)
8246 		iter->trace->open(iter);
8247 
8248 	/* Annotate start of buffers if we had overruns */
8249 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8250 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8251 
8252 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8253 	if (trace_clocks[iter->tr->clock_id].in_ns)
8254 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8255 }
8256 
8257 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8258 {
8259 	/* use static because iter can be a bit big for the stack */
8260 	static struct trace_iterator iter;
8261 	static atomic_t dump_running;
8262 	struct trace_array *tr = &global_trace;
8263 	unsigned int old_userobj;
8264 	unsigned long flags;
8265 	int cnt = 0, cpu;
8266 
8267 	/* Only allow one dump user at a time. */
8268 	if (atomic_inc_return(&dump_running) != 1) {
8269 		atomic_dec(&dump_running);
8270 		return;
8271 	}
8272 
8273 	/*
8274 	 * Always turn off tracing when we dump.
8275 	 * We don't need to show trace output of what happens
8276 	 * between multiple crashes.
8277 	 *
8278 	 * If the user does a sysrq-z, then they can re-enable
8279 	 * tracing with echo 1 > tracing_on.
8280 	 */
8281 	tracing_off();
8282 
8283 	local_irq_save(flags);
8284 
8285 	/* Simulate the iterator */
8286 	trace_init_global_iter(&iter);
8287 
8288 	for_each_tracing_cpu(cpu) {
8289 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8290 	}
8291 
8292 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8293 
8294 	/* don't look at user memory in panic mode */
8295 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8296 
8297 	switch (oops_dump_mode) {
8298 	case DUMP_ALL:
8299 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8300 		break;
8301 	case DUMP_ORIG:
8302 		iter.cpu_file = raw_smp_processor_id();
8303 		break;
8304 	case DUMP_NONE:
8305 		goto out_enable;
8306 	default:
8307 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8308 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8309 	}
8310 
8311 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8312 
8313 	/* Did function tracer already get disabled? */
8314 	if (ftrace_is_dead()) {
8315 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8316 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8317 	}
8318 
8319 	/*
8320 	 * We need to stop all tracing on all CPUS to read the
8321 	 * the next buffer. This is a bit expensive, but is
8322 	 * not done often. We fill all what we can read,
8323 	 * and then release the locks again.
8324 	 */
8325 
8326 	while (!trace_empty(&iter)) {
8327 
8328 		if (!cnt)
8329 			printk(KERN_TRACE "---------------------------------\n");
8330 
8331 		cnt++;
8332 
8333 		/* reset all but tr, trace, and overruns */
8334 		memset(&iter.seq, 0,
8335 		       sizeof(struct trace_iterator) -
8336 		       offsetof(struct trace_iterator, seq));
8337 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8338 		iter.pos = -1;
8339 
8340 		if (trace_find_next_entry_inc(&iter) != NULL) {
8341 			int ret;
8342 
8343 			ret = print_trace_line(&iter);
8344 			if (ret != TRACE_TYPE_NO_CONSUME)
8345 				trace_consume(&iter);
8346 		}
8347 		touch_nmi_watchdog();
8348 
8349 		trace_printk_seq(&iter.seq);
8350 	}
8351 
8352 	if (!cnt)
8353 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8354 	else
8355 		printk(KERN_TRACE "---------------------------------\n");
8356 
8357  out_enable:
8358 	tr->trace_flags |= old_userobj;
8359 
8360 	for_each_tracing_cpu(cpu) {
8361 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8362 	}
8363  	atomic_dec(&dump_running);
8364 	local_irq_restore(flags);
8365 }
8366 EXPORT_SYMBOL_GPL(ftrace_dump);
8367 
8368 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8369 {
8370 	char **argv;
8371 	int argc, ret;
8372 
8373 	argc = 0;
8374 	ret = 0;
8375 	argv = argv_split(GFP_KERNEL, buf, &argc);
8376 	if (!argv)
8377 		return -ENOMEM;
8378 
8379 	if (argc)
8380 		ret = createfn(argc, argv);
8381 
8382 	argv_free(argv);
8383 
8384 	return ret;
8385 }
8386 
8387 #define WRITE_BUFSIZE  4096
8388 
8389 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8390 				size_t count, loff_t *ppos,
8391 				int (*createfn)(int, char **))
8392 {
8393 	char *kbuf, *buf, *tmp;
8394 	int ret = 0;
8395 	size_t done = 0;
8396 	size_t size;
8397 
8398 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8399 	if (!kbuf)
8400 		return -ENOMEM;
8401 
8402 	while (done < count) {
8403 		size = count - done;
8404 
8405 		if (size >= WRITE_BUFSIZE)
8406 			size = WRITE_BUFSIZE - 1;
8407 
8408 		if (copy_from_user(kbuf, buffer + done, size)) {
8409 			ret = -EFAULT;
8410 			goto out;
8411 		}
8412 		kbuf[size] = '\0';
8413 		buf = kbuf;
8414 		do {
8415 			tmp = strchr(buf, '\n');
8416 			if (tmp) {
8417 				*tmp = '\0';
8418 				size = tmp - buf + 1;
8419 			} else {
8420 				size = strlen(buf);
8421 				if (done + size < count) {
8422 					if (buf != kbuf)
8423 						break;
8424 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8425 					pr_warn("Line length is too long: Should be less than %d\n",
8426 						WRITE_BUFSIZE - 2);
8427 					ret = -EINVAL;
8428 					goto out;
8429 				}
8430 			}
8431 			done += size;
8432 
8433 			/* Remove comments */
8434 			tmp = strchr(buf, '#');
8435 
8436 			if (tmp)
8437 				*tmp = '\0';
8438 
8439 			ret = trace_run_command(buf, createfn);
8440 			if (ret)
8441 				goto out;
8442 			buf += size;
8443 
8444 		} while (done < count);
8445 	}
8446 	ret = done;
8447 
8448 out:
8449 	kfree(kbuf);
8450 
8451 	return ret;
8452 }
8453 
8454 __init static int tracer_alloc_buffers(void)
8455 {
8456 	int ring_buf_size;
8457 	int ret = -ENOMEM;
8458 
8459 	/*
8460 	 * Make sure we don't accidently add more trace options
8461 	 * than we have bits for.
8462 	 */
8463 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8464 
8465 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8466 		goto out;
8467 
8468 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8469 		goto out_free_buffer_mask;
8470 
8471 	/* Only allocate trace_printk buffers if a trace_printk exists */
8472 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8473 		/* Must be called before global_trace.buffer is allocated */
8474 		trace_printk_init_buffers();
8475 
8476 	/* To save memory, keep the ring buffer size to its minimum */
8477 	if (ring_buffer_expanded)
8478 		ring_buf_size = trace_buf_size;
8479 	else
8480 		ring_buf_size = 1;
8481 
8482 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8483 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8484 
8485 	raw_spin_lock_init(&global_trace.start_lock);
8486 
8487 	/*
8488 	 * The prepare callbacks allocates some memory for the ring buffer. We
8489 	 * don't free the buffer if the if the CPU goes down. If we were to free
8490 	 * the buffer, then the user would lose any trace that was in the
8491 	 * buffer. The memory will be removed once the "instance" is removed.
8492 	 */
8493 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8494 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8495 				      NULL);
8496 	if (ret < 0)
8497 		goto out_free_cpumask;
8498 	/* Used for event triggers */
8499 	ret = -ENOMEM;
8500 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8501 	if (!temp_buffer)
8502 		goto out_rm_hp_state;
8503 
8504 	if (trace_create_savedcmd() < 0)
8505 		goto out_free_temp_buffer;
8506 
8507 	/* TODO: make the number of buffers hot pluggable with CPUS */
8508 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8509 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8510 		WARN_ON(1);
8511 		goto out_free_savedcmd;
8512 	}
8513 
8514 	if (global_trace.buffer_disabled)
8515 		tracing_off();
8516 
8517 	if (trace_boot_clock) {
8518 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8519 		if (ret < 0)
8520 			pr_warn("Trace clock %s not defined, going back to default\n",
8521 				trace_boot_clock);
8522 	}
8523 
8524 	/*
8525 	 * register_tracer() might reference current_trace, so it
8526 	 * needs to be set before we register anything. This is
8527 	 * just a bootstrap of current_trace anyway.
8528 	 */
8529 	global_trace.current_trace = &nop_trace;
8530 
8531 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8532 
8533 	ftrace_init_global_array_ops(&global_trace);
8534 
8535 	init_trace_flags_index(&global_trace);
8536 
8537 	register_tracer(&nop_trace);
8538 
8539 	/* Function tracing may start here (via kernel command line) */
8540 	init_function_trace();
8541 
8542 	/* All seems OK, enable tracing */
8543 	tracing_disabled = 0;
8544 
8545 	atomic_notifier_chain_register(&panic_notifier_list,
8546 				       &trace_panic_notifier);
8547 
8548 	register_die_notifier(&trace_die_notifier);
8549 
8550 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8551 
8552 	INIT_LIST_HEAD(&global_trace.systems);
8553 	INIT_LIST_HEAD(&global_trace.events);
8554 	INIT_LIST_HEAD(&global_trace.hist_vars);
8555 	list_add(&global_trace.list, &ftrace_trace_arrays);
8556 
8557 	apply_trace_boot_options();
8558 
8559 	register_snapshot_cmd();
8560 
8561 	return 0;
8562 
8563 out_free_savedcmd:
8564 	free_saved_cmdlines_buffer(savedcmd);
8565 out_free_temp_buffer:
8566 	ring_buffer_free(temp_buffer);
8567 out_rm_hp_state:
8568 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8569 out_free_cpumask:
8570 	free_cpumask_var(global_trace.tracing_cpumask);
8571 out_free_buffer_mask:
8572 	free_cpumask_var(tracing_buffer_mask);
8573 out:
8574 	return ret;
8575 }
8576 
8577 void __init early_trace_init(void)
8578 {
8579 	if (tracepoint_printk) {
8580 		tracepoint_print_iter =
8581 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8582 		if (WARN_ON(!tracepoint_print_iter))
8583 			tracepoint_printk = 0;
8584 		else
8585 			static_key_enable(&tracepoint_printk_key.key);
8586 	}
8587 	tracer_alloc_buffers();
8588 }
8589 
8590 void __init trace_init(void)
8591 {
8592 	trace_event_init();
8593 }
8594 
8595 __init static int clear_boot_tracer(void)
8596 {
8597 	/*
8598 	 * The default tracer at boot buffer is an init section.
8599 	 * This function is called in lateinit. If we did not
8600 	 * find the boot tracer, then clear it out, to prevent
8601 	 * later registration from accessing the buffer that is
8602 	 * about to be freed.
8603 	 */
8604 	if (!default_bootup_tracer)
8605 		return 0;
8606 
8607 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8608 	       default_bootup_tracer);
8609 	default_bootup_tracer = NULL;
8610 
8611 	return 0;
8612 }
8613 
8614 fs_initcall(tracer_init_tracefs);
8615 late_initcall_sync(clear_boot_tracer);
8616 
8617 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8618 __init static int tracing_set_default_clock(void)
8619 {
8620 	/* sched_clock_stable() is determined in late_initcall */
8621 	if (!trace_boot_clock && !sched_clock_stable()) {
8622 		printk(KERN_WARNING
8623 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8624 		       "If you want to keep using the local clock, then add:\n"
8625 		       "  \"trace_clock=local\"\n"
8626 		       "on the kernel command line\n");
8627 		tracing_set_clock(&global_trace, "global");
8628 	}
8629 
8630 	return 0;
8631 }
8632 late_initcall_sync(tracing_set_default_clock);
8633 #endif
8634