xref: /openbmc/linux/kernel/trace/trace.c (revision 763f96944c954ce0e00a10a7bdfe29adbe4f92eb)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will lurk into the ring-buffer to count the
59  * entries inserted during the selftest although some concurrent
60  * insertions into the ring-buffer such as trace_printk could occurred
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * than "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189 
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid already is +1 of the actual prevous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always recreate a new array. The write is an all or nothing
493 	 * operation. Always create a new array when adding new pids by
494 	 * the user. If the operation fails, then the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. Anyway this can be
614  * boot time and run time configurable.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
635  * The content of events may become garbage if we allow other process consumes
636  * these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not reader page) in ring buffer, and this page will be rewrited
639  *      by events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races of where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer. instead, resize it because
974 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
975 	 * we want preserve it.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = tracing_alloc_snapshot_instance(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races of where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit postions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by  space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this tasks comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	struct ring_buffer *buf;
1364 
1365 	if (tr->stop_count)
1366 		return;
1367 
1368 	WARN_ON_ONCE(!irqs_disabled());
1369 
1370 	if (!tr->allocated_snapshot) {
1371 		/* Only the nop tracer should hit this when disabling */
1372 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373 		return;
1374 	}
1375 
1376 	arch_spin_lock(&tr->max_lock);
1377 
1378 	buf = tr->trace_buffer.buffer;
1379 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 	tr->max_buffer.buffer = buf;
1381 
1382 	__update_max_tr(tr, tsk, cpu);
1383 	arch_spin_unlock(&tr->max_lock);
1384 }
1385 
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr - tracer
1389  * @tsk - task with the latency
1390  * @cpu - the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 	int ret;
1398 
1399 	if (tr->stop_count)
1400 		return;
1401 
1402 	WARN_ON_ONCE(!irqs_disabled());
1403 	if (!tr->allocated_snapshot) {
1404 		/* Only the nop tracer should hit this when disabling */
1405 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 		return;
1407 	}
1408 
1409 	arch_spin_lock(&tr->max_lock);
1410 
1411 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412 
1413 	if (ret == -EBUSY) {
1414 		/*
1415 		 * We failed to swap the buffer due to a commit taking
1416 		 * place on this CPU. We fail to record, but we reset
1417 		 * the max trace buffer (no one writes directly to it)
1418 		 * and flag that it failed.
1419 		 */
1420 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 			"Failed to swap buffers due to commit in progress\n");
1422 	}
1423 
1424 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425 
1426 	__update_max_tr(tr, tsk, cpu);
1427 	arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 	/* Iterators are static, they should be filled or empty */
1434 	if (trace_buffer_iter(iter, iter->cpu_file))
1435 		return 0;
1436 
1437 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 				full);
1439 }
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445 	struct list_head		list;
1446 	struct tracer			*type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
1451 static int save_selftest(struct tracer *type)
1452 {
1453 	struct trace_selftests *selftest;
1454 
1455 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 	if (!selftest)
1457 		return -ENOMEM;
1458 
1459 	selftest->type = type;
1460 	list_add(&selftest->list, &postponed_selftests);
1461 	return 0;
1462 }
1463 
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	struct tracer *saved_tracer = tr->current_trace;
1468 	int ret;
1469 
1470 	if (!type->selftest || tracing_selftest_disabled)
1471 		return 0;
1472 
1473 	/*
1474 	 * If a tracer registers early in boot up (before scheduling is
1475 	 * initialized and such), then do not run its selftests yet.
1476 	 * Instead, run it a little later in the boot process.
1477 	 */
1478 	if (!selftests_can_run)
1479 		return save_selftest(type);
1480 
1481 	/*
1482 	 * Run a selftest on this tracer.
1483 	 * Here we reset the trace buffer, and set the current
1484 	 * tracer to be this tracer. The tracer can then run some
1485 	 * internal tracing to verify that everything is in order.
1486 	 * If we fail, we do not register this tracer.
1487 	 */
1488 	tracing_reset_online_cpus(&tr->trace_buffer);
1489 
1490 	tr->current_trace = type;
1491 
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 	if (type->use_max_tr) {
1494 		/* If we expanded the buffers, make sure the max is expanded too */
1495 		if (ring_buffer_expanded)
1496 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 					   RING_BUFFER_ALL_CPUS);
1498 		tr->allocated_snapshot = true;
1499 	}
1500 #endif
1501 
1502 	/* the test is responsible for initializing and enabling */
1503 	pr_info("Testing tracer %s: ", type->name);
1504 	ret = type->selftest(type, tr);
1505 	/* the test is responsible for resetting too */
1506 	tr->current_trace = saved_tracer;
1507 	if (ret) {
1508 		printk(KERN_CONT "FAILED!\n");
1509 		/* Add the warning after printing 'FAILED' */
1510 		WARN_ON(1);
1511 		return -1;
1512 	}
1513 	/* Only reset on passing, to avoid touching corrupted buffers */
1514 	tracing_reset_online_cpus(&tr->trace_buffer);
1515 
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 	if (type->use_max_tr) {
1518 		tr->allocated_snapshot = false;
1519 
1520 		/* Shrink the max buffer again */
1521 		if (ring_buffer_expanded)
1522 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 					   RING_BUFFER_ALL_CPUS);
1524 	}
1525 #endif
1526 
1527 	printk(KERN_CONT "PASSED\n");
1528 	return 0;
1529 }
1530 
1531 static __init int init_trace_selftests(void)
1532 {
1533 	struct trace_selftests *p, *n;
1534 	struct tracer *t, **last;
1535 	int ret;
1536 
1537 	selftests_can_run = true;
1538 
1539 	mutex_lock(&trace_types_lock);
1540 
1541 	if (list_empty(&postponed_selftests))
1542 		goto out;
1543 
1544 	pr_info("Running postponed tracer tests:\n");
1545 
1546 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547 		ret = run_tracer_selftest(p->type);
1548 		/* If the test fails, then warn and remove from available_tracers */
1549 		if (ret < 0) {
1550 			WARN(1, "tracer: %s failed selftest, disabling\n",
1551 			     p->type->name);
1552 			last = &trace_types;
1553 			for (t = trace_types; t; t = t->next) {
1554 				if (t == p->type) {
1555 					*last = t->next;
1556 					break;
1557 				}
1558 				last = &t->next;
1559 			}
1560 		}
1561 		list_del(&p->list);
1562 		kfree(p);
1563 	}
1564 
1565  out:
1566 	mutex_unlock(&trace_types_lock);
1567 
1568 	return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574 	return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577 
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579 
1580 static void __init apply_trace_boot_options(void);
1581 
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type - the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590 	struct tracer *t;
1591 	int ret = 0;
1592 
1593 	if (!type->name) {
1594 		pr_info("Tracer must have a name\n");
1595 		return -1;
1596 	}
1597 
1598 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600 		return -1;
1601 	}
1602 
1603 	mutex_lock(&trace_types_lock);
1604 
1605 	tracing_selftest_running = true;
1606 
1607 	for (t = trace_types; t; t = t->next) {
1608 		if (strcmp(type->name, t->name) == 0) {
1609 			/* already found */
1610 			pr_info("Tracer %s already registered\n",
1611 				type->name);
1612 			ret = -1;
1613 			goto out;
1614 		}
1615 	}
1616 
1617 	if (!type->set_flag)
1618 		type->set_flag = &dummy_set_flag;
1619 	if (!type->flags) {
1620 		/*allocate a dummy tracer_flags*/
1621 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622 		if (!type->flags) {
1623 			ret = -ENOMEM;
1624 			goto out;
1625 		}
1626 		type->flags->val = 0;
1627 		type->flags->opts = dummy_tracer_opt;
1628 	} else
1629 		if (!type->flags->opts)
1630 			type->flags->opts = dummy_tracer_opt;
1631 
1632 	/* store the tracer for __set_tracer_option */
1633 	type->flags->trace = type;
1634 
1635 	ret = run_tracer_selftest(type);
1636 	if (ret < 0)
1637 		goto out;
1638 
1639 	type->next = trace_types;
1640 	trace_types = type;
1641 	add_tracer_options(&global_trace, type);
1642 
1643  out:
1644 	tracing_selftest_running = false;
1645 	mutex_unlock(&trace_types_lock);
1646 
1647 	if (ret || !default_bootup_tracer)
1648 		goto out_unlock;
1649 
1650 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651 		goto out_unlock;
1652 
1653 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654 	/* Do we want this tracer to start on bootup? */
1655 	tracing_set_tracer(&global_trace, type->name);
1656 	default_bootup_tracer = NULL;
1657 
1658 	apply_trace_boot_options();
1659 
1660 	/* disable other selftests, since this will break it. */
1661 	tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664 	       type->name);
1665 #endif
1666 
1667  out_unlock:
1668 	return ret;
1669 }
1670 
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673 	struct ring_buffer *buffer = buf->buffer;
1674 
1675 	if (!buffer)
1676 		return;
1677 
1678 	ring_buffer_record_disable(buffer);
1679 
1680 	/* Make sure all commits have finished */
1681 	synchronize_sched();
1682 	ring_buffer_reset_cpu(buffer, cpu);
1683 
1684 	ring_buffer_record_enable(buffer);
1685 }
1686 
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689 	struct ring_buffer *buffer = buf->buffer;
1690 	int cpu;
1691 
1692 	if (!buffer)
1693 		return;
1694 
1695 	ring_buffer_record_disable(buffer);
1696 
1697 	/* Make sure all commits have finished */
1698 	synchronize_sched();
1699 
1700 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701 
1702 	for_each_online_cpu(cpu)
1703 		ring_buffer_reset_cpu(buffer, cpu);
1704 
1705 	ring_buffer_record_enable(buffer);
1706 }
1707 
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711 	struct trace_array *tr;
1712 
1713 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714 		if (!tr->clear_trace)
1715 			continue;
1716 		tr->clear_trace = false;
1717 		tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 		tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721 	}
1722 }
1723 
1724 static int *tgid_map;
1725 
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731 	unsigned *map_cmdline_to_pid;
1732 	unsigned cmdline_num;
1733 	int cmdline_idx;
1734 	char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737 
1738 /* temporary disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740 
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745 
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750 
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752 				    struct saved_cmdlines_buffer *s)
1753 {
1754 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1755 					GFP_KERNEL);
1756 	if (!s->map_cmdline_to_pid)
1757 		return -ENOMEM;
1758 
1759 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1760 	if (!s->saved_cmdlines) {
1761 		kfree(s->map_cmdline_to_pid);
1762 		return -ENOMEM;
1763 	}
1764 
1765 	s->cmdline_idx = 0;
1766 	s->cmdline_num = val;
1767 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1768 	       sizeof(s->map_pid_to_cmdline));
1769 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1770 	       val * sizeof(*s->map_cmdline_to_pid));
1771 
1772 	return 0;
1773 }
1774 
1775 static int trace_create_savedcmd(void)
1776 {
1777 	int ret;
1778 
1779 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1780 	if (!savedcmd)
1781 		return -ENOMEM;
1782 
1783 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1784 	if (ret < 0) {
1785 		kfree(savedcmd);
1786 		savedcmd = NULL;
1787 		return -ENOMEM;
1788 	}
1789 
1790 	return 0;
1791 }
1792 
1793 int is_tracing_stopped(void)
1794 {
1795 	return global_trace.stop_count;
1796 }
1797 
1798 /**
1799  * tracing_start - quick start of the tracer
1800  *
1801  * If tracing is enabled but was stopped by tracing_stop,
1802  * this will start the tracer back up.
1803  */
1804 void tracing_start(void)
1805 {
1806 	struct ring_buffer *buffer;
1807 	unsigned long flags;
1808 
1809 	if (tracing_disabled)
1810 		return;
1811 
1812 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1813 	if (--global_trace.stop_count) {
1814 		if (global_trace.stop_count < 0) {
1815 			/* Someone screwed up their debugging */
1816 			WARN_ON_ONCE(1);
1817 			global_trace.stop_count = 0;
1818 		}
1819 		goto out;
1820 	}
1821 
1822 	/* Prevent the buffers from switching */
1823 	arch_spin_lock(&global_trace.max_lock);
1824 
1825 	buffer = global_trace.trace_buffer.buffer;
1826 	if (buffer)
1827 		ring_buffer_record_enable(buffer);
1828 
1829 #ifdef CONFIG_TRACER_MAX_TRACE
1830 	buffer = global_trace.max_buffer.buffer;
1831 	if (buffer)
1832 		ring_buffer_record_enable(buffer);
1833 #endif
1834 
1835 	arch_spin_unlock(&global_trace.max_lock);
1836 
1837  out:
1838 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1839 }
1840 
1841 static void tracing_start_tr(struct trace_array *tr)
1842 {
1843 	struct ring_buffer *buffer;
1844 	unsigned long flags;
1845 
1846 	if (tracing_disabled)
1847 		return;
1848 
1849 	/* If global, we need to also start the max tracer */
1850 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1851 		return tracing_start();
1852 
1853 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1854 
1855 	if (--tr->stop_count) {
1856 		if (tr->stop_count < 0) {
1857 			/* Someone screwed up their debugging */
1858 			WARN_ON_ONCE(1);
1859 			tr->stop_count = 0;
1860 		}
1861 		goto out;
1862 	}
1863 
1864 	buffer = tr->trace_buffer.buffer;
1865 	if (buffer)
1866 		ring_buffer_record_enable(buffer);
1867 
1868  out:
1869 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1870 }
1871 
1872 /**
1873  * tracing_stop - quick stop of the tracer
1874  *
1875  * Light weight way to stop tracing. Use in conjunction with
1876  * tracing_start.
1877  */
1878 void tracing_stop(void)
1879 {
1880 	struct ring_buffer *buffer;
1881 	unsigned long flags;
1882 
1883 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1884 	if (global_trace.stop_count++)
1885 		goto out;
1886 
1887 	/* Prevent the buffers from switching */
1888 	arch_spin_lock(&global_trace.max_lock);
1889 
1890 	buffer = global_trace.trace_buffer.buffer;
1891 	if (buffer)
1892 		ring_buffer_record_disable(buffer);
1893 
1894 #ifdef CONFIG_TRACER_MAX_TRACE
1895 	buffer = global_trace.max_buffer.buffer;
1896 	if (buffer)
1897 		ring_buffer_record_disable(buffer);
1898 #endif
1899 
1900 	arch_spin_unlock(&global_trace.max_lock);
1901 
1902  out:
1903 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1904 }
1905 
1906 static void tracing_stop_tr(struct trace_array *tr)
1907 {
1908 	struct ring_buffer *buffer;
1909 	unsigned long flags;
1910 
1911 	/* If global, we need to also stop the max tracer */
1912 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1913 		return tracing_stop();
1914 
1915 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1916 	if (tr->stop_count++)
1917 		goto out;
1918 
1919 	buffer = tr->trace_buffer.buffer;
1920 	if (buffer)
1921 		ring_buffer_record_disable(buffer);
1922 
1923  out:
1924 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1925 }
1926 
1927 static int trace_save_cmdline(struct task_struct *tsk)
1928 {
1929 	unsigned pid, idx;
1930 
1931 	/* treat recording of idle task as a success */
1932 	if (!tsk->pid)
1933 		return 1;
1934 
1935 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1936 		return 0;
1937 
1938 	/*
1939 	 * It's not the end of the world if we don't get
1940 	 * the lock, but we also don't want to spin
1941 	 * nor do we want to disable interrupts,
1942 	 * so if we miss here, then better luck next time.
1943 	 */
1944 	if (!arch_spin_trylock(&trace_cmdline_lock))
1945 		return 0;
1946 
1947 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1948 	if (idx == NO_CMDLINE_MAP) {
1949 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1950 
1951 		/*
1952 		 * Check whether the cmdline buffer at idx has a pid
1953 		 * mapped. We are going to overwrite that entry so we
1954 		 * need to clear the map_pid_to_cmdline. Otherwise we
1955 		 * would read the new comm for the old pid.
1956 		 */
1957 		pid = savedcmd->map_cmdline_to_pid[idx];
1958 		if (pid != NO_CMDLINE_MAP)
1959 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1960 
1961 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1962 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1963 
1964 		savedcmd->cmdline_idx = idx;
1965 	}
1966 
1967 	set_cmdline(idx, tsk->comm);
1968 
1969 	arch_spin_unlock(&trace_cmdline_lock);
1970 
1971 	return 1;
1972 }
1973 
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976 	unsigned map;
1977 
1978 	if (!pid) {
1979 		strcpy(comm, "<idle>");
1980 		return;
1981 	}
1982 
1983 	if (WARN_ON_ONCE(pid < 0)) {
1984 		strcpy(comm, "<XXX>");
1985 		return;
1986 	}
1987 
1988 	if (pid > PID_MAX_DEFAULT) {
1989 		strcpy(comm, "<...>");
1990 		return;
1991 	}
1992 
1993 	map = savedcmd->map_pid_to_cmdline[pid];
1994 	if (map != NO_CMDLINE_MAP)
1995 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1996 	else
1997 		strcpy(comm, "<...>");
1998 }
1999 
2000 void trace_find_cmdline(int pid, char comm[])
2001 {
2002 	preempt_disable();
2003 	arch_spin_lock(&trace_cmdline_lock);
2004 
2005 	__trace_find_cmdline(pid, comm);
2006 
2007 	arch_spin_unlock(&trace_cmdline_lock);
2008 	preempt_enable();
2009 }
2010 
2011 int trace_find_tgid(int pid)
2012 {
2013 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2014 		return 0;
2015 
2016 	return tgid_map[pid];
2017 }
2018 
2019 static int trace_save_tgid(struct task_struct *tsk)
2020 {
2021 	/* treat recording of idle task as a success */
2022 	if (!tsk->pid)
2023 		return 1;
2024 
2025 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2026 		return 0;
2027 
2028 	tgid_map[tsk->pid] = tsk->tgid;
2029 	return 1;
2030 }
2031 
2032 static bool tracing_record_taskinfo_skip(int flags)
2033 {
2034 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2035 		return true;
2036 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2037 		return true;
2038 	if (!__this_cpu_read(trace_taskinfo_save))
2039 		return true;
2040 	return false;
2041 }
2042 
2043 /**
2044  * tracing_record_taskinfo - record the task info of a task
2045  *
2046  * @task  - task to record
2047  * @flags - TRACE_RECORD_CMDLINE for recording comm
2048  *        - TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo(struct task_struct *task, int flags)
2051 {
2052 	bool done;
2053 
2054 	if (tracing_record_taskinfo_skip(flags))
2055 		return;
2056 
2057 	/*
2058 	 * Record as much task information as possible. If some fail, continue
2059 	 * to try to record the others.
2060 	 */
2061 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2062 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2063 
2064 	/* If recording any information failed, retry again soon. */
2065 	if (!done)
2066 		return;
2067 
2068 	__this_cpu_write(trace_taskinfo_save, false);
2069 }
2070 
2071 /**
2072  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2073  *
2074  * @prev - previous task during sched_switch
2075  * @next - next task during sched_switch
2076  * @flags - TRACE_RECORD_CMDLINE for recording comm
2077  *          TRACE_RECORD_TGID for recording tgid
2078  */
2079 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2080 					  struct task_struct *next, int flags)
2081 {
2082 	bool done;
2083 
2084 	if (tracing_record_taskinfo_skip(flags))
2085 		return;
2086 
2087 	/*
2088 	 * Record as much task information as possible. If some fail, continue
2089 	 * to try to record the others.
2090 	 */
2091 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2092 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2093 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2094 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2095 
2096 	/* If recording any information failed, retry again soon. */
2097 	if (!done)
2098 		return;
2099 
2100 	__this_cpu_write(trace_taskinfo_save, false);
2101 }
2102 
2103 /* Helpers to record a specific task information */
2104 void tracing_record_cmdline(struct task_struct *task)
2105 {
2106 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2107 }
2108 
2109 void tracing_record_tgid(struct task_struct *task)
2110 {
2111 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2112 }
2113 
2114 /*
2115  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2116  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2117  * simplifies those functions and keeps them in sync.
2118  */
2119 enum print_line_t trace_handle_return(struct trace_seq *s)
2120 {
2121 	return trace_seq_has_overflowed(s) ?
2122 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2123 }
2124 EXPORT_SYMBOL_GPL(trace_handle_return);
2125 
2126 void
2127 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2128 			     int pc)
2129 {
2130 	struct task_struct *tsk = current;
2131 
2132 	entry->preempt_count		= pc & 0xff;
2133 	entry->pid			= (tsk) ? tsk->pid : 0;
2134 	entry->flags =
2135 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2136 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2137 #else
2138 		TRACE_FLAG_IRQS_NOSUPPORT |
2139 #endif
2140 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2141 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2142 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2143 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2144 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2145 }
2146 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2147 
2148 struct ring_buffer_event *
2149 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2150 			  int type,
2151 			  unsigned long len,
2152 			  unsigned long flags, int pc)
2153 {
2154 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2155 }
2156 
2157 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2158 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2159 static int trace_buffered_event_ref;
2160 
2161 /**
2162  * trace_buffered_event_enable - enable buffering events
2163  *
2164  * When events are being filtered, it is quicker to use a temporary
2165  * buffer to write the event data into if there's a likely chance
2166  * that it will not be committed. The discard of the ring buffer
2167  * is not as fast as committing, and is much slower than copying
2168  * a commit.
2169  *
2170  * When an event is to be filtered, allocate per cpu buffers to
2171  * write the event data into, and if the event is filtered and discarded
2172  * it is simply dropped, otherwise, the entire data is to be committed
2173  * in one shot.
2174  */
2175 void trace_buffered_event_enable(void)
2176 {
2177 	struct ring_buffer_event *event;
2178 	struct page *page;
2179 	int cpu;
2180 
2181 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2182 
2183 	if (trace_buffered_event_ref++)
2184 		return;
2185 
2186 	for_each_tracing_cpu(cpu) {
2187 		page = alloc_pages_node(cpu_to_node(cpu),
2188 					GFP_KERNEL | __GFP_NORETRY, 0);
2189 		if (!page)
2190 			goto failed;
2191 
2192 		event = page_address(page);
2193 		memset(event, 0, sizeof(*event));
2194 
2195 		per_cpu(trace_buffered_event, cpu) = event;
2196 
2197 		preempt_disable();
2198 		if (cpu == smp_processor_id() &&
2199 		    this_cpu_read(trace_buffered_event) !=
2200 		    per_cpu(trace_buffered_event, cpu))
2201 			WARN_ON_ONCE(1);
2202 		preempt_enable();
2203 	}
2204 
2205 	return;
2206  failed:
2207 	trace_buffered_event_disable();
2208 }
2209 
2210 static void enable_trace_buffered_event(void *data)
2211 {
2212 	/* Probably not needed, but do it anyway */
2213 	smp_rmb();
2214 	this_cpu_dec(trace_buffered_event_cnt);
2215 }
2216 
2217 static void disable_trace_buffered_event(void *data)
2218 {
2219 	this_cpu_inc(trace_buffered_event_cnt);
2220 }
2221 
2222 /**
2223  * trace_buffered_event_disable - disable buffering events
2224  *
2225  * When a filter is removed, it is faster to not use the buffered
2226  * events, and to commit directly into the ring buffer. Free up
2227  * the temp buffers when there are no more users. This requires
2228  * special synchronization with current events.
2229  */
2230 void trace_buffered_event_disable(void)
2231 {
2232 	int cpu;
2233 
2234 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2235 
2236 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2237 		return;
2238 
2239 	if (--trace_buffered_event_ref)
2240 		return;
2241 
2242 	preempt_disable();
2243 	/* For each CPU, set the buffer as used. */
2244 	smp_call_function_many(tracing_buffer_mask,
2245 			       disable_trace_buffered_event, NULL, 1);
2246 	preempt_enable();
2247 
2248 	/* Wait for all current users to finish */
2249 	synchronize_sched();
2250 
2251 	for_each_tracing_cpu(cpu) {
2252 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2253 		per_cpu(trace_buffered_event, cpu) = NULL;
2254 	}
2255 	/*
2256 	 * Make sure trace_buffered_event is NULL before clearing
2257 	 * trace_buffered_event_cnt.
2258 	 */
2259 	smp_wmb();
2260 
2261 	preempt_disable();
2262 	/* Do the work on each cpu */
2263 	smp_call_function_many(tracing_buffer_mask,
2264 			       enable_trace_buffered_event, NULL, 1);
2265 	preempt_enable();
2266 }
2267 
2268 static struct ring_buffer *temp_buffer;
2269 
2270 struct ring_buffer_event *
2271 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2272 			  struct trace_event_file *trace_file,
2273 			  int type, unsigned long len,
2274 			  unsigned long flags, int pc)
2275 {
2276 	struct ring_buffer_event *entry;
2277 	int val;
2278 
2279 	*current_rb = trace_file->tr->trace_buffer.buffer;
2280 
2281 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2282 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2283 	    (entry = this_cpu_read(trace_buffered_event))) {
2284 		/* Try to use the per cpu buffer first */
2285 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2286 		if (val == 1) {
2287 			trace_event_setup(entry, type, flags, pc);
2288 			entry->array[0] = len;
2289 			return entry;
2290 		}
2291 		this_cpu_dec(trace_buffered_event_cnt);
2292 	}
2293 
2294 	entry = __trace_buffer_lock_reserve(*current_rb,
2295 					    type, len, flags, pc);
2296 	/*
2297 	 * If tracing is off, but we have triggers enabled
2298 	 * we still need to look at the event data. Use the temp_buffer
2299 	 * to store the trace event for the tigger to use. It's recusive
2300 	 * safe and will not be recorded anywhere.
2301 	 */
2302 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2303 		*current_rb = temp_buffer;
2304 		entry = __trace_buffer_lock_reserve(*current_rb,
2305 						    type, len, flags, pc);
2306 	}
2307 	return entry;
2308 }
2309 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2310 
2311 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2312 static DEFINE_MUTEX(tracepoint_printk_mutex);
2313 
2314 static void output_printk(struct trace_event_buffer *fbuffer)
2315 {
2316 	struct trace_event_call *event_call;
2317 	struct trace_event *event;
2318 	unsigned long flags;
2319 	struct trace_iterator *iter = tracepoint_print_iter;
2320 
2321 	/* We should never get here if iter is NULL */
2322 	if (WARN_ON_ONCE(!iter))
2323 		return;
2324 
2325 	event_call = fbuffer->trace_file->event_call;
2326 	if (!event_call || !event_call->event.funcs ||
2327 	    !event_call->event.funcs->trace)
2328 		return;
2329 
2330 	event = &fbuffer->trace_file->event_call->event;
2331 
2332 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2333 	trace_seq_init(&iter->seq);
2334 	iter->ent = fbuffer->entry;
2335 	event_call->event.funcs->trace(iter, 0, event);
2336 	trace_seq_putc(&iter->seq, 0);
2337 	printk("%s", iter->seq.buffer);
2338 
2339 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2340 }
2341 
2342 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2343 			     void __user *buffer, size_t *lenp,
2344 			     loff_t *ppos)
2345 {
2346 	int save_tracepoint_printk;
2347 	int ret;
2348 
2349 	mutex_lock(&tracepoint_printk_mutex);
2350 	save_tracepoint_printk = tracepoint_printk;
2351 
2352 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2353 
2354 	/*
2355 	 * This will force exiting early, as tracepoint_printk
2356 	 * is always zero when tracepoint_printk_iter is not allocated
2357 	 */
2358 	if (!tracepoint_print_iter)
2359 		tracepoint_printk = 0;
2360 
2361 	if (save_tracepoint_printk == tracepoint_printk)
2362 		goto out;
2363 
2364 	if (tracepoint_printk)
2365 		static_key_enable(&tracepoint_printk_key.key);
2366 	else
2367 		static_key_disable(&tracepoint_printk_key.key);
2368 
2369  out:
2370 	mutex_unlock(&tracepoint_printk_mutex);
2371 
2372 	return ret;
2373 }
2374 
2375 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2376 {
2377 	if (static_key_false(&tracepoint_printk_key.key))
2378 		output_printk(fbuffer);
2379 
2380 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2381 				    fbuffer->event, fbuffer->entry,
2382 				    fbuffer->flags, fbuffer->pc);
2383 }
2384 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2385 
2386 /*
2387  * Skip 3:
2388  *
2389  *   trace_buffer_unlock_commit_regs()
2390  *   trace_event_buffer_commit()
2391  *   trace_event_raw_event_xxx()
2392  */
2393 # define STACK_SKIP 3
2394 
2395 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2396 				     struct ring_buffer *buffer,
2397 				     struct ring_buffer_event *event,
2398 				     unsigned long flags, int pc,
2399 				     struct pt_regs *regs)
2400 {
2401 	__buffer_unlock_commit(buffer, event);
2402 
2403 	/*
2404 	 * If regs is not set, then skip the necessary functions.
2405 	 * Note, we can still get here via blktrace, wakeup tracer
2406 	 * and mmiotrace, but that's ok if they lose a function or
2407 	 * two. They are not that meaningful.
2408 	 */
2409 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2410 	ftrace_trace_userstack(buffer, flags, pc);
2411 }
2412 
2413 /*
2414  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2415  */
2416 void
2417 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2418 				   struct ring_buffer_event *event)
2419 {
2420 	__buffer_unlock_commit(buffer, event);
2421 }
2422 
2423 static void
2424 trace_process_export(struct trace_export *export,
2425 	       struct ring_buffer_event *event)
2426 {
2427 	struct trace_entry *entry;
2428 	unsigned int size = 0;
2429 
2430 	entry = ring_buffer_event_data(event);
2431 	size = ring_buffer_event_length(event);
2432 	export->write(export, entry, size);
2433 }
2434 
2435 static DEFINE_MUTEX(ftrace_export_lock);
2436 
2437 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2438 
2439 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2440 
2441 static inline void ftrace_exports_enable(void)
2442 {
2443 	static_branch_enable(&ftrace_exports_enabled);
2444 }
2445 
2446 static inline void ftrace_exports_disable(void)
2447 {
2448 	static_branch_disable(&ftrace_exports_enabled);
2449 }
2450 
2451 void ftrace_exports(struct ring_buffer_event *event)
2452 {
2453 	struct trace_export *export;
2454 
2455 	preempt_disable_notrace();
2456 
2457 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2458 	while (export) {
2459 		trace_process_export(export, event);
2460 		export = rcu_dereference_raw_notrace(export->next);
2461 	}
2462 
2463 	preempt_enable_notrace();
2464 }
2465 
2466 static inline void
2467 add_trace_export(struct trace_export **list, struct trace_export *export)
2468 {
2469 	rcu_assign_pointer(export->next, *list);
2470 	/*
2471 	 * We are entering export into the list but another
2472 	 * CPU might be walking that list. We need to make sure
2473 	 * the export->next pointer is valid before another CPU sees
2474 	 * the export pointer included into the list.
2475 	 */
2476 	rcu_assign_pointer(*list, export);
2477 }
2478 
2479 static inline int
2480 rm_trace_export(struct trace_export **list, struct trace_export *export)
2481 {
2482 	struct trace_export **p;
2483 
2484 	for (p = list; *p != NULL; p = &(*p)->next)
2485 		if (*p == export)
2486 			break;
2487 
2488 	if (*p != export)
2489 		return -1;
2490 
2491 	rcu_assign_pointer(*p, (*p)->next);
2492 
2493 	return 0;
2494 }
2495 
2496 static inline void
2497 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499 	if (*list == NULL)
2500 		ftrace_exports_enable();
2501 
2502 	add_trace_export(list, export);
2503 }
2504 
2505 static inline int
2506 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2507 {
2508 	int ret;
2509 
2510 	ret = rm_trace_export(list, export);
2511 	if (*list == NULL)
2512 		ftrace_exports_disable();
2513 
2514 	return ret;
2515 }
2516 
2517 int register_ftrace_export(struct trace_export *export)
2518 {
2519 	if (WARN_ON_ONCE(!export->write))
2520 		return -1;
2521 
2522 	mutex_lock(&ftrace_export_lock);
2523 
2524 	add_ftrace_export(&ftrace_exports_list, export);
2525 
2526 	mutex_unlock(&ftrace_export_lock);
2527 
2528 	return 0;
2529 }
2530 EXPORT_SYMBOL_GPL(register_ftrace_export);
2531 
2532 int unregister_ftrace_export(struct trace_export *export)
2533 {
2534 	int ret;
2535 
2536 	mutex_lock(&ftrace_export_lock);
2537 
2538 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2539 
2540 	mutex_unlock(&ftrace_export_lock);
2541 
2542 	return ret;
2543 }
2544 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2545 
2546 void
2547 trace_function(struct trace_array *tr,
2548 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2549 	       int pc)
2550 {
2551 	struct trace_event_call *call = &event_function;
2552 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2553 	struct ring_buffer_event *event;
2554 	struct ftrace_entry *entry;
2555 
2556 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2557 					    flags, pc);
2558 	if (!event)
2559 		return;
2560 	entry	= ring_buffer_event_data(event);
2561 	entry->ip			= ip;
2562 	entry->parent_ip		= parent_ip;
2563 
2564 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2565 		if (static_branch_unlikely(&ftrace_exports_enabled))
2566 			ftrace_exports(event);
2567 		__buffer_unlock_commit(buffer, event);
2568 	}
2569 }
2570 
2571 #ifdef CONFIG_STACKTRACE
2572 
2573 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2574 struct ftrace_stack {
2575 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2576 };
2577 
2578 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2579 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2580 
2581 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2582 				 unsigned long flags,
2583 				 int skip, int pc, struct pt_regs *regs)
2584 {
2585 	struct trace_event_call *call = &event_kernel_stack;
2586 	struct ring_buffer_event *event;
2587 	struct stack_entry *entry;
2588 	struct stack_trace trace;
2589 	int use_stack;
2590 	int size = FTRACE_STACK_ENTRIES;
2591 
2592 	trace.nr_entries	= 0;
2593 	trace.skip		= skip;
2594 
2595 	/*
2596 	 * Add one, for this function and the call to save_stack_trace()
2597 	 * If regs is set, then these functions will not be in the way.
2598 	 */
2599 #ifndef CONFIG_UNWINDER_ORC
2600 	if (!regs)
2601 		trace.skip++;
2602 #endif
2603 
2604 	/*
2605 	 * Since events can happen in NMIs there's no safe way to
2606 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2607 	 * or NMI comes in, it will just have to use the default
2608 	 * FTRACE_STACK_SIZE.
2609 	 */
2610 	preempt_disable_notrace();
2611 
2612 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2613 	/*
2614 	 * We don't need any atomic variables, just a barrier.
2615 	 * If an interrupt comes in, we don't care, because it would
2616 	 * have exited and put the counter back to what we want.
2617 	 * We just need a barrier to keep gcc from moving things
2618 	 * around.
2619 	 */
2620 	barrier();
2621 	if (use_stack == 1) {
2622 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2623 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2624 
2625 		if (regs)
2626 			save_stack_trace_regs(regs, &trace);
2627 		else
2628 			save_stack_trace(&trace);
2629 
2630 		if (trace.nr_entries > size)
2631 			size = trace.nr_entries;
2632 	} else
2633 		/* From now on, use_stack is a boolean */
2634 		use_stack = 0;
2635 
2636 	size *= sizeof(unsigned long);
2637 
2638 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2639 					    sizeof(*entry) + size, flags, pc);
2640 	if (!event)
2641 		goto out;
2642 	entry = ring_buffer_event_data(event);
2643 
2644 	memset(&entry->caller, 0, size);
2645 
2646 	if (use_stack)
2647 		memcpy(&entry->caller, trace.entries,
2648 		       trace.nr_entries * sizeof(unsigned long));
2649 	else {
2650 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2651 		trace.entries		= entry->caller;
2652 		if (regs)
2653 			save_stack_trace_regs(regs, &trace);
2654 		else
2655 			save_stack_trace(&trace);
2656 	}
2657 
2658 	entry->size = trace.nr_entries;
2659 
2660 	if (!call_filter_check_discard(call, entry, buffer, event))
2661 		__buffer_unlock_commit(buffer, event);
2662 
2663  out:
2664 	/* Again, don't let gcc optimize things here */
2665 	barrier();
2666 	__this_cpu_dec(ftrace_stack_reserve);
2667 	preempt_enable_notrace();
2668 
2669 }
2670 
2671 static inline void ftrace_trace_stack(struct trace_array *tr,
2672 				      struct ring_buffer *buffer,
2673 				      unsigned long flags,
2674 				      int skip, int pc, struct pt_regs *regs)
2675 {
2676 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2677 		return;
2678 
2679 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2680 }
2681 
2682 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2683 		   int pc)
2684 {
2685 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2686 
2687 	if (rcu_is_watching()) {
2688 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2689 		return;
2690 	}
2691 
2692 	/*
2693 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2694 	 * but if the above rcu_is_watching() failed, then the NMI
2695 	 * triggered someplace critical, and rcu_irq_enter() should
2696 	 * not be called from NMI.
2697 	 */
2698 	if (unlikely(in_nmi()))
2699 		return;
2700 
2701 	rcu_irq_enter_irqson();
2702 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2703 	rcu_irq_exit_irqson();
2704 }
2705 
2706 /**
2707  * trace_dump_stack - record a stack back trace in the trace buffer
2708  * @skip: Number of functions to skip (helper handlers)
2709  */
2710 void trace_dump_stack(int skip)
2711 {
2712 	unsigned long flags;
2713 
2714 	if (tracing_disabled || tracing_selftest_running)
2715 		return;
2716 
2717 	local_save_flags(flags);
2718 
2719 #ifndef CONFIG_UNWINDER_ORC
2720 	/* Skip 1 to skip this function. */
2721 	skip++;
2722 #endif
2723 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2724 			     flags, skip, preempt_count(), NULL);
2725 }
2726 
2727 static DEFINE_PER_CPU(int, user_stack_count);
2728 
2729 void
2730 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2731 {
2732 	struct trace_event_call *call = &event_user_stack;
2733 	struct ring_buffer_event *event;
2734 	struct userstack_entry *entry;
2735 	struct stack_trace trace;
2736 
2737 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2738 		return;
2739 
2740 	/*
2741 	 * NMIs can not handle page faults, even with fix ups.
2742 	 * The save user stack can (and often does) fault.
2743 	 */
2744 	if (unlikely(in_nmi()))
2745 		return;
2746 
2747 	/*
2748 	 * prevent recursion, since the user stack tracing may
2749 	 * trigger other kernel events.
2750 	 */
2751 	preempt_disable();
2752 	if (__this_cpu_read(user_stack_count))
2753 		goto out;
2754 
2755 	__this_cpu_inc(user_stack_count);
2756 
2757 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2758 					    sizeof(*entry), flags, pc);
2759 	if (!event)
2760 		goto out_drop_count;
2761 	entry	= ring_buffer_event_data(event);
2762 
2763 	entry->tgid		= current->tgid;
2764 	memset(&entry->caller, 0, sizeof(entry->caller));
2765 
2766 	trace.nr_entries	= 0;
2767 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2768 	trace.skip		= 0;
2769 	trace.entries		= entry->caller;
2770 
2771 	save_stack_trace_user(&trace);
2772 	if (!call_filter_check_discard(call, entry, buffer, event))
2773 		__buffer_unlock_commit(buffer, event);
2774 
2775  out_drop_count:
2776 	__this_cpu_dec(user_stack_count);
2777  out:
2778 	preempt_enable();
2779 }
2780 
2781 #ifdef UNUSED
2782 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2783 {
2784 	ftrace_trace_userstack(tr, flags, preempt_count());
2785 }
2786 #endif /* UNUSED */
2787 
2788 #endif /* CONFIG_STACKTRACE */
2789 
2790 /* created for use with alloc_percpu */
2791 struct trace_buffer_struct {
2792 	int nesting;
2793 	char buffer[4][TRACE_BUF_SIZE];
2794 };
2795 
2796 static struct trace_buffer_struct *trace_percpu_buffer;
2797 
2798 /*
2799  * Thise allows for lockless recording.  If we're nested too deeply, then
2800  * this returns NULL.
2801  */
2802 static char *get_trace_buf(void)
2803 {
2804 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2805 
2806 	if (!buffer || buffer->nesting >= 4)
2807 		return NULL;
2808 
2809 	buffer->nesting++;
2810 
2811 	/* Interrupts must see nesting incremented before we use the buffer */
2812 	barrier();
2813 	return &buffer->buffer[buffer->nesting][0];
2814 }
2815 
2816 static void put_trace_buf(void)
2817 {
2818 	/* Don't let the decrement of nesting leak before this */
2819 	barrier();
2820 	this_cpu_dec(trace_percpu_buffer->nesting);
2821 }
2822 
2823 static int alloc_percpu_trace_buffer(void)
2824 {
2825 	struct trace_buffer_struct *buffers;
2826 
2827 	buffers = alloc_percpu(struct trace_buffer_struct);
2828 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2829 		return -ENOMEM;
2830 
2831 	trace_percpu_buffer = buffers;
2832 	return 0;
2833 }
2834 
2835 static int buffers_allocated;
2836 
2837 void trace_printk_init_buffers(void)
2838 {
2839 	if (buffers_allocated)
2840 		return;
2841 
2842 	if (alloc_percpu_trace_buffer())
2843 		return;
2844 
2845 	/* trace_printk() is for debug use only. Don't use it in production. */
2846 
2847 	pr_warn("\n");
2848 	pr_warn("**********************************************************\n");
2849 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2850 	pr_warn("**                                                      **\n");
2851 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2852 	pr_warn("**                                                      **\n");
2853 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2854 	pr_warn("** unsafe for production use.                           **\n");
2855 	pr_warn("**                                                      **\n");
2856 	pr_warn("** If you see this message and you are not debugging    **\n");
2857 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2858 	pr_warn("**                                                      **\n");
2859 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2860 	pr_warn("**********************************************************\n");
2861 
2862 	/* Expand the buffers to set size */
2863 	tracing_update_buffers();
2864 
2865 	buffers_allocated = 1;
2866 
2867 	/*
2868 	 * trace_printk_init_buffers() can be called by modules.
2869 	 * If that happens, then we need to start cmdline recording
2870 	 * directly here. If the global_trace.buffer is already
2871 	 * allocated here, then this was called by module code.
2872 	 */
2873 	if (global_trace.trace_buffer.buffer)
2874 		tracing_start_cmdline_record();
2875 }
2876 
2877 void trace_printk_start_comm(void)
2878 {
2879 	/* Start tracing comms if trace printk is set */
2880 	if (!buffers_allocated)
2881 		return;
2882 	tracing_start_cmdline_record();
2883 }
2884 
2885 static void trace_printk_start_stop_comm(int enabled)
2886 {
2887 	if (!buffers_allocated)
2888 		return;
2889 
2890 	if (enabled)
2891 		tracing_start_cmdline_record();
2892 	else
2893 		tracing_stop_cmdline_record();
2894 }
2895 
2896 /**
2897  * trace_vbprintk - write binary msg to tracing buffer
2898  *
2899  */
2900 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2901 {
2902 	struct trace_event_call *call = &event_bprint;
2903 	struct ring_buffer_event *event;
2904 	struct ring_buffer *buffer;
2905 	struct trace_array *tr = &global_trace;
2906 	struct bprint_entry *entry;
2907 	unsigned long flags;
2908 	char *tbuffer;
2909 	int len = 0, size, pc;
2910 
2911 	if (unlikely(tracing_selftest_running || tracing_disabled))
2912 		return 0;
2913 
2914 	/* Don't pollute graph traces with trace_vprintk internals */
2915 	pause_graph_tracing();
2916 
2917 	pc = preempt_count();
2918 	preempt_disable_notrace();
2919 
2920 	tbuffer = get_trace_buf();
2921 	if (!tbuffer) {
2922 		len = 0;
2923 		goto out_nobuffer;
2924 	}
2925 
2926 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2927 
2928 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2929 		goto out;
2930 
2931 	local_save_flags(flags);
2932 	size = sizeof(*entry) + sizeof(u32) * len;
2933 	buffer = tr->trace_buffer.buffer;
2934 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2935 					    flags, pc);
2936 	if (!event)
2937 		goto out;
2938 	entry = ring_buffer_event_data(event);
2939 	entry->ip			= ip;
2940 	entry->fmt			= fmt;
2941 
2942 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2943 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2944 		__buffer_unlock_commit(buffer, event);
2945 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2946 	}
2947 
2948 out:
2949 	put_trace_buf();
2950 
2951 out_nobuffer:
2952 	preempt_enable_notrace();
2953 	unpause_graph_tracing();
2954 
2955 	return len;
2956 }
2957 EXPORT_SYMBOL_GPL(trace_vbprintk);
2958 
2959 static int
2960 __trace_array_vprintk(struct ring_buffer *buffer,
2961 		      unsigned long ip, const char *fmt, va_list args)
2962 {
2963 	struct trace_event_call *call = &event_print;
2964 	struct ring_buffer_event *event;
2965 	int len = 0, size, pc;
2966 	struct print_entry *entry;
2967 	unsigned long flags;
2968 	char *tbuffer;
2969 
2970 	if (tracing_disabled || tracing_selftest_running)
2971 		return 0;
2972 
2973 	/* Don't pollute graph traces with trace_vprintk internals */
2974 	pause_graph_tracing();
2975 
2976 	pc = preempt_count();
2977 	preempt_disable_notrace();
2978 
2979 
2980 	tbuffer = get_trace_buf();
2981 	if (!tbuffer) {
2982 		len = 0;
2983 		goto out_nobuffer;
2984 	}
2985 
2986 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2987 
2988 	local_save_flags(flags);
2989 	size = sizeof(*entry) + len + 1;
2990 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2991 					    flags, pc);
2992 	if (!event)
2993 		goto out;
2994 	entry = ring_buffer_event_data(event);
2995 	entry->ip = ip;
2996 
2997 	memcpy(&entry->buf, tbuffer, len + 1);
2998 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2999 		__buffer_unlock_commit(buffer, event);
3000 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3001 	}
3002 
3003 out:
3004 	put_trace_buf();
3005 
3006 out_nobuffer:
3007 	preempt_enable_notrace();
3008 	unpause_graph_tracing();
3009 
3010 	return len;
3011 }
3012 
3013 int trace_array_vprintk(struct trace_array *tr,
3014 			unsigned long ip, const char *fmt, va_list args)
3015 {
3016 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3017 }
3018 
3019 int trace_array_printk(struct trace_array *tr,
3020 		       unsigned long ip, const char *fmt, ...)
3021 {
3022 	int ret;
3023 	va_list ap;
3024 
3025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3026 		return 0;
3027 
3028 	va_start(ap, fmt);
3029 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3030 	va_end(ap);
3031 	return ret;
3032 }
3033 
3034 int trace_array_printk_buf(struct ring_buffer *buffer,
3035 			   unsigned long ip, const char *fmt, ...)
3036 {
3037 	int ret;
3038 	va_list ap;
3039 
3040 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3041 		return 0;
3042 
3043 	va_start(ap, fmt);
3044 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3045 	va_end(ap);
3046 	return ret;
3047 }
3048 
3049 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3050 {
3051 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_vprintk);
3054 
3055 static void trace_iterator_increment(struct trace_iterator *iter)
3056 {
3057 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3058 
3059 	iter->idx++;
3060 	if (buf_iter)
3061 		ring_buffer_read(buf_iter, NULL);
3062 }
3063 
3064 static struct trace_entry *
3065 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3066 		unsigned long *lost_events)
3067 {
3068 	struct ring_buffer_event *event;
3069 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3070 
3071 	if (buf_iter)
3072 		event = ring_buffer_iter_peek(buf_iter, ts);
3073 	else
3074 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3075 					 lost_events);
3076 
3077 	if (event) {
3078 		iter->ent_size = ring_buffer_event_length(event);
3079 		return ring_buffer_event_data(event);
3080 	}
3081 	iter->ent_size = 0;
3082 	return NULL;
3083 }
3084 
3085 static struct trace_entry *
3086 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3087 		  unsigned long *missing_events, u64 *ent_ts)
3088 {
3089 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3090 	struct trace_entry *ent, *next = NULL;
3091 	unsigned long lost_events = 0, next_lost = 0;
3092 	int cpu_file = iter->cpu_file;
3093 	u64 next_ts = 0, ts;
3094 	int next_cpu = -1;
3095 	int next_size = 0;
3096 	int cpu;
3097 
3098 	/*
3099 	 * If we are in a per_cpu trace file, don't bother by iterating over
3100 	 * all cpu and peek directly.
3101 	 */
3102 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3103 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3104 			return NULL;
3105 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3106 		if (ent_cpu)
3107 			*ent_cpu = cpu_file;
3108 
3109 		return ent;
3110 	}
3111 
3112 	for_each_tracing_cpu(cpu) {
3113 
3114 		if (ring_buffer_empty_cpu(buffer, cpu))
3115 			continue;
3116 
3117 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3118 
3119 		/*
3120 		 * Pick the entry with the smallest timestamp:
3121 		 */
3122 		if (ent && (!next || ts < next_ts)) {
3123 			next = ent;
3124 			next_cpu = cpu;
3125 			next_ts = ts;
3126 			next_lost = lost_events;
3127 			next_size = iter->ent_size;
3128 		}
3129 	}
3130 
3131 	iter->ent_size = next_size;
3132 
3133 	if (ent_cpu)
3134 		*ent_cpu = next_cpu;
3135 
3136 	if (ent_ts)
3137 		*ent_ts = next_ts;
3138 
3139 	if (missing_events)
3140 		*missing_events = next_lost;
3141 
3142 	return next;
3143 }
3144 
3145 /* Find the next real entry, without updating the iterator itself */
3146 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3147 					  int *ent_cpu, u64 *ent_ts)
3148 {
3149 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3150 }
3151 
3152 /* Find the next real entry, and increment the iterator to the next entry */
3153 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3154 {
3155 	iter->ent = __find_next_entry(iter, &iter->cpu,
3156 				      &iter->lost_events, &iter->ts);
3157 
3158 	if (iter->ent)
3159 		trace_iterator_increment(iter);
3160 
3161 	return iter->ent ? iter : NULL;
3162 }
3163 
3164 static void trace_consume(struct trace_iterator *iter)
3165 {
3166 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3167 			    &iter->lost_events);
3168 }
3169 
3170 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3171 {
3172 	struct trace_iterator *iter = m->private;
3173 	int i = (int)*pos;
3174 	void *ent;
3175 
3176 	WARN_ON_ONCE(iter->leftover);
3177 
3178 	(*pos)++;
3179 
3180 	/* can't go backwards */
3181 	if (iter->idx > i)
3182 		return NULL;
3183 
3184 	if (iter->idx < 0)
3185 		ent = trace_find_next_entry_inc(iter);
3186 	else
3187 		ent = iter;
3188 
3189 	while (ent && iter->idx < i)
3190 		ent = trace_find_next_entry_inc(iter);
3191 
3192 	iter->pos = *pos;
3193 
3194 	return ent;
3195 }
3196 
3197 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3198 {
3199 	struct ring_buffer_event *event;
3200 	struct ring_buffer_iter *buf_iter;
3201 	unsigned long entries = 0;
3202 	u64 ts;
3203 
3204 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3205 
3206 	buf_iter = trace_buffer_iter(iter, cpu);
3207 	if (!buf_iter)
3208 		return;
3209 
3210 	ring_buffer_iter_reset(buf_iter);
3211 
3212 	/*
3213 	 * We could have the case with the max latency tracers
3214 	 * that a reset never took place on a cpu. This is evident
3215 	 * by the timestamp being before the start of the buffer.
3216 	 */
3217 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3218 		if (ts >= iter->trace_buffer->time_start)
3219 			break;
3220 		entries++;
3221 		ring_buffer_read(buf_iter, NULL);
3222 	}
3223 
3224 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3225 }
3226 
3227 /*
3228  * The current tracer is copied to avoid a global locking
3229  * all around.
3230  */
3231 static void *s_start(struct seq_file *m, loff_t *pos)
3232 {
3233 	struct trace_iterator *iter = m->private;
3234 	struct trace_array *tr = iter->tr;
3235 	int cpu_file = iter->cpu_file;
3236 	void *p = NULL;
3237 	loff_t l = 0;
3238 	int cpu;
3239 
3240 	/*
3241 	 * copy the tracer to avoid using a global lock all around.
3242 	 * iter->trace is a copy of current_trace, the pointer to the
3243 	 * name may be used instead of a strcmp(), as iter->trace->name
3244 	 * will point to the same string as current_trace->name.
3245 	 */
3246 	mutex_lock(&trace_types_lock);
3247 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3248 		*iter->trace = *tr->current_trace;
3249 	mutex_unlock(&trace_types_lock);
3250 
3251 #ifdef CONFIG_TRACER_MAX_TRACE
3252 	if (iter->snapshot && iter->trace->use_max_tr)
3253 		return ERR_PTR(-EBUSY);
3254 #endif
3255 
3256 	if (!iter->snapshot)
3257 		atomic_inc(&trace_record_taskinfo_disabled);
3258 
3259 	if (*pos != iter->pos) {
3260 		iter->ent = NULL;
3261 		iter->cpu = 0;
3262 		iter->idx = -1;
3263 
3264 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3265 			for_each_tracing_cpu(cpu)
3266 				tracing_iter_reset(iter, cpu);
3267 		} else
3268 			tracing_iter_reset(iter, cpu_file);
3269 
3270 		iter->leftover = 0;
3271 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3272 			;
3273 
3274 	} else {
3275 		/*
3276 		 * If we overflowed the seq_file before, then we want
3277 		 * to just reuse the trace_seq buffer again.
3278 		 */
3279 		if (iter->leftover)
3280 			p = iter;
3281 		else {
3282 			l = *pos - 1;
3283 			p = s_next(m, p, &l);
3284 		}
3285 	}
3286 
3287 	trace_event_read_lock();
3288 	trace_access_lock(cpu_file);
3289 	return p;
3290 }
3291 
3292 static void s_stop(struct seq_file *m, void *p)
3293 {
3294 	struct trace_iterator *iter = m->private;
3295 
3296 #ifdef CONFIG_TRACER_MAX_TRACE
3297 	if (iter->snapshot && iter->trace->use_max_tr)
3298 		return;
3299 #endif
3300 
3301 	if (!iter->snapshot)
3302 		atomic_dec(&trace_record_taskinfo_disabled);
3303 
3304 	trace_access_unlock(iter->cpu_file);
3305 	trace_event_read_unlock();
3306 }
3307 
3308 static void
3309 get_total_entries(struct trace_buffer *buf,
3310 		  unsigned long *total, unsigned long *entries)
3311 {
3312 	unsigned long count;
3313 	int cpu;
3314 
3315 	*total = 0;
3316 	*entries = 0;
3317 
3318 	for_each_tracing_cpu(cpu) {
3319 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3320 		/*
3321 		 * If this buffer has skipped entries, then we hold all
3322 		 * entries for the trace and we need to ignore the
3323 		 * ones before the time stamp.
3324 		 */
3325 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3326 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3327 			/* total is the same as the entries */
3328 			*total += count;
3329 		} else
3330 			*total += count +
3331 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3332 		*entries += count;
3333 	}
3334 }
3335 
3336 static void print_lat_help_header(struct seq_file *m)
3337 {
3338 	seq_puts(m, "#                  _------=> CPU#            \n"
3339 		    "#                 / _-----=> irqs-off        \n"
3340 		    "#                | / _----=> need-resched    \n"
3341 		    "#                || / _---=> hardirq/softirq \n"
3342 		    "#                ||| / _--=> preempt-depth   \n"
3343 		    "#                |||| /     delay            \n"
3344 		    "#  cmd     pid   ||||| time  |   caller      \n"
3345 		    "#     \\   /      |||||  \\    |   /         \n");
3346 }
3347 
3348 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3349 {
3350 	unsigned long total;
3351 	unsigned long entries;
3352 
3353 	get_total_entries(buf, &total, &entries);
3354 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3355 		   entries, total, num_online_cpus());
3356 	seq_puts(m, "#\n");
3357 }
3358 
3359 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3360 				   unsigned int flags)
3361 {
3362 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3363 
3364 	print_event_info(buf, m);
3365 
3366 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3367 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3368 }
3369 
3370 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3371 				       unsigned int flags)
3372 {
3373 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3374 	const char tgid_space[] = "          ";
3375 	const char space[] = "  ";
3376 
3377 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3378 		   tgid ? tgid_space : space);
3379 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3380 		   tgid ? tgid_space : space);
3381 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3382 		   tgid ? tgid_space : space);
3383 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3384 		   tgid ? tgid_space : space);
3385 	seq_printf(m, "#                          %s||| /     delay\n",
3386 		   tgid ? tgid_space : space);
3387 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3388 		   tgid ? "   TGID   " : space);
3389 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3390 		   tgid ? "     |    " : space);
3391 }
3392 
3393 void
3394 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3395 {
3396 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3397 	struct trace_buffer *buf = iter->trace_buffer;
3398 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3399 	struct tracer *type = iter->trace;
3400 	unsigned long entries;
3401 	unsigned long total;
3402 	const char *name = "preemption";
3403 
3404 	name = type->name;
3405 
3406 	get_total_entries(buf, &total, &entries);
3407 
3408 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3409 		   name, UTS_RELEASE);
3410 	seq_puts(m, "# -----------------------------------"
3411 		 "---------------------------------\n");
3412 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3413 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3414 		   nsecs_to_usecs(data->saved_latency),
3415 		   entries,
3416 		   total,
3417 		   buf->cpu,
3418 #if defined(CONFIG_PREEMPT_NONE)
3419 		   "server",
3420 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3421 		   "desktop",
3422 #elif defined(CONFIG_PREEMPT)
3423 		   "preempt",
3424 #else
3425 		   "unknown",
3426 #endif
3427 		   /* These are reserved for later use */
3428 		   0, 0, 0, 0);
3429 #ifdef CONFIG_SMP
3430 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3431 #else
3432 	seq_puts(m, ")\n");
3433 #endif
3434 	seq_puts(m, "#    -----------------\n");
3435 	seq_printf(m, "#    | task: %.16s-%d "
3436 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3437 		   data->comm, data->pid,
3438 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3439 		   data->policy, data->rt_priority);
3440 	seq_puts(m, "#    -----------------\n");
3441 
3442 	if (data->critical_start) {
3443 		seq_puts(m, "#  => started at: ");
3444 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3445 		trace_print_seq(m, &iter->seq);
3446 		seq_puts(m, "\n#  => ended at:   ");
3447 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3448 		trace_print_seq(m, &iter->seq);
3449 		seq_puts(m, "\n#\n");
3450 	}
3451 
3452 	seq_puts(m, "#\n");
3453 }
3454 
3455 static void test_cpu_buff_start(struct trace_iterator *iter)
3456 {
3457 	struct trace_seq *s = &iter->seq;
3458 	struct trace_array *tr = iter->tr;
3459 
3460 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3461 		return;
3462 
3463 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3464 		return;
3465 
3466 	if (cpumask_available(iter->started) &&
3467 	    cpumask_test_cpu(iter->cpu, iter->started))
3468 		return;
3469 
3470 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3471 		return;
3472 
3473 	if (cpumask_available(iter->started))
3474 		cpumask_set_cpu(iter->cpu, iter->started);
3475 
3476 	/* Don't print started cpu buffer for the first entry of the trace */
3477 	if (iter->idx > 1)
3478 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3479 				iter->cpu);
3480 }
3481 
3482 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3483 {
3484 	struct trace_array *tr = iter->tr;
3485 	struct trace_seq *s = &iter->seq;
3486 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3487 	struct trace_entry *entry;
3488 	struct trace_event *event;
3489 
3490 	entry = iter->ent;
3491 
3492 	test_cpu_buff_start(iter);
3493 
3494 	event = ftrace_find_event(entry->type);
3495 
3496 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3497 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3498 			trace_print_lat_context(iter);
3499 		else
3500 			trace_print_context(iter);
3501 	}
3502 
3503 	if (trace_seq_has_overflowed(s))
3504 		return TRACE_TYPE_PARTIAL_LINE;
3505 
3506 	if (event)
3507 		return event->funcs->trace(iter, sym_flags, event);
3508 
3509 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3510 
3511 	return trace_handle_return(s);
3512 }
3513 
3514 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3515 {
3516 	struct trace_array *tr = iter->tr;
3517 	struct trace_seq *s = &iter->seq;
3518 	struct trace_entry *entry;
3519 	struct trace_event *event;
3520 
3521 	entry = iter->ent;
3522 
3523 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3524 		trace_seq_printf(s, "%d %d %llu ",
3525 				 entry->pid, iter->cpu, iter->ts);
3526 
3527 	if (trace_seq_has_overflowed(s))
3528 		return TRACE_TYPE_PARTIAL_LINE;
3529 
3530 	event = ftrace_find_event(entry->type);
3531 	if (event)
3532 		return event->funcs->raw(iter, 0, event);
3533 
3534 	trace_seq_printf(s, "%d ?\n", entry->type);
3535 
3536 	return trace_handle_return(s);
3537 }
3538 
3539 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3540 {
3541 	struct trace_array *tr = iter->tr;
3542 	struct trace_seq *s = &iter->seq;
3543 	unsigned char newline = '\n';
3544 	struct trace_entry *entry;
3545 	struct trace_event *event;
3546 
3547 	entry = iter->ent;
3548 
3549 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3550 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3551 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3552 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3553 		if (trace_seq_has_overflowed(s))
3554 			return TRACE_TYPE_PARTIAL_LINE;
3555 	}
3556 
3557 	event = ftrace_find_event(entry->type);
3558 	if (event) {
3559 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3560 		if (ret != TRACE_TYPE_HANDLED)
3561 			return ret;
3562 	}
3563 
3564 	SEQ_PUT_FIELD(s, newline);
3565 
3566 	return trace_handle_return(s);
3567 }
3568 
3569 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3570 {
3571 	struct trace_array *tr = iter->tr;
3572 	struct trace_seq *s = &iter->seq;
3573 	struct trace_entry *entry;
3574 	struct trace_event *event;
3575 
3576 	entry = iter->ent;
3577 
3578 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3579 		SEQ_PUT_FIELD(s, entry->pid);
3580 		SEQ_PUT_FIELD(s, iter->cpu);
3581 		SEQ_PUT_FIELD(s, iter->ts);
3582 		if (trace_seq_has_overflowed(s))
3583 			return TRACE_TYPE_PARTIAL_LINE;
3584 	}
3585 
3586 	event = ftrace_find_event(entry->type);
3587 	return event ? event->funcs->binary(iter, 0, event) :
3588 		TRACE_TYPE_HANDLED;
3589 }
3590 
3591 int trace_empty(struct trace_iterator *iter)
3592 {
3593 	struct ring_buffer_iter *buf_iter;
3594 	int cpu;
3595 
3596 	/* If we are looking at one CPU buffer, only check that one */
3597 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3598 		cpu = iter->cpu_file;
3599 		buf_iter = trace_buffer_iter(iter, cpu);
3600 		if (buf_iter) {
3601 			if (!ring_buffer_iter_empty(buf_iter))
3602 				return 0;
3603 		} else {
3604 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3605 				return 0;
3606 		}
3607 		return 1;
3608 	}
3609 
3610 	for_each_tracing_cpu(cpu) {
3611 		buf_iter = trace_buffer_iter(iter, cpu);
3612 		if (buf_iter) {
3613 			if (!ring_buffer_iter_empty(buf_iter))
3614 				return 0;
3615 		} else {
3616 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617 				return 0;
3618 		}
3619 	}
3620 
3621 	return 1;
3622 }
3623 
3624 /*  Called with trace_event_read_lock() held. */
3625 enum print_line_t print_trace_line(struct trace_iterator *iter)
3626 {
3627 	struct trace_array *tr = iter->tr;
3628 	unsigned long trace_flags = tr->trace_flags;
3629 	enum print_line_t ret;
3630 
3631 	if (iter->lost_events) {
3632 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3633 				 iter->cpu, iter->lost_events);
3634 		if (trace_seq_has_overflowed(&iter->seq))
3635 			return TRACE_TYPE_PARTIAL_LINE;
3636 	}
3637 
3638 	if (iter->trace && iter->trace->print_line) {
3639 		ret = iter->trace->print_line(iter);
3640 		if (ret != TRACE_TYPE_UNHANDLED)
3641 			return ret;
3642 	}
3643 
3644 	if (iter->ent->type == TRACE_BPUTS &&
3645 			trace_flags & TRACE_ITER_PRINTK &&
3646 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3647 		return trace_print_bputs_msg_only(iter);
3648 
3649 	if (iter->ent->type == TRACE_BPRINT &&
3650 			trace_flags & TRACE_ITER_PRINTK &&
3651 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3652 		return trace_print_bprintk_msg_only(iter);
3653 
3654 	if (iter->ent->type == TRACE_PRINT &&
3655 			trace_flags & TRACE_ITER_PRINTK &&
3656 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3657 		return trace_print_printk_msg_only(iter);
3658 
3659 	if (trace_flags & TRACE_ITER_BIN)
3660 		return print_bin_fmt(iter);
3661 
3662 	if (trace_flags & TRACE_ITER_HEX)
3663 		return print_hex_fmt(iter);
3664 
3665 	if (trace_flags & TRACE_ITER_RAW)
3666 		return print_raw_fmt(iter);
3667 
3668 	return print_trace_fmt(iter);
3669 }
3670 
3671 void trace_latency_header(struct seq_file *m)
3672 {
3673 	struct trace_iterator *iter = m->private;
3674 	struct trace_array *tr = iter->tr;
3675 
3676 	/* print nothing if the buffers are empty */
3677 	if (trace_empty(iter))
3678 		return;
3679 
3680 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3681 		print_trace_header(m, iter);
3682 
3683 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3684 		print_lat_help_header(m);
3685 }
3686 
3687 void trace_default_header(struct seq_file *m)
3688 {
3689 	struct trace_iterator *iter = m->private;
3690 	struct trace_array *tr = iter->tr;
3691 	unsigned long trace_flags = tr->trace_flags;
3692 
3693 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3694 		return;
3695 
3696 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3697 		/* print nothing if the buffers are empty */
3698 		if (trace_empty(iter))
3699 			return;
3700 		print_trace_header(m, iter);
3701 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3702 			print_lat_help_header(m);
3703 	} else {
3704 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3705 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3706 				print_func_help_header_irq(iter->trace_buffer,
3707 							   m, trace_flags);
3708 			else
3709 				print_func_help_header(iter->trace_buffer, m,
3710 						       trace_flags);
3711 		}
3712 	}
3713 }
3714 
3715 static void test_ftrace_alive(struct seq_file *m)
3716 {
3717 	if (!ftrace_is_dead())
3718 		return;
3719 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3720 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3721 }
3722 
3723 #ifdef CONFIG_TRACER_MAX_TRACE
3724 static void show_snapshot_main_help(struct seq_file *m)
3725 {
3726 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3727 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3728 		    "#                      Takes a snapshot of the main buffer.\n"
3729 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3730 		    "#                      (Doesn't have to be '2' works with any number that\n"
3731 		    "#                       is not a '0' or '1')\n");
3732 }
3733 
3734 static void show_snapshot_percpu_help(struct seq_file *m)
3735 {
3736 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3737 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3738 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3739 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3740 #else
3741 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3742 		    "#                     Must use main snapshot file to allocate.\n");
3743 #endif
3744 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3745 		    "#                      (Doesn't have to be '2' works with any number that\n"
3746 		    "#                       is not a '0' or '1')\n");
3747 }
3748 
3749 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3750 {
3751 	if (iter->tr->allocated_snapshot)
3752 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3753 	else
3754 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3755 
3756 	seq_puts(m, "# Snapshot commands:\n");
3757 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3758 		show_snapshot_main_help(m);
3759 	else
3760 		show_snapshot_percpu_help(m);
3761 }
3762 #else
3763 /* Should never be called */
3764 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3765 #endif
3766 
3767 static int s_show(struct seq_file *m, void *v)
3768 {
3769 	struct trace_iterator *iter = v;
3770 	int ret;
3771 
3772 	if (iter->ent == NULL) {
3773 		if (iter->tr) {
3774 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3775 			seq_puts(m, "#\n");
3776 			test_ftrace_alive(m);
3777 		}
3778 		if (iter->snapshot && trace_empty(iter))
3779 			print_snapshot_help(m, iter);
3780 		else if (iter->trace && iter->trace->print_header)
3781 			iter->trace->print_header(m);
3782 		else
3783 			trace_default_header(m);
3784 
3785 	} else if (iter->leftover) {
3786 		/*
3787 		 * If we filled the seq_file buffer earlier, we
3788 		 * want to just show it now.
3789 		 */
3790 		ret = trace_print_seq(m, &iter->seq);
3791 
3792 		/* ret should this time be zero, but you never know */
3793 		iter->leftover = ret;
3794 
3795 	} else {
3796 		print_trace_line(iter);
3797 		ret = trace_print_seq(m, &iter->seq);
3798 		/*
3799 		 * If we overflow the seq_file buffer, then it will
3800 		 * ask us for this data again at start up.
3801 		 * Use that instead.
3802 		 *  ret is 0 if seq_file write succeeded.
3803 		 *        -1 otherwise.
3804 		 */
3805 		iter->leftover = ret;
3806 	}
3807 
3808 	return 0;
3809 }
3810 
3811 /*
3812  * Should be used after trace_array_get(), trace_types_lock
3813  * ensures that i_cdev was already initialized.
3814  */
3815 static inline int tracing_get_cpu(struct inode *inode)
3816 {
3817 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3818 		return (long)inode->i_cdev - 1;
3819 	return RING_BUFFER_ALL_CPUS;
3820 }
3821 
3822 static const struct seq_operations tracer_seq_ops = {
3823 	.start		= s_start,
3824 	.next		= s_next,
3825 	.stop		= s_stop,
3826 	.show		= s_show,
3827 };
3828 
3829 static struct trace_iterator *
3830 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3831 {
3832 	struct trace_array *tr = inode->i_private;
3833 	struct trace_iterator *iter;
3834 	int cpu;
3835 
3836 	if (tracing_disabled)
3837 		return ERR_PTR(-ENODEV);
3838 
3839 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3840 	if (!iter)
3841 		return ERR_PTR(-ENOMEM);
3842 
3843 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3844 				    GFP_KERNEL);
3845 	if (!iter->buffer_iter)
3846 		goto release;
3847 
3848 	/*
3849 	 * We make a copy of the current tracer to avoid concurrent
3850 	 * changes on it while we are reading.
3851 	 */
3852 	mutex_lock(&trace_types_lock);
3853 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3854 	if (!iter->trace)
3855 		goto fail;
3856 
3857 	*iter->trace = *tr->current_trace;
3858 
3859 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3860 		goto fail;
3861 
3862 	iter->tr = tr;
3863 
3864 #ifdef CONFIG_TRACER_MAX_TRACE
3865 	/* Currently only the top directory has a snapshot */
3866 	if (tr->current_trace->print_max || snapshot)
3867 		iter->trace_buffer = &tr->max_buffer;
3868 	else
3869 #endif
3870 		iter->trace_buffer = &tr->trace_buffer;
3871 	iter->snapshot = snapshot;
3872 	iter->pos = -1;
3873 	iter->cpu_file = tracing_get_cpu(inode);
3874 	mutex_init(&iter->mutex);
3875 
3876 	/* Notify the tracer early; before we stop tracing. */
3877 	if (iter->trace && iter->trace->open)
3878 		iter->trace->open(iter);
3879 
3880 	/* Annotate start of buffers if we had overruns */
3881 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3882 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3883 
3884 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3885 	if (trace_clocks[tr->clock_id].in_ns)
3886 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3887 
3888 	/* stop the trace while dumping if we are not opening "snapshot" */
3889 	if (!iter->snapshot)
3890 		tracing_stop_tr(tr);
3891 
3892 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3893 		for_each_tracing_cpu(cpu) {
3894 			iter->buffer_iter[cpu] =
3895 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3896 		}
3897 		ring_buffer_read_prepare_sync();
3898 		for_each_tracing_cpu(cpu) {
3899 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3900 			tracing_iter_reset(iter, cpu);
3901 		}
3902 	} else {
3903 		cpu = iter->cpu_file;
3904 		iter->buffer_iter[cpu] =
3905 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3906 		ring_buffer_read_prepare_sync();
3907 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3908 		tracing_iter_reset(iter, cpu);
3909 	}
3910 
3911 	mutex_unlock(&trace_types_lock);
3912 
3913 	return iter;
3914 
3915  fail:
3916 	mutex_unlock(&trace_types_lock);
3917 	kfree(iter->trace);
3918 	kfree(iter->buffer_iter);
3919 release:
3920 	seq_release_private(inode, file);
3921 	return ERR_PTR(-ENOMEM);
3922 }
3923 
3924 int tracing_open_generic(struct inode *inode, struct file *filp)
3925 {
3926 	if (tracing_disabled)
3927 		return -ENODEV;
3928 
3929 	filp->private_data = inode->i_private;
3930 	return 0;
3931 }
3932 
3933 bool tracing_is_disabled(void)
3934 {
3935 	return (tracing_disabled) ? true: false;
3936 }
3937 
3938 /*
3939  * Open and update trace_array ref count.
3940  * Must have the current trace_array passed to it.
3941  */
3942 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3943 {
3944 	struct trace_array *tr = inode->i_private;
3945 
3946 	if (tracing_disabled)
3947 		return -ENODEV;
3948 
3949 	if (trace_array_get(tr) < 0)
3950 		return -ENODEV;
3951 
3952 	filp->private_data = inode->i_private;
3953 
3954 	return 0;
3955 }
3956 
3957 static int tracing_release(struct inode *inode, struct file *file)
3958 {
3959 	struct trace_array *tr = inode->i_private;
3960 	struct seq_file *m = file->private_data;
3961 	struct trace_iterator *iter;
3962 	int cpu;
3963 
3964 	if (!(file->f_mode & FMODE_READ)) {
3965 		trace_array_put(tr);
3966 		return 0;
3967 	}
3968 
3969 	/* Writes do not use seq_file */
3970 	iter = m->private;
3971 	mutex_lock(&trace_types_lock);
3972 
3973 	for_each_tracing_cpu(cpu) {
3974 		if (iter->buffer_iter[cpu])
3975 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3976 	}
3977 
3978 	if (iter->trace && iter->trace->close)
3979 		iter->trace->close(iter);
3980 
3981 	if (!iter->snapshot)
3982 		/* reenable tracing if it was previously enabled */
3983 		tracing_start_tr(tr);
3984 
3985 	__trace_array_put(tr);
3986 
3987 	mutex_unlock(&trace_types_lock);
3988 
3989 	mutex_destroy(&iter->mutex);
3990 	free_cpumask_var(iter->started);
3991 	kfree(iter->trace);
3992 	kfree(iter->buffer_iter);
3993 	seq_release_private(inode, file);
3994 
3995 	return 0;
3996 }
3997 
3998 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3999 {
4000 	struct trace_array *tr = inode->i_private;
4001 
4002 	trace_array_put(tr);
4003 	return 0;
4004 }
4005 
4006 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4007 {
4008 	struct trace_array *tr = inode->i_private;
4009 
4010 	trace_array_put(tr);
4011 
4012 	return single_release(inode, file);
4013 }
4014 
4015 static int tracing_open(struct inode *inode, struct file *file)
4016 {
4017 	struct trace_array *tr = inode->i_private;
4018 	struct trace_iterator *iter;
4019 	int ret = 0;
4020 
4021 	if (trace_array_get(tr) < 0)
4022 		return -ENODEV;
4023 
4024 	/* If this file was open for write, then erase contents */
4025 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4026 		int cpu = tracing_get_cpu(inode);
4027 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4028 
4029 #ifdef CONFIG_TRACER_MAX_TRACE
4030 		if (tr->current_trace->print_max)
4031 			trace_buf = &tr->max_buffer;
4032 #endif
4033 
4034 		if (cpu == RING_BUFFER_ALL_CPUS)
4035 			tracing_reset_online_cpus(trace_buf);
4036 		else
4037 			tracing_reset(trace_buf, cpu);
4038 	}
4039 
4040 	if (file->f_mode & FMODE_READ) {
4041 		iter = __tracing_open(inode, file, false);
4042 		if (IS_ERR(iter))
4043 			ret = PTR_ERR(iter);
4044 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4045 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4046 	}
4047 
4048 	if (ret < 0)
4049 		trace_array_put(tr);
4050 
4051 	return ret;
4052 }
4053 
4054 /*
4055  * Some tracers are not suitable for instance buffers.
4056  * A tracer is always available for the global array (toplevel)
4057  * or if it explicitly states that it is.
4058  */
4059 static bool
4060 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4061 {
4062 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4063 }
4064 
4065 /* Find the next tracer that this trace array may use */
4066 static struct tracer *
4067 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4068 {
4069 	while (t && !trace_ok_for_array(t, tr))
4070 		t = t->next;
4071 
4072 	return t;
4073 }
4074 
4075 static void *
4076 t_next(struct seq_file *m, void *v, loff_t *pos)
4077 {
4078 	struct trace_array *tr = m->private;
4079 	struct tracer *t = v;
4080 
4081 	(*pos)++;
4082 
4083 	if (t)
4084 		t = get_tracer_for_array(tr, t->next);
4085 
4086 	return t;
4087 }
4088 
4089 static void *t_start(struct seq_file *m, loff_t *pos)
4090 {
4091 	struct trace_array *tr = m->private;
4092 	struct tracer *t;
4093 	loff_t l = 0;
4094 
4095 	mutex_lock(&trace_types_lock);
4096 
4097 	t = get_tracer_for_array(tr, trace_types);
4098 	for (; t && l < *pos; t = t_next(m, t, &l))
4099 			;
4100 
4101 	return t;
4102 }
4103 
4104 static void t_stop(struct seq_file *m, void *p)
4105 {
4106 	mutex_unlock(&trace_types_lock);
4107 }
4108 
4109 static int t_show(struct seq_file *m, void *v)
4110 {
4111 	struct tracer *t = v;
4112 
4113 	if (!t)
4114 		return 0;
4115 
4116 	seq_puts(m, t->name);
4117 	if (t->next)
4118 		seq_putc(m, ' ');
4119 	else
4120 		seq_putc(m, '\n');
4121 
4122 	return 0;
4123 }
4124 
4125 static const struct seq_operations show_traces_seq_ops = {
4126 	.start		= t_start,
4127 	.next		= t_next,
4128 	.stop		= t_stop,
4129 	.show		= t_show,
4130 };
4131 
4132 static int show_traces_open(struct inode *inode, struct file *file)
4133 {
4134 	struct trace_array *tr = inode->i_private;
4135 	struct seq_file *m;
4136 	int ret;
4137 
4138 	if (tracing_disabled)
4139 		return -ENODEV;
4140 
4141 	ret = seq_open(file, &show_traces_seq_ops);
4142 	if (ret)
4143 		return ret;
4144 
4145 	m = file->private_data;
4146 	m->private = tr;
4147 
4148 	return 0;
4149 }
4150 
4151 static ssize_t
4152 tracing_write_stub(struct file *filp, const char __user *ubuf,
4153 		   size_t count, loff_t *ppos)
4154 {
4155 	return count;
4156 }
4157 
4158 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4159 {
4160 	int ret;
4161 
4162 	if (file->f_mode & FMODE_READ)
4163 		ret = seq_lseek(file, offset, whence);
4164 	else
4165 		file->f_pos = ret = 0;
4166 
4167 	return ret;
4168 }
4169 
4170 static const struct file_operations tracing_fops = {
4171 	.open		= tracing_open,
4172 	.read		= seq_read,
4173 	.write		= tracing_write_stub,
4174 	.llseek		= tracing_lseek,
4175 	.release	= tracing_release,
4176 };
4177 
4178 static const struct file_operations show_traces_fops = {
4179 	.open		= show_traces_open,
4180 	.read		= seq_read,
4181 	.release	= seq_release,
4182 	.llseek		= seq_lseek,
4183 };
4184 
4185 static ssize_t
4186 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4187 		     size_t count, loff_t *ppos)
4188 {
4189 	struct trace_array *tr = file_inode(filp)->i_private;
4190 	char *mask_str;
4191 	int len;
4192 
4193 	len = snprintf(NULL, 0, "%*pb\n",
4194 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4195 	mask_str = kmalloc(len, GFP_KERNEL);
4196 	if (!mask_str)
4197 		return -ENOMEM;
4198 
4199 	len = snprintf(mask_str, len, "%*pb\n",
4200 		       cpumask_pr_args(tr->tracing_cpumask));
4201 	if (len >= count) {
4202 		count = -EINVAL;
4203 		goto out_err;
4204 	}
4205 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4206 
4207 out_err:
4208 	kfree(mask_str);
4209 
4210 	return count;
4211 }
4212 
4213 static ssize_t
4214 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4215 		      size_t count, loff_t *ppos)
4216 {
4217 	struct trace_array *tr = file_inode(filp)->i_private;
4218 	cpumask_var_t tracing_cpumask_new;
4219 	int err, cpu;
4220 
4221 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4222 		return -ENOMEM;
4223 
4224 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4225 	if (err)
4226 		goto err_unlock;
4227 
4228 	local_irq_disable();
4229 	arch_spin_lock(&tr->max_lock);
4230 	for_each_tracing_cpu(cpu) {
4231 		/*
4232 		 * Increase/decrease the disabled counter if we are
4233 		 * about to flip a bit in the cpumask:
4234 		 */
4235 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4236 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4237 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4238 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4239 		}
4240 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4241 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4242 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4243 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4244 		}
4245 	}
4246 	arch_spin_unlock(&tr->max_lock);
4247 	local_irq_enable();
4248 
4249 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4250 	free_cpumask_var(tracing_cpumask_new);
4251 
4252 	return count;
4253 
4254 err_unlock:
4255 	free_cpumask_var(tracing_cpumask_new);
4256 
4257 	return err;
4258 }
4259 
4260 static const struct file_operations tracing_cpumask_fops = {
4261 	.open		= tracing_open_generic_tr,
4262 	.read		= tracing_cpumask_read,
4263 	.write		= tracing_cpumask_write,
4264 	.release	= tracing_release_generic_tr,
4265 	.llseek		= generic_file_llseek,
4266 };
4267 
4268 static int tracing_trace_options_show(struct seq_file *m, void *v)
4269 {
4270 	struct tracer_opt *trace_opts;
4271 	struct trace_array *tr = m->private;
4272 	u32 tracer_flags;
4273 	int i;
4274 
4275 	mutex_lock(&trace_types_lock);
4276 	tracer_flags = tr->current_trace->flags->val;
4277 	trace_opts = tr->current_trace->flags->opts;
4278 
4279 	for (i = 0; trace_options[i]; i++) {
4280 		if (tr->trace_flags & (1 << i))
4281 			seq_printf(m, "%s\n", trace_options[i]);
4282 		else
4283 			seq_printf(m, "no%s\n", trace_options[i]);
4284 	}
4285 
4286 	for (i = 0; trace_opts[i].name; i++) {
4287 		if (tracer_flags & trace_opts[i].bit)
4288 			seq_printf(m, "%s\n", trace_opts[i].name);
4289 		else
4290 			seq_printf(m, "no%s\n", trace_opts[i].name);
4291 	}
4292 	mutex_unlock(&trace_types_lock);
4293 
4294 	return 0;
4295 }
4296 
4297 static int __set_tracer_option(struct trace_array *tr,
4298 			       struct tracer_flags *tracer_flags,
4299 			       struct tracer_opt *opts, int neg)
4300 {
4301 	struct tracer *trace = tracer_flags->trace;
4302 	int ret;
4303 
4304 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4305 	if (ret)
4306 		return ret;
4307 
4308 	if (neg)
4309 		tracer_flags->val &= ~opts->bit;
4310 	else
4311 		tracer_flags->val |= opts->bit;
4312 	return 0;
4313 }
4314 
4315 /* Try to assign a tracer specific option */
4316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4317 {
4318 	struct tracer *trace = tr->current_trace;
4319 	struct tracer_flags *tracer_flags = trace->flags;
4320 	struct tracer_opt *opts = NULL;
4321 	int i;
4322 
4323 	for (i = 0; tracer_flags->opts[i].name; i++) {
4324 		opts = &tracer_flags->opts[i];
4325 
4326 		if (strcmp(cmp, opts->name) == 0)
4327 			return __set_tracer_option(tr, trace->flags, opts, neg);
4328 	}
4329 
4330 	return -EINVAL;
4331 }
4332 
4333 /* Some tracers require overwrite to stay enabled */
4334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4335 {
4336 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4337 		return -1;
4338 
4339 	return 0;
4340 }
4341 
4342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4343 {
4344 	/* do nothing if flag is already set */
4345 	if (!!(tr->trace_flags & mask) == !!enabled)
4346 		return 0;
4347 
4348 	/* Give the tracer a chance to approve the change */
4349 	if (tr->current_trace->flag_changed)
4350 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4351 			return -EINVAL;
4352 
4353 	if (enabled)
4354 		tr->trace_flags |= mask;
4355 	else
4356 		tr->trace_flags &= ~mask;
4357 
4358 	if (mask == TRACE_ITER_RECORD_CMD)
4359 		trace_event_enable_cmd_record(enabled);
4360 
4361 	if (mask == TRACE_ITER_RECORD_TGID) {
4362 		if (!tgid_map)
4363 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4364 					   GFP_KERNEL);
4365 		if (!tgid_map) {
4366 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4367 			return -ENOMEM;
4368 		}
4369 
4370 		trace_event_enable_tgid_record(enabled);
4371 	}
4372 
4373 	if (mask == TRACE_ITER_EVENT_FORK)
4374 		trace_event_follow_fork(tr, enabled);
4375 
4376 	if (mask == TRACE_ITER_FUNC_FORK)
4377 		ftrace_pid_follow_fork(tr, enabled);
4378 
4379 	if (mask == TRACE_ITER_OVERWRITE) {
4380 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4381 #ifdef CONFIG_TRACER_MAX_TRACE
4382 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4383 #endif
4384 	}
4385 
4386 	if (mask == TRACE_ITER_PRINTK) {
4387 		trace_printk_start_stop_comm(enabled);
4388 		trace_printk_control(enabled);
4389 	}
4390 
4391 	return 0;
4392 }
4393 
4394 static int trace_set_options(struct trace_array *tr, char *option)
4395 {
4396 	char *cmp;
4397 	int neg = 0;
4398 	int ret;
4399 	size_t orig_len = strlen(option);
4400 
4401 	cmp = strstrip(option);
4402 
4403 	if (strncmp(cmp, "no", 2) == 0) {
4404 		neg = 1;
4405 		cmp += 2;
4406 	}
4407 
4408 	mutex_lock(&trace_types_lock);
4409 
4410 	ret = match_string(trace_options, -1, cmp);
4411 	/* If no option could be set, test the specific tracer options */
4412 	if (ret < 0)
4413 		ret = set_tracer_option(tr, cmp, neg);
4414 	else
4415 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4416 
4417 	mutex_unlock(&trace_types_lock);
4418 
4419 	/*
4420 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4421 	 * turn it back into a space.
4422 	 */
4423 	if (orig_len > strlen(option))
4424 		option[strlen(option)] = ' ';
4425 
4426 	return ret;
4427 }
4428 
4429 static void __init apply_trace_boot_options(void)
4430 {
4431 	char *buf = trace_boot_options_buf;
4432 	char *option;
4433 
4434 	while (true) {
4435 		option = strsep(&buf, ",");
4436 
4437 		if (!option)
4438 			break;
4439 
4440 		if (*option)
4441 			trace_set_options(&global_trace, option);
4442 
4443 		/* Put back the comma to allow this to be called again */
4444 		if (buf)
4445 			*(buf - 1) = ',';
4446 	}
4447 }
4448 
4449 static ssize_t
4450 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4451 			size_t cnt, loff_t *ppos)
4452 {
4453 	struct seq_file *m = filp->private_data;
4454 	struct trace_array *tr = m->private;
4455 	char buf[64];
4456 	int ret;
4457 
4458 	if (cnt >= sizeof(buf))
4459 		return -EINVAL;
4460 
4461 	if (copy_from_user(buf, ubuf, cnt))
4462 		return -EFAULT;
4463 
4464 	buf[cnt] = 0;
4465 
4466 	ret = trace_set_options(tr, buf);
4467 	if (ret < 0)
4468 		return ret;
4469 
4470 	*ppos += cnt;
4471 
4472 	return cnt;
4473 }
4474 
4475 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4476 {
4477 	struct trace_array *tr = inode->i_private;
4478 	int ret;
4479 
4480 	if (tracing_disabled)
4481 		return -ENODEV;
4482 
4483 	if (trace_array_get(tr) < 0)
4484 		return -ENODEV;
4485 
4486 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4487 	if (ret < 0)
4488 		trace_array_put(tr);
4489 
4490 	return ret;
4491 }
4492 
4493 static const struct file_operations tracing_iter_fops = {
4494 	.open		= tracing_trace_options_open,
4495 	.read		= seq_read,
4496 	.llseek		= seq_lseek,
4497 	.release	= tracing_single_release_tr,
4498 	.write		= tracing_trace_options_write,
4499 };
4500 
4501 static const char readme_msg[] =
4502 	"tracing mini-HOWTO:\n\n"
4503 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4504 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4505 	" Important files:\n"
4506 	"  trace\t\t\t- The static contents of the buffer\n"
4507 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4508 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4509 	"  current_tracer\t- function and latency tracers\n"
4510 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4511 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4512 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4513 	"  trace_clock\t\t-change the clock used to order events\n"
4514 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4515 	"      global:   Synced across CPUs but slows tracing down.\n"
4516 	"     counter:   Not a clock, but just an increment\n"
4517 	"      uptime:   Jiffy counter from time of boot\n"
4518 	"        perf:   Same clock that perf events use\n"
4519 #ifdef CONFIG_X86_64
4520 	"     x86-tsc:   TSC cycle counter\n"
4521 #endif
4522 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4523 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4524 	"    absolute:   Absolute (standalone) timestamp\n"
4525 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4526 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4527 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4528 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4529 	"\t\t\t  Remove sub-buffer with rmdir\n"
4530 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4531 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4532 	"\t\t\t  option name\n"
4533 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4534 #ifdef CONFIG_DYNAMIC_FTRACE
4535 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4536 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4537 	"\t\t\t  functions\n"
4538 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4539 	"\t     modules: Can select a group via module\n"
4540 	"\t      Format: :mod:<module-name>\n"
4541 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4542 	"\t    triggers: a command to perform when function is hit\n"
4543 	"\t      Format: <function>:<trigger>[:count]\n"
4544 	"\t     trigger: traceon, traceoff\n"
4545 	"\t\t      enable_event:<system>:<event>\n"
4546 	"\t\t      disable_event:<system>:<event>\n"
4547 #ifdef CONFIG_STACKTRACE
4548 	"\t\t      stacktrace\n"
4549 #endif
4550 #ifdef CONFIG_TRACER_SNAPSHOT
4551 	"\t\t      snapshot\n"
4552 #endif
4553 	"\t\t      dump\n"
4554 	"\t\t      cpudump\n"
4555 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4556 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4557 	"\t     The first one will disable tracing every time do_fault is hit\n"
4558 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4559 	"\t       The first time do trap is hit and it disables tracing, the\n"
4560 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4561 	"\t       the counter will not decrement. It only decrements when the\n"
4562 	"\t       trigger did work\n"
4563 	"\t     To remove trigger without count:\n"
4564 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4565 	"\t     To remove trigger with a count:\n"
4566 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4567 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4568 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4569 	"\t    modules: Can select a group via module command :mod:\n"
4570 	"\t    Does not accept triggers\n"
4571 #endif /* CONFIG_DYNAMIC_FTRACE */
4572 #ifdef CONFIG_FUNCTION_TRACER
4573 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4574 	"\t\t    (function)\n"
4575 #endif
4576 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4577 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4578 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4579 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4580 #endif
4581 #ifdef CONFIG_TRACER_SNAPSHOT
4582 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4583 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4584 	"\t\t\t  information\n"
4585 #endif
4586 #ifdef CONFIG_STACK_TRACER
4587 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4588 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4589 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4590 	"\t\t\t  new trace)\n"
4591 #ifdef CONFIG_DYNAMIC_FTRACE
4592 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4593 	"\t\t\t  traces\n"
4594 #endif
4595 #endif /* CONFIG_STACK_TRACER */
4596 #ifdef CONFIG_KPROBE_EVENTS
4597 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4598 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4599 #endif
4600 #ifdef CONFIG_UPROBE_EVENTS
4601 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4602 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4603 #endif
4604 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4605 	"\t  accepts: event-definitions (one definition per line)\n"
4606 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4607 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4608 	"\t           -:[<group>/]<event>\n"
4609 #ifdef CONFIG_KPROBE_EVENTS
4610 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4611   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4612 #endif
4613 #ifdef CONFIG_UPROBE_EVENTS
4614 	"\t    place: <path>:<offset>\n"
4615 #endif
4616 	"\t     args: <name>=fetcharg[:type]\n"
4617 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4618 	"\t           $stack<index>, $stack, $retval, $comm\n"
4619 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4620 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4621 #endif
4622 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4623 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4624 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4625 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4626 	"\t\t\t  events\n"
4627 	"      filter\t\t- If set, only events passing filter are traced\n"
4628 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4629 	"\t\t\t  <event>:\n"
4630 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4631 	"      filter\t\t- If set, only events passing filter are traced\n"
4632 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4633 	"\t    Format: <trigger>[:count][if <filter>]\n"
4634 	"\t   trigger: traceon, traceoff\n"
4635 	"\t            enable_event:<system>:<event>\n"
4636 	"\t            disable_event:<system>:<event>\n"
4637 #ifdef CONFIG_HIST_TRIGGERS
4638 	"\t            enable_hist:<system>:<event>\n"
4639 	"\t            disable_hist:<system>:<event>\n"
4640 #endif
4641 #ifdef CONFIG_STACKTRACE
4642 	"\t\t    stacktrace\n"
4643 #endif
4644 #ifdef CONFIG_TRACER_SNAPSHOT
4645 	"\t\t    snapshot\n"
4646 #endif
4647 #ifdef CONFIG_HIST_TRIGGERS
4648 	"\t\t    hist (see below)\n"
4649 #endif
4650 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4651 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4652 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4653 	"\t                  events/block/block_unplug/trigger\n"
4654 	"\t   The first disables tracing every time block_unplug is hit.\n"
4655 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4656 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4657 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4658 	"\t   Like function triggers, the counter is only decremented if it\n"
4659 	"\t    enabled or disabled tracing.\n"
4660 	"\t   To remove a trigger without a count:\n"
4661 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4662 	"\t   To remove a trigger with a count:\n"
4663 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4664 	"\t   Filters can be ignored when removing a trigger.\n"
4665 #ifdef CONFIG_HIST_TRIGGERS
4666 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4667 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4668 	"\t            [:values=<field1[,field2,...]>]\n"
4669 	"\t            [:sort=<field1[,field2,...]>]\n"
4670 	"\t            [:size=#entries]\n"
4671 	"\t            [:pause][:continue][:clear]\n"
4672 	"\t            [:name=histname1]\n"
4673 	"\t            [if <filter>]\n\n"
4674 	"\t    When a matching event is hit, an entry is added to a hash\n"
4675 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4676 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4677 	"\t    correspond to fields in the event's format description.  Keys\n"
4678 	"\t    can be any field, or the special string 'stacktrace'.\n"
4679 	"\t    Compound keys consisting of up to two fields can be specified\n"
4680 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4681 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4682 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4683 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4684 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4685 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4686 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4687 	"\t    its histogram data will be shared with other triggers of the\n"
4688 	"\t    same name, and trigger hits will update this common data.\n\n"
4689 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4690 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4691 	"\t    triggers attached to an event, there will be a table for each\n"
4692 	"\t    trigger in the output.  The table displayed for a named\n"
4693 	"\t    trigger will be the same as any other instance having the\n"
4694 	"\t    same name.  The default format used to display a given field\n"
4695 	"\t    can be modified by appending any of the following modifiers\n"
4696 	"\t    to the field name, as applicable:\n\n"
4697 	"\t            .hex        display a number as a hex value\n"
4698 	"\t            .sym        display an address as a symbol\n"
4699 	"\t            .sym-offset display an address as a symbol and offset\n"
4700 	"\t            .execname   display a common_pid as a program name\n"
4701 	"\t            .syscall    display a syscall id as a syscall name\n"
4702 	"\t            .log2       display log2 value rather than raw number\n"
4703 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4704 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4705 	"\t    trigger or to start a hist trigger but not log any events\n"
4706 	"\t    until told to do so.  'continue' can be used to start or\n"
4707 	"\t    restart a paused hist trigger.\n\n"
4708 	"\t    The 'clear' parameter will clear the contents of a running\n"
4709 	"\t    hist trigger and leave its current paused/active state\n"
4710 	"\t    unchanged.\n\n"
4711 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4712 	"\t    have one event conditionally start and stop another event's\n"
4713 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4714 	"\t    the enable_event and disable_event triggers.\n"
4715 #endif
4716 ;
4717 
4718 static ssize_t
4719 tracing_readme_read(struct file *filp, char __user *ubuf,
4720 		       size_t cnt, loff_t *ppos)
4721 {
4722 	return simple_read_from_buffer(ubuf, cnt, ppos,
4723 					readme_msg, strlen(readme_msg));
4724 }
4725 
4726 static const struct file_operations tracing_readme_fops = {
4727 	.open		= tracing_open_generic,
4728 	.read		= tracing_readme_read,
4729 	.llseek		= generic_file_llseek,
4730 };
4731 
4732 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4733 {
4734 	int *ptr = v;
4735 
4736 	if (*pos || m->count)
4737 		ptr++;
4738 
4739 	(*pos)++;
4740 
4741 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4742 		if (trace_find_tgid(*ptr))
4743 			return ptr;
4744 	}
4745 
4746 	return NULL;
4747 }
4748 
4749 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4750 {
4751 	void *v;
4752 	loff_t l = 0;
4753 
4754 	if (!tgid_map)
4755 		return NULL;
4756 
4757 	v = &tgid_map[0];
4758 	while (l <= *pos) {
4759 		v = saved_tgids_next(m, v, &l);
4760 		if (!v)
4761 			return NULL;
4762 	}
4763 
4764 	return v;
4765 }
4766 
4767 static void saved_tgids_stop(struct seq_file *m, void *v)
4768 {
4769 }
4770 
4771 static int saved_tgids_show(struct seq_file *m, void *v)
4772 {
4773 	int pid = (int *)v - tgid_map;
4774 
4775 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4776 	return 0;
4777 }
4778 
4779 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4780 	.start		= saved_tgids_start,
4781 	.stop		= saved_tgids_stop,
4782 	.next		= saved_tgids_next,
4783 	.show		= saved_tgids_show,
4784 };
4785 
4786 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4787 {
4788 	if (tracing_disabled)
4789 		return -ENODEV;
4790 
4791 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4792 }
4793 
4794 
4795 static const struct file_operations tracing_saved_tgids_fops = {
4796 	.open		= tracing_saved_tgids_open,
4797 	.read		= seq_read,
4798 	.llseek		= seq_lseek,
4799 	.release	= seq_release,
4800 };
4801 
4802 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4803 {
4804 	unsigned int *ptr = v;
4805 
4806 	if (*pos || m->count)
4807 		ptr++;
4808 
4809 	(*pos)++;
4810 
4811 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4812 	     ptr++) {
4813 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4814 			continue;
4815 
4816 		return ptr;
4817 	}
4818 
4819 	return NULL;
4820 }
4821 
4822 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4823 {
4824 	void *v;
4825 	loff_t l = 0;
4826 
4827 	preempt_disable();
4828 	arch_spin_lock(&trace_cmdline_lock);
4829 
4830 	v = &savedcmd->map_cmdline_to_pid[0];
4831 	while (l <= *pos) {
4832 		v = saved_cmdlines_next(m, v, &l);
4833 		if (!v)
4834 			return NULL;
4835 	}
4836 
4837 	return v;
4838 }
4839 
4840 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4841 {
4842 	arch_spin_unlock(&trace_cmdline_lock);
4843 	preempt_enable();
4844 }
4845 
4846 static int saved_cmdlines_show(struct seq_file *m, void *v)
4847 {
4848 	char buf[TASK_COMM_LEN];
4849 	unsigned int *pid = v;
4850 
4851 	__trace_find_cmdline(*pid, buf);
4852 	seq_printf(m, "%d %s\n", *pid, buf);
4853 	return 0;
4854 }
4855 
4856 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4857 	.start		= saved_cmdlines_start,
4858 	.next		= saved_cmdlines_next,
4859 	.stop		= saved_cmdlines_stop,
4860 	.show		= saved_cmdlines_show,
4861 };
4862 
4863 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4864 {
4865 	if (tracing_disabled)
4866 		return -ENODEV;
4867 
4868 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4869 }
4870 
4871 static const struct file_operations tracing_saved_cmdlines_fops = {
4872 	.open		= tracing_saved_cmdlines_open,
4873 	.read		= seq_read,
4874 	.llseek		= seq_lseek,
4875 	.release	= seq_release,
4876 };
4877 
4878 static ssize_t
4879 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4880 				 size_t cnt, loff_t *ppos)
4881 {
4882 	char buf[64];
4883 	int r;
4884 
4885 	arch_spin_lock(&trace_cmdline_lock);
4886 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4887 	arch_spin_unlock(&trace_cmdline_lock);
4888 
4889 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4890 }
4891 
4892 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4893 {
4894 	kfree(s->saved_cmdlines);
4895 	kfree(s->map_cmdline_to_pid);
4896 	kfree(s);
4897 }
4898 
4899 static int tracing_resize_saved_cmdlines(unsigned int val)
4900 {
4901 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4902 
4903 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4904 	if (!s)
4905 		return -ENOMEM;
4906 
4907 	if (allocate_cmdlines_buffer(val, s) < 0) {
4908 		kfree(s);
4909 		return -ENOMEM;
4910 	}
4911 
4912 	arch_spin_lock(&trace_cmdline_lock);
4913 	savedcmd_temp = savedcmd;
4914 	savedcmd = s;
4915 	arch_spin_unlock(&trace_cmdline_lock);
4916 	free_saved_cmdlines_buffer(savedcmd_temp);
4917 
4918 	return 0;
4919 }
4920 
4921 static ssize_t
4922 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4923 				  size_t cnt, loff_t *ppos)
4924 {
4925 	unsigned long val;
4926 	int ret;
4927 
4928 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4929 	if (ret)
4930 		return ret;
4931 
4932 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
4933 	if (!val || val > PID_MAX_DEFAULT)
4934 		return -EINVAL;
4935 
4936 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4937 	if (ret < 0)
4938 		return ret;
4939 
4940 	*ppos += cnt;
4941 
4942 	return cnt;
4943 }
4944 
4945 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4946 	.open		= tracing_open_generic,
4947 	.read		= tracing_saved_cmdlines_size_read,
4948 	.write		= tracing_saved_cmdlines_size_write,
4949 };
4950 
4951 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4952 static union trace_eval_map_item *
4953 update_eval_map(union trace_eval_map_item *ptr)
4954 {
4955 	if (!ptr->map.eval_string) {
4956 		if (ptr->tail.next) {
4957 			ptr = ptr->tail.next;
4958 			/* Set ptr to the next real item (skip head) */
4959 			ptr++;
4960 		} else
4961 			return NULL;
4962 	}
4963 	return ptr;
4964 }
4965 
4966 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4967 {
4968 	union trace_eval_map_item *ptr = v;
4969 
4970 	/*
4971 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4972 	 * This really should never happen.
4973 	 */
4974 	ptr = update_eval_map(ptr);
4975 	if (WARN_ON_ONCE(!ptr))
4976 		return NULL;
4977 
4978 	ptr++;
4979 
4980 	(*pos)++;
4981 
4982 	ptr = update_eval_map(ptr);
4983 
4984 	return ptr;
4985 }
4986 
4987 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4988 {
4989 	union trace_eval_map_item *v;
4990 	loff_t l = 0;
4991 
4992 	mutex_lock(&trace_eval_mutex);
4993 
4994 	v = trace_eval_maps;
4995 	if (v)
4996 		v++;
4997 
4998 	while (v && l < *pos) {
4999 		v = eval_map_next(m, v, &l);
5000 	}
5001 
5002 	return v;
5003 }
5004 
5005 static void eval_map_stop(struct seq_file *m, void *v)
5006 {
5007 	mutex_unlock(&trace_eval_mutex);
5008 }
5009 
5010 static int eval_map_show(struct seq_file *m, void *v)
5011 {
5012 	union trace_eval_map_item *ptr = v;
5013 
5014 	seq_printf(m, "%s %ld (%s)\n",
5015 		   ptr->map.eval_string, ptr->map.eval_value,
5016 		   ptr->map.system);
5017 
5018 	return 0;
5019 }
5020 
5021 static const struct seq_operations tracing_eval_map_seq_ops = {
5022 	.start		= eval_map_start,
5023 	.next		= eval_map_next,
5024 	.stop		= eval_map_stop,
5025 	.show		= eval_map_show,
5026 };
5027 
5028 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5029 {
5030 	if (tracing_disabled)
5031 		return -ENODEV;
5032 
5033 	return seq_open(filp, &tracing_eval_map_seq_ops);
5034 }
5035 
5036 static const struct file_operations tracing_eval_map_fops = {
5037 	.open		= tracing_eval_map_open,
5038 	.read		= seq_read,
5039 	.llseek		= seq_lseek,
5040 	.release	= seq_release,
5041 };
5042 
5043 static inline union trace_eval_map_item *
5044 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5045 {
5046 	/* Return tail of array given the head */
5047 	return ptr + ptr->head.length + 1;
5048 }
5049 
5050 static void
5051 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5052 			   int len)
5053 {
5054 	struct trace_eval_map **stop;
5055 	struct trace_eval_map **map;
5056 	union trace_eval_map_item *map_array;
5057 	union trace_eval_map_item *ptr;
5058 
5059 	stop = start + len;
5060 
5061 	/*
5062 	 * The trace_eval_maps contains the map plus a head and tail item,
5063 	 * where the head holds the module and length of array, and the
5064 	 * tail holds a pointer to the next list.
5065 	 */
5066 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5067 	if (!map_array) {
5068 		pr_warn("Unable to allocate trace eval mapping\n");
5069 		return;
5070 	}
5071 
5072 	mutex_lock(&trace_eval_mutex);
5073 
5074 	if (!trace_eval_maps)
5075 		trace_eval_maps = map_array;
5076 	else {
5077 		ptr = trace_eval_maps;
5078 		for (;;) {
5079 			ptr = trace_eval_jmp_to_tail(ptr);
5080 			if (!ptr->tail.next)
5081 				break;
5082 			ptr = ptr->tail.next;
5083 
5084 		}
5085 		ptr->tail.next = map_array;
5086 	}
5087 	map_array->head.mod = mod;
5088 	map_array->head.length = len;
5089 	map_array++;
5090 
5091 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5092 		map_array->map = **map;
5093 		map_array++;
5094 	}
5095 	memset(map_array, 0, sizeof(*map_array));
5096 
5097 	mutex_unlock(&trace_eval_mutex);
5098 }
5099 
5100 static void trace_create_eval_file(struct dentry *d_tracer)
5101 {
5102 	trace_create_file("eval_map", 0444, d_tracer,
5103 			  NULL, &tracing_eval_map_fops);
5104 }
5105 
5106 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5107 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5108 static inline void trace_insert_eval_map_file(struct module *mod,
5109 			      struct trace_eval_map **start, int len) { }
5110 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5111 
5112 static void trace_insert_eval_map(struct module *mod,
5113 				  struct trace_eval_map **start, int len)
5114 {
5115 	struct trace_eval_map **map;
5116 
5117 	if (len <= 0)
5118 		return;
5119 
5120 	map = start;
5121 
5122 	trace_event_eval_update(map, len);
5123 
5124 	trace_insert_eval_map_file(mod, start, len);
5125 }
5126 
5127 static ssize_t
5128 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5129 		       size_t cnt, loff_t *ppos)
5130 {
5131 	struct trace_array *tr = filp->private_data;
5132 	char buf[MAX_TRACER_SIZE+2];
5133 	int r;
5134 
5135 	mutex_lock(&trace_types_lock);
5136 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5137 	mutex_unlock(&trace_types_lock);
5138 
5139 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5140 }
5141 
5142 int tracer_init(struct tracer *t, struct trace_array *tr)
5143 {
5144 	tracing_reset_online_cpus(&tr->trace_buffer);
5145 	return t->init(tr);
5146 }
5147 
5148 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5149 {
5150 	int cpu;
5151 
5152 	for_each_tracing_cpu(cpu)
5153 		per_cpu_ptr(buf->data, cpu)->entries = val;
5154 }
5155 
5156 #ifdef CONFIG_TRACER_MAX_TRACE
5157 /* resize @tr's buffer to the size of @size_tr's entries */
5158 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5159 					struct trace_buffer *size_buf, int cpu_id)
5160 {
5161 	int cpu, ret = 0;
5162 
5163 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5164 		for_each_tracing_cpu(cpu) {
5165 			ret = ring_buffer_resize(trace_buf->buffer,
5166 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5167 			if (ret < 0)
5168 				break;
5169 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5170 				per_cpu_ptr(size_buf->data, cpu)->entries;
5171 		}
5172 	} else {
5173 		ret = ring_buffer_resize(trace_buf->buffer,
5174 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5175 		if (ret == 0)
5176 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5177 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5178 	}
5179 
5180 	return ret;
5181 }
5182 #endif /* CONFIG_TRACER_MAX_TRACE */
5183 
5184 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5185 					unsigned long size, int cpu)
5186 {
5187 	int ret;
5188 
5189 	/*
5190 	 * If kernel or user changes the size of the ring buffer
5191 	 * we use the size that was given, and we can forget about
5192 	 * expanding it later.
5193 	 */
5194 	ring_buffer_expanded = true;
5195 
5196 	/* May be called before buffers are initialized */
5197 	if (!tr->trace_buffer.buffer)
5198 		return 0;
5199 
5200 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5201 	if (ret < 0)
5202 		return ret;
5203 
5204 #ifdef CONFIG_TRACER_MAX_TRACE
5205 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5206 	    !tr->current_trace->use_max_tr)
5207 		goto out;
5208 
5209 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5210 	if (ret < 0) {
5211 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5212 						     &tr->trace_buffer, cpu);
5213 		if (r < 0) {
5214 			/*
5215 			 * AARGH! We are left with different
5216 			 * size max buffer!!!!
5217 			 * The max buffer is our "snapshot" buffer.
5218 			 * When a tracer needs a snapshot (one of the
5219 			 * latency tracers), it swaps the max buffer
5220 			 * with the saved snap shot. We succeeded to
5221 			 * update the size of the main buffer, but failed to
5222 			 * update the size of the max buffer. But when we tried
5223 			 * to reset the main buffer to the original size, we
5224 			 * failed there too. This is very unlikely to
5225 			 * happen, but if it does, warn and kill all
5226 			 * tracing.
5227 			 */
5228 			WARN_ON(1);
5229 			tracing_disabled = 1;
5230 		}
5231 		return ret;
5232 	}
5233 
5234 	if (cpu == RING_BUFFER_ALL_CPUS)
5235 		set_buffer_entries(&tr->max_buffer, size);
5236 	else
5237 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5238 
5239  out:
5240 #endif /* CONFIG_TRACER_MAX_TRACE */
5241 
5242 	if (cpu == RING_BUFFER_ALL_CPUS)
5243 		set_buffer_entries(&tr->trace_buffer, size);
5244 	else
5245 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5246 
5247 	return ret;
5248 }
5249 
5250 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5251 					  unsigned long size, int cpu_id)
5252 {
5253 	int ret = size;
5254 
5255 	mutex_lock(&trace_types_lock);
5256 
5257 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5258 		/* make sure, this cpu is enabled in the mask */
5259 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5260 			ret = -EINVAL;
5261 			goto out;
5262 		}
5263 	}
5264 
5265 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5266 	if (ret < 0)
5267 		ret = -ENOMEM;
5268 
5269 out:
5270 	mutex_unlock(&trace_types_lock);
5271 
5272 	return ret;
5273 }
5274 
5275 
5276 /**
5277  * tracing_update_buffers - used by tracing facility to expand ring buffers
5278  *
5279  * To save on memory when the tracing is never used on a system with it
5280  * configured in. The ring buffers are set to a minimum size. But once
5281  * a user starts to use the tracing facility, then they need to grow
5282  * to their default size.
5283  *
5284  * This function is to be called when a tracer is about to be used.
5285  */
5286 int tracing_update_buffers(void)
5287 {
5288 	int ret = 0;
5289 
5290 	mutex_lock(&trace_types_lock);
5291 	if (!ring_buffer_expanded)
5292 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5293 						RING_BUFFER_ALL_CPUS);
5294 	mutex_unlock(&trace_types_lock);
5295 
5296 	return ret;
5297 }
5298 
5299 struct trace_option_dentry;
5300 
5301 static void
5302 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5303 
5304 /*
5305  * Used to clear out the tracer before deletion of an instance.
5306  * Must have trace_types_lock held.
5307  */
5308 static void tracing_set_nop(struct trace_array *tr)
5309 {
5310 	if (tr->current_trace == &nop_trace)
5311 		return;
5312 
5313 	tr->current_trace->enabled--;
5314 
5315 	if (tr->current_trace->reset)
5316 		tr->current_trace->reset(tr);
5317 
5318 	tr->current_trace = &nop_trace;
5319 }
5320 
5321 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5322 {
5323 	/* Only enable if the directory has been created already. */
5324 	if (!tr->dir)
5325 		return;
5326 
5327 	create_trace_option_files(tr, t);
5328 }
5329 
5330 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5331 {
5332 	struct tracer *t;
5333 #ifdef CONFIG_TRACER_MAX_TRACE
5334 	bool had_max_tr;
5335 #endif
5336 	int ret = 0;
5337 
5338 	mutex_lock(&trace_types_lock);
5339 
5340 	if (!ring_buffer_expanded) {
5341 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5342 						RING_BUFFER_ALL_CPUS);
5343 		if (ret < 0)
5344 			goto out;
5345 		ret = 0;
5346 	}
5347 
5348 	for (t = trace_types; t; t = t->next) {
5349 		if (strcmp(t->name, buf) == 0)
5350 			break;
5351 	}
5352 	if (!t) {
5353 		ret = -EINVAL;
5354 		goto out;
5355 	}
5356 	if (t == tr->current_trace)
5357 		goto out;
5358 
5359 	/* Some tracers won't work on kernel command line */
5360 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5361 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5362 			t->name);
5363 		goto out;
5364 	}
5365 
5366 	/* Some tracers are only allowed for the top level buffer */
5367 	if (!trace_ok_for_array(t, tr)) {
5368 		ret = -EINVAL;
5369 		goto out;
5370 	}
5371 
5372 	/* If trace pipe files are being read, we can't change the tracer */
5373 	if (tr->current_trace->ref) {
5374 		ret = -EBUSY;
5375 		goto out;
5376 	}
5377 
5378 	trace_branch_disable();
5379 
5380 	tr->current_trace->enabled--;
5381 
5382 	if (tr->current_trace->reset)
5383 		tr->current_trace->reset(tr);
5384 
5385 	/* Current trace needs to be nop_trace before synchronize_sched */
5386 	tr->current_trace = &nop_trace;
5387 
5388 #ifdef CONFIG_TRACER_MAX_TRACE
5389 	had_max_tr = tr->allocated_snapshot;
5390 
5391 	if (had_max_tr && !t->use_max_tr) {
5392 		/*
5393 		 * We need to make sure that the update_max_tr sees that
5394 		 * current_trace changed to nop_trace to keep it from
5395 		 * swapping the buffers after we resize it.
5396 		 * The update_max_tr is called from interrupts disabled
5397 		 * so a synchronized_sched() is sufficient.
5398 		 */
5399 		synchronize_sched();
5400 		free_snapshot(tr);
5401 	}
5402 #endif
5403 
5404 #ifdef CONFIG_TRACER_MAX_TRACE
5405 	if (t->use_max_tr && !had_max_tr) {
5406 		ret = tracing_alloc_snapshot_instance(tr);
5407 		if (ret < 0)
5408 			goto out;
5409 	}
5410 #endif
5411 
5412 	if (t->init) {
5413 		ret = tracer_init(t, tr);
5414 		if (ret)
5415 			goto out;
5416 	}
5417 
5418 	tr->current_trace = t;
5419 	tr->current_trace->enabled++;
5420 	trace_branch_enable(tr);
5421  out:
5422 	mutex_unlock(&trace_types_lock);
5423 
5424 	return ret;
5425 }
5426 
5427 static ssize_t
5428 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5429 			size_t cnt, loff_t *ppos)
5430 {
5431 	struct trace_array *tr = filp->private_data;
5432 	char buf[MAX_TRACER_SIZE+1];
5433 	int i;
5434 	size_t ret;
5435 	int err;
5436 
5437 	ret = cnt;
5438 
5439 	if (cnt > MAX_TRACER_SIZE)
5440 		cnt = MAX_TRACER_SIZE;
5441 
5442 	if (copy_from_user(buf, ubuf, cnt))
5443 		return -EFAULT;
5444 
5445 	buf[cnt] = 0;
5446 
5447 	/* strip ending whitespace. */
5448 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5449 		buf[i] = 0;
5450 
5451 	err = tracing_set_tracer(tr, buf);
5452 	if (err)
5453 		return err;
5454 
5455 	*ppos += ret;
5456 
5457 	return ret;
5458 }
5459 
5460 static ssize_t
5461 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5462 		   size_t cnt, loff_t *ppos)
5463 {
5464 	char buf[64];
5465 	int r;
5466 
5467 	r = snprintf(buf, sizeof(buf), "%ld\n",
5468 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5469 	if (r > sizeof(buf))
5470 		r = sizeof(buf);
5471 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5472 }
5473 
5474 static ssize_t
5475 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5476 		    size_t cnt, loff_t *ppos)
5477 {
5478 	unsigned long val;
5479 	int ret;
5480 
5481 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5482 	if (ret)
5483 		return ret;
5484 
5485 	*ptr = val * 1000;
5486 
5487 	return cnt;
5488 }
5489 
5490 static ssize_t
5491 tracing_thresh_read(struct file *filp, char __user *ubuf,
5492 		    size_t cnt, loff_t *ppos)
5493 {
5494 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5495 }
5496 
5497 static ssize_t
5498 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5499 		     size_t cnt, loff_t *ppos)
5500 {
5501 	struct trace_array *tr = filp->private_data;
5502 	int ret;
5503 
5504 	mutex_lock(&trace_types_lock);
5505 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5506 	if (ret < 0)
5507 		goto out;
5508 
5509 	if (tr->current_trace->update_thresh) {
5510 		ret = tr->current_trace->update_thresh(tr);
5511 		if (ret < 0)
5512 			goto out;
5513 	}
5514 
5515 	ret = cnt;
5516 out:
5517 	mutex_unlock(&trace_types_lock);
5518 
5519 	return ret;
5520 }
5521 
5522 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5523 
5524 static ssize_t
5525 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5526 		     size_t cnt, loff_t *ppos)
5527 {
5528 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5529 }
5530 
5531 static ssize_t
5532 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5533 		      size_t cnt, loff_t *ppos)
5534 {
5535 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5536 }
5537 
5538 #endif
5539 
5540 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5541 {
5542 	struct trace_array *tr = inode->i_private;
5543 	struct trace_iterator *iter;
5544 	int ret = 0;
5545 
5546 	if (tracing_disabled)
5547 		return -ENODEV;
5548 
5549 	if (trace_array_get(tr) < 0)
5550 		return -ENODEV;
5551 
5552 	mutex_lock(&trace_types_lock);
5553 
5554 	/* create a buffer to store the information to pass to userspace */
5555 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5556 	if (!iter) {
5557 		ret = -ENOMEM;
5558 		__trace_array_put(tr);
5559 		goto out;
5560 	}
5561 
5562 	trace_seq_init(&iter->seq);
5563 	iter->trace = tr->current_trace;
5564 
5565 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5566 		ret = -ENOMEM;
5567 		goto fail;
5568 	}
5569 
5570 	/* trace pipe does not show start of buffer */
5571 	cpumask_setall(iter->started);
5572 
5573 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5574 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5575 
5576 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5577 	if (trace_clocks[tr->clock_id].in_ns)
5578 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5579 
5580 	iter->tr = tr;
5581 	iter->trace_buffer = &tr->trace_buffer;
5582 	iter->cpu_file = tracing_get_cpu(inode);
5583 	mutex_init(&iter->mutex);
5584 	filp->private_data = iter;
5585 
5586 	if (iter->trace->pipe_open)
5587 		iter->trace->pipe_open(iter);
5588 
5589 	nonseekable_open(inode, filp);
5590 
5591 	tr->current_trace->ref++;
5592 out:
5593 	mutex_unlock(&trace_types_lock);
5594 	return ret;
5595 
5596 fail:
5597 	kfree(iter->trace);
5598 	kfree(iter);
5599 	__trace_array_put(tr);
5600 	mutex_unlock(&trace_types_lock);
5601 	return ret;
5602 }
5603 
5604 static int tracing_release_pipe(struct inode *inode, struct file *file)
5605 {
5606 	struct trace_iterator *iter = file->private_data;
5607 	struct trace_array *tr = inode->i_private;
5608 
5609 	mutex_lock(&trace_types_lock);
5610 
5611 	tr->current_trace->ref--;
5612 
5613 	if (iter->trace->pipe_close)
5614 		iter->trace->pipe_close(iter);
5615 
5616 	mutex_unlock(&trace_types_lock);
5617 
5618 	free_cpumask_var(iter->started);
5619 	mutex_destroy(&iter->mutex);
5620 	kfree(iter);
5621 
5622 	trace_array_put(tr);
5623 
5624 	return 0;
5625 }
5626 
5627 static __poll_t
5628 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5629 {
5630 	struct trace_array *tr = iter->tr;
5631 
5632 	/* Iterators are static, they should be filled or empty */
5633 	if (trace_buffer_iter(iter, iter->cpu_file))
5634 		return EPOLLIN | EPOLLRDNORM;
5635 
5636 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5637 		/*
5638 		 * Always select as readable when in blocking mode
5639 		 */
5640 		return EPOLLIN | EPOLLRDNORM;
5641 	else
5642 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5643 					     filp, poll_table);
5644 }
5645 
5646 static __poll_t
5647 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5648 {
5649 	struct trace_iterator *iter = filp->private_data;
5650 
5651 	return trace_poll(iter, filp, poll_table);
5652 }
5653 
5654 /* Must be called with iter->mutex held. */
5655 static int tracing_wait_pipe(struct file *filp)
5656 {
5657 	struct trace_iterator *iter = filp->private_data;
5658 	int ret;
5659 
5660 	while (trace_empty(iter)) {
5661 
5662 		if ((filp->f_flags & O_NONBLOCK)) {
5663 			return -EAGAIN;
5664 		}
5665 
5666 		/*
5667 		 * We block until we read something and tracing is disabled.
5668 		 * We still block if tracing is disabled, but we have never
5669 		 * read anything. This allows a user to cat this file, and
5670 		 * then enable tracing. But after we have read something,
5671 		 * we give an EOF when tracing is again disabled.
5672 		 *
5673 		 * iter->pos will be 0 if we haven't read anything.
5674 		 */
5675 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5676 			break;
5677 
5678 		mutex_unlock(&iter->mutex);
5679 
5680 		ret = wait_on_pipe(iter, false);
5681 
5682 		mutex_lock(&iter->mutex);
5683 
5684 		if (ret)
5685 			return ret;
5686 	}
5687 
5688 	return 1;
5689 }
5690 
5691 /*
5692  * Consumer reader.
5693  */
5694 static ssize_t
5695 tracing_read_pipe(struct file *filp, char __user *ubuf,
5696 		  size_t cnt, loff_t *ppos)
5697 {
5698 	struct trace_iterator *iter = filp->private_data;
5699 	ssize_t sret;
5700 
5701 	/*
5702 	 * Avoid more than one consumer on a single file descriptor
5703 	 * This is just a matter of traces coherency, the ring buffer itself
5704 	 * is protected.
5705 	 */
5706 	mutex_lock(&iter->mutex);
5707 
5708 	/* return any leftover data */
5709 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5710 	if (sret != -EBUSY)
5711 		goto out;
5712 
5713 	trace_seq_init(&iter->seq);
5714 
5715 	if (iter->trace->read) {
5716 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5717 		if (sret)
5718 			goto out;
5719 	}
5720 
5721 waitagain:
5722 	sret = tracing_wait_pipe(filp);
5723 	if (sret <= 0)
5724 		goto out;
5725 
5726 	/* stop when tracing is finished */
5727 	if (trace_empty(iter)) {
5728 		sret = 0;
5729 		goto out;
5730 	}
5731 
5732 	if (cnt >= PAGE_SIZE)
5733 		cnt = PAGE_SIZE - 1;
5734 
5735 	/* reset all but tr, trace, and overruns */
5736 	memset(&iter->seq, 0,
5737 	       sizeof(struct trace_iterator) -
5738 	       offsetof(struct trace_iterator, seq));
5739 	cpumask_clear(iter->started);
5740 	iter->pos = -1;
5741 
5742 	trace_event_read_lock();
5743 	trace_access_lock(iter->cpu_file);
5744 	while (trace_find_next_entry_inc(iter) != NULL) {
5745 		enum print_line_t ret;
5746 		int save_len = iter->seq.seq.len;
5747 
5748 		ret = print_trace_line(iter);
5749 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5750 			/* don't print partial lines */
5751 			iter->seq.seq.len = save_len;
5752 			break;
5753 		}
5754 		if (ret != TRACE_TYPE_NO_CONSUME)
5755 			trace_consume(iter);
5756 
5757 		if (trace_seq_used(&iter->seq) >= cnt)
5758 			break;
5759 
5760 		/*
5761 		 * Setting the full flag means we reached the trace_seq buffer
5762 		 * size and we should leave by partial output condition above.
5763 		 * One of the trace_seq_* functions is not used properly.
5764 		 */
5765 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5766 			  iter->ent->type);
5767 	}
5768 	trace_access_unlock(iter->cpu_file);
5769 	trace_event_read_unlock();
5770 
5771 	/* Now copy what we have to the user */
5772 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5773 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5774 		trace_seq_init(&iter->seq);
5775 
5776 	/*
5777 	 * If there was nothing to send to user, in spite of consuming trace
5778 	 * entries, go back to wait for more entries.
5779 	 */
5780 	if (sret == -EBUSY)
5781 		goto waitagain;
5782 
5783 out:
5784 	mutex_unlock(&iter->mutex);
5785 
5786 	return sret;
5787 }
5788 
5789 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5790 				     unsigned int idx)
5791 {
5792 	__free_page(spd->pages[idx]);
5793 }
5794 
5795 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5796 	.can_merge		= 0,
5797 	.confirm		= generic_pipe_buf_confirm,
5798 	.release		= generic_pipe_buf_release,
5799 	.steal			= generic_pipe_buf_steal,
5800 	.get			= generic_pipe_buf_get,
5801 };
5802 
5803 static size_t
5804 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5805 {
5806 	size_t count;
5807 	int save_len;
5808 	int ret;
5809 
5810 	/* Seq buffer is page-sized, exactly what we need. */
5811 	for (;;) {
5812 		save_len = iter->seq.seq.len;
5813 		ret = print_trace_line(iter);
5814 
5815 		if (trace_seq_has_overflowed(&iter->seq)) {
5816 			iter->seq.seq.len = save_len;
5817 			break;
5818 		}
5819 
5820 		/*
5821 		 * This should not be hit, because it should only
5822 		 * be set if the iter->seq overflowed. But check it
5823 		 * anyway to be safe.
5824 		 */
5825 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5826 			iter->seq.seq.len = save_len;
5827 			break;
5828 		}
5829 
5830 		count = trace_seq_used(&iter->seq) - save_len;
5831 		if (rem < count) {
5832 			rem = 0;
5833 			iter->seq.seq.len = save_len;
5834 			break;
5835 		}
5836 
5837 		if (ret != TRACE_TYPE_NO_CONSUME)
5838 			trace_consume(iter);
5839 		rem -= count;
5840 		if (!trace_find_next_entry_inc(iter))	{
5841 			rem = 0;
5842 			iter->ent = NULL;
5843 			break;
5844 		}
5845 	}
5846 
5847 	return rem;
5848 }
5849 
5850 static ssize_t tracing_splice_read_pipe(struct file *filp,
5851 					loff_t *ppos,
5852 					struct pipe_inode_info *pipe,
5853 					size_t len,
5854 					unsigned int flags)
5855 {
5856 	struct page *pages_def[PIPE_DEF_BUFFERS];
5857 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5858 	struct trace_iterator *iter = filp->private_data;
5859 	struct splice_pipe_desc spd = {
5860 		.pages		= pages_def,
5861 		.partial	= partial_def,
5862 		.nr_pages	= 0, /* This gets updated below. */
5863 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5864 		.ops		= &tracing_pipe_buf_ops,
5865 		.spd_release	= tracing_spd_release_pipe,
5866 	};
5867 	ssize_t ret;
5868 	size_t rem;
5869 	unsigned int i;
5870 
5871 	if (splice_grow_spd(pipe, &spd))
5872 		return -ENOMEM;
5873 
5874 	mutex_lock(&iter->mutex);
5875 
5876 	if (iter->trace->splice_read) {
5877 		ret = iter->trace->splice_read(iter, filp,
5878 					       ppos, pipe, len, flags);
5879 		if (ret)
5880 			goto out_err;
5881 	}
5882 
5883 	ret = tracing_wait_pipe(filp);
5884 	if (ret <= 0)
5885 		goto out_err;
5886 
5887 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5888 		ret = -EFAULT;
5889 		goto out_err;
5890 	}
5891 
5892 	trace_event_read_lock();
5893 	trace_access_lock(iter->cpu_file);
5894 
5895 	/* Fill as many pages as possible. */
5896 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5897 		spd.pages[i] = alloc_page(GFP_KERNEL);
5898 		if (!spd.pages[i])
5899 			break;
5900 
5901 		rem = tracing_fill_pipe_page(rem, iter);
5902 
5903 		/* Copy the data into the page, so we can start over. */
5904 		ret = trace_seq_to_buffer(&iter->seq,
5905 					  page_address(spd.pages[i]),
5906 					  trace_seq_used(&iter->seq));
5907 		if (ret < 0) {
5908 			__free_page(spd.pages[i]);
5909 			break;
5910 		}
5911 		spd.partial[i].offset = 0;
5912 		spd.partial[i].len = trace_seq_used(&iter->seq);
5913 
5914 		trace_seq_init(&iter->seq);
5915 	}
5916 
5917 	trace_access_unlock(iter->cpu_file);
5918 	trace_event_read_unlock();
5919 	mutex_unlock(&iter->mutex);
5920 
5921 	spd.nr_pages = i;
5922 
5923 	if (i)
5924 		ret = splice_to_pipe(pipe, &spd);
5925 	else
5926 		ret = 0;
5927 out:
5928 	splice_shrink_spd(&spd);
5929 	return ret;
5930 
5931 out_err:
5932 	mutex_unlock(&iter->mutex);
5933 	goto out;
5934 }
5935 
5936 static ssize_t
5937 tracing_entries_read(struct file *filp, char __user *ubuf,
5938 		     size_t cnt, loff_t *ppos)
5939 {
5940 	struct inode *inode = file_inode(filp);
5941 	struct trace_array *tr = inode->i_private;
5942 	int cpu = tracing_get_cpu(inode);
5943 	char buf[64];
5944 	int r = 0;
5945 	ssize_t ret;
5946 
5947 	mutex_lock(&trace_types_lock);
5948 
5949 	if (cpu == RING_BUFFER_ALL_CPUS) {
5950 		int cpu, buf_size_same;
5951 		unsigned long size;
5952 
5953 		size = 0;
5954 		buf_size_same = 1;
5955 		/* check if all cpu sizes are same */
5956 		for_each_tracing_cpu(cpu) {
5957 			/* fill in the size from first enabled cpu */
5958 			if (size == 0)
5959 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5960 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5961 				buf_size_same = 0;
5962 				break;
5963 			}
5964 		}
5965 
5966 		if (buf_size_same) {
5967 			if (!ring_buffer_expanded)
5968 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5969 					    size >> 10,
5970 					    trace_buf_size >> 10);
5971 			else
5972 				r = sprintf(buf, "%lu\n", size >> 10);
5973 		} else
5974 			r = sprintf(buf, "X\n");
5975 	} else
5976 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5977 
5978 	mutex_unlock(&trace_types_lock);
5979 
5980 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5981 	return ret;
5982 }
5983 
5984 static ssize_t
5985 tracing_entries_write(struct file *filp, const char __user *ubuf,
5986 		      size_t cnt, loff_t *ppos)
5987 {
5988 	struct inode *inode = file_inode(filp);
5989 	struct trace_array *tr = inode->i_private;
5990 	unsigned long val;
5991 	int ret;
5992 
5993 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5994 	if (ret)
5995 		return ret;
5996 
5997 	/* must have at least 1 entry */
5998 	if (!val)
5999 		return -EINVAL;
6000 
6001 	/* value is in KB */
6002 	val <<= 10;
6003 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6004 	if (ret < 0)
6005 		return ret;
6006 
6007 	*ppos += cnt;
6008 
6009 	return cnt;
6010 }
6011 
6012 static ssize_t
6013 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6014 				size_t cnt, loff_t *ppos)
6015 {
6016 	struct trace_array *tr = filp->private_data;
6017 	char buf[64];
6018 	int r, cpu;
6019 	unsigned long size = 0, expanded_size = 0;
6020 
6021 	mutex_lock(&trace_types_lock);
6022 	for_each_tracing_cpu(cpu) {
6023 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6024 		if (!ring_buffer_expanded)
6025 			expanded_size += trace_buf_size >> 10;
6026 	}
6027 	if (ring_buffer_expanded)
6028 		r = sprintf(buf, "%lu\n", size);
6029 	else
6030 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6031 	mutex_unlock(&trace_types_lock);
6032 
6033 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6034 }
6035 
6036 static ssize_t
6037 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6038 			  size_t cnt, loff_t *ppos)
6039 {
6040 	/*
6041 	 * There is no need to read what the user has written, this function
6042 	 * is just to make sure that there is no error when "echo" is used
6043 	 */
6044 
6045 	*ppos += cnt;
6046 
6047 	return cnt;
6048 }
6049 
6050 static int
6051 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6052 {
6053 	struct trace_array *tr = inode->i_private;
6054 
6055 	/* disable tracing ? */
6056 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6057 		tracer_tracing_off(tr);
6058 	/* resize the ring buffer to 0 */
6059 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6060 
6061 	trace_array_put(tr);
6062 
6063 	return 0;
6064 }
6065 
6066 static ssize_t
6067 tracing_mark_write(struct file *filp, const char __user *ubuf,
6068 					size_t cnt, loff_t *fpos)
6069 {
6070 	struct trace_array *tr = filp->private_data;
6071 	struct ring_buffer_event *event;
6072 	enum event_trigger_type tt = ETT_NONE;
6073 	struct ring_buffer *buffer;
6074 	struct print_entry *entry;
6075 	unsigned long irq_flags;
6076 	const char faulted[] = "<faulted>";
6077 	ssize_t written;
6078 	int size;
6079 	int len;
6080 
6081 /* Used in tracing_mark_raw_write() as well */
6082 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6083 
6084 	if (tracing_disabled)
6085 		return -EINVAL;
6086 
6087 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6088 		return -EINVAL;
6089 
6090 	if (cnt > TRACE_BUF_SIZE)
6091 		cnt = TRACE_BUF_SIZE;
6092 
6093 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6094 
6095 	local_save_flags(irq_flags);
6096 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6097 
6098 	/* If less than "<faulted>", then make sure we can still add that */
6099 	if (cnt < FAULTED_SIZE)
6100 		size += FAULTED_SIZE - cnt;
6101 
6102 	buffer = tr->trace_buffer.buffer;
6103 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6104 					    irq_flags, preempt_count());
6105 	if (unlikely(!event))
6106 		/* Ring buffer disabled, return as if not open for write */
6107 		return -EBADF;
6108 
6109 	entry = ring_buffer_event_data(event);
6110 	entry->ip = _THIS_IP_;
6111 
6112 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6113 	if (len) {
6114 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6115 		cnt = FAULTED_SIZE;
6116 		written = -EFAULT;
6117 	} else
6118 		written = cnt;
6119 	len = cnt;
6120 
6121 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6122 		/* do not add \n before testing triggers, but add \0 */
6123 		entry->buf[cnt] = '\0';
6124 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6125 	}
6126 
6127 	if (entry->buf[cnt - 1] != '\n') {
6128 		entry->buf[cnt] = '\n';
6129 		entry->buf[cnt + 1] = '\0';
6130 	} else
6131 		entry->buf[cnt] = '\0';
6132 
6133 	__buffer_unlock_commit(buffer, event);
6134 
6135 	if (tt)
6136 		event_triggers_post_call(tr->trace_marker_file, tt);
6137 
6138 	if (written > 0)
6139 		*fpos += written;
6140 
6141 	return written;
6142 }
6143 
6144 /* Limit it for now to 3K (including tag) */
6145 #define RAW_DATA_MAX_SIZE (1024*3)
6146 
6147 static ssize_t
6148 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6149 					size_t cnt, loff_t *fpos)
6150 {
6151 	struct trace_array *tr = filp->private_data;
6152 	struct ring_buffer_event *event;
6153 	struct ring_buffer *buffer;
6154 	struct raw_data_entry *entry;
6155 	const char faulted[] = "<faulted>";
6156 	unsigned long irq_flags;
6157 	ssize_t written;
6158 	int size;
6159 	int len;
6160 
6161 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6162 
6163 	if (tracing_disabled)
6164 		return -EINVAL;
6165 
6166 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6167 		return -EINVAL;
6168 
6169 	/* The marker must at least have a tag id */
6170 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6171 		return -EINVAL;
6172 
6173 	if (cnt > TRACE_BUF_SIZE)
6174 		cnt = TRACE_BUF_SIZE;
6175 
6176 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6177 
6178 	local_save_flags(irq_flags);
6179 	size = sizeof(*entry) + cnt;
6180 	if (cnt < FAULT_SIZE_ID)
6181 		size += FAULT_SIZE_ID - cnt;
6182 
6183 	buffer = tr->trace_buffer.buffer;
6184 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6185 					    irq_flags, preempt_count());
6186 	if (!event)
6187 		/* Ring buffer disabled, return as if not open for write */
6188 		return -EBADF;
6189 
6190 	entry = ring_buffer_event_data(event);
6191 
6192 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6193 	if (len) {
6194 		entry->id = -1;
6195 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6196 		written = -EFAULT;
6197 	} else
6198 		written = cnt;
6199 
6200 	__buffer_unlock_commit(buffer, event);
6201 
6202 	if (written > 0)
6203 		*fpos += written;
6204 
6205 	return written;
6206 }
6207 
6208 static int tracing_clock_show(struct seq_file *m, void *v)
6209 {
6210 	struct trace_array *tr = m->private;
6211 	int i;
6212 
6213 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6214 		seq_printf(m,
6215 			"%s%s%s%s", i ? " " : "",
6216 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6217 			i == tr->clock_id ? "]" : "");
6218 	seq_putc(m, '\n');
6219 
6220 	return 0;
6221 }
6222 
6223 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6224 {
6225 	int i;
6226 
6227 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6228 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6229 			break;
6230 	}
6231 	if (i == ARRAY_SIZE(trace_clocks))
6232 		return -EINVAL;
6233 
6234 	mutex_lock(&trace_types_lock);
6235 
6236 	tr->clock_id = i;
6237 
6238 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6239 
6240 	/*
6241 	 * New clock may not be consistent with the previous clock.
6242 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6243 	 */
6244 	tracing_reset_online_cpus(&tr->trace_buffer);
6245 
6246 #ifdef CONFIG_TRACER_MAX_TRACE
6247 	if (tr->max_buffer.buffer)
6248 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6249 	tracing_reset_online_cpus(&tr->max_buffer);
6250 #endif
6251 
6252 	mutex_unlock(&trace_types_lock);
6253 
6254 	return 0;
6255 }
6256 
6257 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6258 				   size_t cnt, loff_t *fpos)
6259 {
6260 	struct seq_file *m = filp->private_data;
6261 	struct trace_array *tr = m->private;
6262 	char buf[64];
6263 	const char *clockstr;
6264 	int ret;
6265 
6266 	if (cnt >= sizeof(buf))
6267 		return -EINVAL;
6268 
6269 	if (copy_from_user(buf, ubuf, cnt))
6270 		return -EFAULT;
6271 
6272 	buf[cnt] = 0;
6273 
6274 	clockstr = strstrip(buf);
6275 
6276 	ret = tracing_set_clock(tr, clockstr);
6277 	if (ret)
6278 		return ret;
6279 
6280 	*fpos += cnt;
6281 
6282 	return cnt;
6283 }
6284 
6285 static int tracing_clock_open(struct inode *inode, struct file *file)
6286 {
6287 	struct trace_array *tr = inode->i_private;
6288 	int ret;
6289 
6290 	if (tracing_disabled)
6291 		return -ENODEV;
6292 
6293 	if (trace_array_get(tr))
6294 		return -ENODEV;
6295 
6296 	ret = single_open(file, tracing_clock_show, inode->i_private);
6297 	if (ret < 0)
6298 		trace_array_put(tr);
6299 
6300 	return ret;
6301 }
6302 
6303 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6304 {
6305 	struct trace_array *tr = m->private;
6306 
6307 	mutex_lock(&trace_types_lock);
6308 
6309 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6310 		seq_puts(m, "delta [absolute]\n");
6311 	else
6312 		seq_puts(m, "[delta] absolute\n");
6313 
6314 	mutex_unlock(&trace_types_lock);
6315 
6316 	return 0;
6317 }
6318 
6319 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6320 {
6321 	struct trace_array *tr = inode->i_private;
6322 	int ret;
6323 
6324 	if (tracing_disabled)
6325 		return -ENODEV;
6326 
6327 	if (trace_array_get(tr))
6328 		return -ENODEV;
6329 
6330 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6331 	if (ret < 0)
6332 		trace_array_put(tr);
6333 
6334 	return ret;
6335 }
6336 
6337 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6338 {
6339 	int ret = 0;
6340 
6341 	mutex_lock(&trace_types_lock);
6342 
6343 	if (abs && tr->time_stamp_abs_ref++)
6344 		goto out;
6345 
6346 	if (!abs) {
6347 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6348 			ret = -EINVAL;
6349 			goto out;
6350 		}
6351 
6352 		if (--tr->time_stamp_abs_ref)
6353 			goto out;
6354 	}
6355 
6356 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6357 
6358 #ifdef CONFIG_TRACER_MAX_TRACE
6359 	if (tr->max_buffer.buffer)
6360 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6361 #endif
6362  out:
6363 	mutex_unlock(&trace_types_lock);
6364 
6365 	return ret;
6366 }
6367 
6368 struct ftrace_buffer_info {
6369 	struct trace_iterator	iter;
6370 	void			*spare;
6371 	unsigned int		spare_cpu;
6372 	unsigned int		read;
6373 };
6374 
6375 #ifdef CONFIG_TRACER_SNAPSHOT
6376 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6377 {
6378 	struct trace_array *tr = inode->i_private;
6379 	struct trace_iterator *iter;
6380 	struct seq_file *m;
6381 	int ret = 0;
6382 
6383 	if (trace_array_get(tr) < 0)
6384 		return -ENODEV;
6385 
6386 	if (file->f_mode & FMODE_READ) {
6387 		iter = __tracing_open(inode, file, true);
6388 		if (IS_ERR(iter))
6389 			ret = PTR_ERR(iter);
6390 	} else {
6391 		/* Writes still need the seq_file to hold the private data */
6392 		ret = -ENOMEM;
6393 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6394 		if (!m)
6395 			goto out;
6396 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6397 		if (!iter) {
6398 			kfree(m);
6399 			goto out;
6400 		}
6401 		ret = 0;
6402 
6403 		iter->tr = tr;
6404 		iter->trace_buffer = &tr->max_buffer;
6405 		iter->cpu_file = tracing_get_cpu(inode);
6406 		m->private = iter;
6407 		file->private_data = m;
6408 	}
6409 out:
6410 	if (ret < 0)
6411 		trace_array_put(tr);
6412 
6413 	return ret;
6414 }
6415 
6416 static ssize_t
6417 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6418 		       loff_t *ppos)
6419 {
6420 	struct seq_file *m = filp->private_data;
6421 	struct trace_iterator *iter = m->private;
6422 	struct trace_array *tr = iter->tr;
6423 	unsigned long val;
6424 	int ret;
6425 
6426 	ret = tracing_update_buffers();
6427 	if (ret < 0)
6428 		return ret;
6429 
6430 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6431 	if (ret)
6432 		return ret;
6433 
6434 	mutex_lock(&trace_types_lock);
6435 
6436 	if (tr->current_trace->use_max_tr) {
6437 		ret = -EBUSY;
6438 		goto out;
6439 	}
6440 
6441 	switch (val) {
6442 	case 0:
6443 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6444 			ret = -EINVAL;
6445 			break;
6446 		}
6447 		if (tr->allocated_snapshot)
6448 			free_snapshot(tr);
6449 		break;
6450 	case 1:
6451 /* Only allow per-cpu swap if the ring buffer supports it */
6452 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6453 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6454 			ret = -EINVAL;
6455 			break;
6456 		}
6457 #endif
6458 		if (!tr->allocated_snapshot) {
6459 			ret = tracing_alloc_snapshot_instance(tr);
6460 			if (ret < 0)
6461 				break;
6462 		}
6463 		local_irq_disable();
6464 		/* Now, we're going to swap */
6465 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6466 			update_max_tr(tr, current, smp_processor_id());
6467 		else
6468 			update_max_tr_single(tr, current, iter->cpu_file);
6469 		local_irq_enable();
6470 		break;
6471 	default:
6472 		if (tr->allocated_snapshot) {
6473 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6474 				tracing_reset_online_cpus(&tr->max_buffer);
6475 			else
6476 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6477 		}
6478 		break;
6479 	}
6480 
6481 	if (ret >= 0) {
6482 		*ppos += cnt;
6483 		ret = cnt;
6484 	}
6485 out:
6486 	mutex_unlock(&trace_types_lock);
6487 	return ret;
6488 }
6489 
6490 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6491 {
6492 	struct seq_file *m = file->private_data;
6493 	int ret;
6494 
6495 	ret = tracing_release(inode, file);
6496 
6497 	if (file->f_mode & FMODE_READ)
6498 		return ret;
6499 
6500 	/* If write only, the seq_file is just a stub */
6501 	if (m)
6502 		kfree(m->private);
6503 	kfree(m);
6504 
6505 	return 0;
6506 }
6507 
6508 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6509 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6510 				    size_t count, loff_t *ppos);
6511 static int tracing_buffers_release(struct inode *inode, struct file *file);
6512 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6513 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6514 
6515 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6516 {
6517 	struct ftrace_buffer_info *info;
6518 	int ret;
6519 
6520 	ret = tracing_buffers_open(inode, filp);
6521 	if (ret < 0)
6522 		return ret;
6523 
6524 	info = filp->private_data;
6525 
6526 	if (info->iter.trace->use_max_tr) {
6527 		tracing_buffers_release(inode, filp);
6528 		return -EBUSY;
6529 	}
6530 
6531 	info->iter.snapshot = true;
6532 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6533 
6534 	return ret;
6535 }
6536 
6537 #endif /* CONFIG_TRACER_SNAPSHOT */
6538 
6539 
6540 static const struct file_operations tracing_thresh_fops = {
6541 	.open		= tracing_open_generic,
6542 	.read		= tracing_thresh_read,
6543 	.write		= tracing_thresh_write,
6544 	.llseek		= generic_file_llseek,
6545 };
6546 
6547 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6548 static const struct file_operations tracing_max_lat_fops = {
6549 	.open		= tracing_open_generic,
6550 	.read		= tracing_max_lat_read,
6551 	.write		= tracing_max_lat_write,
6552 	.llseek		= generic_file_llseek,
6553 };
6554 #endif
6555 
6556 static const struct file_operations set_tracer_fops = {
6557 	.open		= tracing_open_generic,
6558 	.read		= tracing_set_trace_read,
6559 	.write		= tracing_set_trace_write,
6560 	.llseek		= generic_file_llseek,
6561 };
6562 
6563 static const struct file_operations tracing_pipe_fops = {
6564 	.open		= tracing_open_pipe,
6565 	.poll		= tracing_poll_pipe,
6566 	.read		= tracing_read_pipe,
6567 	.splice_read	= tracing_splice_read_pipe,
6568 	.release	= tracing_release_pipe,
6569 	.llseek		= no_llseek,
6570 };
6571 
6572 static const struct file_operations tracing_entries_fops = {
6573 	.open		= tracing_open_generic_tr,
6574 	.read		= tracing_entries_read,
6575 	.write		= tracing_entries_write,
6576 	.llseek		= generic_file_llseek,
6577 	.release	= tracing_release_generic_tr,
6578 };
6579 
6580 static const struct file_operations tracing_total_entries_fops = {
6581 	.open		= tracing_open_generic_tr,
6582 	.read		= tracing_total_entries_read,
6583 	.llseek		= generic_file_llseek,
6584 	.release	= tracing_release_generic_tr,
6585 };
6586 
6587 static const struct file_operations tracing_free_buffer_fops = {
6588 	.open		= tracing_open_generic_tr,
6589 	.write		= tracing_free_buffer_write,
6590 	.release	= tracing_free_buffer_release,
6591 };
6592 
6593 static const struct file_operations tracing_mark_fops = {
6594 	.open		= tracing_open_generic_tr,
6595 	.write		= tracing_mark_write,
6596 	.llseek		= generic_file_llseek,
6597 	.release	= tracing_release_generic_tr,
6598 };
6599 
6600 static const struct file_operations tracing_mark_raw_fops = {
6601 	.open		= tracing_open_generic_tr,
6602 	.write		= tracing_mark_raw_write,
6603 	.llseek		= generic_file_llseek,
6604 	.release	= tracing_release_generic_tr,
6605 };
6606 
6607 static const struct file_operations trace_clock_fops = {
6608 	.open		= tracing_clock_open,
6609 	.read		= seq_read,
6610 	.llseek		= seq_lseek,
6611 	.release	= tracing_single_release_tr,
6612 	.write		= tracing_clock_write,
6613 };
6614 
6615 static const struct file_operations trace_time_stamp_mode_fops = {
6616 	.open		= tracing_time_stamp_mode_open,
6617 	.read		= seq_read,
6618 	.llseek		= seq_lseek,
6619 	.release	= tracing_single_release_tr,
6620 };
6621 
6622 #ifdef CONFIG_TRACER_SNAPSHOT
6623 static const struct file_operations snapshot_fops = {
6624 	.open		= tracing_snapshot_open,
6625 	.read		= seq_read,
6626 	.write		= tracing_snapshot_write,
6627 	.llseek		= tracing_lseek,
6628 	.release	= tracing_snapshot_release,
6629 };
6630 
6631 static const struct file_operations snapshot_raw_fops = {
6632 	.open		= snapshot_raw_open,
6633 	.read		= tracing_buffers_read,
6634 	.release	= tracing_buffers_release,
6635 	.splice_read	= tracing_buffers_splice_read,
6636 	.llseek		= no_llseek,
6637 };
6638 
6639 #endif /* CONFIG_TRACER_SNAPSHOT */
6640 
6641 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6642 {
6643 	struct trace_array *tr = inode->i_private;
6644 	struct ftrace_buffer_info *info;
6645 	int ret;
6646 
6647 	if (tracing_disabled)
6648 		return -ENODEV;
6649 
6650 	if (trace_array_get(tr) < 0)
6651 		return -ENODEV;
6652 
6653 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6654 	if (!info) {
6655 		trace_array_put(tr);
6656 		return -ENOMEM;
6657 	}
6658 
6659 	mutex_lock(&trace_types_lock);
6660 
6661 	info->iter.tr		= tr;
6662 	info->iter.cpu_file	= tracing_get_cpu(inode);
6663 	info->iter.trace	= tr->current_trace;
6664 	info->iter.trace_buffer = &tr->trace_buffer;
6665 	info->spare		= NULL;
6666 	/* Force reading ring buffer for first read */
6667 	info->read		= (unsigned int)-1;
6668 
6669 	filp->private_data = info;
6670 
6671 	tr->current_trace->ref++;
6672 
6673 	mutex_unlock(&trace_types_lock);
6674 
6675 	ret = nonseekable_open(inode, filp);
6676 	if (ret < 0)
6677 		trace_array_put(tr);
6678 
6679 	return ret;
6680 }
6681 
6682 static __poll_t
6683 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6684 {
6685 	struct ftrace_buffer_info *info = filp->private_data;
6686 	struct trace_iterator *iter = &info->iter;
6687 
6688 	return trace_poll(iter, filp, poll_table);
6689 }
6690 
6691 static ssize_t
6692 tracing_buffers_read(struct file *filp, char __user *ubuf,
6693 		     size_t count, loff_t *ppos)
6694 {
6695 	struct ftrace_buffer_info *info = filp->private_data;
6696 	struct trace_iterator *iter = &info->iter;
6697 	ssize_t ret = 0;
6698 	ssize_t size;
6699 
6700 	if (!count)
6701 		return 0;
6702 
6703 #ifdef CONFIG_TRACER_MAX_TRACE
6704 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6705 		return -EBUSY;
6706 #endif
6707 
6708 	if (!info->spare) {
6709 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6710 							  iter->cpu_file);
6711 		if (IS_ERR(info->spare)) {
6712 			ret = PTR_ERR(info->spare);
6713 			info->spare = NULL;
6714 		} else {
6715 			info->spare_cpu = iter->cpu_file;
6716 		}
6717 	}
6718 	if (!info->spare)
6719 		return ret;
6720 
6721 	/* Do we have previous read data to read? */
6722 	if (info->read < PAGE_SIZE)
6723 		goto read;
6724 
6725  again:
6726 	trace_access_lock(iter->cpu_file);
6727 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6728 				    &info->spare,
6729 				    count,
6730 				    iter->cpu_file, 0);
6731 	trace_access_unlock(iter->cpu_file);
6732 
6733 	if (ret < 0) {
6734 		if (trace_empty(iter)) {
6735 			if ((filp->f_flags & O_NONBLOCK))
6736 				return -EAGAIN;
6737 
6738 			ret = wait_on_pipe(iter, false);
6739 			if (ret)
6740 				return ret;
6741 
6742 			goto again;
6743 		}
6744 		return 0;
6745 	}
6746 
6747 	info->read = 0;
6748  read:
6749 	size = PAGE_SIZE - info->read;
6750 	if (size > count)
6751 		size = count;
6752 
6753 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6754 	if (ret == size)
6755 		return -EFAULT;
6756 
6757 	size -= ret;
6758 
6759 	*ppos += size;
6760 	info->read += size;
6761 
6762 	return size;
6763 }
6764 
6765 static int tracing_buffers_release(struct inode *inode, struct file *file)
6766 {
6767 	struct ftrace_buffer_info *info = file->private_data;
6768 	struct trace_iterator *iter = &info->iter;
6769 
6770 	mutex_lock(&trace_types_lock);
6771 
6772 	iter->tr->current_trace->ref--;
6773 
6774 	__trace_array_put(iter->tr);
6775 
6776 	if (info->spare)
6777 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6778 					   info->spare_cpu, info->spare);
6779 	kfree(info);
6780 
6781 	mutex_unlock(&trace_types_lock);
6782 
6783 	return 0;
6784 }
6785 
6786 struct buffer_ref {
6787 	struct ring_buffer	*buffer;
6788 	void			*page;
6789 	int			cpu;
6790 	int			ref;
6791 };
6792 
6793 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6794 				    struct pipe_buffer *buf)
6795 {
6796 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6797 
6798 	if (--ref->ref)
6799 		return;
6800 
6801 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6802 	kfree(ref);
6803 	buf->private = 0;
6804 }
6805 
6806 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6807 				struct pipe_buffer *buf)
6808 {
6809 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6810 
6811 	ref->ref++;
6812 }
6813 
6814 /* Pipe buffer operations for a buffer. */
6815 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6816 	.can_merge		= 0,
6817 	.confirm		= generic_pipe_buf_confirm,
6818 	.release		= buffer_pipe_buf_release,
6819 	.steal			= generic_pipe_buf_steal,
6820 	.get			= buffer_pipe_buf_get,
6821 };
6822 
6823 /*
6824  * Callback from splice_to_pipe(), if we need to release some pages
6825  * at the end of the spd in case we error'ed out in filling the pipe.
6826  */
6827 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6828 {
6829 	struct buffer_ref *ref =
6830 		(struct buffer_ref *)spd->partial[i].private;
6831 
6832 	if (--ref->ref)
6833 		return;
6834 
6835 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6836 	kfree(ref);
6837 	spd->partial[i].private = 0;
6838 }
6839 
6840 static ssize_t
6841 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6842 			    struct pipe_inode_info *pipe, size_t len,
6843 			    unsigned int flags)
6844 {
6845 	struct ftrace_buffer_info *info = file->private_data;
6846 	struct trace_iterator *iter = &info->iter;
6847 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6848 	struct page *pages_def[PIPE_DEF_BUFFERS];
6849 	struct splice_pipe_desc spd = {
6850 		.pages		= pages_def,
6851 		.partial	= partial_def,
6852 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6853 		.ops		= &buffer_pipe_buf_ops,
6854 		.spd_release	= buffer_spd_release,
6855 	};
6856 	struct buffer_ref *ref;
6857 	int entries, i;
6858 	ssize_t ret = 0;
6859 
6860 #ifdef CONFIG_TRACER_MAX_TRACE
6861 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6862 		return -EBUSY;
6863 #endif
6864 
6865 	if (*ppos & (PAGE_SIZE - 1))
6866 		return -EINVAL;
6867 
6868 	if (len & (PAGE_SIZE - 1)) {
6869 		if (len < PAGE_SIZE)
6870 			return -EINVAL;
6871 		len &= PAGE_MASK;
6872 	}
6873 
6874 	if (splice_grow_spd(pipe, &spd))
6875 		return -ENOMEM;
6876 
6877  again:
6878 	trace_access_lock(iter->cpu_file);
6879 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6880 
6881 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6882 		struct page *page;
6883 		int r;
6884 
6885 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6886 		if (!ref) {
6887 			ret = -ENOMEM;
6888 			break;
6889 		}
6890 
6891 		ref->ref = 1;
6892 		ref->buffer = iter->trace_buffer->buffer;
6893 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6894 		if (IS_ERR(ref->page)) {
6895 			ret = PTR_ERR(ref->page);
6896 			ref->page = NULL;
6897 			kfree(ref);
6898 			break;
6899 		}
6900 		ref->cpu = iter->cpu_file;
6901 
6902 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6903 					  len, iter->cpu_file, 1);
6904 		if (r < 0) {
6905 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6906 						   ref->page);
6907 			kfree(ref);
6908 			break;
6909 		}
6910 
6911 		page = virt_to_page(ref->page);
6912 
6913 		spd.pages[i] = page;
6914 		spd.partial[i].len = PAGE_SIZE;
6915 		spd.partial[i].offset = 0;
6916 		spd.partial[i].private = (unsigned long)ref;
6917 		spd.nr_pages++;
6918 		*ppos += PAGE_SIZE;
6919 
6920 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6921 	}
6922 
6923 	trace_access_unlock(iter->cpu_file);
6924 	spd.nr_pages = i;
6925 
6926 	/* did we read anything? */
6927 	if (!spd.nr_pages) {
6928 		if (ret)
6929 			goto out;
6930 
6931 		ret = -EAGAIN;
6932 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6933 			goto out;
6934 
6935 		ret = wait_on_pipe(iter, true);
6936 		if (ret)
6937 			goto out;
6938 
6939 		goto again;
6940 	}
6941 
6942 	ret = splice_to_pipe(pipe, &spd);
6943 out:
6944 	splice_shrink_spd(&spd);
6945 
6946 	return ret;
6947 }
6948 
6949 static const struct file_operations tracing_buffers_fops = {
6950 	.open		= tracing_buffers_open,
6951 	.read		= tracing_buffers_read,
6952 	.poll		= tracing_buffers_poll,
6953 	.release	= tracing_buffers_release,
6954 	.splice_read	= tracing_buffers_splice_read,
6955 	.llseek		= no_llseek,
6956 };
6957 
6958 static ssize_t
6959 tracing_stats_read(struct file *filp, char __user *ubuf,
6960 		   size_t count, loff_t *ppos)
6961 {
6962 	struct inode *inode = file_inode(filp);
6963 	struct trace_array *tr = inode->i_private;
6964 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6965 	int cpu = tracing_get_cpu(inode);
6966 	struct trace_seq *s;
6967 	unsigned long cnt;
6968 	unsigned long long t;
6969 	unsigned long usec_rem;
6970 
6971 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6972 	if (!s)
6973 		return -ENOMEM;
6974 
6975 	trace_seq_init(s);
6976 
6977 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6978 	trace_seq_printf(s, "entries: %ld\n", cnt);
6979 
6980 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6981 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6982 
6983 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6984 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6985 
6986 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6987 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6988 
6989 	if (trace_clocks[tr->clock_id].in_ns) {
6990 		/* local or global for trace_clock */
6991 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6992 		usec_rem = do_div(t, USEC_PER_SEC);
6993 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6994 								t, usec_rem);
6995 
6996 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6997 		usec_rem = do_div(t, USEC_PER_SEC);
6998 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6999 	} else {
7000 		/* counter or tsc mode for trace_clock */
7001 		trace_seq_printf(s, "oldest event ts: %llu\n",
7002 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7003 
7004 		trace_seq_printf(s, "now ts: %llu\n",
7005 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7006 	}
7007 
7008 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7009 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7010 
7011 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7012 	trace_seq_printf(s, "read events: %ld\n", cnt);
7013 
7014 	count = simple_read_from_buffer(ubuf, count, ppos,
7015 					s->buffer, trace_seq_used(s));
7016 
7017 	kfree(s);
7018 
7019 	return count;
7020 }
7021 
7022 static const struct file_operations tracing_stats_fops = {
7023 	.open		= tracing_open_generic_tr,
7024 	.read		= tracing_stats_read,
7025 	.llseek		= generic_file_llseek,
7026 	.release	= tracing_release_generic_tr,
7027 };
7028 
7029 #ifdef CONFIG_DYNAMIC_FTRACE
7030 
7031 static ssize_t
7032 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7033 		  size_t cnt, loff_t *ppos)
7034 {
7035 	unsigned long *p = filp->private_data;
7036 	char buf[64]; /* Not too big for a shallow stack */
7037 	int r;
7038 
7039 	r = scnprintf(buf, 63, "%ld", *p);
7040 	buf[r++] = '\n';
7041 
7042 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7043 }
7044 
7045 static const struct file_operations tracing_dyn_info_fops = {
7046 	.open		= tracing_open_generic,
7047 	.read		= tracing_read_dyn_info,
7048 	.llseek		= generic_file_llseek,
7049 };
7050 #endif /* CONFIG_DYNAMIC_FTRACE */
7051 
7052 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7053 static void
7054 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7055 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7056 		void *data)
7057 {
7058 	tracing_snapshot_instance(tr);
7059 }
7060 
7061 static void
7062 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7063 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7064 		      void *data)
7065 {
7066 	struct ftrace_func_mapper *mapper = data;
7067 	long *count = NULL;
7068 
7069 	if (mapper)
7070 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7071 
7072 	if (count) {
7073 
7074 		if (*count <= 0)
7075 			return;
7076 
7077 		(*count)--;
7078 	}
7079 
7080 	tracing_snapshot_instance(tr);
7081 }
7082 
7083 static int
7084 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7085 		      struct ftrace_probe_ops *ops, void *data)
7086 {
7087 	struct ftrace_func_mapper *mapper = data;
7088 	long *count = NULL;
7089 
7090 	seq_printf(m, "%ps:", (void *)ip);
7091 
7092 	seq_puts(m, "snapshot");
7093 
7094 	if (mapper)
7095 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7096 
7097 	if (count)
7098 		seq_printf(m, ":count=%ld\n", *count);
7099 	else
7100 		seq_puts(m, ":unlimited\n");
7101 
7102 	return 0;
7103 }
7104 
7105 static int
7106 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7107 		     unsigned long ip, void *init_data, void **data)
7108 {
7109 	struct ftrace_func_mapper *mapper = *data;
7110 
7111 	if (!mapper) {
7112 		mapper = allocate_ftrace_func_mapper();
7113 		if (!mapper)
7114 			return -ENOMEM;
7115 		*data = mapper;
7116 	}
7117 
7118 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7119 }
7120 
7121 static void
7122 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7123 		     unsigned long ip, void *data)
7124 {
7125 	struct ftrace_func_mapper *mapper = data;
7126 
7127 	if (!ip) {
7128 		if (!mapper)
7129 			return;
7130 		free_ftrace_func_mapper(mapper, NULL);
7131 		return;
7132 	}
7133 
7134 	ftrace_func_mapper_remove_ip(mapper, ip);
7135 }
7136 
7137 static struct ftrace_probe_ops snapshot_probe_ops = {
7138 	.func			= ftrace_snapshot,
7139 	.print			= ftrace_snapshot_print,
7140 };
7141 
7142 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7143 	.func			= ftrace_count_snapshot,
7144 	.print			= ftrace_snapshot_print,
7145 	.init			= ftrace_snapshot_init,
7146 	.free			= ftrace_snapshot_free,
7147 };
7148 
7149 static int
7150 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7151 			       char *glob, char *cmd, char *param, int enable)
7152 {
7153 	struct ftrace_probe_ops *ops;
7154 	void *count = (void *)-1;
7155 	char *number;
7156 	int ret;
7157 
7158 	if (!tr)
7159 		return -ENODEV;
7160 
7161 	/* hash funcs only work with set_ftrace_filter */
7162 	if (!enable)
7163 		return -EINVAL;
7164 
7165 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7166 
7167 	if (glob[0] == '!')
7168 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7169 
7170 	if (!param)
7171 		goto out_reg;
7172 
7173 	number = strsep(&param, ":");
7174 
7175 	if (!strlen(number))
7176 		goto out_reg;
7177 
7178 	/*
7179 	 * We use the callback data field (which is a pointer)
7180 	 * as our counter.
7181 	 */
7182 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7183 	if (ret)
7184 		return ret;
7185 
7186  out_reg:
7187 	ret = tracing_alloc_snapshot_instance(tr);
7188 	if (ret < 0)
7189 		goto out;
7190 
7191 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7192 
7193  out:
7194 	return ret < 0 ? ret : 0;
7195 }
7196 
7197 static struct ftrace_func_command ftrace_snapshot_cmd = {
7198 	.name			= "snapshot",
7199 	.func			= ftrace_trace_snapshot_callback,
7200 };
7201 
7202 static __init int register_snapshot_cmd(void)
7203 {
7204 	return register_ftrace_command(&ftrace_snapshot_cmd);
7205 }
7206 #else
7207 static inline __init int register_snapshot_cmd(void) { return 0; }
7208 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7209 
7210 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7211 {
7212 	if (WARN_ON(!tr->dir))
7213 		return ERR_PTR(-ENODEV);
7214 
7215 	/* Top directory uses NULL as the parent */
7216 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7217 		return NULL;
7218 
7219 	/* All sub buffers have a descriptor */
7220 	return tr->dir;
7221 }
7222 
7223 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7224 {
7225 	struct dentry *d_tracer;
7226 
7227 	if (tr->percpu_dir)
7228 		return tr->percpu_dir;
7229 
7230 	d_tracer = tracing_get_dentry(tr);
7231 	if (IS_ERR(d_tracer))
7232 		return NULL;
7233 
7234 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7235 
7236 	WARN_ONCE(!tr->percpu_dir,
7237 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7238 
7239 	return tr->percpu_dir;
7240 }
7241 
7242 static struct dentry *
7243 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7244 		      void *data, long cpu, const struct file_operations *fops)
7245 {
7246 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7247 
7248 	if (ret) /* See tracing_get_cpu() */
7249 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7250 	return ret;
7251 }
7252 
7253 static void
7254 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7255 {
7256 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7257 	struct dentry *d_cpu;
7258 	char cpu_dir[30]; /* 30 characters should be more than enough */
7259 
7260 	if (!d_percpu)
7261 		return;
7262 
7263 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7264 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7265 	if (!d_cpu) {
7266 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7267 		return;
7268 	}
7269 
7270 	/* per cpu trace_pipe */
7271 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7272 				tr, cpu, &tracing_pipe_fops);
7273 
7274 	/* per cpu trace */
7275 	trace_create_cpu_file("trace", 0644, d_cpu,
7276 				tr, cpu, &tracing_fops);
7277 
7278 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7279 				tr, cpu, &tracing_buffers_fops);
7280 
7281 	trace_create_cpu_file("stats", 0444, d_cpu,
7282 				tr, cpu, &tracing_stats_fops);
7283 
7284 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7285 				tr, cpu, &tracing_entries_fops);
7286 
7287 #ifdef CONFIG_TRACER_SNAPSHOT
7288 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7289 				tr, cpu, &snapshot_fops);
7290 
7291 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7292 				tr, cpu, &snapshot_raw_fops);
7293 #endif
7294 }
7295 
7296 #ifdef CONFIG_FTRACE_SELFTEST
7297 /* Let selftest have access to static functions in this file */
7298 #include "trace_selftest.c"
7299 #endif
7300 
7301 static ssize_t
7302 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7303 			loff_t *ppos)
7304 {
7305 	struct trace_option_dentry *topt = filp->private_data;
7306 	char *buf;
7307 
7308 	if (topt->flags->val & topt->opt->bit)
7309 		buf = "1\n";
7310 	else
7311 		buf = "0\n";
7312 
7313 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7314 }
7315 
7316 static ssize_t
7317 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7318 			 loff_t *ppos)
7319 {
7320 	struct trace_option_dentry *topt = filp->private_data;
7321 	unsigned long val;
7322 	int ret;
7323 
7324 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7325 	if (ret)
7326 		return ret;
7327 
7328 	if (val != 0 && val != 1)
7329 		return -EINVAL;
7330 
7331 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7332 		mutex_lock(&trace_types_lock);
7333 		ret = __set_tracer_option(topt->tr, topt->flags,
7334 					  topt->opt, !val);
7335 		mutex_unlock(&trace_types_lock);
7336 		if (ret)
7337 			return ret;
7338 	}
7339 
7340 	*ppos += cnt;
7341 
7342 	return cnt;
7343 }
7344 
7345 
7346 static const struct file_operations trace_options_fops = {
7347 	.open = tracing_open_generic,
7348 	.read = trace_options_read,
7349 	.write = trace_options_write,
7350 	.llseek	= generic_file_llseek,
7351 };
7352 
7353 /*
7354  * In order to pass in both the trace_array descriptor as well as the index
7355  * to the flag that the trace option file represents, the trace_array
7356  * has a character array of trace_flags_index[], which holds the index
7357  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7358  * The address of this character array is passed to the flag option file
7359  * read/write callbacks.
7360  *
7361  * In order to extract both the index and the trace_array descriptor,
7362  * get_tr_index() uses the following algorithm.
7363  *
7364  *   idx = *ptr;
7365  *
7366  * As the pointer itself contains the address of the index (remember
7367  * index[1] == 1).
7368  *
7369  * Then to get the trace_array descriptor, by subtracting that index
7370  * from the ptr, we get to the start of the index itself.
7371  *
7372  *   ptr - idx == &index[0]
7373  *
7374  * Then a simple container_of() from that pointer gets us to the
7375  * trace_array descriptor.
7376  */
7377 static void get_tr_index(void *data, struct trace_array **ptr,
7378 			 unsigned int *pindex)
7379 {
7380 	*pindex = *(unsigned char *)data;
7381 
7382 	*ptr = container_of(data - *pindex, struct trace_array,
7383 			    trace_flags_index);
7384 }
7385 
7386 static ssize_t
7387 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7388 			loff_t *ppos)
7389 {
7390 	void *tr_index = filp->private_data;
7391 	struct trace_array *tr;
7392 	unsigned int index;
7393 	char *buf;
7394 
7395 	get_tr_index(tr_index, &tr, &index);
7396 
7397 	if (tr->trace_flags & (1 << index))
7398 		buf = "1\n";
7399 	else
7400 		buf = "0\n";
7401 
7402 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7403 }
7404 
7405 static ssize_t
7406 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7407 			 loff_t *ppos)
7408 {
7409 	void *tr_index = filp->private_data;
7410 	struct trace_array *tr;
7411 	unsigned int index;
7412 	unsigned long val;
7413 	int ret;
7414 
7415 	get_tr_index(tr_index, &tr, &index);
7416 
7417 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7418 	if (ret)
7419 		return ret;
7420 
7421 	if (val != 0 && val != 1)
7422 		return -EINVAL;
7423 
7424 	mutex_lock(&trace_types_lock);
7425 	ret = set_tracer_flag(tr, 1 << index, val);
7426 	mutex_unlock(&trace_types_lock);
7427 
7428 	if (ret < 0)
7429 		return ret;
7430 
7431 	*ppos += cnt;
7432 
7433 	return cnt;
7434 }
7435 
7436 static const struct file_operations trace_options_core_fops = {
7437 	.open = tracing_open_generic,
7438 	.read = trace_options_core_read,
7439 	.write = trace_options_core_write,
7440 	.llseek = generic_file_llseek,
7441 };
7442 
7443 struct dentry *trace_create_file(const char *name,
7444 				 umode_t mode,
7445 				 struct dentry *parent,
7446 				 void *data,
7447 				 const struct file_operations *fops)
7448 {
7449 	struct dentry *ret;
7450 
7451 	ret = tracefs_create_file(name, mode, parent, data, fops);
7452 	if (!ret)
7453 		pr_warn("Could not create tracefs '%s' entry\n", name);
7454 
7455 	return ret;
7456 }
7457 
7458 
7459 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7460 {
7461 	struct dentry *d_tracer;
7462 
7463 	if (tr->options)
7464 		return tr->options;
7465 
7466 	d_tracer = tracing_get_dentry(tr);
7467 	if (IS_ERR(d_tracer))
7468 		return NULL;
7469 
7470 	tr->options = tracefs_create_dir("options", d_tracer);
7471 	if (!tr->options) {
7472 		pr_warn("Could not create tracefs directory 'options'\n");
7473 		return NULL;
7474 	}
7475 
7476 	return tr->options;
7477 }
7478 
7479 static void
7480 create_trace_option_file(struct trace_array *tr,
7481 			 struct trace_option_dentry *topt,
7482 			 struct tracer_flags *flags,
7483 			 struct tracer_opt *opt)
7484 {
7485 	struct dentry *t_options;
7486 
7487 	t_options = trace_options_init_dentry(tr);
7488 	if (!t_options)
7489 		return;
7490 
7491 	topt->flags = flags;
7492 	topt->opt = opt;
7493 	topt->tr = tr;
7494 
7495 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7496 				    &trace_options_fops);
7497 
7498 }
7499 
7500 static void
7501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7502 {
7503 	struct trace_option_dentry *topts;
7504 	struct trace_options *tr_topts;
7505 	struct tracer_flags *flags;
7506 	struct tracer_opt *opts;
7507 	int cnt;
7508 	int i;
7509 
7510 	if (!tracer)
7511 		return;
7512 
7513 	flags = tracer->flags;
7514 
7515 	if (!flags || !flags->opts)
7516 		return;
7517 
7518 	/*
7519 	 * If this is an instance, only create flags for tracers
7520 	 * the instance may have.
7521 	 */
7522 	if (!trace_ok_for_array(tracer, tr))
7523 		return;
7524 
7525 	for (i = 0; i < tr->nr_topts; i++) {
7526 		/* Make sure there's no duplicate flags. */
7527 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7528 			return;
7529 	}
7530 
7531 	opts = flags->opts;
7532 
7533 	for (cnt = 0; opts[cnt].name; cnt++)
7534 		;
7535 
7536 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7537 	if (!topts)
7538 		return;
7539 
7540 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7541 			    GFP_KERNEL);
7542 	if (!tr_topts) {
7543 		kfree(topts);
7544 		return;
7545 	}
7546 
7547 	tr->topts = tr_topts;
7548 	tr->topts[tr->nr_topts].tracer = tracer;
7549 	tr->topts[tr->nr_topts].topts = topts;
7550 	tr->nr_topts++;
7551 
7552 	for (cnt = 0; opts[cnt].name; cnt++) {
7553 		create_trace_option_file(tr, &topts[cnt], flags,
7554 					 &opts[cnt]);
7555 		WARN_ONCE(topts[cnt].entry == NULL,
7556 			  "Failed to create trace option: %s",
7557 			  opts[cnt].name);
7558 	}
7559 }
7560 
7561 static struct dentry *
7562 create_trace_option_core_file(struct trace_array *tr,
7563 			      const char *option, long index)
7564 {
7565 	struct dentry *t_options;
7566 
7567 	t_options = trace_options_init_dentry(tr);
7568 	if (!t_options)
7569 		return NULL;
7570 
7571 	return trace_create_file(option, 0644, t_options,
7572 				 (void *)&tr->trace_flags_index[index],
7573 				 &trace_options_core_fops);
7574 }
7575 
7576 static void create_trace_options_dir(struct trace_array *tr)
7577 {
7578 	struct dentry *t_options;
7579 	bool top_level = tr == &global_trace;
7580 	int i;
7581 
7582 	t_options = trace_options_init_dentry(tr);
7583 	if (!t_options)
7584 		return;
7585 
7586 	for (i = 0; trace_options[i]; i++) {
7587 		if (top_level ||
7588 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7589 			create_trace_option_core_file(tr, trace_options[i], i);
7590 	}
7591 }
7592 
7593 static ssize_t
7594 rb_simple_read(struct file *filp, char __user *ubuf,
7595 	       size_t cnt, loff_t *ppos)
7596 {
7597 	struct trace_array *tr = filp->private_data;
7598 	char buf[64];
7599 	int r;
7600 
7601 	r = tracer_tracing_is_on(tr);
7602 	r = sprintf(buf, "%d\n", r);
7603 
7604 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7605 }
7606 
7607 static ssize_t
7608 rb_simple_write(struct file *filp, const char __user *ubuf,
7609 		size_t cnt, loff_t *ppos)
7610 {
7611 	struct trace_array *tr = filp->private_data;
7612 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7613 	unsigned long val;
7614 	int ret;
7615 
7616 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7617 	if (ret)
7618 		return ret;
7619 
7620 	if (buffer) {
7621 		mutex_lock(&trace_types_lock);
7622 		if (val) {
7623 			tracer_tracing_on(tr);
7624 			if (tr->current_trace->start)
7625 				tr->current_trace->start(tr);
7626 		} else {
7627 			tracer_tracing_off(tr);
7628 			if (tr->current_trace->stop)
7629 				tr->current_trace->stop(tr);
7630 		}
7631 		mutex_unlock(&trace_types_lock);
7632 	}
7633 
7634 	(*ppos)++;
7635 
7636 	return cnt;
7637 }
7638 
7639 static const struct file_operations rb_simple_fops = {
7640 	.open		= tracing_open_generic_tr,
7641 	.read		= rb_simple_read,
7642 	.write		= rb_simple_write,
7643 	.release	= tracing_release_generic_tr,
7644 	.llseek		= default_llseek,
7645 };
7646 
7647 struct dentry *trace_instance_dir;
7648 
7649 static void
7650 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7651 
7652 static int
7653 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7654 {
7655 	enum ring_buffer_flags rb_flags;
7656 
7657 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7658 
7659 	buf->tr = tr;
7660 
7661 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7662 	if (!buf->buffer)
7663 		return -ENOMEM;
7664 
7665 	buf->data = alloc_percpu(struct trace_array_cpu);
7666 	if (!buf->data) {
7667 		ring_buffer_free(buf->buffer);
7668 		buf->buffer = NULL;
7669 		return -ENOMEM;
7670 	}
7671 
7672 	/* Allocate the first page for all buffers */
7673 	set_buffer_entries(&tr->trace_buffer,
7674 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7675 
7676 	return 0;
7677 }
7678 
7679 static int allocate_trace_buffers(struct trace_array *tr, int size)
7680 {
7681 	int ret;
7682 
7683 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7684 	if (ret)
7685 		return ret;
7686 
7687 #ifdef CONFIG_TRACER_MAX_TRACE
7688 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7689 				    allocate_snapshot ? size : 1);
7690 	if (WARN_ON(ret)) {
7691 		ring_buffer_free(tr->trace_buffer.buffer);
7692 		tr->trace_buffer.buffer = NULL;
7693 		free_percpu(tr->trace_buffer.data);
7694 		tr->trace_buffer.data = NULL;
7695 		return -ENOMEM;
7696 	}
7697 	tr->allocated_snapshot = allocate_snapshot;
7698 
7699 	/*
7700 	 * Only the top level trace array gets its snapshot allocated
7701 	 * from the kernel command line.
7702 	 */
7703 	allocate_snapshot = false;
7704 #endif
7705 	return 0;
7706 }
7707 
7708 static void free_trace_buffer(struct trace_buffer *buf)
7709 {
7710 	if (buf->buffer) {
7711 		ring_buffer_free(buf->buffer);
7712 		buf->buffer = NULL;
7713 		free_percpu(buf->data);
7714 		buf->data = NULL;
7715 	}
7716 }
7717 
7718 static void free_trace_buffers(struct trace_array *tr)
7719 {
7720 	if (!tr)
7721 		return;
7722 
7723 	free_trace_buffer(&tr->trace_buffer);
7724 
7725 #ifdef CONFIG_TRACER_MAX_TRACE
7726 	free_trace_buffer(&tr->max_buffer);
7727 #endif
7728 }
7729 
7730 static void init_trace_flags_index(struct trace_array *tr)
7731 {
7732 	int i;
7733 
7734 	/* Used by the trace options files */
7735 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7736 		tr->trace_flags_index[i] = i;
7737 }
7738 
7739 static void __update_tracer_options(struct trace_array *tr)
7740 {
7741 	struct tracer *t;
7742 
7743 	for (t = trace_types; t; t = t->next)
7744 		add_tracer_options(tr, t);
7745 }
7746 
7747 static void update_tracer_options(struct trace_array *tr)
7748 {
7749 	mutex_lock(&trace_types_lock);
7750 	__update_tracer_options(tr);
7751 	mutex_unlock(&trace_types_lock);
7752 }
7753 
7754 static int instance_mkdir(const char *name)
7755 {
7756 	struct trace_array *tr;
7757 	int ret;
7758 
7759 	mutex_lock(&event_mutex);
7760 	mutex_lock(&trace_types_lock);
7761 
7762 	ret = -EEXIST;
7763 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7764 		if (tr->name && strcmp(tr->name, name) == 0)
7765 			goto out_unlock;
7766 	}
7767 
7768 	ret = -ENOMEM;
7769 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7770 	if (!tr)
7771 		goto out_unlock;
7772 
7773 	tr->name = kstrdup(name, GFP_KERNEL);
7774 	if (!tr->name)
7775 		goto out_free_tr;
7776 
7777 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7778 		goto out_free_tr;
7779 
7780 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7781 
7782 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7783 
7784 	raw_spin_lock_init(&tr->start_lock);
7785 
7786 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7787 
7788 	tr->current_trace = &nop_trace;
7789 
7790 	INIT_LIST_HEAD(&tr->systems);
7791 	INIT_LIST_HEAD(&tr->events);
7792 	INIT_LIST_HEAD(&tr->hist_vars);
7793 
7794 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7795 		goto out_free_tr;
7796 
7797 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7798 	if (!tr->dir)
7799 		goto out_free_tr;
7800 
7801 	ret = event_trace_add_tracer(tr->dir, tr);
7802 	if (ret) {
7803 		tracefs_remove_recursive(tr->dir);
7804 		goto out_free_tr;
7805 	}
7806 
7807 	ftrace_init_trace_array(tr);
7808 
7809 	init_tracer_tracefs(tr, tr->dir);
7810 	init_trace_flags_index(tr);
7811 	__update_tracer_options(tr);
7812 
7813 	list_add(&tr->list, &ftrace_trace_arrays);
7814 
7815 	mutex_unlock(&trace_types_lock);
7816 	mutex_unlock(&event_mutex);
7817 
7818 	return 0;
7819 
7820  out_free_tr:
7821 	free_trace_buffers(tr);
7822 	free_cpumask_var(tr->tracing_cpumask);
7823 	kfree(tr->name);
7824 	kfree(tr);
7825 
7826  out_unlock:
7827 	mutex_unlock(&trace_types_lock);
7828 	mutex_unlock(&event_mutex);
7829 
7830 	return ret;
7831 
7832 }
7833 
7834 static int instance_rmdir(const char *name)
7835 {
7836 	struct trace_array *tr;
7837 	int found = 0;
7838 	int ret;
7839 	int i;
7840 
7841 	mutex_lock(&event_mutex);
7842 	mutex_lock(&trace_types_lock);
7843 
7844 	ret = -ENODEV;
7845 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7846 		if (tr->name && strcmp(tr->name, name) == 0) {
7847 			found = 1;
7848 			break;
7849 		}
7850 	}
7851 	if (!found)
7852 		goto out_unlock;
7853 
7854 	ret = -EBUSY;
7855 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7856 		goto out_unlock;
7857 
7858 	list_del(&tr->list);
7859 
7860 	/* Disable all the flags that were enabled coming in */
7861 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7862 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7863 			set_tracer_flag(tr, 1 << i, 0);
7864 	}
7865 
7866 	tracing_set_nop(tr);
7867 	clear_ftrace_function_probes(tr);
7868 	event_trace_del_tracer(tr);
7869 	ftrace_clear_pids(tr);
7870 	ftrace_destroy_function_files(tr);
7871 	tracefs_remove_recursive(tr->dir);
7872 	free_trace_buffers(tr);
7873 
7874 	for (i = 0; i < tr->nr_topts; i++) {
7875 		kfree(tr->topts[i].topts);
7876 	}
7877 	kfree(tr->topts);
7878 
7879 	free_cpumask_var(tr->tracing_cpumask);
7880 	kfree(tr->name);
7881 	kfree(tr);
7882 
7883 	ret = 0;
7884 
7885  out_unlock:
7886 	mutex_unlock(&trace_types_lock);
7887 	mutex_unlock(&event_mutex);
7888 
7889 	return ret;
7890 }
7891 
7892 static __init void create_trace_instances(struct dentry *d_tracer)
7893 {
7894 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7895 							 instance_mkdir,
7896 							 instance_rmdir);
7897 	if (WARN_ON(!trace_instance_dir))
7898 		return;
7899 }
7900 
7901 static void
7902 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7903 {
7904 	struct trace_event_file *file;
7905 	int cpu;
7906 
7907 	trace_create_file("available_tracers", 0444, d_tracer,
7908 			tr, &show_traces_fops);
7909 
7910 	trace_create_file("current_tracer", 0644, d_tracer,
7911 			tr, &set_tracer_fops);
7912 
7913 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7914 			  tr, &tracing_cpumask_fops);
7915 
7916 	trace_create_file("trace_options", 0644, d_tracer,
7917 			  tr, &tracing_iter_fops);
7918 
7919 	trace_create_file("trace", 0644, d_tracer,
7920 			  tr, &tracing_fops);
7921 
7922 	trace_create_file("trace_pipe", 0444, d_tracer,
7923 			  tr, &tracing_pipe_fops);
7924 
7925 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7926 			  tr, &tracing_entries_fops);
7927 
7928 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7929 			  tr, &tracing_total_entries_fops);
7930 
7931 	trace_create_file("free_buffer", 0200, d_tracer,
7932 			  tr, &tracing_free_buffer_fops);
7933 
7934 	trace_create_file("trace_marker", 0220, d_tracer,
7935 			  tr, &tracing_mark_fops);
7936 
7937 	file = __find_event_file(tr, "ftrace", "print");
7938 	if (file && file->dir)
7939 		trace_create_file("trigger", 0644, file->dir, file,
7940 				  &event_trigger_fops);
7941 	tr->trace_marker_file = file;
7942 
7943 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7944 			  tr, &tracing_mark_raw_fops);
7945 
7946 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7947 			  &trace_clock_fops);
7948 
7949 	trace_create_file("tracing_on", 0644, d_tracer,
7950 			  tr, &rb_simple_fops);
7951 
7952 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7953 			  &trace_time_stamp_mode_fops);
7954 
7955 	create_trace_options_dir(tr);
7956 
7957 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7958 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7959 			&tr->max_latency, &tracing_max_lat_fops);
7960 #endif
7961 
7962 	if (ftrace_create_function_files(tr, d_tracer))
7963 		WARN(1, "Could not allocate function filter files");
7964 
7965 #ifdef CONFIG_TRACER_SNAPSHOT
7966 	trace_create_file("snapshot", 0644, d_tracer,
7967 			  tr, &snapshot_fops);
7968 #endif
7969 
7970 	for_each_tracing_cpu(cpu)
7971 		tracing_init_tracefs_percpu(tr, cpu);
7972 
7973 	ftrace_init_tracefs(tr, d_tracer);
7974 }
7975 
7976 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7977 {
7978 	struct vfsmount *mnt;
7979 	struct file_system_type *type;
7980 
7981 	/*
7982 	 * To maintain backward compatibility for tools that mount
7983 	 * debugfs to get to the tracing facility, tracefs is automatically
7984 	 * mounted to the debugfs/tracing directory.
7985 	 */
7986 	type = get_fs_type("tracefs");
7987 	if (!type)
7988 		return NULL;
7989 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7990 	put_filesystem(type);
7991 	if (IS_ERR(mnt))
7992 		return NULL;
7993 	mntget(mnt);
7994 
7995 	return mnt;
7996 }
7997 
7998 /**
7999  * tracing_init_dentry - initialize top level trace array
8000  *
8001  * This is called when creating files or directories in the tracing
8002  * directory. It is called via fs_initcall() by any of the boot up code
8003  * and expects to return the dentry of the top level tracing directory.
8004  */
8005 struct dentry *tracing_init_dentry(void)
8006 {
8007 	struct trace_array *tr = &global_trace;
8008 
8009 	/* The top level trace array uses  NULL as parent */
8010 	if (tr->dir)
8011 		return NULL;
8012 
8013 	if (WARN_ON(!tracefs_initialized()) ||
8014 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8015 		 WARN_ON(!debugfs_initialized())))
8016 		return ERR_PTR(-ENODEV);
8017 
8018 	/*
8019 	 * As there may still be users that expect the tracing
8020 	 * files to exist in debugfs/tracing, we must automount
8021 	 * the tracefs file system there, so older tools still
8022 	 * work with the newer kerenl.
8023 	 */
8024 	tr->dir = debugfs_create_automount("tracing", NULL,
8025 					   trace_automount, NULL);
8026 	if (!tr->dir) {
8027 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8028 		return ERR_PTR(-ENOMEM);
8029 	}
8030 
8031 	return NULL;
8032 }
8033 
8034 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8035 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8036 
8037 static void __init trace_eval_init(void)
8038 {
8039 	int len;
8040 
8041 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8042 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8043 }
8044 
8045 #ifdef CONFIG_MODULES
8046 static void trace_module_add_evals(struct module *mod)
8047 {
8048 	if (!mod->num_trace_evals)
8049 		return;
8050 
8051 	/*
8052 	 * Modules with bad taint do not have events created, do
8053 	 * not bother with enums either.
8054 	 */
8055 	if (trace_module_has_bad_taint(mod))
8056 		return;
8057 
8058 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8059 }
8060 
8061 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8062 static void trace_module_remove_evals(struct module *mod)
8063 {
8064 	union trace_eval_map_item *map;
8065 	union trace_eval_map_item **last = &trace_eval_maps;
8066 
8067 	if (!mod->num_trace_evals)
8068 		return;
8069 
8070 	mutex_lock(&trace_eval_mutex);
8071 
8072 	map = trace_eval_maps;
8073 
8074 	while (map) {
8075 		if (map->head.mod == mod)
8076 			break;
8077 		map = trace_eval_jmp_to_tail(map);
8078 		last = &map->tail.next;
8079 		map = map->tail.next;
8080 	}
8081 	if (!map)
8082 		goto out;
8083 
8084 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8085 	kfree(map);
8086  out:
8087 	mutex_unlock(&trace_eval_mutex);
8088 }
8089 #else
8090 static inline void trace_module_remove_evals(struct module *mod) { }
8091 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8092 
8093 static int trace_module_notify(struct notifier_block *self,
8094 			       unsigned long val, void *data)
8095 {
8096 	struct module *mod = data;
8097 
8098 	switch (val) {
8099 	case MODULE_STATE_COMING:
8100 		trace_module_add_evals(mod);
8101 		break;
8102 	case MODULE_STATE_GOING:
8103 		trace_module_remove_evals(mod);
8104 		break;
8105 	}
8106 
8107 	return 0;
8108 }
8109 
8110 static struct notifier_block trace_module_nb = {
8111 	.notifier_call = trace_module_notify,
8112 	.priority = 0,
8113 };
8114 #endif /* CONFIG_MODULES */
8115 
8116 static __init int tracer_init_tracefs(void)
8117 {
8118 	struct dentry *d_tracer;
8119 
8120 	trace_access_lock_init();
8121 
8122 	d_tracer = tracing_init_dentry();
8123 	if (IS_ERR(d_tracer))
8124 		return 0;
8125 
8126 	event_trace_init();
8127 
8128 	init_tracer_tracefs(&global_trace, d_tracer);
8129 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8130 
8131 	trace_create_file("tracing_thresh", 0644, d_tracer,
8132 			&global_trace, &tracing_thresh_fops);
8133 
8134 	trace_create_file("README", 0444, d_tracer,
8135 			NULL, &tracing_readme_fops);
8136 
8137 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8138 			NULL, &tracing_saved_cmdlines_fops);
8139 
8140 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8141 			  NULL, &tracing_saved_cmdlines_size_fops);
8142 
8143 	trace_create_file("saved_tgids", 0444, d_tracer,
8144 			NULL, &tracing_saved_tgids_fops);
8145 
8146 	trace_eval_init();
8147 
8148 	trace_create_eval_file(d_tracer);
8149 
8150 #ifdef CONFIG_MODULES
8151 	register_module_notifier(&trace_module_nb);
8152 #endif
8153 
8154 #ifdef CONFIG_DYNAMIC_FTRACE
8155 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8156 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8157 #endif
8158 
8159 	create_trace_instances(d_tracer);
8160 
8161 	update_tracer_options(&global_trace);
8162 
8163 	return 0;
8164 }
8165 
8166 static int trace_panic_handler(struct notifier_block *this,
8167 			       unsigned long event, void *unused)
8168 {
8169 	if (ftrace_dump_on_oops)
8170 		ftrace_dump(ftrace_dump_on_oops);
8171 	return NOTIFY_OK;
8172 }
8173 
8174 static struct notifier_block trace_panic_notifier = {
8175 	.notifier_call  = trace_panic_handler,
8176 	.next           = NULL,
8177 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8178 };
8179 
8180 static int trace_die_handler(struct notifier_block *self,
8181 			     unsigned long val,
8182 			     void *data)
8183 {
8184 	switch (val) {
8185 	case DIE_OOPS:
8186 		if (ftrace_dump_on_oops)
8187 			ftrace_dump(ftrace_dump_on_oops);
8188 		break;
8189 	default:
8190 		break;
8191 	}
8192 	return NOTIFY_OK;
8193 }
8194 
8195 static struct notifier_block trace_die_notifier = {
8196 	.notifier_call = trace_die_handler,
8197 	.priority = 200
8198 };
8199 
8200 /*
8201  * printk is set to max of 1024, we really don't need it that big.
8202  * Nothing should be printing 1000 characters anyway.
8203  */
8204 #define TRACE_MAX_PRINT		1000
8205 
8206 /*
8207  * Define here KERN_TRACE so that we have one place to modify
8208  * it if we decide to change what log level the ftrace dump
8209  * should be at.
8210  */
8211 #define KERN_TRACE		KERN_EMERG
8212 
8213 void
8214 trace_printk_seq(struct trace_seq *s)
8215 {
8216 	/* Probably should print a warning here. */
8217 	if (s->seq.len >= TRACE_MAX_PRINT)
8218 		s->seq.len = TRACE_MAX_PRINT;
8219 
8220 	/*
8221 	 * More paranoid code. Although the buffer size is set to
8222 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8223 	 * an extra layer of protection.
8224 	 */
8225 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8226 		s->seq.len = s->seq.size - 1;
8227 
8228 	/* should be zero ended, but we are paranoid. */
8229 	s->buffer[s->seq.len] = 0;
8230 
8231 	printk(KERN_TRACE "%s", s->buffer);
8232 
8233 	trace_seq_init(s);
8234 }
8235 
8236 void trace_init_global_iter(struct trace_iterator *iter)
8237 {
8238 	iter->tr = &global_trace;
8239 	iter->trace = iter->tr->current_trace;
8240 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8241 	iter->trace_buffer = &global_trace.trace_buffer;
8242 
8243 	if (iter->trace && iter->trace->open)
8244 		iter->trace->open(iter);
8245 
8246 	/* Annotate start of buffers if we had overruns */
8247 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8248 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8249 
8250 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8251 	if (trace_clocks[iter->tr->clock_id].in_ns)
8252 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8253 }
8254 
8255 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8256 {
8257 	/* use static because iter can be a bit big for the stack */
8258 	static struct trace_iterator iter;
8259 	static atomic_t dump_running;
8260 	struct trace_array *tr = &global_trace;
8261 	unsigned int old_userobj;
8262 	unsigned long flags;
8263 	int cnt = 0, cpu;
8264 
8265 	/* Only allow one dump user at a time. */
8266 	if (atomic_inc_return(&dump_running) != 1) {
8267 		atomic_dec(&dump_running);
8268 		return;
8269 	}
8270 
8271 	/*
8272 	 * Always turn off tracing when we dump.
8273 	 * We don't need to show trace output of what happens
8274 	 * between multiple crashes.
8275 	 *
8276 	 * If the user does a sysrq-z, then they can re-enable
8277 	 * tracing with echo 1 > tracing_on.
8278 	 */
8279 	tracing_off();
8280 
8281 	local_irq_save(flags);
8282 
8283 	/* Simulate the iterator */
8284 	trace_init_global_iter(&iter);
8285 
8286 	for_each_tracing_cpu(cpu) {
8287 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8288 	}
8289 
8290 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8291 
8292 	/* don't look at user memory in panic mode */
8293 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8294 
8295 	switch (oops_dump_mode) {
8296 	case DUMP_ALL:
8297 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8298 		break;
8299 	case DUMP_ORIG:
8300 		iter.cpu_file = raw_smp_processor_id();
8301 		break;
8302 	case DUMP_NONE:
8303 		goto out_enable;
8304 	default:
8305 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8306 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8307 	}
8308 
8309 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8310 
8311 	/* Did function tracer already get disabled? */
8312 	if (ftrace_is_dead()) {
8313 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8314 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8315 	}
8316 
8317 	/*
8318 	 * We need to stop all tracing on all CPUS to read the
8319 	 * the next buffer. This is a bit expensive, but is
8320 	 * not done often. We fill all what we can read,
8321 	 * and then release the locks again.
8322 	 */
8323 
8324 	while (!trace_empty(&iter)) {
8325 
8326 		if (!cnt)
8327 			printk(KERN_TRACE "---------------------------------\n");
8328 
8329 		cnt++;
8330 
8331 		/* reset all but tr, trace, and overruns */
8332 		memset(&iter.seq, 0,
8333 		       sizeof(struct trace_iterator) -
8334 		       offsetof(struct trace_iterator, seq));
8335 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8336 		iter.pos = -1;
8337 
8338 		if (trace_find_next_entry_inc(&iter) != NULL) {
8339 			int ret;
8340 
8341 			ret = print_trace_line(&iter);
8342 			if (ret != TRACE_TYPE_NO_CONSUME)
8343 				trace_consume(&iter);
8344 		}
8345 		touch_nmi_watchdog();
8346 
8347 		trace_printk_seq(&iter.seq);
8348 	}
8349 
8350 	if (!cnt)
8351 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8352 	else
8353 		printk(KERN_TRACE "---------------------------------\n");
8354 
8355  out_enable:
8356 	tr->trace_flags |= old_userobj;
8357 
8358 	for_each_tracing_cpu(cpu) {
8359 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8360 	}
8361  	atomic_dec(&dump_running);
8362 	local_irq_restore(flags);
8363 }
8364 EXPORT_SYMBOL_GPL(ftrace_dump);
8365 
8366 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8367 {
8368 	char **argv;
8369 	int argc, ret;
8370 
8371 	argc = 0;
8372 	ret = 0;
8373 	argv = argv_split(GFP_KERNEL, buf, &argc);
8374 	if (!argv)
8375 		return -ENOMEM;
8376 
8377 	if (argc)
8378 		ret = createfn(argc, argv);
8379 
8380 	argv_free(argv);
8381 
8382 	return ret;
8383 }
8384 
8385 #define WRITE_BUFSIZE  4096
8386 
8387 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8388 				size_t count, loff_t *ppos,
8389 				int (*createfn)(int, char **))
8390 {
8391 	char *kbuf, *buf, *tmp;
8392 	int ret = 0;
8393 	size_t done = 0;
8394 	size_t size;
8395 
8396 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8397 	if (!kbuf)
8398 		return -ENOMEM;
8399 
8400 	while (done < count) {
8401 		size = count - done;
8402 
8403 		if (size >= WRITE_BUFSIZE)
8404 			size = WRITE_BUFSIZE - 1;
8405 
8406 		if (copy_from_user(kbuf, buffer + done, size)) {
8407 			ret = -EFAULT;
8408 			goto out;
8409 		}
8410 		kbuf[size] = '\0';
8411 		buf = kbuf;
8412 		do {
8413 			tmp = strchr(buf, '\n');
8414 			if (tmp) {
8415 				*tmp = '\0';
8416 				size = tmp - buf + 1;
8417 			} else {
8418 				size = strlen(buf);
8419 				if (done + size < count) {
8420 					if (buf != kbuf)
8421 						break;
8422 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8423 					pr_warn("Line length is too long: Should be less than %d\n",
8424 						WRITE_BUFSIZE - 2);
8425 					ret = -EINVAL;
8426 					goto out;
8427 				}
8428 			}
8429 			done += size;
8430 
8431 			/* Remove comments */
8432 			tmp = strchr(buf, '#');
8433 
8434 			if (tmp)
8435 				*tmp = '\0';
8436 
8437 			ret = trace_run_command(buf, createfn);
8438 			if (ret)
8439 				goto out;
8440 			buf += size;
8441 
8442 		} while (done < count);
8443 	}
8444 	ret = done;
8445 
8446 out:
8447 	kfree(kbuf);
8448 
8449 	return ret;
8450 }
8451 
8452 __init static int tracer_alloc_buffers(void)
8453 {
8454 	int ring_buf_size;
8455 	int ret = -ENOMEM;
8456 
8457 	/*
8458 	 * Make sure we don't accidently add more trace options
8459 	 * than we have bits for.
8460 	 */
8461 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8462 
8463 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8464 		goto out;
8465 
8466 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8467 		goto out_free_buffer_mask;
8468 
8469 	/* Only allocate trace_printk buffers if a trace_printk exists */
8470 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8471 		/* Must be called before global_trace.buffer is allocated */
8472 		trace_printk_init_buffers();
8473 
8474 	/* To save memory, keep the ring buffer size to its minimum */
8475 	if (ring_buffer_expanded)
8476 		ring_buf_size = trace_buf_size;
8477 	else
8478 		ring_buf_size = 1;
8479 
8480 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8481 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8482 
8483 	raw_spin_lock_init(&global_trace.start_lock);
8484 
8485 	/*
8486 	 * The prepare callbacks allocates some memory for the ring buffer. We
8487 	 * don't free the buffer if the if the CPU goes down. If we were to free
8488 	 * the buffer, then the user would lose any trace that was in the
8489 	 * buffer. The memory will be removed once the "instance" is removed.
8490 	 */
8491 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8492 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8493 				      NULL);
8494 	if (ret < 0)
8495 		goto out_free_cpumask;
8496 	/* Used for event triggers */
8497 	ret = -ENOMEM;
8498 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8499 	if (!temp_buffer)
8500 		goto out_rm_hp_state;
8501 
8502 	if (trace_create_savedcmd() < 0)
8503 		goto out_free_temp_buffer;
8504 
8505 	/* TODO: make the number of buffers hot pluggable with CPUS */
8506 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8507 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8508 		WARN_ON(1);
8509 		goto out_free_savedcmd;
8510 	}
8511 
8512 	if (global_trace.buffer_disabled)
8513 		tracing_off();
8514 
8515 	if (trace_boot_clock) {
8516 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8517 		if (ret < 0)
8518 			pr_warn("Trace clock %s not defined, going back to default\n",
8519 				trace_boot_clock);
8520 	}
8521 
8522 	/*
8523 	 * register_tracer() might reference current_trace, so it
8524 	 * needs to be set before we register anything. This is
8525 	 * just a bootstrap of current_trace anyway.
8526 	 */
8527 	global_trace.current_trace = &nop_trace;
8528 
8529 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8530 
8531 	ftrace_init_global_array_ops(&global_trace);
8532 
8533 	init_trace_flags_index(&global_trace);
8534 
8535 	register_tracer(&nop_trace);
8536 
8537 	/* Function tracing may start here (via kernel command line) */
8538 	init_function_trace();
8539 
8540 	/* All seems OK, enable tracing */
8541 	tracing_disabled = 0;
8542 
8543 	atomic_notifier_chain_register(&panic_notifier_list,
8544 				       &trace_panic_notifier);
8545 
8546 	register_die_notifier(&trace_die_notifier);
8547 
8548 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8549 
8550 	INIT_LIST_HEAD(&global_trace.systems);
8551 	INIT_LIST_HEAD(&global_trace.events);
8552 	INIT_LIST_HEAD(&global_trace.hist_vars);
8553 	list_add(&global_trace.list, &ftrace_trace_arrays);
8554 
8555 	apply_trace_boot_options();
8556 
8557 	register_snapshot_cmd();
8558 
8559 	return 0;
8560 
8561 out_free_savedcmd:
8562 	free_saved_cmdlines_buffer(savedcmd);
8563 out_free_temp_buffer:
8564 	ring_buffer_free(temp_buffer);
8565 out_rm_hp_state:
8566 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8567 out_free_cpumask:
8568 	free_cpumask_var(global_trace.tracing_cpumask);
8569 out_free_buffer_mask:
8570 	free_cpumask_var(tracing_buffer_mask);
8571 out:
8572 	return ret;
8573 }
8574 
8575 void __init early_trace_init(void)
8576 {
8577 	if (tracepoint_printk) {
8578 		tracepoint_print_iter =
8579 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8580 		if (WARN_ON(!tracepoint_print_iter))
8581 			tracepoint_printk = 0;
8582 		else
8583 			static_key_enable(&tracepoint_printk_key.key);
8584 	}
8585 	tracer_alloc_buffers();
8586 }
8587 
8588 void __init trace_init(void)
8589 {
8590 	trace_event_init();
8591 }
8592 
8593 __init static int clear_boot_tracer(void)
8594 {
8595 	/*
8596 	 * The default tracer at boot buffer is an init section.
8597 	 * This function is called in lateinit. If we did not
8598 	 * find the boot tracer, then clear it out, to prevent
8599 	 * later registration from accessing the buffer that is
8600 	 * about to be freed.
8601 	 */
8602 	if (!default_bootup_tracer)
8603 		return 0;
8604 
8605 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8606 	       default_bootup_tracer);
8607 	default_bootup_tracer = NULL;
8608 
8609 	return 0;
8610 }
8611 
8612 fs_initcall(tracer_init_tracefs);
8613 late_initcall_sync(clear_boot_tracer);
8614 
8615 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8616 __init static int tracing_set_default_clock(void)
8617 {
8618 	/* sched_clock_stable() is determined in late_initcall */
8619 	if (!trace_boot_clock && !sched_clock_stable()) {
8620 		printk(KERN_WARNING
8621 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8622 		       "If you want to keep using the local clock, then add:\n"
8623 		       "  \"trace_clock=local\"\n"
8624 		       "on the kernel command line\n");
8625 		tracing_set_clock(&global_trace, "global");
8626 	}
8627 
8628 	return 0;
8629 }
8630 late_initcall_sync(tracing_set_default_clock);
8631 #endif
8632