xref: /openbmc/linux/kernel/trace/trace.c (revision 7ef282e05132d56b6f6b71e3873f317664bea78b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196 
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filterd_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid already is +1 of the actual prevous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always recreate a new array. The write is an all or nothing
535 	 * operation. Always create a new array when adding new pids by
536 	 * the user. If the operation fails, then the current list is
537 	 * not modified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway this can be
659  * boot time and run time configurable.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * serialize the access of the ring buffer
675  *
676  * ring buffer serializes readers, but it is low level protection.
677  * The validity of the events (which returns by ring_buffer_peek() ..etc)
678  * are not protected by ring buffer.
679  *
680  * The content of events may become garbage if we allow other process consumes
681  * these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not reader page) in ring buffer, and this page will be rewrited
684  *      by events producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to system.
687  *
688  * These primitives allow multi process access to different cpu ring buffer
689  * concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multi read-only access are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races of where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer. instead, resize it because
1082 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff), that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races of where it gets disabled but we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning) {
1303 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 			"Disabling tracing due to warning\n");
1305 		tracing_off();
1306 	}
1307 }
1308 
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317 	if (tr->array_buffer.buffer)
1318 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 	return !tr->buffer_disabled;
1320 }
1321 
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327 	return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330 
1331 static int __init set_buf_size(char *str)
1332 {
1333 	unsigned long buf_size;
1334 
1335 	if (!str)
1336 		return 0;
1337 	buf_size = memparse(str, &str);
1338 	/* nr_entries can not be zero */
1339 	if (buf_size == 0)
1340 		return 0;
1341 	trace_buf_size = buf_size;
1342 	return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345 
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348 	unsigned long threshold;
1349 	int ret;
1350 
1351 	if (!str)
1352 		return 0;
1353 	ret = kstrtoul(str, 0, &threshold);
1354 	if (ret < 0)
1355 		return 0;
1356 	tracing_thresh = threshold * 1000;
1357 	return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360 
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363 	return nsecs / 1000;
1364 }
1365 
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374 
1375 /* These must match the bit postions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377 	TRACE_FLAGS
1378 	NULL
1379 };
1380 
1381 static struct {
1382 	u64 (*func)(void);
1383 	const char *name;
1384 	int in_ns;		/* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 	{ trace_clock_local,		"local",	1 },
1387 	{ trace_clock_global,		"global",	1 },
1388 	{ trace_clock_counter,		"counter",	0 },
1389 	{ trace_clock_jiffies,		"uptime",	0 },
1390 	{ trace_clock,			"perf",		1 },
1391 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1392 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1393 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1394 	ARCH_TRACE_CLOCKS
1395 };
1396 
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399 	if (trace_clocks[tr->clock_id].in_ns)
1400 		return true;
1401 
1402 	return false;
1403 }
1404 
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410 	memset(parser, 0, sizeof(*parser));
1411 
1412 	parser->buffer = kmalloc(size, GFP_KERNEL);
1413 	if (!parser->buffer)
1414 		return 1;
1415 
1416 	parser->size = size;
1417 	return 0;
1418 }
1419 
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425 	kfree(parser->buffer);
1426 	parser->buffer = NULL;
1427 }
1428 
1429 /*
1430  * trace_get_user - reads the user input string separated by  space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 	size_t cnt, loff_t *ppos)
1442 {
1443 	char ch;
1444 	size_t read = 0;
1445 	ssize_t ret;
1446 
1447 	if (!*ppos)
1448 		trace_parser_clear(parser);
1449 
1450 	ret = get_user(ch, ubuf++);
1451 	if (ret)
1452 		goto out;
1453 
1454 	read++;
1455 	cnt--;
1456 
1457 	/*
1458 	 * The parser is not finished with the last write,
1459 	 * continue reading the user input without skipping spaces.
1460 	 */
1461 	if (!parser->cont) {
1462 		/* skip white space */
1463 		while (cnt && isspace(ch)) {
1464 			ret = get_user(ch, ubuf++);
1465 			if (ret)
1466 				goto out;
1467 			read++;
1468 			cnt--;
1469 		}
1470 
1471 		parser->idx = 0;
1472 
1473 		/* only spaces were written */
1474 		if (isspace(ch) || !ch) {
1475 			*ppos += read;
1476 			ret = read;
1477 			goto out;
1478 		}
1479 	}
1480 
1481 	/* read the non-space input */
1482 	while (cnt && !isspace(ch) && ch) {
1483 		if (parser->idx < parser->size - 1)
1484 			parser->buffer[parser->idx++] = ch;
1485 		else {
1486 			ret = -EINVAL;
1487 			goto out;
1488 		}
1489 		ret = get_user(ch, ubuf++);
1490 		if (ret)
1491 			goto out;
1492 		read++;
1493 		cnt--;
1494 	}
1495 
1496 	/* We either got finished input or we have to wait for another call. */
1497 	if (isspace(ch) || !ch) {
1498 		parser->buffer[parser->idx] = 0;
1499 		parser->cont = false;
1500 	} else if (parser->idx < parser->size - 1) {
1501 		parser->cont = true;
1502 		parser->buffer[parser->idx++] = ch;
1503 		/* Make sure the parsed string always terminates with '\0'. */
1504 		parser->buffer[parser->idx] = 0;
1505 	} else {
1506 		ret = -EINVAL;
1507 		goto out;
1508 	}
1509 
1510 	*ppos += read;
1511 	ret = read;
1512 
1513 out:
1514 	return ret;
1515 }
1516 
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520 	int len;
1521 
1522 	if (trace_seq_used(s) <= s->seq.readpos)
1523 		return -EBUSY;
1524 
1525 	len = trace_seq_used(s) - s->seq.readpos;
1526 	if (cnt > len)
1527 		cnt = len;
1528 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529 
1530 	s->seq.readpos += cnt;
1531 	return cnt;
1532 }
1533 
1534 unsigned long __read_mostly	tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536 
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 	defined(CONFIG_FSNOTIFY)
1539 
1540 static struct workqueue_struct *fsnotify_wq;
1541 
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544 	struct trace_array *tr = container_of(work, struct trace_array,
1545 					      fsnotify_work);
1546 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549 
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552 	struct trace_array *tr = container_of(iwork, struct trace_array,
1553 					      fsnotify_irqwork);
1554 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556 
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558 				     struct dentry *d_tracer)
1559 {
1560 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563 					      d_tracer, &tr->max_latency,
1564 					      &tracing_max_lat_fops);
1565 }
1566 
1567 __init static int latency_fsnotify_init(void)
1568 {
1569 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 	if (!fsnotify_wq) {
1572 		pr_err("Unable to allocate tr_max_lat_wq\n");
1573 		return -ENOMEM;
1574 	}
1575 	return 0;
1576 }
1577 
1578 late_initcall_sync(latency_fsnotify_init);
1579 
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582 	if (!fsnotify_wq)
1583 		return;
1584 	/*
1585 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586 	 * possible that we are called from __schedule() or do_idle(), which
1587 	 * could cause a deadlock.
1588 	 */
1589 	irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591 
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597 
1598 #define trace_create_maxlat_file(tr, d_tracer)				\
1599 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1600 			  &tr->max_latency, &tracing_max_lat_fops)
1601 
1602 #endif
1603 
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613 	struct array_buffer *trace_buf = &tr->array_buffer;
1614 	struct array_buffer *max_buf = &tr->max_buffer;
1615 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617 
1618 	max_buf->cpu = cpu;
1619 	max_buf->time_start = data->preempt_timestamp;
1620 
1621 	max_data->saved_latency = tr->max_latency;
1622 	max_data->critical_start = data->critical_start;
1623 	max_data->critical_end = data->critical_end;
1624 
1625 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626 	max_data->pid = tsk->pid;
1627 	/*
1628 	 * If tsk == current, then use current_uid(), as that does not use
1629 	 * RCU. The irq tracer can be called out of RCU scope.
1630 	 */
1631 	if (tsk == current)
1632 		max_data->uid = current_uid();
1633 	else
1634 		max_data->uid = task_uid(tsk);
1635 
1636 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637 	max_data->policy = tsk->policy;
1638 	max_data->rt_priority = tsk->rt_priority;
1639 
1640 	/* record this tasks comm */
1641 	tracing_record_cmdline(tsk);
1642 	latency_fsnotify(tr);
1643 }
1644 
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657 	      void *cond_data)
1658 {
1659 	if (tr->stop_count)
1660 		return;
1661 
1662 	WARN_ON_ONCE(!irqs_disabled());
1663 
1664 	if (!tr->allocated_snapshot) {
1665 		/* Only the nop tracer should hit this when disabling */
1666 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667 		return;
1668 	}
1669 
1670 	arch_spin_lock(&tr->max_lock);
1671 
1672 	/* Inherit the recordable setting from array_buffer */
1673 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674 		ring_buffer_record_on(tr->max_buffer.buffer);
1675 	else
1676 		ring_buffer_record_off(tr->max_buffer.buffer);
1677 
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680 		goto out_unlock;
1681 #endif
1682 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 
1684 	__update_max_tr(tr, tsk, cpu);
1685 
1686  out_unlock:
1687 	arch_spin_unlock(&tr->max_lock);
1688 }
1689 
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701 	int ret;
1702 
1703 	if (tr->stop_count)
1704 		return;
1705 
1706 	WARN_ON_ONCE(!irqs_disabled());
1707 	if (!tr->allocated_snapshot) {
1708 		/* Only the nop tracer should hit this when disabling */
1709 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710 		return;
1711 	}
1712 
1713 	arch_spin_lock(&tr->max_lock);
1714 
1715 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 
1717 	if (ret == -EBUSY) {
1718 		/*
1719 		 * We failed to swap the buffer due to a commit taking
1720 		 * place on this CPU. We fail to record, but we reset
1721 		 * the max trace buffer (no one writes directly to it)
1722 		 * and flag that it failed.
1723 		 */
1724 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725 			"Failed to swap buffers due to commit in progress\n");
1726 	}
1727 
1728 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 
1730 	__update_max_tr(tr, tsk, cpu);
1731 	arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737 	/* Iterators are static, they should be filled or empty */
1738 	if (trace_buffer_iter(iter, iter->cpu_file))
1739 		return 0;
1740 
1741 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742 				full);
1743 }
1744 
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747 
1748 struct trace_selftests {
1749 	struct list_head		list;
1750 	struct tracer			*type;
1751 };
1752 
1753 static LIST_HEAD(postponed_selftests);
1754 
1755 static int save_selftest(struct tracer *type)
1756 {
1757 	struct trace_selftests *selftest;
1758 
1759 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760 	if (!selftest)
1761 		return -ENOMEM;
1762 
1763 	selftest->type = type;
1764 	list_add(&selftest->list, &postponed_selftests);
1765 	return 0;
1766 }
1767 
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770 	struct trace_array *tr = &global_trace;
1771 	struct tracer *saved_tracer = tr->current_trace;
1772 	int ret;
1773 
1774 	if (!type->selftest || tracing_selftest_disabled)
1775 		return 0;
1776 
1777 	/*
1778 	 * If a tracer registers early in boot up (before scheduling is
1779 	 * initialized and such), then do not run its selftests yet.
1780 	 * Instead, run it a little later in the boot process.
1781 	 */
1782 	if (!selftests_can_run)
1783 		return save_selftest(type);
1784 
1785 	/*
1786 	 * Run a selftest on this tracer.
1787 	 * Here we reset the trace buffer, and set the current
1788 	 * tracer to be this tracer. The tracer can then run some
1789 	 * internal tracing to verify that everything is in order.
1790 	 * If we fail, we do not register this tracer.
1791 	 */
1792 	tracing_reset_online_cpus(&tr->array_buffer);
1793 
1794 	tr->current_trace = type;
1795 
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797 	if (type->use_max_tr) {
1798 		/* If we expanded the buffers, make sure the max is expanded too */
1799 		if (ring_buffer_expanded)
1800 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801 					   RING_BUFFER_ALL_CPUS);
1802 		tr->allocated_snapshot = true;
1803 	}
1804 #endif
1805 
1806 	/* the test is responsible for initializing and enabling */
1807 	pr_info("Testing tracer %s: ", type->name);
1808 	ret = type->selftest(type, tr);
1809 	/* the test is responsible for resetting too */
1810 	tr->current_trace = saved_tracer;
1811 	if (ret) {
1812 		printk(KERN_CONT "FAILED!\n");
1813 		/* Add the warning after printing 'FAILED' */
1814 		WARN_ON(1);
1815 		return -1;
1816 	}
1817 	/* Only reset on passing, to avoid touching corrupted buffers */
1818 	tracing_reset_online_cpus(&tr->array_buffer);
1819 
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821 	if (type->use_max_tr) {
1822 		tr->allocated_snapshot = false;
1823 
1824 		/* Shrink the max buffer again */
1825 		if (ring_buffer_expanded)
1826 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1827 					   RING_BUFFER_ALL_CPUS);
1828 	}
1829 #endif
1830 
1831 	printk(KERN_CONT "PASSED\n");
1832 	return 0;
1833 }
1834 
1835 static __init int init_trace_selftests(void)
1836 {
1837 	struct trace_selftests *p, *n;
1838 	struct tracer *t, **last;
1839 	int ret;
1840 
1841 	selftests_can_run = true;
1842 
1843 	mutex_lock(&trace_types_lock);
1844 
1845 	if (list_empty(&postponed_selftests))
1846 		goto out;
1847 
1848 	pr_info("Running postponed tracer tests:\n");
1849 
1850 	tracing_selftest_running = true;
1851 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852 		/* This loop can take minutes when sanitizers are enabled, so
1853 		 * lets make sure we allow RCU processing.
1854 		 */
1855 		cond_resched();
1856 		ret = run_tracer_selftest(p->type);
1857 		/* If the test fails, then warn and remove from available_tracers */
1858 		if (ret < 0) {
1859 			WARN(1, "tracer: %s failed selftest, disabling\n",
1860 			     p->type->name);
1861 			last = &trace_types;
1862 			for (t = trace_types; t; t = t->next) {
1863 				if (t == p->type) {
1864 					*last = t->next;
1865 					break;
1866 				}
1867 				last = &t->next;
1868 			}
1869 		}
1870 		list_del(&p->list);
1871 		kfree(p);
1872 	}
1873 	tracing_selftest_running = false;
1874 
1875  out:
1876 	mutex_unlock(&trace_types_lock);
1877 
1878 	return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884 	return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 
1890 static void __init apply_trace_boot_options(void);
1891 
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900 	struct tracer *t;
1901 	int ret = 0;
1902 
1903 	if (!type->name) {
1904 		pr_info("Tracer must have a name\n");
1905 		return -1;
1906 	}
1907 
1908 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910 		return -1;
1911 	}
1912 
1913 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914 		pr_warn("Can not register tracer %s due to lockdown\n",
1915 			   type->name);
1916 		return -EPERM;
1917 	}
1918 
1919 	mutex_lock(&trace_types_lock);
1920 
1921 	tracing_selftest_running = true;
1922 
1923 	for (t = trace_types; t; t = t->next) {
1924 		if (strcmp(type->name, t->name) == 0) {
1925 			/* already found */
1926 			pr_info("Tracer %s already registered\n",
1927 				type->name);
1928 			ret = -1;
1929 			goto out;
1930 		}
1931 	}
1932 
1933 	if (!type->set_flag)
1934 		type->set_flag = &dummy_set_flag;
1935 	if (!type->flags) {
1936 		/*allocate a dummy tracer_flags*/
1937 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938 		if (!type->flags) {
1939 			ret = -ENOMEM;
1940 			goto out;
1941 		}
1942 		type->flags->val = 0;
1943 		type->flags->opts = dummy_tracer_opt;
1944 	} else
1945 		if (!type->flags->opts)
1946 			type->flags->opts = dummy_tracer_opt;
1947 
1948 	/* store the tracer for __set_tracer_option */
1949 	type->flags->trace = type;
1950 
1951 	ret = run_tracer_selftest(type);
1952 	if (ret < 0)
1953 		goto out;
1954 
1955 	type->next = trace_types;
1956 	trace_types = type;
1957 	add_tracer_options(&global_trace, type);
1958 
1959  out:
1960 	tracing_selftest_running = false;
1961 	mutex_unlock(&trace_types_lock);
1962 
1963 	if (ret || !default_bootup_tracer)
1964 		goto out_unlock;
1965 
1966 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967 		goto out_unlock;
1968 
1969 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970 	/* Do we want this tracer to start on bootup? */
1971 	tracing_set_tracer(&global_trace, type->name);
1972 	default_bootup_tracer = NULL;
1973 
1974 	apply_trace_boot_options();
1975 
1976 	/* disable other selftests, since this will break it. */
1977 	tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980 	       type->name);
1981 #endif
1982 
1983  out_unlock:
1984 	return ret;
1985 }
1986 
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989 	struct trace_buffer *buffer = buf->buffer;
1990 
1991 	if (!buffer)
1992 		return;
1993 
1994 	ring_buffer_record_disable(buffer);
1995 
1996 	/* Make sure all commits have finished */
1997 	synchronize_rcu();
1998 	ring_buffer_reset_cpu(buffer, cpu);
1999 
2000 	ring_buffer_record_enable(buffer);
2001 }
2002 
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005 	struct trace_buffer *buffer = buf->buffer;
2006 	int cpu;
2007 
2008 	if (!buffer)
2009 		return;
2010 
2011 	ring_buffer_record_disable(buffer);
2012 
2013 	/* Make sure all commits have finished */
2014 	synchronize_rcu();
2015 
2016 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017 
2018 	for_each_online_cpu(cpu)
2019 		ring_buffer_reset_cpu(buffer, cpu);
2020 
2021 	ring_buffer_record_enable(buffer);
2022 }
2023 
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027 	struct trace_array *tr;
2028 
2029 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030 		if (!tr->clear_trace)
2031 			continue;
2032 		tr->clear_trace = false;
2033 		tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035 		tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037 	}
2038 }
2039 
2040 static int *tgid_map;
2041 
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2045 struct saved_cmdlines_buffer {
2046 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047 	unsigned *map_cmdline_to_pid;
2048 	unsigned cmdline_num;
2049 	int cmdline_idx;
2050 	char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053 
2054 /* temporary disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056 
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061 
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066 
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068 				    struct saved_cmdlines_buffer *s)
2069 {
2070 	s->map_cmdline_to_pid = kmalloc_array(val,
2071 					      sizeof(*s->map_cmdline_to_pid),
2072 					      GFP_KERNEL);
2073 	if (!s->map_cmdline_to_pid)
2074 		return -ENOMEM;
2075 
2076 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077 	if (!s->saved_cmdlines) {
2078 		kfree(s->map_cmdline_to_pid);
2079 		return -ENOMEM;
2080 	}
2081 
2082 	s->cmdline_idx = 0;
2083 	s->cmdline_num = val;
2084 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085 	       sizeof(s->map_pid_to_cmdline));
2086 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087 	       val * sizeof(*s->map_cmdline_to_pid));
2088 
2089 	return 0;
2090 }
2091 
2092 static int trace_create_savedcmd(void)
2093 {
2094 	int ret;
2095 
2096 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097 	if (!savedcmd)
2098 		return -ENOMEM;
2099 
2100 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101 	if (ret < 0) {
2102 		kfree(savedcmd);
2103 		savedcmd = NULL;
2104 		return -ENOMEM;
2105 	}
2106 
2107 	return 0;
2108 }
2109 
2110 int is_tracing_stopped(void)
2111 {
2112 	return global_trace.stop_count;
2113 }
2114 
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123 	struct trace_buffer *buffer;
2124 	unsigned long flags;
2125 
2126 	if (tracing_disabled)
2127 		return;
2128 
2129 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130 	if (--global_trace.stop_count) {
2131 		if (global_trace.stop_count < 0) {
2132 			/* Someone screwed up their debugging */
2133 			WARN_ON_ONCE(1);
2134 			global_trace.stop_count = 0;
2135 		}
2136 		goto out;
2137 	}
2138 
2139 	/* Prevent the buffers from switching */
2140 	arch_spin_lock(&global_trace.max_lock);
2141 
2142 	buffer = global_trace.array_buffer.buffer;
2143 	if (buffer)
2144 		ring_buffer_record_enable(buffer);
2145 
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147 	buffer = global_trace.max_buffer.buffer;
2148 	if (buffer)
2149 		ring_buffer_record_enable(buffer);
2150 #endif
2151 
2152 	arch_spin_unlock(&global_trace.max_lock);
2153 
2154  out:
2155 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157 
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160 	struct trace_buffer *buffer;
2161 	unsigned long flags;
2162 
2163 	if (tracing_disabled)
2164 		return;
2165 
2166 	/* If global, we need to also start the max tracer */
2167 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168 		return tracing_start();
2169 
2170 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2171 
2172 	if (--tr->stop_count) {
2173 		if (tr->stop_count < 0) {
2174 			/* Someone screwed up their debugging */
2175 			WARN_ON_ONCE(1);
2176 			tr->stop_count = 0;
2177 		}
2178 		goto out;
2179 	}
2180 
2181 	buffer = tr->array_buffer.buffer;
2182 	if (buffer)
2183 		ring_buffer_record_enable(buffer);
2184 
2185  out:
2186 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188 
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Light weight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197 	struct trace_buffer *buffer;
2198 	unsigned long flags;
2199 
2200 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201 	if (global_trace.stop_count++)
2202 		goto out;
2203 
2204 	/* Prevent the buffers from switching */
2205 	arch_spin_lock(&global_trace.max_lock);
2206 
2207 	buffer = global_trace.array_buffer.buffer;
2208 	if (buffer)
2209 		ring_buffer_record_disable(buffer);
2210 
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212 	buffer = global_trace.max_buffer.buffer;
2213 	if (buffer)
2214 		ring_buffer_record_disable(buffer);
2215 #endif
2216 
2217 	arch_spin_unlock(&global_trace.max_lock);
2218 
2219  out:
2220 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
2222 
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225 	struct trace_buffer *buffer;
2226 	unsigned long flags;
2227 
2228 	/* If global, we need to also stop the max tracer */
2229 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230 		return tracing_stop();
2231 
2232 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2233 	if (tr->stop_count++)
2234 		goto out;
2235 
2236 	buffer = tr->array_buffer.buffer;
2237 	if (buffer)
2238 		ring_buffer_record_disable(buffer);
2239 
2240  out:
2241 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243 
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246 	unsigned pid, idx;
2247 
2248 	/* treat recording of idle task as a success */
2249 	if (!tsk->pid)
2250 		return 1;
2251 
2252 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253 		return 0;
2254 
2255 	/*
2256 	 * It's not the end of the world if we don't get
2257 	 * the lock, but we also don't want to spin
2258 	 * nor do we want to disable interrupts,
2259 	 * so if we miss here, then better luck next time.
2260 	 */
2261 	if (!arch_spin_trylock(&trace_cmdline_lock))
2262 		return 0;
2263 
2264 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265 	if (idx == NO_CMDLINE_MAP) {
2266 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267 
2268 		/*
2269 		 * Check whether the cmdline buffer at idx has a pid
2270 		 * mapped. We are going to overwrite that entry so we
2271 		 * need to clear the map_pid_to_cmdline. Otherwise we
2272 		 * would read the new comm for the old pid.
2273 		 */
2274 		pid = savedcmd->map_cmdline_to_pid[idx];
2275 		if (pid != NO_CMDLINE_MAP)
2276 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277 
2278 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280 
2281 		savedcmd->cmdline_idx = idx;
2282 	}
2283 
2284 	set_cmdline(idx, tsk->comm);
2285 
2286 	arch_spin_unlock(&trace_cmdline_lock);
2287 
2288 	return 1;
2289 }
2290 
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293 	unsigned map;
2294 
2295 	if (!pid) {
2296 		strcpy(comm, "<idle>");
2297 		return;
2298 	}
2299 
2300 	if (WARN_ON_ONCE(pid < 0)) {
2301 		strcpy(comm, "<XXX>");
2302 		return;
2303 	}
2304 
2305 	if (pid > PID_MAX_DEFAULT) {
2306 		strcpy(comm, "<...>");
2307 		return;
2308 	}
2309 
2310 	map = savedcmd->map_pid_to_cmdline[pid];
2311 	if (map != NO_CMDLINE_MAP)
2312 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313 	else
2314 		strcpy(comm, "<...>");
2315 }
2316 
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319 	preempt_disable();
2320 	arch_spin_lock(&trace_cmdline_lock);
2321 
2322 	__trace_find_cmdline(pid, comm);
2323 
2324 	arch_spin_unlock(&trace_cmdline_lock);
2325 	preempt_enable();
2326 }
2327 
2328 int trace_find_tgid(int pid)
2329 {
2330 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331 		return 0;
2332 
2333 	return tgid_map[pid];
2334 }
2335 
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338 	/* treat recording of idle task as a success */
2339 	if (!tsk->pid)
2340 		return 1;
2341 
2342 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343 		return 0;
2344 
2345 	tgid_map[tsk->pid] = tsk->tgid;
2346 	return 1;
2347 }
2348 
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352 		return true;
2353 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354 		return true;
2355 	if (!__this_cpu_read(trace_taskinfo_save))
2356 		return true;
2357 	return false;
2358 }
2359 
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369 	bool done;
2370 
2371 	if (tracing_record_taskinfo_skip(flags))
2372 		return;
2373 
2374 	/*
2375 	 * Record as much task information as possible. If some fail, continue
2376 	 * to try to record the others.
2377 	 */
2378 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380 
2381 	/* If recording any information failed, retry again soon. */
2382 	if (!done)
2383 		return;
2384 
2385 	__this_cpu_write(trace_taskinfo_save, false);
2386 }
2387 
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397 					  struct task_struct *next, int flags)
2398 {
2399 	bool done;
2400 
2401 	if (tracing_record_taskinfo_skip(flags))
2402 		return;
2403 
2404 	/*
2405 	 * Record as much task information as possible. If some fail, continue
2406 	 * to try to record the others.
2407 	 */
2408 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412 
2413 	/* If recording any information failed, retry again soon. */
2414 	if (!done)
2415 		return;
2416 
2417 	__this_cpu_write(trace_taskinfo_save, false);
2418 }
2419 
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425 
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430 
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438 	return trace_seq_has_overflowed(s) ?
2439 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
2442 
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445 			     unsigned long flags, int pc)
2446 {
2447 	struct task_struct *tsk = current;
2448 
2449 	entry->preempt_count		= pc & 0xff;
2450 	entry->pid			= (tsk) ? tsk->pid : 0;
2451 	entry->type			= type;
2452 	entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456 		TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465 
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468 			  int type,
2469 			  unsigned long len,
2470 			  unsigned long flags, int pc)
2471 {
2472 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474 
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478 
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to use a temporary
2483  * buffer to write the event data into if there's a likely chance
2484  * that it will not be committed. The discard of the ring buffer
2485  * is not as fast as committing, and is much slower than copying
2486  * a commit.
2487  *
2488  * When an event is to be filtered, allocate per cpu buffers to
2489  * write the event data into, and if the event is filtered and discarded
2490  * it is simply dropped, otherwise, the entire data is to be committed
2491  * in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495 	struct ring_buffer_event *event;
2496 	struct page *page;
2497 	int cpu;
2498 
2499 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500 
2501 	if (trace_buffered_event_ref++)
2502 		return;
2503 
2504 	for_each_tracing_cpu(cpu) {
2505 		page = alloc_pages_node(cpu_to_node(cpu),
2506 					GFP_KERNEL | __GFP_NORETRY, 0);
2507 		if (!page)
2508 			goto failed;
2509 
2510 		event = page_address(page);
2511 		memset(event, 0, sizeof(*event));
2512 
2513 		per_cpu(trace_buffered_event, cpu) = event;
2514 
2515 		preempt_disable();
2516 		if (cpu == smp_processor_id() &&
2517 		    this_cpu_read(trace_buffered_event) !=
2518 		    per_cpu(trace_buffered_event, cpu))
2519 			WARN_ON_ONCE(1);
2520 		preempt_enable();
2521 	}
2522 
2523 	return;
2524  failed:
2525 	trace_buffered_event_disable();
2526 }
2527 
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530 	/* Probably not needed, but do it anyway */
2531 	smp_rmb();
2532 	this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534 
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537 	this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539 
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550 	int cpu;
2551 
2552 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553 
2554 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555 		return;
2556 
2557 	if (--trace_buffered_event_ref)
2558 		return;
2559 
2560 	preempt_disable();
2561 	/* For each CPU, set the buffer as used. */
2562 	smp_call_function_many(tracing_buffer_mask,
2563 			       disable_trace_buffered_event, NULL, 1);
2564 	preempt_enable();
2565 
2566 	/* Wait for all current users to finish */
2567 	synchronize_rcu();
2568 
2569 	for_each_tracing_cpu(cpu) {
2570 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571 		per_cpu(trace_buffered_event, cpu) = NULL;
2572 	}
2573 	/*
2574 	 * Make sure trace_buffered_event is NULL before clearing
2575 	 * trace_buffered_event_cnt.
2576 	 */
2577 	smp_wmb();
2578 
2579 	preempt_disable();
2580 	/* Do the work on each cpu */
2581 	smp_call_function_many(tracing_buffer_mask,
2582 			       enable_trace_buffered_event, NULL, 1);
2583 	preempt_enable();
2584 }
2585 
2586 static struct trace_buffer *temp_buffer;
2587 
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590 			  struct trace_event_file *trace_file,
2591 			  int type, unsigned long len,
2592 			  unsigned long flags, int pc)
2593 {
2594 	struct ring_buffer_event *entry;
2595 	int val;
2596 
2597 	*current_rb = trace_file->tr->array_buffer.buffer;
2598 
2599 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601 	    (entry = this_cpu_read(trace_buffered_event))) {
2602 		/* Try to use the per cpu buffer first */
2603 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2604 		if (val == 1) {
2605 			trace_event_setup(entry, type, flags, pc);
2606 			entry->array[0] = len;
2607 			return entry;
2608 		}
2609 		this_cpu_dec(trace_buffered_event_cnt);
2610 	}
2611 
2612 	entry = __trace_buffer_lock_reserve(*current_rb,
2613 					    type, len, flags, pc);
2614 	/*
2615 	 * If tracing is off, but we have triggers enabled
2616 	 * we still need to look at the event data. Use the temp_buffer
2617 	 * to store the trace event for the tigger to use. It's recusive
2618 	 * safe and will not be recorded anywhere.
2619 	 */
2620 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621 		*current_rb = temp_buffer;
2622 		entry = __trace_buffer_lock_reserve(*current_rb,
2623 						    type, len, flags, pc);
2624 	}
2625 	return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628 
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631 
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634 	struct trace_event_call *event_call;
2635 	struct trace_event_file *file;
2636 	struct trace_event *event;
2637 	unsigned long flags;
2638 	struct trace_iterator *iter = tracepoint_print_iter;
2639 
2640 	/* We should never get here if iter is NULL */
2641 	if (WARN_ON_ONCE(!iter))
2642 		return;
2643 
2644 	event_call = fbuffer->trace_file->event_call;
2645 	if (!event_call || !event_call->event.funcs ||
2646 	    !event_call->event.funcs->trace)
2647 		return;
2648 
2649 	file = fbuffer->trace_file;
2650 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652 	     !filter_match_preds(file->filter, fbuffer->entry)))
2653 		return;
2654 
2655 	event = &fbuffer->trace_file->event_call->event;
2656 
2657 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658 	trace_seq_init(&iter->seq);
2659 	iter->ent = fbuffer->entry;
2660 	event_call->event.funcs->trace(iter, 0, event);
2661 	trace_seq_putc(&iter->seq, 0);
2662 	printk("%s", iter->seq.buffer);
2663 
2664 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666 
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668 			     void *buffer, size_t *lenp,
2669 			     loff_t *ppos)
2670 {
2671 	int save_tracepoint_printk;
2672 	int ret;
2673 
2674 	mutex_lock(&tracepoint_printk_mutex);
2675 	save_tracepoint_printk = tracepoint_printk;
2676 
2677 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678 
2679 	/*
2680 	 * This will force exiting early, as tracepoint_printk
2681 	 * is always zero when tracepoint_printk_iter is not allocated
2682 	 */
2683 	if (!tracepoint_print_iter)
2684 		tracepoint_printk = 0;
2685 
2686 	if (save_tracepoint_printk == tracepoint_printk)
2687 		goto out;
2688 
2689 	if (tracepoint_printk)
2690 		static_key_enable(&tracepoint_printk_key.key);
2691 	else
2692 		static_key_disable(&tracepoint_printk_key.key);
2693 
2694  out:
2695 	mutex_unlock(&tracepoint_printk_mutex);
2696 
2697 	return ret;
2698 }
2699 
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702 	if (static_key_false(&tracepoint_printk_key.key))
2703 		output_printk(fbuffer);
2704 
2705 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706 				    fbuffer->event, fbuffer->entry,
2707 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710 
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719 
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721 				     struct trace_buffer *buffer,
2722 				     struct ring_buffer_event *event,
2723 				     unsigned long flags, int pc,
2724 				     struct pt_regs *regs)
2725 {
2726 	__buffer_unlock_commit(buffer, event);
2727 
2728 	/*
2729 	 * If regs is not set, then skip the necessary functions.
2730 	 * Note, we can still get here via blktrace, wakeup tracer
2731 	 * and mmiotrace, but that's ok if they lose a function or
2732 	 * two. They are not that meaningful.
2733 	 */
2734 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735 	ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737 
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743 				   struct ring_buffer_event *event)
2744 {
2745 	__buffer_unlock_commit(buffer, event);
2746 }
2747 
2748 static void
2749 trace_process_export(struct trace_export *export,
2750 	       struct ring_buffer_event *event)
2751 {
2752 	struct trace_entry *entry;
2753 	unsigned int size = 0;
2754 
2755 	entry = ring_buffer_event_data(event);
2756 	size = ring_buffer_event_length(event);
2757 	export->write(export, entry, size);
2758 }
2759 
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761 
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763 
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765 
2766 static inline void ftrace_exports_enable(void)
2767 {
2768 	static_branch_enable(&ftrace_exports_enabled);
2769 }
2770 
2771 static inline void ftrace_exports_disable(void)
2772 {
2773 	static_branch_disable(&ftrace_exports_enabled);
2774 }
2775 
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778 	struct trace_export *export;
2779 
2780 	preempt_disable_notrace();
2781 
2782 	export = rcu_dereference_raw_check(ftrace_exports_list);
2783 	while (export) {
2784 		trace_process_export(export, event);
2785 		export = rcu_dereference_raw_check(export->next);
2786 	}
2787 
2788 	preempt_enable_notrace();
2789 }
2790 
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794 	rcu_assign_pointer(export->next, *list);
2795 	/*
2796 	 * We are entering export into the list but another
2797 	 * CPU might be walking that list. We need to make sure
2798 	 * the export->next pointer is valid before another CPU sees
2799 	 * the export pointer included into the list.
2800 	 */
2801 	rcu_assign_pointer(*list, export);
2802 }
2803 
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807 	struct trace_export **p;
2808 
2809 	for (p = list; *p != NULL; p = &(*p)->next)
2810 		if (*p == export)
2811 			break;
2812 
2813 	if (*p != export)
2814 		return -1;
2815 
2816 	rcu_assign_pointer(*p, (*p)->next);
2817 
2818 	return 0;
2819 }
2820 
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824 	if (*list == NULL)
2825 		ftrace_exports_enable();
2826 
2827 	add_trace_export(list, export);
2828 }
2829 
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833 	int ret;
2834 
2835 	ret = rm_trace_export(list, export);
2836 	if (*list == NULL)
2837 		ftrace_exports_disable();
2838 
2839 	return ret;
2840 }
2841 
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844 	if (WARN_ON_ONCE(!export->write))
2845 		return -1;
2846 
2847 	mutex_lock(&ftrace_export_lock);
2848 
2849 	add_ftrace_export(&ftrace_exports_list, export);
2850 
2851 	mutex_unlock(&ftrace_export_lock);
2852 
2853 	return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856 
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859 	int ret;
2860 
2861 	mutex_lock(&ftrace_export_lock);
2862 
2863 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2864 
2865 	mutex_unlock(&ftrace_export_lock);
2866 
2867 	return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2870 
2871 void
2872 trace_function(struct trace_array *tr,
2873 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874 	       int pc)
2875 {
2876 	struct trace_event_call *call = &event_function;
2877 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2878 	struct ring_buffer_event *event;
2879 	struct ftrace_entry *entry;
2880 
2881 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882 					    flags, pc);
2883 	if (!event)
2884 		return;
2885 	entry	= ring_buffer_event_data(event);
2886 	entry->ip			= ip;
2887 	entry->parent_ip		= parent_ip;
2888 
2889 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2890 		if (static_branch_unlikely(&ftrace_exports_enabled))
2891 			ftrace_exports(event);
2892 		__buffer_unlock_commit(buffer, event);
2893 	}
2894 }
2895 
2896 #ifdef CONFIG_STACKTRACE
2897 
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING	4
2900 
2901 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902 
2903 struct ftrace_stack {
2904 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906 
2907 
2908 struct ftrace_stacks {
2909 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2910 };
2911 
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914 
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916 				 unsigned long flags,
2917 				 int skip, int pc, struct pt_regs *regs)
2918 {
2919 	struct trace_event_call *call = &event_kernel_stack;
2920 	struct ring_buffer_event *event;
2921 	unsigned int size, nr_entries;
2922 	struct ftrace_stack *fstack;
2923 	struct stack_entry *entry;
2924 	int stackidx;
2925 
2926 	/*
2927 	 * Add one, for this function and the call to save_stack_trace()
2928 	 * If regs is set, then these functions will not be in the way.
2929 	 */
2930 #ifndef CONFIG_UNWINDER_ORC
2931 	if (!regs)
2932 		skip++;
2933 #endif
2934 
2935 	/*
2936 	 * Since events can happen in NMIs there's no safe way to
2937 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2938 	 * or NMI comes in, it will just have to use the default
2939 	 * FTRACE_STACK_SIZE.
2940 	 */
2941 	preempt_disable_notrace();
2942 
2943 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944 
2945 	/* This should never happen. If it does, yell once and skip */
2946 	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2947 		goto out;
2948 
2949 	/*
2950 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951 	 * interrupt will either see the value pre increment or post
2952 	 * increment. If the interrupt happens pre increment it will have
2953 	 * restored the counter when it returns.  We just need a barrier to
2954 	 * keep gcc from moving things around.
2955 	 */
2956 	barrier();
2957 
2958 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959 	size = ARRAY_SIZE(fstack->calls);
2960 
2961 	if (regs) {
2962 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963 						   size, skip);
2964 	} else {
2965 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2966 	}
2967 
2968 	size = nr_entries * sizeof(unsigned long);
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970 					    sizeof(*entry) + size, flags, pc);
2971 	if (!event)
2972 		goto out;
2973 	entry = ring_buffer_event_data(event);
2974 
2975 	memcpy(&entry->caller, fstack->calls, size);
2976 	entry->size = nr_entries;
2977 
2978 	if (!call_filter_check_discard(call, entry, buffer, event))
2979 		__buffer_unlock_commit(buffer, event);
2980 
2981  out:
2982 	/* Again, don't let gcc optimize things here */
2983 	barrier();
2984 	__this_cpu_dec(ftrace_stack_reserve);
2985 	preempt_enable_notrace();
2986 
2987 }
2988 
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990 				      struct trace_buffer *buffer,
2991 				      unsigned long flags,
2992 				      int skip, int pc, struct pt_regs *regs)
2993 {
2994 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995 		return;
2996 
2997 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999 
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001 		   int pc)
3002 {
3003 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3004 
3005 	if (rcu_is_watching()) {
3006 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007 		return;
3008 	}
3009 
3010 	/*
3011 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012 	 * but if the above rcu_is_watching() failed, then the NMI
3013 	 * triggered someplace critical, and rcu_irq_enter() should
3014 	 * not be called from NMI.
3015 	 */
3016 	if (unlikely(in_nmi()))
3017 		return;
3018 
3019 	rcu_irq_enter_irqson();
3020 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021 	rcu_irq_exit_irqson();
3022 }
3023 
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030 	unsigned long flags;
3031 
3032 	if (tracing_disabled || tracing_selftest_running)
3033 		return;
3034 
3035 	local_save_flags(flags);
3036 
3037 #ifndef CONFIG_UNWINDER_ORC
3038 	/* Skip 1 to skip this function. */
3039 	skip++;
3040 #endif
3041 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3042 			     flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
3045 
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048 
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052 	struct trace_event_call *call = &event_user_stack;
3053 	struct ring_buffer_event *event;
3054 	struct userstack_entry *entry;
3055 
3056 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057 		return;
3058 
3059 	/*
3060 	 * NMIs can not handle page faults, even with fix ups.
3061 	 * The save user stack can (and often does) fault.
3062 	 */
3063 	if (unlikely(in_nmi()))
3064 		return;
3065 
3066 	/*
3067 	 * prevent recursion, since the user stack tracing may
3068 	 * trigger other kernel events.
3069 	 */
3070 	preempt_disable();
3071 	if (__this_cpu_read(user_stack_count))
3072 		goto out;
3073 
3074 	__this_cpu_inc(user_stack_count);
3075 
3076 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077 					    sizeof(*entry), flags, pc);
3078 	if (!event)
3079 		goto out_drop_count;
3080 	entry	= ring_buffer_event_data(event);
3081 
3082 	entry->tgid		= current->tgid;
3083 	memset(&entry->caller, 0, sizeof(entry->caller));
3084 
3085 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086 	if (!call_filter_check_discard(call, entry, buffer, event))
3087 		__buffer_unlock_commit(buffer, event);
3088 
3089  out_drop_count:
3090 	__this_cpu_dec(user_stack_count);
3091  out:
3092 	preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096 				   unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100 
3101 #endif /* CONFIG_STACKTRACE */
3102 
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105 	int nesting;
3106 	char buffer[4][TRACE_BUF_SIZE];
3107 };
3108 
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110 
3111 /*
3112  * Thise allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118 
3119 	if (!buffer || buffer->nesting >= 4)
3120 		return NULL;
3121 
3122 	buffer->nesting++;
3123 
3124 	/* Interrupts must see nesting incremented before we use the buffer */
3125 	barrier();
3126 	return &buffer->buffer[buffer->nesting][0];
3127 }
3128 
3129 static void put_trace_buf(void)
3130 {
3131 	/* Don't let the decrement of nesting leak before this */
3132 	barrier();
3133 	this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135 
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138 	struct trace_buffer_struct *buffers;
3139 
3140 	buffers = alloc_percpu(struct trace_buffer_struct);
3141 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142 		return -ENOMEM;
3143 
3144 	trace_percpu_buffer = buffers;
3145 	return 0;
3146 }
3147 
3148 static int buffers_allocated;
3149 
3150 void trace_printk_init_buffers(void)
3151 {
3152 	if (buffers_allocated)
3153 		return;
3154 
3155 	if (alloc_percpu_trace_buffer())
3156 		return;
3157 
3158 	/* trace_printk() is for debug use only. Don't use it in production. */
3159 
3160 	pr_warn("\n");
3161 	pr_warn("**********************************************************\n");
3162 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163 	pr_warn("**                                                      **\n");
3164 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165 	pr_warn("**                                                      **\n");
3166 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167 	pr_warn("** unsafe for production use.                           **\n");
3168 	pr_warn("**                                                      **\n");
3169 	pr_warn("** If you see this message and you are not debugging    **\n");
3170 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171 	pr_warn("**                                                      **\n");
3172 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173 	pr_warn("**********************************************************\n");
3174 
3175 	/* Expand the buffers to set size */
3176 	tracing_update_buffers();
3177 
3178 	buffers_allocated = 1;
3179 
3180 	/*
3181 	 * trace_printk_init_buffers() can be called by modules.
3182 	 * If that happens, then we need to start cmdline recording
3183 	 * directly here. If the global_trace.buffer is already
3184 	 * allocated here, then this was called by module code.
3185 	 */
3186 	if (global_trace.array_buffer.buffer)
3187 		tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3190 
3191 void trace_printk_start_comm(void)
3192 {
3193 	/* Start tracing comms if trace printk is set */
3194 	if (!buffers_allocated)
3195 		return;
3196 	tracing_start_cmdline_record();
3197 }
3198 
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201 	if (!buffers_allocated)
3202 		return;
3203 
3204 	if (enabled)
3205 		tracing_start_cmdline_record();
3206 	else
3207 		tracing_stop_cmdline_record();
3208 }
3209 
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218 	struct trace_event_call *call = &event_bprint;
3219 	struct ring_buffer_event *event;
3220 	struct trace_buffer *buffer;
3221 	struct trace_array *tr = &global_trace;
3222 	struct bprint_entry *entry;
3223 	unsigned long flags;
3224 	char *tbuffer;
3225 	int len = 0, size, pc;
3226 
3227 	if (unlikely(tracing_selftest_running || tracing_disabled))
3228 		return 0;
3229 
3230 	/* Don't pollute graph traces with trace_vprintk internals */
3231 	pause_graph_tracing();
3232 
3233 	pc = preempt_count();
3234 	preempt_disable_notrace();
3235 
3236 	tbuffer = get_trace_buf();
3237 	if (!tbuffer) {
3238 		len = 0;
3239 		goto out_nobuffer;
3240 	}
3241 
3242 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243 
3244 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245 		goto out_put;
3246 
3247 	local_save_flags(flags);
3248 	size = sizeof(*entry) + sizeof(u32) * len;
3249 	buffer = tr->array_buffer.buffer;
3250 	ring_buffer_nest_start(buffer);
3251 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252 					    flags, pc);
3253 	if (!event)
3254 		goto out;
3255 	entry = ring_buffer_event_data(event);
3256 	entry->ip			= ip;
3257 	entry->fmt			= fmt;
3258 
3259 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3261 		__buffer_unlock_commit(buffer, event);
3262 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263 	}
3264 
3265 out:
3266 	ring_buffer_nest_end(buffer);
3267 out_put:
3268 	put_trace_buf();
3269 
3270 out_nobuffer:
3271 	preempt_enable_notrace();
3272 	unpause_graph_tracing();
3273 
3274 	return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
3277 
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281 		      unsigned long ip, const char *fmt, va_list args)
3282 {
3283 	struct trace_event_call *call = &event_print;
3284 	struct ring_buffer_event *event;
3285 	int len = 0, size, pc;
3286 	struct print_entry *entry;
3287 	unsigned long flags;
3288 	char *tbuffer;
3289 
3290 	if (tracing_disabled || tracing_selftest_running)
3291 		return 0;
3292 
3293 	/* Don't pollute graph traces with trace_vprintk internals */
3294 	pause_graph_tracing();
3295 
3296 	pc = preempt_count();
3297 	preempt_disable_notrace();
3298 
3299 
3300 	tbuffer = get_trace_buf();
3301 	if (!tbuffer) {
3302 		len = 0;
3303 		goto out_nobuffer;
3304 	}
3305 
3306 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307 
3308 	local_save_flags(flags);
3309 	size = sizeof(*entry) + len + 1;
3310 	ring_buffer_nest_start(buffer);
3311 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312 					    flags, pc);
3313 	if (!event)
3314 		goto out;
3315 	entry = ring_buffer_event_data(event);
3316 	entry->ip = ip;
3317 
3318 	memcpy(&entry->buf, tbuffer, len + 1);
3319 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3320 		__buffer_unlock_commit(buffer, event);
3321 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322 	}
3323 
3324 out:
3325 	ring_buffer_nest_end(buffer);
3326 	put_trace_buf();
3327 
3328 out_nobuffer:
3329 	preempt_enable_notrace();
3330 	unpause_graph_tracing();
3331 
3332 	return len;
3333 }
3334 
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337 			unsigned long ip, const char *fmt, va_list args)
3338 {
3339 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341 
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344 		       unsigned long ip, const char *fmt, ...)
3345 {
3346 	int ret;
3347 	va_list ap;
3348 
3349 	if (!tr)
3350 		return -ENOENT;
3351 
3352 	/* This is only allowed for created instances */
3353 	if (tr == &global_trace)
3354 		return 0;
3355 
3356 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3357 		return 0;
3358 
3359 	va_start(ap, fmt);
3360 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3361 	va_end(ap);
3362 	return ret;
3363 }
3364 EXPORT_SYMBOL_GPL(trace_array_printk);
3365 
3366 __printf(3, 4)
3367 int trace_array_printk_buf(struct trace_buffer *buffer,
3368 			   unsigned long ip, const char *fmt, ...)
3369 {
3370 	int ret;
3371 	va_list ap;
3372 
3373 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3374 		return 0;
3375 
3376 	va_start(ap, fmt);
3377 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3378 	va_end(ap);
3379 	return ret;
3380 }
3381 
3382 __printf(2, 0)
3383 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3384 {
3385 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3386 }
3387 EXPORT_SYMBOL_GPL(trace_vprintk);
3388 
3389 static void trace_iterator_increment(struct trace_iterator *iter)
3390 {
3391 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3392 
3393 	iter->idx++;
3394 	if (buf_iter)
3395 		ring_buffer_iter_advance(buf_iter);
3396 }
3397 
3398 static struct trace_entry *
3399 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3400 		unsigned long *lost_events)
3401 {
3402 	struct ring_buffer_event *event;
3403 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3404 
3405 	if (buf_iter) {
3406 		event = ring_buffer_iter_peek(buf_iter, ts);
3407 		if (lost_events)
3408 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3409 				(unsigned long)-1 : 0;
3410 	} else {
3411 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3412 					 lost_events);
3413 	}
3414 
3415 	if (event) {
3416 		iter->ent_size = ring_buffer_event_length(event);
3417 		return ring_buffer_event_data(event);
3418 	}
3419 	iter->ent_size = 0;
3420 	return NULL;
3421 }
3422 
3423 static struct trace_entry *
3424 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3425 		  unsigned long *missing_events, u64 *ent_ts)
3426 {
3427 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3428 	struct trace_entry *ent, *next = NULL;
3429 	unsigned long lost_events = 0, next_lost = 0;
3430 	int cpu_file = iter->cpu_file;
3431 	u64 next_ts = 0, ts;
3432 	int next_cpu = -1;
3433 	int next_size = 0;
3434 	int cpu;
3435 
3436 	/*
3437 	 * If we are in a per_cpu trace file, don't bother by iterating over
3438 	 * all cpu and peek directly.
3439 	 */
3440 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3441 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3442 			return NULL;
3443 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3444 		if (ent_cpu)
3445 			*ent_cpu = cpu_file;
3446 
3447 		return ent;
3448 	}
3449 
3450 	for_each_tracing_cpu(cpu) {
3451 
3452 		if (ring_buffer_empty_cpu(buffer, cpu))
3453 			continue;
3454 
3455 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3456 
3457 		/*
3458 		 * Pick the entry with the smallest timestamp:
3459 		 */
3460 		if (ent && (!next || ts < next_ts)) {
3461 			next = ent;
3462 			next_cpu = cpu;
3463 			next_ts = ts;
3464 			next_lost = lost_events;
3465 			next_size = iter->ent_size;
3466 		}
3467 	}
3468 
3469 	iter->ent_size = next_size;
3470 
3471 	if (ent_cpu)
3472 		*ent_cpu = next_cpu;
3473 
3474 	if (ent_ts)
3475 		*ent_ts = next_ts;
3476 
3477 	if (missing_events)
3478 		*missing_events = next_lost;
3479 
3480 	return next;
3481 }
3482 
3483 #define STATIC_TEMP_BUF_SIZE	128
3484 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3485 
3486 /* Find the next real entry, without updating the iterator itself */
3487 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3488 					  int *ent_cpu, u64 *ent_ts)
3489 {
3490 	/* __find_next_entry will reset ent_size */
3491 	int ent_size = iter->ent_size;
3492 	struct trace_entry *entry;
3493 
3494 	/*
3495 	 * If called from ftrace_dump(), then the iter->temp buffer
3496 	 * will be the static_temp_buf and not created from kmalloc.
3497 	 * If the entry size is greater than the buffer, we can
3498 	 * not save it. Just return NULL in that case. This is only
3499 	 * used to add markers when two consecutive events' time
3500 	 * stamps have a large delta. See trace_print_lat_context()
3501 	 */
3502 	if (iter->temp == static_temp_buf &&
3503 	    STATIC_TEMP_BUF_SIZE < ent_size)
3504 		return NULL;
3505 
3506 	/*
3507 	 * The __find_next_entry() may call peek_next_entry(), which may
3508 	 * call ring_buffer_peek() that may make the contents of iter->ent
3509 	 * undefined. Need to copy iter->ent now.
3510 	 */
3511 	if (iter->ent && iter->ent != iter->temp) {
3512 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3513 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3514 			kfree(iter->temp);
3515 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3516 			if (!iter->temp)
3517 				return NULL;
3518 		}
3519 		memcpy(iter->temp, iter->ent, iter->ent_size);
3520 		iter->temp_size = iter->ent_size;
3521 		iter->ent = iter->temp;
3522 	}
3523 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3524 	/* Put back the original ent_size */
3525 	iter->ent_size = ent_size;
3526 
3527 	return entry;
3528 }
3529 
3530 /* Find the next real entry, and increment the iterator to the next entry */
3531 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3532 {
3533 	iter->ent = __find_next_entry(iter, &iter->cpu,
3534 				      &iter->lost_events, &iter->ts);
3535 
3536 	if (iter->ent)
3537 		trace_iterator_increment(iter);
3538 
3539 	return iter->ent ? iter : NULL;
3540 }
3541 
3542 static void trace_consume(struct trace_iterator *iter)
3543 {
3544 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3545 			    &iter->lost_events);
3546 }
3547 
3548 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3549 {
3550 	struct trace_iterator *iter = m->private;
3551 	int i = (int)*pos;
3552 	void *ent;
3553 
3554 	WARN_ON_ONCE(iter->leftover);
3555 
3556 	(*pos)++;
3557 
3558 	/* can't go backwards */
3559 	if (iter->idx > i)
3560 		return NULL;
3561 
3562 	if (iter->idx < 0)
3563 		ent = trace_find_next_entry_inc(iter);
3564 	else
3565 		ent = iter;
3566 
3567 	while (ent && iter->idx < i)
3568 		ent = trace_find_next_entry_inc(iter);
3569 
3570 	iter->pos = *pos;
3571 
3572 	return ent;
3573 }
3574 
3575 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3576 {
3577 	struct ring_buffer_iter *buf_iter;
3578 	unsigned long entries = 0;
3579 	u64 ts;
3580 
3581 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3582 
3583 	buf_iter = trace_buffer_iter(iter, cpu);
3584 	if (!buf_iter)
3585 		return;
3586 
3587 	ring_buffer_iter_reset(buf_iter);
3588 
3589 	/*
3590 	 * We could have the case with the max latency tracers
3591 	 * that a reset never took place on a cpu. This is evident
3592 	 * by the timestamp being before the start of the buffer.
3593 	 */
3594 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3595 		if (ts >= iter->array_buffer->time_start)
3596 			break;
3597 		entries++;
3598 		ring_buffer_iter_advance(buf_iter);
3599 	}
3600 
3601 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3602 }
3603 
3604 /*
3605  * The current tracer is copied to avoid a global locking
3606  * all around.
3607  */
3608 static void *s_start(struct seq_file *m, loff_t *pos)
3609 {
3610 	struct trace_iterator *iter = m->private;
3611 	struct trace_array *tr = iter->tr;
3612 	int cpu_file = iter->cpu_file;
3613 	void *p = NULL;
3614 	loff_t l = 0;
3615 	int cpu;
3616 
3617 	/*
3618 	 * copy the tracer to avoid using a global lock all around.
3619 	 * iter->trace is a copy of current_trace, the pointer to the
3620 	 * name may be used instead of a strcmp(), as iter->trace->name
3621 	 * will point to the same string as current_trace->name.
3622 	 */
3623 	mutex_lock(&trace_types_lock);
3624 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3625 		*iter->trace = *tr->current_trace;
3626 	mutex_unlock(&trace_types_lock);
3627 
3628 #ifdef CONFIG_TRACER_MAX_TRACE
3629 	if (iter->snapshot && iter->trace->use_max_tr)
3630 		return ERR_PTR(-EBUSY);
3631 #endif
3632 
3633 	if (!iter->snapshot)
3634 		atomic_inc(&trace_record_taskinfo_disabled);
3635 
3636 	if (*pos != iter->pos) {
3637 		iter->ent = NULL;
3638 		iter->cpu = 0;
3639 		iter->idx = -1;
3640 
3641 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3642 			for_each_tracing_cpu(cpu)
3643 				tracing_iter_reset(iter, cpu);
3644 		} else
3645 			tracing_iter_reset(iter, cpu_file);
3646 
3647 		iter->leftover = 0;
3648 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3649 			;
3650 
3651 	} else {
3652 		/*
3653 		 * If we overflowed the seq_file before, then we want
3654 		 * to just reuse the trace_seq buffer again.
3655 		 */
3656 		if (iter->leftover)
3657 			p = iter;
3658 		else {
3659 			l = *pos - 1;
3660 			p = s_next(m, p, &l);
3661 		}
3662 	}
3663 
3664 	trace_event_read_lock();
3665 	trace_access_lock(cpu_file);
3666 	return p;
3667 }
3668 
3669 static void s_stop(struct seq_file *m, void *p)
3670 {
3671 	struct trace_iterator *iter = m->private;
3672 
3673 #ifdef CONFIG_TRACER_MAX_TRACE
3674 	if (iter->snapshot && iter->trace->use_max_tr)
3675 		return;
3676 #endif
3677 
3678 	if (!iter->snapshot)
3679 		atomic_dec(&trace_record_taskinfo_disabled);
3680 
3681 	trace_access_unlock(iter->cpu_file);
3682 	trace_event_read_unlock();
3683 }
3684 
3685 static void
3686 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3687 		      unsigned long *entries, int cpu)
3688 {
3689 	unsigned long count;
3690 
3691 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3692 	/*
3693 	 * If this buffer has skipped entries, then we hold all
3694 	 * entries for the trace and we need to ignore the
3695 	 * ones before the time stamp.
3696 	 */
3697 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3698 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3699 		/* total is the same as the entries */
3700 		*total = count;
3701 	} else
3702 		*total = count +
3703 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3704 	*entries = count;
3705 }
3706 
3707 static void
3708 get_total_entries(struct array_buffer *buf,
3709 		  unsigned long *total, unsigned long *entries)
3710 {
3711 	unsigned long t, e;
3712 	int cpu;
3713 
3714 	*total = 0;
3715 	*entries = 0;
3716 
3717 	for_each_tracing_cpu(cpu) {
3718 		get_total_entries_cpu(buf, &t, &e, cpu);
3719 		*total += t;
3720 		*entries += e;
3721 	}
3722 }
3723 
3724 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3725 {
3726 	unsigned long total, entries;
3727 
3728 	if (!tr)
3729 		tr = &global_trace;
3730 
3731 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3732 
3733 	return entries;
3734 }
3735 
3736 unsigned long trace_total_entries(struct trace_array *tr)
3737 {
3738 	unsigned long total, entries;
3739 
3740 	if (!tr)
3741 		tr = &global_trace;
3742 
3743 	get_total_entries(&tr->array_buffer, &total, &entries);
3744 
3745 	return entries;
3746 }
3747 
3748 static void print_lat_help_header(struct seq_file *m)
3749 {
3750 	seq_puts(m, "#                  _------=> CPU#            \n"
3751 		    "#                 / _-----=> irqs-off        \n"
3752 		    "#                | / _----=> need-resched    \n"
3753 		    "#                || / _---=> hardirq/softirq \n"
3754 		    "#                ||| / _--=> preempt-depth   \n"
3755 		    "#                |||| /     delay            \n"
3756 		    "#  cmd     pid   ||||| time  |   caller      \n"
3757 		    "#     \\   /      |||||  \\    |   /         \n");
3758 }
3759 
3760 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3761 {
3762 	unsigned long total;
3763 	unsigned long entries;
3764 
3765 	get_total_entries(buf, &total, &entries);
3766 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3767 		   entries, total, num_online_cpus());
3768 	seq_puts(m, "#\n");
3769 }
3770 
3771 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3772 				   unsigned int flags)
3773 {
3774 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3775 
3776 	print_event_info(buf, m);
3777 
3778 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3779 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3780 }
3781 
3782 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3783 				       unsigned int flags)
3784 {
3785 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3786 	const char *space = "          ";
3787 	int prec = tgid ? 10 : 2;
3788 
3789 	print_event_info(buf, m);
3790 
3791 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3792 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3793 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3794 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3795 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3796 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3797 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3798 }
3799 
3800 void
3801 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3802 {
3803 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3804 	struct array_buffer *buf = iter->array_buffer;
3805 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3806 	struct tracer *type = iter->trace;
3807 	unsigned long entries;
3808 	unsigned long total;
3809 	const char *name = "preemption";
3810 
3811 	name = type->name;
3812 
3813 	get_total_entries(buf, &total, &entries);
3814 
3815 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3816 		   name, UTS_RELEASE);
3817 	seq_puts(m, "# -----------------------------------"
3818 		 "---------------------------------\n");
3819 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3820 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3821 		   nsecs_to_usecs(data->saved_latency),
3822 		   entries,
3823 		   total,
3824 		   buf->cpu,
3825 #if defined(CONFIG_PREEMPT_NONE)
3826 		   "server",
3827 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3828 		   "desktop",
3829 #elif defined(CONFIG_PREEMPT)
3830 		   "preempt",
3831 #elif defined(CONFIG_PREEMPT_RT)
3832 		   "preempt_rt",
3833 #else
3834 		   "unknown",
3835 #endif
3836 		   /* These are reserved for later use */
3837 		   0, 0, 0, 0);
3838 #ifdef CONFIG_SMP
3839 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3840 #else
3841 	seq_puts(m, ")\n");
3842 #endif
3843 	seq_puts(m, "#    -----------------\n");
3844 	seq_printf(m, "#    | task: %.16s-%d "
3845 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3846 		   data->comm, data->pid,
3847 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3848 		   data->policy, data->rt_priority);
3849 	seq_puts(m, "#    -----------------\n");
3850 
3851 	if (data->critical_start) {
3852 		seq_puts(m, "#  => started at: ");
3853 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3854 		trace_print_seq(m, &iter->seq);
3855 		seq_puts(m, "\n#  => ended at:   ");
3856 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3857 		trace_print_seq(m, &iter->seq);
3858 		seq_puts(m, "\n#\n");
3859 	}
3860 
3861 	seq_puts(m, "#\n");
3862 }
3863 
3864 static void test_cpu_buff_start(struct trace_iterator *iter)
3865 {
3866 	struct trace_seq *s = &iter->seq;
3867 	struct trace_array *tr = iter->tr;
3868 
3869 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3870 		return;
3871 
3872 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3873 		return;
3874 
3875 	if (cpumask_available(iter->started) &&
3876 	    cpumask_test_cpu(iter->cpu, iter->started))
3877 		return;
3878 
3879 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3880 		return;
3881 
3882 	if (cpumask_available(iter->started))
3883 		cpumask_set_cpu(iter->cpu, iter->started);
3884 
3885 	/* Don't print started cpu buffer for the first entry of the trace */
3886 	if (iter->idx > 1)
3887 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3888 				iter->cpu);
3889 }
3890 
3891 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3892 {
3893 	struct trace_array *tr = iter->tr;
3894 	struct trace_seq *s = &iter->seq;
3895 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3896 	struct trace_entry *entry;
3897 	struct trace_event *event;
3898 
3899 	entry = iter->ent;
3900 
3901 	test_cpu_buff_start(iter);
3902 
3903 	event = ftrace_find_event(entry->type);
3904 
3905 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3906 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3907 			trace_print_lat_context(iter);
3908 		else
3909 			trace_print_context(iter);
3910 	}
3911 
3912 	if (trace_seq_has_overflowed(s))
3913 		return TRACE_TYPE_PARTIAL_LINE;
3914 
3915 	if (event)
3916 		return event->funcs->trace(iter, sym_flags, event);
3917 
3918 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3919 
3920 	return trace_handle_return(s);
3921 }
3922 
3923 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3924 {
3925 	struct trace_array *tr = iter->tr;
3926 	struct trace_seq *s = &iter->seq;
3927 	struct trace_entry *entry;
3928 	struct trace_event *event;
3929 
3930 	entry = iter->ent;
3931 
3932 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3933 		trace_seq_printf(s, "%d %d %llu ",
3934 				 entry->pid, iter->cpu, iter->ts);
3935 
3936 	if (trace_seq_has_overflowed(s))
3937 		return TRACE_TYPE_PARTIAL_LINE;
3938 
3939 	event = ftrace_find_event(entry->type);
3940 	if (event)
3941 		return event->funcs->raw(iter, 0, event);
3942 
3943 	trace_seq_printf(s, "%d ?\n", entry->type);
3944 
3945 	return trace_handle_return(s);
3946 }
3947 
3948 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3949 {
3950 	struct trace_array *tr = iter->tr;
3951 	struct trace_seq *s = &iter->seq;
3952 	unsigned char newline = '\n';
3953 	struct trace_entry *entry;
3954 	struct trace_event *event;
3955 
3956 	entry = iter->ent;
3957 
3958 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3959 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3960 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3961 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3962 		if (trace_seq_has_overflowed(s))
3963 			return TRACE_TYPE_PARTIAL_LINE;
3964 	}
3965 
3966 	event = ftrace_find_event(entry->type);
3967 	if (event) {
3968 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3969 		if (ret != TRACE_TYPE_HANDLED)
3970 			return ret;
3971 	}
3972 
3973 	SEQ_PUT_FIELD(s, newline);
3974 
3975 	return trace_handle_return(s);
3976 }
3977 
3978 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3979 {
3980 	struct trace_array *tr = iter->tr;
3981 	struct trace_seq *s = &iter->seq;
3982 	struct trace_entry *entry;
3983 	struct trace_event *event;
3984 
3985 	entry = iter->ent;
3986 
3987 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3988 		SEQ_PUT_FIELD(s, entry->pid);
3989 		SEQ_PUT_FIELD(s, iter->cpu);
3990 		SEQ_PUT_FIELD(s, iter->ts);
3991 		if (trace_seq_has_overflowed(s))
3992 			return TRACE_TYPE_PARTIAL_LINE;
3993 	}
3994 
3995 	event = ftrace_find_event(entry->type);
3996 	return event ? event->funcs->binary(iter, 0, event) :
3997 		TRACE_TYPE_HANDLED;
3998 }
3999 
4000 int trace_empty(struct trace_iterator *iter)
4001 {
4002 	struct ring_buffer_iter *buf_iter;
4003 	int cpu;
4004 
4005 	/* If we are looking at one CPU buffer, only check that one */
4006 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4007 		cpu = iter->cpu_file;
4008 		buf_iter = trace_buffer_iter(iter, cpu);
4009 		if (buf_iter) {
4010 			if (!ring_buffer_iter_empty(buf_iter))
4011 				return 0;
4012 		} else {
4013 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4014 				return 0;
4015 		}
4016 		return 1;
4017 	}
4018 
4019 	for_each_tracing_cpu(cpu) {
4020 		buf_iter = trace_buffer_iter(iter, cpu);
4021 		if (buf_iter) {
4022 			if (!ring_buffer_iter_empty(buf_iter))
4023 				return 0;
4024 		} else {
4025 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4026 				return 0;
4027 		}
4028 	}
4029 
4030 	return 1;
4031 }
4032 
4033 /*  Called with trace_event_read_lock() held. */
4034 enum print_line_t print_trace_line(struct trace_iterator *iter)
4035 {
4036 	struct trace_array *tr = iter->tr;
4037 	unsigned long trace_flags = tr->trace_flags;
4038 	enum print_line_t ret;
4039 
4040 	if (iter->lost_events) {
4041 		if (iter->lost_events == (unsigned long)-1)
4042 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4043 					 iter->cpu);
4044 		else
4045 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4046 					 iter->cpu, iter->lost_events);
4047 		if (trace_seq_has_overflowed(&iter->seq))
4048 			return TRACE_TYPE_PARTIAL_LINE;
4049 	}
4050 
4051 	if (iter->trace && iter->trace->print_line) {
4052 		ret = iter->trace->print_line(iter);
4053 		if (ret != TRACE_TYPE_UNHANDLED)
4054 			return ret;
4055 	}
4056 
4057 	if (iter->ent->type == TRACE_BPUTS &&
4058 			trace_flags & TRACE_ITER_PRINTK &&
4059 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4060 		return trace_print_bputs_msg_only(iter);
4061 
4062 	if (iter->ent->type == TRACE_BPRINT &&
4063 			trace_flags & TRACE_ITER_PRINTK &&
4064 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4065 		return trace_print_bprintk_msg_only(iter);
4066 
4067 	if (iter->ent->type == TRACE_PRINT &&
4068 			trace_flags & TRACE_ITER_PRINTK &&
4069 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4070 		return trace_print_printk_msg_only(iter);
4071 
4072 	if (trace_flags & TRACE_ITER_BIN)
4073 		return print_bin_fmt(iter);
4074 
4075 	if (trace_flags & TRACE_ITER_HEX)
4076 		return print_hex_fmt(iter);
4077 
4078 	if (trace_flags & TRACE_ITER_RAW)
4079 		return print_raw_fmt(iter);
4080 
4081 	return print_trace_fmt(iter);
4082 }
4083 
4084 void trace_latency_header(struct seq_file *m)
4085 {
4086 	struct trace_iterator *iter = m->private;
4087 	struct trace_array *tr = iter->tr;
4088 
4089 	/* print nothing if the buffers are empty */
4090 	if (trace_empty(iter))
4091 		return;
4092 
4093 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4094 		print_trace_header(m, iter);
4095 
4096 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4097 		print_lat_help_header(m);
4098 }
4099 
4100 void trace_default_header(struct seq_file *m)
4101 {
4102 	struct trace_iterator *iter = m->private;
4103 	struct trace_array *tr = iter->tr;
4104 	unsigned long trace_flags = tr->trace_flags;
4105 
4106 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4107 		return;
4108 
4109 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4110 		/* print nothing if the buffers are empty */
4111 		if (trace_empty(iter))
4112 			return;
4113 		print_trace_header(m, iter);
4114 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4115 			print_lat_help_header(m);
4116 	} else {
4117 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4118 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4119 				print_func_help_header_irq(iter->array_buffer,
4120 							   m, trace_flags);
4121 			else
4122 				print_func_help_header(iter->array_buffer, m,
4123 						       trace_flags);
4124 		}
4125 	}
4126 }
4127 
4128 static void test_ftrace_alive(struct seq_file *m)
4129 {
4130 	if (!ftrace_is_dead())
4131 		return;
4132 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4133 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4134 }
4135 
4136 #ifdef CONFIG_TRACER_MAX_TRACE
4137 static void show_snapshot_main_help(struct seq_file *m)
4138 {
4139 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4140 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4141 		    "#                      Takes a snapshot of the main buffer.\n"
4142 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4143 		    "#                      (Doesn't have to be '2' works with any number that\n"
4144 		    "#                       is not a '0' or '1')\n");
4145 }
4146 
4147 static void show_snapshot_percpu_help(struct seq_file *m)
4148 {
4149 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4150 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4151 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4152 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4153 #else
4154 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4155 		    "#                     Must use main snapshot file to allocate.\n");
4156 #endif
4157 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4158 		    "#                      (Doesn't have to be '2' works with any number that\n"
4159 		    "#                       is not a '0' or '1')\n");
4160 }
4161 
4162 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4163 {
4164 	if (iter->tr->allocated_snapshot)
4165 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4166 	else
4167 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4168 
4169 	seq_puts(m, "# Snapshot commands:\n");
4170 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4171 		show_snapshot_main_help(m);
4172 	else
4173 		show_snapshot_percpu_help(m);
4174 }
4175 #else
4176 /* Should never be called */
4177 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4178 #endif
4179 
4180 static int s_show(struct seq_file *m, void *v)
4181 {
4182 	struct trace_iterator *iter = v;
4183 	int ret;
4184 
4185 	if (iter->ent == NULL) {
4186 		if (iter->tr) {
4187 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4188 			seq_puts(m, "#\n");
4189 			test_ftrace_alive(m);
4190 		}
4191 		if (iter->snapshot && trace_empty(iter))
4192 			print_snapshot_help(m, iter);
4193 		else if (iter->trace && iter->trace->print_header)
4194 			iter->trace->print_header(m);
4195 		else
4196 			trace_default_header(m);
4197 
4198 	} else if (iter->leftover) {
4199 		/*
4200 		 * If we filled the seq_file buffer earlier, we
4201 		 * want to just show it now.
4202 		 */
4203 		ret = trace_print_seq(m, &iter->seq);
4204 
4205 		/* ret should this time be zero, but you never know */
4206 		iter->leftover = ret;
4207 
4208 	} else {
4209 		print_trace_line(iter);
4210 		ret = trace_print_seq(m, &iter->seq);
4211 		/*
4212 		 * If we overflow the seq_file buffer, then it will
4213 		 * ask us for this data again at start up.
4214 		 * Use that instead.
4215 		 *  ret is 0 if seq_file write succeeded.
4216 		 *        -1 otherwise.
4217 		 */
4218 		iter->leftover = ret;
4219 	}
4220 
4221 	return 0;
4222 }
4223 
4224 /*
4225  * Should be used after trace_array_get(), trace_types_lock
4226  * ensures that i_cdev was already initialized.
4227  */
4228 static inline int tracing_get_cpu(struct inode *inode)
4229 {
4230 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4231 		return (long)inode->i_cdev - 1;
4232 	return RING_BUFFER_ALL_CPUS;
4233 }
4234 
4235 static const struct seq_operations tracer_seq_ops = {
4236 	.start		= s_start,
4237 	.next		= s_next,
4238 	.stop		= s_stop,
4239 	.show		= s_show,
4240 };
4241 
4242 static struct trace_iterator *
4243 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4244 {
4245 	struct trace_array *tr = inode->i_private;
4246 	struct trace_iterator *iter;
4247 	int cpu;
4248 
4249 	if (tracing_disabled)
4250 		return ERR_PTR(-ENODEV);
4251 
4252 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4253 	if (!iter)
4254 		return ERR_PTR(-ENOMEM);
4255 
4256 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4257 				    GFP_KERNEL);
4258 	if (!iter->buffer_iter)
4259 		goto release;
4260 
4261 	/*
4262 	 * trace_find_next_entry() may need to save off iter->ent.
4263 	 * It will place it into the iter->temp buffer. As most
4264 	 * events are less than 128, allocate a buffer of that size.
4265 	 * If one is greater, then trace_find_next_entry() will
4266 	 * allocate a new buffer to adjust for the bigger iter->ent.
4267 	 * It's not critical if it fails to get allocated here.
4268 	 */
4269 	iter->temp = kmalloc(128, GFP_KERNEL);
4270 	if (iter->temp)
4271 		iter->temp_size = 128;
4272 
4273 	/*
4274 	 * We make a copy of the current tracer to avoid concurrent
4275 	 * changes on it while we are reading.
4276 	 */
4277 	mutex_lock(&trace_types_lock);
4278 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4279 	if (!iter->trace)
4280 		goto fail;
4281 
4282 	*iter->trace = *tr->current_trace;
4283 
4284 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4285 		goto fail;
4286 
4287 	iter->tr = tr;
4288 
4289 #ifdef CONFIG_TRACER_MAX_TRACE
4290 	/* Currently only the top directory has a snapshot */
4291 	if (tr->current_trace->print_max || snapshot)
4292 		iter->array_buffer = &tr->max_buffer;
4293 	else
4294 #endif
4295 		iter->array_buffer = &tr->array_buffer;
4296 	iter->snapshot = snapshot;
4297 	iter->pos = -1;
4298 	iter->cpu_file = tracing_get_cpu(inode);
4299 	mutex_init(&iter->mutex);
4300 
4301 	/* Notify the tracer early; before we stop tracing. */
4302 	if (iter->trace->open)
4303 		iter->trace->open(iter);
4304 
4305 	/* Annotate start of buffers if we had overruns */
4306 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4307 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4308 
4309 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4310 	if (trace_clocks[tr->clock_id].in_ns)
4311 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4312 
4313 	/*
4314 	 * If pause-on-trace is enabled, then stop the trace while
4315 	 * dumping, unless this is the "snapshot" file
4316 	 */
4317 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4318 		tracing_stop_tr(tr);
4319 
4320 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4321 		for_each_tracing_cpu(cpu) {
4322 			iter->buffer_iter[cpu] =
4323 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4324 							 cpu, GFP_KERNEL);
4325 		}
4326 		ring_buffer_read_prepare_sync();
4327 		for_each_tracing_cpu(cpu) {
4328 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4329 			tracing_iter_reset(iter, cpu);
4330 		}
4331 	} else {
4332 		cpu = iter->cpu_file;
4333 		iter->buffer_iter[cpu] =
4334 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4335 						 cpu, GFP_KERNEL);
4336 		ring_buffer_read_prepare_sync();
4337 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4338 		tracing_iter_reset(iter, cpu);
4339 	}
4340 
4341 	mutex_unlock(&trace_types_lock);
4342 
4343 	return iter;
4344 
4345  fail:
4346 	mutex_unlock(&trace_types_lock);
4347 	kfree(iter->trace);
4348 	kfree(iter->temp);
4349 	kfree(iter->buffer_iter);
4350 release:
4351 	seq_release_private(inode, file);
4352 	return ERR_PTR(-ENOMEM);
4353 }
4354 
4355 int tracing_open_generic(struct inode *inode, struct file *filp)
4356 {
4357 	int ret;
4358 
4359 	ret = tracing_check_open_get_tr(NULL);
4360 	if (ret)
4361 		return ret;
4362 
4363 	filp->private_data = inode->i_private;
4364 	return 0;
4365 }
4366 
4367 bool tracing_is_disabled(void)
4368 {
4369 	return (tracing_disabled) ? true: false;
4370 }
4371 
4372 /*
4373  * Open and update trace_array ref count.
4374  * Must have the current trace_array passed to it.
4375  */
4376 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4377 {
4378 	struct trace_array *tr = inode->i_private;
4379 	int ret;
4380 
4381 	ret = tracing_check_open_get_tr(tr);
4382 	if (ret)
4383 		return ret;
4384 
4385 	filp->private_data = inode->i_private;
4386 
4387 	return 0;
4388 }
4389 
4390 static int tracing_release(struct inode *inode, struct file *file)
4391 {
4392 	struct trace_array *tr = inode->i_private;
4393 	struct seq_file *m = file->private_data;
4394 	struct trace_iterator *iter;
4395 	int cpu;
4396 
4397 	if (!(file->f_mode & FMODE_READ)) {
4398 		trace_array_put(tr);
4399 		return 0;
4400 	}
4401 
4402 	/* Writes do not use seq_file */
4403 	iter = m->private;
4404 	mutex_lock(&trace_types_lock);
4405 
4406 	for_each_tracing_cpu(cpu) {
4407 		if (iter->buffer_iter[cpu])
4408 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4409 	}
4410 
4411 	if (iter->trace && iter->trace->close)
4412 		iter->trace->close(iter);
4413 
4414 	if (!iter->snapshot && tr->stop_count)
4415 		/* reenable tracing if it was previously enabled */
4416 		tracing_start_tr(tr);
4417 
4418 	__trace_array_put(tr);
4419 
4420 	mutex_unlock(&trace_types_lock);
4421 
4422 	mutex_destroy(&iter->mutex);
4423 	free_cpumask_var(iter->started);
4424 	kfree(iter->temp);
4425 	kfree(iter->trace);
4426 	kfree(iter->buffer_iter);
4427 	seq_release_private(inode, file);
4428 
4429 	return 0;
4430 }
4431 
4432 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4433 {
4434 	struct trace_array *tr = inode->i_private;
4435 
4436 	trace_array_put(tr);
4437 	return 0;
4438 }
4439 
4440 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4441 {
4442 	struct trace_array *tr = inode->i_private;
4443 
4444 	trace_array_put(tr);
4445 
4446 	return single_release(inode, file);
4447 }
4448 
4449 static int tracing_open(struct inode *inode, struct file *file)
4450 {
4451 	struct trace_array *tr = inode->i_private;
4452 	struct trace_iterator *iter;
4453 	int ret;
4454 
4455 	ret = tracing_check_open_get_tr(tr);
4456 	if (ret)
4457 		return ret;
4458 
4459 	/* If this file was open for write, then erase contents */
4460 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4461 		int cpu = tracing_get_cpu(inode);
4462 		struct array_buffer *trace_buf = &tr->array_buffer;
4463 
4464 #ifdef CONFIG_TRACER_MAX_TRACE
4465 		if (tr->current_trace->print_max)
4466 			trace_buf = &tr->max_buffer;
4467 #endif
4468 
4469 		if (cpu == RING_BUFFER_ALL_CPUS)
4470 			tracing_reset_online_cpus(trace_buf);
4471 		else
4472 			tracing_reset_cpu(trace_buf, cpu);
4473 	}
4474 
4475 	if (file->f_mode & FMODE_READ) {
4476 		iter = __tracing_open(inode, file, false);
4477 		if (IS_ERR(iter))
4478 			ret = PTR_ERR(iter);
4479 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4480 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4481 	}
4482 
4483 	if (ret < 0)
4484 		trace_array_put(tr);
4485 
4486 	return ret;
4487 }
4488 
4489 /*
4490  * Some tracers are not suitable for instance buffers.
4491  * A tracer is always available for the global array (toplevel)
4492  * or if it explicitly states that it is.
4493  */
4494 static bool
4495 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4496 {
4497 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4498 }
4499 
4500 /* Find the next tracer that this trace array may use */
4501 static struct tracer *
4502 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4503 {
4504 	while (t && !trace_ok_for_array(t, tr))
4505 		t = t->next;
4506 
4507 	return t;
4508 }
4509 
4510 static void *
4511 t_next(struct seq_file *m, void *v, loff_t *pos)
4512 {
4513 	struct trace_array *tr = m->private;
4514 	struct tracer *t = v;
4515 
4516 	(*pos)++;
4517 
4518 	if (t)
4519 		t = get_tracer_for_array(tr, t->next);
4520 
4521 	return t;
4522 }
4523 
4524 static void *t_start(struct seq_file *m, loff_t *pos)
4525 {
4526 	struct trace_array *tr = m->private;
4527 	struct tracer *t;
4528 	loff_t l = 0;
4529 
4530 	mutex_lock(&trace_types_lock);
4531 
4532 	t = get_tracer_for_array(tr, trace_types);
4533 	for (; t && l < *pos; t = t_next(m, t, &l))
4534 			;
4535 
4536 	return t;
4537 }
4538 
4539 static void t_stop(struct seq_file *m, void *p)
4540 {
4541 	mutex_unlock(&trace_types_lock);
4542 }
4543 
4544 static int t_show(struct seq_file *m, void *v)
4545 {
4546 	struct tracer *t = v;
4547 
4548 	if (!t)
4549 		return 0;
4550 
4551 	seq_puts(m, t->name);
4552 	if (t->next)
4553 		seq_putc(m, ' ');
4554 	else
4555 		seq_putc(m, '\n');
4556 
4557 	return 0;
4558 }
4559 
4560 static const struct seq_operations show_traces_seq_ops = {
4561 	.start		= t_start,
4562 	.next		= t_next,
4563 	.stop		= t_stop,
4564 	.show		= t_show,
4565 };
4566 
4567 static int show_traces_open(struct inode *inode, struct file *file)
4568 {
4569 	struct trace_array *tr = inode->i_private;
4570 	struct seq_file *m;
4571 	int ret;
4572 
4573 	ret = tracing_check_open_get_tr(tr);
4574 	if (ret)
4575 		return ret;
4576 
4577 	ret = seq_open(file, &show_traces_seq_ops);
4578 	if (ret) {
4579 		trace_array_put(tr);
4580 		return ret;
4581 	}
4582 
4583 	m = file->private_data;
4584 	m->private = tr;
4585 
4586 	return 0;
4587 }
4588 
4589 static int show_traces_release(struct inode *inode, struct file *file)
4590 {
4591 	struct trace_array *tr = inode->i_private;
4592 
4593 	trace_array_put(tr);
4594 	return seq_release(inode, file);
4595 }
4596 
4597 static ssize_t
4598 tracing_write_stub(struct file *filp, const char __user *ubuf,
4599 		   size_t count, loff_t *ppos)
4600 {
4601 	return count;
4602 }
4603 
4604 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4605 {
4606 	int ret;
4607 
4608 	if (file->f_mode & FMODE_READ)
4609 		ret = seq_lseek(file, offset, whence);
4610 	else
4611 		file->f_pos = ret = 0;
4612 
4613 	return ret;
4614 }
4615 
4616 static const struct file_operations tracing_fops = {
4617 	.open		= tracing_open,
4618 	.read		= seq_read,
4619 	.write		= tracing_write_stub,
4620 	.llseek		= tracing_lseek,
4621 	.release	= tracing_release,
4622 };
4623 
4624 static const struct file_operations show_traces_fops = {
4625 	.open		= show_traces_open,
4626 	.read		= seq_read,
4627 	.llseek		= seq_lseek,
4628 	.release	= show_traces_release,
4629 };
4630 
4631 static ssize_t
4632 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4633 		     size_t count, loff_t *ppos)
4634 {
4635 	struct trace_array *tr = file_inode(filp)->i_private;
4636 	char *mask_str;
4637 	int len;
4638 
4639 	len = snprintf(NULL, 0, "%*pb\n",
4640 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4641 	mask_str = kmalloc(len, GFP_KERNEL);
4642 	if (!mask_str)
4643 		return -ENOMEM;
4644 
4645 	len = snprintf(mask_str, len, "%*pb\n",
4646 		       cpumask_pr_args(tr->tracing_cpumask));
4647 	if (len >= count) {
4648 		count = -EINVAL;
4649 		goto out_err;
4650 	}
4651 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4652 
4653 out_err:
4654 	kfree(mask_str);
4655 
4656 	return count;
4657 }
4658 
4659 int tracing_set_cpumask(struct trace_array *tr,
4660 			cpumask_var_t tracing_cpumask_new)
4661 {
4662 	int cpu;
4663 
4664 	if (!tr)
4665 		return -EINVAL;
4666 
4667 	local_irq_disable();
4668 	arch_spin_lock(&tr->max_lock);
4669 	for_each_tracing_cpu(cpu) {
4670 		/*
4671 		 * Increase/decrease the disabled counter if we are
4672 		 * about to flip a bit in the cpumask:
4673 		 */
4674 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4675 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4676 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4677 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4678 		}
4679 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4680 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4681 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4682 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4683 		}
4684 	}
4685 	arch_spin_unlock(&tr->max_lock);
4686 	local_irq_enable();
4687 
4688 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4689 
4690 	return 0;
4691 }
4692 
4693 static ssize_t
4694 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4695 		      size_t count, loff_t *ppos)
4696 {
4697 	struct trace_array *tr = file_inode(filp)->i_private;
4698 	cpumask_var_t tracing_cpumask_new;
4699 	int err;
4700 
4701 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4702 		return -ENOMEM;
4703 
4704 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4705 	if (err)
4706 		goto err_free;
4707 
4708 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4709 	if (err)
4710 		goto err_free;
4711 
4712 	free_cpumask_var(tracing_cpumask_new);
4713 
4714 	return count;
4715 
4716 err_free:
4717 	free_cpumask_var(tracing_cpumask_new);
4718 
4719 	return err;
4720 }
4721 
4722 static const struct file_operations tracing_cpumask_fops = {
4723 	.open		= tracing_open_generic_tr,
4724 	.read		= tracing_cpumask_read,
4725 	.write		= tracing_cpumask_write,
4726 	.release	= tracing_release_generic_tr,
4727 	.llseek		= generic_file_llseek,
4728 };
4729 
4730 static int tracing_trace_options_show(struct seq_file *m, void *v)
4731 {
4732 	struct tracer_opt *trace_opts;
4733 	struct trace_array *tr = m->private;
4734 	u32 tracer_flags;
4735 	int i;
4736 
4737 	mutex_lock(&trace_types_lock);
4738 	tracer_flags = tr->current_trace->flags->val;
4739 	trace_opts = tr->current_trace->flags->opts;
4740 
4741 	for (i = 0; trace_options[i]; i++) {
4742 		if (tr->trace_flags & (1 << i))
4743 			seq_printf(m, "%s\n", trace_options[i]);
4744 		else
4745 			seq_printf(m, "no%s\n", trace_options[i]);
4746 	}
4747 
4748 	for (i = 0; trace_opts[i].name; i++) {
4749 		if (tracer_flags & trace_opts[i].bit)
4750 			seq_printf(m, "%s\n", trace_opts[i].name);
4751 		else
4752 			seq_printf(m, "no%s\n", trace_opts[i].name);
4753 	}
4754 	mutex_unlock(&trace_types_lock);
4755 
4756 	return 0;
4757 }
4758 
4759 static int __set_tracer_option(struct trace_array *tr,
4760 			       struct tracer_flags *tracer_flags,
4761 			       struct tracer_opt *opts, int neg)
4762 {
4763 	struct tracer *trace = tracer_flags->trace;
4764 	int ret;
4765 
4766 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4767 	if (ret)
4768 		return ret;
4769 
4770 	if (neg)
4771 		tracer_flags->val &= ~opts->bit;
4772 	else
4773 		tracer_flags->val |= opts->bit;
4774 	return 0;
4775 }
4776 
4777 /* Try to assign a tracer specific option */
4778 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4779 {
4780 	struct tracer *trace = tr->current_trace;
4781 	struct tracer_flags *tracer_flags = trace->flags;
4782 	struct tracer_opt *opts = NULL;
4783 	int i;
4784 
4785 	for (i = 0; tracer_flags->opts[i].name; i++) {
4786 		opts = &tracer_flags->opts[i];
4787 
4788 		if (strcmp(cmp, opts->name) == 0)
4789 			return __set_tracer_option(tr, trace->flags, opts, neg);
4790 	}
4791 
4792 	return -EINVAL;
4793 }
4794 
4795 /* Some tracers require overwrite to stay enabled */
4796 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4797 {
4798 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4799 		return -1;
4800 
4801 	return 0;
4802 }
4803 
4804 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4805 {
4806 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4807 	    (mask == TRACE_ITER_RECORD_CMD))
4808 		lockdep_assert_held(&event_mutex);
4809 
4810 	/* do nothing if flag is already set */
4811 	if (!!(tr->trace_flags & mask) == !!enabled)
4812 		return 0;
4813 
4814 	/* Give the tracer a chance to approve the change */
4815 	if (tr->current_trace->flag_changed)
4816 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4817 			return -EINVAL;
4818 
4819 	if (enabled)
4820 		tr->trace_flags |= mask;
4821 	else
4822 		tr->trace_flags &= ~mask;
4823 
4824 	if (mask == TRACE_ITER_RECORD_CMD)
4825 		trace_event_enable_cmd_record(enabled);
4826 
4827 	if (mask == TRACE_ITER_RECORD_TGID) {
4828 		if (!tgid_map)
4829 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4830 					   sizeof(*tgid_map),
4831 					   GFP_KERNEL);
4832 		if (!tgid_map) {
4833 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4834 			return -ENOMEM;
4835 		}
4836 
4837 		trace_event_enable_tgid_record(enabled);
4838 	}
4839 
4840 	if (mask == TRACE_ITER_EVENT_FORK)
4841 		trace_event_follow_fork(tr, enabled);
4842 
4843 	if (mask == TRACE_ITER_FUNC_FORK)
4844 		ftrace_pid_follow_fork(tr, enabled);
4845 
4846 	if (mask == TRACE_ITER_OVERWRITE) {
4847 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4848 #ifdef CONFIG_TRACER_MAX_TRACE
4849 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4850 #endif
4851 	}
4852 
4853 	if (mask == TRACE_ITER_PRINTK) {
4854 		trace_printk_start_stop_comm(enabled);
4855 		trace_printk_control(enabled);
4856 	}
4857 
4858 	return 0;
4859 }
4860 
4861 int trace_set_options(struct trace_array *tr, char *option)
4862 {
4863 	char *cmp;
4864 	int neg = 0;
4865 	int ret;
4866 	size_t orig_len = strlen(option);
4867 	int len;
4868 
4869 	cmp = strstrip(option);
4870 
4871 	len = str_has_prefix(cmp, "no");
4872 	if (len)
4873 		neg = 1;
4874 
4875 	cmp += len;
4876 
4877 	mutex_lock(&event_mutex);
4878 	mutex_lock(&trace_types_lock);
4879 
4880 	ret = match_string(trace_options, -1, cmp);
4881 	/* If no option could be set, test the specific tracer options */
4882 	if (ret < 0)
4883 		ret = set_tracer_option(tr, cmp, neg);
4884 	else
4885 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4886 
4887 	mutex_unlock(&trace_types_lock);
4888 	mutex_unlock(&event_mutex);
4889 
4890 	/*
4891 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4892 	 * turn it back into a space.
4893 	 */
4894 	if (orig_len > strlen(option))
4895 		option[strlen(option)] = ' ';
4896 
4897 	return ret;
4898 }
4899 
4900 static void __init apply_trace_boot_options(void)
4901 {
4902 	char *buf = trace_boot_options_buf;
4903 	char *option;
4904 
4905 	while (true) {
4906 		option = strsep(&buf, ",");
4907 
4908 		if (!option)
4909 			break;
4910 
4911 		if (*option)
4912 			trace_set_options(&global_trace, option);
4913 
4914 		/* Put back the comma to allow this to be called again */
4915 		if (buf)
4916 			*(buf - 1) = ',';
4917 	}
4918 }
4919 
4920 static ssize_t
4921 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4922 			size_t cnt, loff_t *ppos)
4923 {
4924 	struct seq_file *m = filp->private_data;
4925 	struct trace_array *tr = m->private;
4926 	char buf[64];
4927 	int ret;
4928 
4929 	if (cnt >= sizeof(buf))
4930 		return -EINVAL;
4931 
4932 	if (copy_from_user(buf, ubuf, cnt))
4933 		return -EFAULT;
4934 
4935 	buf[cnt] = 0;
4936 
4937 	ret = trace_set_options(tr, buf);
4938 	if (ret < 0)
4939 		return ret;
4940 
4941 	*ppos += cnt;
4942 
4943 	return cnt;
4944 }
4945 
4946 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4947 {
4948 	struct trace_array *tr = inode->i_private;
4949 	int ret;
4950 
4951 	ret = tracing_check_open_get_tr(tr);
4952 	if (ret)
4953 		return ret;
4954 
4955 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4956 	if (ret < 0)
4957 		trace_array_put(tr);
4958 
4959 	return ret;
4960 }
4961 
4962 static const struct file_operations tracing_iter_fops = {
4963 	.open		= tracing_trace_options_open,
4964 	.read		= seq_read,
4965 	.llseek		= seq_lseek,
4966 	.release	= tracing_single_release_tr,
4967 	.write		= tracing_trace_options_write,
4968 };
4969 
4970 static const char readme_msg[] =
4971 	"tracing mini-HOWTO:\n\n"
4972 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4973 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4974 	" Important files:\n"
4975 	"  trace\t\t\t- The static contents of the buffer\n"
4976 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4977 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4978 	"  current_tracer\t- function and latency tracers\n"
4979 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4980 	"  error_log\t- error log for failed commands (that support it)\n"
4981 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4982 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4983 	"  trace_clock\t\t-change the clock used to order events\n"
4984 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4985 	"      global:   Synced across CPUs but slows tracing down.\n"
4986 	"     counter:   Not a clock, but just an increment\n"
4987 	"      uptime:   Jiffy counter from time of boot\n"
4988 	"        perf:   Same clock that perf events use\n"
4989 #ifdef CONFIG_X86_64
4990 	"     x86-tsc:   TSC cycle counter\n"
4991 #endif
4992 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4993 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4994 	"    absolute:   Absolute (standalone) timestamp\n"
4995 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4996 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4997 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4998 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4999 	"\t\t\t  Remove sub-buffer with rmdir\n"
5000 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5001 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5002 	"\t\t\t  option name\n"
5003 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5004 #ifdef CONFIG_DYNAMIC_FTRACE
5005 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5006 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5007 	"\t\t\t  functions\n"
5008 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5009 	"\t     modules: Can select a group via module\n"
5010 	"\t      Format: :mod:<module-name>\n"
5011 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5012 	"\t    triggers: a command to perform when function is hit\n"
5013 	"\t      Format: <function>:<trigger>[:count]\n"
5014 	"\t     trigger: traceon, traceoff\n"
5015 	"\t\t      enable_event:<system>:<event>\n"
5016 	"\t\t      disable_event:<system>:<event>\n"
5017 #ifdef CONFIG_STACKTRACE
5018 	"\t\t      stacktrace\n"
5019 #endif
5020 #ifdef CONFIG_TRACER_SNAPSHOT
5021 	"\t\t      snapshot\n"
5022 #endif
5023 	"\t\t      dump\n"
5024 	"\t\t      cpudump\n"
5025 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5026 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5027 	"\t     The first one will disable tracing every time do_fault is hit\n"
5028 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5029 	"\t       The first time do trap is hit and it disables tracing, the\n"
5030 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5031 	"\t       the counter will not decrement. It only decrements when the\n"
5032 	"\t       trigger did work\n"
5033 	"\t     To remove trigger without count:\n"
5034 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5035 	"\t     To remove trigger with a count:\n"
5036 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5037 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5038 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5039 	"\t    modules: Can select a group via module command :mod:\n"
5040 	"\t    Does not accept triggers\n"
5041 #endif /* CONFIG_DYNAMIC_FTRACE */
5042 #ifdef CONFIG_FUNCTION_TRACER
5043 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5044 	"\t\t    (function)\n"
5045 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5046 	"\t\t    (function)\n"
5047 #endif
5048 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5049 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5050 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5051 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5052 #endif
5053 #ifdef CONFIG_TRACER_SNAPSHOT
5054 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5055 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5056 	"\t\t\t  information\n"
5057 #endif
5058 #ifdef CONFIG_STACK_TRACER
5059 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5060 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5061 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5062 	"\t\t\t  new trace)\n"
5063 #ifdef CONFIG_DYNAMIC_FTRACE
5064 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5065 	"\t\t\t  traces\n"
5066 #endif
5067 #endif /* CONFIG_STACK_TRACER */
5068 #ifdef CONFIG_DYNAMIC_EVENTS
5069 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5070 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5071 #endif
5072 #ifdef CONFIG_KPROBE_EVENTS
5073 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5074 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5075 #endif
5076 #ifdef CONFIG_UPROBE_EVENTS
5077 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5078 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5079 #endif
5080 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5081 	"\t  accepts: event-definitions (one definition per line)\n"
5082 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5083 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5084 #ifdef CONFIG_HIST_TRIGGERS
5085 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5086 #endif
5087 	"\t           -:[<group>/]<event>\n"
5088 #ifdef CONFIG_KPROBE_EVENTS
5089 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5090   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5091 #endif
5092 #ifdef CONFIG_UPROBE_EVENTS
5093   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5094 #endif
5095 	"\t     args: <name>=fetcharg[:type]\n"
5096 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5097 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5098 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5099 #else
5100 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5101 #endif
5102 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5103 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5104 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5105 	"\t           <type>\\[<array-size>\\]\n"
5106 #ifdef CONFIG_HIST_TRIGGERS
5107 	"\t    field: <stype> <name>;\n"
5108 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5109 	"\t           [unsigned] char/int/long\n"
5110 #endif
5111 #endif
5112 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5113 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5114 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5115 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5116 	"\t\t\t  events\n"
5117 	"      filter\t\t- If set, only events passing filter are traced\n"
5118 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5119 	"\t\t\t  <event>:\n"
5120 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5121 	"      filter\t\t- If set, only events passing filter are traced\n"
5122 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5123 	"\t    Format: <trigger>[:count][if <filter>]\n"
5124 	"\t   trigger: traceon, traceoff\n"
5125 	"\t            enable_event:<system>:<event>\n"
5126 	"\t            disable_event:<system>:<event>\n"
5127 #ifdef CONFIG_HIST_TRIGGERS
5128 	"\t            enable_hist:<system>:<event>\n"
5129 	"\t            disable_hist:<system>:<event>\n"
5130 #endif
5131 #ifdef CONFIG_STACKTRACE
5132 	"\t\t    stacktrace\n"
5133 #endif
5134 #ifdef CONFIG_TRACER_SNAPSHOT
5135 	"\t\t    snapshot\n"
5136 #endif
5137 #ifdef CONFIG_HIST_TRIGGERS
5138 	"\t\t    hist (see below)\n"
5139 #endif
5140 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5141 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5142 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5143 	"\t                  events/block/block_unplug/trigger\n"
5144 	"\t   The first disables tracing every time block_unplug is hit.\n"
5145 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5146 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5147 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5148 	"\t   Like function triggers, the counter is only decremented if it\n"
5149 	"\t    enabled or disabled tracing.\n"
5150 	"\t   To remove a trigger without a count:\n"
5151 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5152 	"\t   To remove a trigger with a count:\n"
5153 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5154 	"\t   Filters can be ignored when removing a trigger.\n"
5155 #ifdef CONFIG_HIST_TRIGGERS
5156 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5157 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5158 	"\t            [:values=<field1[,field2,...]>]\n"
5159 	"\t            [:sort=<field1[,field2,...]>]\n"
5160 	"\t            [:size=#entries]\n"
5161 	"\t            [:pause][:continue][:clear]\n"
5162 	"\t            [:name=histname1]\n"
5163 	"\t            [:<handler>.<action>]\n"
5164 	"\t            [if <filter>]\n\n"
5165 	"\t    When a matching event is hit, an entry is added to a hash\n"
5166 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5167 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5168 	"\t    correspond to fields in the event's format description.  Keys\n"
5169 	"\t    can be any field, or the special string 'stacktrace'.\n"
5170 	"\t    Compound keys consisting of up to two fields can be specified\n"
5171 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5172 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5173 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5174 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5175 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5176 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5177 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5178 	"\t    its histogram data will be shared with other triggers of the\n"
5179 	"\t    same name, and trigger hits will update this common data.\n\n"
5180 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5181 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5182 	"\t    triggers attached to an event, there will be a table for each\n"
5183 	"\t    trigger in the output.  The table displayed for a named\n"
5184 	"\t    trigger will be the same as any other instance having the\n"
5185 	"\t    same name.  The default format used to display a given field\n"
5186 	"\t    can be modified by appending any of the following modifiers\n"
5187 	"\t    to the field name, as applicable:\n\n"
5188 	"\t            .hex        display a number as a hex value\n"
5189 	"\t            .sym        display an address as a symbol\n"
5190 	"\t            .sym-offset display an address as a symbol and offset\n"
5191 	"\t            .execname   display a common_pid as a program name\n"
5192 	"\t            .syscall    display a syscall id as a syscall name\n"
5193 	"\t            .log2       display log2 value rather than raw number\n"
5194 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5195 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5196 	"\t    trigger or to start a hist trigger but not log any events\n"
5197 	"\t    until told to do so.  'continue' can be used to start or\n"
5198 	"\t    restart a paused hist trigger.\n\n"
5199 	"\t    The 'clear' parameter will clear the contents of a running\n"
5200 	"\t    hist trigger and leave its current paused/active state\n"
5201 	"\t    unchanged.\n\n"
5202 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5203 	"\t    have one event conditionally start and stop another event's\n"
5204 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5205 	"\t    the enable_event and disable_event triggers.\n\n"
5206 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5207 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5208 	"\t        <handler>.<action>\n\n"
5209 	"\t    The available handlers are:\n\n"
5210 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5211 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5212 	"\t        onchange(var)            - invoke action if var changes\n\n"
5213 	"\t    The available actions are:\n\n"
5214 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5215 	"\t        save(field,...)                      - save current event fields\n"
5216 #ifdef CONFIG_TRACER_SNAPSHOT
5217 	"\t        snapshot()                           - snapshot the trace buffer\n"
5218 #endif
5219 #endif
5220 ;
5221 
5222 static ssize_t
5223 tracing_readme_read(struct file *filp, char __user *ubuf,
5224 		       size_t cnt, loff_t *ppos)
5225 {
5226 	return simple_read_from_buffer(ubuf, cnt, ppos,
5227 					readme_msg, strlen(readme_msg));
5228 }
5229 
5230 static const struct file_operations tracing_readme_fops = {
5231 	.open		= tracing_open_generic,
5232 	.read		= tracing_readme_read,
5233 	.llseek		= generic_file_llseek,
5234 };
5235 
5236 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5237 {
5238 	int *ptr = v;
5239 
5240 	if (*pos || m->count)
5241 		ptr++;
5242 
5243 	(*pos)++;
5244 
5245 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5246 		if (trace_find_tgid(*ptr))
5247 			return ptr;
5248 	}
5249 
5250 	return NULL;
5251 }
5252 
5253 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5254 {
5255 	void *v;
5256 	loff_t l = 0;
5257 
5258 	if (!tgid_map)
5259 		return NULL;
5260 
5261 	v = &tgid_map[0];
5262 	while (l <= *pos) {
5263 		v = saved_tgids_next(m, v, &l);
5264 		if (!v)
5265 			return NULL;
5266 	}
5267 
5268 	return v;
5269 }
5270 
5271 static void saved_tgids_stop(struct seq_file *m, void *v)
5272 {
5273 }
5274 
5275 static int saved_tgids_show(struct seq_file *m, void *v)
5276 {
5277 	int pid = (int *)v - tgid_map;
5278 
5279 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5280 	return 0;
5281 }
5282 
5283 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5284 	.start		= saved_tgids_start,
5285 	.stop		= saved_tgids_stop,
5286 	.next		= saved_tgids_next,
5287 	.show		= saved_tgids_show,
5288 };
5289 
5290 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5291 {
5292 	int ret;
5293 
5294 	ret = tracing_check_open_get_tr(NULL);
5295 	if (ret)
5296 		return ret;
5297 
5298 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5299 }
5300 
5301 
5302 static const struct file_operations tracing_saved_tgids_fops = {
5303 	.open		= tracing_saved_tgids_open,
5304 	.read		= seq_read,
5305 	.llseek		= seq_lseek,
5306 	.release	= seq_release,
5307 };
5308 
5309 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5310 {
5311 	unsigned int *ptr = v;
5312 
5313 	if (*pos || m->count)
5314 		ptr++;
5315 
5316 	(*pos)++;
5317 
5318 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5319 	     ptr++) {
5320 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5321 			continue;
5322 
5323 		return ptr;
5324 	}
5325 
5326 	return NULL;
5327 }
5328 
5329 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5330 {
5331 	void *v;
5332 	loff_t l = 0;
5333 
5334 	preempt_disable();
5335 	arch_spin_lock(&trace_cmdline_lock);
5336 
5337 	v = &savedcmd->map_cmdline_to_pid[0];
5338 	while (l <= *pos) {
5339 		v = saved_cmdlines_next(m, v, &l);
5340 		if (!v)
5341 			return NULL;
5342 	}
5343 
5344 	return v;
5345 }
5346 
5347 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5348 {
5349 	arch_spin_unlock(&trace_cmdline_lock);
5350 	preempt_enable();
5351 }
5352 
5353 static int saved_cmdlines_show(struct seq_file *m, void *v)
5354 {
5355 	char buf[TASK_COMM_LEN];
5356 	unsigned int *pid = v;
5357 
5358 	__trace_find_cmdline(*pid, buf);
5359 	seq_printf(m, "%d %s\n", *pid, buf);
5360 	return 0;
5361 }
5362 
5363 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5364 	.start		= saved_cmdlines_start,
5365 	.next		= saved_cmdlines_next,
5366 	.stop		= saved_cmdlines_stop,
5367 	.show		= saved_cmdlines_show,
5368 };
5369 
5370 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5371 {
5372 	int ret;
5373 
5374 	ret = tracing_check_open_get_tr(NULL);
5375 	if (ret)
5376 		return ret;
5377 
5378 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5379 }
5380 
5381 static const struct file_operations tracing_saved_cmdlines_fops = {
5382 	.open		= tracing_saved_cmdlines_open,
5383 	.read		= seq_read,
5384 	.llseek		= seq_lseek,
5385 	.release	= seq_release,
5386 };
5387 
5388 static ssize_t
5389 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5390 				 size_t cnt, loff_t *ppos)
5391 {
5392 	char buf[64];
5393 	int r;
5394 
5395 	arch_spin_lock(&trace_cmdline_lock);
5396 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5397 	arch_spin_unlock(&trace_cmdline_lock);
5398 
5399 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5400 }
5401 
5402 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5403 {
5404 	kfree(s->saved_cmdlines);
5405 	kfree(s->map_cmdline_to_pid);
5406 	kfree(s);
5407 }
5408 
5409 static int tracing_resize_saved_cmdlines(unsigned int val)
5410 {
5411 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5412 
5413 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5414 	if (!s)
5415 		return -ENOMEM;
5416 
5417 	if (allocate_cmdlines_buffer(val, s) < 0) {
5418 		kfree(s);
5419 		return -ENOMEM;
5420 	}
5421 
5422 	arch_spin_lock(&trace_cmdline_lock);
5423 	savedcmd_temp = savedcmd;
5424 	savedcmd = s;
5425 	arch_spin_unlock(&trace_cmdline_lock);
5426 	free_saved_cmdlines_buffer(savedcmd_temp);
5427 
5428 	return 0;
5429 }
5430 
5431 static ssize_t
5432 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5433 				  size_t cnt, loff_t *ppos)
5434 {
5435 	unsigned long val;
5436 	int ret;
5437 
5438 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5439 	if (ret)
5440 		return ret;
5441 
5442 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5443 	if (!val || val > PID_MAX_DEFAULT)
5444 		return -EINVAL;
5445 
5446 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5447 	if (ret < 0)
5448 		return ret;
5449 
5450 	*ppos += cnt;
5451 
5452 	return cnt;
5453 }
5454 
5455 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5456 	.open		= tracing_open_generic,
5457 	.read		= tracing_saved_cmdlines_size_read,
5458 	.write		= tracing_saved_cmdlines_size_write,
5459 };
5460 
5461 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5462 static union trace_eval_map_item *
5463 update_eval_map(union trace_eval_map_item *ptr)
5464 {
5465 	if (!ptr->map.eval_string) {
5466 		if (ptr->tail.next) {
5467 			ptr = ptr->tail.next;
5468 			/* Set ptr to the next real item (skip head) */
5469 			ptr++;
5470 		} else
5471 			return NULL;
5472 	}
5473 	return ptr;
5474 }
5475 
5476 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5477 {
5478 	union trace_eval_map_item *ptr = v;
5479 
5480 	/*
5481 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5482 	 * This really should never happen.
5483 	 */
5484 	(*pos)++;
5485 	ptr = update_eval_map(ptr);
5486 	if (WARN_ON_ONCE(!ptr))
5487 		return NULL;
5488 
5489 	ptr++;
5490 	ptr = update_eval_map(ptr);
5491 
5492 	return ptr;
5493 }
5494 
5495 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5496 {
5497 	union trace_eval_map_item *v;
5498 	loff_t l = 0;
5499 
5500 	mutex_lock(&trace_eval_mutex);
5501 
5502 	v = trace_eval_maps;
5503 	if (v)
5504 		v++;
5505 
5506 	while (v && l < *pos) {
5507 		v = eval_map_next(m, v, &l);
5508 	}
5509 
5510 	return v;
5511 }
5512 
5513 static void eval_map_stop(struct seq_file *m, void *v)
5514 {
5515 	mutex_unlock(&trace_eval_mutex);
5516 }
5517 
5518 static int eval_map_show(struct seq_file *m, void *v)
5519 {
5520 	union trace_eval_map_item *ptr = v;
5521 
5522 	seq_printf(m, "%s %ld (%s)\n",
5523 		   ptr->map.eval_string, ptr->map.eval_value,
5524 		   ptr->map.system);
5525 
5526 	return 0;
5527 }
5528 
5529 static const struct seq_operations tracing_eval_map_seq_ops = {
5530 	.start		= eval_map_start,
5531 	.next		= eval_map_next,
5532 	.stop		= eval_map_stop,
5533 	.show		= eval_map_show,
5534 };
5535 
5536 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5537 {
5538 	int ret;
5539 
5540 	ret = tracing_check_open_get_tr(NULL);
5541 	if (ret)
5542 		return ret;
5543 
5544 	return seq_open(filp, &tracing_eval_map_seq_ops);
5545 }
5546 
5547 static const struct file_operations tracing_eval_map_fops = {
5548 	.open		= tracing_eval_map_open,
5549 	.read		= seq_read,
5550 	.llseek		= seq_lseek,
5551 	.release	= seq_release,
5552 };
5553 
5554 static inline union trace_eval_map_item *
5555 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5556 {
5557 	/* Return tail of array given the head */
5558 	return ptr + ptr->head.length + 1;
5559 }
5560 
5561 static void
5562 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5563 			   int len)
5564 {
5565 	struct trace_eval_map **stop;
5566 	struct trace_eval_map **map;
5567 	union trace_eval_map_item *map_array;
5568 	union trace_eval_map_item *ptr;
5569 
5570 	stop = start + len;
5571 
5572 	/*
5573 	 * The trace_eval_maps contains the map plus a head and tail item,
5574 	 * where the head holds the module and length of array, and the
5575 	 * tail holds a pointer to the next list.
5576 	 */
5577 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5578 	if (!map_array) {
5579 		pr_warn("Unable to allocate trace eval mapping\n");
5580 		return;
5581 	}
5582 
5583 	mutex_lock(&trace_eval_mutex);
5584 
5585 	if (!trace_eval_maps)
5586 		trace_eval_maps = map_array;
5587 	else {
5588 		ptr = trace_eval_maps;
5589 		for (;;) {
5590 			ptr = trace_eval_jmp_to_tail(ptr);
5591 			if (!ptr->tail.next)
5592 				break;
5593 			ptr = ptr->tail.next;
5594 
5595 		}
5596 		ptr->tail.next = map_array;
5597 	}
5598 	map_array->head.mod = mod;
5599 	map_array->head.length = len;
5600 	map_array++;
5601 
5602 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5603 		map_array->map = **map;
5604 		map_array++;
5605 	}
5606 	memset(map_array, 0, sizeof(*map_array));
5607 
5608 	mutex_unlock(&trace_eval_mutex);
5609 }
5610 
5611 static void trace_create_eval_file(struct dentry *d_tracer)
5612 {
5613 	trace_create_file("eval_map", 0444, d_tracer,
5614 			  NULL, &tracing_eval_map_fops);
5615 }
5616 
5617 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5618 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5619 static inline void trace_insert_eval_map_file(struct module *mod,
5620 			      struct trace_eval_map **start, int len) { }
5621 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5622 
5623 static void trace_insert_eval_map(struct module *mod,
5624 				  struct trace_eval_map **start, int len)
5625 {
5626 	struct trace_eval_map **map;
5627 
5628 	if (len <= 0)
5629 		return;
5630 
5631 	map = start;
5632 
5633 	trace_event_eval_update(map, len);
5634 
5635 	trace_insert_eval_map_file(mod, start, len);
5636 }
5637 
5638 static ssize_t
5639 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5640 		       size_t cnt, loff_t *ppos)
5641 {
5642 	struct trace_array *tr = filp->private_data;
5643 	char buf[MAX_TRACER_SIZE+2];
5644 	int r;
5645 
5646 	mutex_lock(&trace_types_lock);
5647 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5648 	mutex_unlock(&trace_types_lock);
5649 
5650 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5651 }
5652 
5653 int tracer_init(struct tracer *t, struct trace_array *tr)
5654 {
5655 	tracing_reset_online_cpus(&tr->array_buffer);
5656 	return t->init(tr);
5657 }
5658 
5659 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5660 {
5661 	int cpu;
5662 
5663 	for_each_tracing_cpu(cpu)
5664 		per_cpu_ptr(buf->data, cpu)->entries = val;
5665 }
5666 
5667 #ifdef CONFIG_TRACER_MAX_TRACE
5668 /* resize @tr's buffer to the size of @size_tr's entries */
5669 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5670 					struct array_buffer *size_buf, int cpu_id)
5671 {
5672 	int cpu, ret = 0;
5673 
5674 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5675 		for_each_tracing_cpu(cpu) {
5676 			ret = ring_buffer_resize(trace_buf->buffer,
5677 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5678 			if (ret < 0)
5679 				break;
5680 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5681 				per_cpu_ptr(size_buf->data, cpu)->entries;
5682 		}
5683 	} else {
5684 		ret = ring_buffer_resize(trace_buf->buffer,
5685 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5686 		if (ret == 0)
5687 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5688 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5689 	}
5690 
5691 	return ret;
5692 }
5693 #endif /* CONFIG_TRACER_MAX_TRACE */
5694 
5695 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5696 					unsigned long size, int cpu)
5697 {
5698 	int ret;
5699 
5700 	/*
5701 	 * If kernel or user changes the size of the ring buffer
5702 	 * we use the size that was given, and we can forget about
5703 	 * expanding it later.
5704 	 */
5705 	ring_buffer_expanded = true;
5706 
5707 	/* May be called before buffers are initialized */
5708 	if (!tr->array_buffer.buffer)
5709 		return 0;
5710 
5711 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5712 	if (ret < 0)
5713 		return ret;
5714 
5715 #ifdef CONFIG_TRACER_MAX_TRACE
5716 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5717 	    !tr->current_trace->use_max_tr)
5718 		goto out;
5719 
5720 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5721 	if (ret < 0) {
5722 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5723 						     &tr->array_buffer, cpu);
5724 		if (r < 0) {
5725 			/*
5726 			 * AARGH! We are left with different
5727 			 * size max buffer!!!!
5728 			 * The max buffer is our "snapshot" buffer.
5729 			 * When a tracer needs a snapshot (one of the
5730 			 * latency tracers), it swaps the max buffer
5731 			 * with the saved snap shot. We succeeded to
5732 			 * update the size of the main buffer, but failed to
5733 			 * update the size of the max buffer. But when we tried
5734 			 * to reset the main buffer to the original size, we
5735 			 * failed there too. This is very unlikely to
5736 			 * happen, but if it does, warn and kill all
5737 			 * tracing.
5738 			 */
5739 			WARN_ON(1);
5740 			tracing_disabled = 1;
5741 		}
5742 		return ret;
5743 	}
5744 
5745 	if (cpu == RING_BUFFER_ALL_CPUS)
5746 		set_buffer_entries(&tr->max_buffer, size);
5747 	else
5748 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5749 
5750  out:
5751 #endif /* CONFIG_TRACER_MAX_TRACE */
5752 
5753 	if (cpu == RING_BUFFER_ALL_CPUS)
5754 		set_buffer_entries(&tr->array_buffer, size);
5755 	else
5756 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5757 
5758 	return ret;
5759 }
5760 
5761 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5762 				  unsigned long size, int cpu_id)
5763 {
5764 	int ret = size;
5765 
5766 	mutex_lock(&trace_types_lock);
5767 
5768 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5769 		/* make sure, this cpu is enabled in the mask */
5770 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5771 			ret = -EINVAL;
5772 			goto out;
5773 		}
5774 	}
5775 
5776 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5777 	if (ret < 0)
5778 		ret = -ENOMEM;
5779 
5780 out:
5781 	mutex_unlock(&trace_types_lock);
5782 
5783 	return ret;
5784 }
5785 
5786 
5787 /**
5788  * tracing_update_buffers - used by tracing facility to expand ring buffers
5789  *
5790  * To save on memory when the tracing is never used on a system with it
5791  * configured in. The ring buffers are set to a minimum size. But once
5792  * a user starts to use the tracing facility, then they need to grow
5793  * to their default size.
5794  *
5795  * This function is to be called when a tracer is about to be used.
5796  */
5797 int tracing_update_buffers(void)
5798 {
5799 	int ret = 0;
5800 
5801 	mutex_lock(&trace_types_lock);
5802 	if (!ring_buffer_expanded)
5803 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5804 						RING_BUFFER_ALL_CPUS);
5805 	mutex_unlock(&trace_types_lock);
5806 
5807 	return ret;
5808 }
5809 
5810 struct trace_option_dentry;
5811 
5812 static void
5813 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5814 
5815 /*
5816  * Used to clear out the tracer before deletion of an instance.
5817  * Must have trace_types_lock held.
5818  */
5819 static void tracing_set_nop(struct trace_array *tr)
5820 {
5821 	if (tr->current_trace == &nop_trace)
5822 		return;
5823 
5824 	tr->current_trace->enabled--;
5825 
5826 	if (tr->current_trace->reset)
5827 		tr->current_trace->reset(tr);
5828 
5829 	tr->current_trace = &nop_trace;
5830 }
5831 
5832 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5833 {
5834 	/* Only enable if the directory has been created already. */
5835 	if (!tr->dir)
5836 		return;
5837 
5838 	create_trace_option_files(tr, t);
5839 }
5840 
5841 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5842 {
5843 	struct tracer *t;
5844 #ifdef CONFIG_TRACER_MAX_TRACE
5845 	bool had_max_tr;
5846 #endif
5847 	int ret = 0;
5848 
5849 	mutex_lock(&trace_types_lock);
5850 
5851 	if (!ring_buffer_expanded) {
5852 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5853 						RING_BUFFER_ALL_CPUS);
5854 		if (ret < 0)
5855 			goto out;
5856 		ret = 0;
5857 	}
5858 
5859 	for (t = trace_types; t; t = t->next) {
5860 		if (strcmp(t->name, buf) == 0)
5861 			break;
5862 	}
5863 	if (!t) {
5864 		ret = -EINVAL;
5865 		goto out;
5866 	}
5867 	if (t == tr->current_trace)
5868 		goto out;
5869 
5870 #ifdef CONFIG_TRACER_SNAPSHOT
5871 	if (t->use_max_tr) {
5872 		arch_spin_lock(&tr->max_lock);
5873 		if (tr->cond_snapshot)
5874 			ret = -EBUSY;
5875 		arch_spin_unlock(&tr->max_lock);
5876 		if (ret)
5877 			goto out;
5878 	}
5879 #endif
5880 	/* Some tracers won't work on kernel command line */
5881 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5882 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5883 			t->name);
5884 		goto out;
5885 	}
5886 
5887 	/* Some tracers are only allowed for the top level buffer */
5888 	if (!trace_ok_for_array(t, tr)) {
5889 		ret = -EINVAL;
5890 		goto out;
5891 	}
5892 
5893 	/* If trace pipe files are being read, we can't change the tracer */
5894 	if (tr->trace_ref) {
5895 		ret = -EBUSY;
5896 		goto out;
5897 	}
5898 
5899 	trace_branch_disable();
5900 
5901 	tr->current_trace->enabled--;
5902 
5903 	if (tr->current_trace->reset)
5904 		tr->current_trace->reset(tr);
5905 
5906 	/* Current trace needs to be nop_trace before synchronize_rcu */
5907 	tr->current_trace = &nop_trace;
5908 
5909 #ifdef CONFIG_TRACER_MAX_TRACE
5910 	had_max_tr = tr->allocated_snapshot;
5911 
5912 	if (had_max_tr && !t->use_max_tr) {
5913 		/*
5914 		 * We need to make sure that the update_max_tr sees that
5915 		 * current_trace changed to nop_trace to keep it from
5916 		 * swapping the buffers after we resize it.
5917 		 * The update_max_tr is called from interrupts disabled
5918 		 * so a synchronized_sched() is sufficient.
5919 		 */
5920 		synchronize_rcu();
5921 		free_snapshot(tr);
5922 	}
5923 #endif
5924 
5925 #ifdef CONFIG_TRACER_MAX_TRACE
5926 	if (t->use_max_tr && !had_max_tr) {
5927 		ret = tracing_alloc_snapshot_instance(tr);
5928 		if (ret < 0)
5929 			goto out;
5930 	}
5931 #endif
5932 
5933 	if (t->init) {
5934 		ret = tracer_init(t, tr);
5935 		if (ret)
5936 			goto out;
5937 	}
5938 
5939 	tr->current_trace = t;
5940 	tr->current_trace->enabled++;
5941 	trace_branch_enable(tr);
5942  out:
5943 	mutex_unlock(&trace_types_lock);
5944 
5945 	return ret;
5946 }
5947 
5948 static ssize_t
5949 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5950 			size_t cnt, loff_t *ppos)
5951 {
5952 	struct trace_array *tr = filp->private_data;
5953 	char buf[MAX_TRACER_SIZE+1];
5954 	int i;
5955 	size_t ret;
5956 	int err;
5957 
5958 	ret = cnt;
5959 
5960 	if (cnt > MAX_TRACER_SIZE)
5961 		cnt = MAX_TRACER_SIZE;
5962 
5963 	if (copy_from_user(buf, ubuf, cnt))
5964 		return -EFAULT;
5965 
5966 	buf[cnt] = 0;
5967 
5968 	/* strip ending whitespace. */
5969 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5970 		buf[i] = 0;
5971 
5972 	err = tracing_set_tracer(tr, buf);
5973 	if (err)
5974 		return err;
5975 
5976 	*ppos += ret;
5977 
5978 	return ret;
5979 }
5980 
5981 static ssize_t
5982 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5983 		   size_t cnt, loff_t *ppos)
5984 {
5985 	char buf[64];
5986 	int r;
5987 
5988 	r = snprintf(buf, sizeof(buf), "%ld\n",
5989 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5990 	if (r > sizeof(buf))
5991 		r = sizeof(buf);
5992 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5993 }
5994 
5995 static ssize_t
5996 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5997 		    size_t cnt, loff_t *ppos)
5998 {
5999 	unsigned long val;
6000 	int ret;
6001 
6002 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6003 	if (ret)
6004 		return ret;
6005 
6006 	*ptr = val * 1000;
6007 
6008 	return cnt;
6009 }
6010 
6011 static ssize_t
6012 tracing_thresh_read(struct file *filp, char __user *ubuf,
6013 		    size_t cnt, loff_t *ppos)
6014 {
6015 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6016 }
6017 
6018 static ssize_t
6019 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6020 		     size_t cnt, loff_t *ppos)
6021 {
6022 	struct trace_array *tr = filp->private_data;
6023 	int ret;
6024 
6025 	mutex_lock(&trace_types_lock);
6026 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6027 	if (ret < 0)
6028 		goto out;
6029 
6030 	if (tr->current_trace->update_thresh) {
6031 		ret = tr->current_trace->update_thresh(tr);
6032 		if (ret < 0)
6033 			goto out;
6034 	}
6035 
6036 	ret = cnt;
6037 out:
6038 	mutex_unlock(&trace_types_lock);
6039 
6040 	return ret;
6041 }
6042 
6043 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6044 
6045 static ssize_t
6046 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6047 		     size_t cnt, loff_t *ppos)
6048 {
6049 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6050 }
6051 
6052 static ssize_t
6053 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6054 		      size_t cnt, loff_t *ppos)
6055 {
6056 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6057 }
6058 
6059 #endif
6060 
6061 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6062 {
6063 	struct trace_array *tr = inode->i_private;
6064 	struct trace_iterator *iter;
6065 	int ret;
6066 
6067 	ret = tracing_check_open_get_tr(tr);
6068 	if (ret)
6069 		return ret;
6070 
6071 	mutex_lock(&trace_types_lock);
6072 
6073 	/* create a buffer to store the information to pass to userspace */
6074 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6075 	if (!iter) {
6076 		ret = -ENOMEM;
6077 		__trace_array_put(tr);
6078 		goto out;
6079 	}
6080 
6081 	trace_seq_init(&iter->seq);
6082 	iter->trace = tr->current_trace;
6083 
6084 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6085 		ret = -ENOMEM;
6086 		goto fail;
6087 	}
6088 
6089 	/* trace pipe does not show start of buffer */
6090 	cpumask_setall(iter->started);
6091 
6092 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6093 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6094 
6095 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6096 	if (trace_clocks[tr->clock_id].in_ns)
6097 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6098 
6099 	iter->tr = tr;
6100 	iter->array_buffer = &tr->array_buffer;
6101 	iter->cpu_file = tracing_get_cpu(inode);
6102 	mutex_init(&iter->mutex);
6103 	filp->private_data = iter;
6104 
6105 	if (iter->trace->pipe_open)
6106 		iter->trace->pipe_open(iter);
6107 
6108 	nonseekable_open(inode, filp);
6109 
6110 	tr->trace_ref++;
6111 out:
6112 	mutex_unlock(&trace_types_lock);
6113 	return ret;
6114 
6115 fail:
6116 	kfree(iter);
6117 	__trace_array_put(tr);
6118 	mutex_unlock(&trace_types_lock);
6119 	return ret;
6120 }
6121 
6122 static int tracing_release_pipe(struct inode *inode, struct file *file)
6123 {
6124 	struct trace_iterator *iter = file->private_data;
6125 	struct trace_array *tr = inode->i_private;
6126 
6127 	mutex_lock(&trace_types_lock);
6128 
6129 	tr->trace_ref--;
6130 
6131 	if (iter->trace->pipe_close)
6132 		iter->trace->pipe_close(iter);
6133 
6134 	mutex_unlock(&trace_types_lock);
6135 
6136 	free_cpumask_var(iter->started);
6137 	mutex_destroy(&iter->mutex);
6138 	kfree(iter);
6139 
6140 	trace_array_put(tr);
6141 
6142 	return 0;
6143 }
6144 
6145 static __poll_t
6146 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6147 {
6148 	struct trace_array *tr = iter->tr;
6149 
6150 	/* Iterators are static, they should be filled or empty */
6151 	if (trace_buffer_iter(iter, iter->cpu_file))
6152 		return EPOLLIN | EPOLLRDNORM;
6153 
6154 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6155 		/*
6156 		 * Always select as readable when in blocking mode
6157 		 */
6158 		return EPOLLIN | EPOLLRDNORM;
6159 	else
6160 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6161 					     filp, poll_table);
6162 }
6163 
6164 static __poll_t
6165 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6166 {
6167 	struct trace_iterator *iter = filp->private_data;
6168 
6169 	return trace_poll(iter, filp, poll_table);
6170 }
6171 
6172 /* Must be called with iter->mutex held. */
6173 static int tracing_wait_pipe(struct file *filp)
6174 {
6175 	struct trace_iterator *iter = filp->private_data;
6176 	int ret;
6177 
6178 	while (trace_empty(iter)) {
6179 
6180 		if ((filp->f_flags & O_NONBLOCK)) {
6181 			return -EAGAIN;
6182 		}
6183 
6184 		/*
6185 		 * We block until we read something and tracing is disabled.
6186 		 * We still block if tracing is disabled, but we have never
6187 		 * read anything. This allows a user to cat this file, and
6188 		 * then enable tracing. But after we have read something,
6189 		 * we give an EOF when tracing is again disabled.
6190 		 *
6191 		 * iter->pos will be 0 if we haven't read anything.
6192 		 */
6193 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6194 			break;
6195 
6196 		mutex_unlock(&iter->mutex);
6197 
6198 		ret = wait_on_pipe(iter, 0);
6199 
6200 		mutex_lock(&iter->mutex);
6201 
6202 		if (ret)
6203 			return ret;
6204 	}
6205 
6206 	return 1;
6207 }
6208 
6209 /*
6210  * Consumer reader.
6211  */
6212 static ssize_t
6213 tracing_read_pipe(struct file *filp, char __user *ubuf,
6214 		  size_t cnt, loff_t *ppos)
6215 {
6216 	struct trace_iterator *iter = filp->private_data;
6217 	ssize_t sret;
6218 
6219 	/*
6220 	 * Avoid more than one consumer on a single file descriptor
6221 	 * This is just a matter of traces coherency, the ring buffer itself
6222 	 * is protected.
6223 	 */
6224 	mutex_lock(&iter->mutex);
6225 
6226 	/* return any leftover data */
6227 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6228 	if (sret != -EBUSY)
6229 		goto out;
6230 
6231 	trace_seq_init(&iter->seq);
6232 
6233 	if (iter->trace->read) {
6234 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6235 		if (sret)
6236 			goto out;
6237 	}
6238 
6239 waitagain:
6240 	sret = tracing_wait_pipe(filp);
6241 	if (sret <= 0)
6242 		goto out;
6243 
6244 	/* stop when tracing is finished */
6245 	if (trace_empty(iter)) {
6246 		sret = 0;
6247 		goto out;
6248 	}
6249 
6250 	if (cnt >= PAGE_SIZE)
6251 		cnt = PAGE_SIZE - 1;
6252 
6253 	/* reset all but tr, trace, and overruns */
6254 	memset(&iter->seq, 0,
6255 	       sizeof(struct trace_iterator) -
6256 	       offsetof(struct trace_iterator, seq));
6257 	cpumask_clear(iter->started);
6258 	trace_seq_init(&iter->seq);
6259 	iter->pos = -1;
6260 
6261 	trace_event_read_lock();
6262 	trace_access_lock(iter->cpu_file);
6263 	while (trace_find_next_entry_inc(iter) != NULL) {
6264 		enum print_line_t ret;
6265 		int save_len = iter->seq.seq.len;
6266 
6267 		ret = print_trace_line(iter);
6268 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6269 			/* don't print partial lines */
6270 			iter->seq.seq.len = save_len;
6271 			break;
6272 		}
6273 		if (ret != TRACE_TYPE_NO_CONSUME)
6274 			trace_consume(iter);
6275 
6276 		if (trace_seq_used(&iter->seq) >= cnt)
6277 			break;
6278 
6279 		/*
6280 		 * Setting the full flag means we reached the trace_seq buffer
6281 		 * size and we should leave by partial output condition above.
6282 		 * One of the trace_seq_* functions is not used properly.
6283 		 */
6284 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6285 			  iter->ent->type);
6286 	}
6287 	trace_access_unlock(iter->cpu_file);
6288 	trace_event_read_unlock();
6289 
6290 	/* Now copy what we have to the user */
6291 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6292 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6293 		trace_seq_init(&iter->seq);
6294 
6295 	/*
6296 	 * If there was nothing to send to user, in spite of consuming trace
6297 	 * entries, go back to wait for more entries.
6298 	 */
6299 	if (sret == -EBUSY)
6300 		goto waitagain;
6301 
6302 out:
6303 	mutex_unlock(&iter->mutex);
6304 
6305 	return sret;
6306 }
6307 
6308 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6309 				     unsigned int idx)
6310 {
6311 	__free_page(spd->pages[idx]);
6312 }
6313 
6314 static size_t
6315 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6316 {
6317 	size_t count;
6318 	int save_len;
6319 	int ret;
6320 
6321 	/* Seq buffer is page-sized, exactly what we need. */
6322 	for (;;) {
6323 		save_len = iter->seq.seq.len;
6324 		ret = print_trace_line(iter);
6325 
6326 		if (trace_seq_has_overflowed(&iter->seq)) {
6327 			iter->seq.seq.len = save_len;
6328 			break;
6329 		}
6330 
6331 		/*
6332 		 * This should not be hit, because it should only
6333 		 * be set if the iter->seq overflowed. But check it
6334 		 * anyway to be safe.
6335 		 */
6336 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6337 			iter->seq.seq.len = save_len;
6338 			break;
6339 		}
6340 
6341 		count = trace_seq_used(&iter->seq) - save_len;
6342 		if (rem < count) {
6343 			rem = 0;
6344 			iter->seq.seq.len = save_len;
6345 			break;
6346 		}
6347 
6348 		if (ret != TRACE_TYPE_NO_CONSUME)
6349 			trace_consume(iter);
6350 		rem -= count;
6351 		if (!trace_find_next_entry_inc(iter))	{
6352 			rem = 0;
6353 			iter->ent = NULL;
6354 			break;
6355 		}
6356 	}
6357 
6358 	return rem;
6359 }
6360 
6361 static ssize_t tracing_splice_read_pipe(struct file *filp,
6362 					loff_t *ppos,
6363 					struct pipe_inode_info *pipe,
6364 					size_t len,
6365 					unsigned int flags)
6366 {
6367 	struct page *pages_def[PIPE_DEF_BUFFERS];
6368 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6369 	struct trace_iterator *iter = filp->private_data;
6370 	struct splice_pipe_desc spd = {
6371 		.pages		= pages_def,
6372 		.partial	= partial_def,
6373 		.nr_pages	= 0, /* This gets updated below. */
6374 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6375 		.ops		= &default_pipe_buf_ops,
6376 		.spd_release	= tracing_spd_release_pipe,
6377 	};
6378 	ssize_t ret;
6379 	size_t rem;
6380 	unsigned int i;
6381 
6382 	if (splice_grow_spd(pipe, &spd))
6383 		return -ENOMEM;
6384 
6385 	mutex_lock(&iter->mutex);
6386 
6387 	if (iter->trace->splice_read) {
6388 		ret = iter->trace->splice_read(iter, filp,
6389 					       ppos, pipe, len, flags);
6390 		if (ret)
6391 			goto out_err;
6392 	}
6393 
6394 	ret = tracing_wait_pipe(filp);
6395 	if (ret <= 0)
6396 		goto out_err;
6397 
6398 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6399 		ret = -EFAULT;
6400 		goto out_err;
6401 	}
6402 
6403 	trace_event_read_lock();
6404 	trace_access_lock(iter->cpu_file);
6405 
6406 	/* Fill as many pages as possible. */
6407 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6408 		spd.pages[i] = alloc_page(GFP_KERNEL);
6409 		if (!spd.pages[i])
6410 			break;
6411 
6412 		rem = tracing_fill_pipe_page(rem, iter);
6413 
6414 		/* Copy the data into the page, so we can start over. */
6415 		ret = trace_seq_to_buffer(&iter->seq,
6416 					  page_address(spd.pages[i]),
6417 					  trace_seq_used(&iter->seq));
6418 		if (ret < 0) {
6419 			__free_page(spd.pages[i]);
6420 			break;
6421 		}
6422 		spd.partial[i].offset = 0;
6423 		spd.partial[i].len = trace_seq_used(&iter->seq);
6424 
6425 		trace_seq_init(&iter->seq);
6426 	}
6427 
6428 	trace_access_unlock(iter->cpu_file);
6429 	trace_event_read_unlock();
6430 	mutex_unlock(&iter->mutex);
6431 
6432 	spd.nr_pages = i;
6433 
6434 	if (i)
6435 		ret = splice_to_pipe(pipe, &spd);
6436 	else
6437 		ret = 0;
6438 out:
6439 	splice_shrink_spd(&spd);
6440 	return ret;
6441 
6442 out_err:
6443 	mutex_unlock(&iter->mutex);
6444 	goto out;
6445 }
6446 
6447 static ssize_t
6448 tracing_entries_read(struct file *filp, char __user *ubuf,
6449 		     size_t cnt, loff_t *ppos)
6450 {
6451 	struct inode *inode = file_inode(filp);
6452 	struct trace_array *tr = inode->i_private;
6453 	int cpu = tracing_get_cpu(inode);
6454 	char buf[64];
6455 	int r = 0;
6456 	ssize_t ret;
6457 
6458 	mutex_lock(&trace_types_lock);
6459 
6460 	if (cpu == RING_BUFFER_ALL_CPUS) {
6461 		int cpu, buf_size_same;
6462 		unsigned long size;
6463 
6464 		size = 0;
6465 		buf_size_same = 1;
6466 		/* check if all cpu sizes are same */
6467 		for_each_tracing_cpu(cpu) {
6468 			/* fill in the size from first enabled cpu */
6469 			if (size == 0)
6470 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6471 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6472 				buf_size_same = 0;
6473 				break;
6474 			}
6475 		}
6476 
6477 		if (buf_size_same) {
6478 			if (!ring_buffer_expanded)
6479 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6480 					    size >> 10,
6481 					    trace_buf_size >> 10);
6482 			else
6483 				r = sprintf(buf, "%lu\n", size >> 10);
6484 		} else
6485 			r = sprintf(buf, "X\n");
6486 	} else
6487 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6488 
6489 	mutex_unlock(&trace_types_lock);
6490 
6491 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6492 	return ret;
6493 }
6494 
6495 static ssize_t
6496 tracing_entries_write(struct file *filp, const char __user *ubuf,
6497 		      size_t cnt, loff_t *ppos)
6498 {
6499 	struct inode *inode = file_inode(filp);
6500 	struct trace_array *tr = inode->i_private;
6501 	unsigned long val;
6502 	int ret;
6503 
6504 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6505 	if (ret)
6506 		return ret;
6507 
6508 	/* must have at least 1 entry */
6509 	if (!val)
6510 		return -EINVAL;
6511 
6512 	/* value is in KB */
6513 	val <<= 10;
6514 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6515 	if (ret < 0)
6516 		return ret;
6517 
6518 	*ppos += cnt;
6519 
6520 	return cnt;
6521 }
6522 
6523 static ssize_t
6524 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6525 				size_t cnt, loff_t *ppos)
6526 {
6527 	struct trace_array *tr = filp->private_data;
6528 	char buf[64];
6529 	int r, cpu;
6530 	unsigned long size = 0, expanded_size = 0;
6531 
6532 	mutex_lock(&trace_types_lock);
6533 	for_each_tracing_cpu(cpu) {
6534 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6535 		if (!ring_buffer_expanded)
6536 			expanded_size += trace_buf_size >> 10;
6537 	}
6538 	if (ring_buffer_expanded)
6539 		r = sprintf(buf, "%lu\n", size);
6540 	else
6541 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6542 	mutex_unlock(&trace_types_lock);
6543 
6544 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6545 }
6546 
6547 static ssize_t
6548 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6549 			  size_t cnt, loff_t *ppos)
6550 {
6551 	/*
6552 	 * There is no need to read what the user has written, this function
6553 	 * is just to make sure that there is no error when "echo" is used
6554 	 */
6555 
6556 	*ppos += cnt;
6557 
6558 	return cnt;
6559 }
6560 
6561 static int
6562 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6563 {
6564 	struct trace_array *tr = inode->i_private;
6565 
6566 	/* disable tracing ? */
6567 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6568 		tracer_tracing_off(tr);
6569 	/* resize the ring buffer to 0 */
6570 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6571 
6572 	trace_array_put(tr);
6573 
6574 	return 0;
6575 }
6576 
6577 static ssize_t
6578 tracing_mark_write(struct file *filp, const char __user *ubuf,
6579 					size_t cnt, loff_t *fpos)
6580 {
6581 	struct trace_array *tr = filp->private_data;
6582 	struct ring_buffer_event *event;
6583 	enum event_trigger_type tt = ETT_NONE;
6584 	struct trace_buffer *buffer;
6585 	struct print_entry *entry;
6586 	unsigned long irq_flags;
6587 	ssize_t written;
6588 	int size;
6589 	int len;
6590 
6591 /* Used in tracing_mark_raw_write() as well */
6592 #define FAULTED_STR "<faulted>"
6593 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6594 
6595 	if (tracing_disabled)
6596 		return -EINVAL;
6597 
6598 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6599 		return -EINVAL;
6600 
6601 	if (cnt > TRACE_BUF_SIZE)
6602 		cnt = TRACE_BUF_SIZE;
6603 
6604 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6605 
6606 	local_save_flags(irq_flags);
6607 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6608 
6609 	/* If less than "<faulted>", then make sure we can still add that */
6610 	if (cnt < FAULTED_SIZE)
6611 		size += FAULTED_SIZE - cnt;
6612 
6613 	buffer = tr->array_buffer.buffer;
6614 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6615 					    irq_flags, preempt_count());
6616 	if (unlikely(!event))
6617 		/* Ring buffer disabled, return as if not open for write */
6618 		return -EBADF;
6619 
6620 	entry = ring_buffer_event_data(event);
6621 	entry->ip = _THIS_IP_;
6622 
6623 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6624 	if (len) {
6625 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6626 		cnt = FAULTED_SIZE;
6627 		written = -EFAULT;
6628 	} else
6629 		written = cnt;
6630 	len = cnt;
6631 
6632 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6633 		/* do not add \n before testing triggers, but add \0 */
6634 		entry->buf[cnt] = '\0';
6635 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6636 	}
6637 
6638 	if (entry->buf[cnt - 1] != '\n') {
6639 		entry->buf[cnt] = '\n';
6640 		entry->buf[cnt + 1] = '\0';
6641 	} else
6642 		entry->buf[cnt] = '\0';
6643 
6644 	__buffer_unlock_commit(buffer, event);
6645 
6646 	if (tt)
6647 		event_triggers_post_call(tr->trace_marker_file, tt);
6648 
6649 	if (written > 0)
6650 		*fpos += written;
6651 
6652 	return written;
6653 }
6654 
6655 /* Limit it for now to 3K (including tag) */
6656 #define RAW_DATA_MAX_SIZE (1024*3)
6657 
6658 static ssize_t
6659 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6660 					size_t cnt, loff_t *fpos)
6661 {
6662 	struct trace_array *tr = filp->private_data;
6663 	struct ring_buffer_event *event;
6664 	struct trace_buffer *buffer;
6665 	struct raw_data_entry *entry;
6666 	unsigned long irq_flags;
6667 	ssize_t written;
6668 	int size;
6669 	int len;
6670 
6671 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6672 
6673 	if (tracing_disabled)
6674 		return -EINVAL;
6675 
6676 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6677 		return -EINVAL;
6678 
6679 	/* The marker must at least have a tag id */
6680 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6681 		return -EINVAL;
6682 
6683 	if (cnt > TRACE_BUF_SIZE)
6684 		cnt = TRACE_BUF_SIZE;
6685 
6686 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6687 
6688 	local_save_flags(irq_flags);
6689 	size = sizeof(*entry) + cnt;
6690 	if (cnt < FAULT_SIZE_ID)
6691 		size += FAULT_SIZE_ID - cnt;
6692 
6693 	buffer = tr->array_buffer.buffer;
6694 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6695 					    irq_flags, preempt_count());
6696 	if (!event)
6697 		/* Ring buffer disabled, return as if not open for write */
6698 		return -EBADF;
6699 
6700 	entry = ring_buffer_event_data(event);
6701 
6702 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6703 	if (len) {
6704 		entry->id = -1;
6705 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6706 		written = -EFAULT;
6707 	} else
6708 		written = cnt;
6709 
6710 	__buffer_unlock_commit(buffer, event);
6711 
6712 	if (written > 0)
6713 		*fpos += written;
6714 
6715 	return written;
6716 }
6717 
6718 static int tracing_clock_show(struct seq_file *m, void *v)
6719 {
6720 	struct trace_array *tr = m->private;
6721 	int i;
6722 
6723 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6724 		seq_printf(m,
6725 			"%s%s%s%s", i ? " " : "",
6726 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6727 			i == tr->clock_id ? "]" : "");
6728 	seq_putc(m, '\n');
6729 
6730 	return 0;
6731 }
6732 
6733 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6734 {
6735 	int i;
6736 
6737 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6738 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6739 			break;
6740 	}
6741 	if (i == ARRAY_SIZE(trace_clocks))
6742 		return -EINVAL;
6743 
6744 	mutex_lock(&trace_types_lock);
6745 
6746 	tr->clock_id = i;
6747 
6748 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6749 
6750 	/*
6751 	 * New clock may not be consistent with the previous clock.
6752 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6753 	 */
6754 	tracing_reset_online_cpus(&tr->array_buffer);
6755 
6756 #ifdef CONFIG_TRACER_MAX_TRACE
6757 	if (tr->max_buffer.buffer)
6758 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6759 	tracing_reset_online_cpus(&tr->max_buffer);
6760 #endif
6761 
6762 	mutex_unlock(&trace_types_lock);
6763 
6764 	return 0;
6765 }
6766 
6767 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6768 				   size_t cnt, loff_t *fpos)
6769 {
6770 	struct seq_file *m = filp->private_data;
6771 	struct trace_array *tr = m->private;
6772 	char buf[64];
6773 	const char *clockstr;
6774 	int ret;
6775 
6776 	if (cnt >= sizeof(buf))
6777 		return -EINVAL;
6778 
6779 	if (copy_from_user(buf, ubuf, cnt))
6780 		return -EFAULT;
6781 
6782 	buf[cnt] = 0;
6783 
6784 	clockstr = strstrip(buf);
6785 
6786 	ret = tracing_set_clock(tr, clockstr);
6787 	if (ret)
6788 		return ret;
6789 
6790 	*fpos += cnt;
6791 
6792 	return cnt;
6793 }
6794 
6795 static int tracing_clock_open(struct inode *inode, struct file *file)
6796 {
6797 	struct trace_array *tr = inode->i_private;
6798 	int ret;
6799 
6800 	ret = tracing_check_open_get_tr(tr);
6801 	if (ret)
6802 		return ret;
6803 
6804 	ret = single_open(file, tracing_clock_show, inode->i_private);
6805 	if (ret < 0)
6806 		trace_array_put(tr);
6807 
6808 	return ret;
6809 }
6810 
6811 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6812 {
6813 	struct trace_array *tr = m->private;
6814 
6815 	mutex_lock(&trace_types_lock);
6816 
6817 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6818 		seq_puts(m, "delta [absolute]\n");
6819 	else
6820 		seq_puts(m, "[delta] absolute\n");
6821 
6822 	mutex_unlock(&trace_types_lock);
6823 
6824 	return 0;
6825 }
6826 
6827 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6828 {
6829 	struct trace_array *tr = inode->i_private;
6830 	int ret;
6831 
6832 	ret = tracing_check_open_get_tr(tr);
6833 	if (ret)
6834 		return ret;
6835 
6836 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6837 	if (ret < 0)
6838 		trace_array_put(tr);
6839 
6840 	return ret;
6841 }
6842 
6843 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6844 {
6845 	int ret = 0;
6846 
6847 	mutex_lock(&trace_types_lock);
6848 
6849 	if (abs && tr->time_stamp_abs_ref++)
6850 		goto out;
6851 
6852 	if (!abs) {
6853 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6854 			ret = -EINVAL;
6855 			goto out;
6856 		}
6857 
6858 		if (--tr->time_stamp_abs_ref)
6859 			goto out;
6860 	}
6861 
6862 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6863 
6864 #ifdef CONFIG_TRACER_MAX_TRACE
6865 	if (tr->max_buffer.buffer)
6866 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6867 #endif
6868  out:
6869 	mutex_unlock(&trace_types_lock);
6870 
6871 	return ret;
6872 }
6873 
6874 struct ftrace_buffer_info {
6875 	struct trace_iterator	iter;
6876 	void			*spare;
6877 	unsigned int		spare_cpu;
6878 	unsigned int		read;
6879 };
6880 
6881 #ifdef CONFIG_TRACER_SNAPSHOT
6882 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6883 {
6884 	struct trace_array *tr = inode->i_private;
6885 	struct trace_iterator *iter;
6886 	struct seq_file *m;
6887 	int ret;
6888 
6889 	ret = tracing_check_open_get_tr(tr);
6890 	if (ret)
6891 		return ret;
6892 
6893 	if (file->f_mode & FMODE_READ) {
6894 		iter = __tracing_open(inode, file, true);
6895 		if (IS_ERR(iter))
6896 			ret = PTR_ERR(iter);
6897 	} else {
6898 		/* Writes still need the seq_file to hold the private data */
6899 		ret = -ENOMEM;
6900 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6901 		if (!m)
6902 			goto out;
6903 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6904 		if (!iter) {
6905 			kfree(m);
6906 			goto out;
6907 		}
6908 		ret = 0;
6909 
6910 		iter->tr = tr;
6911 		iter->array_buffer = &tr->max_buffer;
6912 		iter->cpu_file = tracing_get_cpu(inode);
6913 		m->private = iter;
6914 		file->private_data = m;
6915 	}
6916 out:
6917 	if (ret < 0)
6918 		trace_array_put(tr);
6919 
6920 	return ret;
6921 }
6922 
6923 static ssize_t
6924 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6925 		       loff_t *ppos)
6926 {
6927 	struct seq_file *m = filp->private_data;
6928 	struct trace_iterator *iter = m->private;
6929 	struct trace_array *tr = iter->tr;
6930 	unsigned long val;
6931 	int ret;
6932 
6933 	ret = tracing_update_buffers();
6934 	if (ret < 0)
6935 		return ret;
6936 
6937 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6938 	if (ret)
6939 		return ret;
6940 
6941 	mutex_lock(&trace_types_lock);
6942 
6943 	if (tr->current_trace->use_max_tr) {
6944 		ret = -EBUSY;
6945 		goto out;
6946 	}
6947 
6948 	arch_spin_lock(&tr->max_lock);
6949 	if (tr->cond_snapshot)
6950 		ret = -EBUSY;
6951 	arch_spin_unlock(&tr->max_lock);
6952 	if (ret)
6953 		goto out;
6954 
6955 	switch (val) {
6956 	case 0:
6957 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6958 			ret = -EINVAL;
6959 			break;
6960 		}
6961 		if (tr->allocated_snapshot)
6962 			free_snapshot(tr);
6963 		break;
6964 	case 1:
6965 /* Only allow per-cpu swap if the ring buffer supports it */
6966 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6967 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6968 			ret = -EINVAL;
6969 			break;
6970 		}
6971 #endif
6972 		if (tr->allocated_snapshot)
6973 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6974 					&tr->array_buffer, iter->cpu_file);
6975 		else
6976 			ret = tracing_alloc_snapshot_instance(tr);
6977 		if (ret < 0)
6978 			break;
6979 		local_irq_disable();
6980 		/* Now, we're going to swap */
6981 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6982 			update_max_tr(tr, current, smp_processor_id(), NULL);
6983 		else
6984 			update_max_tr_single(tr, current, iter->cpu_file);
6985 		local_irq_enable();
6986 		break;
6987 	default:
6988 		if (tr->allocated_snapshot) {
6989 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6990 				tracing_reset_online_cpus(&tr->max_buffer);
6991 			else
6992 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6993 		}
6994 		break;
6995 	}
6996 
6997 	if (ret >= 0) {
6998 		*ppos += cnt;
6999 		ret = cnt;
7000 	}
7001 out:
7002 	mutex_unlock(&trace_types_lock);
7003 	return ret;
7004 }
7005 
7006 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7007 {
7008 	struct seq_file *m = file->private_data;
7009 	int ret;
7010 
7011 	ret = tracing_release(inode, file);
7012 
7013 	if (file->f_mode & FMODE_READ)
7014 		return ret;
7015 
7016 	/* If write only, the seq_file is just a stub */
7017 	if (m)
7018 		kfree(m->private);
7019 	kfree(m);
7020 
7021 	return 0;
7022 }
7023 
7024 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7025 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7026 				    size_t count, loff_t *ppos);
7027 static int tracing_buffers_release(struct inode *inode, struct file *file);
7028 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7029 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7030 
7031 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7032 {
7033 	struct ftrace_buffer_info *info;
7034 	int ret;
7035 
7036 	/* The following checks for tracefs lockdown */
7037 	ret = tracing_buffers_open(inode, filp);
7038 	if (ret < 0)
7039 		return ret;
7040 
7041 	info = filp->private_data;
7042 
7043 	if (info->iter.trace->use_max_tr) {
7044 		tracing_buffers_release(inode, filp);
7045 		return -EBUSY;
7046 	}
7047 
7048 	info->iter.snapshot = true;
7049 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7050 
7051 	return ret;
7052 }
7053 
7054 #endif /* CONFIG_TRACER_SNAPSHOT */
7055 
7056 
7057 static const struct file_operations tracing_thresh_fops = {
7058 	.open		= tracing_open_generic,
7059 	.read		= tracing_thresh_read,
7060 	.write		= tracing_thresh_write,
7061 	.llseek		= generic_file_llseek,
7062 };
7063 
7064 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7065 static const struct file_operations tracing_max_lat_fops = {
7066 	.open		= tracing_open_generic,
7067 	.read		= tracing_max_lat_read,
7068 	.write		= tracing_max_lat_write,
7069 	.llseek		= generic_file_llseek,
7070 };
7071 #endif
7072 
7073 static const struct file_operations set_tracer_fops = {
7074 	.open		= tracing_open_generic,
7075 	.read		= tracing_set_trace_read,
7076 	.write		= tracing_set_trace_write,
7077 	.llseek		= generic_file_llseek,
7078 };
7079 
7080 static const struct file_operations tracing_pipe_fops = {
7081 	.open		= tracing_open_pipe,
7082 	.poll		= tracing_poll_pipe,
7083 	.read		= tracing_read_pipe,
7084 	.splice_read	= tracing_splice_read_pipe,
7085 	.release	= tracing_release_pipe,
7086 	.llseek		= no_llseek,
7087 };
7088 
7089 static const struct file_operations tracing_entries_fops = {
7090 	.open		= tracing_open_generic_tr,
7091 	.read		= tracing_entries_read,
7092 	.write		= tracing_entries_write,
7093 	.llseek		= generic_file_llseek,
7094 	.release	= tracing_release_generic_tr,
7095 };
7096 
7097 static const struct file_operations tracing_total_entries_fops = {
7098 	.open		= tracing_open_generic_tr,
7099 	.read		= tracing_total_entries_read,
7100 	.llseek		= generic_file_llseek,
7101 	.release	= tracing_release_generic_tr,
7102 };
7103 
7104 static const struct file_operations tracing_free_buffer_fops = {
7105 	.open		= tracing_open_generic_tr,
7106 	.write		= tracing_free_buffer_write,
7107 	.release	= tracing_free_buffer_release,
7108 };
7109 
7110 static const struct file_operations tracing_mark_fops = {
7111 	.open		= tracing_open_generic_tr,
7112 	.write		= tracing_mark_write,
7113 	.llseek		= generic_file_llseek,
7114 	.release	= tracing_release_generic_tr,
7115 };
7116 
7117 static const struct file_operations tracing_mark_raw_fops = {
7118 	.open		= tracing_open_generic_tr,
7119 	.write		= tracing_mark_raw_write,
7120 	.llseek		= generic_file_llseek,
7121 	.release	= tracing_release_generic_tr,
7122 };
7123 
7124 static const struct file_operations trace_clock_fops = {
7125 	.open		= tracing_clock_open,
7126 	.read		= seq_read,
7127 	.llseek		= seq_lseek,
7128 	.release	= tracing_single_release_tr,
7129 	.write		= tracing_clock_write,
7130 };
7131 
7132 static const struct file_operations trace_time_stamp_mode_fops = {
7133 	.open		= tracing_time_stamp_mode_open,
7134 	.read		= seq_read,
7135 	.llseek		= seq_lseek,
7136 	.release	= tracing_single_release_tr,
7137 };
7138 
7139 #ifdef CONFIG_TRACER_SNAPSHOT
7140 static const struct file_operations snapshot_fops = {
7141 	.open		= tracing_snapshot_open,
7142 	.read		= seq_read,
7143 	.write		= tracing_snapshot_write,
7144 	.llseek		= tracing_lseek,
7145 	.release	= tracing_snapshot_release,
7146 };
7147 
7148 static const struct file_operations snapshot_raw_fops = {
7149 	.open		= snapshot_raw_open,
7150 	.read		= tracing_buffers_read,
7151 	.release	= tracing_buffers_release,
7152 	.splice_read	= tracing_buffers_splice_read,
7153 	.llseek		= no_llseek,
7154 };
7155 
7156 #endif /* CONFIG_TRACER_SNAPSHOT */
7157 
7158 #define TRACING_LOG_ERRS_MAX	8
7159 #define TRACING_LOG_LOC_MAX	128
7160 
7161 #define CMD_PREFIX "  Command: "
7162 
7163 struct err_info {
7164 	const char	**errs;	/* ptr to loc-specific array of err strings */
7165 	u8		type;	/* index into errs -> specific err string */
7166 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7167 	u64		ts;
7168 };
7169 
7170 struct tracing_log_err {
7171 	struct list_head	list;
7172 	struct err_info		info;
7173 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7174 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7175 };
7176 
7177 static DEFINE_MUTEX(tracing_err_log_lock);
7178 
7179 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7180 {
7181 	struct tracing_log_err *err;
7182 
7183 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7184 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7185 		if (!err)
7186 			err = ERR_PTR(-ENOMEM);
7187 		tr->n_err_log_entries++;
7188 
7189 		return err;
7190 	}
7191 
7192 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7193 	list_del(&err->list);
7194 
7195 	return err;
7196 }
7197 
7198 /**
7199  * err_pos - find the position of a string within a command for error careting
7200  * @cmd: The tracing command that caused the error
7201  * @str: The string to position the caret at within @cmd
7202  *
7203  * Finds the position of the first occurence of @str within @cmd.  The
7204  * return value can be passed to tracing_log_err() for caret placement
7205  * within @cmd.
7206  *
7207  * Returns the index within @cmd of the first occurence of @str or 0
7208  * if @str was not found.
7209  */
7210 unsigned int err_pos(char *cmd, const char *str)
7211 {
7212 	char *found;
7213 
7214 	if (WARN_ON(!strlen(cmd)))
7215 		return 0;
7216 
7217 	found = strstr(cmd, str);
7218 	if (found)
7219 		return found - cmd;
7220 
7221 	return 0;
7222 }
7223 
7224 /**
7225  * tracing_log_err - write an error to the tracing error log
7226  * @tr: The associated trace array for the error (NULL for top level array)
7227  * @loc: A string describing where the error occurred
7228  * @cmd: The tracing command that caused the error
7229  * @errs: The array of loc-specific static error strings
7230  * @type: The index into errs[], which produces the specific static err string
7231  * @pos: The position the caret should be placed in the cmd
7232  *
7233  * Writes an error into tracing/error_log of the form:
7234  *
7235  * <loc>: error: <text>
7236  *   Command: <cmd>
7237  *              ^
7238  *
7239  * tracing/error_log is a small log file containing the last
7240  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7241  * unless there has been a tracing error, and the error log can be
7242  * cleared and have its memory freed by writing the empty string in
7243  * truncation mode to it i.e. echo > tracing/error_log.
7244  *
7245  * NOTE: the @errs array along with the @type param are used to
7246  * produce a static error string - this string is not copied and saved
7247  * when the error is logged - only a pointer to it is saved.  See
7248  * existing callers for examples of how static strings are typically
7249  * defined for use with tracing_log_err().
7250  */
7251 void tracing_log_err(struct trace_array *tr,
7252 		     const char *loc, const char *cmd,
7253 		     const char **errs, u8 type, u8 pos)
7254 {
7255 	struct tracing_log_err *err;
7256 
7257 	if (!tr)
7258 		tr = &global_trace;
7259 
7260 	mutex_lock(&tracing_err_log_lock);
7261 	err = get_tracing_log_err(tr);
7262 	if (PTR_ERR(err) == -ENOMEM) {
7263 		mutex_unlock(&tracing_err_log_lock);
7264 		return;
7265 	}
7266 
7267 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7268 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7269 
7270 	err->info.errs = errs;
7271 	err->info.type = type;
7272 	err->info.pos = pos;
7273 	err->info.ts = local_clock();
7274 
7275 	list_add_tail(&err->list, &tr->err_log);
7276 	mutex_unlock(&tracing_err_log_lock);
7277 }
7278 
7279 static void clear_tracing_err_log(struct trace_array *tr)
7280 {
7281 	struct tracing_log_err *err, *next;
7282 
7283 	mutex_lock(&tracing_err_log_lock);
7284 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7285 		list_del(&err->list);
7286 		kfree(err);
7287 	}
7288 
7289 	tr->n_err_log_entries = 0;
7290 	mutex_unlock(&tracing_err_log_lock);
7291 }
7292 
7293 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7294 {
7295 	struct trace_array *tr = m->private;
7296 
7297 	mutex_lock(&tracing_err_log_lock);
7298 
7299 	return seq_list_start(&tr->err_log, *pos);
7300 }
7301 
7302 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7303 {
7304 	struct trace_array *tr = m->private;
7305 
7306 	return seq_list_next(v, &tr->err_log, pos);
7307 }
7308 
7309 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7310 {
7311 	mutex_unlock(&tracing_err_log_lock);
7312 }
7313 
7314 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7315 {
7316 	u8 i;
7317 
7318 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7319 		seq_putc(m, ' ');
7320 	for (i = 0; i < pos; i++)
7321 		seq_putc(m, ' ');
7322 	seq_puts(m, "^\n");
7323 }
7324 
7325 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7326 {
7327 	struct tracing_log_err *err = v;
7328 
7329 	if (err) {
7330 		const char *err_text = err->info.errs[err->info.type];
7331 		u64 sec = err->info.ts;
7332 		u32 nsec;
7333 
7334 		nsec = do_div(sec, NSEC_PER_SEC);
7335 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7336 			   err->loc, err_text);
7337 		seq_printf(m, "%s", err->cmd);
7338 		tracing_err_log_show_pos(m, err->info.pos);
7339 	}
7340 
7341 	return 0;
7342 }
7343 
7344 static const struct seq_operations tracing_err_log_seq_ops = {
7345 	.start  = tracing_err_log_seq_start,
7346 	.next   = tracing_err_log_seq_next,
7347 	.stop   = tracing_err_log_seq_stop,
7348 	.show   = tracing_err_log_seq_show
7349 };
7350 
7351 static int tracing_err_log_open(struct inode *inode, struct file *file)
7352 {
7353 	struct trace_array *tr = inode->i_private;
7354 	int ret = 0;
7355 
7356 	ret = tracing_check_open_get_tr(tr);
7357 	if (ret)
7358 		return ret;
7359 
7360 	/* If this file was opened for write, then erase contents */
7361 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7362 		clear_tracing_err_log(tr);
7363 
7364 	if (file->f_mode & FMODE_READ) {
7365 		ret = seq_open(file, &tracing_err_log_seq_ops);
7366 		if (!ret) {
7367 			struct seq_file *m = file->private_data;
7368 			m->private = tr;
7369 		} else {
7370 			trace_array_put(tr);
7371 		}
7372 	}
7373 	return ret;
7374 }
7375 
7376 static ssize_t tracing_err_log_write(struct file *file,
7377 				     const char __user *buffer,
7378 				     size_t count, loff_t *ppos)
7379 {
7380 	return count;
7381 }
7382 
7383 static int tracing_err_log_release(struct inode *inode, struct file *file)
7384 {
7385 	struct trace_array *tr = inode->i_private;
7386 
7387 	trace_array_put(tr);
7388 
7389 	if (file->f_mode & FMODE_READ)
7390 		seq_release(inode, file);
7391 
7392 	return 0;
7393 }
7394 
7395 static const struct file_operations tracing_err_log_fops = {
7396 	.open           = tracing_err_log_open,
7397 	.write		= tracing_err_log_write,
7398 	.read           = seq_read,
7399 	.llseek         = seq_lseek,
7400 	.release        = tracing_err_log_release,
7401 };
7402 
7403 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7404 {
7405 	struct trace_array *tr = inode->i_private;
7406 	struct ftrace_buffer_info *info;
7407 	int ret;
7408 
7409 	ret = tracing_check_open_get_tr(tr);
7410 	if (ret)
7411 		return ret;
7412 
7413 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7414 	if (!info) {
7415 		trace_array_put(tr);
7416 		return -ENOMEM;
7417 	}
7418 
7419 	mutex_lock(&trace_types_lock);
7420 
7421 	info->iter.tr		= tr;
7422 	info->iter.cpu_file	= tracing_get_cpu(inode);
7423 	info->iter.trace	= tr->current_trace;
7424 	info->iter.array_buffer = &tr->array_buffer;
7425 	info->spare		= NULL;
7426 	/* Force reading ring buffer for first read */
7427 	info->read		= (unsigned int)-1;
7428 
7429 	filp->private_data = info;
7430 
7431 	tr->trace_ref++;
7432 
7433 	mutex_unlock(&trace_types_lock);
7434 
7435 	ret = nonseekable_open(inode, filp);
7436 	if (ret < 0)
7437 		trace_array_put(tr);
7438 
7439 	return ret;
7440 }
7441 
7442 static __poll_t
7443 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7444 {
7445 	struct ftrace_buffer_info *info = filp->private_data;
7446 	struct trace_iterator *iter = &info->iter;
7447 
7448 	return trace_poll(iter, filp, poll_table);
7449 }
7450 
7451 static ssize_t
7452 tracing_buffers_read(struct file *filp, char __user *ubuf,
7453 		     size_t count, loff_t *ppos)
7454 {
7455 	struct ftrace_buffer_info *info = filp->private_data;
7456 	struct trace_iterator *iter = &info->iter;
7457 	ssize_t ret = 0;
7458 	ssize_t size;
7459 
7460 	if (!count)
7461 		return 0;
7462 
7463 #ifdef CONFIG_TRACER_MAX_TRACE
7464 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7465 		return -EBUSY;
7466 #endif
7467 
7468 	if (!info->spare) {
7469 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7470 							  iter->cpu_file);
7471 		if (IS_ERR(info->spare)) {
7472 			ret = PTR_ERR(info->spare);
7473 			info->spare = NULL;
7474 		} else {
7475 			info->spare_cpu = iter->cpu_file;
7476 		}
7477 	}
7478 	if (!info->spare)
7479 		return ret;
7480 
7481 	/* Do we have previous read data to read? */
7482 	if (info->read < PAGE_SIZE)
7483 		goto read;
7484 
7485  again:
7486 	trace_access_lock(iter->cpu_file);
7487 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7488 				    &info->spare,
7489 				    count,
7490 				    iter->cpu_file, 0);
7491 	trace_access_unlock(iter->cpu_file);
7492 
7493 	if (ret < 0) {
7494 		if (trace_empty(iter)) {
7495 			if ((filp->f_flags & O_NONBLOCK))
7496 				return -EAGAIN;
7497 
7498 			ret = wait_on_pipe(iter, 0);
7499 			if (ret)
7500 				return ret;
7501 
7502 			goto again;
7503 		}
7504 		return 0;
7505 	}
7506 
7507 	info->read = 0;
7508  read:
7509 	size = PAGE_SIZE - info->read;
7510 	if (size > count)
7511 		size = count;
7512 
7513 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7514 	if (ret == size)
7515 		return -EFAULT;
7516 
7517 	size -= ret;
7518 
7519 	*ppos += size;
7520 	info->read += size;
7521 
7522 	return size;
7523 }
7524 
7525 static int tracing_buffers_release(struct inode *inode, struct file *file)
7526 {
7527 	struct ftrace_buffer_info *info = file->private_data;
7528 	struct trace_iterator *iter = &info->iter;
7529 
7530 	mutex_lock(&trace_types_lock);
7531 
7532 	iter->tr->trace_ref--;
7533 
7534 	__trace_array_put(iter->tr);
7535 
7536 	if (info->spare)
7537 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7538 					   info->spare_cpu, info->spare);
7539 	kfree(info);
7540 
7541 	mutex_unlock(&trace_types_lock);
7542 
7543 	return 0;
7544 }
7545 
7546 struct buffer_ref {
7547 	struct trace_buffer	*buffer;
7548 	void			*page;
7549 	int			cpu;
7550 	refcount_t		refcount;
7551 };
7552 
7553 static void buffer_ref_release(struct buffer_ref *ref)
7554 {
7555 	if (!refcount_dec_and_test(&ref->refcount))
7556 		return;
7557 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7558 	kfree(ref);
7559 }
7560 
7561 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7562 				    struct pipe_buffer *buf)
7563 {
7564 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7565 
7566 	buffer_ref_release(ref);
7567 	buf->private = 0;
7568 }
7569 
7570 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7571 				struct pipe_buffer *buf)
7572 {
7573 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7574 
7575 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7576 		return false;
7577 
7578 	refcount_inc(&ref->refcount);
7579 	return true;
7580 }
7581 
7582 /* Pipe buffer operations for a buffer. */
7583 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7584 	.release		= buffer_pipe_buf_release,
7585 	.get			= buffer_pipe_buf_get,
7586 };
7587 
7588 /*
7589  * Callback from splice_to_pipe(), if we need to release some pages
7590  * at the end of the spd in case we error'ed out in filling the pipe.
7591  */
7592 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7593 {
7594 	struct buffer_ref *ref =
7595 		(struct buffer_ref *)spd->partial[i].private;
7596 
7597 	buffer_ref_release(ref);
7598 	spd->partial[i].private = 0;
7599 }
7600 
7601 static ssize_t
7602 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7603 			    struct pipe_inode_info *pipe, size_t len,
7604 			    unsigned int flags)
7605 {
7606 	struct ftrace_buffer_info *info = file->private_data;
7607 	struct trace_iterator *iter = &info->iter;
7608 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7609 	struct page *pages_def[PIPE_DEF_BUFFERS];
7610 	struct splice_pipe_desc spd = {
7611 		.pages		= pages_def,
7612 		.partial	= partial_def,
7613 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7614 		.ops		= &buffer_pipe_buf_ops,
7615 		.spd_release	= buffer_spd_release,
7616 	};
7617 	struct buffer_ref *ref;
7618 	int entries, i;
7619 	ssize_t ret = 0;
7620 
7621 #ifdef CONFIG_TRACER_MAX_TRACE
7622 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7623 		return -EBUSY;
7624 #endif
7625 
7626 	if (*ppos & (PAGE_SIZE - 1))
7627 		return -EINVAL;
7628 
7629 	if (len & (PAGE_SIZE - 1)) {
7630 		if (len < PAGE_SIZE)
7631 			return -EINVAL;
7632 		len &= PAGE_MASK;
7633 	}
7634 
7635 	if (splice_grow_spd(pipe, &spd))
7636 		return -ENOMEM;
7637 
7638  again:
7639 	trace_access_lock(iter->cpu_file);
7640 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7641 
7642 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7643 		struct page *page;
7644 		int r;
7645 
7646 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7647 		if (!ref) {
7648 			ret = -ENOMEM;
7649 			break;
7650 		}
7651 
7652 		refcount_set(&ref->refcount, 1);
7653 		ref->buffer = iter->array_buffer->buffer;
7654 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7655 		if (IS_ERR(ref->page)) {
7656 			ret = PTR_ERR(ref->page);
7657 			ref->page = NULL;
7658 			kfree(ref);
7659 			break;
7660 		}
7661 		ref->cpu = iter->cpu_file;
7662 
7663 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7664 					  len, iter->cpu_file, 1);
7665 		if (r < 0) {
7666 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7667 						   ref->page);
7668 			kfree(ref);
7669 			break;
7670 		}
7671 
7672 		page = virt_to_page(ref->page);
7673 
7674 		spd.pages[i] = page;
7675 		spd.partial[i].len = PAGE_SIZE;
7676 		spd.partial[i].offset = 0;
7677 		spd.partial[i].private = (unsigned long)ref;
7678 		spd.nr_pages++;
7679 		*ppos += PAGE_SIZE;
7680 
7681 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7682 	}
7683 
7684 	trace_access_unlock(iter->cpu_file);
7685 	spd.nr_pages = i;
7686 
7687 	/* did we read anything? */
7688 	if (!spd.nr_pages) {
7689 		if (ret)
7690 			goto out;
7691 
7692 		ret = -EAGAIN;
7693 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7694 			goto out;
7695 
7696 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7697 		if (ret)
7698 			goto out;
7699 
7700 		goto again;
7701 	}
7702 
7703 	ret = splice_to_pipe(pipe, &spd);
7704 out:
7705 	splice_shrink_spd(&spd);
7706 
7707 	return ret;
7708 }
7709 
7710 static const struct file_operations tracing_buffers_fops = {
7711 	.open		= tracing_buffers_open,
7712 	.read		= tracing_buffers_read,
7713 	.poll		= tracing_buffers_poll,
7714 	.release	= tracing_buffers_release,
7715 	.splice_read	= tracing_buffers_splice_read,
7716 	.llseek		= no_llseek,
7717 };
7718 
7719 static ssize_t
7720 tracing_stats_read(struct file *filp, char __user *ubuf,
7721 		   size_t count, loff_t *ppos)
7722 {
7723 	struct inode *inode = file_inode(filp);
7724 	struct trace_array *tr = inode->i_private;
7725 	struct array_buffer *trace_buf = &tr->array_buffer;
7726 	int cpu = tracing_get_cpu(inode);
7727 	struct trace_seq *s;
7728 	unsigned long cnt;
7729 	unsigned long long t;
7730 	unsigned long usec_rem;
7731 
7732 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7733 	if (!s)
7734 		return -ENOMEM;
7735 
7736 	trace_seq_init(s);
7737 
7738 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7739 	trace_seq_printf(s, "entries: %ld\n", cnt);
7740 
7741 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7742 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7743 
7744 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7745 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7746 
7747 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7748 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7749 
7750 	if (trace_clocks[tr->clock_id].in_ns) {
7751 		/* local or global for trace_clock */
7752 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7753 		usec_rem = do_div(t, USEC_PER_SEC);
7754 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7755 								t, usec_rem);
7756 
7757 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7758 		usec_rem = do_div(t, USEC_PER_SEC);
7759 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7760 	} else {
7761 		/* counter or tsc mode for trace_clock */
7762 		trace_seq_printf(s, "oldest event ts: %llu\n",
7763 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7764 
7765 		trace_seq_printf(s, "now ts: %llu\n",
7766 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7767 	}
7768 
7769 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7770 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7771 
7772 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7773 	trace_seq_printf(s, "read events: %ld\n", cnt);
7774 
7775 	count = simple_read_from_buffer(ubuf, count, ppos,
7776 					s->buffer, trace_seq_used(s));
7777 
7778 	kfree(s);
7779 
7780 	return count;
7781 }
7782 
7783 static const struct file_operations tracing_stats_fops = {
7784 	.open		= tracing_open_generic_tr,
7785 	.read		= tracing_stats_read,
7786 	.llseek		= generic_file_llseek,
7787 	.release	= tracing_release_generic_tr,
7788 };
7789 
7790 #ifdef CONFIG_DYNAMIC_FTRACE
7791 
7792 static ssize_t
7793 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7794 		  size_t cnt, loff_t *ppos)
7795 {
7796 	ssize_t ret;
7797 	char *buf;
7798 	int r;
7799 
7800 	/* 256 should be plenty to hold the amount needed */
7801 	buf = kmalloc(256, GFP_KERNEL);
7802 	if (!buf)
7803 		return -ENOMEM;
7804 
7805 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7806 		      ftrace_update_tot_cnt,
7807 		      ftrace_number_of_pages,
7808 		      ftrace_number_of_groups);
7809 
7810 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7811 	kfree(buf);
7812 	return ret;
7813 }
7814 
7815 static const struct file_operations tracing_dyn_info_fops = {
7816 	.open		= tracing_open_generic,
7817 	.read		= tracing_read_dyn_info,
7818 	.llseek		= generic_file_llseek,
7819 };
7820 #endif /* CONFIG_DYNAMIC_FTRACE */
7821 
7822 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7823 static void
7824 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7825 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7826 		void *data)
7827 {
7828 	tracing_snapshot_instance(tr);
7829 }
7830 
7831 static void
7832 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7833 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7834 		      void *data)
7835 {
7836 	struct ftrace_func_mapper *mapper = data;
7837 	long *count = NULL;
7838 
7839 	if (mapper)
7840 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7841 
7842 	if (count) {
7843 
7844 		if (*count <= 0)
7845 			return;
7846 
7847 		(*count)--;
7848 	}
7849 
7850 	tracing_snapshot_instance(tr);
7851 }
7852 
7853 static int
7854 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7855 		      struct ftrace_probe_ops *ops, void *data)
7856 {
7857 	struct ftrace_func_mapper *mapper = data;
7858 	long *count = NULL;
7859 
7860 	seq_printf(m, "%ps:", (void *)ip);
7861 
7862 	seq_puts(m, "snapshot");
7863 
7864 	if (mapper)
7865 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7866 
7867 	if (count)
7868 		seq_printf(m, ":count=%ld\n", *count);
7869 	else
7870 		seq_puts(m, ":unlimited\n");
7871 
7872 	return 0;
7873 }
7874 
7875 static int
7876 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7877 		     unsigned long ip, void *init_data, void **data)
7878 {
7879 	struct ftrace_func_mapper *mapper = *data;
7880 
7881 	if (!mapper) {
7882 		mapper = allocate_ftrace_func_mapper();
7883 		if (!mapper)
7884 			return -ENOMEM;
7885 		*data = mapper;
7886 	}
7887 
7888 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7889 }
7890 
7891 static void
7892 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7893 		     unsigned long ip, void *data)
7894 {
7895 	struct ftrace_func_mapper *mapper = data;
7896 
7897 	if (!ip) {
7898 		if (!mapper)
7899 			return;
7900 		free_ftrace_func_mapper(mapper, NULL);
7901 		return;
7902 	}
7903 
7904 	ftrace_func_mapper_remove_ip(mapper, ip);
7905 }
7906 
7907 static struct ftrace_probe_ops snapshot_probe_ops = {
7908 	.func			= ftrace_snapshot,
7909 	.print			= ftrace_snapshot_print,
7910 };
7911 
7912 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7913 	.func			= ftrace_count_snapshot,
7914 	.print			= ftrace_snapshot_print,
7915 	.init			= ftrace_snapshot_init,
7916 	.free			= ftrace_snapshot_free,
7917 };
7918 
7919 static int
7920 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7921 			       char *glob, char *cmd, char *param, int enable)
7922 {
7923 	struct ftrace_probe_ops *ops;
7924 	void *count = (void *)-1;
7925 	char *number;
7926 	int ret;
7927 
7928 	if (!tr)
7929 		return -ENODEV;
7930 
7931 	/* hash funcs only work with set_ftrace_filter */
7932 	if (!enable)
7933 		return -EINVAL;
7934 
7935 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7936 
7937 	if (glob[0] == '!')
7938 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7939 
7940 	if (!param)
7941 		goto out_reg;
7942 
7943 	number = strsep(&param, ":");
7944 
7945 	if (!strlen(number))
7946 		goto out_reg;
7947 
7948 	/*
7949 	 * We use the callback data field (which is a pointer)
7950 	 * as our counter.
7951 	 */
7952 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7953 	if (ret)
7954 		return ret;
7955 
7956  out_reg:
7957 	ret = tracing_alloc_snapshot_instance(tr);
7958 	if (ret < 0)
7959 		goto out;
7960 
7961 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7962 
7963  out:
7964 	return ret < 0 ? ret : 0;
7965 }
7966 
7967 static struct ftrace_func_command ftrace_snapshot_cmd = {
7968 	.name			= "snapshot",
7969 	.func			= ftrace_trace_snapshot_callback,
7970 };
7971 
7972 static __init int register_snapshot_cmd(void)
7973 {
7974 	return register_ftrace_command(&ftrace_snapshot_cmd);
7975 }
7976 #else
7977 static inline __init int register_snapshot_cmd(void) { return 0; }
7978 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7979 
7980 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7981 {
7982 	if (WARN_ON(!tr->dir))
7983 		return ERR_PTR(-ENODEV);
7984 
7985 	/* Top directory uses NULL as the parent */
7986 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7987 		return NULL;
7988 
7989 	/* All sub buffers have a descriptor */
7990 	return tr->dir;
7991 }
7992 
7993 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7994 {
7995 	struct dentry *d_tracer;
7996 
7997 	if (tr->percpu_dir)
7998 		return tr->percpu_dir;
7999 
8000 	d_tracer = tracing_get_dentry(tr);
8001 	if (IS_ERR(d_tracer))
8002 		return NULL;
8003 
8004 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8005 
8006 	MEM_FAIL(!tr->percpu_dir,
8007 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8008 
8009 	return tr->percpu_dir;
8010 }
8011 
8012 static struct dentry *
8013 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8014 		      void *data, long cpu, const struct file_operations *fops)
8015 {
8016 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8017 
8018 	if (ret) /* See tracing_get_cpu() */
8019 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8020 	return ret;
8021 }
8022 
8023 static void
8024 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8025 {
8026 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8027 	struct dentry *d_cpu;
8028 	char cpu_dir[30]; /* 30 characters should be more than enough */
8029 
8030 	if (!d_percpu)
8031 		return;
8032 
8033 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8034 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8035 	if (!d_cpu) {
8036 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8037 		return;
8038 	}
8039 
8040 	/* per cpu trace_pipe */
8041 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8042 				tr, cpu, &tracing_pipe_fops);
8043 
8044 	/* per cpu trace */
8045 	trace_create_cpu_file("trace", 0644, d_cpu,
8046 				tr, cpu, &tracing_fops);
8047 
8048 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8049 				tr, cpu, &tracing_buffers_fops);
8050 
8051 	trace_create_cpu_file("stats", 0444, d_cpu,
8052 				tr, cpu, &tracing_stats_fops);
8053 
8054 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8055 				tr, cpu, &tracing_entries_fops);
8056 
8057 #ifdef CONFIG_TRACER_SNAPSHOT
8058 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8059 				tr, cpu, &snapshot_fops);
8060 
8061 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8062 				tr, cpu, &snapshot_raw_fops);
8063 #endif
8064 }
8065 
8066 #ifdef CONFIG_FTRACE_SELFTEST
8067 /* Let selftest have access to static functions in this file */
8068 #include "trace_selftest.c"
8069 #endif
8070 
8071 static ssize_t
8072 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8073 			loff_t *ppos)
8074 {
8075 	struct trace_option_dentry *topt = filp->private_data;
8076 	char *buf;
8077 
8078 	if (topt->flags->val & topt->opt->bit)
8079 		buf = "1\n";
8080 	else
8081 		buf = "0\n";
8082 
8083 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8084 }
8085 
8086 static ssize_t
8087 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8088 			 loff_t *ppos)
8089 {
8090 	struct trace_option_dentry *topt = filp->private_data;
8091 	unsigned long val;
8092 	int ret;
8093 
8094 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8095 	if (ret)
8096 		return ret;
8097 
8098 	if (val != 0 && val != 1)
8099 		return -EINVAL;
8100 
8101 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8102 		mutex_lock(&trace_types_lock);
8103 		ret = __set_tracer_option(topt->tr, topt->flags,
8104 					  topt->opt, !val);
8105 		mutex_unlock(&trace_types_lock);
8106 		if (ret)
8107 			return ret;
8108 	}
8109 
8110 	*ppos += cnt;
8111 
8112 	return cnt;
8113 }
8114 
8115 
8116 static const struct file_operations trace_options_fops = {
8117 	.open = tracing_open_generic,
8118 	.read = trace_options_read,
8119 	.write = trace_options_write,
8120 	.llseek	= generic_file_llseek,
8121 };
8122 
8123 /*
8124  * In order to pass in both the trace_array descriptor as well as the index
8125  * to the flag that the trace option file represents, the trace_array
8126  * has a character array of trace_flags_index[], which holds the index
8127  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8128  * The address of this character array is passed to the flag option file
8129  * read/write callbacks.
8130  *
8131  * In order to extract both the index and the trace_array descriptor,
8132  * get_tr_index() uses the following algorithm.
8133  *
8134  *   idx = *ptr;
8135  *
8136  * As the pointer itself contains the address of the index (remember
8137  * index[1] == 1).
8138  *
8139  * Then to get the trace_array descriptor, by subtracting that index
8140  * from the ptr, we get to the start of the index itself.
8141  *
8142  *   ptr - idx == &index[0]
8143  *
8144  * Then a simple container_of() from that pointer gets us to the
8145  * trace_array descriptor.
8146  */
8147 static void get_tr_index(void *data, struct trace_array **ptr,
8148 			 unsigned int *pindex)
8149 {
8150 	*pindex = *(unsigned char *)data;
8151 
8152 	*ptr = container_of(data - *pindex, struct trace_array,
8153 			    trace_flags_index);
8154 }
8155 
8156 static ssize_t
8157 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8158 			loff_t *ppos)
8159 {
8160 	void *tr_index = filp->private_data;
8161 	struct trace_array *tr;
8162 	unsigned int index;
8163 	char *buf;
8164 
8165 	get_tr_index(tr_index, &tr, &index);
8166 
8167 	if (tr->trace_flags & (1 << index))
8168 		buf = "1\n";
8169 	else
8170 		buf = "0\n";
8171 
8172 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8173 }
8174 
8175 static ssize_t
8176 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8177 			 loff_t *ppos)
8178 {
8179 	void *tr_index = filp->private_data;
8180 	struct trace_array *tr;
8181 	unsigned int index;
8182 	unsigned long val;
8183 	int ret;
8184 
8185 	get_tr_index(tr_index, &tr, &index);
8186 
8187 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8188 	if (ret)
8189 		return ret;
8190 
8191 	if (val != 0 && val != 1)
8192 		return -EINVAL;
8193 
8194 	mutex_lock(&event_mutex);
8195 	mutex_lock(&trace_types_lock);
8196 	ret = set_tracer_flag(tr, 1 << index, val);
8197 	mutex_unlock(&trace_types_lock);
8198 	mutex_unlock(&event_mutex);
8199 
8200 	if (ret < 0)
8201 		return ret;
8202 
8203 	*ppos += cnt;
8204 
8205 	return cnt;
8206 }
8207 
8208 static const struct file_operations trace_options_core_fops = {
8209 	.open = tracing_open_generic,
8210 	.read = trace_options_core_read,
8211 	.write = trace_options_core_write,
8212 	.llseek = generic_file_llseek,
8213 };
8214 
8215 struct dentry *trace_create_file(const char *name,
8216 				 umode_t mode,
8217 				 struct dentry *parent,
8218 				 void *data,
8219 				 const struct file_operations *fops)
8220 {
8221 	struct dentry *ret;
8222 
8223 	ret = tracefs_create_file(name, mode, parent, data, fops);
8224 	if (!ret)
8225 		pr_warn("Could not create tracefs '%s' entry\n", name);
8226 
8227 	return ret;
8228 }
8229 
8230 
8231 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8232 {
8233 	struct dentry *d_tracer;
8234 
8235 	if (tr->options)
8236 		return tr->options;
8237 
8238 	d_tracer = tracing_get_dentry(tr);
8239 	if (IS_ERR(d_tracer))
8240 		return NULL;
8241 
8242 	tr->options = tracefs_create_dir("options", d_tracer);
8243 	if (!tr->options) {
8244 		pr_warn("Could not create tracefs directory 'options'\n");
8245 		return NULL;
8246 	}
8247 
8248 	return tr->options;
8249 }
8250 
8251 static void
8252 create_trace_option_file(struct trace_array *tr,
8253 			 struct trace_option_dentry *topt,
8254 			 struct tracer_flags *flags,
8255 			 struct tracer_opt *opt)
8256 {
8257 	struct dentry *t_options;
8258 
8259 	t_options = trace_options_init_dentry(tr);
8260 	if (!t_options)
8261 		return;
8262 
8263 	topt->flags = flags;
8264 	topt->opt = opt;
8265 	topt->tr = tr;
8266 
8267 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8268 				    &trace_options_fops);
8269 
8270 }
8271 
8272 static void
8273 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8274 {
8275 	struct trace_option_dentry *topts;
8276 	struct trace_options *tr_topts;
8277 	struct tracer_flags *flags;
8278 	struct tracer_opt *opts;
8279 	int cnt;
8280 	int i;
8281 
8282 	if (!tracer)
8283 		return;
8284 
8285 	flags = tracer->flags;
8286 
8287 	if (!flags || !flags->opts)
8288 		return;
8289 
8290 	/*
8291 	 * If this is an instance, only create flags for tracers
8292 	 * the instance may have.
8293 	 */
8294 	if (!trace_ok_for_array(tracer, tr))
8295 		return;
8296 
8297 	for (i = 0; i < tr->nr_topts; i++) {
8298 		/* Make sure there's no duplicate flags. */
8299 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8300 			return;
8301 	}
8302 
8303 	opts = flags->opts;
8304 
8305 	for (cnt = 0; opts[cnt].name; cnt++)
8306 		;
8307 
8308 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8309 	if (!topts)
8310 		return;
8311 
8312 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8313 			    GFP_KERNEL);
8314 	if (!tr_topts) {
8315 		kfree(topts);
8316 		return;
8317 	}
8318 
8319 	tr->topts = tr_topts;
8320 	tr->topts[tr->nr_topts].tracer = tracer;
8321 	tr->topts[tr->nr_topts].topts = topts;
8322 	tr->nr_topts++;
8323 
8324 	for (cnt = 0; opts[cnt].name; cnt++) {
8325 		create_trace_option_file(tr, &topts[cnt], flags,
8326 					 &opts[cnt]);
8327 		MEM_FAIL(topts[cnt].entry == NULL,
8328 			  "Failed to create trace option: %s",
8329 			  opts[cnt].name);
8330 	}
8331 }
8332 
8333 static struct dentry *
8334 create_trace_option_core_file(struct trace_array *tr,
8335 			      const char *option, long index)
8336 {
8337 	struct dentry *t_options;
8338 
8339 	t_options = trace_options_init_dentry(tr);
8340 	if (!t_options)
8341 		return NULL;
8342 
8343 	return trace_create_file(option, 0644, t_options,
8344 				 (void *)&tr->trace_flags_index[index],
8345 				 &trace_options_core_fops);
8346 }
8347 
8348 static void create_trace_options_dir(struct trace_array *tr)
8349 {
8350 	struct dentry *t_options;
8351 	bool top_level = tr == &global_trace;
8352 	int i;
8353 
8354 	t_options = trace_options_init_dentry(tr);
8355 	if (!t_options)
8356 		return;
8357 
8358 	for (i = 0; trace_options[i]; i++) {
8359 		if (top_level ||
8360 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8361 			create_trace_option_core_file(tr, trace_options[i], i);
8362 	}
8363 }
8364 
8365 static ssize_t
8366 rb_simple_read(struct file *filp, char __user *ubuf,
8367 	       size_t cnt, loff_t *ppos)
8368 {
8369 	struct trace_array *tr = filp->private_data;
8370 	char buf[64];
8371 	int r;
8372 
8373 	r = tracer_tracing_is_on(tr);
8374 	r = sprintf(buf, "%d\n", r);
8375 
8376 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8377 }
8378 
8379 static ssize_t
8380 rb_simple_write(struct file *filp, const char __user *ubuf,
8381 		size_t cnt, loff_t *ppos)
8382 {
8383 	struct trace_array *tr = filp->private_data;
8384 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8385 	unsigned long val;
8386 	int ret;
8387 
8388 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8389 	if (ret)
8390 		return ret;
8391 
8392 	if (buffer) {
8393 		mutex_lock(&trace_types_lock);
8394 		if (!!val == tracer_tracing_is_on(tr)) {
8395 			val = 0; /* do nothing */
8396 		} else if (val) {
8397 			tracer_tracing_on(tr);
8398 			if (tr->current_trace->start)
8399 				tr->current_trace->start(tr);
8400 		} else {
8401 			tracer_tracing_off(tr);
8402 			if (tr->current_trace->stop)
8403 				tr->current_trace->stop(tr);
8404 		}
8405 		mutex_unlock(&trace_types_lock);
8406 	}
8407 
8408 	(*ppos)++;
8409 
8410 	return cnt;
8411 }
8412 
8413 static const struct file_operations rb_simple_fops = {
8414 	.open		= tracing_open_generic_tr,
8415 	.read		= rb_simple_read,
8416 	.write		= rb_simple_write,
8417 	.release	= tracing_release_generic_tr,
8418 	.llseek		= default_llseek,
8419 };
8420 
8421 static ssize_t
8422 buffer_percent_read(struct file *filp, char __user *ubuf,
8423 		    size_t cnt, loff_t *ppos)
8424 {
8425 	struct trace_array *tr = filp->private_data;
8426 	char buf[64];
8427 	int r;
8428 
8429 	r = tr->buffer_percent;
8430 	r = sprintf(buf, "%d\n", r);
8431 
8432 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8433 }
8434 
8435 static ssize_t
8436 buffer_percent_write(struct file *filp, const char __user *ubuf,
8437 		     size_t cnt, loff_t *ppos)
8438 {
8439 	struct trace_array *tr = filp->private_data;
8440 	unsigned long val;
8441 	int ret;
8442 
8443 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8444 	if (ret)
8445 		return ret;
8446 
8447 	if (val > 100)
8448 		return -EINVAL;
8449 
8450 	if (!val)
8451 		val = 1;
8452 
8453 	tr->buffer_percent = val;
8454 
8455 	(*ppos)++;
8456 
8457 	return cnt;
8458 }
8459 
8460 static const struct file_operations buffer_percent_fops = {
8461 	.open		= tracing_open_generic_tr,
8462 	.read		= buffer_percent_read,
8463 	.write		= buffer_percent_write,
8464 	.release	= tracing_release_generic_tr,
8465 	.llseek		= default_llseek,
8466 };
8467 
8468 static struct dentry *trace_instance_dir;
8469 
8470 static void
8471 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8472 
8473 static int
8474 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8475 {
8476 	enum ring_buffer_flags rb_flags;
8477 
8478 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8479 
8480 	buf->tr = tr;
8481 
8482 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8483 	if (!buf->buffer)
8484 		return -ENOMEM;
8485 
8486 	buf->data = alloc_percpu(struct trace_array_cpu);
8487 	if (!buf->data) {
8488 		ring_buffer_free(buf->buffer);
8489 		buf->buffer = NULL;
8490 		return -ENOMEM;
8491 	}
8492 
8493 	/* Allocate the first page for all buffers */
8494 	set_buffer_entries(&tr->array_buffer,
8495 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8496 
8497 	return 0;
8498 }
8499 
8500 static int allocate_trace_buffers(struct trace_array *tr, int size)
8501 {
8502 	int ret;
8503 
8504 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8505 	if (ret)
8506 		return ret;
8507 
8508 #ifdef CONFIG_TRACER_MAX_TRACE
8509 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8510 				    allocate_snapshot ? size : 1);
8511 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8512 		ring_buffer_free(tr->array_buffer.buffer);
8513 		tr->array_buffer.buffer = NULL;
8514 		free_percpu(tr->array_buffer.data);
8515 		tr->array_buffer.data = NULL;
8516 		return -ENOMEM;
8517 	}
8518 	tr->allocated_snapshot = allocate_snapshot;
8519 
8520 	/*
8521 	 * Only the top level trace array gets its snapshot allocated
8522 	 * from the kernel command line.
8523 	 */
8524 	allocate_snapshot = false;
8525 #endif
8526 
8527 	return 0;
8528 }
8529 
8530 static void free_trace_buffer(struct array_buffer *buf)
8531 {
8532 	if (buf->buffer) {
8533 		ring_buffer_free(buf->buffer);
8534 		buf->buffer = NULL;
8535 		free_percpu(buf->data);
8536 		buf->data = NULL;
8537 	}
8538 }
8539 
8540 static void free_trace_buffers(struct trace_array *tr)
8541 {
8542 	if (!tr)
8543 		return;
8544 
8545 	free_trace_buffer(&tr->array_buffer);
8546 
8547 #ifdef CONFIG_TRACER_MAX_TRACE
8548 	free_trace_buffer(&tr->max_buffer);
8549 #endif
8550 }
8551 
8552 static void init_trace_flags_index(struct trace_array *tr)
8553 {
8554 	int i;
8555 
8556 	/* Used by the trace options files */
8557 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8558 		tr->trace_flags_index[i] = i;
8559 }
8560 
8561 static void __update_tracer_options(struct trace_array *tr)
8562 {
8563 	struct tracer *t;
8564 
8565 	for (t = trace_types; t; t = t->next)
8566 		add_tracer_options(tr, t);
8567 }
8568 
8569 static void update_tracer_options(struct trace_array *tr)
8570 {
8571 	mutex_lock(&trace_types_lock);
8572 	__update_tracer_options(tr);
8573 	mutex_unlock(&trace_types_lock);
8574 }
8575 
8576 /* Must have trace_types_lock held */
8577 struct trace_array *trace_array_find(const char *instance)
8578 {
8579 	struct trace_array *tr, *found = NULL;
8580 
8581 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8582 		if (tr->name && strcmp(tr->name, instance) == 0) {
8583 			found = tr;
8584 			break;
8585 		}
8586 	}
8587 
8588 	return found;
8589 }
8590 
8591 struct trace_array *trace_array_find_get(const char *instance)
8592 {
8593 	struct trace_array *tr;
8594 
8595 	mutex_lock(&trace_types_lock);
8596 	tr = trace_array_find(instance);
8597 	if (tr)
8598 		tr->ref++;
8599 	mutex_unlock(&trace_types_lock);
8600 
8601 	return tr;
8602 }
8603 
8604 static struct trace_array *trace_array_create(const char *name)
8605 {
8606 	struct trace_array *tr;
8607 	int ret;
8608 
8609 	ret = -ENOMEM;
8610 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8611 	if (!tr)
8612 		return ERR_PTR(ret);
8613 
8614 	tr->name = kstrdup(name, GFP_KERNEL);
8615 	if (!tr->name)
8616 		goto out_free_tr;
8617 
8618 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8619 		goto out_free_tr;
8620 
8621 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8622 
8623 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8624 
8625 	raw_spin_lock_init(&tr->start_lock);
8626 
8627 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8628 
8629 	tr->current_trace = &nop_trace;
8630 
8631 	INIT_LIST_HEAD(&tr->systems);
8632 	INIT_LIST_HEAD(&tr->events);
8633 	INIT_LIST_HEAD(&tr->hist_vars);
8634 	INIT_LIST_HEAD(&tr->err_log);
8635 
8636 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8637 		goto out_free_tr;
8638 
8639 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8640 	if (!tr->dir)
8641 		goto out_free_tr;
8642 
8643 	ret = event_trace_add_tracer(tr->dir, tr);
8644 	if (ret) {
8645 		tracefs_remove(tr->dir);
8646 		goto out_free_tr;
8647 	}
8648 
8649 	ftrace_init_trace_array(tr);
8650 
8651 	init_tracer_tracefs(tr, tr->dir);
8652 	init_trace_flags_index(tr);
8653 	__update_tracer_options(tr);
8654 
8655 	list_add(&tr->list, &ftrace_trace_arrays);
8656 
8657 	tr->ref++;
8658 
8659 
8660 	return tr;
8661 
8662  out_free_tr:
8663 	free_trace_buffers(tr);
8664 	free_cpumask_var(tr->tracing_cpumask);
8665 	kfree(tr->name);
8666 	kfree(tr);
8667 
8668 	return ERR_PTR(ret);
8669 }
8670 
8671 static int instance_mkdir(const char *name)
8672 {
8673 	struct trace_array *tr;
8674 	int ret;
8675 
8676 	mutex_lock(&event_mutex);
8677 	mutex_lock(&trace_types_lock);
8678 
8679 	ret = -EEXIST;
8680 	if (trace_array_find(name))
8681 		goto out_unlock;
8682 
8683 	tr = trace_array_create(name);
8684 
8685 	ret = PTR_ERR_OR_ZERO(tr);
8686 
8687 out_unlock:
8688 	mutex_unlock(&trace_types_lock);
8689 	mutex_unlock(&event_mutex);
8690 	return ret;
8691 }
8692 
8693 /**
8694  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8695  * @name: The name of the trace array to be looked up/created.
8696  *
8697  * Returns pointer to trace array with given name.
8698  * NULL, if it cannot be created.
8699  *
8700  * NOTE: This function increments the reference counter associated with the
8701  * trace array returned. This makes sure it cannot be freed while in use.
8702  * Use trace_array_put() once the trace array is no longer needed.
8703  * If the trace_array is to be freed, trace_array_destroy() needs to
8704  * be called after the trace_array_put(), or simply let user space delete
8705  * it from the tracefs instances directory. But until the
8706  * trace_array_put() is called, user space can not delete it.
8707  *
8708  */
8709 struct trace_array *trace_array_get_by_name(const char *name)
8710 {
8711 	struct trace_array *tr;
8712 
8713 	mutex_lock(&event_mutex);
8714 	mutex_lock(&trace_types_lock);
8715 
8716 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8717 		if (tr->name && strcmp(tr->name, name) == 0)
8718 			goto out_unlock;
8719 	}
8720 
8721 	tr = trace_array_create(name);
8722 
8723 	if (IS_ERR(tr))
8724 		tr = NULL;
8725 out_unlock:
8726 	if (tr)
8727 		tr->ref++;
8728 
8729 	mutex_unlock(&trace_types_lock);
8730 	mutex_unlock(&event_mutex);
8731 	return tr;
8732 }
8733 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8734 
8735 static int __remove_instance(struct trace_array *tr)
8736 {
8737 	int i;
8738 
8739 	/* Reference counter for a newly created trace array = 1. */
8740 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8741 		return -EBUSY;
8742 
8743 	list_del(&tr->list);
8744 
8745 	/* Disable all the flags that were enabled coming in */
8746 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8747 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8748 			set_tracer_flag(tr, 1 << i, 0);
8749 	}
8750 
8751 	tracing_set_nop(tr);
8752 	clear_ftrace_function_probes(tr);
8753 	event_trace_del_tracer(tr);
8754 	ftrace_clear_pids(tr);
8755 	ftrace_destroy_function_files(tr);
8756 	tracefs_remove(tr->dir);
8757 	free_trace_buffers(tr);
8758 
8759 	for (i = 0; i < tr->nr_topts; i++) {
8760 		kfree(tr->topts[i].topts);
8761 	}
8762 	kfree(tr->topts);
8763 
8764 	free_cpumask_var(tr->tracing_cpumask);
8765 	kfree(tr->name);
8766 	kfree(tr);
8767 	tr = NULL;
8768 
8769 	return 0;
8770 }
8771 
8772 int trace_array_destroy(struct trace_array *this_tr)
8773 {
8774 	struct trace_array *tr;
8775 	int ret;
8776 
8777 	if (!this_tr)
8778 		return -EINVAL;
8779 
8780 	mutex_lock(&event_mutex);
8781 	mutex_lock(&trace_types_lock);
8782 
8783 	ret = -ENODEV;
8784 
8785 	/* Making sure trace array exists before destroying it. */
8786 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8787 		if (tr == this_tr) {
8788 			ret = __remove_instance(tr);
8789 			break;
8790 		}
8791 	}
8792 
8793 	mutex_unlock(&trace_types_lock);
8794 	mutex_unlock(&event_mutex);
8795 
8796 	return ret;
8797 }
8798 EXPORT_SYMBOL_GPL(trace_array_destroy);
8799 
8800 static int instance_rmdir(const char *name)
8801 {
8802 	struct trace_array *tr;
8803 	int ret;
8804 
8805 	mutex_lock(&event_mutex);
8806 	mutex_lock(&trace_types_lock);
8807 
8808 	ret = -ENODEV;
8809 	tr = trace_array_find(name);
8810 	if (tr)
8811 		ret = __remove_instance(tr);
8812 
8813 	mutex_unlock(&trace_types_lock);
8814 	mutex_unlock(&event_mutex);
8815 
8816 	return ret;
8817 }
8818 
8819 static __init void create_trace_instances(struct dentry *d_tracer)
8820 {
8821 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8822 							 instance_mkdir,
8823 							 instance_rmdir);
8824 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8825 		return;
8826 }
8827 
8828 static void
8829 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8830 {
8831 	struct trace_event_file *file;
8832 	int cpu;
8833 
8834 	trace_create_file("available_tracers", 0444, d_tracer,
8835 			tr, &show_traces_fops);
8836 
8837 	trace_create_file("current_tracer", 0644, d_tracer,
8838 			tr, &set_tracer_fops);
8839 
8840 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8841 			  tr, &tracing_cpumask_fops);
8842 
8843 	trace_create_file("trace_options", 0644, d_tracer,
8844 			  tr, &tracing_iter_fops);
8845 
8846 	trace_create_file("trace", 0644, d_tracer,
8847 			  tr, &tracing_fops);
8848 
8849 	trace_create_file("trace_pipe", 0444, d_tracer,
8850 			  tr, &tracing_pipe_fops);
8851 
8852 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8853 			  tr, &tracing_entries_fops);
8854 
8855 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8856 			  tr, &tracing_total_entries_fops);
8857 
8858 	trace_create_file("free_buffer", 0200, d_tracer,
8859 			  tr, &tracing_free_buffer_fops);
8860 
8861 	trace_create_file("trace_marker", 0220, d_tracer,
8862 			  tr, &tracing_mark_fops);
8863 
8864 	file = __find_event_file(tr, "ftrace", "print");
8865 	if (file && file->dir)
8866 		trace_create_file("trigger", 0644, file->dir, file,
8867 				  &event_trigger_fops);
8868 	tr->trace_marker_file = file;
8869 
8870 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8871 			  tr, &tracing_mark_raw_fops);
8872 
8873 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8874 			  &trace_clock_fops);
8875 
8876 	trace_create_file("tracing_on", 0644, d_tracer,
8877 			  tr, &rb_simple_fops);
8878 
8879 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8880 			  &trace_time_stamp_mode_fops);
8881 
8882 	tr->buffer_percent = 50;
8883 
8884 	trace_create_file("buffer_percent", 0444, d_tracer,
8885 			tr, &buffer_percent_fops);
8886 
8887 	create_trace_options_dir(tr);
8888 
8889 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8890 	trace_create_maxlat_file(tr, d_tracer);
8891 #endif
8892 
8893 	if (ftrace_create_function_files(tr, d_tracer))
8894 		MEM_FAIL(1, "Could not allocate function filter files");
8895 
8896 #ifdef CONFIG_TRACER_SNAPSHOT
8897 	trace_create_file("snapshot", 0644, d_tracer,
8898 			  tr, &snapshot_fops);
8899 #endif
8900 
8901 	trace_create_file("error_log", 0644, d_tracer,
8902 			  tr, &tracing_err_log_fops);
8903 
8904 	for_each_tracing_cpu(cpu)
8905 		tracing_init_tracefs_percpu(tr, cpu);
8906 
8907 	ftrace_init_tracefs(tr, d_tracer);
8908 }
8909 
8910 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8911 {
8912 	struct vfsmount *mnt;
8913 	struct file_system_type *type;
8914 
8915 	/*
8916 	 * To maintain backward compatibility for tools that mount
8917 	 * debugfs to get to the tracing facility, tracefs is automatically
8918 	 * mounted to the debugfs/tracing directory.
8919 	 */
8920 	type = get_fs_type("tracefs");
8921 	if (!type)
8922 		return NULL;
8923 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8924 	put_filesystem(type);
8925 	if (IS_ERR(mnt))
8926 		return NULL;
8927 	mntget(mnt);
8928 
8929 	return mnt;
8930 }
8931 
8932 /**
8933  * tracing_init_dentry - initialize top level trace array
8934  *
8935  * This is called when creating files or directories in the tracing
8936  * directory. It is called via fs_initcall() by any of the boot up code
8937  * and expects to return the dentry of the top level tracing directory.
8938  */
8939 struct dentry *tracing_init_dentry(void)
8940 {
8941 	struct trace_array *tr = &global_trace;
8942 
8943 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8944 		pr_warn("Tracing disabled due to lockdown\n");
8945 		return ERR_PTR(-EPERM);
8946 	}
8947 
8948 	/* The top level trace array uses  NULL as parent */
8949 	if (tr->dir)
8950 		return NULL;
8951 
8952 	if (WARN_ON(!tracefs_initialized()) ||
8953 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8954 		 WARN_ON(!debugfs_initialized())))
8955 		return ERR_PTR(-ENODEV);
8956 
8957 	/*
8958 	 * As there may still be users that expect the tracing
8959 	 * files to exist in debugfs/tracing, we must automount
8960 	 * the tracefs file system there, so older tools still
8961 	 * work with the newer kerenl.
8962 	 */
8963 	tr->dir = debugfs_create_automount("tracing", NULL,
8964 					   trace_automount, NULL);
8965 
8966 	return NULL;
8967 }
8968 
8969 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8970 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8971 
8972 static void __init trace_eval_init(void)
8973 {
8974 	int len;
8975 
8976 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8977 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8978 }
8979 
8980 #ifdef CONFIG_MODULES
8981 static void trace_module_add_evals(struct module *mod)
8982 {
8983 	if (!mod->num_trace_evals)
8984 		return;
8985 
8986 	/*
8987 	 * Modules with bad taint do not have events created, do
8988 	 * not bother with enums either.
8989 	 */
8990 	if (trace_module_has_bad_taint(mod))
8991 		return;
8992 
8993 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8994 }
8995 
8996 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8997 static void trace_module_remove_evals(struct module *mod)
8998 {
8999 	union trace_eval_map_item *map;
9000 	union trace_eval_map_item **last = &trace_eval_maps;
9001 
9002 	if (!mod->num_trace_evals)
9003 		return;
9004 
9005 	mutex_lock(&trace_eval_mutex);
9006 
9007 	map = trace_eval_maps;
9008 
9009 	while (map) {
9010 		if (map->head.mod == mod)
9011 			break;
9012 		map = trace_eval_jmp_to_tail(map);
9013 		last = &map->tail.next;
9014 		map = map->tail.next;
9015 	}
9016 	if (!map)
9017 		goto out;
9018 
9019 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9020 	kfree(map);
9021  out:
9022 	mutex_unlock(&trace_eval_mutex);
9023 }
9024 #else
9025 static inline void trace_module_remove_evals(struct module *mod) { }
9026 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9027 
9028 static int trace_module_notify(struct notifier_block *self,
9029 			       unsigned long val, void *data)
9030 {
9031 	struct module *mod = data;
9032 
9033 	switch (val) {
9034 	case MODULE_STATE_COMING:
9035 		trace_module_add_evals(mod);
9036 		break;
9037 	case MODULE_STATE_GOING:
9038 		trace_module_remove_evals(mod);
9039 		break;
9040 	}
9041 
9042 	return 0;
9043 }
9044 
9045 static struct notifier_block trace_module_nb = {
9046 	.notifier_call = trace_module_notify,
9047 	.priority = 0,
9048 };
9049 #endif /* CONFIG_MODULES */
9050 
9051 static __init int tracer_init_tracefs(void)
9052 {
9053 	struct dentry *d_tracer;
9054 
9055 	trace_access_lock_init();
9056 
9057 	d_tracer = tracing_init_dentry();
9058 	if (IS_ERR(d_tracer))
9059 		return 0;
9060 
9061 	event_trace_init();
9062 
9063 	init_tracer_tracefs(&global_trace, d_tracer);
9064 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9065 
9066 	trace_create_file("tracing_thresh", 0644, d_tracer,
9067 			&global_trace, &tracing_thresh_fops);
9068 
9069 	trace_create_file("README", 0444, d_tracer,
9070 			NULL, &tracing_readme_fops);
9071 
9072 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9073 			NULL, &tracing_saved_cmdlines_fops);
9074 
9075 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9076 			  NULL, &tracing_saved_cmdlines_size_fops);
9077 
9078 	trace_create_file("saved_tgids", 0444, d_tracer,
9079 			NULL, &tracing_saved_tgids_fops);
9080 
9081 	trace_eval_init();
9082 
9083 	trace_create_eval_file(d_tracer);
9084 
9085 #ifdef CONFIG_MODULES
9086 	register_module_notifier(&trace_module_nb);
9087 #endif
9088 
9089 #ifdef CONFIG_DYNAMIC_FTRACE
9090 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9091 			NULL, &tracing_dyn_info_fops);
9092 #endif
9093 
9094 	create_trace_instances(d_tracer);
9095 
9096 	update_tracer_options(&global_trace);
9097 
9098 	return 0;
9099 }
9100 
9101 static int trace_panic_handler(struct notifier_block *this,
9102 			       unsigned long event, void *unused)
9103 {
9104 	if (ftrace_dump_on_oops)
9105 		ftrace_dump(ftrace_dump_on_oops);
9106 	return NOTIFY_OK;
9107 }
9108 
9109 static struct notifier_block trace_panic_notifier = {
9110 	.notifier_call  = trace_panic_handler,
9111 	.next           = NULL,
9112 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9113 };
9114 
9115 static int trace_die_handler(struct notifier_block *self,
9116 			     unsigned long val,
9117 			     void *data)
9118 {
9119 	switch (val) {
9120 	case DIE_OOPS:
9121 		if (ftrace_dump_on_oops)
9122 			ftrace_dump(ftrace_dump_on_oops);
9123 		break;
9124 	default:
9125 		break;
9126 	}
9127 	return NOTIFY_OK;
9128 }
9129 
9130 static struct notifier_block trace_die_notifier = {
9131 	.notifier_call = trace_die_handler,
9132 	.priority = 200
9133 };
9134 
9135 /*
9136  * printk is set to max of 1024, we really don't need it that big.
9137  * Nothing should be printing 1000 characters anyway.
9138  */
9139 #define TRACE_MAX_PRINT		1000
9140 
9141 /*
9142  * Define here KERN_TRACE so that we have one place to modify
9143  * it if we decide to change what log level the ftrace dump
9144  * should be at.
9145  */
9146 #define KERN_TRACE		KERN_EMERG
9147 
9148 void
9149 trace_printk_seq(struct trace_seq *s)
9150 {
9151 	/* Probably should print a warning here. */
9152 	if (s->seq.len >= TRACE_MAX_PRINT)
9153 		s->seq.len = TRACE_MAX_PRINT;
9154 
9155 	/*
9156 	 * More paranoid code. Although the buffer size is set to
9157 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9158 	 * an extra layer of protection.
9159 	 */
9160 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9161 		s->seq.len = s->seq.size - 1;
9162 
9163 	/* should be zero ended, but we are paranoid. */
9164 	s->buffer[s->seq.len] = 0;
9165 
9166 	printk(KERN_TRACE "%s", s->buffer);
9167 
9168 	trace_seq_init(s);
9169 }
9170 
9171 void trace_init_global_iter(struct trace_iterator *iter)
9172 {
9173 	iter->tr = &global_trace;
9174 	iter->trace = iter->tr->current_trace;
9175 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9176 	iter->array_buffer = &global_trace.array_buffer;
9177 
9178 	if (iter->trace && iter->trace->open)
9179 		iter->trace->open(iter);
9180 
9181 	/* Annotate start of buffers if we had overruns */
9182 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9183 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9184 
9185 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9186 	if (trace_clocks[iter->tr->clock_id].in_ns)
9187 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9188 }
9189 
9190 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9191 {
9192 	/* use static because iter can be a bit big for the stack */
9193 	static struct trace_iterator iter;
9194 	static atomic_t dump_running;
9195 	struct trace_array *tr = &global_trace;
9196 	unsigned int old_userobj;
9197 	unsigned long flags;
9198 	int cnt = 0, cpu;
9199 
9200 	/* Only allow one dump user at a time. */
9201 	if (atomic_inc_return(&dump_running) != 1) {
9202 		atomic_dec(&dump_running);
9203 		return;
9204 	}
9205 
9206 	/*
9207 	 * Always turn off tracing when we dump.
9208 	 * We don't need to show trace output of what happens
9209 	 * between multiple crashes.
9210 	 *
9211 	 * If the user does a sysrq-z, then they can re-enable
9212 	 * tracing with echo 1 > tracing_on.
9213 	 */
9214 	tracing_off();
9215 
9216 	local_irq_save(flags);
9217 	printk_nmi_direct_enter();
9218 
9219 	/* Simulate the iterator */
9220 	trace_init_global_iter(&iter);
9221 	/* Can not use kmalloc for iter.temp */
9222 	iter.temp = static_temp_buf;
9223 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9224 
9225 	for_each_tracing_cpu(cpu) {
9226 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9227 	}
9228 
9229 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9230 
9231 	/* don't look at user memory in panic mode */
9232 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9233 
9234 	switch (oops_dump_mode) {
9235 	case DUMP_ALL:
9236 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9237 		break;
9238 	case DUMP_ORIG:
9239 		iter.cpu_file = raw_smp_processor_id();
9240 		break;
9241 	case DUMP_NONE:
9242 		goto out_enable;
9243 	default:
9244 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9245 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9246 	}
9247 
9248 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9249 
9250 	/* Did function tracer already get disabled? */
9251 	if (ftrace_is_dead()) {
9252 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9253 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9254 	}
9255 
9256 	/*
9257 	 * We need to stop all tracing on all CPUS to read the
9258 	 * the next buffer. This is a bit expensive, but is
9259 	 * not done often. We fill all what we can read,
9260 	 * and then release the locks again.
9261 	 */
9262 
9263 	while (!trace_empty(&iter)) {
9264 
9265 		if (!cnt)
9266 			printk(KERN_TRACE "---------------------------------\n");
9267 
9268 		cnt++;
9269 
9270 		trace_iterator_reset(&iter);
9271 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9272 
9273 		if (trace_find_next_entry_inc(&iter) != NULL) {
9274 			int ret;
9275 
9276 			ret = print_trace_line(&iter);
9277 			if (ret != TRACE_TYPE_NO_CONSUME)
9278 				trace_consume(&iter);
9279 		}
9280 		touch_nmi_watchdog();
9281 
9282 		trace_printk_seq(&iter.seq);
9283 	}
9284 
9285 	if (!cnt)
9286 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9287 	else
9288 		printk(KERN_TRACE "---------------------------------\n");
9289 
9290  out_enable:
9291 	tr->trace_flags |= old_userobj;
9292 
9293 	for_each_tracing_cpu(cpu) {
9294 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9295 	}
9296 	atomic_dec(&dump_running);
9297 	printk_nmi_direct_exit();
9298 	local_irq_restore(flags);
9299 }
9300 EXPORT_SYMBOL_GPL(ftrace_dump);
9301 
9302 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9303 {
9304 	char **argv;
9305 	int argc, ret;
9306 
9307 	argc = 0;
9308 	ret = 0;
9309 	argv = argv_split(GFP_KERNEL, buf, &argc);
9310 	if (!argv)
9311 		return -ENOMEM;
9312 
9313 	if (argc)
9314 		ret = createfn(argc, argv);
9315 
9316 	argv_free(argv);
9317 
9318 	return ret;
9319 }
9320 
9321 #define WRITE_BUFSIZE  4096
9322 
9323 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9324 				size_t count, loff_t *ppos,
9325 				int (*createfn)(int, char **))
9326 {
9327 	char *kbuf, *buf, *tmp;
9328 	int ret = 0;
9329 	size_t done = 0;
9330 	size_t size;
9331 
9332 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9333 	if (!kbuf)
9334 		return -ENOMEM;
9335 
9336 	while (done < count) {
9337 		size = count - done;
9338 
9339 		if (size >= WRITE_BUFSIZE)
9340 			size = WRITE_BUFSIZE - 1;
9341 
9342 		if (copy_from_user(kbuf, buffer + done, size)) {
9343 			ret = -EFAULT;
9344 			goto out;
9345 		}
9346 		kbuf[size] = '\0';
9347 		buf = kbuf;
9348 		do {
9349 			tmp = strchr(buf, '\n');
9350 			if (tmp) {
9351 				*tmp = '\0';
9352 				size = tmp - buf + 1;
9353 			} else {
9354 				size = strlen(buf);
9355 				if (done + size < count) {
9356 					if (buf != kbuf)
9357 						break;
9358 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9359 					pr_warn("Line length is too long: Should be less than %d\n",
9360 						WRITE_BUFSIZE - 2);
9361 					ret = -EINVAL;
9362 					goto out;
9363 				}
9364 			}
9365 			done += size;
9366 
9367 			/* Remove comments */
9368 			tmp = strchr(buf, '#');
9369 
9370 			if (tmp)
9371 				*tmp = '\0';
9372 
9373 			ret = trace_run_command(buf, createfn);
9374 			if (ret)
9375 				goto out;
9376 			buf += size;
9377 
9378 		} while (done < count);
9379 	}
9380 	ret = done;
9381 
9382 out:
9383 	kfree(kbuf);
9384 
9385 	return ret;
9386 }
9387 
9388 __init static int tracer_alloc_buffers(void)
9389 {
9390 	int ring_buf_size;
9391 	int ret = -ENOMEM;
9392 
9393 
9394 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9395 		pr_warn("Tracing disabled due to lockdown\n");
9396 		return -EPERM;
9397 	}
9398 
9399 	/*
9400 	 * Make sure we don't accidently add more trace options
9401 	 * than we have bits for.
9402 	 */
9403 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9404 
9405 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9406 		goto out;
9407 
9408 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9409 		goto out_free_buffer_mask;
9410 
9411 	/* Only allocate trace_printk buffers if a trace_printk exists */
9412 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9413 		/* Must be called before global_trace.buffer is allocated */
9414 		trace_printk_init_buffers();
9415 
9416 	/* To save memory, keep the ring buffer size to its minimum */
9417 	if (ring_buffer_expanded)
9418 		ring_buf_size = trace_buf_size;
9419 	else
9420 		ring_buf_size = 1;
9421 
9422 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9423 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9424 
9425 	raw_spin_lock_init(&global_trace.start_lock);
9426 
9427 	/*
9428 	 * The prepare callbacks allocates some memory for the ring buffer. We
9429 	 * don't free the buffer if the if the CPU goes down. If we were to free
9430 	 * the buffer, then the user would lose any trace that was in the
9431 	 * buffer. The memory will be removed once the "instance" is removed.
9432 	 */
9433 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9434 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9435 				      NULL);
9436 	if (ret < 0)
9437 		goto out_free_cpumask;
9438 	/* Used for event triggers */
9439 	ret = -ENOMEM;
9440 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9441 	if (!temp_buffer)
9442 		goto out_rm_hp_state;
9443 
9444 	if (trace_create_savedcmd() < 0)
9445 		goto out_free_temp_buffer;
9446 
9447 	/* TODO: make the number of buffers hot pluggable with CPUS */
9448 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9449 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9450 		goto out_free_savedcmd;
9451 	}
9452 
9453 	if (global_trace.buffer_disabled)
9454 		tracing_off();
9455 
9456 	if (trace_boot_clock) {
9457 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9458 		if (ret < 0)
9459 			pr_warn("Trace clock %s not defined, going back to default\n",
9460 				trace_boot_clock);
9461 	}
9462 
9463 	/*
9464 	 * register_tracer() might reference current_trace, so it
9465 	 * needs to be set before we register anything. This is
9466 	 * just a bootstrap of current_trace anyway.
9467 	 */
9468 	global_trace.current_trace = &nop_trace;
9469 
9470 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9471 
9472 	ftrace_init_global_array_ops(&global_trace);
9473 
9474 	init_trace_flags_index(&global_trace);
9475 
9476 	register_tracer(&nop_trace);
9477 
9478 	/* Function tracing may start here (via kernel command line) */
9479 	init_function_trace();
9480 
9481 	/* All seems OK, enable tracing */
9482 	tracing_disabled = 0;
9483 
9484 	atomic_notifier_chain_register(&panic_notifier_list,
9485 				       &trace_panic_notifier);
9486 
9487 	register_die_notifier(&trace_die_notifier);
9488 
9489 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9490 
9491 	INIT_LIST_HEAD(&global_trace.systems);
9492 	INIT_LIST_HEAD(&global_trace.events);
9493 	INIT_LIST_HEAD(&global_trace.hist_vars);
9494 	INIT_LIST_HEAD(&global_trace.err_log);
9495 	list_add(&global_trace.list, &ftrace_trace_arrays);
9496 
9497 	apply_trace_boot_options();
9498 
9499 	register_snapshot_cmd();
9500 
9501 	return 0;
9502 
9503 out_free_savedcmd:
9504 	free_saved_cmdlines_buffer(savedcmd);
9505 out_free_temp_buffer:
9506 	ring_buffer_free(temp_buffer);
9507 out_rm_hp_state:
9508 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9509 out_free_cpumask:
9510 	free_cpumask_var(global_trace.tracing_cpumask);
9511 out_free_buffer_mask:
9512 	free_cpumask_var(tracing_buffer_mask);
9513 out:
9514 	return ret;
9515 }
9516 
9517 void __init early_trace_init(void)
9518 {
9519 	if (tracepoint_printk) {
9520 		tracepoint_print_iter =
9521 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9522 		if (MEM_FAIL(!tracepoint_print_iter,
9523 			     "Failed to allocate trace iterator\n"))
9524 			tracepoint_printk = 0;
9525 		else
9526 			static_key_enable(&tracepoint_printk_key.key);
9527 	}
9528 	tracer_alloc_buffers();
9529 }
9530 
9531 void __init trace_init(void)
9532 {
9533 	trace_event_init();
9534 }
9535 
9536 __init static int clear_boot_tracer(void)
9537 {
9538 	/*
9539 	 * The default tracer at boot buffer is an init section.
9540 	 * This function is called in lateinit. If we did not
9541 	 * find the boot tracer, then clear it out, to prevent
9542 	 * later registration from accessing the buffer that is
9543 	 * about to be freed.
9544 	 */
9545 	if (!default_bootup_tracer)
9546 		return 0;
9547 
9548 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9549 	       default_bootup_tracer);
9550 	default_bootup_tracer = NULL;
9551 
9552 	return 0;
9553 }
9554 
9555 fs_initcall(tracer_init_tracefs);
9556 late_initcall_sync(clear_boot_tracer);
9557 
9558 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9559 __init static int tracing_set_default_clock(void)
9560 {
9561 	/* sched_clock_stable() is determined in late_initcall */
9562 	if (!trace_boot_clock && !sched_clock_stable()) {
9563 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9564 			pr_warn("Can not set tracing clock due to lockdown\n");
9565 			return -EPERM;
9566 		}
9567 
9568 		printk(KERN_WARNING
9569 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9570 		       "If you want to keep using the local clock, then add:\n"
9571 		       "  \"trace_clock=local\"\n"
9572 		       "on the kernel command line\n");
9573 		tracing_set_clock(&global_trace, "global");
9574 	}
9575 
9576 	return 0;
9577 }
9578 late_initcall_sync(tracing_set_default_clock);
9579 #endif
9580