xref: /openbmc/linux/kernel/trace/trace.c (revision cff11abeca78aa782378401ca2800bd2194aa14e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will peek into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurs.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 and is set back to zero only when the
101  * initialization of the tracer succeeds. That is the only place
102  * that clears it.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * from "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
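
/*
 * Illustrative only (a sketch, not code used in this file): one saved
 * chunk is laid out as [head][map x head.length][tail], so with "chunk"
 * pointing at the head item the maps can be walked as
 *
 *	union trace_eval_map_item *ptr = chunk;
 *	unsigned long i;
 *
 *	for (i = 0; i < ptr[0].head.length; i++)
 *		pr_info("%s -> %lu\n", ptr[i + 1].map.eval_string,
 *			ptr[i + 1].map.eval_value);
 *
 * and ptr[ptr[0].head.length + 1].tail.next then points to the next
 * saved chunk, or NULL if this is the last one.
 */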
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194 		return 1;
195 	}
196 
197 	return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
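
/*
 * For illustration, the forms accepted by the parser above:
 *
 *	ftrace_dump_on_oops		(dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that oopsed)
 *
 * or, at run time:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */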
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
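
/*
 * For example (ns2usecs() rounds to the nearest microsecond):
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */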
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  * @this_tr: The trace array to decrement the reference count of
307  *
308  * NOTE: Use this when we no longer need the trace array returned by
309  * trace_array_get_by_name(). This ensures the trace array can be later
310  * destroyed.
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
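
/*
 * A hedged usage sketch (not a caller in this file): the expected
 * pairing with trace_array_get_by_name(), which hands back a trace
 * array with its reference count already incremented.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	// ... use the instance ...
 *	trace_array_put(tr);	// allow it to be destroyed later
 */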
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filtered_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * When adding a task and @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid is already +1 of the actual previous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
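
/*
 * A minimal sketch (assuming a seq_file whose ->private is the
 * trace_pid_list) of how the three helpers above are wired into a
 * struct seq_operations; the real users live in the event and function
 * pid-filter code.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(m->private, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */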
513 
514 /* 128 (PID_BUF_SIZE + 1) should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always create a new array when the user adds new pids; the
535 	 * write is an all-or-nothing operation. If the operation
536 	 * fails, the current list is not modified and the new array
537 	 * is discarded.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
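
/*
 * A hedged usage sketch (names are illustrative): a ->write() handler
 * can feed user input through trace_pid_write() and then publish the
 * new list, much like the event pid-filter code does.
 *
 *	struct trace_pid_list *filtered, *new;
 *	ssize_t ret;
 *
 *	filtered = rcu_dereference_protected(tr->filtered_pids,
 *					     lockdep_is_held(&event_mutex));
 *	ret = trace_pid_write(filtered, &new, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, new);
 *	if (filtered) {
 *		synchronize_rcu();
 *		trace_free_pid_list(filtered);
 *	}
 */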
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show whether global_trace is enabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, keeping the buffer small means
658  * not having to wait for all that output. In any case, the size
659  * is configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low-level
677  * protection. The validity of the events (returned by
678  * ring_buffer_peek(), etc.) is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes
681  * to consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page for
686  *      splice_read, and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different per-CPU
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
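
/*
 * Typical pairing for the helpers above (an illustrative sketch, not a
 * caller in this file): wrap consuming reads of one CPU buffer with the
 * per-cpu access lock.
 *
 *	u64 ts;
 *	struct ring_buffer_event *event;
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(iter->array_buffer->buffer, cpu,
 *				    &ts, NULL);
 *	trace_access_unlock(cpu);
 */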
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
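
/*
 * Illustrative use from other kernel code (a sketch; the called
 * function is hypothetical): bracket a suspect region so that the
 * buffers stop recording right after the interesting activity.
 *
 *	tracing_on();
 *	do_something_suspicious();
 *	tracing_off();
 */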
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
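
/*
 * Callers normally reach this through the trace_puts() macro rather
 * than calling __trace_puts() directly, e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * The macro picks __trace_bputs() when the string is a built-in
 * constant and falls back to __trace_puts() otherwise.
 */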
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string whose address is written into the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot either with
994  * tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
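
/*
 * A hedged usage sketch: pre-allocate the snapshot buffer where
 * sleeping is allowed, then trigger the swap from the condition site
 * (detected_anomaly() is illustrative only).
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (detected_anomaly())
 *		tracing_snapshot();
 */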
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already taken.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer; instead, we resize it because
1082 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want to preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
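
/*
 * Illustrative only: a cond_snapshot update callback and its
 * registration. The callback returns true when the snapshot should be
 * taken; "threshold" and measured_latency() are made-up examples of
 * user cond_data and a measurement.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		u64 *threshold = cond_data;
 *
 *		return measured_latency() > *threshold;
 *	}
 *
 *	err = tracing_snapshot_cond_enable(tr, &threshold, my_update);
 */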
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff), that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races where it gets disabled but we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning)
1303 		tracing_off();
1304 }
1305 
1306 /**
1307  * tracer_tracing_is_on - show real state of ring buffer enabled
1308  * @tr: the trace array to check whether its ring buffer is enabled
1309  *
1310  * Shows real state of the ring buffer if it is enabled or not.
1311  */
1312 bool tracer_tracing_is_on(struct trace_array *tr)
1313 {
1314 	if (tr->array_buffer.buffer)
1315 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1316 	return !tr->buffer_disabled;
1317 }
1318 
1319 /**
1320  * tracing_is_on - show state of ring buffers enabled
1321  */
1322 int tracing_is_on(void)
1323 {
1324 	return tracer_tracing_is_on(&global_trace);
1325 }
1326 EXPORT_SYMBOL_GPL(tracing_is_on);
1327 
1328 static int __init set_buf_size(char *str)
1329 {
1330 	unsigned long buf_size;
1331 
1332 	if (!str)
1333 		return 0;
1334 	buf_size = memparse(str, &str);
1335 	/* nr_entries can not be zero */
1336 	if (buf_size == 0)
1337 		return 0;
1338 	trace_buf_size = buf_size;
1339 	return 1;
1340 }
1341 __setup("trace_buf_size=", set_buf_size);
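
/*
 * For illustration: memparse() above accepts size suffixes, so the
 * boot command line can request e.g.
 *
 *	trace_buf_size=4m
 *
 * to ask for a 4 MiB (per-CPU) buffer, later rounded to page size as
 * noted for trace_buf_size earlier in this file.
 */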
1342 
1343 static int __init set_tracing_thresh(char *str)
1344 {
1345 	unsigned long threshold;
1346 	int ret;
1347 
1348 	if (!str)
1349 		return 0;
1350 	ret = kstrtoul(str, 0, &threshold);
1351 	if (ret < 0)
1352 		return 0;
1353 	tracing_thresh = threshold * 1000;
1354 	return 1;
1355 }
1356 __setup("tracing_thresh=", set_tracing_thresh);
1357 
1358 unsigned long nsecs_to_usecs(unsigned long nsecs)
1359 {
1360 	return nsecs / 1000;
1361 }
1362 
1363 /*
1364  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1365  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1366  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1367  * of strings in the order that the evals (enum) were defined.
1368  */
1369 #undef C
1370 #define C(a, b) b
1371 
1372 /* These must match the bit positions in trace_iterator_flags */
1373 static const char *trace_options[] = {
1374 	TRACE_FLAGS
1375 	NULL
1376 };
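
/*
 * A sketch of the expansion (the real flag list lives in trace.h):
 * with TRACE_FLAGS defined roughly as
 *
 *	#define TRACE_FLAGS			\
 *		C(PRINT_PARENT, "print-parent"),\
 *		C(SYM_OFFSET,	"sym-offset"),	\
 *		...
 *
 * the "#define C(a, b) b" above makes trace_options[] expand to
 * { "print-parent", "sym-offset", ..., NULL }, in the same order as
 * the TRACE_ITER_* bits.
 */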
1377 
1378 static struct {
1379 	u64 (*func)(void);
1380 	const char *name;
1381 	int in_ns;		/* is this clock in nanoseconds? */
1382 } trace_clocks[] = {
1383 	{ trace_clock_local,		"local",	1 },
1384 	{ trace_clock_global,		"global",	1 },
1385 	{ trace_clock_counter,		"counter",	0 },
1386 	{ trace_clock_jiffies,		"uptime",	0 },
1387 	{ trace_clock,			"perf",		1 },
1388 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1389 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1390 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1391 	ARCH_TRACE_CLOCKS
1392 };
1393 
1394 bool trace_clock_in_ns(struct trace_array *tr)
1395 {
1396 	if (trace_clocks[tr->clock_id].in_ns)
1397 		return true;
1398 
1399 	return false;
1400 }
1401 
1402 /*
1403  * trace_parser_get_init - gets the buffer for trace parser
1404  */
1405 int trace_parser_get_init(struct trace_parser *parser, int size)
1406 {
1407 	memset(parser, 0, sizeof(*parser));
1408 
1409 	parser->buffer = kmalloc(size, GFP_KERNEL);
1410 	if (!parser->buffer)
1411 		return 1;
1412 
1413 	parser->size = size;
1414 	return 0;
1415 }
1416 
1417 /*
1418  * trace_parser_put - frees the buffer for trace parser
1419  */
1420 void trace_parser_put(struct trace_parser *parser)
1421 {
1422 	kfree(parser->buffer);
1423 	parser->buffer = NULL;
1424 }
1425 
1426 /*
1427  * trace_get_user - reads the user input string separated by space
1428  * (matched by isspace(ch))
1429  *
1430  * For each string found the 'struct trace_parser' is updated,
1431  * and the function returns.
1432  *
1433  * Returns number of bytes read.
1434  *
1435  * See kernel/trace/trace.h for 'struct trace_parser' details.
1436  */
1437 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1438 	size_t cnt, loff_t *ppos)
1439 {
1440 	char ch;
1441 	size_t read = 0;
1442 	ssize_t ret;
1443 
1444 	if (!*ppos)
1445 		trace_parser_clear(parser);
1446 
1447 	ret = get_user(ch, ubuf++);
1448 	if (ret)
1449 		goto out;
1450 
1451 	read++;
1452 	cnt--;
1453 
1454 	/*
1455 	 * The parser is not finished with the last write,
1456 	 * continue reading the user input without skipping spaces.
1457 	 */
1458 	if (!parser->cont) {
1459 		/* skip white space */
1460 		while (cnt && isspace(ch)) {
1461 			ret = get_user(ch, ubuf++);
1462 			if (ret)
1463 				goto out;
1464 			read++;
1465 			cnt--;
1466 		}
1467 
1468 		parser->idx = 0;
1469 
1470 		/* only spaces were written */
1471 		if (isspace(ch) || !ch) {
1472 			*ppos += read;
1473 			ret = read;
1474 			goto out;
1475 		}
1476 	}
1477 
1478 	/* read the non-space input */
1479 	while (cnt && !isspace(ch) && ch) {
1480 		if (parser->idx < parser->size - 1)
1481 			parser->buffer[parser->idx++] = ch;
1482 		else {
1483 			ret = -EINVAL;
1484 			goto out;
1485 		}
1486 		ret = get_user(ch, ubuf++);
1487 		if (ret)
1488 			goto out;
1489 		read++;
1490 		cnt--;
1491 	}
1492 
1493 	/* We either got finished input or we have to wait for another call. */
1494 	if (isspace(ch) || !ch) {
1495 		parser->buffer[parser->idx] = 0;
1496 		parser->cont = false;
1497 	} else if (parser->idx < parser->size - 1) {
1498 		parser->cont = true;
1499 		parser->buffer[parser->idx++] = ch;
1500 		/* Make sure the parsed string always terminates with '\0'. */
1501 		parser->buffer[parser->idx] = 0;
1502 	} else {
1503 		ret = -EINVAL;
1504 		goto out;
1505 	}
1506 
1507 	*ppos += read;
1508 	ret = read;
1509 
1510 out:
1511 	return ret;
1512 }
1513 
1514 /* TODO add a seq_buf_to_buffer() */
1515 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1516 {
1517 	int len;
1518 
1519 	if (trace_seq_used(s) <= s->seq.readpos)
1520 		return -EBUSY;
1521 
1522 	len = trace_seq_used(s) - s->seq.readpos;
1523 	if (cnt > len)
1524 		cnt = len;
1525 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1526 
1527 	s->seq.readpos += cnt;
1528 	return cnt;
1529 }
1530 
1531 unsigned long __read_mostly	tracing_thresh;
1532 static const struct file_operations tracing_max_lat_fops;
1533 
1534 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1535 	defined(CONFIG_FSNOTIFY)
1536 
1537 static struct workqueue_struct *fsnotify_wq;
1538 
1539 static void latency_fsnotify_workfn(struct work_struct *work)
1540 {
1541 	struct trace_array *tr = container_of(work, struct trace_array,
1542 					      fsnotify_work);
1543 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1544 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1545 }
1546 
1547 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1548 {
1549 	struct trace_array *tr = container_of(iwork, struct trace_array,
1550 					      fsnotify_irqwork);
1551 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1552 }
1553 
1554 static void trace_create_maxlat_file(struct trace_array *tr,
1555 				     struct dentry *d_tracer)
1556 {
1557 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1558 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1559 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1560 					      d_tracer, &tr->max_latency,
1561 					      &tracing_max_lat_fops);
1562 }
1563 
1564 __init static int latency_fsnotify_init(void)
1565 {
1566 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1567 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1568 	if (!fsnotify_wq) {
1569 		pr_err("Unable to allocate tr_max_lat_wq\n");
1570 		return -ENOMEM;
1571 	}
1572 	return 0;
1573 }
1574 
1575 late_initcall_sync(latency_fsnotify_init);
1576 
1577 void latency_fsnotify(struct trace_array *tr)
1578 {
1579 	if (!fsnotify_wq)
1580 		return;
1581 	/*
1582 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1583 	 * possible that we are called from __schedule() or do_idle(), which
1584 	 * could cause a deadlock.
1585 	 */
1586 	irq_work_queue(&tr->fsnotify_irqwork);
1587 }
1588 
1589 /*
1590  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1591  *  defined(CONFIG_FSNOTIFY)
1592  */
1593 #else
1594 
1595 #define trace_create_maxlat_file(tr, d_tracer)				\
1596 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1597 			  &tr->max_latency, &tracing_max_lat_fops)
1598 
1599 #endif
1600 
1601 #ifdef CONFIG_TRACER_MAX_TRACE
1602 /*
1603  * Copy the new maximum trace into the separate maximum-trace
1604  * structure. (This way the maximum trace is permanently saved,
1605  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1606  */
1607 static void
1608 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1609 {
1610 	struct array_buffer *trace_buf = &tr->array_buffer;
1611 	struct array_buffer *max_buf = &tr->max_buffer;
1612 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1613 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1614 
1615 	max_buf->cpu = cpu;
1616 	max_buf->time_start = data->preempt_timestamp;
1617 
1618 	max_data->saved_latency = tr->max_latency;
1619 	max_data->critical_start = data->critical_start;
1620 	max_data->critical_end = data->critical_end;
1621 
1622 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1623 	max_data->pid = tsk->pid;
1624 	/*
1625 	 * If tsk == current, then use current_uid(), as that does not use
1626 	 * RCU. The irq tracer can be called out of RCU scope.
1627 	 */
1628 	if (tsk == current)
1629 		max_data->uid = current_uid();
1630 	else
1631 		max_data->uid = task_uid(tsk);
1632 
1633 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1634 	max_data->policy = tsk->policy;
1635 	max_data->rt_priority = tsk->rt_priority;
1636 
1637 	/* record this task's comm */
1638 	tracing_record_cmdline(tsk);
1639 	latency_fsnotify(tr);
1640 }
1641 
1642 /**
1643  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1644  * @tr: tracer
1645  * @tsk: the task with the latency
1646  * @cpu: The cpu that initiated the trace.
1647  * @cond_data: User data associated with a conditional snapshot
1648  *
1649  * Flip the buffers between the @tr and the max_tr and record information
1650  * about which task was the cause of this latency.
1651  */
1652 void
1653 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1654 	      void *cond_data)
1655 {
1656 	if (tr->stop_count)
1657 		return;
1658 
1659 	WARN_ON_ONCE(!irqs_disabled());
1660 
1661 	if (!tr->allocated_snapshot) {
1662 		/* Only the nop tracer should hit this when disabling */
1663 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1664 		return;
1665 	}
1666 
1667 	arch_spin_lock(&tr->max_lock);
1668 
1669 	/* Inherit the recordable setting from array_buffer */
1670 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1671 		ring_buffer_record_on(tr->max_buffer.buffer);
1672 	else
1673 		ring_buffer_record_off(tr->max_buffer.buffer);
1674 
1675 #ifdef CONFIG_TRACER_SNAPSHOT
1676 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1677 		goto out_unlock;
1678 #endif
1679 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1680 
1681 	__update_max_tr(tr, tsk, cpu);
1682 
1683  out_unlock:
1684 	arch_spin_unlock(&tr->max_lock);
1685 }
1686 
1687 /**
1688  * update_max_tr_single - only copy one trace over, and reset the rest
1689  * @tr: tracer
1690  * @tsk: task with the latency
1691  * @cpu: the cpu of the buffer to copy.
1692  *
1693  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1694  */
1695 void
1696 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1697 {
1698 	int ret;
1699 
1700 	if (tr->stop_count)
1701 		return;
1702 
1703 	WARN_ON_ONCE(!irqs_disabled());
1704 	if (!tr->allocated_snapshot) {
1705 		/* Only the nop tracer should hit this when disabling */
1706 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1707 		return;
1708 	}
1709 
1710 	arch_spin_lock(&tr->max_lock);
1711 
1712 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1713 
1714 	if (ret == -EBUSY) {
1715 		/*
1716 		 * We failed to swap the buffer due to a commit taking
1717 		 * place on this CPU. We fail to record, but we reset
1718 		 * the max trace buffer (no one writes directly to it)
1719 		 * and flag that it failed.
1720 		 */
1721 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1722 			"Failed to swap buffers due to commit in progress\n");
1723 	}
1724 
1725 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1726 
1727 	__update_max_tr(tr, tsk, cpu);
1728 	arch_spin_unlock(&tr->max_lock);
1729 }
1730 #endif /* CONFIG_TRACER_MAX_TRACE */
1731 
1732 static int wait_on_pipe(struct trace_iterator *iter, int full)
1733 {
1734 	/* Iterators are static, they should be filled or empty */
1735 	if (trace_buffer_iter(iter, iter->cpu_file))
1736 		return 0;
1737 
1738 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1739 				full);
1740 }
1741 
1742 #ifdef CONFIG_FTRACE_STARTUP_TEST
1743 static bool selftests_can_run;
1744 
1745 struct trace_selftests {
1746 	struct list_head		list;
1747 	struct tracer			*type;
1748 };
1749 
1750 static LIST_HEAD(postponed_selftests);
1751 
1752 static int save_selftest(struct tracer *type)
1753 {
1754 	struct trace_selftests *selftest;
1755 
1756 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1757 	if (!selftest)
1758 		return -ENOMEM;
1759 
1760 	selftest->type = type;
1761 	list_add(&selftest->list, &postponed_selftests);
1762 	return 0;
1763 }
1764 
1765 static int run_tracer_selftest(struct tracer *type)
1766 {
1767 	struct trace_array *tr = &global_trace;
1768 	struct tracer *saved_tracer = tr->current_trace;
1769 	int ret;
1770 
1771 	if (!type->selftest || tracing_selftest_disabled)
1772 		return 0;
1773 
1774 	/*
1775 	 * If a tracer registers early in boot up (before scheduling is
1776 	 * initialized and such), then do not run its selftests yet.
1777 	 * Instead, run it a little later in the boot process.
1778 	 */
1779 	if (!selftests_can_run)
1780 		return save_selftest(type);
1781 
1782 	/*
1783 	 * Run a selftest on this tracer.
1784 	 * Here we reset the trace buffer, and set the current
1785 	 * tracer to be this tracer. The tracer can then run some
1786 	 * internal tracing to verify that everything is in order.
1787 	 * If we fail, we do not register this tracer.
1788 	 */
1789 	tracing_reset_online_cpus(&tr->array_buffer);
1790 
1791 	tr->current_trace = type;
1792 
1793 #ifdef CONFIG_TRACER_MAX_TRACE
1794 	if (type->use_max_tr) {
1795 		/* If we expanded the buffers, make sure the max is expanded too */
1796 		if (ring_buffer_expanded)
1797 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1798 					   RING_BUFFER_ALL_CPUS);
1799 		tr->allocated_snapshot = true;
1800 	}
1801 #endif
1802 
1803 	/* the test is responsible for initializing and enabling */
1804 	pr_info("Testing tracer %s: ", type->name);
1805 	ret = type->selftest(type, tr);
1806 	/* the test is responsible for resetting too */
1807 	tr->current_trace = saved_tracer;
1808 	if (ret) {
1809 		printk(KERN_CONT "FAILED!\n");
1810 		/* Add the warning after printing 'FAILED' */
1811 		WARN_ON(1);
1812 		return -1;
1813 	}
1814 	/* Only reset on passing, to avoid touching corrupted buffers */
1815 	tracing_reset_online_cpus(&tr->array_buffer);
1816 
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818 	if (type->use_max_tr) {
1819 		tr->allocated_snapshot = false;
1820 
1821 		/* Shrink the max buffer again */
1822 		if (ring_buffer_expanded)
1823 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1824 					   RING_BUFFER_ALL_CPUS);
1825 	}
1826 #endif
1827 
1828 	printk(KERN_CONT "PASSED\n");
1829 	return 0;
1830 }
1831 
1832 static __init int init_trace_selftests(void)
1833 {
1834 	struct trace_selftests *p, *n;
1835 	struct tracer *t, **last;
1836 	int ret;
1837 
1838 	selftests_can_run = true;
1839 
1840 	mutex_lock(&trace_types_lock);
1841 
1842 	if (list_empty(&postponed_selftests))
1843 		goto out;
1844 
1845 	pr_info("Running postponed tracer tests:\n");
1846 
1847 	tracing_selftest_running = true;
1848 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1849 		/* This loop can take minutes when sanitizers are enabled, so
1850 		 * let's make sure we allow RCU processing.
1851 		 */
1852 		cond_resched();
1853 		ret = run_tracer_selftest(p->type);
1854 		/* If the test fails, then warn and remove from available_tracers */
1855 		if (ret < 0) {
1856 			WARN(1, "tracer: %s failed selftest, disabling\n",
1857 			     p->type->name);
1858 			last = &trace_types;
1859 			for (t = trace_types; t; t = t->next) {
1860 				if (t == p->type) {
1861 					*last = t->next;
1862 					break;
1863 				}
1864 				last = &t->next;
1865 			}
1866 		}
1867 		list_del(&p->list);
1868 		kfree(p);
1869 	}
1870 	tracing_selftest_running = false;
1871 
1872  out:
1873 	mutex_unlock(&trace_types_lock);
1874 
1875 	return 0;
1876 }
1877 core_initcall(init_trace_selftests);
1878 #else
1879 static inline int run_tracer_selftest(struct tracer *type)
1880 {
1881 	return 0;
1882 }
1883 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1884 
1885 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1886 
1887 static void __init apply_trace_boot_options(void);
1888 
1889 /**
1890  * register_tracer - register a tracer with the ftrace system.
1891  * @type: the plugin for the tracer
1892  *
1893  * Register a new plugin tracer.
1894  */
1895 int __init register_tracer(struct tracer *type)
1896 {
1897 	struct tracer *t;
1898 	int ret = 0;
1899 
1900 	if (!type->name) {
1901 		pr_info("Tracer must have a name\n");
1902 		return -1;
1903 	}
1904 
1905 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1906 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1907 		return -1;
1908 	}
1909 
1910 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1911 		pr_warn("Can not register tracer %s due to lockdown\n",
1912 			   type->name);
1913 		return -EPERM;
1914 	}
1915 
1916 	mutex_lock(&trace_types_lock);
1917 
1918 	tracing_selftest_running = true;
1919 
1920 	for (t = trace_types; t; t = t->next) {
1921 		if (strcmp(type->name, t->name) == 0) {
1922 			/* already found */
1923 			pr_info("Tracer %s already registered\n",
1924 				type->name);
1925 			ret = -1;
1926 			goto out;
1927 		}
1928 	}
1929 
1930 	if (!type->set_flag)
1931 		type->set_flag = &dummy_set_flag;
1932 	if (!type->flags) {
1933 		/* allocate a dummy tracer_flags */
1934 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1935 		if (!type->flags) {
1936 			ret = -ENOMEM;
1937 			goto out;
1938 		}
1939 		type->flags->val = 0;
1940 		type->flags->opts = dummy_tracer_opt;
1941 	} else
1942 		if (!type->flags->opts)
1943 			type->flags->opts = dummy_tracer_opt;
1944 
1945 	/* store the tracer for __set_tracer_option */
1946 	type->flags->trace = type;
1947 
1948 	ret = run_tracer_selftest(type);
1949 	if (ret < 0)
1950 		goto out;
1951 
1952 	type->next = trace_types;
1953 	trace_types = type;
1954 	add_tracer_options(&global_trace, type);
1955 
1956  out:
1957 	tracing_selftest_running = false;
1958 	mutex_unlock(&trace_types_lock);
1959 
1960 	if (ret || !default_bootup_tracer)
1961 		goto out_unlock;
1962 
1963 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1964 		goto out_unlock;
1965 
1966 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1967 	/* Do we want this tracer to start on bootup? */
1968 	tracing_set_tracer(&global_trace, type->name);
1969 	default_bootup_tracer = NULL;
1970 
1971 	apply_trace_boot_options();
1972 
1973 	/* disable other selftests, since this will break them. */
1974 	tracing_selftest_disabled = true;
1975 #ifdef CONFIG_FTRACE_STARTUP_TEST
1976 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1977 	       type->name);
1978 #endif
1979 
1980  out_unlock:
1981 	return ret;
1982 }
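
/*
 * A minimal sketch (not part of this file) of how a tracer plugin is
 * registered.  It assumes the usual struct tracer callbacks
 * (->init()/->reset() taking a struct trace_array *) used by the
 * in-tree tracers; every "example" name below is hypothetical.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	/* Enable whatever hooks this tracer needs. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Tear down the hooks installed by ->init(). */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int example_tracer_register(void)
{
	/* Makes "example" selectable through tracefs current_tracer. */
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);
#endif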
1983 
1984 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1985 {
1986 	struct trace_buffer *buffer = buf->buffer;
1987 
1988 	if (!buffer)
1989 		return;
1990 
1991 	ring_buffer_record_disable(buffer);
1992 
1993 	/* Make sure all commits have finished */
1994 	synchronize_rcu();
1995 	ring_buffer_reset_cpu(buffer, cpu);
1996 
1997 	ring_buffer_record_enable(buffer);
1998 }
1999 
2000 void tracing_reset_online_cpus(struct array_buffer *buf)
2001 {
2002 	struct trace_buffer *buffer = buf->buffer;
2003 	int cpu;
2004 
2005 	if (!buffer)
2006 		return;
2007 
2008 	ring_buffer_record_disable(buffer);
2009 
2010 	/* Make sure all commits have finished */
2011 	synchronize_rcu();
2012 
2013 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2014 
2015 	for_each_online_cpu(cpu)
2016 		ring_buffer_reset_cpu(buffer, cpu);
2017 
2018 	ring_buffer_record_enable(buffer);
2019 }
2020 
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2023 {
2024 	struct trace_array *tr;
2025 
2026 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027 		if (!tr->clear_trace)
2028 			continue;
2029 		tr->clear_trace = false;
2030 		tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032 		tracing_reset_online_cpus(&tr->max_buffer);
2033 #endif
2034 	}
2035 }
2036 
2037 static int *tgid_map;
2038 
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044 	unsigned *map_cmdline_to_pid;
2045 	unsigned cmdline_num;
2046 	int cmdline_idx;
2047 	char *saved_cmdlines;
2048 };
2049 static struct saved_cmdlines_buffer *savedcmd;
2050 
2051 /* temporarily disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2053 
2054 static inline char *get_saved_cmdlines(int idx)
2055 {
2056 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2057 }
2058 
2059 static inline void set_cmdline(int idx, const char *cmdline)
2060 {
2061 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2062 }
2063 
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065 				    struct saved_cmdlines_buffer *s)
2066 {
2067 	s->map_cmdline_to_pid = kmalloc_array(val,
2068 					      sizeof(*s->map_cmdline_to_pid),
2069 					      GFP_KERNEL);
2070 	if (!s->map_cmdline_to_pid)
2071 		return -ENOMEM;
2072 
2073 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074 	if (!s->saved_cmdlines) {
2075 		kfree(s->map_cmdline_to_pid);
2076 		return -ENOMEM;
2077 	}
2078 
2079 	s->cmdline_idx = 0;
2080 	s->cmdline_num = val;
2081 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082 	       sizeof(s->map_pid_to_cmdline));
2083 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084 	       val * sizeof(*s->map_cmdline_to_pid));
2085 
2086 	return 0;
2087 }
2088 
2089 static int trace_create_savedcmd(void)
2090 {
2091 	int ret;
2092 
2093 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2094 	if (!savedcmd)
2095 		return -ENOMEM;
2096 
2097 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2098 	if (ret < 0) {
2099 		kfree(savedcmd);
2100 		savedcmd = NULL;
2101 		return -ENOMEM;
2102 	}
2103 
2104 	return 0;
2105 }
2106 
2107 int is_tracing_stopped(void)
2108 {
2109 	return global_trace.stop_count;
2110 }
2111 
2112 /**
2113  * tracing_start - quick start of the tracer
2114  *
2115  * If tracing is enabled but was stopped by tracing_stop,
2116  * this will start the tracer back up.
2117  */
2118 void tracing_start(void)
2119 {
2120 	struct trace_buffer *buffer;
2121 	unsigned long flags;
2122 
2123 	if (tracing_disabled)
2124 		return;
2125 
2126 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127 	if (--global_trace.stop_count) {
2128 		if (global_trace.stop_count < 0) {
2129 			/* Someone screwed up their debugging */
2130 			WARN_ON_ONCE(1);
2131 			global_trace.stop_count = 0;
2132 		}
2133 		goto out;
2134 	}
2135 
2136 	/* Prevent the buffers from switching */
2137 	arch_spin_lock(&global_trace.max_lock);
2138 
2139 	buffer = global_trace.array_buffer.buffer;
2140 	if (buffer)
2141 		ring_buffer_record_enable(buffer);
2142 
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144 	buffer = global_trace.max_buffer.buffer;
2145 	if (buffer)
2146 		ring_buffer_record_enable(buffer);
2147 #endif
2148 
2149 	arch_spin_unlock(&global_trace.max_lock);
2150 
2151  out:
2152 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2153 }
2154 
2155 static void tracing_start_tr(struct trace_array *tr)
2156 {
2157 	struct trace_buffer *buffer;
2158 	unsigned long flags;
2159 
2160 	if (tracing_disabled)
2161 		return;
2162 
2163 	/* If global, we need to also start the max tracer */
2164 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165 		return tracing_start();
2166 
2167 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2168 
2169 	if (--tr->stop_count) {
2170 		if (tr->stop_count < 0) {
2171 			/* Someone screwed up their debugging */
2172 			WARN_ON_ONCE(1);
2173 			tr->stop_count = 0;
2174 		}
2175 		goto out;
2176 	}
2177 
2178 	buffer = tr->array_buffer.buffer;
2179 	if (buffer)
2180 		ring_buffer_record_enable(buffer);
2181 
2182  out:
2183 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2184 }
2185 
2186 /**
2187  * tracing_stop - quick stop of the tracer
2188  *
2189  * Lightweight way to stop tracing. Use in conjunction with
2190  * tracing_start.
2191  */
2192 void tracing_stop(void)
2193 {
2194 	struct trace_buffer *buffer;
2195 	unsigned long flags;
2196 
2197 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198 	if (global_trace.stop_count++)
2199 		goto out;
2200 
2201 	/* Prevent the buffers from switching */
2202 	arch_spin_lock(&global_trace.max_lock);
2203 
2204 	buffer = global_trace.array_buffer.buffer;
2205 	if (buffer)
2206 		ring_buffer_record_disable(buffer);
2207 
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209 	buffer = global_trace.max_buffer.buffer;
2210 	if (buffer)
2211 		ring_buffer_record_disable(buffer);
2212 #endif
2213 
2214 	arch_spin_unlock(&global_trace.max_lock);
2215 
2216  out:
2217 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2218 }
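
/*
 * A short sketch of the intended pairing of tracing_stop() and
 * tracing_start(): stop recording around a section whose events should
 * not land in the buffer, then re-enable.  The calls nest through
 * stop_count, so every tracing_stop() needs a matching tracing_start().
 * Illustrative only.
 */
#if 0
static void example_quiesce_trace(void)
{
	tracing_stop();		/* bumps global_trace.stop_count */
	/* ... inspect or copy out the trace buffers here ... */
	tracing_start();	/* drops stop_count, re-enables recording */
}
#endif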
2219 
2220 static void tracing_stop_tr(struct trace_array *tr)
2221 {
2222 	struct trace_buffer *buffer;
2223 	unsigned long flags;
2224 
2225 	/* If global, we need to also stop the max tracer */
2226 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227 		return tracing_stop();
2228 
2229 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2230 	if (tr->stop_count++)
2231 		goto out;
2232 
2233 	buffer = tr->array_buffer.buffer;
2234 	if (buffer)
2235 		ring_buffer_record_disable(buffer);
2236 
2237  out:
2238 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2239 }
2240 
2241 static int trace_save_cmdline(struct task_struct *tsk)
2242 {
2243 	unsigned pid, idx;
2244 
2245 	/* treat recording of idle task as a success */
2246 	if (!tsk->pid)
2247 		return 1;
2248 
2249 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2250 		return 0;
2251 
2252 	/*
2253 	 * It's not the end of the world if we don't get
2254 	 * the lock, but we also don't want to spin
2255 	 * nor do we want to disable interrupts,
2256 	 * so if we miss here, then better luck next time.
2257 	 */
2258 	if (!arch_spin_trylock(&trace_cmdline_lock))
2259 		return 0;
2260 
2261 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262 	if (idx == NO_CMDLINE_MAP) {
2263 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2264 
2265 		/*
2266 		 * Check whether the cmdline buffer at idx has a pid
2267 		 * mapped. We are going to overwrite that entry so we
2268 		 * need to clear the map_pid_to_cmdline. Otherwise we
2269 		 * would read the new comm for the old pid.
2270 		 */
2271 		pid = savedcmd->map_cmdline_to_pid[idx];
2272 		if (pid != NO_CMDLINE_MAP)
2273 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2274 
2275 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2277 
2278 		savedcmd->cmdline_idx = idx;
2279 	}
2280 
2281 	set_cmdline(idx, tsk->comm);
2282 
2283 	arch_spin_unlock(&trace_cmdline_lock);
2284 
2285 	return 1;
2286 }
2287 
2288 static void __trace_find_cmdline(int pid, char comm[])
2289 {
2290 	unsigned map;
2291 
2292 	if (!pid) {
2293 		strcpy(comm, "<idle>");
2294 		return;
2295 	}
2296 
2297 	if (WARN_ON_ONCE(pid < 0)) {
2298 		strcpy(comm, "<XXX>");
2299 		return;
2300 	}
2301 
2302 	if (pid > PID_MAX_DEFAULT) {
2303 		strcpy(comm, "<...>");
2304 		return;
2305 	}
2306 
2307 	map = savedcmd->map_pid_to_cmdline[pid];
2308 	if (map != NO_CMDLINE_MAP)
2309 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2310 	else
2311 		strcpy(comm, "<...>");
2312 }
2313 
2314 void trace_find_cmdline(int pid, char comm[])
2315 {
2316 	preempt_disable();
2317 	arch_spin_lock(&trace_cmdline_lock);
2318 
2319 	__trace_find_cmdline(pid, comm);
2320 
2321 	arch_spin_unlock(&trace_cmdline_lock);
2322 	preempt_enable();
2323 }
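
/*
 * A sketch of resolving a recorded PID back to a comm, the way the
 * output code does.  It assumes a TASK_COMM_LEN sized buffer;
 * trace_find_cmdline() fills in "<...>" when the cmdline cache has no
 * entry for that PID.
 */
#if 0
static void example_show_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif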
2324 
2325 int trace_find_tgid(int pid)
2326 {
2327 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2328 		return 0;
2329 
2330 	return tgid_map[pid];
2331 }
2332 
2333 static int trace_save_tgid(struct task_struct *tsk)
2334 {
2335 	/* treat recording of idle task as a success */
2336 	if (!tsk->pid)
2337 		return 1;
2338 
2339 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2340 		return 0;
2341 
2342 	tgid_map[tsk->pid] = tsk->tgid;
2343 	return 1;
2344 }
2345 
2346 static bool tracing_record_taskinfo_skip(int flags)
2347 {
2348 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2349 		return true;
2350 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2351 		return true;
2352 	if (!__this_cpu_read(trace_taskinfo_save))
2353 		return true;
2354 	return false;
2355 }
2356 
2357 /**
2358  * tracing_record_taskinfo - record the task info of a task
2359  *
2360  * @task:  task to record
2361  * @flags: TRACE_RECORD_CMDLINE for recording comm
2362  *         TRACE_RECORD_TGID for recording tgid
2363  */
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2365 {
2366 	bool done;
2367 
2368 	if (tracing_record_taskinfo_skip(flags))
2369 		return;
2370 
2371 	/*
2372 	 * Record as much task information as possible. If some fail, continue
2373 	 * to try to record the others.
2374 	 */
2375 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2377 
2378 	/* If recording any information failed, try again soon. */
2379 	if (!done)
2380 		return;
2381 
2382 	__this_cpu_write(trace_taskinfo_save, false);
2383 }
2384 
2385 /**
2386  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2387  *
2388  * @prev: previous task during sched_switch
2389  * @next: next task during sched_switch
2390  * @flags: TRACE_RECORD_CMDLINE for recording comm
2391  *         TRACE_RECORD_TGID for recording tgid
2392  */
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394 					  struct task_struct *next, int flags)
2395 {
2396 	bool done;
2397 
2398 	if (tracing_record_taskinfo_skip(flags))
2399 		return;
2400 
2401 	/*
2402 	 * Record as much task information as possible. If some fail, continue
2403 	 * to try to record the others.
2404 	 */
2405 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2409 
2410 	/* If recording any information failed, try again soon. */
2411 	if (!done)
2412 		return;
2413 
2414 	__this_cpu_write(trace_taskinfo_save, false);
2415 }
2416 
2417 /* Helpers to record a specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2419 {
2420 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2421 }
2422 
2423 void tracing_record_tgid(struct task_struct *task)
2424 {
2425 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2426 }
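
/*
 * Sketch of the typical caller pattern for these helpers: record the
 * task info at the point an event for that task is emitted, so the
 * comm/tgid caches only fill while tracing is active.  The probe name
 * and hook below are hypothetical.
 */
#if 0
static void example_sched_probe(void *data, struct task_struct *p)
{
	tracing_record_cmdline(p);	/* cache p->comm for later output */
	tracing_record_tgid(p);		/* cache p->tgid if tgid recording is on */
}
#endif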
2427 
2428 /*
2429  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431  * simplifies those functions and keeps them in sync.
2432  */
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2434 {
2435 	return trace_seq_has_overflowed(s) ?
2436 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2437 }
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
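
/*
 * A sketch of how an event's trace() output callback uses
 * trace_handle_return(): write into iter->seq and let the helper map
 * any overflow to TRACE_TYPE_PARTIAL_LINE.  The callback name is
 * hypothetical.
 */
#if 0
static enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);
	return trace_handle_return(s);
}
#endif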
2439 
2440 void
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442 			     unsigned long flags, int pc)
2443 {
2444 	struct task_struct *tsk = current;
2445 
2446 	entry->preempt_count		= pc & 0xff;
2447 	entry->pid			= (tsk) ? tsk->pid : 0;
2448 	entry->type			= type;
2449 	entry->flags =
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2452 #else
2453 		TRACE_FLAG_IRQS_NOSUPPORT |
2454 #endif
2455 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2456 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2460 }
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
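
/*
 * Sketch of how a caller fills the common fields of a raw entry it
 * manages itself: capture the irq flags and preempt count, then let
 * tracing_generic_entry_update() pack them into entry->flags.  The
 * helper name is hypothetical.
 */
#if 0
static void example_fill_common_fields(struct trace_entry *ent)
{
	unsigned long irq_flags;
	int pc = preempt_count();

	local_save_flags(irq_flags);
	tracing_generic_entry_update(ent, TRACE_FN, irq_flags, pc);
}
#endif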
2462 
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2465 			  int type,
2466 			  unsigned long len,
2467 			  unsigned long flags, int pc)
2468 {
2469 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2470 }
2471 
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2475 
2476 /**
2477  * trace_buffered_event_enable - enable buffering events
2478  *
2479  * When events are being filtered, it is quicker to write the event
2480  * data into a temporary buffer when there is a likely chance that it
2481  * will not be committed. Discarding a reserved event from the ring
2482  * buffer is not as fast as committing, and is much slower than
2483  * copying the data into a commit.
2484  *
2485  * When an event is to be filtered, allocate per-CPU buffers to write
2486  * the event data into. If the event is filtered and discarded, it is
2487  * simply dropped; otherwise the entire data is committed to the ring
2488  * buffer in one shot.
2489  */
2490 void trace_buffered_event_enable(void)
2491 {
2492 	struct ring_buffer_event *event;
2493 	struct page *page;
2494 	int cpu;
2495 
2496 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2497 
2498 	if (trace_buffered_event_ref++)
2499 		return;
2500 
2501 	for_each_tracing_cpu(cpu) {
2502 		page = alloc_pages_node(cpu_to_node(cpu),
2503 					GFP_KERNEL | __GFP_NORETRY, 0);
2504 		if (!page)
2505 			goto failed;
2506 
2507 		event = page_address(page);
2508 		memset(event, 0, sizeof(*event));
2509 
2510 		per_cpu(trace_buffered_event, cpu) = event;
2511 
2512 		preempt_disable();
2513 		if (cpu == smp_processor_id() &&
2514 		    this_cpu_read(trace_buffered_event) !=
2515 		    per_cpu(trace_buffered_event, cpu))
2516 			WARN_ON_ONCE(1);
2517 		preempt_enable();
2518 	}
2519 
2520 	return;
2521  failed:
2522 	trace_buffered_event_disable();
2523 }
2524 
2525 static void enable_trace_buffered_event(void *data)
2526 {
2527 	/* Probably not needed, but do it anyway */
2528 	smp_rmb();
2529 	this_cpu_dec(trace_buffered_event_cnt);
2530 }
2531 
2532 static void disable_trace_buffered_event(void *data)
2533 {
2534 	this_cpu_inc(trace_buffered_event_cnt);
2535 }
2536 
2537 /**
2538  * trace_buffered_event_disable - disable buffering events
2539  *
2540  * When a filter is removed, it is faster to not use the buffered
2541  * events, and to commit directly into the ring buffer. Free up
2542  * the temp buffers when there are no more users. This requires
2543  * special synchronization with current events.
2544  */
2545 void trace_buffered_event_disable(void)
2546 {
2547 	int cpu;
2548 
2549 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2550 
2551 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2552 		return;
2553 
2554 	if (--trace_buffered_event_ref)
2555 		return;
2556 
2557 	preempt_disable();
2558 	/* For each CPU, set the buffer as used. */
2559 	smp_call_function_many(tracing_buffer_mask,
2560 			       disable_trace_buffered_event, NULL, 1);
2561 	preempt_enable();
2562 
2563 	/* Wait for all current users to finish */
2564 	synchronize_rcu();
2565 
2566 	for_each_tracing_cpu(cpu) {
2567 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568 		per_cpu(trace_buffered_event, cpu) = NULL;
2569 	}
2570 	/*
2571 	 * Make sure trace_buffered_event is NULL before clearing
2572 	 * trace_buffered_event_cnt.
2573 	 */
2574 	smp_wmb();
2575 
2576 	preempt_disable();
2577 	/* Do the work on each cpu */
2578 	smp_call_function_many(tracing_buffer_mask,
2579 			       enable_trace_buffered_event, NULL, 1);
2580 	preempt_enable();
2581 }
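
/*
 * Sketch of the expected pairing: the event filter code holds
 * event_mutex, takes a buffered-event reference while a filter is
 * installed, and drops it when the filter goes away.  The calls are
 * refcounted, so enable and disable must balance.  Illustrative only.
 */
#if 0
static void example_filter_lifetime(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* on filter add */
	mutex_unlock(&event_mutex);

	/* ... events are staged in the per-CPU page while filtered ... */

	mutex_lock(&event_mutex);
	trace_buffered_event_disable();	/* on filter removal */
	mutex_unlock(&event_mutex);
}
#endif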
2582 
2583 static struct trace_buffer *temp_buffer;
2584 
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587 			  struct trace_event_file *trace_file,
2588 			  int type, unsigned long len,
2589 			  unsigned long flags, int pc)
2590 {
2591 	struct ring_buffer_event *entry;
2592 	int val;
2593 
2594 	*current_rb = trace_file->tr->array_buffer.buffer;
2595 
2596 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598 	    (entry = this_cpu_read(trace_buffered_event))) {
2599 		/* Try to use the per cpu buffer first */
2600 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2601 		if (val == 1) {
2602 			trace_event_setup(entry, type, flags, pc);
2603 			entry->array[0] = len;
2604 			return entry;
2605 		}
2606 		this_cpu_dec(trace_buffered_event_cnt);
2607 	}
2608 
2609 	entry = __trace_buffer_lock_reserve(*current_rb,
2610 					    type, len, flags, pc);
2611 	/*
2612 	 * If tracing is off, but we have triggers enabled
2613 	 * we still need to look at the event data. Use the temp_buffer
2614 	 * to store the trace event for the trigger to use. It's recursion
2615 	 * safe and will not be recorded anywhere.
2616 	 */
2617 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618 		*current_rb = temp_buffer;
2619 		entry = __trace_buffer_lock_reserve(*current_rb,
2620 						    type, len, flags, pc);
2621 	}
2622 	return entry;
2623 }
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2625 
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2628 
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2630 {
2631 	struct trace_event_call *event_call;
2632 	struct trace_event_file *file;
2633 	struct trace_event *event;
2634 	unsigned long flags;
2635 	struct trace_iterator *iter = tracepoint_print_iter;
2636 
2637 	/* We should never get here if iter is NULL */
2638 	if (WARN_ON_ONCE(!iter))
2639 		return;
2640 
2641 	event_call = fbuffer->trace_file->event_call;
2642 	if (!event_call || !event_call->event.funcs ||
2643 	    !event_call->event.funcs->trace)
2644 		return;
2645 
2646 	file = fbuffer->trace_file;
2647 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649 	     !filter_match_preds(file->filter, fbuffer->entry)))
2650 		return;
2651 
2652 	event = &fbuffer->trace_file->event_call->event;
2653 
2654 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655 	trace_seq_init(&iter->seq);
2656 	iter->ent = fbuffer->entry;
2657 	event_call->event.funcs->trace(iter, 0, event);
2658 	trace_seq_putc(&iter->seq, 0);
2659 	printk("%s", iter->seq.buffer);
2660 
2661 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2662 }
2663 
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665 			     void *buffer, size_t *lenp,
2666 			     loff_t *ppos)
2667 {
2668 	int save_tracepoint_printk;
2669 	int ret;
2670 
2671 	mutex_lock(&tracepoint_printk_mutex);
2672 	save_tracepoint_printk = tracepoint_printk;
2673 
2674 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2675 
2676 	/*
2677 	 * This will force exiting early, as tracepoint_printk
2678 	 * is always zero when tracepoint_print_iter is not allocated
2679 	 */
2680 	if (!tracepoint_print_iter)
2681 		tracepoint_printk = 0;
2682 
2683 	if (save_tracepoint_printk == tracepoint_printk)
2684 		goto out;
2685 
2686 	if (tracepoint_printk)
2687 		static_key_enable(&tracepoint_printk_key.key);
2688 	else
2689 		static_key_disable(&tracepoint_printk_key.key);
2690 
2691  out:
2692 	mutex_unlock(&tracepoint_printk_mutex);
2693 
2694 	return ret;
2695 }
2696 
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2698 {
2699 	if (static_key_false(&tracepoint_printk_key.key))
2700 		output_printk(fbuffer);
2701 
2702 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703 				    fbuffer->event, fbuffer->entry,
2704 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2705 }
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2707 
2708 /*
2709  * Skip 3:
2710  *
2711  *   trace_buffer_unlock_commit_regs()
2712  *   trace_event_buffer_commit()
2713  *   trace_event_raw_event_xxx()
2714  */
2715 # define STACK_SKIP 3
2716 
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718 				     struct trace_buffer *buffer,
2719 				     struct ring_buffer_event *event,
2720 				     unsigned long flags, int pc,
2721 				     struct pt_regs *regs)
2722 {
2723 	__buffer_unlock_commit(buffer, event);
2724 
2725 	/*
2726 	 * If regs is not set, then skip the necessary functions.
2727 	 * Note, we can still get here via blktrace, wakeup tracer
2728 	 * and mmiotrace, but that's ok if they lose a function or
2729 	 * two. They are not that meaningful.
2730 	 */
2731 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732 	ftrace_trace_userstack(buffer, flags, pc);
2733 }
2734 
2735 /*
2736  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2737  */
2738 void
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740 				   struct ring_buffer_event *event)
2741 {
2742 	__buffer_unlock_commit(buffer, event);
2743 }
2744 
2745 static void
2746 trace_process_export(struct trace_export *export,
2747 	       struct ring_buffer_event *event)
2748 {
2749 	struct trace_entry *entry;
2750 	unsigned int size = 0;
2751 
2752 	entry = ring_buffer_event_data(event);
2753 	size = ring_buffer_event_length(event);
2754 	export->write(export, entry, size);
2755 }
2756 
2757 static DEFINE_MUTEX(ftrace_export_lock);
2758 
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2760 
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2762 
2763 static inline void ftrace_exports_enable(void)
2764 {
2765 	static_branch_enable(&ftrace_exports_enabled);
2766 }
2767 
2768 static inline void ftrace_exports_disable(void)
2769 {
2770 	static_branch_disable(&ftrace_exports_enabled);
2771 }
2772 
2773 static void ftrace_exports(struct ring_buffer_event *event)
2774 {
2775 	struct trace_export *export;
2776 
2777 	preempt_disable_notrace();
2778 
2779 	export = rcu_dereference_raw_check(ftrace_exports_list);
2780 	while (export) {
2781 		trace_process_export(export, event);
2782 		export = rcu_dereference_raw_check(export->next);
2783 	}
2784 
2785 	preempt_enable_notrace();
2786 }
2787 
2788 static inline void
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2790 {
2791 	rcu_assign_pointer(export->next, *list);
2792 	/*
2793 	 * We are adding the export to the list, but another
2794 	 * CPU might be walking that list. We need to make sure
2795 	 * the export->next pointer is valid before another CPU sees
2796 	 * the export pointer included in the list.
2797 	 */
2798 	rcu_assign_pointer(*list, export);
2799 }
2800 
2801 static inline int
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804 	struct trace_export **p;
2805 
2806 	for (p = list; *p != NULL; p = &(*p)->next)
2807 		if (*p == export)
2808 			break;
2809 
2810 	if (*p != export)
2811 		return -1;
2812 
2813 	rcu_assign_pointer(*p, (*p)->next);
2814 
2815 	return 0;
2816 }
2817 
2818 static inline void
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2820 {
2821 	if (*list == NULL)
2822 		ftrace_exports_enable();
2823 
2824 	add_trace_export(list, export);
2825 }
2826 
2827 static inline int
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2829 {
2830 	int ret;
2831 
2832 	ret = rm_trace_export(list, export);
2833 	if (*list == NULL)
2834 		ftrace_exports_disable();
2835 
2836 	return ret;
2837 }
2838 
2839 int register_ftrace_export(struct trace_export *export)
2840 {
2841 	if (WARN_ON_ONCE(!export->write))
2842 		return -1;
2843 
2844 	mutex_lock(&ftrace_export_lock);
2845 
2846 	add_ftrace_export(&ftrace_exports_list, export);
2847 
2848 	mutex_unlock(&ftrace_export_lock);
2849 
2850 	return 0;
2851 }
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
2853 
2854 int unregister_ftrace_export(struct trace_export *export)
2855 {
2856 	int ret;
2857 
2858 	mutex_lock(&ftrace_export_lock);
2859 
2860 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2861 
2862 	mutex_unlock(&ftrace_export_lock);
2863 
2864 	return ret;
2865 }
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
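
/*
 * Sketch of a trace export consumer: provide a ->write() callback
 * (matching the export->write() call in trace_process_export() above)
 * and register it so each function-trace event is copied out.  The
 * callback runs with preemption disabled, so it must not sleep.  Names
 * below are hypothetical.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push the raw trace_entry bytes to an out-of-band channel. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void example_export_teardown(void)
{
	unregister_ftrace_export(&example_export);
}
#endif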
2867 
2868 void
2869 trace_function(struct trace_array *tr,
2870 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2871 	       int pc)
2872 {
2873 	struct trace_event_call *call = &event_function;
2874 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2875 	struct ring_buffer_event *event;
2876 	struct ftrace_entry *entry;
2877 
2878 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2879 					    flags, pc);
2880 	if (!event)
2881 		return;
2882 	entry	= ring_buffer_event_data(event);
2883 	entry->ip			= ip;
2884 	entry->parent_ip		= parent_ip;
2885 
2886 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2887 		if (static_branch_unlikely(&ftrace_exports_enabled))
2888 			ftrace_exports(event);
2889 		__buffer_unlock_commit(buffer, event);
2890 	}
2891 }
2892 
2893 #ifdef CONFIG_STACKTRACE
2894 
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING	4
2897 
2898 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2899 
2900 struct ftrace_stack {
2901 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2902 };
2903 
2904 
2905 struct ftrace_stacks {
2906 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2907 };
2908 
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2911 
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913 				 unsigned long flags,
2914 				 int skip, int pc, struct pt_regs *regs)
2915 {
2916 	struct trace_event_call *call = &event_kernel_stack;
2917 	struct ring_buffer_event *event;
2918 	unsigned int size, nr_entries;
2919 	struct ftrace_stack *fstack;
2920 	struct stack_entry *entry;
2921 	int stackidx;
2922 
2923 	/*
2924 	 * Add one, for this function and the call to stack_trace_save().
2925 	 * If regs is set, then these functions will not be in the way.
2926 	 */
2927 #ifndef CONFIG_UNWINDER_ORC
2928 	if (!regs)
2929 		skip++;
2930 #endif
2931 
2932 	/*
2933 	 * Since events can happen in NMIs there's no safe way to
2934 	 * use the per-CPU ftrace_stacks without reserving a slot. We
2935 	 * reserve one, and if an interrupt or NMI comes in, it will
2936 	 * simply use the next nesting level of the per-CPU stack.
2937 	 */
2938 	preempt_disable_notrace();
2939 
2940 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2941 
2942 	/* This should never happen. If it does, yell once and skip */
2943 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2944 		goto out;
2945 
2946 	/*
2947 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2948 	 * interrupt will either see the value pre increment or post
2949 	 * increment. If the interrupt happens pre increment it will have
2950 	 * restored the counter when it returns.  We just need a barrier to
2951 	 * keep gcc from moving things around.
2952 	 */
2953 	barrier();
2954 
2955 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2956 	size = ARRAY_SIZE(fstack->calls);
2957 
2958 	if (regs) {
2959 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2960 						   size, skip);
2961 	} else {
2962 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2963 	}
2964 
2965 	size = nr_entries * sizeof(unsigned long);
2966 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2967 					    sizeof(*entry) + size, flags, pc);
2968 	if (!event)
2969 		goto out;
2970 	entry = ring_buffer_event_data(event);
2971 
2972 	memcpy(&entry->caller, fstack->calls, size);
2973 	entry->size = nr_entries;
2974 
2975 	if (!call_filter_check_discard(call, entry, buffer, event))
2976 		__buffer_unlock_commit(buffer, event);
2977 
2978  out:
2979 	/* Again, don't let gcc optimize things here */
2980 	barrier();
2981 	__this_cpu_dec(ftrace_stack_reserve);
2982 	preempt_enable_notrace();
2983 
2984 }
2985 
2986 static inline void ftrace_trace_stack(struct trace_array *tr,
2987 				      struct trace_buffer *buffer,
2988 				      unsigned long flags,
2989 				      int skip, int pc, struct pt_regs *regs)
2990 {
2991 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2992 		return;
2993 
2994 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2995 }
2996 
2997 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2998 		   int pc)
2999 {
3000 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3001 
3002 	if (rcu_is_watching()) {
3003 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3004 		return;
3005 	}
3006 
3007 	/*
3008 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3009 	 * but if the above rcu_is_watching() failed, then the NMI
3010 	 * triggered someplace critical, and rcu_irq_enter() should
3011 	 * not be called from NMI.
3012 	 */
3013 	if (unlikely(in_nmi()))
3014 		return;
3015 
3016 	rcu_irq_enter_irqson();
3017 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3018 	rcu_irq_exit_irqson();
3019 }
3020 
3021 /**
3022  * trace_dump_stack - record a stack back trace in the trace buffer
3023  * @skip: Number of functions to skip (helper handlers)
3024  */
3025 void trace_dump_stack(int skip)
3026 {
3027 	unsigned long flags;
3028 
3029 	if (tracing_disabled || tracing_selftest_running)
3030 		return;
3031 
3032 	local_save_flags(flags);
3033 
3034 #ifndef CONFIG_UNWINDER_ORC
3035 	/* Skip 1 to skip this function. */
3036 	skip++;
3037 #endif
3038 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3039 			     flags, skip, preempt_count(), NULL);
3040 }
3041 EXPORT_SYMBOL_GPL(trace_dump_stack);
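
/*
 * Sketch of calling trace_dump_stack() from other kernel code to drop a
 * kernel stack back trace into the global trace buffer at a point of
 * interest, instead of printing it to the console.  Passing 0 keeps the
 * immediate caller in the trace; the condition below is hypothetical.
 */
#if 0
static void example_check(int cookie)
{
	if (cookie < 0)
		trace_dump_stack(0);	/* record how we got here */
}
#endif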
3042 
3043 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3044 static DEFINE_PER_CPU(int, user_stack_count);
3045 
3046 static void
3047 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3048 {
3049 	struct trace_event_call *call = &event_user_stack;
3050 	struct ring_buffer_event *event;
3051 	struct userstack_entry *entry;
3052 
3053 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3054 		return;
3055 
3056 	/*
3057 	 * NMIs cannot handle page faults, even with fixups.
3058 	 * The save user stack can (and often does) fault.
3059 	 */
3060 	if (unlikely(in_nmi()))
3061 		return;
3062 
3063 	/*
3064 	 * prevent recursion, since the user stack tracing may
3065 	 * trigger other kernel events.
3066 	 */
3067 	preempt_disable();
3068 	if (__this_cpu_read(user_stack_count))
3069 		goto out;
3070 
3071 	__this_cpu_inc(user_stack_count);
3072 
3073 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3074 					    sizeof(*entry), flags, pc);
3075 	if (!event)
3076 		goto out_drop_count;
3077 	entry	= ring_buffer_event_data(event);
3078 
3079 	entry->tgid		= current->tgid;
3080 	memset(&entry->caller, 0, sizeof(entry->caller));
3081 
3082 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3083 	if (!call_filter_check_discard(call, entry, buffer, event))
3084 		__buffer_unlock_commit(buffer, event);
3085 
3086  out_drop_count:
3087 	__this_cpu_dec(user_stack_count);
3088  out:
3089 	preempt_enable();
3090 }
3091 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3092 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3093 				   unsigned long flags, int pc)
3094 {
3095 }
3096 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3097 
3098 #endif /* CONFIG_STACKTRACE */
3099 
3100 /* created for use with alloc_percpu */
3101 struct trace_buffer_struct {
3102 	int nesting;
3103 	char buffer[4][TRACE_BUF_SIZE];
3104 };
3105 
3106 static struct trace_buffer_struct *trace_percpu_buffer;
3107 
3108 /*
3109  * This allows for lockless recording.  If we're nested too deeply, then
3110  * this returns NULL.
3111  */
3112 static char *get_trace_buf(void)
3113 {
3114 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3115 
3116 	if (!buffer || buffer->nesting >= 4)
3117 		return NULL;
3118 
3119 	buffer->nesting++;
3120 
3121 	/* Interrupts must see nesting incremented before we use the buffer */
3122 	barrier();
3123 	return &buffer->buffer[buffer->nesting][0];
3124 }
3125 
3126 static void put_trace_buf(void)
3127 {
3128 	/* Don't let the decrement of nesting leak before this */
3129 	barrier();
3130 	this_cpu_dec(trace_percpu_buffer->nesting);
3131 }
3132 
3133 static int alloc_percpu_trace_buffer(void)
3134 {
3135 	struct trace_buffer_struct *buffers;
3136 
3137 	buffers = alloc_percpu(struct trace_buffer_struct);
3138 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3139 		return -ENOMEM;
3140 
3141 	trace_percpu_buffer = buffers;
3142 	return 0;
3143 }
3144 
3145 static int buffers_allocated;
3146 
3147 void trace_printk_init_buffers(void)
3148 {
3149 	if (buffers_allocated)
3150 		return;
3151 
3152 	if (alloc_percpu_trace_buffer())
3153 		return;
3154 
3155 	/* trace_printk() is for debug use only. Don't use it in production. */
3156 
3157 	pr_warn("\n");
3158 	pr_warn("**********************************************************\n");
3159 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3160 	pr_warn("**                                                      **\n");
3161 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3162 	pr_warn("**                                                      **\n");
3163 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3164 	pr_warn("** unsafe for production use.                           **\n");
3165 	pr_warn("**                                                      **\n");
3166 	pr_warn("** If you see this message and you are not debugging    **\n");
3167 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3168 	pr_warn("**                                                      **\n");
3169 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3170 	pr_warn("**********************************************************\n");
3171 
3172 	/* Expand the buffers to set size */
3173 	tracing_update_buffers();
3174 
3175 	buffers_allocated = 1;
3176 
3177 	/*
3178 	 * trace_printk_init_buffers() can be called by modules.
3179 	 * If that happens, then we need to start cmdline recording
3180 	 * directly here. If the global_trace.buffer is already
3181 	 * allocated here, then this was called by module code.
3182 	 */
3183 	if (global_trace.array_buffer.buffer)
3184 		tracing_start_cmdline_record();
3185 }
3186 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3187 
3188 void trace_printk_start_comm(void)
3189 {
3190 	/* Start tracing comms if trace printk is set */
3191 	if (!buffers_allocated)
3192 		return;
3193 	tracing_start_cmdline_record();
3194 }
3195 
3196 static void trace_printk_start_stop_comm(int enabled)
3197 {
3198 	if (!buffers_allocated)
3199 		return;
3200 
3201 	if (enabled)
3202 		tracing_start_cmdline_record();
3203 	else
3204 		tracing_stop_cmdline_record();
3205 }
3206 
3207 /**
3208  * trace_vbprintk - write binary msg to tracing buffer
3209  * @ip:    The address of the caller
3210  * @fmt:   The string format to write to the buffer
3211  * @args:  Arguments for @fmt
3212  */
3213 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3214 {
3215 	struct trace_event_call *call = &event_bprint;
3216 	struct ring_buffer_event *event;
3217 	struct trace_buffer *buffer;
3218 	struct trace_array *tr = &global_trace;
3219 	struct bprint_entry *entry;
3220 	unsigned long flags;
3221 	char *tbuffer;
3222 	int len = 0, size, pc;
3223 
3224 	if (unlikely(tracing_selftest_running || tracing_disabled))
3225 		return 0;
3226 
3227 	/* Don't pollute graph traces with trace_vprintk internals */
3228 	pause_graph_tracing();
3229 
3230 	pc = preempt_count();
3231 	preempt_disable_notrace();
3232 
3233 	tbuffer = get_trace_buf();
3234 	if (!tbuffer) {
3235 		len = 0;
3236 		goto out_nobuffer;
3237 	}
3238 
3239 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3240 
3241 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3242 		goto out_put;
3243 
3244 	local_save_flags(flags);
3245 	size = sizeof(*entry) + sizeof(u32) * len;
3246 	buffer = tr->array_buffer.buffer;
3247 	ring_buffer_nest_start(buffer);
3248 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3249 					    flags, pc);
3250 	if (!event)
3251 		goto out;
3252 	entry = ring_buffer_event_data(event);
3253 	entry->ip			= ip;
3254 	entry->fmt			= fmt;
3255 
3256 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3257 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3258 		__buffer_unlock_commit(buffer, event);
3259 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3260 	}
3261 
3262 out:
3263 	ring_buffer_nest_end(buffer);
3264 out_put:
3265 	put_trace_buf();
3266 
3267 out_nobuffer:
3268 	preempt_enable_notrace();
3269 	unpause_graph_tracing();
3270 
3271 	return len;
3272 }
3273 EXPORT_SYMBOL_GPL(trace_vbprintk);
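
/*
 * Sketch of the varargs wrapper that sits on top of trace_vbprintk()
 * (the in-tree one is __trace_bprintk() in kernel/trace/trace_printk.c).
 * The format pointer is stored in the binary entry, so it must stay
 * valid for the life of the buffer, which is why trace_printk() only
 * passes constant formats down this path.
 */
#if 0
static __printf(2, 3)
int example_bprintk(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vbprintk(ip, fmt, ap);
	va_end(ap);

	return ret;
}
#endif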
3274 
3275 __printf(3, 0)
3276 static int
3277 __trace_array_vprintk(struct trace_buffer *buffer,
3278 		      unsigned long ip, const char *fmt, va_list args)
3279 {
3280 	struct trace_event_call *call = &event_print;
3281 	struct ring_buffer_event *event;
3282 	int len = 0, size, pc;
3283 	struct print_entry *entry;
3284 	unsigned long flags;
3285 	char *tbuffer;
3286 
3287 	if (tracing_disabled || tracing_selftest_running)
3288 		return 0;
3289 
3290 	/* Don't pollute graph traces with trace_vprintk internals */
3291 	pause_graph_tracing();
3292 
3293 	pc = preempt_count();
3294 	preempt_disable_notrace();
3295 
3296 
3297 	tbuffer = get_trace_buf();
3298 	if (!tbuffer) {
3299 		len = 0;
3300 		goto out_nobuffer;
3301 	}
3302 
3303 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3304 
3305 	local_save_flags(flags);
3306 	size = sizeof(*entry) + len + 1;
3307 	ring_buffer_nest_start(buffer);
3308 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3309 					    flags, pc);
3310 	if (!event)
3311 		goto out;
3312 	entry = ring_buffer_event_data(event);
3313 	entry->ip = ip;
3314 
3315 	memcpy(&entry->buf, tbuffer, len + 1);
3316 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3317 		__buffer_unlock_commit(buffer, event);
3318 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3319 	}
3320 
3321 out:
3322 	ring_buffer_nest_end(buffer);
3323 	put_trace_buf();
3324 
3325 out_nobuffer:
3326 	preempt_enable_notrace();
3327 	unpause_graph_tracing();
3328 
3329 	return len;
3330 }
3331 
3332 __printf(3, 0)
3333 int trace_array_vprintk(struct trace_array *tr,
3334 			unsigned long ip, const char *fmt, va_list args)
3335 {
3336 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3337 }
3338 
3339 __printf(3, 0)
3340 int trace_array_printk(struct trace_array *tr,
3341 		       unsigned long ip, const char *fmt, ...)
3342 {
3343 	int ret;
3344 	va_list ap;
3345 
3346 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3347 		return 0;
3348 
3349 	if (!tr)
3350 		return -ENOENT;
3351 
3352 	va_start(ap, fmt);
3353 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3354 	va_end(ap);
3355 	return ret;
3356 }
3357 EXPORT_SYMBOL_GPL(trace_array_printk);
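
/*
 * Sketch of writing into a named trace instance rather than the global
 * buffer: look up (or create) the instance and printk into it.  As the
 * check above shows, output is only recorded while the global
 * trace_printk option is set.  The instance name is hypothetical and
 * error handling is trimmed.
 */
#if 0
static void example_instance_printk(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");
	if (!tr)
		return;

	trace_array_printk(tr, _THIS_IP_, "hello from the %s instance\n",
			   "example");
	trace_array_put(tr);
}
#endif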
3358 
3359 __printf(3, 4)
3360 int trace_array_printk_buf(struct trace_buffer *buffer,
3361 			   unsigned long ip, const char *fmt, ...)
3362 {
3363 	int ret;
3364 	va_list ap;
3365 
3366 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3367 		return 0;
3368 
3369 	va_start(ap, fmt);
3370 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3371 	va_end(ap);
3372 	return ret;
3373 }
3374 
3375 __printf(2, 0)
3376 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3377 {
3378 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3379 }
3380 EXPORT_SYMBOL_GPL(trace_vprintk);
3381 
3382 static void trace_iterator_increment(struct trace_iterator *iter)
3383 {
3384 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3385 
3386 	iter->idx++;
3387 	if (buf_iter)
3388 		ring_buffer_iter_advance(buf_iter);
3389 }
3390 
3391 static struct trace_entry *
3392 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3393 		unsigned long *lost_events)
3394 {
3395 	struct ring_buffer_event *event;
3396 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3397 
3398 	if (buf_iter) {
3399 		event = ring_buffer_iter_peek(buf_iter, ts);
3400 		if (lost_events)
3401 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3402 				(unsigned long)-1 : 0;
3403 	} else {
3404 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3405 					 lost_events);
3406 	}
3407 
3408 	if (event) {
3409 		iter->ent_size = ring_buffer_event_length(event);
3410 		return ring_buffer_event_data(event);
3411 	}
3412 	iter->ent_size = 0;
3413 	return NULL;
3414 }
3415 
3416 static struct trace_entry *
3417 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3418 		  unsigned long *missing_events, u64 *ent_ts)
3419 {
3420 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3421 	struct trace_entry *ent, *next = NULL;
3422 	unsigned long lost_events = 0, next_lost = 0;
3423 	int cpu_file = iter->cpu_file;
3424 	u64 next_ts = 0, ts;
3425 	int next_cpu = -1;
3426 	int next_size = 0;
3427 	int cpu;
3428 
3429 	/*
3430 	 * If we are in a per_cpu trace file, don't bother iterating over
3431 	 * all CPUs; just peek at that CPU directly.
3432 	 */
3433 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3434 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3435 			return NULL;
3436 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3437 		if (ent_cpu)
3438 			*ent_cpu = cpu_file;
3439 
3440 		return ent;
3441 	}
3442 
3443 	for_each_tracing_cpu(cpu) {
3444 
3445 		if (ring_buffer_empty_cpu(buffer, cpu))
3446 			continue;
3447 
3448 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3449 
3450 		/*
3451 		 * Pick the entry with the smallest timestamp:
3452 		 */
3453 		if (ent && (!next || ts < next_ts)) {
3454 			next = ent;
3455 			next_cpu = cpu;
3456 			next_ts = ts;
3457 			next_lost = lost_events;
3458 			next_size = iter->ent_size;
3459 		}
3460 	}
3461 
3462 	iter->ent_size = next_size;
3463 
3464 	if (ent_cpu)
3465 		*ent_cpu = next_cpu;
3466 
3467 	if (ent_ts)
3468 		*ent_ts = next_ts;
3469 
3470 	if (missing_events)
3471 		*missing_events = next_lost;
3472 
3473 	return next;
3474 }
3475 
3476 #define STATIC_TEMP_BUF_SIZE	128
3477 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3478 
3479 /* Find the next real entry, without updating the iterator itself */
3480 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3481 					  int *ent_cpu, u64 *ent_ts)
3482 {
3483 	/* __find_next_entry will reset ent_size */
3484 	int ent_size = iter->ent_size;
3485 	struct trace_entry *entry;
3486 
3487 	/*
3488 	 * If called from ftrace_dump(), then the iter->temp buffer
3489 	 * will be the static_temp_buf and not created from kmalloc.
3490 	 * If the entry size is greater than the buffer, we can
3491 	 * not save it. Just return NULL in that case. This is only
3492 	 * used to add markers when two consecutive events' time
3493 	 * stamps have a large delta. See trace_print_lat_context()
3494 	 */
3495 	if (iter->temp == static_temp_buf &&
3496 	    STATIC_TEMP_BUF_SIZE < ent_size)
3497 		return NULL;
3498 
3499 	/*
3500 	 * The __find_next_entry() may call peek_next_entry(), which may
3501 	 * call ring_buffer_peek() that may make the contents of iter->ent
3502 	 * undefined. Need to copy iter->ent now.
3503 	 */
3504 	if (iter->ent && iter->ent != iter->temp) {
3505 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3506 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3507 			kfree(iter->temp);
3508 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3509 			if (!iter->temp)
3510 				return NULL;
3511 		}
3512 		memcpy(iter->temp, iter->ent, iter->ent_size);
3513 		iter->temp_size = iter->ent_size;
3514 		iter->ent = iter->temp;
3515 	}
3516 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3517 	/* Put back the original ent_size */
3518 	iter->ent_size = ent_size;
3519 
3520 	return entry;
3521 }
3522 
3523 /* Find the next real entry, and increment the iterator to the next entry */
3524 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3525 {
3526 	iter->ent = __find_next_entry(iter, &iter->cpu,
3527 				      &iter->lost_events, &iter->ts);
3528 
3529 	if (iter->ent)
3530 		trace_iterator_increment(iter);
3531 
3532 	return iter->ent ? iter : NULL;
3533 }
3534 
3535 static void trace_consume(struct trace_iterator *iter)
3536 {
3537 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3538 			    &iter->lost_events);
3539 }
3540 
3541 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3542 {
3543 	struct trace_iterator *iter = m->private;
3544 	int i = (int)*pos;
3545 	void *ent;
3546 
3547 	WARN_ON_ONCE(iter->leftover);
3548 
3549 	(*pos)++;
3550 
3551 	/* can't go backwards */
3552 	if (iter->idx > i)
3553 		return NULL;
3554 
3555 	if (iter->idx < 0)
3556 		ent = trace_find_next_entry_inc(iter);
3557 	else
3558 		ent = iter;
3559 
3560 	while (ent && iter->idx < i)
3561 		ent = trace_find_next_entry_inc(iter);
3562 
3563 	iter->pos = *pos;
3564 
3565 	return ent;
3566 }
3567 
3568 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3569 {
3570 	struct ring_buffer_event *event;
3571 	struct ring_buffer_iter *buf_iter;
3572 	unsigned long entries = 0;
3573 	u64 ts;
3574 
3575 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3576 
3577 	buf_iter = trace_buffer_iter(iter, cpu);
3578 	if (!buf_iter)
3579 		return;
3580 
3581 	ring_buffer_iter_reset(buf_iter);
3582 
3583 	/*
3584 	 * With the max latency tracers, a reset may never have taken
3585 	 * place on a CPU. This is evident by the timestamp being
3586 	 * before the start of the buffer.
3587 	 */
3588 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3589 		if (ts >= iter->array_buffer->time_start)
3590 			break;
3591 		entries++;
3592 		ring_buffer_iter_advance(buf_iter);
3593 	}
3594 
3595 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3596 }
3597 
3598 /*
3599  * The current tracer is copied to avoid taking a global lock
3600  * all around.
3601  */
3602 static void *s_start(struct seq_file *m, loff_t *pos)
3603 {
3604 	struct trace_iterator *iter = m->private;
3605 	struct trace_array *tr = iter->tr;
3606 	int cpu_file = iter->cpu_file;
3607 	void *p = NULL;
3608 	loff_t l = 0;
3609 	int cpu;
3610 
3611 	/*
3612 	 * copy the tracer to avoid using a global lock all around.
3613 	 * iter->trace is a copy of current_trace, the pointer to the
3614 	 * name may be used instead of a strcmp(), as iter->trace->name
3615 	 * will point to the same string as current_trace->name.
3616 	 */
3617 	mutex_lock(&trace_types_lock);
3618 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3619 		*iter->trace = *tr->current_trace;
3620 	mutex_unlock(&trace_types_lock);
3621 
3622 #ifdef CONFIG_TRACER_MAX_TRACE
3623 	if (iter->snapshot && iter->trace->use_max_tr)
3624 		return ERR_PTR(-EBUSY);
3625 #endif
3626 
3627 	if (!iter->snapshot)
3628 		atomic_inc(&trace_record_taskinfo_disabled);
3629 
3630 	if (*pos != iter->pos) {
3631 		iter->ent = NULL;
3632 		iter->cpu = 0;
3633 		iter->idx = -1;
3634 
3635 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3636 			for_each_tracing_cpu(cpu)
3637 				tracing_iter_reset(iter, cpu);
3638 		} else
3639 			tracing_iter_reset(iter, cpu_file);
3640 
3641 		iter->leftover = 0;
3642 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3643 			;
3644 
3645 	} else {
3646 		/*
3647 		 * If we overflowed the seq_file before, then we want
3648 		 * to just reuse the trace_seq buffer again.
3649 		 */
3650 		if (iter->leftover)
3651 			p = iter;
3652 		else {
3653 			l = *pos - 1;
3654 			p = s_next(m, p, &l);
3655 		}
3656 	}
3657 
3658 	trace_event_read_lock();
3659 	trace_access_lock(cpu_file);
3660 	return p;
3661 }
3662 
3663 static void s_stop(struct seq_file *m, void *p)
3664 {
3665 	struct trace_iterator *iter = m->private;
3666 
3667 #ifdef CONFIG_TRACER_MAX_TRACE
3668 	if (iter->snapshot && iter->trace->use_max_tr)
3669 		return;
3670 #endif
3671 
3672 	if (!iter->snapshot)
3673 		atomic_dec(&trace_record_taskinfo_disabled);
3674 
3675 	trace_access_unlock(iter->cpu_file);
3676 	trace_event_read_unlock();
3677 }
3678 
3679 static void
3680 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3681 		      unsigned long *entries, int cpu)
3682 {
3683 	unsigned long count;
3684 
3685 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3686 	/*
3687 	 * If this buffer has skipped entries, then we hold all
3688 	 * entries for the trace and we need to ignore the
3689 	 * ones before the time stamp.
3690 	 */
3691 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3692 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3693 		/* total is the same as the entries */
3694 		*total = count;
3695 	} else
3696 		*total = count +
3697 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3698 	*entries = count;
3699 }
3700 
3701 static void
3702 get_total_entries(struct array_buffer *buf,
3703 		  unsigned long *total, unsigned long *entries)
3704 {
3705 	unsigned long t, e;
3706 	int cpu;
3707 
3708 	*total = 0;
3709 	*entries = 0;
3710 
3711 	for_each_tracing_cpu(cpu) {
3712 		get_total_entries_cpu(buf, &t, &e, cpu);
3713 		*total += t;
3714 		*entries += e;
3715 	}
3716 }
3717 
3718 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3719 {
3720 	unsigned long total, entries;
3721 
3722 	if (!tr)
3723 		tr = &global_trace;
3724 
3725 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3726 
3727 	return entries;
3728 }
3729 
3730 unsigned long trace_total_entries(struct trace_array *tr)
3731 {
3732 	unsigned long total, entries;
3733 
3734 	if (!tr)
3735 		tr = &global_trace;
3736 
3737 	get_total_entries(&tr->array_buffer, &total, &entries);
3738 
3739 	return entries;
3740 }
3741 
3742 static void print_lat_help_header(struct seq_file *m)
3743 {
3744 	seq_puts(m, "#                  _------=> CPU#            \n"
3745 		    "#                 / _-----=> irqs-off        \n"
3746 		    "#                | / _----=> need-resched    \n"
3747 		    "#                || / _---=> hardirq/softirq \n"
3748 		    "#                ||| / _--=> preempt-depth   \n"
3749 		    "#                |||| /     delay            \n"
3750 		    "#  cmd     pid   ||||| time  |   caller      \n"
3751 		    "#     \\   /      |||||  \\    |   /         \n");
3752 }
3753 
3754 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3755 {
3756 	unsigned long total;
3757 	unsigned long entries;
3758 
3759 	get_total_entries(buf, &total, &entries);
3760 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3761 		   entries, total, num_online_cpus());
3762 	seq_puts(m, "#\n");
3763 }
3764 
3765 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3766 				   unsigned int flags)
3767 {
3768 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3769 
3770 	print_event_info(buf, m);
3771 
3772 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3773 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3774 }
3775 
3776 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3777 				       unsigned int flags)
3778 {
3779 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3780 	const char *space = "          ";
3781 	int prec = tgid ? 10 : 2;
3782 
3783 	print_event_info(buf, m);
3784 
3785 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3786 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3787 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3788 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3789 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3790 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3791 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3792 }
3793 
3794 void
3795 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3796 {
3797 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3798 	struct array_buffer *buf = iter->array_buffer;
3799 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3800 	struct tracer *type = iter->trace;
3801 	unsigned long entries;
3802 	unsigned long total;
3803 	const char *name;
3804 
3805 	name = type->name;
3806 
3807 	get_total_entries(buf, &total, &entries);
3808 
3809 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3810 		   name, UTS_RELEASE);
3811 	seq_puts(m, "# -----------------------------------"
3812 		 "---------------------------------\n");
3813 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3814 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3815 		   nsecs_to_usecs(data->saved_latency),
3816 		   entries,
3817 		   total,
3818 		   buf->cpu,
3819 #if defined(CONFIG_PREEMPT_NONE)
3820 		   "server",
3821 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3822 		   "desktop",
3823 #elif defined(CONFIG_PREEMPT)
3824 		   "preempt",
3825 #elif defined(CONFIG_PREEMPT_RT)
3826 		   "preempt_rt",
3827 #else
3828 		   "unknown",
3829 #endif
3830 		   /* These are reserved for later use */
3831 		   0, 0, 0, 0);
3832 #ifdef CONFIG_SMP
3833 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3834 #else
3835 	seq_puts(m, ")\n");
3836 #endif
3837 	seq_puts(m, "#    -----------------\n");
3838 	seq_printf(m, "#    | task: %.16s-%d "
3839 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3840 		   data->comm, data->pid,
3841 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3842 		   data->policy, data->rt_priority);
3843 	seq_puts(m, "#    -----------------\n");
3844 
3845 	if (data->critical_start) {
3846 		seq_puts(m, "#  => started at: ");
3847 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3848 		trace_print_seq(m, &iter->seq);
3849 		seq_puts(m, "\n#  => ended at:   ");
3850 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3851 		trace_print_seq(m, &iter->seq);
3852 		seq_puts(m, "\n#\n");
3853 	}
3854 
3855 	seq_puts(m, "#\n");
3856 }
3857 
3858 static void test_cpu_buff_start(struct trace_iterator *iter)
3859 {
3860 	struct trace_seq *s = &iter->seq;
3861 	struct trace_array *tr = iter->tr;
3862 
3863 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3864 		return;
3865 
3866 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3867 		return;
3868 
3869 	if (cpumask_available(iter->started) &&
3870 	    cpumask_test_cpu(iter->cpu, iter->started))
3871 		return;
3872 
3873 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3874 		return;
3875 
3876 	if (cpumask_available(iter->started))
3877 		cpumask_set_cpu(iter->cpu, iter->started);
3878 
3879 	/* Don't print started cpu buffer for the first entry of the trace */
3880 	if (iter->idx > 1)
3881 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3882 				iter->cpu);
3883 }
3884 
3885 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3886 {
3887 	struct trace_array *tr = iter->tr;
3888 	struct trace_seq *s = &iter->seq;
3889 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3890 	struct trace_entry *entry;
3891 	struct trace_event *event;
3892 
3893 	entry = iter->ent;
3894 
3895 	test_cpu_buff_start(iter);
3896 
3897 	event = ftrace_find_event(entry->type);
3898 
3899 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3900 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3901 			trace_print_lat_context(iter);
3902 		else
3903 			trace_print_context(iter);
3904 	}
3905 
3906 	if (trace_seq_has_overflowed(s))
3907 		return TRACE_TYPE_PARTIAL_LINE;
3908 
3909 	if (event)
3910 		return event->funcs->trace(iter, sym_flags, event);
3911 
3912 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3913 
3914 	return trace_handle_return(s);
3915 }
3916 
3917 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3918 {
3919 	struct trace_array *tr = iter->tr;
3920 	struct trace_seq *s = &iter->seq;
3921 	struct trace_entry *entry;
3922 	struct trace_event *event;
3923 
3924 	entry = iter->ent;
3925 
3926 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3927 		trace_seq_printf(s, "%d %d %llu ",
3928 				 entry->pid, iter->cpu, iter->ts);
3929 
3930 	if (trace_seq_has_overflowed(s))
3931 		return TRACE_TYPE_PARTIAL_LINE;
3932 
3933 	event = ftrace_find_event(entry->type);
3934 	if (event)
3935 		return event->funcs->raw(iter, 0, event);
3936 
3937 	trace_seq_printf(s, "%d ?\n", entry->type);
3938 
3939 	return trace_handle_return(s);
3940 }
3941 
3942 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3943 {
3944 	struct trace_array *tr = iter->tr;
3945 	struct trace_seq *s = &iter->seq;
3946 	unsigned char newline = '\n';
3947 	struct trace_entry *entry;
3948 	struct trace_event *event;
3949 
3950 	entry = iter->ent;
3951 
3952 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3953 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3954 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3955 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3956 		if (trace_seq_has_overflowed(s))
3957 			return TRACE_TYPE_PARTIAL_LINE;
3958 	}
3959 
3960 	event = ftrace_find_event(entry->type);
3961 	if (event) {
3962 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3963 		if (ret != TRACE_TYPE_HANDLED)
3964 			return ret;
3965 	}
3966 
3967 	SEQ_PUT_FIELD(s, newline);
3968 
3969 	return trace_handle_return(s);
3970 }
3971 
3972 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3973 {
3974 	struct trace_array *tr = iter->tr;
3975 	struct trace_seq *s = &iter->seq;
3976 	struct trace_entry *entry;
3977 	struct trace_event *event;
3978 
3979 	entry = iter->ent;
3980 
3981 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3982 		SEQ_PUT_FIELD(s, entry->pid);
3983 		SEQ_PUT_FIELD(s, iter->cpu);
3984 		SEQ_PUT_FIELD(s, iter->ts);
3985 		if (trace_seq_has_overflowed(s))
3986 			return TRACE_TYPE_PARTIAL_LINE;
3987 	}
3988 
3989 	event = ftrace_find_event(entry->type);
3990 	return event ? event->funcs->binary(iter, 0, event) :
3991 		TRACE_TYPE_HANDLED;
3992 }
3993 
3994 int trace_empty(struct trace_iterator *iter)
3995 {
3996 	struct ring_buffer_iter *buf_iter;
3997 	int cpu;
3998 
3999 	/* If we are looking at one CPU buffer, only check that one */
4000 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4001 		cpu = iter->cpu_file;
4002 		buf_iter = trace_buffer_iter(iter, cpu);
4003 		if (buf_iter) {
4004 			if (!ring_buffer_iter_empty(buf_iter))
4005 				return 0;
4006 		} else {
4007 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4008 				return 0;
4009 		}
4010 		return 1;
4011 	}
4012 
4013 	for_each_tracing_cpu(cpu) {
4014 		buf_iter = trace_buffer_iter(iter, cpu);
4015 		if (buf_iter) {
4016 			if (!ring_buffer_iter_empty(buf_iter))
4017 				return 0;
4018 		} else {
4019 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4020 				return 0;
4021 		}
4022 	}
4023 
4024 	return 1;
4025 }
4026 
4027 /*  Called with trace_event_read_lock() held. */
4028 enum print_line_t print_trace_line(struct trace_iterator *iter)
4029 {
4030 	struct trace_array *tr = iter->tr;
4031 	unsigned long trace_flags = tr->trace_flags;
4032 	enum print_line_t ret;
4033 
4034 	if (iter->lost_events) {
4035 		if (iter->lost_events == (unsigned long)-1)
4036 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4037 					 iter->cpu);
4038 		else
4039 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4040 					 iter->cpu, iter->lost_events);
4041 		if (trace_seq_has_overflowed(&iter->seq))
4042 			return TRACE_TYPE_PARTIAL_LINE;
4043 	}
4044 
4045 	if (iter->trace && iter->trace->print_line) {
4046 		ret = iter->trace->print_line(iter);
4047 		if (ret != TRACE_TYPE_UNHANDLED)
4048 			return ret;
4049 	}
4050 
4051 	if (iter->ent->type == TRACE_BPUTS &&
4052 			trace_flags & TRACE_ITER_PRINTK &&
4053 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4054 		return trace_print_bputs_msg_only(iter);
4055 
4056 	if (iter->ent->type == TRACE_BPRINT &&
4057 			trace_flags & TRACE_ITER_PRINTK &&
4058 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4059 		return trace_print_bprintk_msg_only(iter);
4060 
4061 	if (iter->ent->type == TRACE_PRINT &&
4062 			trace_flags & TRACE_ITER_PRINTK &&
4063 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4064 		return trace_print_printk_msg_only(iter);
4065 
4066 	if (trace_flags & TRACE_ITER_BIN)
4067 		return print_bin_fmt(iter);
4068 
4069 	if (trace_flags & TRACE_ITER_HEX)
4070 		return print_hex_fmt(iter);
4071 
4072 	if (trace_flags & TRACE_ITER_RAW)
4073 		return print_raw_fmt(iter);
4074 
4075 	return print_trace_fmt(iter);
4076 }
4077 
4078 void trace_latency_header(struct seq_file *m)
4079 {
4080 	struct trace_iterator *iter = m->private;
4081 	struct trace_array *tr = iter->tr;
4082 
4083 	/* print nothing if the buffers are empty */
4084 	if (trace_empty(iter))
4085 		return;
4086 
4087 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4088 		print_trace_header(m, iter);
4089 
4090 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4091 		print_lat_help_header(m);
4092 }
4093 
4094 void trace_default_header(struct seq_file *m)
4095 {
4096 	struct trace_iterator *iter = m->private;
4097 	struct trace_array *tr = iter->tr;
4098 	unsigned long trace_flags = tr->trace_flags;
4099 
4100 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4101 		return;
4102 
4103 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4104 		/* print nothing if the buffers are empty */
4105 		if (trace_empty(iter))
4106 			return;
4107 		print_trace_header(m, iter);
4108 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4109 			print_lat_help_header(m);
4110 	} else {
4111 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4112 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4113 				print_func_help_header_irq(iter->array_buffer,
4114 							   m, trace_flags);
4115 			else
4116 				print_func_help_header(iter->array_buffer, m,
4117 						       trace_flags);
4118 		}
4119 	}
4120 }
4121 
4122 static void test_ftrace_alive(struct seq_file *m)
4123 {
4124 	if (!ftrace_is_dead())
4125 		return;
4126 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4127 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4128 }
4129 
4130 #ifdef CONFIG_TRACER_MAX_TRACE
4131 static void show_snapshot_main_help(struct seq_file *m)
4132 {
4133 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4134 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4135 		    "#                      Takes a snapshot of the main buffer.\n"
4136 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4137 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4138 		    "#                       is not a '0' or '1')\n");
4139 }
4140 
4141 static void show_snapshot_percpu_help(struct seq_file *m)
4142 {
4143 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4144 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4145 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4146 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4147 #else
4148 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4149 		    "#                     Must use main snapshot file to allocate.\n");
4150 #endif
4151 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4152 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4153 		    "#                       is not a '0' or '1')\n");
4154 }
4155 
4156 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4157 {
4158 	if (iter->tr->allocated_snapshot)
4159 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4160 	else
4161 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4162 
4163 	seq_puts(m, "# Snapshot commands:\n");
4164 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4165 		show_snapshot_main_help(m);
4166 	else
4167 		show_snapshot_percpu_help(m);
4168 }
4169 #else
4170 /* Should never be called */
4171 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4172 #endif
4173 
4174 static int s_show(struct seq_file *m, void *v)
4175 {
4176 	struct trace_iterator *iter = v;
4177 	int ret;
4178 
4179 	if (iter->ent == NULL) {
4180 		if (iter->tr) {
4181 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4182 			seq_puts(m, "#\n");
4183 			test_ftrace_alive(m);
4184 		}
4185 		if (iter->snapshot && trace_empty(iter))
4186 			print_snapshot_help(m, iter);
4187 		else if (iter->trace && iter->trace->print_header)
4188 			iter->trace->print_header(m);
4189 		else
4190 			trace_default_header(m);
4191 
4192 	} else if (iter->leftover) {
4193 		/*
4194 		 * If we filled the seq_file buffer earlier, we
4195 		 * want to just show it now.
4196 		 */
4197 		ret = trace_print_seq(m, &iter->seq);
4198 
4199 		/* ret should this time be zero, but you never know */
4200 		iter->leftover = ret;
4201 
4202 	} else {
4203 		print_trace_line(iter);
4204 		ret = trace_print_seq(m, &iter->seq);
4205 		/*
4206 		 * If we overflow the seq_file buffer, then it will
4207 		 * ask us for this data again at start up.
4208 		 * Use that instead.
4209 		 *  ret is 0 if seq_file write succeeded.
4210 		 *        -1 otherwise.
4211 		 */
4212 		iter->leftover = ret;
4213 	}
4214 
4215 	return 0;
4216 }
4217 
4218 /*
4219  * Should be used after trace_array_get(); trace_types_lock
4220  * ensures that i_cdev was already initialized.
4221  */
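/*
 * trace_create_cpu_file() stores the CPU number offset by one in i_cdev,
 * so a NULL i_cdev (the top level files) maps to RING_BUFFER_ALL_CPUS and
 * the "- 1" below recovers the real CPU number.
 */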
4222 static inline int tracing_get_cpu(struct inode *inode)
4223 {
4224 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4225 		return (long)inode->i_cdev - 1;
4226 	return RING_BUFFER_ALL_CPUS;
4227 }
4228 
4229 static const struct seq_operations tracer_seq_ops = {
4230 	.start		= s_start,
4231 	.next		= s_next,
4232 	.stop		= s_stop,
4233 	.show		= s_show,
4234 };
4235 
4236 static struct trace_iterator *
4237 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4238 {
4239 	struct trace_array *tr = inode->i_private;
4240 	struct trace_iterator *iter;
4241 	int cpu;
4242 
4243 	if (tracing_disabled)
4244 		return ERR_PTR(-ENODEV);
4245 
4246 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4247 	if (!iter)
4248 		return ERR_PTR(-ENOMEM);
4249 
4250 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4251 				    GFP_KERNEL);
4252 	if (!iter->buffer_iter)
4253 		goto release;
4254 
4255 	/*
4256 	 * trace_find_next_entry() may need to save off iter->ent.
4257 	 * It will place it into the iter->temp buffer. As most
4258 	 * events are less than 128, allocate a buffer of that size.
4259 	 * If one is greater, then trace_find_next_entry() will
4260 	 * allocate a new buffer to adjust for the bigger iter->ent.
4261 	 * It's not critical if it fails to get allocated here.
4262 	 */
4263 	iter->temp = kmalloc(128, GFP_KERNEL);
4264 	if (iter->temp)
4265 		iter->temp_size = 128;
4266 
4267 	/*
4268 	 * We make a copy of the current tracer to avoid concurrent
4269 	 * changes on it while we are reading.
4270 	 */
4271 	mutex_lock(&trace_types_lock);
4272 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4273 	if (!iter->trace)
4274 		goto fail;
4275 
4276 	*iter->trace = *tr->current_trace;
4277 
4278 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4279 		goto fail;
4280 
4281 	iter->tr = tr;
4282 
4283 #ifdef CONFIG_TRACER_MAX_TRACE
4284 	/* Currently only the top directory has a snapshot */
4285 	if (tr->current_trace->print_max || snapshot)
4286 		iter->array_buffer = &tr->max_buffer;
4287 	else
4288 #endif
4289 		iter->array_buffer = &tr->array_buffer;
4290 	iter->snapshot = snapshot;
4291 	iter->pos = -1;
4292 	iter->cpu_file = tracing_get_cpu(inode);
4293 	mutex_init(&iter->mutex);
4294 
4295 	/* Notify the tracer early; before we stop tracing. */
4296 	if (iter->trace->open)
4297 		iter->trace->open(iter);
4298 
4299 	/* Annotate start of buffers if we had overruns */
4300 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4301 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4302 
4303 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4304 	if (trace_clocks[tr->clock_id].in_ns)
4305 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4306 
4307 	/*
4308 	 * If pause-on-trace is enabled, then stop the trace while
4309 	 * dumping, unless this is the "snapshot" file
4310 	 */
4311 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4312 		tracing_stop_tr(tr);
4313 
4314 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4315 		for_each_tracing_cpu(cpu) {
4316 			iter->buffer_iter[cpu] =
4317 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4318 							 cpu, GFP_KERNEL);
4319 		}
4320 		ring_buffer_read_prepare_sync();
4321 		for_each_tracing_cpu(cpu) {
4322 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4323 			tracing_iter_reset(iter, cpu);
4324 		}
4325 	} else {
4326 		cpu = iter->cpu_file;
4327 		iter->buffer_iter[cpu] =
4328 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4329 						 cpu, GFP_KERNEL);
4330 		ring_buffer_read_prepare_sync();
4331 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4332 		tracing_iter_reset(iter, cpu);
4333 	}
4334 
4335 	mutex_unlock(&trace_types_lock);
4336 
4337 	return iter;
4338 
4339  fail:
4340 	mutex_unlock(&trace_types_lock);
4341 	kfree(iter->trace);
4342 	kfree(iter->temp);
4343 	kfree(iter->buffer_iter);
4344 release:
4345 	seq_release_private(inode, file);
4346 	return ERR_PTR(-ENOMEM);
4347 }
4348 
4349 int tracing_open_generic(struct inode *inode, struct file *filp)
4350 {
4351 	int ret;
4352 
4353 	ret = tracing_check_open_get_tr(NULL);
4354 	if (ret)
4355 		return ret;
4356 
4357 	filp->private_data = inode->i_private;
4358 	return 0;
4359 }
4360 
4361 bool tracing_is_disabled(void)
4362 {
4363 	return (tracing_disabled) ? true : false;
4364 }
4365 
4366 /*
4367  * Open and update trace_array ref count.
4368  * Must have the current trace_array passed to it.
4369  */
4370 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4371 {
4372 	struct trace_array *tr = inode->i_private;
4373 	int ret;
4374 
4375 	ret = tracing_check_open_get_tr(tr);
4376 	if (ret)
4377 		return ret;
4378 
4379 	filp->private_data = inode->i_private;
4380 
4381 	return 0;
4382 }
4383 
4384 static int tracing_release(struct inode *inode, struct file *file)
4385 {
4386 	struct trace_array *tr = inode->i_private;
4387 	struct seq_file *m = file->private_data;
4388 	struct trace_iterator *iter;
4389 	int cpu;
4390 
4391 	if (!(file->f_mode & FMODE_READ)) {
4392 		trace_array_put(tr);
4393 		return 0;
4394 	}
4395 
4396 	/* Writes do not use seq_file */
4397 	iter = m->private;
4398 	mutex_lock(&trace_types_lock);
4399 
4400 	for_each_tracing_cpu(cpu) {
4401 		if (iter->buffer_iter[cpu])
4402 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4403 	}
4404 
4405 	if (iter->trace && iter->trace->close)
4406 		iter->trace->close(iter);
4407 
4408 	if (!iter->snapshot && tr->stop_count)
4409 		/* reenable tracing if it was previously enabled */
4410 		tracing_start_tr(tr);
4411 
4412 	__trace_array_put(tr);
4413 
4414 	mutex_unlock(&trace_types_lock);
4415 
4416 	mutex_destroy(&iter->mutex);
4417 	free_cpumask_var(iter->started);
4418 	kfree(iter->temp);
4419 	kfree(iter->trace);
4420 	kfree(iter->buffer_iter);
4421 	seq_release_private(inode, file);
4422 
4423 	return 0;
4424 }
4425 
4426 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4427 {
4428 	struct trace_array *tr = inode->i_private;
4429 
4430 	trace_array_put(tr);
4431 	return 0;
4432 }
4433 
4434 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4435 {
4436 	struct trace_array *tr = inode->i_private;
4437 
4438 	trace_array_put(tr);
4439 
4440 	return single_release(inode, file);
4441 }
4442 
4443 static int tracing_open(struct inode *inode, struct file *file)
4444 {
4445 	struct trace_array *tr = inode->i_private;
4446 	struct trace_iterator *iter;
4447 	int ret;
4448 
4449 	ret = tracing_check_open_get_tr(tr);
4450 	if (ret)
4451 		return ret;
4452 
4453 	/* If this file was open for write, then erase contents */
4454 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4455 		int cpu = tracing_get_cpu(inode);
4456 		struct array_buffer *trace_buf = &tr->array_buffer;
4457 
4458 #ifdef CONFIG_TRACER_MAX_TRACE
4459 		if (tr->current_trace->print_max)
4460 			trace_buf = &tr->max_buffer;
4461 #endif
4462 
4463 		if (cpu == RING_BUFFER_ALL_CPUS)
4464 			tracing_reset_online_cpus(trace_buf);
4465 		else
4466 			tracing_reset_cpu(trace_buf, cpu);
4467 	}
4468 
4469 	if (file->f_mode & FMODE_READ) {
4470 		iter = __tracing_open(inode, file, false);
4471 		if (IS_ERR(iter))
4472 			ret = PTR_ERR(iter);
4473 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4474 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4475 	}
4476 
4477 	if (ret < 0)
4478 		trace_array_put(tr);
4479 
4480 	return ret;
4481 }
4482 
4483 /*
4484  * Some tracers are not suitable for instance buffers.
4485  * A tracer is always available for the global array (toplevel)
4486  * or if it explicitly states that it is.
4487  */
4488 static bool
4489 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4490 {
4491 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4492 }
4493 
4494 /* Find the next tracer that this trace array may use */
4495 static struct tracer *
4496 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4497 {
4498 	while (t && !trace_ok_for_array(t, tr))
4499 		t = t->next;
4500 
4501 	return t;
4502 }
4503 
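/*
 * seq_file iterator used by the "available_tracers" file: walk the global
 * trace_types list, skipping tracers that this trace_array may not use.
 */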
4504 static void *
4505 t_next(struct seq_file *m, void *v, loff_t *pos)
4506 {
4507 	struct trace_array *tr = m->private;
4508 	struct tracer *t = v;
4509 
4510 	(*pos)++;
4511 
4512 	if (t)
4513 		t = get_tracer_for_array(tr, t->next);
4514 
4515 	return t;
4516 }
4517 
4518 static void *t_start(struct seq_file *m, loff_t *pos)
4519 {
4520 	struct trace_array *tr = m->private;
4521 	struct tracer *t;
4522 	loff_t l = 0;
4523 
4524 	mutex_lock(&trace_types_lock);
4525 
4526 	t = get_tracer_for_array(tr, trace_types);
4527 	for (; t && l < *pos; t = t_next(m, t, &l))
4528 		;
4529 
4530 	return t;
4531 }
4532 
4533 static void t_stop(struct seq_file *m, void *p)
4534 {
4535 	mutex_unlock(&trace_types_lock);
4536 }
4537 
4538 static int t_show(struct seq_file *m, void *v)
4539 {
4540 	struct tracer *t = v;
4541 
4542 	if (!t)
4543 		return 0;
4544 
4545 	seq_puts(m, t->name);
4546 	if (t->next)
4547 		seq_putc(m, ' ');
4548 	else
4549 		seq_putc(m, '\n');
4550 
4551 	return 0;
4552 }
4553 
4554 static const struct seq_operations show_traces_seq_ops = {
4555 	.start		= t_start,
4556 	.next		= t_next,
4557 	.stop		= t_stop,
4558 	.show		= t_show,
4559 };
4560 
4561 static int show_traces_open(struct inode *inode, struct file *file)
4562 {
4563 	struct trace_array *tr = inode->i_private;
4564 	struct seq_file *m;
4565 	int ret;
4566 
4567 	ret = tracing_check_open_get_tr(tr);
4568 	if (ret)
4569 		return ret;
4570 
4571 	ret = seq_open(file, &show_traces_seq_ops);
4572 	if (ret) {
4573 		trace_array_put(tr);
4574 		return ret;
4575 	}
4576 
4577 	m = file->private_data;
4578 	m->private = tr;
4579 
4580 	return 0;
4581 }
4582 
4583 static int show_traces_release(struct inode *inode, struct file *file)
4584 {
4585 	struct trace_array *tr = inode->i_private;
4586 
4587 	trace_array_put(tr);
4588 	return seq_release(inode, file);
4589 }
4590 
4591 static ssize_t
4592 tracing_write_stub(struct file *filp, const char __user *ubuf,
4593 		   size_t count, loff_t *ppos)
4594 {
4595 	return count;
4596 }
4597 
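/*
 * Seeking is only meaningful when the file was opened for reading (and
 * thus uses seq_file); for write-only opens the position is just reset
 * to zero.
 */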
4598 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4599 {
4600 	int ret;
4601 
4602 	if (file->f_mode & FMODE_READ)
4603 		ret = seq_lseek(file, offset, whence);
4604 	else
4605 		file->f_pos = ret = 0;
4606 
4607 	return ret;
4608 }
4609 
4610 static const struct file_operations tracing_fops = {
4611 	.open		= tracing_open,
4612 	.read		= seq_read,
4613 	.write		= tracing_write_stub,
4614 	.llseek		= tracing_lseek,
4615 	.release	= tracing_release,
4616 };
4617 
4618 static const struct file_operations show_traces_fops = {
4619 	.open		= show_traces_open,
4620 	.read		= seq_read,
4621 	.llseek		= seq_lseek,
4622 	.release	= show_traces_release,
4623 };
4624 
4625 static ssize_t
4626 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4627 		     size_t count, loff_t *ppos)
4628 {
4629 	struct trace_array *tr = file_inode(filp)->i_private;
4630 	char *mask_str;
4631 	int len;
4632 
4633 	len = snprintf(NULL, 0, "%*pb\n",
4634 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4635 	mask_str = kmalloc(len, GFP_KERNEL);
4636 	if (!mask_str)
4637 		return -ENOMEM;
4638 
4639 	len = snprintf(mask_str, len, "%*pb\n",
4640 		       cpumask_pr_args(tr->tracing_cpumask));
4641 	if (len >= count) {
4642 		count = -EINVAL;
4643 		goto out_err;
4644 	}
4645 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4646 
4647 out_err:
4648 	kfree(mask_str);
4649 
4650 	return count;
4651 }
4652 
4653 int tracing_set_cpumask(struct trace_array *tr,
4654 			cpumask_var_t tracing_cpumask_new)
4655 {
4656 	int cpu;
4657 
4658 	if (!tr)
4659 		return -EINVAL;
4660 
4661 	local_irq_disable();
4662 	arch_spin_lock(&tr->max_lock);
4663 	for_each_tracing_cpu(cpu) {
4664 		/*
4665 		 * Increase/decrease the disabled counter if we are
4666 		 * about to flip a bit in the cpumask:
4667 		 */
4668 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4669 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4670 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4671 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4672 		}
4673 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4674 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4675 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4676 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4677 		}
4678 	}
4679 	arch_spin_unlock(&tr->max_lock);
4680 	local_irq_enable();
4681 
4682 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4683 
4684 	return 0;
4685 }
4686 
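/*
 * The mask is written as a hex cpumask parsed by cpumask_parse_user(),
 * e.g. "echo 3 > tracing_cpumask" restricts tracing to CPUs 0 and 1.
 */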
4687 static ssize_t
4688 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4689 		      size_t count, loff_t *ppos)
4690 {
4691 	struct trace_array *tr = file_inode(filp)->i_private;
4692 	cpumask_var_t tracing_cpumask_new;
4693 	int err;
4694 
4695 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4696 		return -ENOMEM;
4697 
4698 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4699 	if (err)
4700 		goto err_free;
4701 
4702 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4703 	if (err)
4704 		goto err_free;
4705 
4706 	free_cpumask_var(tracing_cpumask_new);
4707 
4708 	return count;
4709 
4710 err_free:
4711 	free_cpumask_var(tracing_cpumask_new);
4712 
4713 	return err;
4714 }
4715 
4716 static const struct file_operations tracing_cpumask_fops = {
4717 	.open		= tracing_open_generic_tr,
4718 	.read		= tracing_cpumask_read,
4719 	.write		= tracing_cpumask_write,
4720 	.release	= tracing_release_generic_tr,
4721 	.llseek		= generic_file_llseek,
4722 };
4723 
4724 static int tracing_trace_options_show(struct seq_file *m, void *v)
4725 {
4726 	struct tracer_opt *trace_opts;
4727 	struct trace_array *tr = m->private;
4728 	u32 tracer_flags;
4729 	int i;
4730 
4731 	mutex_lock(&trace_types_lock);
4732 	tracer_flags = tr->current_trace->flags->val;
4733 	trace_opts = tr->current_trace->flags->opts;
4734 
4735 	for (i = 0; trace_options[i]; i++) {
4736 		if (tr->trace_flags & (1 << i))
4737 			seq_printf(m, "%s\n", trace_options[i]);
4738 		else
4739 			seq_printf(m, "no%s\n", trace_options[i]);
4740 	}
4741 
4742 	for (i = 0; trace_opts[i].name; i++) {
4743 		if (tracer_flags & trace_opts[i].bit)
4744 			seq_printf(m, "%s\n", trace_opts[i].name);
4745 		else
4746 			seq_printf(m, "no%s\n", trace_opts[i].name);
4747 	}
4748 	mutex_unlock(&trace_types_lock);
4749 
4750 	return 0;
4751 }
4752 
4753 static int __set_tracer_option(struct trace_array *tr,
4754 			       struct tracer_flags *tracer_flags,
4755 			       struct tracer_opt *opts, int neg)
4756 {
4757 	struct tracer *trace = tracer_flags->trace;
4758 	int ret;
4759 
4760 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4761 	if (ret)
4762 		return ret;
4763 
4764 	if (neg)
4765 		tracer_flags->val &= ~opts->bit;
4766 	else
4767 		tracer_flags->val |= opts->bit;
4768 	return 0;
4769 }
4770 
4771 /* Try to assign a tracer specific option */
4772 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4773 {
4774 	struct tracer *trace = tr->current_trace;
4775 	struct tracer_flags *tracer_flags = trace->flags;
4776 	struct tracer_opt *opts = NULL;
4777 	int i;
4778 
4779 	for (i = 0; tracer_flags->opts[i].name; i++) {
4780 		opts = &tracer_flags->opts[i];
4781 
4782 		if (strcmp(cmp, opts->name) == 0)
4783 			return __set_tracer_option(tr, trace->flags, opts, neg);
4784 	}
4785 
4786 	return -EINVAL;
4787 }
4788 
4789 /* Some tracers require overwrite to stay enabled */
4790 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4791 {
4792 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4793 		return -1;
4794 
4795 	return 0;
4796 }
4797 
4798 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4799 {
4800 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4801 	    (mask == TRACE_ITER_RECORD_CMD))
4802 		lockdep_assert_held(&event_mutex);
4803 
4804 	/* do nothing if flag is already set */
4805 	if (!!(tr->trace_flags & mask) == !!enabled)
4806 		return 0;
4807 
4808 	/* Give the tracer a chance to approve the change */
4809 	if (tr->current_trace->flag_changed)
4810 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4811 			return -EINVAL;
4812 
4813 	if (enabled)
4814 		tr->trace_flags |= mask;
4815 	else
4816 		tr->trace_flags &= ~mask;
4817 
4818 	if (mask == TRACE_ITER_RECORD_CMD)
4819 		trace_event_enable_cmd_record(enabled);
4820 
4821 	if (mask == TRACE_ITER_RECORD_TGID) {
4822 		if (!tgid_map)
4823 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4824 					   sizeof(*tgid_map),
4825 					   GFP_KERNEL);
4826 		if (!tgid_map) {
4827 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4828 			return -ENOMEM;
4829 		}
4830 
4831 		trace_event_enable_tgid_record(enabled);
4832 	}
4833 
4834 	if (mask == TRACE_ITER_EVENT_FORK)
4835 		trace_event_follow_fork(tr, enabled);
4836 
4837 	if (mask == TRACE_ITER_FUNC_FORK)
4838 		ftrace_pid_follow_fork(tr, enabled);
4839 
4840 	if (mask == TRACE_ITER_OVERWRITE) {
4841 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4842 #ifdef CONFIG_TRACER_MAX_TRACE
4843 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4844 #endif
4845 	}
4846 
4847 	if (mask == TRACE_ITER_PRINTK) {
4848 		trace_printk_start_stop_comm(enabled);
4849 		trace_printk_control(enabled);
4850 	}
4851 
4852 	return 0;
4853 }
4854 
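/*
 * Apply a single option string to @tr, e.g. "print-parent" sets the flag
 * and "noprint-parent" clears it.  Options that are not core trace flags
 * fall back to the current tracer's private options.
 */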
4855 int trace_set_options(struct trace_array *tr, char *option)
4856 {
4857 	char *cmp;
4858 	int neg = 0;
4859 	int ret;
4860 	size_t orig_len = strlen(option);
4861 	int len;
4862 
4863 	cmp = strstrip(option);
4864 
4865 	len = str_has_prefix(cmp, "no");
4866 	if (len)
4867 		neg = 1;
4868 
4869 	cmp += len;
4870 
4871 	mutex_lock(&event_mutex);
4872 	mutex_lock(&trace_types_lock);
4873 
4874 	ret = match_string(trace_options, -1, cmp);
4875 	/* If no option could be set, test the specific tracer options */
4876 	if (ret < 0)
4877 		ret = set_tracer_option(tr, cmp, neg);
4878 	else
4879 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4880 
4881 	mutex_unlock(&trace_types_lock);
4882 	mutex_unlock(&event_mutex);
4883 
4884 	/*
4885 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4886 	 * turn it back into a space.
4887 	 */
4888 	if (orig_len > strlen(option))
4889 		option[strlen(option)] = ' ';
4890 
4891 	return ret;
4892 }
4893 
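/*
 * Apply the comma separated list saved from the "trace_options=" boot
 * parameter, e.g. trace_options=stacktrace,noirq-info.
 */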
4894 static void __init apply_trace_boot_options(void)
4895 {
4896 	char *buf = trace_boot_options_buf;
4897 	char *option;
4898 
4899 	while (true) {
4900 		option = strsep(&buf, ",");
4901 
4902 		if (!option)
4903 			break;
4904 
4905 		if (*option)
4906 			trace_set_options(&global_trace, option);
4907 
4908 		/* Put back the comma to allow this to be called again */
4909 		if (buf)
4910 			*(buf - 1) = ',';
4911 	}
4912 }
4913 
4914 static ssize_t
4915 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4916 			size_t cnt, loff_t *ppos)
4917 {
4918 	struct seq_file *m = filp->private_data;
4919 	struct trace_array *tr = m->private;
4920 	char buf[64];
4921 	int ret;
4922 
4923 	if (cnt >= sizeof(buf))
4924 		return -EINVAL;
4925 
4926 	if (copy_from_user(buf, ubuf, cnt))
4927 		return -EFAULT;
4928 
4929 	buf[cnt] = 0;
4930 
4931 	ret = trace_set_options(tr, buf);
4932 	if (ret < 0)
4933 		return ret;
4934 
4935 	*ppos += cnt;
4936 
4937 	return cnt;
4938 }
4939 
4940 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4941 {
4942 	struct trace_array *tr = inode->i_private;
4943 	int ret;
4944 
4945 	ret = tracing_check_open_get_tr(tr);
4946 	if (ret)
4947 		return ret;
4948 
4949 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4950 	if (ret < 0)
4951 		trace_array_put(tr);
4952 
4953 	return ret;
4954 }
4955 
4956 static const struct file_operations tracing_iter_fops = {
4957 	.open		= tracing_trace_options_open,
4958 	.read		= seq_read,
4959 	.llseek		= seq_lseek,
4960 	.release	= tracing_single_release_tr,
4961 	.write		= tracing_trace_options_write,
4962 };
4963 
4964 static const char readme_msg[] =
4965 	"tracing mini-HOWTO:\n\n"
4966 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4967 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4968 	" Important files:\n"
4969 	"  trace\t\t\t- The static contents of the buffer\n"
4970 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4971 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4972 	"  current_tracer\t- function and latency tracers\n"
4973 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4974 	"  error_log\t- error log for failed commands (that support it)\n"
4975 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4976 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4977 	"  trace_clock\t\t- change the clock used to order events\n"
4978 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4979 	"      global:   Synced across CPUs but slows tracing down.\n"
4980 	"     counter:   Not a clock, but just an increment\n"
4981 	"      uptime:   Jiffy counter from time of boot\n"
4982 	"        perf:   Same clock that perf events use\n"
4983 #ifdef CONFIG_X86_64
4984 	"     x86-tsc:   TSC cycle counter\n"
4985 #endif
4986 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4987 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4988 	"    absolute:   Absolute (standalone) timestamp\n"
4989 	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4990 	"\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4991 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4992 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4993 	"\t\t\t  Remove sub-buffer with rmdir\n"
4994 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4995 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4996 	"\t\t\t  option name\n"
4997 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4998 #ifdef CONFIG_DYNAMIC_FTRACE
4999 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5000 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5001 	"\t\t\t  functions\n"
5002 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5003 	"\t     modules: Can select a group via module\n"
5004 	"\t      Format: :mod:<module-name>\n"
5005 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5006 	"\t    triggers: a command to perform when function is hit\n"
5007 	"\t      Format: <function>:<trigger>[:count]\n"
5008 	"\t     trigger: traceon, traceoff\n"
5009 	"\t\t      enable_event:<system>:<event>\n"
5010 	"\t\t      disable_event:<system>:<event>\n"
5011 #ifdef CONFIG_STACKTRACE
5012 	"\t\t      stacktrace\n"
5013 #endif
5014 #ifdef CONFIG_TRACER_SNAPSHOT
5015 	"\t\t      snapshot\n"
5016 #endif
5017 	"\t\t      dump\n"
5018 	"\t\t      cpudump\n"
5019 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5020 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5021 	"\t     The first one will disable tracing every time do_fault is hit\n"
5022 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5023 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5024 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5025 	"\t       the counter will not decrement. It only decrements when the\n"
5026 	"\t       trigger did work\n"
5027 	"\t     To remove trigger without count:\n"
5028 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5029 	"\t     To remove trigger with a count:\n"
5030 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5031 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5032 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5033 	"\t    modules: Can select a group via module command :mod:\n"
5034 	"\t    Does not accept triggers\n"
5035 #endif /* CONFIG_DYNAMIC_FTRACE */
5036 #ifdef CONFIG_FUNCTION_TRACER
5037 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5038 	"\t\t    (function)\n"
5039 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5040 	"\t\t    (function)\n"
5041 #endif
5042 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5043 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5044 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5045 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5046 #endif
5047 #ifdef CONFIG_TRACER_SNAPSHOT
5048 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5049 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5050 	"\t\t\t  information\n"
5051 #endif
5052 #ifdef CONFIG_STACK_TRACER
5053 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5054 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5055 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5056 	"\t\t\t  new trace)\n"
5057 #ifdef CONFIG_DYNAMIC_FTRACE
5058 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5059 	"\t\t\t  traces\n"
5060 #endif
5061 #endif /* CONFIG_STACK_TRACER */
5062 #ifdef CONFIG_DYNAMIC_EVENTS
5063 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5064 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5065 #endif
5066 #ifdef CONFIG_KPROBE_EVENTS
5067 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5068 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5069 #endif
5070 #ifdef CONFIG_UPROBE_EVENTS
5071 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5072 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5073 #endif
5074 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5075 	"\t  accepts: event-definitions (one definition per line)\n"
5076 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5077 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5078 #ifdef CONFIG_HIST_TRIGGERS
5079 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5080 #endif
5081 	"\t           -:[<group>/]<event>\n"
5082 #ifdef CONFIG_KPROBE_EVENTS
5083 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5084 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5085 #endif
5086 #ifdef CONFIG_UPROBE_EVENTS
5087 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5088 #endif
5089 	"\t     args: <name>=fetcharg[:type]\n"
5090 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5091 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5092 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5093 #else
5094 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5095 #endif
5096 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5097 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5098 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5099 	"\t           <type>\\[<array-size>\\]\n"
5100 #ifdef CONFIG_HIST_TRIGGERS
5101 	"\t    field: <stype> <name>;\n"
5102 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5103 	"\t           [unsigned] char/int/long\n"
5104 #endif
5105 #endif
5106 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5107 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5108 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5109 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5110 	"\t\t\t  events\n"
5111 	"      filter\t\t- If set, only events passing filter are traced\n"
5112 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5113 	"\t\t\t  <event>:\n"
5114 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5115 	"      filter\t\t- If set, only events passing filter are traced\n"
5116 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5117 	"\t    Format: <trigger>[:count][if <filter>]\n"
5118 	"\t   trigger: traceon, traceoff\n"
5119 	"\t            enable_event:<system>:<event>\n"
5120 	"\t            disable_event:<system>:<event>\n"
5121 #ifdef CONFIG_HIST_TRIGGERS
5122 	"\t            enable_hist:<system>:<event>\n"
5123 	"\t            disable_hist:<system>:<event>\n"
5124 #endif
5125 #ifdef CONFIG_STACKTRACE
5126 	"\t\t    stacktrace\n"
5127 #endif
5128 #ifdef CONFIG_TRACER_SNAPSHOT
5129 	"\t\t    snapshot\n"
5130 #endif
5131 #ifdef CONFIG_HIST_TRIGGERS
5132 	"\t\t    hist (see below)\n"
5133 #endif
5134 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5135 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5136 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5137 	"\t                  events/block/block_unplug/trigger\n"
5138 	"\t   The first disables tracing every time block_unplug is hit.\n"
5139 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5140 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5141 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5142 	"\t   Like function triggers, the counter is only decremented if it\n"
5143 	"\t    enabled or disabled tracing.\n"
5144 	"\t   To remove a trigger without a count:\n"
5145 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5146 	"\t   To remove a trigger with a count:\n"
5147 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5148 	"\t   Filters can be ignored when removing a trigger.\n"
5149 #ifdef CONFIG_HIST_TRIGGERS
5150 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5151 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5152 	"\t            [:values=<field1[,field2,...]>]\n"
5153 	"\t            [:sort=<field1[,field2,...]>]\n"
5154 	"\t            [:size=#entries]\n"
5155 	"\t            [:pause][:continue][:clear]\n"
5156 	"\t            [:name=histname1]\n"
5157 	"\t            [:<handler>.<action>]\n"
5158 	"\t            [if <filter>]\n\n"
5159 	"\t    When a matching event is hit, an entry is added to a hash\n"
5160 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5161 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5162 	"\t    correspond to fields in the event's format description.  Keys\n"
5163 	"\t    can be any field, or the special string 'stacktrace'.\n"
5164 	"\t    Compound keys consisting of up to two fields can be specified\n"
5165 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5166 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5167 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5168 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5169 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5170 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5171 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5172 	"\t    its histogram data will be shared with other triggers of the\n"
5173 	"\t    same name, and trigger hits will update this common data.\n\n"
5174 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5175 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5176 	"\t    triggers attached to an event, there will be a table for each\n"
5177 	"\t    trigger in the output.  The table displayed for a named\n"
5178 	"\t    trigger will be the same as any other instance having the\n"
5179 	"\t    same name.  The default format used to display a given field\n"
5180 	"\t    can be modified by appending any of the following modifiers\n"
5181 	"\t    to the field name, as applicable:\n\n"
5182 	"\t            .hex        display a number as a hex value\n"
5183 	"\t            .sym        display an address as a symbol\n"
5184 	"\t            .sym-offset display an address as a symbol and offset\n"
5185 	"\t            .execname   display a common_pid as a program name\n"
5186 	"\t            .syscall    display a syscall id as a syscall name\n"
5187 	"\t            .log2       display log2 value rather than raw number\n"
5188 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5189 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5190 	"\t    trigger or to start a hist trigger but not log any events\n"
5191 	"\t    until told to do so.  'continue' can be used to start or\n"
5192 	"\t    restart a paused hist trigger.\n\n"
5193 	"\t    The 'clear' parameter will clear the contents of a running\n"
5194 	"\t    hist trigger and leave its current paused/active state\n"
5195 	"\t    unchanged.\n\n"
5196 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5197 	"\t    have one event conditionally start and stop another event's\n"
5198 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5199 	"\t    the enable_event and disable_event triggers.\n\n"
5200 	"\t    Hist trigger handlers and actions are executed whenever\n"
5201 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5202 	"\t        <handler>.<action>\n\n"
5203 	"\t    The available handlers are:\n\n"
5204 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5205 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5206 	"\t        onchange(var)            - invoke action if var changes\n\n"
5207 	"\t    The available actions are:\n\n"
5208 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5209 	"\t        save(field,...)                      - save current event fields\n"
5210 #ifdef CONFIG_TRACER_SNAPSHOT
5211 	"\t        snapshot()                           - snapshot the trace buffer\n"
5212 #endif
5213 #endif
5214 ;
5215 
5216 static ssize_t
5217 tracing_readme_read(struct file *filp, char __user *ubuf,
5218 		       size_t cnt, loff_t *ppos)
5219 {
5220 	return simple_read_from_buffer(ubuf, cnt, ppos,
5221 					readme_msg, strlen(readme_msg));
5222 }
5223 
5224 static const struct file_operations tracing_readme_fops = {
5225 	.open		= tracing_open_generic,
5226 	.read		= tracing_readme_read,
5227 	.llseek		= generic_file_llseek,
5228 };
5229 
5230 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5231 {
5232 	int *ptr = v;
5233 
5234 	if (*pos || m->count)
5235 		ptr++;
5236 
5237 	(*pos)++;
5238 
5239 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5240 		if (trace_find_tgid(*ptr))
5241 			return ptr;
5242 	}
5243 
5244 	return NULL;
5245 }
5246 
5247 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5248 {
5249 	void *v;
5250 	loff_t l = 0;
5251 
5252 	if (!tgid_map)
5253 		return NULL;
5254 
5255 	v = &tgid_map[0];
5256 	while (l <= *pos) {
5257 		v = saved_tgids_next(m, v, &l);
5258 		if (!v)
5259 			return NULL;
5260 	}
5261 
5262 	return v;
5263 }
5264 
5265 static void saved_tgids_stop(struct seq_file *m, void *v)
5266 {
5267 }
5268 
5269 static int saved_tgids_show(struct seq_file *m, void *v)
5270 {
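	/* v points into tgid_map[], so its offset from the base is the pid */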
5271 	int pid = (int *)v - tgid_map;
5272 
5273 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5274 	return 0;
5275 }
5276 
5277 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5278 	.start		= saved_tgids_start,
5279 	.stop		= saved_tgids_stop,
5280 	.next		= saved_tgids_next,
5281 	.show		= saved_tgids_show,
5282 };
5283 
5284 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5285 {
5286 	int ret;
5287 
5288 	ret = tracing_check_open_get_tr(NULL);
5289 	if (ret)
5290 		return ret;
5291 
5292 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5293 }
5294 
5295 
5296 static const struct file_operations tracing_saved_tgids_fops = {
5297 	.open		= tracing_saved_tgids_open,
5298 	.read		= seq_read,
5299 	.llseek		= seq_lseek,
5300 	.release	= seq_release,
5301 };
5302 
5303 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5304 {
5305 	unsigned int *ptr = v;
5306 
5307 	if (*pos || m->count)
5308 		ptr++;
5309 
5310 	(*pos)++;
5311 
5312 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5313 	     ptr++) {
5314 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5315 			continue;
5316 
5317 		return ptr;
5318 	}
5319 
5320 	return NULL;
5321 }
5322 
5323 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5324 {
5325 	void *v;
5326 	loff_t l = 0;
5327 
5328 	preempt_disable();
5329 	arch_spin_lock(&trace_cmdline_lock);
5330 
5331 	v = &savedcmd->map_cmdline_to_pid[0];
5332 	while (l <= *pos) {
5333 		v = saved_cmdlines_next(m, v, &l);
5334 		if (!v)
5335 			return NULL;
5336 	}
5337 
5338 	return v;
5339 }
5340 
5341 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5342 {
5343 	arch_spin_unlock(&trace_cmdline_lock);
5344 	preempt_enable();
5345 }
5346 
5347 static int saved_cmdlines_show(struct seq_file *m, void *v)
5348 {
5349 	char buf[TASK_COMM_LEN];
5350 	unsigned int *pid = v;
5351 
5352 	__trace_find_cmdline(*pid, buf);
5353 	seq_printf(m, "%d %s\n", *pid, buf);
5354 	return 0;
5355 }
5356 
5357 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5358 	.start		= saved_cmdlines_start,
5359 	.next		= saved_cmdlines_next,
5360 	.stop		= saved_cmdlines_stop,
5361 	.show		= saved_cmdlines_show,
5362 };
5363 
5364 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5365 {
5366 	int ret;
5367 
5368 	ret = tracing_check_open_get_tr(NULL);
5369 	if (ret)
5370 		return ret;
5371 
5372 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5373 }
5374 
5375 static const struct file_operations tracing_saved_cmdlines_fops = {
5376 	.open		= tracing_saved_cmdlines_open,
5377 	.read		= seq_read,
5378 	.llseek		= seq_lseek,
5379 	.release	= seq_release,
5380 };
5381 
5382 static ssize_t
5383 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5384 				 size_t cnt, loff_t *ppos)
5385 {
5386 	char buf[64];
5387 	int r;
5388 
5389 	arch_spin_lock(&trace_cmdline_lock);
5390 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5391 	arch_spin_unlock(&trace_cmdline_lock);
5392 
5393 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5394 }
5395 
5396 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5397 {
5398 	kfree(s->saved_cmdlines);
5399 	kfree(s->map_cmdline_to_pid);
5400 	kfree(s);
5401 }
5402 
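/*
 * Allocate a new saved_cmdlines buffer with room for @val entries, swap
 * it in under trace_cmdline_lock, then free the old buffer outside the
 * lock.
 */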
5403 static int tracing_resize_saved_cmdlines(unsigned int val)
5404 {
5405 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5406 
5407 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5408 	if (!s)
5409 		return -ENOMEM;
5410 
5411 	if (allocate_cmdlines_buffer(val, s) < 0) {
5412 		kfree(s);
5413 		return -ENOMEM;
5414 	}
5415 
5416 	arch_spin_lock(&trace_cmdline_lock);
5417 	savedcmd_temp = savedcmd;
5418 	savedcmd = s;
5419 	arch_spin_unlock(&trace_cmdline_lock);
5420 	free_saved_cmdlines_buffer(savedcmd_temp);
5421 
5422 	return 0;
5423 }
5424 
5425 static ssize_t
5426 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5427 				  size_t cnt, loff_t *ppos)
5428 {
5429 	unsigned long val;
5430 	int ret;
5431 
5432 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5433 	if (ret)
5434 		return ret;
5435 
5436 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5437 	if (!val || val > PID_MAX_DEFAULT)
5438 		return -EINVAL;
5439 
5440 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5441 	if (ret < 0)
5442 		return ret;
5443 
5444 	*ppos += cnt;
5445 
5446 	return cnt;
5447 }
5448 
5449 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5450 	.open		= tracing_open_generic,
5451 	.read		= tracing_saved_cmdlines_size_read,
5452 	.write		= tracing_saved_cmdlines_size_write,
5453 };
5454 
5455 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5456 static union trace_eval_map_item *
5457 update_eval_map(union trace_eval_map_item *ptr)
5458 {
5459 	if (!ptr->map.eval_string) {
5460 		if (ptr->tail.next) {
5461 			ptr = ptr->tail.next;
5462 			/* Set ptr to the next real item (skip head) */
5463 			ptr++;
5464 		} else
5465 			return NULL;
5466 	}
5467 	return ptr;
5468 }
5469 
5470 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5471 {
5472 	union trace_eval_map_item *ptr = v;
5473 
5474 	/*
5475 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5476 	 * This really should never happen.
5477 	 */
5478 	(*pos)++;
5479 	ptr = update_eval_map(ptr);
5480 	if (WARN_ON_ONCE(!ptr))
5481 		return NULL;
5482 
5483 	ptr++;
5484 	ptr = update_eval_map(ptr);
5485 
5486 	return ptr;
5487 }
5488 
5489 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5490 {
5491 	union trace_eval_map_item *v;
5492 	loff_t l = 0;
5493 
5494 	mutex_lock(&trace_eval_mutex);
5495 
5496 	v = trace_eval_maps;
5497 	if (v)
5498 		v++;
5499 
5500 	while (v && l < *pos) {
5501 		v = eval_map_next(m, v, &l);
5502 	}
5503 
5504 	return v;
5505 }
5506 
5507 static void eval_map_stop(struct seq_file *m, void *v)
5508 {
5509 	mutex_unlock(&trace_eval_mutex);
5510 }
5511 
5512 static int eval_map_show(struct seq_file *m, void *v)
5513 {
5514 	union trace_eval_map_item *ptr = v;
5515 
5516 	seq_printf(m, "%s %ld (%s)\n",
5517 		   ptr->map.eval_string, ptr->map.eval_value,
5518 		   ptr->map.system);
5519 
5520 	return 0;
5521 }
5522 
5523 static const struct seq_operations tracing_eval_map_seq_ops = {
5524 	.start		= eval_map_start,
5525 	.next		= eval_map_next,
5526 	.stop		= eval_map_stop,
5527 	.show		= eval_map_show,
5528 };
5529 
5530 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5531 {
5532 	int ret;
5533 
5534 	ret = tracing_check_open_get_tr(NULL);
5535 	if (ret)
5536 		return ret;
5537 
5538 	return seq_open(filp, &tracing_eval_map_seq_ops);
5539 }
5540 
5541 static const struct file_operations tracing_eval_map_fops = {
5542 	.open		= tracing_eval_map_open,
5543 	.read		= seq_read,
5544 	.llseek		= seq_lseek,
5545 	.release	= seq_release,
5546 };
5547 
5548 static inline union trace_eval_map_item *
5549 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5550 {
5551 	/* Return tail of array given the head */
5552 	return ptr + ptr->head.length + 1;
5553 }
5554 
5555 static void
5556 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5557 			   int len)
5558 {
5559 	struct trace_eval_map **stop;
5560 	struct trace_eval_map **map;
5561 	union trace_eval_map_item *map_array;
5562 	union trace_eval_map_item *ptr;
5563 
5564 	stop = start + len;
5565 
5566 	/*
5567 	 * The trace_eval_maps contains the map plus a head and tail item,
5568 	 * where the head holds the module and length of array, and the
5569 	 * tail holds a pointer to the next list.
5570 	 */
5571 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5572 	if (!map_array) {
5573 		pr_warn("Unable to allocate trace eval mapping\n");
5574 		return;
5575 	}
5576 
5577 	mutex_lock(&trace_eval_mutex);
5578 
5579 	if (!trace_eval_maps)
5580 		trace_eval_maps = map_array;
5581 	else {
5582 		ptr = trace_eval_maps;
5583 		for (;;) {
5584 			ptr = trace_eval_jmp_to_tail(ptr);
5585 			if (!ptr->tail.next)
5586 				break;
5587 			ptr = ptr->tail.next;
5588 
5589 		}
5590 		ptr->tail.next = map_array;
5591 	}
5592 	map_array->head.mod = mod;
5593 	map_array->head.length = len;
5594 	map_array++;
5595 
5596 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5597 		map_array->map = **map;
5598 		map_array++;
5599 	}
5600 	memset(map_array, 0, sizeof(*map_array));
5601 
5602 	mutex_unlock(&trace_eval_mutex);
5603 }
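
/*
 * With several modules contributing eval maps, the above builds a chain of
 * such arrays:
 *
 *   trace_eval_maps -> [ head | maps | tail ] -> [ head | maps | tail ] -> NULL
 *
 * where each tail.next points at the head item of the next array.
 */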
5604 
5605 static void trace_create_eval_file(struct dentry *d_tracer)
5606 {
5607 	trace_create_file("eval_map", 0444, d_tracer,
5608 			  NULL, &tracing_eval_map_fops);
5609 }
5610 
5611 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5612 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5613 static inline void trace_insert_eval_map_file(struct module *mod,
5614 			      struct trace_eval_map **start, int len) { }
5615 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5616 
5617 static void trace_insert_eval_map(struct module *mod,
5618 				  struct trace_eval_map **start, int len)
5619 {
5620 	struct trace_eval_map **map;
5621 
5622 	if (len <= 0)
5623 		return;
5624 
5625 	map = start;
5626 
5627 	trace_event_eval_update(map, len);
5628 
5629 	trace_insert_eval_map_file(mod, start, len);
5630 }
5631 
5632 static ssize_t
5633 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5634 		       size_t cnt, loff_t *ppos)
5635 {
5636 	struct trace_array *tr = filp->private_data;
5637 	char buf[MAX_TRACER_SIZE+2];
5638 	int r;
5639 
5640 	mutex_lock(&trace_types_lock);
5641 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5642 	mutex_unlock(&trace_types_lock);
5643 
5644 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5645 }
5646 
5647 int tracer_init(struct tracer *t, struct trace_array *tr)
5648 {
5649 	tracing_reset_online_cpus(&tr->array_buffer);
5650 	return t->init(tr);
5651 }
5652 
5653 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5654 {
5655 	int cpu;
5656 
5657 	for_each_tracing_cpu(cpu)
5658 		per_cpu_ptr(buf->data, cpu)->entries = val;
5659 }
5660 
5661 #ifdef CONFIG_TRACER_MAX_TRACE
5662 /* resize @trace_buf's per-cpu buffers to the entry counts recorded in @size_buf */
5663 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5664 					struct array_buffer *size_buf, int cpu_id)
5665 {
5666 	int cpu, ret = 0;
5667 
5668 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5669 		for_each_tracing_cpu(cpu) {
5670 			ret = ring_buffer_resize(trace_buf->buffer,
5671 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5672 			if (ret < 0)
5673 				break;
5674 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5675 				per_cpu_ptr(size_buf->data, cpu)->entries;
5676 		}
5677 	} else {
5678 		ret = ring_buffer_resize(trace_buf->buffer,
5679 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5680 		if (ret == 0)
5681 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5682 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5683 	}
5684 
5685 	return ret;
5686 }
5687 #endif /* CONFIG_TRACER_MAX_TRACE */
5688 
5689 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5690 					unsigned long size, int cpu)
5691 {
5692 	int ret;
5693 
5694 	/*
5695 	 * If kernel or user changes the size of the ring buffer
5696 	 * we use the size that was given, and we can forget about
5697 	 * expanding it later.
5698 	 */
5699 	ring_buffer_expanded = true;
5700 
5701 	/* May be called before buffers are initialized */
5702 	if (!tr->array_buffer.buffer)
5703 		return 0;
5704 
5705 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5706 	if (ret < 0)
5707 		return ret;
5708 
5709 #ifdef CONFIG_TRACER_MAX_TRACE
5710 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5711 	    !tr->current_trace->use_max_tr)
5712 		goto out;
5713 
5714 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5715 	if (ret < 0) {
5716 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5717 						     &tr->array_buffer, cpu);
5718 		if (r < 0) {
5719 			/*
5720 			 * AARGH! We are left with different
5721 			 * size max buffer!!!!
5722 			 * The max buffer is our "snapshot" buffer.
5723 			 * When a tracer needs a snapshot (one of the
5724 			 * latency tracers), it swaps the max buffer
5725 			 * with the saved snap shot. We succeeded to
5726 			 * with the saved snapshot. We succeeded in
5727 			 * updating the size of the main buffer, but failed to
5728 			 * to reset the main buffer to the original size, we
5729 			 * failed there too. This is very unlikely to
5730 			 * happen, but if it does, warn and kill all
5731 			 * tracing.
5732 			 */
5733 			WARN_ON(1);
5734 			tracing_disabled = 1;
5735 		}
5736 		return ret;
5737 	}
5738 
5739 	if (cpu == RING_BUFFER_ALL_CPUS)
5740 		set_buffer_entries(&tr->max_buffer, size);
5741 	else
5742 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5743 
5744  out:
5745 #endif /* CONFIG_TRACER_MAX_TRACE */
5746 
5747 	if (cpu == RING_BUFFER_ALL_CPUS)
5748 		set_buffer_entries(&tr->array_buffer, size);
5749 	else
5750 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5751 
5752 	return ret;
5753 }
5754 
5755 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5756 				  unsigned long size, int cpu_id)
5757 {
5758 	int ret = size;
5759 
5760 	mutex_lock(&trace_types_lock);
5761 
5762 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5763 		/* make sure this cpu is enabled in the mask */
5764 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5765 			ret = -EINVAL;
5766 			goto out;
5767 		}
5768 	}
5769 
5770 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5771 	if (ret < 0)
5772 		ret = -ENOMEM;
5773 
5774 out:
5775 	mutex_unlock(&trace_types_lock);
5776 
5777 	return ret;
5778 }
5779 
5780 
5781 /**
5782  * tracing_update_buffers - used by tracing facility to expand ring buffers
5783  *
5784  * To save memory when tracing is never used on a system that has it
5785  * configured in, the ring buffers start out at a minimum size. Once
5786  * a user starts to use the tracing facility, they are expanded to
5787  * their default size.
5788  *
5789  * This function is to be called when a tracer is about to be used.
5790  */
5791 int tracing_update_buffers(void)
5792 {
5793 	int ret = 0;
5794 
5795 	mutex_lock(&trace_types_lock);
5796 	if (!ring_buffer_expanded)
5797 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5798 						RING_BUFFER_ALL_CPUS);
5799 	mutex_unlock(&trace_types_lock);
5800 
5801 	return ret;
5802 }
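
/*
 * Minimal caller sketch (illustrative only, kept under #if 0 so it is not
 * built; example_start_tracing() is a hypothetical helper): the usual
 * pattern is to expand the buffers right before anything starts
 * generating events.
 */
#if 0
static int example_start_tracing(struct trace_array *tr)
{
	int ret;

	/* Grow the ring buffers from their boot-time minimum size */
	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	/* Buffers are now at full size; safe to turn tracing on */
	tracer_tracing_on(tr);
	return 0;
}
#endif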
5803 
5804 struct trace_option_dentry;
5805 
5806 static void
5807 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5808 
5809 /*
5810  * Used to clear out the tracer before deletion of an instance.
5811  * Must have trace_types_lock held.
5812  */
5813 static void tracing_set_nop(struct trace_array *tr)
5814 {
5815 	if (tr->current_trace == &nop_trace)
5816 		return;
5817 
5818 	tr->current_trace->enabled--;
5819 
5820 	if (tr->current_trace->reset)
5821 		tr->current_trace->reset(tr);
5822 
5823 	tr->current_trace = &nop_trace;
5824 }
5825 
5826 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5827 {
5828 	/* Only enable if the directory has been created already. */
5829 	if (!tr->dir)
5830 		return;
5831 
5832 	create_trace_option_files(tr, t);
5833 }
5834 
5835 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5836 {
5837 	struct tracer *t;
5838 #ifdef CONFIG_TRACER_MAX_TRACE
5839 	bool had_max_tr;
5840 #endif
5841 	int ret = 0;
5842 
5843 	mutex_lock(&trace_types_lock);
5844 
5845 	if (!ring_buffer_expanded) {
5846 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5847 						RING_BUFFER_ALL_CPUS);
5848 		if (ret < 0)
5849 			goto out;
5850 		ret = 0;
5851 	}
5852 
5853 	for (t = trace_types; t; t = t->next) {
5854 		if (strcmp(t->name, buf) == 0)
5855 			break;
5856 	}
5857 	if (!t) {
5858 		ret = -EINVAL;
5859 		goto out;
5860 	}
5861 	if (t == tr->current_trace)
5862 		goto out;
5863 
5864 #ifdef CONFIG_TRACER_SNAPSHOT
5865 	if (t->use_max_tr) {
5866 		arch_spin_lock(&tr->max_lock);
5867 		if (tr->cond_snapshot)
5868 			ret = -EBUSY;
5869 		arch_spin_unlock(&tr->max_lock);
5870 		if (ret)
5871 			goto out;
5872 	}
5873 #endif
5874 	/* Some tracers won't work on kernel command line */
5875 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5876 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5877 			t->name);
5878 		goto out;
5879 	}
5880 
5881 	/* Some tracers are only allowed for the top level buffer */
5882 	if (!trace_ok_for_array(t, tr)) {
5883 		ret = -EINVAL;
5884 		goto out;
5885 	}
5886 
5887 	/* If trace pipe files are being read, we can't change the tracer */
5888 	if (tr->current_trace->ref) {
5889 		ret = -EBUSY;
5890 		goto out;
5891 	}
5892 
5893 	trace_branch_disable();
5894 
5895 	tr->current_trace->enabled--;
5896 
5897 	if (tr->current_trace->reset)
5898 		tr->current_trace->reset(tr);
5899 
5900 	/* Current trace needs to be nop_trace before synchronize_rcu */
5901 	tr->current_trace = &nop_trace;
5902 
5903 #ifdef CONFIG_TRACER_MAX_TRACE
5904 	had_max_tr = tr->allocated_snapshot;
5905 
5906 	if (had_max_tr && !t->use_max_tr) {
5907 		/*
5908 		 * We need to make sure that the update_max_tr sees that
5909 		 * current_trace changed to nop_trace to keep it from
5910 		 * swapping the buffers after we resize it.
5911 		 * update_max_tr() is called with interrupts disabled,
5912 		 * so synchronize_rcu() is sufficient here.
5913 		 */
5914 		synchronize_rcu();
5915 		free_snapshot(tr);
5916 	}
5917 #endif
5918 
5919 #ifdef CONFIG_TRACER_MAX_TRACE
5920 	if (t->use_max_tr && !had_max_tr) {
5921 		ret = tracing_alloc_snapshot_instance(tr);
5922 		if (ret < 0)
5923 			goto out;
5924 	}
5925 #endif
5926 
5927 	if (t->init) {
5928 		ret = tracer_init(t, tr);
5929 		if (ret)
5930 			goto out;
5931 	}
5932 
5933 	tr->current_trace = t;
5934 	tr->current_trace->enabled++;
5935 	trace_branch_enable(tr);
5936  out:
5937 	mutex_unlock(&trace_types_lock);
5938 
5939 	return ret;
5940 }
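
/*
 * Minimal caller sketch (illustrative only, not built;
 * example_select_nop_tracer() is a hypothetical helper): selecting a
 * named tracer from kernel code. The "nop" tracer is always registered.
 */
#if 0
static void example_select_nop_tracer(struct trace_array *tr)
{
	int err;

	err = tracing_set_tracer(tr, "nop");
	if (err)
		pr_warn("could not select tracer: %d\n", err);
}
#endif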
5941 
5942 static ssize_t
5943 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5944 			size_t cnt, loff_t *ppos)
5945 {
5946 	struct trace_array *tr = filp->private_data;
5947 	char buf[MAX_TRACER_SIZE+1];
5948 	int i;
5949 	size_t ret;
5950 	int err;
5951 
5952 	ret = cnt;
5953 
5954 	if (cnt > MAX_TRACER_SIZE)
5955 		cnt = MAX_TRACER_SIZE;
5956 
5957 	if (copy_from_user(buf, ubuf, cnt))
5958 		return -EFAULT;
5959 
5960 	buf[cnt] = 0;
5961 
5962 	/* strip trailing whitespace. */
5963 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5964 		buf[i] = 0;
5965 
5966 	err = tracing_set_tracer(tr, buf);
5967 	if (err)
5968 		return err;
5969 
5970 	*ppos += ret;
5971 
5972 	return ret;
5973 }
5974 
5975 static ssize_t
5976 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5977 		   size_t cnt, loff_t *ppos)
5978 {
5979 	char buf[64];
5980 	int r;
5981 
5982 	r = snprintf(buf, sizeof(buf), "%ld\n",
5983 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5984 	if (r > sizeof(buf))
5985 		r = sizeof(buf);
5986 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5987 }
5988 
5989 static ssize_t
5990 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5991 		    size_t cnt, loff_t *ppos)
5992 {
5993 	unsigned long val;
5994 	int ret;
5995 
5996 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5997 	if (ret)
5998 		return ret;
5999 
6000 	*ptr = val * 1000;
6001 
6002 	return cnt;
6003 }
6004 
6005 static ssize_t
6006 tracing_thresh_read(struct file *filp, char __user *ubuf,
6007 		    size_t cnt, loff_t *ppos)
6008 {
6009 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6010 }
6011 
6012 static ssize_t
6013 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6014 		     size_t cnt, loff_t *ppos)
6015 {
6016 	struct trace_array *tr = filp->private_data;
6017 	int ret;
6018 
6019 	mutex_lock(&trace_types_lock);
6020 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6021 	if (ret < 0)
6022 		goto out;
6023 
6024 	if (tr->current_trace->update_thresh) {
6025 		ret = tr->current_trace->update_thresh(tr);
6026 		if (ret < 0)
6027 			goto out;
6028 	}
6029 
6030 	ret = cnt;
6031 out:
6032 	mutex_unlock(&trace_types_lock);
6033 
6034 	return ret;
6035 }
6036 
6037 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6038 
6039 static ssize_t
6040 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6041 		     size_t cnt, loff_t *ppos)
6042 {
6043 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6044 }
6045 
6046 static ssize_t
6047 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6048 		      size_t cnt, loff_t *ppos)
6049 {
6050 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6051 }
6052 
6053 #endif
6054 
6055 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6056 {
6057 	struct trace_array *tr = inode->i_private;
6058 	struct trace_iterator *iter;
6059 	int ret;
6060 
6061 	ret = tracing_check_open_get_tr(tr);
6062 	if (ret)
6063 		return ret;
6064 
6065 	mutex_lock(&trace_types_lock);
6066 
6067 	/* create a buffer to store the information to pass to userspace */
6068 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6069 	if (!iter) {
6070 		ret = -ENOMEM;
6071 		__trace_array_put(tr);
6072 		goto out;
6073 	}
6074 
6075 	trace_seq_init(&iter->seq);
6076 	iter->trace = tr->current_trace;
6077 
6078 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6079 		ret = -ENOMEM;
6080 		goto fail;
6081 	}
6082 
6083 	/* trace pipe does not show start of buffer */
6084 	cpumask_setall(iter->started);
6085 
6086 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6087 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6088 
6089 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6090 	if (trace_clocks[tr->clock_id].in_ns)
6091 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6092 
6093 	iter->tr = tr;
6094 	iter->array_buffer = &tr->array_buffer;
6095 	iter->cpu_file = tracing_get_cpu(inode);
6096 	mutex_init(&iter->mutex);
6097 	filp->private_data = iter;
6098 
6099 	if (iter->trace->pipe_open)
6100 		iter->trace->pipe_open(iter);
6101 
6102 	nonseekable_open(inode, filp);
6103 
6104 	tr->current_trace->ref++;
6105 out:
6106 	mutex_unlock(&trace_types_lock);
6107 	return ret;
6108 
6109 fail:
6110 	kfree(iter);
6111 	__trace_array_put(tr);
6112 	mutex_unlock(&trace_types_lock);
6113 	return ret;
6114 }
6115 
6116 static int tracing_release_pipe(struct inode *inode, struct file *file)
6117 {
6118 	struct trace_iterator *iter = file->private_data;
6119 	struct trace_array *tr = inode->i_private;
6120 
6121 	mutex_lock(&trace_types_lock);
6122 
6123 	tr->current_trace->ref--;
6124 
6125 	if (iter->trace->pipe_close)
6126 		iter->trace->pipe_close(iter);
6127 
6128 	mutex_unlock(&trace_types_lock);
6129 
6130 	free_cpumask_var(iter->started);
6131 	mutex_destroy(&iter->mutex);
6132 	kfree(iter);
6133 
6134 	trace_array_put(tr);
6135 
6136 	return 0;
6137 }
6138 
6139 static __poll_t
6140 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6141 {
6142 	struct trace_array *tr = iter->tr;
6143 
6144 	/* Iterators are static, they should be filled or empty */
6145 	if (trace_buffer_iter(iter, iter->cpu_file))
6146 		return EPOLLIN | EPOLLRDNORM;
6147 
6148 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6149 		/*
6150 		 * Always select as readable when in blocking mode
6151 		 */
6152 		return EPOLLIN | EPOLLRDNORM;
6153 	else
6154 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6155 					     filp, poll_table);
6156 }
6157 
6158 static __poll_t
6159 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6160 {
6161 	struct trace_iterator *iter = filp->private_data;
6162 
6163 	return trace_poll(iter, filp, poll_table);
6164 }
6165 
6166 /* Must be called with iter->mutex held. */
6167 static int tracing_wait_pipe(struct file *filp)
6168 {
6169 	struct trace_iterator *iter = filp->private_data;
6170 	int ret;
6171 
6172 	while (trace_empty(iter)) {
6173 
6174 		if ((filp->f_flags & O_NONBLOCK)) {
6175 			return -EAGAIN;
6176 		}
6177 
6178 		/*
6179 		 * We only return EOF once we have read something and tracing
6180 		 * has been disabled. If tracing is disabled but we have never
6181 		 * read anything, we keep blocking. This allows a user to cat
6182 		 * this file and then enable tracing. But after we have read
6183 		 * something, we give an EOF when tracing is disabled again.
6184 		 *
6185 		 * iter->pos will be 0 if we haven't read anything.
6186 		 */
6187 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6188 			break;
6189 
6190 		mutex_unlock(&iter->mutex);
6191 
6192 		ret = wait_on_pipe(iter, 0);
6193 
6194 		mutex_lock(&iter->mutex);
6195 
6196 		if (ret)
6197 			return ret;
6198 	}
6199 
6200 	return 1;
6201 }
6202 
6203 /*
6204  * Consumer reader.
6205  */
6206 static ssize_t
6207 tracing_read_pipe(struct file *filp, char __user *ubuf,
6208 		  size_t cnt, loff_t *ppos)
6209 {
6210 	struct trace_iterator *iter = filp->private_data;
6211 	ssize_t sret;
6212 
6213 	/*
6214 	 * Avoid more than one consumer on a single file descriptor
6215 	 * Avoid more than one consumer on a single file descriptor.
6216 	 * This is just a matter of trace coherency; the ring buffer itself
6217 	 * is protected.
6218 	mutex_lock(&iter->mutex);
6219 
6220 	/* return any leftover data */
6221 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6222 	if (sret != -EBUSY)
6223 		goto out;
6224 
6225 	trace_seq_init(&iter->seq);
6226 
6227 	if (iter->trace->read) {
6228 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6229 		if (sret)
6230 			goto out;
6231 	}
6232 
6233 waitagain:
6234 	sret = tracing_wait_pipe(filp);
6235 	if (sret <= 0)
6236 		goto out;
6237 
6238 	/* stop when tracing is finished */
6239 	if (trace_empty(iter)) {
6240 		sret = 0;
6241 		goto out;
6242 	}
6243 
6244 	if (cnt >= PAGE_SIZE)
6245 		cnt = PAGE_SIZE - 1;
6246 
6247 	/* reset all but tr, trace, and overruns */
6248 	memset(&iter->seq, 0,
6249 	       sizeof(struct trace_iterator) -
6250 	       offsetof(struct trace_iterator, seq));
6251 	cpumask_clear(iter->started);
6252 	trace_seq_init(&iter->seq);
6253 	iter->pos = -1;
6254 
6255 	trace_event_read_lock();
6256 	trace_access_lock(iter->cpu_file);
6257 	while (trace_find_next_entry_inc(iter) != NULL) {
6258 		enum print_line_t ret;
6259 		int save_len = iter->seq.seq.len;
6260 
6261 		ret = print_trace_line(iter);
6262 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6263 			/* don't print partial lines */
6264 			iter->seq.seq.len = save_len;
6265 			break;
6266 		}
6267 		if (ret != TRACE_TYPE_NO_CONSUME)
6268 			trace_consume(iter);
6269 
6270 		if (trace_seq_used(&iter->seq) >= cnt)
6271 			break;
6272 
6273 		/*
6274 		 * The full flag being set means we reached the end of the
6275 		 * trace_seq buffer and should have left via the partial-line
6276 		 * condition above; one of the trace_seq_* functions was misused.
6277 		 */
6278 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6279 			  iter->ent->type);
6280 	}
6281 	trace_access_unlock(iter->cpu_file);
6282 	trace_event_read_unlock();
6283 
6284 	/* Now copy what we have to the user */
6285 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6286 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6287 		trace_seq_init(&iter->seq);
6288 
6289 	/*
6290 	 * If there was nothing to send to user, in spite of consuming trace
6291 	 * entries, go back to wait for more entries.
6292 	 */
6293 	if (sret == -EBUSY)
6294 		goto waitagain;
6295 
6296 out:
6297 	mutex_unlock(&iter->mutex);
6298 
6299 	return sret;
6300 }
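
/*
 * Userspace-side sketch of a trace_pipe consumer served by the reader
 * above (illustrative only, not built as part of the kernel; assumes
 * tracefs is mounted at /sys/kernel/tracing). Reads block until entries
 * are available and consume them from the ring buffer.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0)
		return 1;

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	return 0;
}
#endif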
6301 
6302 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6303 				     unsigned int idx)
6304 {
6305 	__free_page(spd->pages[idx]);
6306 }
6307 
6308 static size_t
6309 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6310 {
6311 	size_t count;
6312 	int save_len;
6313 	int ret;
6314 
6315 	/* Seq buffer is page-sized, exactly what we need. */
6316 	for (;;) {
6317 		save_len = iter->seq.seq.len;
6318 		ret = print_trace_line(iter);
6319 
6320 		if (trace_seq_has_overflowed(&iter->seq)) {
6321 			iter->seq.seq.len = save_len;
6322 			break;
6323 		}
6324 
6325 		/*
6326 		 * This should not be hit, because it should only
6327 		 * be set if the iter->seq overflowed. But check it
6328 		 * anyway to be safe.
6329 		 */
6330 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6331 			iter->seq.seq.len = save_len;
6332 			break;
6333 		}
6334 
6335 		count = trace_seq_used(&iter->seq) - save_len;
6336 		if (rem < count) {
6337 			rem = 0;
6338 			iter->seq.seq.len = save_len;
6339 			break;
6340 		}
6341 
6342 		if (ret != TRACE_TYPE_NO_CONSUME)
6343 			trace_consume(iter);
6344 		rem -= count;
6345 		if (!trace_find_next_entry_inc(iter))	{
6346 			rem = 0;
6347 			iter->ent = NULL;
6348 			break;
6349 		}
6350 	}
6351 
6352 	return rem;
6353 }
6354 
6355 static ssize_t tracing_splice_read_pipe(struct file *filp,
6356 					loff_t *ppos,
6357 					struct pipe_inode_info *pipe,
6358 					size_t len,
6359 					unsigned int flags)
6360 {
6361 	struct page *pages_def[PIPE_DEF_BUFFERS];
6362 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6363 	struct trace_iterator *iter = filp->private_data;
6364 	struct splice_pipe_desc spd = {
6365 		.pages		= pages_def,
6366 		.partial	= partial_def,
6367 		.nr_pages	= 0, /* This gets updated below. */
6368 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6369 		.ops		= &default_pipe_buf_ops,
6370 		.spd_release	= tracing_spd_release_pipe,
6371 	};
6372 	ssize_t ret;
6373 	size_t rem;
6374 	unsigned int i;
6375 
6376 	if (splice_grow_spd(pipe, &spd))
6377 		return -ENOMEM;
6378 
6379 	mutex_lock(&iter->mutex);
6380 
6381 	if (iter->trace->splice_read) {
6382 		ret = iter->trace->splice_read(iter, filp,
6383 					       ppos, pipe, len, flags);
6384 		if (ret)
6385 			goto out_err;
6386 	}
6387 
6388 	ret = tracing_wait_pipe(filp);
6389 	if (ret <= 0)
6390 		goto out_err;
6391 
6392 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6393 		ret = -EFAULT;
6394 		goto out_err;
6395 	}
6396 
6397 	trace_event_read_lock();
6398 	trace_access_lock(iter->cpu_file);
6399 
6400 	/* Fill as many pages as possible. */
6401 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6402 		spd.pages[i] = alloc_page(GFP_KERNEL);
6403 		if (!spd.pages[i])
6404 			break;
6405 
6406 		rem = tracing_fill_pipe_page(rem, iter);
6407 
6408 		/* Copy the data into the page, so we can start over. */
6409 		ret = trace_seq_to_buffer(&iter->seq,
6410 					  page_address(spd.pages[i]),
6411 					  trace_seq_used(&iter->seq));
6412 		if (ret < 0) {
6413 			__free_page(spd.pages[i]);
6414 			break;
6415 		}
6416 		spd.partial[i].offset = 0;
6417 		spd.partial[i].len = trace_seq_used(&iter->seq);
6418 
6419 		trace_seq_init(&iter->seq);
6420 	}
6421 
6422 	trace_access_unlock(iter->cpu_file);
6423 	trace_event_read_unlock();
6424 	mutex_unlock(&iter->mutex);
6425 
6426 	spd.nr_pages = i;
6427 
6428 	if (i)
6429 		ret = splice_to_pipe(pipe, &spd);
6430 	else
6431 		ret = 0;
6432 out:
6433 	splice_shrink_spd(&spd);
6434 	return ret;
6435 
6436 out_err:
6437 	mutex_unlock(&iter->mutex);
6438 	goto out;
6439 }
6440 
6441 static ssize_t
6442 tracing_entries_read(struct file *filp, char __user *ubuf,
6443 		     size_t cnt, loff_t *ppos)
6444 {
6445 	struct inode *inode = file_inode(filp);
6446 	struct trace_array *tr = inode->i_private;
6447 	int cpu = tracing_get_cpu(inode);
6448 	char buf[64];
6449 	int r = 0;
6450 	ssize_t ret;
6451 
6452 	mutex_lock(&trace_types_lock);
6453 
6454 	if (cpu == RING_BUFFER_ALL_CPUS) {
6455 		int cpu, buf_size_same;
6456 		unsigned long size;
6457 
6458 		size = 0;
6459 		buf_size_same = 1;
6460 		/* check if all cpu sizes are same */
6461 		for_each_tracing_cpu(cpu) {
6462 			/* fill in the size from first enabled cpu */
6463 			if (size == 0)
6464 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6465 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6466 				buf_size_same = 0;
6467 				break;
6468 			}
6469 		}
6470 
6471 		if (buf_size_same) {
6472 			if (!ring_buffer_expanded)
6473 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6474 					    size >> 10,
6475 					    trace_buf_size >> 10);
6476 			else
6477 				r = sprintf(buf, "%lu\n", size >> 10);
6478 		} else
6479 			r = sprintf(buf, "X\n");
6480 	} else
6481 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6482 
6483 	mutex_unlock(&trace_types_lock);
6484 
6485 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6486 	return ret;
6487 }
6488 
6489 static ssize_t
6490 tracing_entries_write(struct file *filp, const char __user *ubuf,
6491 		      size_t cnt, loff_t *ppos)
6492 {
6493 	struct inode *inode = file_inode(filp);
6494 	struct trace_array *tr = inode->i_private;
6495 	unsigned long val;
6496 	int ret;
6497 
6498 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6499 	if (ret)
6500 		return ret;
6501 
6502 	/* must have at least 1 entry */
6503 	if (!val)
6504 		return -EINVAL;
6505 
6506 	/* value is in KB */
6507 	val <<= 10;
6508 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6509 	if (ret < 0)
6510 		return ret;
6511 
6512 	*ppos += cnt;
6513 
6514 	return cnt;
6515 }
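
/*
 * Userspace-side sketch (illustrative only, not built as part of the
 * kernel; assumes tracefs is mounted at /sys/kernel/tracing): the value
 * written to buffer_size_kb is interpreted as KiB of ring buffer per CPU.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char kb[] = "4096\n";	/* 4 MiB of ring buffer per CPU */
	int fd;

	fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
	if (fd < 0)
		return 1;

	write(fd, kb, strlen(kb));
	close(fd);
	return 0;
}
#endif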
6516 
6517 static ssize_t
6518 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6519 				size_t cnt, loff_t *ppos)
6520 {
6521 	struct trace_array *tr = filp->private_data;
6522 	char buf[64];
6523 	int r, cpu;
6524 	unsigned long size = 0, expanded_size = 0;
6525 
6526 	mutex_lock(&trace_types_lock);
6527 	for_each_tracing_cpu(cpu) {
6528 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6529 		if (!ring_buffer_expanded)
6530 			expanded_size += trace_buf_size >> 10;
6531 	}
6532 	if (ring_buffer_expanded)
6533 		r = sprintf(buf, "%lu\n", size);
6534 	else
6535 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6536 	mutex_unlock(&trace_types_lock);
6537 
6538 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6539 }
6540 
6541 static ssize_t
6542 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6543 			  size_t cnt, loff_t *ppos)
6544 {
6545 	/*
6546 	 * There is no need to read what the user has written; this function
6547 	 * exists only so that using "echo" on this file does not return an error
6548 	 */
6549 
6550 	*ppos += cnt;
6551 
6552 	return cnt;
6553 }
6554 
6555 static int
6556 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6557 {
6558 	struct trace_array *tr = inode->i_private;
6559 
6560 	/* disable tracing? */
6561 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6562 		tracer_tracing_off(tr);
6563 	/* resize the ring buffer to 0 */
6564 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6565 
6566 	trace_array_put(tr);
6567 
6568 	return 0;
6569 }
6570 
6571 static ssize_t
6572 tracing_mark_write(struct file *filp, const char __user *ubuf,
6573 					size_t cnt, loff_t *fpos)
6574 {
6575 	struct trace_array *tr = filp->private_data;
6576 	struct ring_buffer_event *event;
6577 	enum event_trigger_type tt = ETT_NONE;
6578 	struct trace_buffer *buffer;
6579 	struct print_entry *entry;
6580 	unsigned long irq_flags;
6581 	ssize_t written;
6582 	int size;
6583 	int len;
6584 
6585 /* Used in tracing_mark_raw_write() as well */
6586 #define FAULTED_STR "<faulted>"
6587 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6588 
6589 	if (tracing_disabled)
6590 		return -EINVAL;
6591 
6592 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6593 		return -EINVAL;
6594 
6595 	if (cnt > TRACE_BUF_SIZE)
6596 		cnt = TRACE_BUF_SIZE;
6597 
6598 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6599 
6600 	local_save_flags(irq_flags);
6601 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6602 
6603 	/* If less than "<faulted>", then make sure we can still add that */
6604 	if (cnt < FAULTED_SIZE)
6605 		size += FAULTED_SIZE - cnt;
6606 
6607 	buffer = tr->array_buffer.buffer;
6608 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6609 					    irq_flags, preempt_count());
6610 	if (unlikely(!event))
6611 		/* Ring buffer disabled, return as if not open for write */
6612 		return -EBADF;
6613 
6614 	entry = ring_buffer_event_data(event);
6615 	entry->ip = _THIS_IP_;
6616 
6617 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6618 	if (len) {
6619 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6620 		cnt = FAULTED_SIZE;
6621 		written = -EFAULT;
6622 	} else
6623 		written = cnt;
6624 	len = cnt;
6625 
6626 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6627 		/* do not add \n before testing triggers, but add \0 */
6628 		entry->buf[cnt] = '\0';
6629 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6630 	}
6631 
6632 	if (entry->buf[cnt - 1] != '\n') {
6633 		entry->buf[cnt] = '\n';
6634 		entry->buf[cnt + 1] = '\0';
6635 	} else
6636 		entry->buf[cnt] = '\0';
6637 
6638 	__buffer_unlock_commit(buffer, event);
6639 
6640 	if (tt)
6641 		event_triggers_post_call(tr->trace_marker_file, tt);
6642 
6643 	if (written > 0)
6644 		*fpos += written;
6645 
6646 	return written;
6647 }
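
/*
 * Userspace-side sketch (illustrative only, not built as part of the
 * kernel; assumes tracefs is mounted at /sys/kernel/tracing): anything
 * written to trace_marker shows up in the trace as a print event, with
 * "<faulted>" substituted if the copy from userspace faults.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hit the slow path\n";
	int fd;

	fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
	if (fd < 0)
		return 1;

	write(fd, msg, strlen(msg));
	close(fd);
	return 0;
}
#endif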
6648 
6649 /* Limit it for now to 3K (including tag) */
6650 #define RAW_DATA_MAX_SIZE (1024*3)
6651 
6652 static ssize_t
6653 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6654 					size_t cnt, loff_t *fpos)
6655 {
6656 	struct trace_array *tr = filp->private_data;
6657 	struct ring_buffer_event *event;
6658 	struct trace_buffer *buffer;
6659 	struct raw_data_entry *entry;
6660 	unsigned long irq_flags;
6661 	ssize_t written;
6662 	int size;
6663 	int len;
6664 
6665 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6666 
6667 	if (tracing_disabled)
6668 		return -EINVAL;
6669 
6670 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6671 		return -EINVAL;
6672 
6673 	/* The marker must at least have a tag id */
6674 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6675 		return -EINVAL;
6676 
6677 	if (cnt > TRACE_BUF_SIZE)
6678 		cnt = TRACE_BUF_SIZE;
6679 
6680 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6681 
6682 	local_save_flags(irq_flags);
6683 	size = sizeof(*entry) + cnt;
6684 	if (cnt < FAULT_SIZE_ID)
6685 		size += FAULT_SIZE_ID - cnt;
6686 
6687 	buffer = tr->array_buffer.buffer;
6688 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6689 					    irq_flags, preempt_count());
6690 	if (!event)
6691 		/* Ring buffer disabled, return as if not open for write */
6692 		return -EBADF;
6693 
6694 	entry = ring_buffer_event_data(event);
6695 
6696 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6697 	if (len) {
6698 		entry->id = -1;
6699 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6700 		written = -EFAULT;
6701 	} else
6702 		written = cnt;
6703 
6704 	__buffer_unlock_commit(buffer, event);
6705 
6706 	if (written > 0)
6707 		*fpos += written;
6708 
6709 	return written;
6710 }
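
/*
 * Userspace-side sketch for trace_marker_raw (illustrative only, not
 * built; the tag id 42 and payload are hypothetical): the write must
 * start with an int tag id, followed by the raw payload.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct {
		int id;
		char data[8];
	} rec = { .id = 42, .data = "rawdata" };
	int fd;

	fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
	if (fd < 0)
		return 1;

	write(fd, &rec, sizeof(rec));
	close(fd);
	return 0;
}
#endif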
6711 
6712 static int tracing_clock_show(struct seq_file *m, void *v)
6713 {
6714 	struct trace_array *tr = m->private;
6715 	int i;
6716 
6717 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6718 		seq_printf(m,
6719 			"%s%s%s%s", i ? " " : "",
6720 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6721 			i == tr->clock_id ? "]" : "");
6722 	seq_putc(m, '\n');
6723 
6724 	return 0;
6725 }
6726 
6727 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6728 {
6729 	int i;
6730 
6731 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6732 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6733 			break;
6734 	}
6735 	if (i == ARRAY_SIZE(trace_clocks))
6736 		return -EINVAL;
6737 
6738 	mutex_lock(&trace_types_lock);
6739 
6740 	tr->clock_id = i;
6741 
6742 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6743 
6744 	/*
6745 	 * New clock may not be consistent with the previous clock.
6746 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6747 	 */
6748 	tracing_reset_online_cpus(&tr->array_buffer);
6749 
6750 #ifdef CONFIG_TRACER_MAX_TRACE
6751 	if (tr->max_buffer.buffer)
6752 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6753 	tracing_reset_online_cpus(&tr->max_buffer);
6754 #endif
6755 
6756 	mutex_unlock(&trace_types_lock);
6757 
6758 	return 0;
6759 }
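
/*
 * Minimal caller sketch (illustrative only, not built;
 * example_use_global_clock() is a hypothetical helper): kernel code that
 * owns a trace_array can switch its timestamp source directly; "global"
 * is one of the clocks listed in trace_clocks[].
 */
#if 0
static void example_use_global_clock(struct trace_array *tr)
{
	if (tracing_set_clock(tr, "global"))
		pr_warn("failed to set trace clock\n");
}
#endif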
6760 
6761 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6762 				   size_t cnt, loff_t *fpos)
6763 {
6764 	struct seq_file *m = filp->private_data;
6765 	struct trace_array *tr = m->private;
6766 	char buf[64];
6767 	const char *clockstr;
6768 	int ret;
6769 
6770 	if (cnt >= sizeof(buf))
6771 		return -EINVAL;
6772 
6773 	if (copy_from_user(buf, ubuf, cnt))
6774 		return -EFAULT;
6775 
6776 	buf[cnt] = 0;
6777 
6778 	clockstr = strstrip(buf);
6779 
6780 	ret = tracing_set_clock(tr, clockstr);
6781 	if (ret)
6782 		return ret;
6783 
6784 	*fpos += cnt;
6785 
6786 	return cnt;
6787 }
6788 
6789 static int tracing_clock_open(struct inode *inode, struct file *file)
6790 {
6791 	struct trace_array *tr = inode->i_private;
6792 	int ret;
6793 
6794 	ret = tracing_check_open_get_tr(tr);
6795 	if (ret)
6796 		return ret;
6797 
6798 	ret = single_open(file, tracing_clock_show, inode->i_private);
6799 	if (ret < 0)
6800 		trace_array_put(tr);
6801 
6802 	return ret;
6803 }
6804 
6805 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6806 {
6807 	struct trace_array *tr = m->private;
6808 
6809 	mutex_lock(&trace_types_lock);
6810 
6811 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6812 		seq_puts(m, "delta [absolute]\n");
6813 	else
6814 		seq_puts(m, "[delta] absolute\n");
6815 
6816 	mutex_unlock(&trace_types_lock);
6817 
6818 	return 0;
6819 }
6820 
6821 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6822 {
6823 	struct trace_array *tr = inode->i_private;
6824 	int ret;
6825 
6826 	ret = tracing_check_open_get_tr(tr);
6827 	if (ret)
6828 		return ret;
6829 
6830 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6831 	if (ret < 0)
6832 		trace_array_put(tr);
6833 
6834 	return ret;
6835 }
6836 
6837 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6838 {
6839 	int ret = 0;
6840 
6841 	mutex_lock(&trace_types_lock);
6842 
6843 	if (abs && tr->time_stamp_abs_ref++)
6844 		goto out;
6845 
6846 	if (!abs) {
6847 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6848 			ret = -EINVAL;
6849 			goto out;
6850 		}
6851 
6852 		if (--tr->time_stamp_abs_ref)
6853 			goto out;
6854 	}
6855 
6856 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6857 
6858 #ifdef CONFIG_TRACER_MAX_TRACE
6859 	if (tr->max_buffer.buffer)
6860 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6861 #endif
6862  out:
6863 	mutex_unlock(&trace_types_lock);
6864 
6865 	return ret;
6866 }
6867 
6868 struct ftrace_buffer_info {
6869 	struct trace_iterator	iter;
6870 	void			*spare;
6871 	unsigned int		spare_cpu;
6872 	unsigned int		read;
6873 };
6874 
6875 #ifdef CONFIG_TRACER_SNAPSHOT
6876 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6877 {
6878 	struct trace_array *tr = inode->i_private;
6879 	struct trace_iterator *iter;
6880 	struct seq_file *m;
6881 	int ret;
6882 
6883 	ret = tracing_check_open_get_tr(tr);
6884 	if (ret)
6885 		return ret;
6886 
6887 	if (file->f_mode & FMODE_READ) {
6888 		iter = __tracing_open(inode, file, true);
6889 		if (IS_ERR(iter))
6890 			ret = PTR_ERR(iter);
6891 	} else {
6892 		/* Writes still need the seq_file to hold the private data */
6893 		ret = -ENOMEM;
6894 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6895 		if (!m)
6896 			goto out;
6897 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6898 		if (!iter) {
6899 			kfree(m);
6900 			goto out;
6901 		}
6902 		ret = 0;
6903 
6904 		iter->tr = tr;
6905 		iter->array_buffer = &tr->max_buffer;
6906 		iter->cpu_file = tracing_get_cpu(inode);
6907 		m->private = iter;
6908 		file->private_data = m;
6909 	}
6910 out:
6911 	if (ret < 0)
6912 		trace_array_put(tr);
6913 
6914 	return ret;
6915 }
6916 
6917 static ssize_t
6918 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6919 		       loff_t *ppos)
6920 {
6921 	struct seq_file *m = filp->private_data;
6922 	struct trace_iterator *iter = m->private;
6923 	struct trace_array *tr = iter->tr;
6924 	unsigned long val;
6925 	int ret;
6926 
6927 	ret = tracing_update_buffers();
6928 	if (ret < 0)
6929 		return ret;
6930 
6931 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6932 	if (ret)
6933 		return ret;
6934 
6935 	mutex_lock(&trace_types_lock);
6936 
6937 	if (tr->current_trace->use_max_tr) {
6938 		ret = -EBUSY;
6939 		goto out;
6940 	}
6941 
6942 	arch_spin_lock(&tr->max_lock);
6943 	if (tr->cond_snapshot)
6944 		ret = -EBUSY;
6945 	arch_spin_unlock(&tr->max_lock);
6946 	if (ret)
6947 		goto out;
6948 
6949 	switch (val) {
6950 	case 0:
6951 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6952 			ret = -EINVAL;
6953 			break;
6954 		}
6955 		if (tr->allocated_snapshot)
6956 			free_snapshot(tr);
6957 		break;
6958 	case 1:
6959 /* Only allow per-cpu swap if the ring buffer supports it */
6960 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6961 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6962 			ret = -EINVAL;
6963 			break;
6964 		}
6965 #endif
6966 		if (tr->allocated_snapshot)
6967 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6968 					&tr->array_buffer, iter->cpu_file);
6969 		else
6970 			ret = tracing_alloc_snapshot_instance(tr);
6971 		if (ret < 0)
6972 			break;
6973 		local_irq_disable();
6974 		/* Now, we're going to swap */
6975 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6976 			update_max_tr(tr, current, smp_processor_id(), NULL);
6977 		else
6978 			update_max_tr_single(tr, current, iter->cpu_file);
6979 		local_irq_enable();
6980 		break;
6981 	default:
6982 		if (tr->allocated_snapshot) {
6983 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6984 				tracing_reset_online_cpus(&tr->max_buffer);
6985 			else
6986 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6987 		}
6988 		break;
6989 	}
6990 
6991 	if (ret >= 0) {
6992 		*ppos += cnt;
6993 		ret = cnt;
6994 	}
6995 out:
6996 	mutex_unlock(&trace_types_lock);
6997 	return ret;
6998 }
6999 
7000 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7001 {
7002 	struct seq_file *m = file->private_data;
7003 	int ret;
7004 
7005 	ret = tracing_release(inode, file);
7006 
7007 	if (file->f_mode & FMODE_READ)
7008 		return ret;
7009 
7010 	/* If write only, the seq_file is just a stub */
7011 	if (m)
7012 		kfree(m->private);
7013 	kfree(m);
7014 
7015 	return 0;
7016 }
7017 
7018 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7019 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7020 				    size_t count, loff_t *ppos);
7021 static int tracing_buffers_release(struct inode *inode, struct file *file);
7022 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7023 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7024 
7025 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7026 {
7027 	struct ftrace_buffer_info *info;
7028 	int ret;
7029 
7030 	/* The following checks for tracefs lockdown */
7031 	ret = tracing_buffers_open(inode, filp);
7032 	if (ret < 0)
7033 		return ret;
7034 
7035 	info = filp->private_data;
7036 
7037 	if (info->iter.trace->use_max_tr) {
7038 		tracing_buffers_release(inode, filp);
7039 		return -EBUSY;
7040 	}
7041 
7042 	info->iter.snapshot = true;
7043 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7044 
7045 	return ret;
7046 }
7047 
7048 #endif /* CONFIG_TRACER_SNAPSHOT */
7049 
7050 
7051 static const struct file_operations tracing_thresh_fops = {
7052 	.open		= tracing_open_generic,
7053 	.read		= tracing_thresh_read,
7054 	.write		= tracing_thresh_write,
7055 	.llseek		= generic_file_llseek,
7056 };
7057 
7058 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7059 static const struct file_operations tracing_max_lat_fops = {
7060 	.open		= tracing_open_generic,
7061 	.read		= tracing_max_lat_read,
7062 	.write		= tracing_max_lat_write,
7063 	.llseek		= generic_file_llseek,
7064 };
7065 #endif
7066 
7067 static const struct file_operations set_tracer_fops = {
7068 	.open		= tracing_open_generic,
7069 	.read		= tracing_set_trace_read,
7070 	.write		= tracing_set_trace_write,
7071 	.llseek		= generic_file_llseek,
7072 };
7073 
7074 static const struct file_operations tracing_pipe_fops = {
7075 	.open		= tracing_open_pipe,
7076 	.poll		= tracing_poll_pipe,
7077 	.read		= tracing_read_pipe,
7078 	.splice_read	= tracing_splice_read_pipe,
7079 	.release	= tracing_release_pipe,
7080 	.llseek		= no_llseek,
7081 };
7082 
7083 static const struct file_operations tracing_entries_fops = {
7084 	.open		= tracing_open_generic_tr,
7085 	.read		= tracing_entries_read,
7086 	.write		= tracing_entries_write,
7087 	.llseek		= generic_file_llseek,
7088 	.release	= tracing_release_generic_tr,
7089 };
7090 
7091 static const struct file_operations tracing_total_entries_fops = {
7092 	.open		= tracing_open_generic_tr,
7093 	.read		= tracing_total_entries_read,
7094 	.llseek		= generic_file_llseek,
7095 	.release	= tracing_release_generic_tr,
7096 };
7097 
7098 static const struct file_operations tracing_free_buffer_fops = {
7099 	.open		= tracing_open_generic_tr,
7100 	.write		= tracing_free_buffer_write,
7101 	.release	= tracing_free_buffer_release,
7102 };
7103 
7104 static const struct file_operations tracing_mark_fops = {
7105 	.open		= tracing_open_generic_tr,
7106 	.write		= tracing_mark_write,
7107 	.llseek		= generic_file_llseek,
7108 	.release	= tracing_release_generic_tr,
7109 };
7110 
7111 static const struct file_operations tracing_mark_raw_fops = {
7112 	.open		= tracing_open_generic_tr,
7113 	.write		= tracing_mark_raw_write,
7114 	.llseek		= generic_file_llseek,
7115 	.release	= tracing_release_generic_tr,
7116 };
7117 
7118 static const struct file_operations trace_clock_fops = {
7119 	.open		= tracing_clock_open,
7120 	.read		= seq_read,
7121 	.llseek		= seq_lseek,
7122 	.release	= tracing_single_release_tr,
7123 	.write		= tracing_clock_write,
7124 };
7125 
7126 static const struct file_operations trace_time_stamp_mode_fops = {
7127 	.open		= tracing_time_stamp_mode_open,
7128 	.read		= seq_read,
7129 	.llseek		= seq_lseek,
7130 	.release	= tracing_single_release_tr,
7131 };
7132 
7133 #ifdef CONFIG_TRACER_SNAPSHOT
7134 static const struct file_operations snapshot_fops = {
7135 	.open		= tracing_snapshot_open,
7136 	.read		= seq_read,
7137 	.write		= tracing_snapshot_write,
7138 	.llseek		= tracing_lseek,
7139 	.release	= tracing_snapshot_release,
7140 };
7141 
7142 static const struct file_operations snapshot_raw_fops = {
7143 	.open		= snapshot_raw_open,
7144 	.read		= tracing_buffers_read,
7145 	.release	= tracing_buffers_release,
7146 	.splice_read	= tracing_buffers_splice_read,
7147 	.llseek		= no_llseek,
7148 };
7149 
7150 #endif /* CONFIG_TRACER_SNAPSHOT */
7151 
7152 #define TRACING_LOG_ERRS_MAX	8
7153 #define TRACING_LOG_LOC_MAX	128
7154 
7155 #define CMD_PREFIX "  Command: "
7156 
7157 struct err_info {
7158 	const char	**errs;	/* ptr to loc-specific array of err strings */
7159 	u8		type;	/* index into errs -> specific err string */
7160 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7161 	u64		ts;
7162 };
7163 
7164 struct tracing_log_err {
7165 	struct list_head	list;
7166 	struct err_info		info;
7167 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7168 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7169 };
7170 
7171 static DEFINE_MUTEX(tracing_err_log_lock);
7172 
7173 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7174 {
7175 	struct tracing_log_err *err;
7176 
7177 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7178 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7179 		if (!err)
7180 			err = ERR_PTR(-ENOMEM);
7181 		tr->n_err_log_entries++;
7182 
7183 		return err;
7184 	}
7185 
7186 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7187 	list_del(&err->list);
7188 
7189 	return err;
7190 }
7191 
7192 /**
7193  * err_pos - find the position of a string within a command for error careting
7194  * @cmd: The tracing command that caused the error
7195  * @str: The string to position the caret at within @cmd
7196  *
7197  * Finds the position of the first occurrence of @str within @cmd.  The
7198  * return value can be passed to tracing_log_err() for caret placement
7199  * within @cmd.
7200  *
7201  * Returns the index within @cmd of the first occurrence of @str or 0
7202  * if @str was not found.
7203  */
7204 unsigned int err_pos(char *cmd, const char *str)
7205 {
7206 	char *found;
7207 
7208 	if (WARN_ON(!strlen(cmd)))
7209 		return 0;
7210 
7211 	found = strstr(cmd, str);
7212 	if (found)
7213 		return found - cmd;
7214 
7215 	return 0;
7216 }
7217 
7218 /**
7219  * tracing_log_err - write an error to the tracing error log
7220  * @tr: The associated trace array for the error (NULL for top level array)
7221  * @loc: A string describing where the error occurred
7222  * @cmd: The tracing command that caused the error
7223  * @errs: The array of loc-specific static error strings
7224  * @type: The index into errs[], which produces the specific static err string
7225  * @pos: The position the caret should be placed in the cmd
7226  *
7227  * Writes an error into tracing/error_log of the form:
7228  *
7229  * <loc>: error: <text>
7230  *   Command: <cmd>
7231  *              ^
7232  *
7233  * tracing/error_log is a small log file containing the last
7234  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7235  * unless there has been a tracing error, and the error log can be
7236  * cleared and have its memory freed by writing the empty string in
7237  * truncation mode to it i.e. echo > tracing/error_log.
7238  *
7239  * NOTE: the @errs array along with the @type param are used to
7240  * produce a static error string - this string is not copied and saved
7241  * when the error is logged - only a pointer to it is saved.  See
7242  * existing callers for examples of how static strings are typically
7243  * defined for use with tracing_log_err().
7244  */
7245 void tracing_log_err(struct trace_array *tr,
7246 		     const char *loc, const char *cmd,
7247 		     const char **errs, u8 type, u8 pos)
7248 {
7249 	struct tracing_log_err *err;
7250 
7251 	if (!tr)
7252 		tr = &global_trace;
7253 
7254 	mutex_lock(&tracing_err_log_lock);
7255 	err = get_tracing_log_err(tr);
7256 	if (PTR_ERR(err) == -ENOMEM) {
7257 		mutex_unlock(&tracing_err_log_lock);
7258 		return;
7259 	}
7260 
7261 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7262 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7263 
7264 	err->info.errs = errs;
7265 	err->info.type = type;
7266 	err->info.pos = pos;
7267 	err->info.ts = local_clock();
7268 
7269 	list_add_tail(&err->list, &tr->err_log);
7270 	mutex_unlock(&tracing_err_log_lock);
7271 }
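
/*
 * Caller sketch (illustrative only, not built; the foo_cmd_* names are
 * hypothetical): how a tracing command handler typically wires up a
 * static error-string table and err_pos() for use with tracing_log_err(),
 * mirroring the existing callers mentioned above.
 */
#if 0
static const char *foo_cmd_errs[] = {
	"Field not found",
	"Duplicate field name",
};

enum { FOO_ERR_FIELD_NOT_FOUND, FOO_ERR_DUPLICATE_FIELD };

static void foo_cmd_error(struct trace_array *tr, char *cmd, const char *field)
{
	/* The caret is placed under the offending field name within @cmd */
	tracing_log_err(tr, "foo_cmd", cmd, foo_cmd_errs,
			FOO_ERR_FIELD_NOT_FOUND, err_pos(cmd, field));
}
#endif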
7272 
7273 static void clear_tracing_err_log(struct trace_array *tr)
7274 {
7275 	struct tracing_log_err *err, *next;
7276 
7277 	mutex_lock(&tracing_err_log_lock);
7278 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7279 		list_del(&err->list);
7280 		kfree(err);
7281 	}
7282 
7283 	tr->n_err_log_entries = 0;
7284 	mutex_unlock(&tracing_err_log_lock);
7285 }
7286 
7287 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7288 {
7289 	struct trace_array *tr = m->private;
7290 
7291 	mutex_lock(&tracing_err_log_lock);
7292 
7293 	return seq_list_start(&tr->err_log, *pos);
7294 }
7295 
7296 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7297 {
7298 	struct trace_array *tr = m->private;
7299 
7300 	return seq_list_next(v, &tr->err_log, pos);
7301 }
7302 
7303 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7304 {
7305 	mutex_unlock(&tracing_err_log_lock);
7306 }
7307 
7308 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7309 {
7310 	u8 i;
7311 
7312 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7313 		seq_putc(m, ' ');
7314 	for (i = 0; i < pos; i++)
7315 		seq_putc(m, ' ');
7316 	seq_puts(m, "^\n");
7317 }
7318 
7319 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7320 {
7321 	struct tracing_log_err *err = v;
7322 
7323 	if (err) {
7324 		const char *err_text = err->info.errs[err->info.type];
7325 		u64 sec = err->info.ts;
7326 		u32 nsec;
7327 
7328 		nsec = do_div(sec, NSEC_PER_SEC);
7329 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7330 			   err->loc, err_text);
7331 		seq_printf(m, "%s", err->cmd);
7332 		tracing_err_log_show_pos(m, err->info.pos);
7333 	}
7334 
7335 	return 0;
7336 }
7337 
7338 static const struct seq_operations tracing_err_log_seq_ops = {
7339 	.start  = tracing_err_log_seq_start,
7340 	.next   = tracing_err_log_seq_next,
7341 	.stop   = tracing_err_log_seq_stop,
7342 	.show   = tracing_err_log_seq_show
7343 };
7344 
7345 static int tracing_err_log_open(struct inode *inode, struct file *file)
7346 {
7347 	struct trace_array *tr = inode->i_private;
7348 	int ret = 0;
7349 
7350 	ret = tracing_check_open_get_tr(tr);
7351 	if (ret)
7352 		return ret;
7353 
7354 	/* If this file was opened for write, then erase contents */
7355 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7356 		clear_tracing_err_log(tr);
7357 
7358 	if (file->f_mode & FMODE_READ) {
7359 		ret = seq_open(file, &tracing_err_log_seq_ops);
7360 		if (!ret) {
7361 			struct seq_file *m = file->private_data;
7362 			m->private = tr;
7363 		} else {
7364 			trace_array_put(tr);
7365 		}
7366 	}
7367 	return ret;
7368 }
7369 
7370 static ssize_t tracing_err_log_write(struct file *file,
7371 				     const char __user *buffer,
7372 				     size_t count, loff_t *ppos)
7373 {
7374 	return count;
7375 }
7376 
7377 static int tracing_err_log_release(struct inode *inode, struct file *file)
7378 {
7379 	struct trace_array *tr = inode->i_private;
7380 
7381 	trace_array_put(tr);
7382 
7383 	if (file->f_mode & FMODE_READ)
7384 		seq_release(inode, file);
7385 
7386 	return 0;
7387 }
7388 
7389 static const struct file_operations tracing_err_log_fops = {
7390 	.open           = tracing_err_log_open,
7391 	.write		= tracing_err_log_write,
7392 	.read           = seq_read,
7393 	.llseek         = seq_lseek,
7394 	.release        = tracing_err_log_release,
7395 };
7396 
7397 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7398 {
7399 	struct trace_array *tr = inode->i_private;
7400 	struct ftrace_buffer_info *info;
7401 	int ret;
7402 
7403 	ret = tracing_check_open_get_tr(tr);
7404 	if (ret)
7405 		return ret;
7406 
7407 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7408 	if (!info) {
7409 		trace_array_put(tr);
7410 		return -ENOMEM;
7411 	}
7412 
7413 	mutex_lock(&trace_types_lock);
7414 
7415 	info->iter.tr		= tr;
7416 	info->iter.cpu_file	= tracing_get_cpu(inode);
7417 	info->iter.trace	= tr->current_trace;
7418 	info->iter.array_buffer = &tr->array_buffer;
7419 	info->spare		= NULL;
7420 	/* Force reading ring buffer for first read */
7421 	info->read		= (unsigned int)-1;
7422 
7423 	filp->private_data = info;
7424 
7425 	tr->current_trace->ref++;
7426 
7427 	mutex_unlock(&trace_types_lock);
7428 
7429 	ret = nonseekable_open(inode, filp);
7430 	if (ret < 0)
7431 		trace_array_put(tr);
7432 
7433 	return ret;
7434 }
7435 
7436 static __poll_t
7437 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7438 {
7439 	struct ftrace_buffer_info *info = filp->private_data;
7440 	struct trace_iterator *iter = &info->iter;
7441 
7442 	return trace_poll(iter, filp, poll_table);
7443 }
7444 
7445 static ssize_t
7446 tracing_buffers_read(struct file *filp, char __user *ubuf,
7447 		     size_t count, loff_t *ppos)
7448 {
7449 	struct ftrace_buffer_info *info = filp->private_data;
7450 	struct trace_iterator *iter = &info->iter;
7451 	ssize_t ret = 0;
7452 	ssize_t size;
7453 
7454 	if (!count)
7455 		return 0;
7456 
7457 #ifdef CONFIG_TRACER_MAX_TRACE
7458 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7459 		return -EBUSY;
7460 #endif
7461 
7462 	if (!info->spare) {
7463 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7464 							  iter->cpu_file);
7465 		if (IS_ERR(info->spare)) {
7466 			ret = PTR_ERR(info->spare);
7467 			info->spare = NULL;
7468 		} else {
7469 			info->spare_cpu = iter->cpu_file;
7470 		}
7471 	}
7472 	if (!info->spare)
7473 		return ret;
7474 
7475 	/* Do we have previous read data to read? */
7476 	if (info->read < PAGE_SIZE)
7477 		goto read;
7478 
7479  again:
7480 	trace_access_lock(iter->cpu_file);
7481 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7482 				    &info->spare,
7483 				    count,
7484 				    iter->cpu_file, 0);
7485 	trace_access_unlock(iter->cpu_file);
7486 
7487 	if (ret < 0) {
7488 		if (trace_empty(iter)) {
7489 			if ((filp->f_flags & O_NONBLOCK))
7490 				return -EAGAIN;
7491 
7492 			ret = wait_on_pipe(iter, 0);
7493 			if (ret)
7494 				return ret;
7495 
7496 			goto again;
7497 		}
7498 		return 0;
7499 	}
7500 
7501 	info->read = 0;
7502  read:
7503 	size = PAGE_SIZE - info->read;
7504 	if (size > count)
7505 		size = count;
7506 
7507 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7508 	if (ret == size)
7509 		return -EFAULT;
7510 
7511 	size -= ret;
7512 
7513 	*ppos += size;
7514 	info->read += size;
7515 
7516 	return size;
7517 }
7518 
7519 static int tracing_buffers_release(struct inode *inode, struct file *file)
7520 {
7521 	struct ftrace_buffer_info *info = file->private_data;
7522 	struct trace_iterator *iter = &info->iter;
7523 
7524 	mutex_lock(&trace_types_lock);
7525 
7526 	iter->tr->current_trace->ref--;
7527 
7528 	__trace_array_put(iter->tr);
7529 
7530 	if (info->spare)
7531 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7532 					   info->spare_cpu, info->spare);
7533 	kfree(info);
7534 
7535 	mutex_unlock(&trace_types_lock);
7536 
7537 	return 0;
7538 }
7539 
7540 struct buffer_ref {
7541 	struct trace_buffer	*buffer;
7542 	void			*page;
7543 	int			cpu;
7544 	refcount_t		refcount;
7545 };
7546 
7547 static void buffer_ref_release(struct buffer_ref *ref)
7548 {
7549 	if (!refcount_dec_and_test(&ref->refcount))
7550 		return;
7551 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7552 	kfree(ref);
7553 }
7554 
7555 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7556 				    struct pipe_buffer *buf)
7557 {
7558 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7559 
7560 	buffer_ref_release(ref);
7561 	buf->private = 0;
7562 }
7563 
7564 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7565 				struct pipe_buffer *buf)
7566 {
7567 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7568 
7569 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7570 		return false;
7571 
7572 	refcount_inc(&ref->refcount);
7573 	return true;
7574 }
7575 
7576 /* Pipe buffer operations for a buffer. */
7577 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7578 	.release		= buffer_pipe_buf_release,
7579 	.get			= buffer_pipe_buf_get,
7580 };
7581 
7582 /*
7583  * Callback from splice_to_pipe(); releases any pages left at the end
7584  * of the spd if we errored out while filling the pipe.
7585  */
7586 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7587 {
7588 	struct buffer_ref *ref =
7589 		(struct buffer_ref *)spd->partial[i].private;
7590 
7591 	buffer_ref_release(ref);
7592 	spd->partial[i].private = 0;
7593 }
7594 
7595 static ssize_t
7596 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7597 			    struct pipe_inode_info *pipe, size_t len,
7598 			    unsigned int flags)
7599 {
7600 	struct ftrace_buffer_info *info = file->private_data;
7601 	struct trace_iterator *iter = &info->iter;
7602 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7603 	struct page *pages_def[PIPE_DEF_BUFFERS];
7604 	struct splice_pipe_desc spd = {
7605 		.pages		= pages_def,
7606 		.partial	= partial_def,
7607 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7608 		.ops		= &buffer_pipe_buf_ops,
7609 		.spd_release	= buffer_spd_release,
7610 	};
7611 	struct buffer_ref *ref;
7612 	int entries, i;
7613 	ssize_t ret = 0;
7614 
7615 #ifdef CONFIG_TRACER_MAX_TRACE
7616 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7617 		return -EBUSY;
7618 #endif
7619 
7620 	if (*ppos & (PAGE_SIZE - 1))
7621 		return -EINVAL;
7622 
7623 	if (len & (PAGE_SIZE - 1)) {
7624 		if (len < PAGE_SIZE)
7625 			return -EINVAL;
7626 		len &= PAGE_MASK;
7627 	}
7628 
7629 	if (splice_grow_spd(pipe, &spd))
7630 		return -ENOMEM;
7631 
7632  again:
7633 	trace_access_lock(iter->cpu_file);
7634 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7635 
7636 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7637 		struct page *page;
7638 		int r;
7639 
7640 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7641 		if (!ref) {
7642 			ret = -ENOMEM;
7643 			break;
7644 		}
7645 
7646 		refcount_set(&ref->refcount, 1);
7647 		ref->buffer = iter->array_buffer->buffer;
7648 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7649 		if (IS_ERR(ref->page)) {
7650 			ret = PTR_ERR(ref->page);
7651 			ref->page = NULL;
7652 			kfree(ref);
7653 			break;
7654 		}
7655 		ref->cpu = iter->cpu_file;
7656 
7657 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7658 					  len, iter->cpu_file, 1);
7659 		if (r < 0) {
7660 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7661 						   ref->page);
7662 			kfree(ref);
7663 			break;
7664 		}
7665 
7666 		page = virt_to_page(ref->page);
7667 
7668 		spd.pages[i] = page;
7669 		spd.partial[i].len = PAGE_SIZE;
7670 		spd.partial[i].offset = 0;
7671 		spd.partial[i].private = (unsigned long)ref;
7672 		spd.nr_pages++;
7673 		*ppos += PAGE_SIZE;
7674 
7675 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7676 	}
7677 
7678 	trace_access_unlock(iter->cpu_file);
7679 	spd.nr_pages = i;
7680 
7681 	/* did we read anything? */
7682 	if (!spd.nr_pages) {
7683 		if (ret)
7684 			goto out;
7685 
7686 		ret = -EAGAIN;
7687 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7688 			goto out;
7689 
7690 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7691 		if (ret)
7692 			goto out;
7693 
7694 		goto again;
7695 	}
7696 
7697 	ret = splice_to_pipe(pipe, &spd);
7698 out:
7699 	splice_shrink_spd(&spd);
7700 
7701 	return ret;
7702 }
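
/*
 * Illustrative user-space sketch (not part of the kernel build, assuming a
 * 4K PAGE_SIZE; paths are hypothetical): the splice path above moves whole
 * ring-buffer pages into a pipe without copying them through a read()
 * buffer.
 *
 *	int raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int out = open("/tmp/cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	for (;;) {
 *		ssize_t n = splice(raw, NULL, pfd[1], NULL, 4096,
 *				   SPLICE_F_NONBLOCK);
 *		if (n <= 0)
 *			break;
 *		splice(pfd[0], NULL, out, NULL, n, 0);
 *	}
 *
 * The length and file offset must stay page aligned, matching the checks
 * at the top of tracing_buffers_splice_read().
 */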
7703 
7704 static const struct file_operations tracing_buffers_fops = {
7705 	.open		= tracing_buffers_open,
7706 	.read		= tracing_buffers_read,
7707 	.poll		= tracing_buffers_poll,
7708 	.release	= tracing_buffers_release,
7709 	.splice_read	= tracing_buffers_splice_read,
7710 	.llseek		= no_llseek,
7711 };
7712 
7713 static ssize_t
7714 tracing_stats_read(struct file *filp, char __user *ubuf,
7715 		   size_t count, loff_t *ppos)
7716 {
7717 	struct inode *inode = file_inode(filp);
7718 	struct trace_array *tr = inode->i_private;
7719 	struct array_buffer *trace_buf = &tr->array_buffer;
7720 	int cpu = tracing_get_cpu(inode);
7721 	struct trace_seq *s;
7722 	unsigned long cnt;
7723 	unsigned long long t;
7724 	unsigned long usec_rem;
7725 
7726 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7727 	if (!s)
7728 		return -ENOMEM;
7729 
7730 	trace_seq_init(s);
7731 
7732 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7733 	trace_seq_printf(s, "entries: %ld\n", cnt);
7734 
7735 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7736 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7737 
7738 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7739 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7740 
7741 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7742 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7743 
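	/*
	 * Worked example for the nanosecond-clock branch below: a raw
	 * timestamp of 1234567890 ns becomes 1234567 after ns2usecs();
	 * do_div(t, USEC_PER_SEC) then leaves the quotient (1 second) in t
	 * and returns the remainder 234567, so the value is printed as
	 * "1.234567".
	 */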
7744 	if (trace_clocks[tr->clock_id].in_ns) {
7745 		/* local or global for trace_clock */
7746 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7747 		usec_rem = do_div(t, USEC_PER_SEC);
7748 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7749 								t, usec_rem);
7750 
7751 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7752 		usec_rem = do_div(t, USEC_PER_SEC);
7753 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7754 	} else {
7755 		/* counter or tsc mode for trace_clock */
7756 		trace_seq_printf(s, "oldest event ts: %llu\n",
7757 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7758 
7759 		trace_seq_printf(s, "now ts: %llu\n",
7760 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7761 	}
7762 
7763 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7764 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7765 
7766 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7767 	trace_seq_printf(s, "read events: %ld\n", cnt);
7768 
7769 	count = simple_read_from_buffer(ubuf, count, ppos,
7770 					s->buffer, trace_seq_used(s));
7771 
7772 	kfree(s);
7773 
7774 	return count;
7775 }
7776 
7777 static const struct file_operations tracing_stats_fops = {
7778 	.open		= tracing_open_generic_tr,
7779 	.read		= tracing_stats_read,
7780 	.llseek		= generic_file_llseek,
7781 	.release	= tracing_release_generic_tr,
7782 };
7783 
7784 #ifdef CONFIG_DYNAMIC_FTRACE
7785 
7786 static ssize_t
7787 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7788 		  size_t cnt, loff_t *ppos)
7789 {
7790 	ssize_t ret;
7791 	char *buf;
7792 	int r;
7793 
7794 	/* 256 should be plenty to hold the amount needed */
7795 	buf = kmalloc(256, GFP_KERNEL);
7796 	if (!buf)
7797 		return -ENOMEM;
7798 
7799 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7800 		      ftrace_update_tot_cnt,
7801 		      ftrace_number_of_pages,
7802 		      ftrace_number_of_groups);
7803 
7804 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7805 	kfree(buf);
7806 	return ret;
7807 }
7808 
7809 static const struct file_operations tracing_dyn_info_fops = {
7810 	.open		= tracing_open_generic,
7811 	.read		= tracing_read_dyn_info,
7812 	.llseek		= generic_file_llseek,
7813 };
7814 #endif /* CONFIG_DYNAMIC_FTRACE */
7815 
7816 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7817 static void
7818 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7819 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7820 		void *data)
7821 {
7822 	tracing_snapshot_instance(tr);
7823 }
7824 
7825 static void
7826 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7827 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7828 		      void *data)
7829 {
7830 	struct ftrace_func_mapper *mapper = data;
7831 	long *count = NULL;
7832 
7833 	if (mapper)
7834 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7835 
7836 	if (count) {
7837 
7838 		if (*count <= 0)
7839 			return;
7840 
7841 		(*count)--;
7842 	}
7843 
7844 	tracing_snapshot_instance(tr);
7845 }
7846 
7847 static int
7848 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7849 		      struct ftrace_probe_ops *ops, void *data)
7850 {
7851 	struct ftrace_func_mapper *mapper = data;
7852 	long *count = NULL;
7853 
7854 	seq_printf(m, "%ps:", (void *)ip);
7855 
7856 	seq_puts(m, "snapshot");
7857 
7858 	if (mapper)
7859 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7860 
7861 	if (count)
7862 		seq_printf(m, ":count=%ld\n", *count);
7863 	else
7864 		seq_puts(m, ":unlimited\n");
7865 
7866 	return 0;
7867 }
7868 
7869 static int
7870 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7871 		     unsigned long ip, void *init_data, void **data)
7872 {
7873 	struct ftrace_func_mapper *mapper = *data;
7874 
7875 	if (!mapper) {
7876 		mapper = allocate_ftrace_func_mapper();
7877 		if (!mapper)
7878 			return -ENOMEM;
7879 		*data = mapper;
7880 	}
7881 
7882 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7883 }
7884 
7885 static void
7886 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7887 		     unsigned long ip, void *data)
7888 {
7889 	struct ftrace_func_mapper *mapper = data;
7890 
7891 	if (!ip) {
7892 		if (!mapper)
7893 			return;
7894 		free_ftrace_func_mapper(mapper, NULL);
7895 		return;
7896 	}
7897 
7898 	ftrace_func_mapper_remove_ip(mapper, ip);
7899 }
7900 
7901 static struct ftrace_probe_ops snapshot_probe_ops = {
7902 	.func			= ftrace_snapshot,
7903 	.print			= ftrace_snapshot_print,
7904 };
7905 
7906 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7907 	.func			= ftrace_count_snapshot,
7908 	.print			= ftrace_snapshot_print,
7909 	.init			= ftrace_snapshot_init,
7910 	.free			= ftrace_snapshot_free,
7911 };
7912 
7913 static int
7914 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7915 			       char *glob, char *cmd, char *param, int enable)
7916 {
7917 	struct ftrace_probe_ops *ops;
7918 	void *count = (void *)-1;
7919 	char *number;
7920 	int ret;
7921 
7922 	if (!tr)
7923 		return -ENODEV;
7924 
7925 	/* hash funcs only work with set_ftrace_filter */
7926 	if (!enable)
7927 		return -EINVAL;
7928 
7929 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7930 
7931 	if (glob[0] == '!')
7932 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7933 
7934 	if (!param)
7935 		goto out_reg;
7936 
7937 	number = strsep(&param, ":");
7938 
7939 	if (!strlen(number))
7940 		goto out_reg;
7941 
7942 	/*
7943 	 * We use the callback data field (which is a pointer)
7944 	 * as our counter.
7945 	 */
7946 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7947 	if (ret)
7948 		return ret;
7949 
7950  out_reg:
7951 	ret = tracing_alloc_snapshot_instance(tr);
7952 	if (ret < 0)
7953 		goto out;
7954 
7955 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7956 
7957  out:
7958 	return ret < 0 ? ret : 0;
7959 }
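
/*
 * Accepted syntax, as parsed above (shell lines are illustrative only;
 * do_sys_open stands in for any traceable function):
 *
 *	echo 'do_sys_open:snapshot'   > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *	echo '!do_sys_open:snapshot'  > set_ftrace_filter
 *
 * The optional count selects snapshot_count_probe_ops and limits how many
 * snapshots are taken; the '!' form unregisters the probe again.
 */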
7960 
7961 static struct ftrace_func_command ftrace_snapshot_cmd = {
7962 	.name			= "snapshot",
7963 	.func			= ftrace_trace_snapshot_callback,
7964 };
7965 
7966 static __init int register_snapshot_cmd(void)
7967 {
7968 	return register_ftrace_command(&ftrace_snapshot_cmd);
7969 }
7970 #else
7971 static inline __init int register_snapshot_cmd(void) { return 0; }
7972 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7973 
7974 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7975 {
7976 	if (WARN_ON(!tr->dir))
7977 		return ERR_PTR(-ENODEV);
7978 
7979 	/* Top directory uses NULL as the parent */
7980 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7981 		return NULL;
7982 
7983 	/* All sub buffers have a descriptor */
7984 	return tr->dir;
7985 }
7986 
7987 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7988 {
7989 	struct dentry *d_tracer;
7990 
7991 	if (tr->percpu_dir)
7992 		return tr->percpu_dir;
7993 
7994 	d_tracer = tracing_get_dentry(tr);
7995 	if (IS_ERR(d_tracer))
7996 		return NULL;
7997 
7998 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7999 
8000 	MEM_FAIL(!tr->percpu_dir,
8001 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8002 
8003 	return tr->percpu_dir;
8004 }
8005 
8006 static struct dentry *
8007 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8008 		      void *data, long cpu, const struct file_operations *fops)
8009 {
8010 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8011 
8012 	if (ret) /* See tracing_get_cpu() */
8013 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8014 	return ret;
8015 }
8016 
8017 static void
8018 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8019 {
8020 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8021 	struct dentry *d_cpu;
8022 	char cpu_dir[30]; /* 30 characters should be more than enough */
8023 
8024 	if (!d_percpu)
8025 		return;
8026 
8027 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8028 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8029 	if (!d_cpu) {
8030 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8031 		return;
8032 	}
8033 
8034 	/* per cpu trace_pipe */
8035 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8036 				tr, cpu, &tracing_pipe_fops);
8037 
8038 	/* per cpu trace */
8039 	trace_create_cpu_file("trace", 0644, d_cpu,
8040 				tr, cpu, &tracing_fops);
8041 
8042 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8043 				tr, cpu, &tracing_buffers_fops);
8044 
8045 	trace_create_cpu_file("stats", 0444, d_cpu,
8046 				tr, cpu, &tracing_stats_fops);
8047 
8048 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8049 				tr, cpu, &tracing_entries_fops);
8050 
8051 #ifdef CONFIG_TRACER_SNAPSHOT
8052 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8053 				tr, cpu, &snapshot_fops);
8054 
8055 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8056 				tr, cpu, &snapshot_raw_fops);
8057 #endif
8058 }
8059 
8060 #ifdef CONFIG_FTRACE_SELFTEST
8061 /* Let selftest have access to static functions in this file */
8062 #include "trace_selftest.c"
8063 #endif
8064 
8065 static ssize_t
8066 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8067 			loff_t *ppos)
8068 {
8069 	struct trace_option_dentry *topt = filp->private_data;
8070 	char *buf;
8071 
8072 	if (topt->flags->val & topt->opt->bit)
8073 		buf = "1\n";
8074 	else
8075 		buf = "0\n";
8076 
8077 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8078 }
8079 
8080 static ssize_t
8081 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8082 			 loff_t *ppos)
8083 {
8084 	struct trace_option_dentry *topt = filp->private_data;
8085 	unsigned long val;
8086 	int ret;
8087 
8088 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8089 	if (ret)
8090 		return ret;
8091 
8092 	if (val != 0 && val != 1)
8093 		return -EINVAL;
8094 
8095 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8096 		mutex_lock(&trace_types_lock);
8097 		ret = __set_tracer_option(topt->tr, topt->flags,
8098 					  topt->opt, !val);
8099 		mutex_unlock(&trace_types_lock);
8100 		if (ret)
8101 			return ret;
8102 	}
8103 
8104 	*ppos += cnt;
8105 
8106 	return cnt;
8107 }
8108 
8109 
8110 static const struct file_operations trace_options_fops = {
8111 	.open = tracing_open_generic,
8112 	.read = trace_options_read,
8113 	.write = trace_options_write,
8114 	.llseek	= generic_file_llseek,
8115 };
8116 
8117 /*
8118  * In order to pass in both the trace_array descriptor and the index
8119  * to the flag that the trace option file represents, the trace_array
8120  * has a character array of trace_flags_index[], which holds the index
8121  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8122  * The address of this character array is passed to the flag option file
8123  * read/write callbacks.
8124  *
8125  * In order to extract both the index and the trace_array descriptor,
8126  * get_tr_index() uses the following algorithm.
8127  *
8128  *   idx = *ptr;
8129  *
8130  * As the pointer points at an index entry whose value equals its own
8131  * position in the array (remember index[1] == 1).
8132  *
8133  * Then, to get the trace_array descriptor, subtracting that index
8134  * from the pointer gets us back to the start of the array itself.
8135  *
8136  *   ptr - idx == &index[0]
8137  *
8138  * Then a simple container_of() from that pointer gets us to the
8139  * trace_array descriptor.
8140  */
8141 static void get_tr_index(void *data, struct trace_array **ptr,
8142 			 unsigned int *pindex)
8143 {
8144 	*pindex = *(unsigned char *)data;
8145 
8146 	*ptr = container_of(data - *pindex, struct trace_array,
8147 			    trace_flags_index);
8148 }
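
/*
 * Worked example: the option file for flag bit 3 gets
 * &tr->trace_flags_index[3] as its private data, and that byte holds the
 * value 3.  So *pindex == 3, (data - 3) == &tr->trace_flags_index[0], and
 * container_of() from there recovers the owning trace_array.
 */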
8149 
8150 static ssize_t
8151 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8152 			loff_t *ppos)
8153 {
8154 	void *tr_index = filp->private_data;
8155 	struct trace_array *tr;
8156 	unsigned int index;
8157 	char *buf;
8158 
8159 	get_tr_index(tr_index, &tr, &index);
8160 
8161 	if (tr->trace_flags & (1 << index))
8162 		buf = "1\n";
8163 	else
8164 		buf = "0\n";
8165 
8166 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8167 }
8168 
8169 static ssize_t
8170 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8171 			 loff_t *ppos)
8172 {
8173 	void *tr_index = filp->private_data;
8174 	struct trace_array *tr;
8175 	unsigned int index;
8176 	unsigned long val;
8177 	int ret;
8178 
8179 	get_tr_index(tr_index, &tr, &index);
8180 
8181 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8182 	if (ret)
8183 		return ret;
8184 
8185 	if (val != 0 && val != 1)
8186 		return -EINVAL;
8187 
8188 	mutex_lock(&event_mutex);
8189 	mutex_lock(&trace_types_lock);
8190 	ret = set_tracer_flag(tr, 1 << index, val);
8191 	mutex_unlock(&trace_types_lock);
8192 	mutex_unlock(&event_mutex);
8193 
8194 	if (ret < 0)
8195 		return ret;
8196 
8197 	*ppos += cnt;
8198 
8199 	return cnt;
8200 }
8201 
8202 static const struct file_operations trace_options_core_fops = {
8203 	.open = tracing_open_generic,
8204 	.read = trace_options_core_read,
8205 	.write = trace_options_core_write,
8206 	.llseek = generic_file_llseek,
8207 };
8208 
8209 struct dentry *trace_create_file(const char *name,
8210 				 umode_t mode,
8211 				 struct dentry *parent,
8212 				 void *data,
8213 				 const struct file_operations *fops)
8214 {
8215 	struct dentry *ret;
8216 
8217 	ret = tracefs_create_file(name, mode, parent, data, fops);
8218 	if (!ret)
8219 		pr_warn("Could not create tracefs '%s' entry\n", name);
8220 
8221 	return ret;
8222 }
8223 
8224 
8225 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8226 {
8227 	struct dentry *d_tracer;
8228 
8229 	if (tr->options)
8230 		return tr->options;
8231 
8232 	d_tracer = tracing_get_dentry(tr);
8233 	if (IS_ERR(d_tracer))
8234 		return NULL;
8235 
8236 	tr->options = tracefs_create_dir("options", d_tracer);
8237 	if (!tr->options) {
8238 		pr_warn("Could not create tracefs directory 'options'\n");
8239 		return NULL;
8240 	}
8241 
8242 	return tr->options;
8243 }
8244 
8245 static void
8246 create_trace_option_file(struct trace_array *tr,
8247 			 struct trace_option_dentry *topt,
8248 			 struct tracer_flags *flags,
8249 			 struct tracer_opt *opt)
8250 {
8251 	struct dentry *t_options;
8252 
8253 	t_options = trace_options_init_dentry(tr);
8254 	if (!t_options)
8255 		return;
8256 
8257 	topt->flags = flags;
8258 	topt->opt = opt;
8259 	topt->tr = tr;
8260 
8261 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8262 				    &trace_options_fops);
8263 
8264 }
8265 
8266 static void
8267 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8268 {
8269 	struct trace_option_dentry *topts;
8270 	struct trace_options *tr_topts;
8271 	struct tracer_flags *flags;
8272 	struct tracer_opt *opts;
8273 	int cnt;
8274 	int i;
8275 
8276 	if (!tracer)
8277 		return;
8278 
8279 	flags = tracer->flags;
8280 
8281 	if (!flags || !flags->opts)
8282 		return;
8283 
8284 	/*
8285 	 * If this is an instance, only create flags for tracers
8286 	 * the instance may have.
8287 	 */
8288 	if (!trace_ok_for_array(tracer, tr))
8289 		return;
8290 
8291 	for (i = 0; i < tr->nr_topts; i++) {
8292 		/* Make sure there are no duplicate flags. */
8293 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8294 			return;
8295 	}
8296 
8297 	opts = flags->opts;
8298 
8299 	for (cnt = 0; opts[cnt].name; cnt++)
8300 		;
8301 
8302 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8303 	if (!topts)
8304 		return;
8305 
8306 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8307 			    GFP_KERNEL);
8308 	if (!tr_topts) {
8309 		kfree(topts);
8310 		return;
8311 	}
8312 
8313 	tr->topts = tr_topts;
8314 	tr->topts[tr->nr_topts].tracer = tracer;
8315 	tr->topts[tr->nr_topts].topts = topts;
8316 	tr->nr_topts++;
8317 
8318 	for (cnt = 0; opts[cnt].name; cnt++) {
8319 		create_trace_option_file(tr, &topts[cnt], flags,
8320 					 &opts[cnt]);
8321 		MEM_FAIL(topts[cnt].entry == NULL,
8322 			  "Failed to create trace option: %s",
8323 			  opts[cnt].name);
8324 	}
8325 }
8326 
8327 static struct dentry *
8328 create_trace_option_core_file(struct trace_array *tr,
8329 			      const char *option, long index)
8330 {
8331 	struct dentry *t_options;
8332 
8333 	t_options = trace_options_init_dentry(tr);
8334 	if (!t_options)
8335 		return NULL;
8336 
8337 	return trace_create_file(option, 0644, t_options,
8338 				 (void *)&tr->trace_flags_index[index],
8339 				 &trace_options_core_fops);
8340 }
8341 
8342 static void create_trace_options_dir(struct trace_array *tr)
8343 {
8344 	struct dentry *t_options;
8345 	bool top_level = tr == &global_trace;
8346 	int i;
8347 
8348 	t_options = trace_options_init_dentry(tr);
8349 	if (!t_options)
8350 		return;
8351 
8352 	for (i = 0; trace_options[i]; i++) {
8353 		if (top_level ||
8354 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8355 			create_trace_option_core_file(tr, trace_options[i], i);
8356 	}
8357 }
8358 
8359 static ssize_t
8360 rb_simple_read(struct file *filp, char __user *ubuf,
8361 	       size_t cnt, loff_t *ppos)
8362 {
8363 	struct trace_array *tr = filp->private_data;
8364 	char buf[64];
8365 	int r;
8366 
8367 	r = tracer_tracing_is_on(tr);
8368 	r = sprintf(buf, "%d\n", r);
8369 
8370 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8371 }
8372 
8373 static ssize_t
8374 rb_simple_write(struct file *filp, const char __user *ubuf,
8375 		size_t cnt, loff_t *ppos)
8376 {
8377 	struct trace_array *tr = filp->private_data;
8378 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8379 	unsigned long val;
8380 	int ret;
8381 
8382 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8383 	if (ret)
8384 		return ret;
8385 
8386 	if (buffer) {
8387 		mutex_lock(&trace_types_lock);
8388 		if (!!val == tracer_tracing_is_on(tr)) {
8389 			val = 0; /* do nothing */
8390 		} else if (val) {
8391 			tracer_tracing_on(tr);
8392 			if (tr->current_trace->start)
8393 				tr->current_trace->start(tr);
8394 		} else {
8395 			tracer_tracing_off(tr);
8396 			if (tr->current_trace->stop)
8397 				tr->current_trace->stop(tr);
8398 		}
8399 		mutex_unlock(&trace_types_lock);
8400 	}
8401 
8402 	(*ppos)++;
8403 
8404 	return cnt;
8405 }
8406 
8407 static const struct file_operations rb_simple_fops = {
8408 	.open		= tracing_open_generic_tr,
8409 	.read		= rb_simple_read,
8410 	.write		= rb_simple_write,
8411 	.release	= tracing_release_generic_tr,
8412 	.llseek		= default_llseek,
8413 };
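
/*
 * These fops back the per-instance "tracing_on" file created in
 * init_tracer_tracefs().  Typical use from user space (illustrative):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 *
 * Writing 0 turns the ring buffer off and calls the tracer's ->stop()
 * callback if it has one; writing 1 turns it back on and calls ->start().
 */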
8414 
8415 static ssize_t
8416 buffer_percent_read(struct file *filp, char __user *ubuf,
8417 		    size_t cnt, loff_t *ppos)
8418 {
8419 	struct trace_array *tr = filp->private_data;
8420 	char buf[64];
8421 	int r;
8422 
8423 	r = tr->buffer_percent;
8424 	r = sprintf(buf, "%d\n", r);
8425 
8426 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8427 }
8428 
8429 static ssize_t
8430 buffer_percent_write(struct file *filp, const char __user *ubuf,
8431 		     size_t cnt, loff_t *ppos)
8432 {
8433 	struct trace_array *tr = filp->private_data;
8434 	unsigned long val;
8435 	int ret;
8436 
8437 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8438 	if (ret)
8439 		return ret;
8440 
8441 	if (val > 100)
8442 		return -EINVAL;
8443 
8444 	if (!val)
8445 		val = 1;
8446 
8447 	tr->buffer_percent = val;
8448 
8449 	(*ppos)++;
8450 
8451 	return cnt;
8452 }
8453 
8454 static const struct file_operations buffer_percent_fops = {
8455 	.open		= tracing_open_generic_tr,
8456 	.read		= buffer_percent_read,
8457 	.write		= buffer_percent_write,
8458 	.release	= tracing_release_generic_tr,
8459 	.llseek		= default_llseek,
8460 };
8461 
8462 static struct dentry *trace_instance_dir;
8463 
8464 static void
8465 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8466 
8467 static int
8468 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8469 {
8470 	enum ring_buffer_flags rb_flags;
8471 
8472 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8473 
8474 	buf->tr = tr;
8475 
8476 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8477 	if (!buf->buffer)
8478 		return -ENOMEM;
8479 
8480 	buf->data = alloc_percpu(struct trace_array_cpu);
8481 	if (!buf->data) {
8482 		ring_buffer_free(buf->buffer);
8483 		buf->buffer = NULL;
8484 		return -ENOMEM;
8485 	}
8486 
8487 	/* Allocate the first page for all buffers */
8488 	set_buffer_entries(&tr->array_buffer,
8489 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8490 
8491 	return 0;
8492 }
8493 
8494 static int allocate_trace_buffers(struct trace_array *tr, int size)
8495 {
8496 	int ret;
8497 
8498 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8499 	if (ret)
8500 		return ret;
8501 
8502 #ifdef CONFIG_TRACER_MAX_TRACE
8503 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8504 				    allocate_snapshot ? size : 1);
8505 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8506 		ring_buffer_free(tr->array_buffer.buffer);
8507 		tr->array_buffer.buffer = NULL;
8508 		free_percpu(tr->array_buffer.data);
8509 		tr->array_buffer.data = NULL;
8510 		return -ENOMEM;
8511 	}
8512 	tr->allocated_snapshot = allocate_snapshot;
8513 
8514 	/*
8515 	 * Only the top level trace array gets its snapshot allocated
8516 	 * from the kernel command line.
8517 	 */
8518 	allocate_snapshot = false;
8519 #endif
8520 
8521 	return 0;
8522 }
8523 
8524 static void free_trace_buffer(struct array_buffer *buf)
8525 {
8526 	if (buf->buffer) {
8527 		ring_buffer_free(buf->buffer);
8528 		buf->buffer = NULL;
8529 		free_percpu(buf->data);
8530 		buf->data = NULL;
8531 	}
8532 }
8533 
8534 static void free_trace_buffers(struct trace_array *tr)
8535 {
8536 	if (!tr)
8537 		return;
8538 
8539 	free_trace_buffer(&tr->array_buffer);
8540 
8541 #ifdef CONFIG_TRACER_MAX_TRACE
8542 	free_trace_buffer(&tr->max_buffer);
8543 #endif
8544 }
8545 
8546 static void init_trace_flags_index(struct trace_array *tr)
8547 {
8548 	int i;
8549 
8550 	/* Used by the trace options files */
8551 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8552 		tr->trace_flags_index[i] = i;
8553 }
8554 
8555 static void __update_tracer_options(struct trace_array *tr)
8556 {
8557 	struct tracer *t;
8558 
8559 	for (t = trace_types; t; t = t->next)
8560 		add_tracer_options(tr, t);
8561 }
8562 
8563 static void update_tracer_options(struct trace_array *tr)
8564 {
8565 	mutex_lock(&trace_types_lock);
8566 	__update_tracer_options(tr);
8567 	mutex_unlock(&trace_types_lock);
8568 }
8569 
8570 /* Must have trace_types_lock held */
8571 struct trace_array *trace_array_find(const char *instance)
8572 {
8573 	struct trace_array *tr, *found = NULL;
8574 
8575 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8576 		if (tr->name && strcmp(tr->name, instance) == 0) {
8577 			found = tr;
8578 			break;
8579 		}
8580 	}
8581 
8582 	return found;
8583 }
8584 
8585 struct trace_array *trace_array_find_get(const char *instance)
8586 {
8587 	struct trace_array *tr;
8588 
8589 	mutex_lock(&trace_types_lock);
8590 	tr = trace_array_find(instance);
8591 	if (tr)
8592 		tr->ref++;
8593 	mutex_unlock(&trace_types_lock);
8594 
8595 	return tr;
8596 }
8597 
8598 static struct trace_array *trace_array_create(const char *name)
8599 {
8600 	struct trace_array *tr;
8601 	int ret;
8602 
8603 	ret = -ENOMEM;
8604 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8605 	if (!tr)
8606 		return ERR_PTR(ret);
8607 
8608 	tr->name = kstrdup(name, GFP_KERNEL);
8609 	if (!tr->name)
8610 		goto out_free_tr;
8611 
8612 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8613 		goto out_free_tr;
8614 
8615 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8616 
8617 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8618 
8619 	raw_spin_lock_init(&tr->start_lock);
8620 
8621 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8622 
8623 	tr->current_trace = &nop_trace;
8624 
8625 	INIT_LIST_HEAD(&tr->systems);
8626 	INIT_LIST_HEAD(&tr->events);
8627 	INIT_LIST_HEAD(&tr->hist_vars);
8628 	INIT_LIST_HEAD(&tr->err_log);
8629 
8630 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8631 		goto out_free_tr;
8632 
8633 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8634 	if (!tr->dir)
8635 		goto out_free_tr;
8636 
8637 	ret = event_trace_add_tracer(tr->dir, tr);
8638 	if (ret) {
8639 		tracefs_remove(tr->dir);
8640 		goto out_free_tr;
8641 	}
8642 
8643 	ftrace_init_trace_array(tr);
8644 
8645 	init_tracer_tracefs(tr, tr->dir);
8646 	init_trace_flags_index(tr);
8647 	__update_tracer_options(tr);
8648 
8649 	list_add(&tr->list, &ftrace_trace_arrays);
8650 
8651 	tr->ref++;
8652 
8653 
8654 	return tr;
8655 
8656  out_free_tr:
8657 	free_trace_buffers(tr);
8658 	free_cpumask_var(tr->tracing_cpumask);
8659 	kfree(tr->name);
8660 	kfree(tr);
8661 
8662 	return ERR_PTR(ret);
8663 }
8664 
8665 static int instance_mkdir(const char *name)
8666 {
8667 	struct trace_array *tr;
8668 	int ret;
8669 
8670 	mutex_lock(&event_mutex);
8671 	mutex_lock(&trace_types_lock);
8672 
8673 	ret = -EEXIST;
8674 	if (trace_array_find(name))
8675 		goto out_unlock;
8676 
8677 	tr = trace_array_create(name);
8678 
8679 	ret = PTR_ERR_OR_ZERO(tr);
8680 
8681 out_unlock:
8682 	mutex_unlock(&trace_types_lock);
8683 	mutex_unlock(&event_mutex);
8684 	return ret;
8685 }
8686 
8687 /**
8688  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8689  * @name: The name of the trace array to be looked up/created.
8690  *
8691  * Returns a pointer to the trace array with the given name, or NULL
8692  * if it cannot be created.
8693  *
8694  * NOTE: This function increments the reference counter associated with the
8695  * trace array returned. This makes sure it cannot be freed while in use.
8696  * Use trace_array_put() once the trace array is no longer needed.
8697  * If the trace_array is to be freed, trace_array_destroy() needs to
8698  * be called after the trace_array_put(), or simply let user space delete
8699  * it from the tracefs instances directory. But until the
8700  * trace_array_put() is called, user space cannot delete it.
8701  *
8702  */
8703 struct trace_array *trace_array_get_by_name(const char *name)
8704 {
8705 	struct trace_array *tr;
8706 
8707 	mutex_lock(&event_mutex);
8708 	mutex_lock(&trace_types_lock);
8709 
8710 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8711 		if (tr->name && strcmp(tr->name, name) == 0)
8712 			goto out_unlock;
8713 	}
8714 
8715 	tr = trace_array_create(name);
8716 
8717 	if (IS_ERR(tr))
8718 		tr = NULL;
8719 out_unlock:
8720 	if (tr)
8721 		tr->ref++;
8722 
8723 	mutex_unlock(&trace_types_lock);
8724 	mutex_unlock(&event_mutex);
8725 	return tr;
8726 }
8727 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
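
/*
 * Minimal in-kernel usage sketch (illustrative; the instance name is
 * hypothetical and error handling is trimmed):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use the instance ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * As noted above, trace_array_destroy() is only needed if the caller wants
 * the instance removed; otherwise it stays visible under
 * instances/my_instance in tracefs until user space deletes it.
 */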
8728 
8729 static int __remove_instance(struct trace_array *tr)
8730 {
8731 	int i;
8732 
8733 	/* Reference counter for a newly created trace array = 1. */
8734 	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8735 		return -EBUSY;
8736 
8737 	list_del(&tr->list);
8738 
8739 	/* Disable all the flags that were enabled coming in */
8740 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8741 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8742 			set_tracer_flag(tr, 1 << i, 0);
8743 	}
8744 
8745 	tracing_set_nop(tr);
8746 	clear_ftrace_function_probes(tr);
8747 	event_trace_del_tracer(tr);
8748 	ftrace_clear_pids(tr);
8749 	ftrace_destroy_function_files(tr);
8750 	tracefs_remove(tr->dir);
8751 	free_trace_buffers(tr);
8752 
8753 	for (i = 0; i < tr->nr_topts; i++) {
8754 		kfree(tr->topts[i].topts);
8755 	}
8756 	kfree(tr->topts);
8757 
8758 	free_cpumask_var(tr->tracing_cpumask);
8759 	kfree(tr->name);
8760 	kfree(tr);
8761 	tr = NULL;
8762 
8763 	return 0;
8764 }
8765 
8766 int trace_array_destroy(struct trace_array *this_tr)
8767 {
8768 	struct trace_array *tr;
8769 	int ret;
8770 
8771 	if (!this_tr)
8772 		return -EINVAL;
8773 
8774 	mutex_lock(&event_mutex);
8775 	mutex_lock(&trace_types_lock);
8776 
8777 	ret = -ENODEV;
8778 
8779 	/* Make sure the trace array exists before destroying it. */
8780 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8781 		if (tr == this_tr) {
8782 			ret = __remove_instance(tr);
8783 			break;
8784 		}
8785 	}
8786 
8787 	mutex_unlock(&trace_types_lock);
8788 	mutex_unlock(&event_mutex);
8789 
8790 	return ret;
8791 }
8792 EXPORT_SYMBOL_GPL(trace_array_destroy);
8793 
8794 static int instance_rmdir(const char *name)
8795 {
8796 	struct trace_array *tr;
8797 	int ret;
8798 
8799 	mutex_lock(&event_mutex);
8800 	mutex_lock(&trace_types_lock);
8801 
8802 	ret = -ENODEV;
8803 	tr = trace_array_find(name);
8804 	if (tr)
8805 		ret = __remove_instance(tr);
8806 
8807 	mutex_unlock(&trace_types_lock);
8808 	mutex_unlock(&event_mutex);
8809 
8810 	return ret;
8811 }
8812 
8813 static __init void create_trace_instances(struct dentry *d_tracer)
8814 {
8815 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8816 							 instance_mkdir,
8817 							 instance_rmdir);
8818 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8819 		return;
8820 }
8821 
8822 static void
8823 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8824 {
8825 	struct trace_event_file *file;
8826 	int cpu;
8827 
8828 	trace_create_file("available_tracers", 0444, d_tracer,
8829 			tr, &show_traces_fops);
8830 
8831 	trace_create_file("current_tracer", 0644, d_tracer,
8832 			tr, &set_tracer_fops);
8833 
8834 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8835 			  tr, &tracing_cpumask_fops);
8836 
8837 	trace_create_file("trace_options", 0644, d_tracer,
8838 			  tr, &tracing_iter_fops);
8839 
8840 	trace_create_file("trace", 0644, d_tracer,
8841 			  tr, &tracing_fops);
8842 
8843 	trace_create_file("trace_pipe", 0444, d_tracer,
8844 			  tr, &tracing_pipe_fops);
8845 
8846 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8847 			  tr, &tracing_entries_fops);
8848 
8849 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8850 			  tr, &tracing_total_entries_fops);
8851 
8852 	trace_create_file("free_buffer", 0200, d_tracer,
8853 			  tr, &tracing_free_buffer_fops);
8854 
8855 	trace_create_file("trace_marker", 0220, d_tracer,
8856 			  tr, &tracing_mark_fops);
8857 
8858 	file = __find_event_file(tr, "ftrace", "print");
8859 	if (file && file->dir)
8860 		trace_create_file("trigger", 0644, file->dir, file,
8861 				  &event_trigger_fops);
8862 	tr->trace_marker_file = file;
8863 
8864 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8865 			  tr, &tracing_mark_raw_fops);
8866 
8867 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8868 			  &trace_clock_fops);
8869 
8870 	trace_create_file("tracing_on", 0644, d_tracer,
8871 			  tr, &rb_simple_fops);
8872 
8873 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8874 			  &trace_time_stamp_mode_fops);
8875 
8876 	tr->buffer_percent = 50;
8877 
8878 	trace_create_file("buffer_percent", 0444, d_tracer,
8879 			tr, &buffer_percent_fops);
8880 
8881 	create_trace_options_dir(tr);
8882 
8883 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8884 	trace_create_maxlat_file(tr, d_tracer);
8885 #endif
8886 
8887 	if (ftrace_create_function_files(tr, d_tracer))
8888 		MEM_FAIL(1, "Could not allocate function filter files");
8889 
8890 #ifdef CONFIG_TRACER_SNAPSHOT
8891 	trace_create_file("snapshot", 0644, d_tracer,
8892 			  tr, &snapshot_fops);
8893 #endif
8894 
8895 	trace_create_file("error_log", 0644, d_tracer,
8896 			  tr, &tracing_err_log_fops);
8897 
8898 	for_each_tracing_cpu(cpu)
8899 		tracing_init_tracefs_percpu(tr, cpu);
8900 
8901 	ftrace_init_tracefs(tr, d_tracer);
8902 }
8903 
8904 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8905 {
8906 	struct vfsmount *mnt;
8907 	struct file_system_type *type;
8908 
8909 	/*
8910 	 * To maintain backward compatibility for tools that mount
8911 	 * debugfs to get to the tracing facility, tracefs is automatically
8912 	 * mounted to the debugfs/tracing directory.
8913 	 */
8914 	type = get_fs_type("tracefs");
8915 	if (!type)
8916 		return NULL;
8917 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8918 	put_filesystem(type);
8919 	if (IS_ERR(mnt))
8920 		return NULL;
8921 	mntget(mnt);
8922 
8923 	return mnt;
8924 }
8925 
8926 /**
8927  * tracing_init_dentry - initialize top level trace array
8928  *
8929  * This is called when creating files or directories in the tracing
8930  * directory. It is called via fs_initcall() by any of the boot up code
8931  * and expects to return the dentry of the top level tracing directory.
8932  */
8933 struct dentry *tracing_init_dentry(void)
8934 {
8935 	struct trace_array *tr = &global_trace;
8936 
8937 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8938 		pr_warn("Tracing disabled due to lockdown\n");
8939 		return ERR_PTR(-EPERM);
8940 	}
8941 
8942 	/* The top level trace array uses NULL as parent */
8943 	if (tr->dir)
8944 		return NULL;
8945 
8946 	if (WARN_ON(!tracefs_initialized()) ||
8947 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8948 		 WARN_ON(!debugfs_initialized())))
8949 		return ERR_PTR(-ENODEV);
8950 
8951 	/*
8952 	 * As there may still be users that expect the tracing
8953 	 * files to exist in debugfs/tracing, we must automount
8954 	 * the tracefs file system there, so older tools still
8955 	 * work with the newer kernel.
8956 	 */
8957 	tr->dir = debugfs_create_automount("tracing", NULL,
8958 					   trace_automount, NULL);
8959 
8960 	return NULL;
8961 }
8962 
8963 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8964 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8965 
8966 static void __init trace_eval_init(void)
8967 {
8968 	int len;
8969 
8970 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8971 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8972 }
8973 
8974 #ifdef CONFIG_MODULES
8975 static void trace_module_add_evals(struct module *mod)
8976 {
8977 	if (!mod->num_trace_evals)
8978 		return;
8979 
8980 	/*
8981 	 * Modules with bad taint do not have events created, do
8982 	 * not bother with enums either.
8983 	 */
8984 	if (trace_module_has_bad_taint(mod))
8985 		return;
8986 
8987 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8988 }
8989 
8990 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8991 static void trace_module_remove_evals(struct module *mod)
8992 {
8993 	union trace_eval_map_item *map;
8994 	union trace_eval_map_item **last = &trace_eval_maps;
8995 
8996 	if (!mod->num_trace_evals)
8997 		return;
8998 
8999 	mutex_lock(&trace_eval_mutex);
9000 
9001 	map = trace_eval_maps;
9002 
9003 	while (map) {
9004 		if (map->head.mod == mod)
9005 			break;
9006 		map = trace_eval_jmp_to_tail(map);
9007 		last = &map->tail.next;
9008 		map = map->tail.next;
9009 	}
9010 	if (!map)
9011 		goto out;
9012 
9013 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9014 	kfree(map);
9015  out:
9016 	mutex_unlock(&trace_eval_mutex);
9017 }
9018 #else
9019 static inline void trace_module_remove_evals(struct module *mod) { }
9020 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9021 
9022 static int trace_module_notify(struct notifier_block *self,
9023 			       unsigned long val, void *data)
9024 {
9025 	struct module *mod = data;
9026 
9027 	switch (val) {
9028 	case MODULE_STATE_COMING:
9029 		trace_module_add_evals(mod);
9030 		break;
9031 	case MODULE_STATE_GOING:
9032 		trace_module_remove_evals(mod);
9033 		break;
9034 	}
9035 
9036 	return 0;
9037 }
9038 
9039 static struct notifier_block trace_module_nb = {
9040 	.notifier_call = trace_module_notify,
9041 	.priority = 0,
9042 };
9043 #endif /* CONFIG_MODULES */
9044 
9045 static __init int tracer_init_tracefs(void)
9046 {
9047 	struct dentry *d_tracer;
9048 
9049 	trace_access_lock_init();
9050 
9051 	d_tracer = tracing_init_dentry();
9052 	if (IS_ERR(d_tracer))
9053 		return 0;
9054 
9055 	event_trace_init();
9056 
9057 	init_tracer_tracefs(&global_trace, d_tracer);
9058 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9059 
9060 	trace_create_file("tracing_thresh", 0644, d_tracer,
9061 			&global_trace, &tracing_thresh_fops);
9062 
9063 	trace_create_file("README", 0444, d_tracer,
9064 			NULL, &tracing_readme_fops);
9065 
9066 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9067 			NULL, &tracing_saved_cmdlines_fops);
9068 
9069 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9070 			  NULL, &tracing_saved_cmdlines_size_fops);
9071 
9072 	trace_create_file("saved_tgids", 0444, d_tracer,
9073 			NULL, &tracing_saved_tgids_fops);
9074 
9075 	trace_eval_init();
9076 
9077 	trace_create_eval_file(d_tracer);
9078 
9079 #ifdef CONFIG_MODULES
9080 	register_module_notifier(&trace_module_nb);
9081 #endif
9082 
9083 #ifdef CONFIG_DYNAMIC_FTRACE
9084 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9085 			NULL, &tracing_dyn_info_fops);
9086 #endif
9087 
9088 	create_trace_instances(d_tracer);
9089 
9090 	update_tracer_options(&global_trace);
9091 
9092 	return 0;
9093 }
9094 
9095 static int trace_panic_handler(struct notifier_block *this,
9096 			       unsigned long event, void *unused)
9097 {
9098 	if (ftrace_dump_on_oops)
9099 		ftrace_dump(ftrace_dump_on_oops);
9100 	return NOTIFY_OK;
9101 }
9102 
9103 static struct notifier_block trace_panic_notifier = {
9104 	.notifier_call  = trace_panic_handler,
9105 	.next           = NULL,
9106 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9107 };
9108 
9109 static int trace_die_handler(struct notifier_block *self,
9110 			     unsigned long val,
9111 			     void *data)
9112 {
9113 	switch (val) {
9114 	case DIE_OOPS:
9115 		if (ftrace_dump_on_oops)
9116 			ftrace_dump(ftrace_dump_on_oops);
9117 		break;
9118 	default:
9119 		break;
9120 	}
9121 	return NOTIFY_OK;
9122 }
9123 
9124 static struct notifier_block trace_die_notifier = {
9125 	.notifier_call = trace_die_handler,
9126 	.priority = 200
9127 };
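
/*
 * Both notifiers above key off ftrace_dump_on_oops, which can be set with
 * the "ftrace_dump_on_oops" kernel command-line parameter or via the
 * kernel.ftrace_dump_on_oops sysctl, e.g. (illustrative):
 *
 *	ftrace_dump_on_oops		dump every CPU's buffer on an oops
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that oopsed
 */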
9128 
9129 /*
9130  * printk is set to a max of 1024; we really don't need it that big.
9131  * Nothing should be printing 1000 characters anyway.
9132  */
9133 #define TRACE_MAX_PRINT		1000
9134 
9135 /*
9136  * Define here KERN_TRACE so that we have one place to modify
9137  * it if we decide to change what log level the ftrace dump
9138  * should be at.
9139  */
9140 #define KERN_TRACE		KERN_EMERG
9141 
9142 void
9143 trace_printk_seq(struct trace_seq *s)
9144 {
9145 	/* Probably should print a warning here. */
9146 	if (s->seq.len >= TRACE_MAX_PRINT)
9147 		s->seq.len = TRACE_MAX_PRINT;
9148 
9149 	/*
9150 	 * More paranoid code. Although the buffer size is set to
9151 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9152 	 * an extra layer of protection.
9153 	 */
9154 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9155 		s->seq.len = s->seq.size - 1;
9156 
9157 	/* should be zero terminated, but we are paranoid. */
9158 	s->buffer[s->seq.len] = 0;
9159 
9160 	printk(KERN_TRACE "%s", s->buffer);
9161 
9162 	trace_seq_init(s);
9163 }
9164 
9165 void trace_init_global_iter(struct trace_iterator *iter)
9166 {
9167 	iter->tr = &global_trace;
9168 	iter->trace = iter->tr->current_trace;
9169 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9170 	iter->array_buffer = &global_trace.array_buffer;
9171 
9172 	if (iter->trace && iter->trace->open)
9173 		iter->trace->open(iter);
9174 
9175 	/* Annotate start of buffers if we had overruns */
9176 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9177 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9178 
9179 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9180 	if (trace_clocks[iter->tr->clock_id].in_ns)
9181 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9182 }
9183 
9184 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9185 {
9186 	/* use static because iter can be a bit big for the stack */
9187 	static struct trace_iterator iter;
9188 	static atomic_t dump_running;
9189 	struct trace_array *tr = &global_trace;
9190 	unsigned int old_userobj;
9191 	unsigned long flags;
9192 	int cnt = 0, cpu;
9193 
9194 	/* Only allow one dump user at a time. */
9195 	if (atomic_inc_return(&dump_running) != 1) {
9196 		atomic_dec(&dump_running);
9197 		return;
9198 	}
9199 
9200 	/*
9201 	 * Always turn off tracing when we dump.
9202 	 * We don't need to show trace output of what happens
9203 	 * between multiple crashes.
9204 	 *
9205 	 * If the user does a sysrq-z, then they can re-enable
9206 	 * tracing with echo 1 > tracing_on.
9207 	 */
9208 	tracing_off();
9209 
9210 	local_irq_save(flags);
9211 	printk_nmi_direct_enter();
9212 
9213 	/* Simulate the iterator */
9214 	trace_init_global_iter(&iter);
9215 	/* Can not use kmalloc for iter.temp */
9216 	iter.temp = static_temp_buf;
9217 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9218 
9219 	for_each_tracing_cpu(cpu) {
9220 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9221 	}
9222 
9223 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9224 
9225 	/* don't look at user memory in panic mode */
9226 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9227 
9228 	switch (oops_dump_mode) {
9229 	case DUMP_ALL:
9230 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9231 		break;
9232 	case DUMP_ORIG:
9233 		iter.cpu_file = raw_smp_processor_id();
9234 		break;
9235 	case DUMP_NONE:
9236 		goto out_enable;
9237 	default:
9238 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9239 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9240 	}
9241 
9242 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9243 
9244 	/* Did function tracer already get disabled? */
9245 	if (ftrace_is_dead()) {
9246 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9247 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9248 	}
9249 
9250 	/*
9251 	 * We need to stop all tracing on all CPUs to read the
9252 	 * next buffer. This is a bit expensive, but is
9253 	 * not done often. We fill all that we can read,
9254 	 * and then release the locks again.
9255 	 */
9256 
9257 	while (!trace_empty(&iter)) {
9258 
9259 		if (!cnt)
9260 			printk(KERN_TRACE "---------------------------------\n");
9261 
9262 		cnt++;
9263 
9264 		trace_iterator_reset(&iter);
9265 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9266 
9267 		if (trace_find_next_entry_inc(&iter) != NULL) {
9268 			int ret;
9269 
9270 			ret = print_trace_line(&iter);
9271 			if (ret != TRACE_TYPE_NO_CONSUME)
9272 				trace_consume(&iter);
9273 		}
9274 		touch_nmi_watchdog();
9275 
9276 		trace_printk_seq(&iter.seq);
9277 	}
9278 
9279 	if (!cnt)
9280 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9281 	else
9282 		printk(KERN_TRACE "---------------------------------\n");
9283 
9284  out_enable:
9285 	tr->trace_flags |= old_userobj;
9286 
9287 	for_each_tracing_cpu(cpu) {
9288 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9289 	}
9290 	atomic_dec(&dump_running);
9291 	printk_nmi_direct_exit();
9292 	local_irq_restore(flags);
9293 }
9294 EXPORT_SYMBOL_GPL(ftrace_dump);
9295 
9296 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9297 {
9298 	char **argv;
9299 	int argc, ret;
9300 
9301 	argc = 0;
9302 	ret = 0;
9303 	argv = argv_split(GFP_KERNEL, buf, &argc);
9304 	if (!argv)
9305 		return -ENOMEM;
9306 
9307 	if (argc)
9308 		ret = createfn(argc, argv);
9309 
9310 	argv_free(argv);
9311 
9312 	return ret;
9313 }
9314 
9315 #define WRITE_BUFSIZE  4096
9316 
9317 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9318 				size_t count, loff_t *ppos,
9319 				int (*createfn)(int, char **))
9320 {
9321 	char *kbuf, *buf, *tmp;
9322 	int ret = 0;
9323 	size_t done = 0;
9324 	size_t size;
9325 
9326 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9327 	if (!kbuf)
9328 		return -ENOMEM;
9329 
9330 	while (done < count) {
9331 		size = count - done;
9332 
9333 		if (size >= WRITE_BUFSIZE)
9334 			size = WRITE_BUFSIZE - 1;
9335 
9336 		if (copy_from_user(kbuf, buffer + done, size)) {
9337 			ret = -EFAULT;
9338 			goto out;
9339 		}
9340 		kbuf[size] = '\0';
9341 		buf = kbuf;
9342 		do {
9343 			tmp = strchr(buf, '\n');
9344 			if (tmp) {
9345 				*tmp = '\0';
9346 				size = tmp - buf + 1;
9347 			} else {
9348 				size = strlen(buf);
9349 				if (done + size < count) {
9350 					if (buf != kbuf)
9351 						break;
9352 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9353 					pr_warn("Line length is too long: Should be less than %d\n",
9354 						WRITE_BUFSIZE - 2);
9355 					ret = -EINVAL;
9356 					goto out;
9357 				}
9358 			}
9359 			done += size;
9360 
9361 			/* Remove comments */
9362 			tmp = strchr(buf, '#');
9363 
9364 			if (tmp)
9365 				*tmp = '\0';
9366 
9367 			ret = trace_run_command(buf, createfn);
9368 			if (ret)
9369 				goto out;
9370 			buf += size;
9371 
9372 		} while (done < count);
9373 	}
9374 	ret = done;
9375 
9376 out:
9377 	kfree(kbuf);
9378 
9379 	return ret;
9380 }
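
/*
 * Sketch of a createfn consumer (illustrative; the callback name is
 * hypothetical).  trace_parse_run_command() splits the user buffer into
 * newline-terminated commands, strips '#' comments, and hands each command
 * to trace_run_command(), which argv-splits it before calling createfn:
 *
 *	static int my_create_cmd(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("cmd %s with %d args\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 * A file_operations ->write() handler would then call, e.g.:
 *	return trace_parse_run_command(file, ubuf, cnt, ppos, my_create_cmd);
 */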
9381 
9382 __init static int tracer_alloc_buffers(void)
9383 {
9384 	int ring_buf_size;
9385 	int ret = -ENOMEM;
9386 
9387 
9388 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9389 		pr_warn("Tracing disabled due to lockdown\n");
9390 		return -EPERM;
9391 	}
9392 
9393 	/*
9394 	 * Make sure we don't accidentally add more trace options
9395 	 * than we have bits for.
9396 	 */
9397 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9398 
9399 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9400 		goto out;
9401 
9402 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9403 		goto out_free_buffer_mask;
9404 
9405 	/* Only allocate trace_printk buffers if a trace_printk exists */
9406 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9407 		/* Must be called before global_trace.buffer is allocated */
9408 		trace_printk_init_buffers();
9409 
9410 	/* To save memory, keep the ring buffer size to its minimum */
9411 	if (ring_buffer_expanded)
9412 		ring_buf_size = trace_buf_size;
9413 	else
9414 		ring_buf_size = 1;
9415 
9416 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9417 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9418 
9419 	raw_spin_lock_init(&global_trace.start_lock);
9420 
9421 	/*
9422 	 * The prepare callback allocates some memory for the ring buffer. We
9423 	 * don't free the buffer if the CPU goes down. If we were to free
9424 	 * the buffer, then the user would lose any trace that was in the
9425 	 * buffer. The memory will be removed once the "instance" is removed.
9426 	 */
9427 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9428 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9429 				      NULL);
9430 	if (ret < 0)
9431 		goto out_free_cpumask;
9432 	/* Used for event triggers */
9433 	ret = -ENOMEM;
9434 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9435 	if (!temp_buffer)
9436 		goto out_rm_hp_state;
9437 
9438 	if (trace_create_savedcmd() < 0)
9439 		goto out_free_temp_buffer;
9440 
9441 	/* TODO: make the number of buffers hot pluggable with CPUS */
9442 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9443 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9444 		goto out_free_savedcmd;
9445 	}
9446 
9447 	if (global_trace.buffer_disabled)
9448 		tracing_off();
9449 
9450 	if (trace_boot_clock) {
9451 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9452 		if (ret < 0)
9453 			pr_warn("Trace clock %s not defined, going back to default\n",
9454 				trace_boot_clock);
9455 	}
9456 
9457 	/*
9458 	 * register_tracer() might reference current_trace, so it
9459 	 * needs to be set before we register anything. This is
9460 	 * just a bootstrap of current_trace anyway.
9461 	 */
9462 	global_trace.current_trace = &nop_trace;
9463 
9464 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9465 
9466 	ftrace_init_global_array_ops(&global_trace);
9467 
9468 	init_trace_flags_index(&global_trace);
9469 
9470 	register_tracer(&nop_trace);
9471 
9472 	/* Function tracing may start here (via kernel command line) */
9473 	init_function_trace();
9474 
9475 	/* All seems OK, enable tracing */
9476 	tracing_disabled = 0;
9477 
9478 	atomic_notifier_chain_register(&panic_notifier_list,
9479 				       &trace_panic_notifier);
9480 
9481 	register_die_notifier(&trace_die_notifier);
9482 
9483 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9484 
9485 	INIT_LIST_HEAD(&global_trace.systems);
9486 	INIT_LIST_HEAD(&global_trace.events);
9487 	INIT_LIST_HEAD(&global_trace.hist_vars);
9488 	INIT_LIST_HEAD(&global_trace.err_log);
9489 	list_add(&global_trace.list, &ftrace_trace_arrays);
9490 
9491 	apply_trace_boot_options();
9492 
9493 	register_snapshot_cmd();
9494 
9495 	return 0;
9496 
9497 out_free_savedcmd:
9498 	free_saved_cmdlines_buffer(savedcmd);
9499 out_free_temp_buffer:
9500 	ring_buffer_free(temp_buffer);
9501 out_rm_hp_state:
9502 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9503 out_free_cpumask:
9504 	free_cpumask_var(global_trace.tracing_cpumask);
9505 out_free_buffer_mask:
9506 	free_cpumask_var(tracing_buffer_mask);
9507 out:
9508 	return ret;
9509 }
9510 
9511 void __init early_trace_init(void)
9512 {
9513 	if (tracepoint_printk) {
9514 		tracepoint_print_iter =
9515 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9516 		if (MEM_FAIL(!tracepoint_print_iter,
9517 			     "Failed to allocate trace iterator\n"))
9518 			tracepoint_printk = 0;
9519 		else
9520 			static_key_enable(&tracepoint_printk_key.key);
9521 	}
9522 	tracer_alloc_buffers();
9523 }
9524 
9525 void __init trace_init(void)
9526 {
9527 	trace_event_init();
9528 }
9529 
9530 __init static int clear_boot_tracer(void)
9531 {
9532 	/*
9533 	 * The default bootup tracer name lives in an init-section buffer.
9534 	 * This function runs as a late initcall. If we did not
9535 	 * find the boot tracer, then clear it out, to prevent
9536 	 * later registration from accessing the buffer that is
9537 	 * about to be freed.
9538 	 */
9539 	if (!default_bootup_tracer)
9540 		return 0;
9541 
9542 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9543 	       default_bootup_tracer);
9544 	default_bootup_tracer = NULL;
9545 
9546 	return 0;
9547 }
9548 
9549 fs_initcall(tracer_init_tracefs);
9550 late_initcall_sync(clear_boot_tracer);
9551 
9552 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9553 __init static int tracing_set_default_clock(void)
9554 {
9555 	/* sched_clock_stable() is determined in late_initcall */
9556 	if (!trace_boot_clock && !sched_clock_stable()) {
9557 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9558 			pr_warn("Can not set tracing clock due to lockdown\n");
9559 			return -EPERM;
9560 		}
9561 
9562 		printk(KERN_WARNING
9563 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9564 		       "If you want to keep using the local clock, then add:\n"
9565 		       "  \"trace_clock=local\"\n"
9566 		       "on the kernel command line\n");
9567 		tracing_set_clock(&global_trace, "global");
9568 	}
9569 
9570 	return 0;
9571 }
9572 late_initcall_sync(tracing_set_default_clock);
9573 #endif
9574