xref: /openbmc/linux/kernel/trace/trace.c (revision b23d7a5f4a07af02343cdd28fe1f7488bac3afda)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196 
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filterd_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid already is +1 of the actual prevous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always recreate a new array. The write is an all or nothing
535 	 * operation. Always create a new array when adding new pids by
536 	 * the user. If the operation fails, then the current list is
537 	 * not modified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway this can be
659  * boot time and run time configurable.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * serialize the access of the ring buffer
675  *
676  * ring buffer serializes readers, but it is low level protection.
677  * The validity of the events (which returns by ring_buffer_peek() ..etc)
678  * are not protected by ring buffer.
679  *
680  * The content of events may become garbage if we allow other process consumes
681  * these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not reader page) in ring buffer, and this page will be rewrited
684  *      by events producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to system.
687  *
688  * These primitives allow multi process access to different cpu ring buffer
689  * concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multi read-only access are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races of where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer. instead, resize it because
1082 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff), that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races of where it gets disabled but we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning) {
1303 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 			"Disabling tracing due to warning\n");
1305 		tracing_off();
1306 	}
1307 }
1308 
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317 	if (tr->array_buffer.buffer)
1318 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 	return !tr->buffer_disabled;
1320 }
1321 
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327 	return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330 
1331 static int __init set_buf_size(char *str)
1332 {
1333 	unsigned long buf_size;
1334 
1335 	if (!str)
1336 		return 0;
1337 	buf_size = memparse(str, &str);
1338 	/* nr_entries can not be zero */
1339 	if (buf_size == 0)
1340 		return 0;
1341 	trace_buf_size = buf_size;
1342 	return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345 
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348 	unsigned long threshold;
1349 	int ret;
1350 
1351 	if (!str)
1352 		return 0;
1353 	ret = kstrtoul(str, 0, &threshold);
1354 	if (ret < 0)
1355 		return 0;
1356 	tracing_thresh = threshold * 1000;
1357 	return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360 
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363 	return nsecs / 1000;
1364 }
1365 
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374 
1375 /* These must match the bit postions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377 	TRACE_FLAGS
1378 	NULL
1379 };
1380 
1381 static struct {
1382 	u64 (*func)(void);
1383 	const char *name;
1384 	int in_ns;		/* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 	{ trace_clock_local,		"local",	1 },
1387 	{ trace_clock_global,		"global",	1 },
1388 	{ trace_clock_counter,		"counter",	0 },
1389 	{ trace_clock_jiffies,		"uptime",	0 },
1390 	{ trace_clock,			"perf",		1 },
1391 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1392 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1393 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1394 	ARCH_TRACE_CLOCKS
1395 };
1396 
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399 	if (trace_clocks[tr->clock_id].in_ns)
1400 		return true;
1401 
1402 	return false;
1403 }
1404 
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410 	memset(parser, 0, sizeof(*parser));
1411 
1412 	parser->buffer = kmalloc(size, GFP_KERNEL);
1413 	if (!parser->buffer)
1414 		return 1;
1415 
1416 	parser->size = size;
1417 	return 0;
1418 }
1419 
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425 	kfree(parser->buffer);
1426 	parser->buffer = NULL;
1427 }
1428 
1429 /*
1430  * trace_get_user - reads the user input string separated by  space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 	size_t cnt, loff_t *ppos)
1442 {
1443 	char ch;
1444 	size_t read = 0;
1445 	ssize_t ret;
1446 
1447 	if (!*ppos)
1448 		trace_parser_clear(parser);
1449 
1450 	ret = get_user(ch, ubuf++);
1451 	if (ret)
1452 		goto out;
1453 
1454 	read++;
1455 	cnt--;
1456 
1457 	/*
1458 	 * The parser is not finished with the last write,
1459 	 * continue reading the user input without skipping spaces.
1460 	 */
1461 	if (!parser->cont) {
1462 		/* skip white space */
1463 		while (cnt && isspace(ch)) {
1464 			ret = get_user(ch, ubuf++);
1465 			if (ret)
1466 				goto out;
1467 			read++;
1468 			cnt--;
1469 		}
1470 
1471 		parser->idx = 0;
1472 
1473 		/* only spaces were written */
1474 		if (isspace(ch) || !ch) {
1475 			*ppos += read;
1476 			ret = read;
1477 			goto out;
1478 		}
1479 	}
1480 
1481 	/* read the non-space input */
1482 	while (cnt && !isspace(ch) && ch) {
1483 		if (parser->idx < parser->size - 1)
1484 			parser->buffer[parser->idx++] = ch;
1485 		else {
1486 			ret = -EINVAL;
1487 			goto out;
1488 		}
1489 		ret = get_user(ch, ubuf++);
1490 		if (ret)
1491 			goto out;
1492 		read++;
1493 		cnt--;
1494 	}
1495 
1496 	/* We either got finished input or we have to wait for another call. */
1497 	if (isspace(ch) || !ch) {
1498 		parser->buffer[parser->idx] = 0;
1499 		parser->cont = false;
1500 	} else if (parser->idx < parser->size - 1) {
1501 		parser->cont = true;
1502 		parser->buffer[parser->idx++] = ch;
1503 		/* Make sure the parsed string always terminates with '\0'. */
1504 		parser->buffer[parser->idx] = 0;
1505 	} else {
1506 		ret = -EINVAL;
1507 		goto out;
1508 	}
1509 
1510 	*ppos += read;
1511 	ret = read;
1512 
1513 out:
1514 	return ret;
1515 }
1516 
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520 	int len;
1521 
1522 	if (trace_seq_used(s) <= s->seq.readpos)
1523 		return -EBUSY;
1524 
1525 	len = trace_seq_used(s) - s->seq.readpos;
1526 	if (cnt > len)
1527 		cnt = len;
1528 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529 
1530 	s->seq.readpos += cnt;
1531 	return cnt;
1532 }
1533 
1534 unsigned long __read_mostly	tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536 
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 	defined(CONFIG_FSNOTIFY)
1539 
1540 static struct workqueue_struct *fsnotify_wq;
1541 
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544 	struct trace_array *tr = container_of(work, struct trace_array,
1545 					      fsnotify_work);
1546 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549 
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552 	struct trace_array *tr = container_of(iwork, struct trace_array,
1553 					      fsnotify_irqwork);
1554 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556 
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558 				     struct dentry *d_tracer)
1559 {
1560 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563 					      d_tracer, &tr->max_latency,
1564 					      &tracing_max_lat_fops);
1565 }
1566 
1567 __init static int latency_fsnotify_init(void)
1568 {
1569 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 	if (!fsnotify_wq) {
1572 		pr_err("Unable to allocate tr_max_lat_wq\n");
1573 		return -ENOMEM;
1574 	}
1575 	return 0;
1576 }
1577 
1578 late_initcall_sync(latency_fsnotify_init);
1579 
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582 	if (!fsnotify_wq)
1583 		return;
1584 	/*
1585 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586 	 * possible that we are called from __schedule() or do_idle(), which
1587 	 * could cause a deadlock.
1588 	 */
1589 	irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591 
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597 
1598 #define trace_create_maxlat_file(tr, d_tracer)				\
1599 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1600 			  &tr->max_latency, &tracing_max_lat_fops)
1601 
1602 #endif
1603 
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613 	struct array_buffer *trace_buf = &tr->array_buffer;
1614 	struct array_buffer *max_buf = &tr->max_buffer;
1615 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617 
1618 	max_buf->cpu = cpu;
1619 	max_buf->time_start = data->preempt_timestamp;
1620 
1621 	max_data->saved_latency = tr->max_latency;
1622 	max_data->critical_start = data->critical_start;
1623 	max_data->critical_end = data->critical_end;
1624 
1625 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626 	max_data->pid = tsk->pid;
1627 	/*
1628 	 * If tsk == current, then use current_uid(), as that does not use
1629 	 * RCU. The irq tracer can be called out of RCU scope.
1630 	 */
1631 	if (tsk == current)
1632 		max_data->uid = current_uid();
1633 	else
1634 		max_data->uid = task_uid(tsk);
1635 
1636 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637 	max_data->policy = tsk->policy;
1638 	max_data->rt_priority = tsk->rt_priority;
1639 
1640 	/* record this tasks comm */
1641 	tracing_record_cmdline(tsk);
1642 	latency_fsnotify(tr);
1643 }
1644 
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657 	      void *cond_data)
1658 {
1659 	if (tr->stop_count)
1660 		return;
1661 
1662 	WARN_ON_ONCE(!irqs_disabled());
1663 
1664 	if (!tr->allocated_snapshot) {
1665 		/* Only the nop tracer should hit this when disabling */
1666 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667 		return;
1668 	}
1669 
1670 	arch_spin_lock(&tr->max_lock);
1671 
1672 	/* Inherit the recordable setting from array_buffer */
1673 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674 		ring_buffer_record_on(tr->max_buffer.buffer);
1675 	else
1676 		ring_buffer_record_off(tr->max_buffer.buffer);
1677 
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680 		goto out_unlock;
1681 #endif
1682 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 
1684 	__update_max_tr(tr, tsk, cpu);
1685 
1686  out_unlock:
1687 	arch_spin_unlock(&tr->max_lock);
1688 }
1689 
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701 	int ret;
1702 
1703 	if (tr->stop_count)
1704 		return;
1705 
1706 	WARN_ON_ONCE(!irqs_disabled());
1707 	if (!tr->allocated_snapshot) {
1708 		/* Only the nop tracer should hit this when disabling */
1709 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710 		return;
1711 	}
1712 
1713 	arch_spin_lock(&tr->max_lock);
1714 
1715 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 
1717 	if (ret == -EBUSY) {
1718 		/*
1719 		 * We failed to swap the buffer due to a commit taking
1720 		 * place on this CPU. We fail to record, but we reset
1721 		 * the max trace buffer (no one writes directly to it)
1722 		 * and flag that it failed.
1723 		 */
1724 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725 			"Failed to swap buffers due to commit in progress\n");
1726 	}
1727 
1728 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 
1730 	__update_max_tr(tr, tsk, cpu);
1731 	arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737 	/* Iterators are static, they should be filled or empty */
1738 	if (trace_buffer_iter(iter, iter->cpu_file))
1739 		return 0;
1740 
1741 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742 				full);
1743 }
1744 
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747 
1748 struct trace_selftests {
1749 	struct list_head		list;
1750 	struct tracer			*type;
1751 };
1752 
1753 static LIST_HEAD(postponed_selftests);
1754 
1755 static int save_selftest(struct tracer *type)
1756 {
1757 	struct trace_selftests *selftest;
1758 
1759 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760 	if (!selftest)
1761 		return -ENOMEM;
1762 
1763 	selftest->type = type;
1764 	list_add(&selftest->list, &postponed_selftests);
1765 	return 0;
1766 }
1767 
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770 	struct trace_array *tr = &global_trace;
1771 	struct tracer *saved_tracer = tr->current_trace;
1772 	int ret;
1773 
1774 	if (!type->selftest || tracing_selftest_disabled)
1775 		return 0;
1776 
1777 	/*
1778 	 * If a tracer registers early in boot up (before scheduling is
1779 	 * initialized and such), then do not run its selftests yet.
1780 	 * Instead, run it a little later in the boot process.
1781 	 */
1782 	if (!selftests_can_run)
1783 		return save_selftest(type);
1784 
1785 	/*
1786 	 * Run a selftest on this tracer.
1787 	 * Here we reset the trace buffer, and set the current
1788 	 * tracer to be this tracer. The tracer can then run some
1789 	 * internal tracing to verify that everything is in order.
1790 	 * If we fail, we do not register this tracer.
1791 	 */
1792 	tracing_reset_online_cpus(&tr->array_buffer);
1793 
1794 	tr->current_trace = type;
1795 
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797 	if (type->use_max_tr) {
1798 		/* If we expanded the buffers, make sure the max is expanded too */
1799 		if (ring_buffer_expanded)
1800 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801 					   RING_BUFFER_ALL_CPUS);
1802 		tr->allocated_snapshot = true;
1803 	}
1804 #endif
1805 
1806 	/* the test is responsible for initializing and enabling */
1807 	pr_info("Testing tracer %s: ", type->name);
1808 	ret = type->selftest(type, tr);
1809 	/* the test is responsible for resetting too */
1810 	tr->current_trace = saved_tracer;
1811 	if (ret) {
1812 		printk(KERN_CONT "FAILED!\n");
1813 		/* Add the warning after printing 'FAILED' */
1814 		WARN_ON(1);
1815 		return -1;
1816 	}
1817 	/* Only reset on passing, to avoid touching corrupted buffers */
1818 	tracing_reset_online_cpus(&tr->array_buffer);
1819 
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821 	if (type->use_max_tr) {
1822 		tr->allocated_snapshot = false;
1823 
1824 		/* Shrink the max buffer again */
1825 		if (ring_buffer_expanded)
1826 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1827 					   RING_BUFFER_ALL_CPUS);
1828 	}
1829 #endif
1830 
1831 	printk(KERN_CONT "PASSED\n");
1832 	return 0;
1833 }
1834 
1835 static __init int init_trace_selftests(void)
1836 {
1837 	struct trace_selftests *p, *n;
1838 	struct tracer *t, **last;
1839 	int ret;
1840 
1841 	selftests_can_run = true;
1842 
1843 	mutex_lock(&trace_types_lock);
1844 
1845 	if (list_empty(&postponed_selftests))
1846 		goto out;
1847 
1848 	pr_info("Running postponed tracer tests:\n");
1849 
1850 	tracing_selftest_running = true;
1851 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852 		/* This loop can take minutes when sanitizers are enabled, so
1853 		 * lets make sure we allow RCU processing.
1854 		 */
1855 		cond_resched();
1856 		ret = run_tracer_selftest(p->type);
1857 		/* If the test fails, then warn and remove from available_tracers */
1858 		if (ret < 0) {
1859 			WARN(1, "tracer: %s failed selftest, disabling\n",
1860 			     p->type->name);
1861 			last = &trace_types;
1862 			for (t = trace_types; t; t = t->next) {
1863 				if (t == p->type) {
1864 					*last = t->next;
1865 					break;
1866 				}
1867 				last = &t->next;
1868 			}
1869 		}
1870 		list_del(&p->list);
1871 		kfree(p);
1872 	}
1873 	tracing_selftest_running = false;
1874 
1875  out:
1876 	mutex_unlock(&trace_types_lock);
1877 
1878 	return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884 	return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 
1890 static void __init apply_trace_boot_options(void);
1891 
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900 	struct tracer *t;
1901 	int ret = 0;
1902 
1903 	if (!type->name) {
1904 		pr_info("Tracer must have a name\n");
1905 		return -1;
1906 	}
1907 
1908 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910 		return -1;
1911 	}
1912 
1913 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914 		pr_warn("Can not register tracer %s due to lockdown\n",
1915 			   type->name);
1916 		return -EPERM;
1917 	}
1918 
1919 	mutex_lock(&trace_types_lock);
1920 
1921 	tracing_selftest_running = true;
1922 
1923 	for (t = trace_types; t; t = t->next) {
1924 		if (strcmp(type->name, t->name) == 0) {
1925 			/* already found */
1926 			pr_info("Tracer %s already registered\n",
1927 				type->name);
1928 			ret = -1;
1929 			goto out;
1930 		}
1931 	}
1932 
1933 	if (!type->set_flag)
1934 		type->set_flag = &dummy_set_flag;
1935 	if (!type->flags) {
1936 		/*allocate a dummy tracer_flags*/
1937 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938 		if (!type->flags) {
1939 			ret = -ENOMEM;
1940 			goto out;
1941 		}
1942 		type->flags->val = 0;
1943 		type->flags->opts = dummy_tracer_opt;
1944 	} else
1945 		if (!type->flags->opts)
1946 			type->flags->opts = dummy_tracer_opt;
1947 
1948 	/* store the tracer for __set_tracer_option */
1949 	type->flags->trace = type;
1950 
1951 	ret = run_tracer_selftest(type);
1952 	if (ret < 0)
1953 		goto out;
1954 
1955 	type->next = trace_types;
1956 	trace_types = type;
1957 	add_tracer_options(&global_trace, type);
1958 
1959  out:
1960 	tracing_selftest_running = false;
1961 	mutex_unlock(&trace_types_lock);
1962 
1963 	if (ret || !default_bootup_tracer)
1964 		goto out_unlock;
1965 
1966 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967 		goto out_unlock;
1968 
1969 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970 	/* Do we want this tracer to start on bootup? */
1971 	tracing_set_tracer(&global_trace, type->name);
1972 	default_bootup_tracer = NULL;
1973 
1974 	apply_trace_boot_options();
1975 
1976 	/* disable other selftests, since this will break it. */
1977 	tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980 	       type->name);
1981 #endif
1982 
1983  out_unlock:
1984 	return ret;
1985 }
1986 
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989 	struct trace_buffer *buffer = buf->buffer;
1990 
1991 	if (!buffer)
1992 		return;
1993 
1994 	ring_buffer_record_disable(buffer);
1995 
1996 	/* Make sure all commits have finished */
1997 	synchronize_rcu();
1998 	ring_buffer_reset_cpu(buffer, cpu);
1999 
2000 	ring_buffer_record_enable(buffer);
2001 }
2002 
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005 	struct trace_buffer *buffer = buf->buffer;
2006 
2007 	if (!buffer)
2008 		return;
2009 
2010 	ring_buffer_record_disable(buffer);
2011 
2012 	/* Make sure all commits have finished */
2013 	synchronize_rcu();
2014 
2015 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2016 
2017 	ring_buffer_reset_online_cpus(buffer);
2018 
2019 	ring_buffer_record_enable(buffer);
2020 }
2021 
2022 /* Must have trace_types_lock held */
2023 void tracing_reset_all_online_cpus(void)
2024 {
2025 	struct trace_array *tr;
2026 
2027 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2028 		if (!tr->clear_trace)
2029 			continue;
2030 		tr->clear_trace = false;
2031 		tracing_reset_online_cpus(&tr->array_buffer);
2032 #ifdef CONFIG_TRACER_MAX_TRACE
2033 		tracing_reset_online_cpus(&tr->max_buffer);
2034 #endif
2035 	}
2036 }
2037 
2038 static int *tgid_map;
2039 
2040 #define SAVED_CMDLINES_DEFAULT 128
2041 #define NO_CMDLINE_MAP UINT_MAX
2042 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2043 struct saved_cmdlines_buffer {
2044 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2045 	unsigned *map_cmdline_to_pid;
2046 	unsigned cmdline_num;
2047 	int cmdline_idx;
2048 	char *saved_cmdlines;
2049 };
2050 static struct saved_cmdlines_buffer *savedcmd;
2051 
2052 /* temporary disable recording */
2053 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2054 
2055 static inline char *get_saved_cmdlines(int idx)
2056 {
2057 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2058 }
2059 
2060 static inline void set_cmdline(int idx, const char *cmdline)
2061 {
2062 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2063 }
2064 
2065 static int allocate_cmdlines_buffer(unsigned int val,
2066 				    struct saved_cmdlines_buffer *s)
2067 {
2068 	s->map_cmdline_to_pid = kmalloc_array(val,
2069 					      sizeof(*s->map_cmdline_to_pid),
2070 					      GFP_KERNEL);
2071 	if (!s->map_cmdline_to_pid)
2072 		return -ENOMEM;
2073 
2074 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2075 	if (!s->saved_cmdlines) {
2076 		kfree(s->map_cmdline_to_pid);
2077 		return -ENOMEM;
2078 	}
2079 
2080 	s->cmdline_idx = 0;
2081 	s->cmdline_num = val;
2082 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2083 	       sizeof(s->map_pid_to_cmdline));
2084 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2085 	       val * sizeof(*s->map_cmdline_to_pid));
2086 
2087 	return 0;
2088 }
2089 
2090 static int trace_create_savedcmd(void)
2091 {
2092 	int ret;
2093 
2094 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2095 	if (!savedcmd)
2096 		return -ENOMEM;
2097 
2098 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2099 	if (ret < 0) {
2100 		kfree(savedcmd);
2101 		savedcmd = NULL;
2102 		return -ENOMEM;
2103 	}
2104 
2105 	return 0;
2106 }
2107 
2108 int is_tracing_stopped(void)
2109 {
2110 	return global_trace.stop_count;
2111 }
2112 
2113 /**
2114  * tracing_start - quick start of the tracer
2115  *
2116  * If tracing is enabled but was stopped by tracing_stop,
2117  * this will start the tracer back up.
2118  */
2119 void tracing_start(void)
2120 {
2121 	struct trace_buffer *buffer;
2122 	unsigned long flags;
2123 
2124 	if (tracing_disabled)
2125 		return;
2126 
2127 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2128 	if (--global_trace.stop_count) {
2129 		if (global_trace.stop_count < 0) {
2130 			/* Someone screwed up their debugging */
2131 			WARN_ON_ONCE(1);
2132 			global_trace.stop_count = 0;
2133 		}
2134 		goto out;
2135 	}
2136 
2137 	/* Prevent the buffers from switching */
2138 	arch_spin_lock(&global_trace.max_lock);
2139 
2140 	buffer = global_trace.array_buffer.buffer;
2141 	if (buffer)
2142 		ring_buffer_record_enable(buffer);
2143 
2144 #ifdef CONFIG_TRACER_MAX_TRACE
2145 	buffer = global_trace.max_buffer.buffer;
2146 	if (buffer)
2147 		ring_buffer_record_enable(buffer);
2148 #endif
2149 
2150 	arch_spin_unlock(&global_trace.max_lock);
2151 
2152  out:
2153 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2154 }
2155 
2156 static void tracing_start_tr(struct trace_array *tr)
2157 {
2158 	struct trace_buffer *buffer;
2159 	unsigned long flags;
2160 
2161 	if (tracing_disabled)
2162 		return;
2163 
2164 	/* If global, we need to also start the max tracer */
2165 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2166 		return tracing_start();
2167 
2168 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2169 
2170 	if (--tr->stop_count) {
2171 		if (tr->stop_count < 0) {
2172 			/* Someone screwed up their debugging */
2173 			WARN_ON_ONCE(1);
2174 			tr->stop_count = 0;
2175 		}
2176 		goto out;
2177 	}
2178 
2179 	buffer = tr->array_buffer.buffer;
2180 	if (buffer)
2181 		ring_buffer_record_enable(buffer);
2182 
2183  out:
2184 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2185 }
2186 
2187 /**
2188  * tracing_stop - quick stop of the tracer
2189  *
2190  * Light weight way to stop tracing. Use in conjunction with
2191  * tracing_start.
2192  */
2193 void tracing_stop(void)
2194 {
2195 	struct trace_buffer *buffer;
2196 	unsigned long flags;
2197 
2198 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2199 	if (global_trace.stop_count++)
2200 		goto out;
2201 
2202 	/* Prevent the buffers from switching */
2203 	arch_spin_lock(&global_trace.max_lock);
2204 
2205 	buffer = global_trace.array_buffer.buffer;
2206 	if (buffer)
2207 		ring_buffer_record_disable(buffer);
2208 
2209 #ifdef CONFIG_TRACER_MAX_TRACE
2210 	buffer = global_trace.max_buffer.buffer;
2211 	if (buffer)
2212 		ring_buffer_record_disable(buffer);
2213 #endif
2214 
2215 	arch_spin_unlock(&global_trace.max_lock);
2216 
2217  out:
2218 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2219 }
2220 
2221 static void tracing_stop_tr(struct trace_array *tr)
2222 {
2223 	struct trace_buffer *buffer;
2224 	unsigned long flags;
2225 
2226 	/* If global, we need to also stop the max tracer */
2227 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2228 		return tracing_stop();
2229 
2230 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2231 	if (tr->stop_count++)
2232 		goto out;
2233 
2234 	buffer = tr->array_buffer.buffer;
2235 	if (buffer)
2236 		ring_buffer_record_disable(buffer);
2237 
2238  out:
2239 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2240 }
2241 
2242 static int trace_save_cmdline(struct task_struct *tsk)
2243 {
2244 	unsigned pid, idx;
2245 
2246 	/* treat recording of idle task as a success */
2247 	if (!tsk->pid)
2248 		return 1;
2249 
2250 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2251 		return 0;
2252 
2253 	/*
2254 	 * It's not the end of the world if we don't get
2255 	 * the lock, but we also don't want to spin
2256 	 * nor do we want to disable interrupts,
2257 	 * so if we miss here, then better luck next time.
2258 	 */
2259 	if (!arch_spin_trylock(&trace_cmdline_lock))
2260 		return 0;
2261 
2262 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2263 	if (idx == NO_CMDLINE_MAP) {
2264 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2265 
2266 		/*
2267 		 * Check whether the cmdline buffer at idx has a pid
2268 		 * mapped. We are going to overwrite that entry so we
2269 		 * need to clear the map_pid_to_cmdline. Otherwise we
2270 		 * would read the new comm for the old pid.
2271 		 */
2272 		pid = savedcmd->map_cmdline_to_pid[idx];
2273 		if (pid != NO_CMDLINE_MAP)
2274 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2275 
2276 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2277 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2278 
2279 		savedcmd->cmdline_idx = idx;
2280 	}
2281 
2282 	set_cmdline(idx, tsk->comm);
2283 
2284 	arch_spin_unlock(&trace_cmdline_lock);
2285 
2286 	return 1;
2287 }
2288 
2289 static void __trace_find_cmdline(int pid, char comm[])
2290 {
2291 	unsigned map;
2292 
2293 	if (!pid) {
2294 		strcpy(comm, "<idle>");
2295 		return;
2296 	}
2297 
2298 	if (WARN_ON_ONCE(pid < 0)) {
2299 		strcpy(comm, "<XXX>");
2300 		return;
2301 	}
2302 
2303 	if (pid > PID_MAX_DEFAULT) {
2304 		strcpy(comm, "<...>");
2305 		return;
2306 	}
2307 
2308 	map = savedcmd->map_pid_to_cmdline[pid];
2309 	if (map != NO_CMDLINE_MAP)
2310 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2311 	else
2312 		strcpy(comm, "<...>");
2313 }
2314 
2315 void trace_find_cmdline(int pid, char comm[])
2316 {
2317 	preempt_disable();
2318 	arch_spin_lock(&trace_cmdline_lock);
2319 
2320 	__trace_find_cmdline(pid, comm);
2321 
2322 	arch_spin_unlock(&trace_cmdline_lock);
2323 	preempt_enable();
2324 }
2325 
2326 int trace_find_tgid(int pid)
2327 {
2328 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2329 		return 0;
2330 
2331 	return tgid_map[pid];
2332 }
2333 
2334 static int trace_save_tgid(struct task_struct *tsk)
2335 {
2336 	/* treat recording of idle task as a success */
2337 	if (!tsk->pid)
2338 		return 1;
2339 
2340 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2341 		return 0;
2342 
2343 	tgid_map[tsk->pid] = tsk->tgid;
2344 	return 1;
2345 }
2346 
2347 static bool tracing_record_taskinfo_skip(int flags)
2348 {
2349 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2350 		return true;
2351 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2352 		return true;
2353 	if (!__this_cpu_read(trace_taskinfo_save))
2354 		return true;
2355 	return false;
2356 }
2357 
2358 /**
2359  * tracing_record_taskinfo - record the task info of a task
2360  *
2361  * @task:  task to record
2362  * @flags: TRACE_RECORD_CMDLINE for recording comm
2363  *         TRACE_RECORD_TGID for recording tgid
2364  */
2365 void tracing_record_taskinfo(struct task_struct *task, int flags)
2366 {
2367 	bool done;
2368 
2369 	if (tracing_record_taskinfo_skip(flags))
2370 		return;
2371 
2372 	/*
2373 	 * Record as much task information as possible. If some fail, continue
2374 	 * to try to record the others.
2375 	 */
2376 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2377 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2378 
2379 	/* If recording any information failed, retry again soon. */
2380 	if (!done)
2381 		return;
2382 
2383 	__this_cpu_write(trace_taskinfo_save, false);
2384 }
2385 
2386 /**
2387  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2388  *
2389  * @prev: previous task during sched_switch
2390  * @next: next task during sched_switch
2391  * @flags: TRACE_RECORD_CMDLINE for recording comm
2392  *         TRACE_RECORD_TGID for recording tgid
2393  */
2394 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2395 					  struct task_struct *next, int flags)
2396 {
2397 	bool done;
2398 
2399 	if (tracing_record_taskinfo_skip(flags))
2400 		return;
2401 
2402 	/*
2403 	 * Record as much task information as possible. If some fail, continue
2404 	 * to try to record the others.
2405 	 */
2406 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2407 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2408 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2409 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2410 
2411 	/* If recording any information failed, retry again soon. */
2412 	if (!done)
2413 		return;
2414 
2415 	__this_cpu_write(trace_taskinfo_save, false);
2416 }
2417 
2418 /* Helpers to record a specific task information */
2419 void tracing_record_cmdline(struct task_struct *task)
2420 {
2421 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2422 }
2423 
2424 void tracing_record_tgid(struct task_struct *task)
2425 {
2426 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2427 }
2428 
2429 /*
2430  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2431  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2432  * simplifies those functions and keeps them in sync.
2433  */
2434 enum print_line_t trace_handle_return(struct trace_seq *s)
2435 {
2436 	return trace_seq_has_overflowed(s) ?
2437 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2438 }
2439 EXPORT_SYMBOL_GPL(trace_handle_return);
2440 
2441 void
2442 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2443 			     unsigned long flags, int pc)
2444 {
2445 	struct task_struct *tsk = current;
2446 
2447 	entry->preempt_count		= pc & 0xff;
2448 	entry->pid			= (tsk) ? tsk->pid : 0;
2449 	entry->type			= type;
2450 	entry->flags =
2451 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2452 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2453 #else
2454 		TRACE_FLAG_IRQS_NOSUPPORT |
2455 #endif
2456 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2457 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2458 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2459 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2460 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2461 }
2462 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2463 
2464 struct ring_buffer_event *
2465 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2466 			  int type,
2467 			  unsigned long len,
2468 			  unsigned long flags, int pc)
2469 {
2470 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2471 }
2472 
2473 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2474 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2475 static int trace_buffered_event_ref;
2476 
2477 /**
2478  * trace_buffered_event_enable - enable buffering events
2479  *
2480  * When events are being filtered, it is quicker to use a temporary
2481  * buffer to write the event data into if there's a likely chance
2482  * that it will not be committed. The discard of the ring buffer
2483  * is not as fast as committing, and is much slower than copying
2484  * a commit.
2485  *
2486  * When an event is to be filtered, allocate per cpu buffers to
2487  * write the event data into, and if the event is filtered and discarded
2488  * it is simply dropped, otherwise, the entire data is to be committed
2489  * in one shot.
2490  */
2491 void trace_buffered_event_enable(void)
2492 {
2493 	struct ring_buffer_event *event;
2494 	struct page *page;
2495 	int cpu;
2496 
2497 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2498 
2499 	if (trace_buffered_event_ref++)
2500 		return;
2501 
2502 	for_each_tracing_cpu(cpu) {
2503 		page = alloc_pages_node(cpu_to_node(cpu),
2504 					GFP_KERNEL | __GFP_NORETRY, 0);
2505 		if (!page)
2506 			goto failed;
2507 
2508 		event = page_address(page);
2509 		memset(event, 0, sizeof(*event));
2510 
2511 		per_cpu(trace_buffered_event, cpu) = event;
2512 
2513 		preempt_disable();
2514 		if (cpu == smp_processor_id() &&
2515 		    this_cpu_read(trace_buffered_event) !=
2516 		    per_cpu(trace_buffered_event, cpu))
2517 			WARN_ON_ONCE(1);
2518 		preempt_enable();
2519 	}
2520 
2521 	return;
2522  failed:
2523 	trace_buffered_event_disable();
2524 }
2525 
2526 static void enable_trace_buffered_event(void *data)
2527 {
2528 	/* Probably not needed, but do it anyway */
2529 	smp_rmb();
2530 	this_cpu_dec(trace_buffered_event_cnt);
2531 }
2532 
2533 static void disable_trace_buffered_event(void *data)
2534 {
2535 	this_cpu_inc(trace_buffered_event_cnt);
2536 }
2537 
2538 /**
2539  * trace_buffered_event_disable - disable buffering events
2540  *
2541  * When a filter is removed, it is faster to not use the buffered
2542  * events, and to commit directly into the ring buffer. Free up
2543  * the temp buffers when there are no more users. This requires
2544  * special synchronization with current events.
2545  */
2546 void trace_buffered_event_disable(void)
2547 {
2548 	int cpu;
2549 
2550 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2551 
2552 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2553 		return;
2554 
2555 	if (--trace_buffered_event_ref)
2556 		return;
2557 
2558 	preempt_disable();
2559 	/* For each CPU, set the buffer as used. */
2560 	smp_call_function_many(tracing_buffer_mask,
2561 			       disable_trace_buffered_event, NULL, 1);
2562 	preempt_enable();
2563 
2564 	/* Wait for all current users to finish */
2565 	synchronize_rcu();
2566 
2567 	for_each_tracing_cpu(cpu) {
2568 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2569 		per_cpu(trace_buffered_event, cpu) = NULL;
2570 	}
2571 	/*
2572 	 * Make sure trace_buffered_event is NULL before clearing
2573 	 * trace_buffered_event_cnt.
2574 	 */
2575 	smp_wmb();
2576 
2577 	preempt_disable();
2578 	/* Do the work on each cpu */
2579 	smp_call_function_many(tracing_buffer_mask,
2580 			       enable_trace_buffered_event, NULL, 1);
2581 	preempt_enable();
2582 }
2583 
2584 static struct trace_buffer *temp_buffer;
2585 
2586 struct ring_buffer_event *
2587 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2588 			  struct trace_event_file *trace_file,
2589 			  int type, unsigned long len,
2590 			  unsigned long flags, int pc)
2591 {
2592 	struct ring_buffer_event *entry;
2593 	int val;
2594 
2595 	*current_rb = trace_file->tr->array_buffer.buffer;
2596 
2597 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2598 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2599 	    (entry = this_cpu_read(trace_buffered_event))) {
2600 		/* Try to use the per cpu buffer first */
2601 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2602 		if (val == 1) {
2603 			trace_event_setup(entry, type, flags, pc);
2604 			entry->array[0] = len;
2605 			return entry;
2606 		}
2607 		this_cpu_dec(trace_buffered_event_cnt);
2608 	}
2609 
2610 	entry = __trace_buffer_lock_reserve(*current_rb,
2611 					    type, len, flags, pc);
2612 	/*
2613 	 * If tracing is off, but we have triggers enabled
2614 	 * we still need to look at the event data. Use the temp_buffer
2615 	 * to store the trace event for the tigger to use. It's recusive
2616 	 * safe and will not be recorded anywhere.
2617 	 */
2618 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2619 		*current_rb = temp_buffer;
2620 		entry = __trace_buffer_lock_reserve(*current_rb,
2621 						    type, len, flags, pc);
2622 	}
2623 	return entry;
2624 }
2625 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2626 
2627 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2628 static DEFINE_MUTEX(tracepoint_printk_mutex);
2629 
2630 static void output_printk(struct trace_event_buffer *fbuffer)
2631 {
2632 	struct trace_event_call *event_call;
2633 	struct trace_event_file *file;
2634 	struct trace_event *event;
2635 	unsigned long flags;
2636 	struct trace_iterator *iter = tracepoint_print_iter;
2637 
2638 	/* We should never get here if iter is NULL */
2639 	if (WARN_ON_ONCE(!iter))
2640 		return;
2641 
2642 	event_call = fbuffer->trace_file->event_call;
2643 	if (!event_call || !event_call->event.funcs ||
2644 	    !event_call->event.funcs->trace)
2645 		return;
2646 
2647 	file = fbuffer->trace_file;
2648 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2649 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2650 	     !filter_match_preds(file->filter, fbuffer->entry)))
2651 		return;
2652 
2653 	event = &fbuffer->trace_file->event_call->event;
2654 
2655 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2656 	trace_seq_init(&iter->seq);
2657 	iter->ent = fbuffer->entry;
2658 	event_call->event.funcs->trace(iter, 0, event);
2659 	trace_seq_putc(&iter->seq, 0);
2660 	printk("%s", iter->seq.buffer);
2661 
2662 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2663 }
2664 
2665 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2666 			     void *buffer, size_t *lenp,
2667 			     loff_t *ppos)
2668 {
2669 	int save_tracepoint_printk;
2670 	int ret;
2671 
2672 	mutex_lock(&tracepoint_printk_mutex);
2673 	save_tracepoint_printk = tracepoint_printk;
2674 
2675 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2676 
2677 	/*
2678 	 * This will force exiting early, as tracepoint_printk
2679 	 * is always zero when tracepoint_printk_iter is not allocated
2680 	 */
2681 	if (!tracepoint_print_iter)
2682 		tracepoint_printk = 0;
2683 
2684 	if (save_tracepoint_printk == tracepoint_printk)
2685 		goto out;
2686 
2687 	if (tracepoint_printk)
2688 		static_key_enable(&tracepoint_printk_key.key);
2689 	else
2690 		static_key_disable(&tracepoint_printk_key.key);
2691 
2692  out:
2693 	mutex_unlock(&tracepoint_printk_mutex);
2694 
2695 	return ret;
2696 }
2697 
2698 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2699 {
2700 	if (static_key_false(&tracepoint_printk_key.key))
2701 		output_printk(fbuffer);
2702 
2703 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2704 				    fbuffer->event, fbuffer->entry,
2705 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2706 }
2707 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2708 
2709 /*
2710  * Skip 3:
2711  *
2712  *   trace_buffer_unlock_commit_regs()
2713  *   trace_event_buffer_commit()
2714  *   trace_event_raw_event_xxx()
2715  */
2716 # define STACK_SKIP 3
2717 
2718 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2719 				     struct trace_buffer *buffer,
2720 				     struct ring_buffer_event *event,
2721 				     unsigned long flags, int pc,
2722 				     struct pt_regs *regs)
2723 {
2724 	__buffer_unlock_commit(buffer, event);
2725 
2726 	/*
2727 	 * If regs is not set, then skip the necessary functions.
2728 	 * Note, we can still get here via blktrace, wakeup tracer
2729 	 * and mmiotrace, but that's ok if they lose a function or
2730 	 * two. They are not that meaningful.
2731 	 */
2732 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2733 	ftrace_trace_userstack(buffer, flags, pc);
2734 }
2735 
2736 /*
2737  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2738  */
2739 void
2740 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2741 				   struct ring_buffer_event *event)
2742 {
2743 	__buffer_unlock_commit(buffer, event);
2744 }
2745 
2746 static void
2747 trace_process_export(struct trace_export *export,
2748 	       struct ring_buffer_event *event)
2749 {
2750 	struct trace_entry *entry;
2751 	unsigned int size = 0;
2752 
2753 	entry = ring_buffer_event_data(event);
2754 	size = ring_buffer_event_length(event);
2755 	export->write(export, entry, size);
2756 }
2757 
2758 static DEFINE_MUTEX(ftrace_export_lock);
2759 
2760 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2761 
2762 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2763 
2764 static inline void ftrace_exports_enable(void)
2765 {
2766 	static_branch_enable(&ftrace_exports_enabled);
2767 }
2768 
2769 static inline void ftrace_exports_disable(void)
2770 {
2771 	static_branch_disable(&ftrace_exports_enabled);
2772 }
2773 
2774 static void ftrace_exports(struct ring_buffer_event *event)
2775 {
2776 	struct trace_export *export;
2777 
2778 	preempt_disable_notrace();
2779 
2780 	export = rcu_dereference_raw_check(ftrace_exports_list);
2781 	while (export) {
2782 		trace_process_export(export, event);
2783 		export = rcu_dereference_raw_check(export->next);
2784 	}
2785 
2786 	preempt_enable_notrace();
2787 }
2788 
2789 static inline void
2790 add_trace_export(struct trace_export **list, struct trace_export *export)
2791 {
2792 	rcu_assign_pointer(export->next, *list);
2793 	/*
2794 	 * We are entering export into the list but another
2795 	 * CPU might be walking that list. We need to make sure
2796 	 * the export->next pointer is valid before another CPU sees
2797 	 * the export pointer included into the list.
2798 	 */
2799 	rcu_assign_pointer(*list, export);
2800 }
2801 
2802 static inline int
2803 rm_trace_export(struct trace_export **list, struct trace_export *export)
2804 {
2805 	struct trace_export **p;
2806 
2807 	for (p = list; *p != NULL; p = &(*p)->next)
2808 		if (*p == export)
2809 			break;
2810 
2811 	if (*p != export)
2812 		return -1;
2813 
2814 	rcu_assign_pointer(*p, (*p)->next);
2815 
2816 	return 0;
2817 }
2818 
2819 static inline void
2820 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2821 {
2822 	if (*list == NULL)
2823 		ftrace_exports_enable();
2824 
2825 	add_trace_export(list, export);
2826 }
2827 
2828 static inline int
2829 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2830 {
2831 	int ret;
2832 
2833 	ret = rm_trace_export(list, export);
2834 	if (*list == NULL)
2835 		ftrace_exports_disable();
2836 
2837 	return ret;
2838 }
2839 
2840 int register_ftrace_export(struct trace_export *export)
2841 {
2842 	if (WARN_ON_ONCE(!export->write))
2843 		return -1;
2844 
2845 	mutex_lock(&ftrace_export_lock);
2846 
2847 	add_ftrace_export(&ftrace_exports_list, export);
2848 
2849 	mutex_unlock(&ftrace_export_lock);
2850 
2851 	return 0;
2852 }
2853 EXPORT_SYMBOL_GPL(register_ftrace_export);
2854 
2855 int unregister_ftrace_export(struct trace_export *export)
2856 {
2857 	int ret;
2858 
2859 	mutex_lock(&ftrace_export_lock);
2860 
2861 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2862 
2863 	mutex_unlock(&ftrace_export_lock);
2864 
2865 	return ret;
2866 }
2867 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2868 
2869 void
2870 trace_function(struct trace_array *tr,
2871 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2872 	       int pc)
2873 {
2874 	struct trace_event_call *call = &event_function;
2875 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2876 	struct ring_buffer_event *event;
2877 	struct ftrace_entry *entry;
2878 
2879 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2880 					    flags, pc);
2881 	if (!event)
2882 		return;
2883 	entry	= ring_buffer_event_data(event);
2884 	entry->ip			= ip;
2885 	entry->parent_ip		= parent_ip;
2886 
2887 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2888 		if (static_branch_unlikely(&ftrace_exports_enabled))
2889 			ftrace_exports(event);
2890 		__buffer_unlock_commit(buffer, event);
2891 	}
2892 }
2893 
2894 #ifdef CONFIG_STACKTRACE
2895 
2896 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2897 #define FTRACE_KSTACK_NESTING	4
2898 
2899 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2900 
2901 struct ftrace_stack {
2902 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2903 };
2904 
2905 
2906 struct ftrace_stacks {
2907 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2908 };
2909 
2910 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2911 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2912 
2913 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2914 				 unsigned long flags,
2915 				 int skip, int pc, struct pt_regs *regs)
2916 {
2917 	struct trace_event_call *call = &event_kernel_stack;
2918 	struct ring_buffer_event *event;
2919 	unsigned int size, nr_entries;
2920 	struct ftrace_stack *fstack;
2921 	struct stack_entry *entry;
2922 	int stackidx;
2923 
2924 	/*
2925 	 * Add one, for this function and the call to save_stack_trace()
2926 	 * If regs is set, then these functions will not be in the way.
2927 	 */
2928 #ifndef CONFIG_UNWINDER_ORC
2929 	if (!regs)
2930 		skip++;
2931 #endif
2932 
2933 	/*
2934 	 * Since events can happen in NMIs there's no safe way to
2935 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2936 	 * or NMI comes in, it will just have to use the default
2937 	 * FTRACE_STACK_SIZE.
2938 	 */
2939 	preempt_disable_notrace();
2940 
2941 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2942 
2943 	/* This should never happen. If it does, yell once and skip */
2944 	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2945 		goto out;
2946 
2947 	/*
2948 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2949 	 * interrupt will either see the value pre increment or post
2950 	 * increment. If the interrupt happens pre increment it will have
2951 	 * restored the counter when it returns.  We just need a barrier to
2952 	 * keep gcc from moving things around.
2953 	 */
2954 	barrier();
2955 
2956 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2957 	size = ARRAY_SIZE(fstack->calls);
2958 
2959 	if (regs) {
2960 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2961 						   size, skip);
2962 	} else {
2963 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2964 	}
2965 
2966 	size = nr_entries * sizeof(unsigned long);
2967 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2968 					    sizeof(*entry) + size, flags, pc);
2969 	if (!event)
2970 		goto out;
2971 	entry = ring_buffer_event_data(event);
2972 
2973 	memcpy(&entry->caller, fstack->calls, size);
2974 	entry->size = nr_entries;
2975 
2976 	if (!call_filter_check_discard(call, entry, buffer, event))
2977 		__buffer_unlock_commit(buffer, event);
2978 
2979  out:
2980 	/* Again, don't let gcc optimize things here */
2981 	barrier();
2982 	__this_cpu_dec(ftrace_stack_reserve);
2983 	preempt_enable_notrace();
2984 
2985 }
2986 
2987 static inline void ftrace_trace_stack(struct trace_array *tr,
2988 				      struct trace_buffer *buffer,
2989 				      unsigned long flags,
2990 				      int skip, int pc, struct pt_regs *regs)
2991 {
2992 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2993 		return;
2994 
2995 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2996 }
2997 
2998 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2999 		   int pc)
3000 {
3001 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3002 
3003 	if (rcu_is_watching()) {
3004 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3005 		return;
3006 	}
3007 
3008 	/*
3009 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3010 	 * but if the above rcu_is_watching() failed, then the NMI
3011 	 * triggered someplace critical, and rcu_irq_enter() should
3012 	 * not be called from NMI.
3013 	 */
3014 	if (unlikely(in_nmi()))
3015 		return;
3016 
3017 	rcu_irq_enter_irqson();
3018 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3019 	rcu_irq_exit_irqson();
3020 }
3021 
3022 /**
3023  * trace_dump_stack - record a stack back trace in the trace buffer
3024  * @skip: Number of functions to skip (helper handlers)
3025  */
3026 void trace_dump_stack(int skip)
3027 {
3028 	unsigned long flags;
3029 
3030 	if (tracing_disabled || tracing_selftest_running)
3031 		return;
3032 
3033 	local_save_flags(flags);
3034 
3035 #ifndef CONFIG_UNWINDER_ORC
3036 	/* Skip 1 to skip this function. */
3037 	skip++;
3038 #endif
3039 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3040 			     flags, skip, preempt_count(), NULL);
3041 }
3042 EXPORT_SYMBOL_GPL(trace_dump_stack);
3043 
3044 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3045 static DEFINE_PER_CPU(int, user_stack_count);
3046 
3047 static void
3048 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3049 {
3050 	struct trace_event_call *call = &event_user_stack;
3051 	struct ring_buffer_event *event;
3052 	struct userstack_entry *entry;
3053 
3054 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3055 		return;
3056 
3057 	/*
3058 	 * NMIs can not handle page faults, even with fix ups.
3059 	 * The save user stack can (and often does) fault.
3060 	 */
3061 	if (unlikely(in_nmi()))
3062 		return;
3063 
3064 	/*
3065 	 * prevent recursion, since the user stack tracing may
3066 	 * trigger other kernel events.
3067 	 */
3068 	preempt_disable();
3069 	if (__this_cpu_read(user_stack_count))
3070 		goto out;
3071 
3072 	__this_cpu_inc(user_stack_count);
3073 
3074 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3075 					    sizeof(*entry), flags, pc);
3076 	if (!event)
3077 		goto out_drop_count;
3078 	entry	= ring_buffer_event_data(event);
3079 
3080 	entry->tgid		= current->tgid;
3081 	memset(&entry->caller, 0, sizeof(entry->caller));
3082 
3083 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3084 	if (!call_filter_check_discard(call, entry, buffer, event))
3085 		__buffer_unlock_commit(buffer, event);
3086 
3087  out_drop_count:
3088 	__this_cpu_dec(user_stack_count);
3089  out:
3090 	preempt_enable();
3091 }
3092 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3093 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3094 				   unsigned long flags, int pc)
3095 {
3096 }
3097 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3098 
3099 #endif /* CONFIG_STACKTRACE */
3100 
3101 /* created for use with alloc_percpu */
3102 struct trace_buffer_struct {
3103 	int nesting;
3104 	char buffer[4][TRACE_BUF_SIZE];
3105 };
3106 
3107 static struct trace_buffer_struct *trace_percpu_buffer;
3108 
3109 /*
3110  * Thise allows for lockless recording.  If we're nested too deeply, then
3111  * this returns NULL.
3112  */
3113 static char *get_trace_buf(void)
3114 {
3115 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3116 
3117 	if (!buffer || buffer->nesting >= 4)
3118 		return NULL;
3119 
3120 	buffer->nesting++;
3121 
3122 	/* Interrupts must see nesting incremented before we use the buffer */
3123 	barrier();
3124 	return &buffer->buffer[buffer->nesting][0];
3125 }
3126 
3127 static void put_trace_buf(void)
3128 {
3129 	/* Don't let the decrement of nesting leak before this */
3130 	barrier();
3131 	this_cpu_dec(trace_percpu_buffer->nesting);
3132 }
3133 
3134 static int alloc_percpu_trace_buffer(void)
3135 {
3136 	struct trace_buffer_struct *buffers;
3137 
3138 	buffers = alloc_percpu(struct trace_buffer_struct);
3139 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3140 		return -ENOMEM;
3141 
3142 	trace_percpu_buffer = buffers;
3143 	return 0;
3144 }
3145 
3146 static int buffers_allocated;
3147 
3148 void trace_printk_init_buffers(void)
3149 {
3150 	if (buffers_allocated)
3151 		return;
3152 
3153 	if (alloc_percpu_trace_buffer())
3154 		return;
3155 
3156 	/* trace_printk() is for debug use only. Don't use it in production. */
3157 
3158 	pr_warn("\n");
3159 	pr_warn("**********************************************************\n");
3160 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3161 	pr_warn("**                                                      **\n");
3162 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3163 	pr_warn("**                                                      **\n");
3164 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3165 	pr_warn("** unsafe for production use.                           **\n");
3166 	pr_warn("**                                                      **\n");
3167 	pr_warn("** If you see this message and you are not debugging    **\n");
3168 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3169 	pr_warn("**                                                      **\n");
3170 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3171 	pr_warn("**********************************************************\n");
3172 
3173 	/* Expand the buffers to set size */
3174 	tracing_update_buffers();
3175 
3176 	buffers_allocated = 1;
3177 
3178 	/*
3179 	 * trace_printk_init_buffers() can be called by modules.
3180 	 * If that happens, then we need to start cmdline recording
3181 	 * directly here. If the global_trace.buffer is already
3182 	 * allocated here, then this was called by module code.
3183 	 */
3184 	if (global_trace.array_buffer.buffer)
3185 		tracing_start_cmdline_record();
3186 }
3187 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3188 
3189 void trace_printk_start_comm(void)
3190 {
3191 	/* Start tracing comms if trace printk is set */
3192 	if (!buffers_allocated)
3193 		return;
3194 	tracing_start_cmdline_record();
3195 }
3196 
3197 static void trace_printk_start_stop_comm(int enabled)
3198 {
3199 	if (!buffers_allocated)
3200 		return;
3201 
3202 	if (enabled)
3203 		tracing_start_cmdline_record();
3204 	else
3205 		tracing_stop_cmdline_record();
3206 }
3207 
3208 /**
3209  * trace_vbprintk - write binary msg to tracing buffer
3210  * @ip:    The address of the caller
3211  * @fmt:   The string format to write to the buffer
3212  * @args:  Arguments for @fmt
3213  */
3214 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3215 {
3216 	struct trace_event_call *call = &event_bprint;
3217 	struct ring_buffer_event *event;
3218 	struct trace_buffer *buffer;
3219 	struct trace_array *tr = &global_trace;
3220 	struct bprint_entry *entry;
3221 	unsigned long flags;
3222 	char *tbuffer;
3223 	int len = 0, size, pc;
3224 
3225 	if (unlikely(tracing_selftest_running || tracing_disabled))
3226 		return 0;
3227 
3228 	/* Don't pollute graph traces with trace_vprintk internals */
3229 	pause_graph_tracing();
3230 
3231 	pc = preempt_count();
3232 	preempt_disable_notrace();
3233 
3234 	tbuffer = get_trace_buf();
3235 	if (!tbuffer) {
3236 		len = 0;
3237 		goto out_nobuffer;
3238 	}
3239 
3240 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3241 
3242 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3243 		goto out_put;
3244 
3245 	local_save_flags(flags);
3246 	size = sizeof(*entry) + sizeof(u32) * len;
3247 	buffer = tr->array_buffer.buffer;
3248 	ring_buffer_nest_start(buffer);
3249 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3250 					    flags, pc);
3251 	if (!event)
3252 		goto out;
3253 	entry = ring_buffer_event_data(event);
3254 	entry->ip			= ip;
3255 	entry->fmt			= fmt;
3256 
3257 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3258 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3259 		__buffer_unlock_commit(buffer, event);
3260 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3261 	}
3262 
3263 out:
3264 	ring_buffer_nest_end(buffer);
3265 out_put:
3266 	put_trace_buf();
3267 
3268 out_nobuffer:
3269 	preempt_enable_notrace();
3270 	unpause_graph_tracing();
3271 
3272 	return len;
3273 }
3274 EXPORT_SYMBOL_GPL(trace_vbprintk);
3275 
3276 __printf(3, 0)
3277 static int
3278 __trace_array_vprintk(struct trace_buffer *buffer,
3279 		      unsigned long ip, const char *fmt, va_list args)
3280 {
3281 	struct trace_event_call *call = &event_print;
3282 	struct ring_buffer_event *event;
3283 	int len = 0, size, pc;
3284 	struct print_entry *entry;
3285 	unsigned long flags;
3286 	char *tbuffer;
3287 
3288 	if (tracing_disabled || tracing_selftest_running)
3289 		return 0;
3290 
3291 	/* Don't pollute graph traces with trace_vprintk internals */
3292 	pause_graph_tracing();
3293 
3294 	pc = preempt_count();
3295 	preempt_disable_notrace();
3296 
3297 
3298 	tbuffer = get_trace_buf();
3299 	if (!tbuffer) {
3300 		len = 0;
3301 		goto out_nobuffer;
3302 	}
3303 
3304 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3305 
3306 	local_save_flags(flags);
3307 	size = sizeof(*entry) + len + 1;
3308 	ring_buffer_nest_start(buffer);
3309 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3310 					    flags, pc);
3311 	if (!event)
3312 		goto out;
3313 	entry = ring_buffer_event_data(event);
3314 	entry->ip = ip;
3315 
3316 	memcpy(&entry->buf, tbuffer, len + 1);
3317 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3318 		__buffer_unlock_commit(buffer, event);
3319 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3320 	}
3321 
3322 out:
3323 	ring_buffer_nest_end(buffer);
3324 	put_trace_buf();
3325 
3326 out_nobuffer:
3327 	preempt_enable_notrace();
3328 	unpause_graph_tracing();
3329 
3330 	return len;
3331 }
3332 
3333 __printf(3, 0)
3334 int trace_array_vprintk(struct trace_array *tr,
3335 			unsigned long ip, const char *fmt, va_list args)
3336 {
3337 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3338 }
3339 
3340 __printf(3, 0)
3341 int trace_array_printk(struct trace_array *tr,
3342 		       unsigned long ip, const char *fmt, ...)
3343 {
3344 	int ret;
3345 	va_list ap;
3346 
3347 	if (!tr)
3348 		return -ENOENT;
3349 
3350 	/* This is only allowed for created instances */
3351 	if (tr == &global_trace)
3352 		return 0;
3353 
3354 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3355 		return 0;
3356 
3357 	va_start(ap, fmt);
3358 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3359 	va_end(ap);
3360 	return ret;
3361 }
3362 EXPORT_SYMBOL_GPL(trace_array_printk);
3363 
3364 __printf(3, 4)
3365 int trace_array_printk_buf(struct trace_buffer *buffer,
3366 			   unsigned long ip, const char *fmt, ...)
3367 {
3368 	int ret;
3369 	va_list ap;
3370 
3371 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3372 		return 0;
3373 
3374 	va_start(ap, fmt);
3375 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3376 	va_end(ap);
3377 	return ret;
3378 }
3379 
3380 __printf(2, 0)
3381 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3382 {
3383 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3384 }
3385 EXPORT_SYMBOL_GPL(trace_vprintk);
3386 
3387 static void trace_iterator_increment(struct trace_iterator *iter)
3388 {
3389 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3390 
3391 	iter->idx++;
3392 	if (buf_iter)
3393 		ring_buffer_iter_advance(buf_iter);
3394 }
3395 
3396 static struct trace_entry *
3397 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3398 		unsigned long *lost_events)
3399 {
3400 	struct ring_buffer_event *event;
3401 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3402 
3403 	if (buf_iter) {
3404 		event = ring_buffer_iter_peek(buf_iter, ts);
3405 		if (lost_events)
3406 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3407 				(unsigned long)-1 : 0;
3408 	} else {
3409 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3410 					 lost_events);
3411 	}
3412 
3413 	if (event) {
3414 		iter->ent_size = ring_buffer_event_length(event);
3415 		return ring_buffer_event_data(event);
3416 	}
3417 	iter->ent_size = 0;
3418 	return NULL;
3419 }
3420 
3421 static struct trace_entry *
3422 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3423 		  unsigned long *missing_events, u64 *ent_ts)
3424 {
3425 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3426 	struct trace_entry *ent, *next = NULL;
3427 	unsigned long lost_events = 0, next_lost = 0;
3428 	int cpu_file = iter->cpu_file;
3429 	u64 next_ts = 0, ts;
3430 	int next_cpu = -1;
3431 	int next_size = 0;
3432 	int cpu;
3433 
3434 	/*
3435 	 * If we are in a per_cpu trace file, don't bother by iterating over
3436 	 * all cpu and peek directly.
3437 	 */
3438 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3439 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3440 			return NULL;
3441 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3442 		if (ent_cpu)
3443 			*ent_cpu = cpu_file;
3444 
3445 		return ent;
3446 	}
3447 
3448 	for_each_tracing_cpu(cpu) {
3449 
3450 		if (ring_buffer_empty_cpu(buffer, cpu))
3451 			continue;
3452 
3453 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3454 
3455 		/*
3456 		 * Pick the entry with the smallest timestamp:
3457 		 */
3458 		if (ent && (!next || ts < next_ts)) {
3459 			next = ent;
3460 			next_cpu = cpu;
3461 			next_ts = ts;
3462 			next_lost = lost_events;
3463 			next_size = iter->ent_size;
3464 		}
3465 	}
3466 
3467 	iter->ent_size = next_size;
3468 
3469 	if (ent_cpu)
3470 		*ent_cpu = next_cpu;
3471 
3472 	if (ent_ts)
3473 		*ent_ts = next_ts;
3474 
3475 	if (missing_events)
3476 		*missing_events = next_lost;
3477 
3478 	return next;
3479 }
3480 
3481 #define STATIC_TEMP_BUF_SIZE	128
3482 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3483 
3484 /* Find the next real entry, without updating the iterator itself */
3485 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3486 					  int *ent_cpu, u64 *ent_ts)
3487 {
3488 	/* __find_next_entry will reset ent_size */
3489 	int ent_size = iter->ent_size;
3490 	struct trace_entry *entry;
3491 
3492 	/*
3493 	 * If called from ftrace_dump(), then the iter->temp buffer
3494 	 * will be the static_temp_buf and not created from kmalloc.
3495 	 * If the entry size is greater than the buffer, we can
3496 	 * not save it. Just return NULL in that case. This is only
3497 	 * used to add markers when two consecutive events' time
3498 	 * stamps have a large delta. See trace_print_lat_context()
3499 	 */
3500 	if (iter->temp == static_temp_buf &&
3501 	    STATIC_TEMP_BUF_SIZE < ent_size)
3502 		return NULL;
3503 
3504 	/*
3505 	 * The __find_next_entry() may call peek_next_entry(), which may
3506 	 * call ring_buffer_peek() that may make the contents of iter->ent
3507 	 * undefined. Need to copy iter->ent now.
3508 	 */
3509 	if (iter->ent && iter->ent != iter->temp) {
3510 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3511 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3512 			kfree(iter->temp);
3513 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3514 			if (!iter->temp)
3515 				return NULL;
3516 		}
3517 		memcpy(iter->temp, iter->ent, iter->ent_size);
3518 		iter->temp_size = iter->ent_size;
3519 		iter->ent = iter->temp;
3520 	}
3521 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3522 	/* Put back the original ent_size */
3523 	iter->ent_size = ent_size;
3524 
3525 	return entry;
3526 }
3527 
3528 /* Find the next real entry, and increment the iterator to the next entry */
3529 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3530 {
3531 	iter->ent = __find_next_entry(iter, &iter->cpu,
3532 				      &iter->lost_events, &iter->ts);
3533 
3534 	if (iter->ent)
3535 		trace_iterator_increment(iter);
3536 
3537 	return iter->ent ? iter : NULL;
3538 }
3539 
3540 static void trace_consume(struct trace_iterator *iter)
3541 {
3542 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3543 			    &iter->lost_events);
3544 }
3545 
3546 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3547 {
3548 	struct trace_iterator *iter = m->private;
3549 	int i = (int)*pos;
3550 	void *ent;
3551 
3552 	WARN_ON_ONCE(iter->leftover);
3553 
3554 	(*pos)++;
3555 
3556 	/* can't go backwards */
3557 	if (iter->idx > i)
3558 		return NULL;
3559 
3560 	if (iter->idx < 0)
3561 		ent = trace_find_next_entry_inc(iter);
3562 	else
3563 		ent = iter;
3564 
3565 	while (ent && iter->idx < i)
3566 		ent = trace_find_next_entry_inc(iter);
3567 
3568 	iter->pos = *pos;
3569 
3570 	return ent;
3571 }
3572 
3573 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3574 {
3575 	struct ring_buffer_iter *buf_iter;
3576 	unsigned long entries = 0;
3577 	u64 ts;
3578 
3579 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3580 
3581 	buf_iter = trace_buffer_iter(iter, cpu);
3582 	if (!buf_iter)
3583 		return;
3584 
3585 	ring_buffer_iter_reset(buf_iter);
3586 
3587 	/*
3588 	 * We could have the case with the max latency tracers
3589 	 * that a reset never took place on a cpu. This is evident
3590 	 * by the timestamp being before the start of the buffer.
3591 	 */
3592 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3593 		if (ts >= iter->array_buffer->time_start)
3594 			break;
3595 		entries++;
3596 		ring_buffer_iter_advance(buf_iter);
3597 	}
3598 
3599 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3600 }
3601 
3602 /*
3603  * The current tracer is copied to avoid a global locking
3604  * all around.
3605  */
3606 static void *s_start(struct seq_file *m, loff_t *pos)
3607 {
3608 	struct trace_iterator *iter = m->private;
3609 	struct trace_array *tr = iter->tr;
3610 	int cpu_file = iter->cpu_file;
3611 	void *p = NULL;
3612 	loff_t l = 0;
3613 	int cpu;
3614 
3615 	/*
3616 	 * copy the tracer to avoid using a global lock all around.
3617 	 * iter->trace is a copy of current_trace, the pointer to the
3618 	 * name may be used instead of a strcmp(), as iter->trace->name
3619 	 * will point to the same string as current_trace->name.
3620 	 */
3621 	mutex_lock(&trace_types_lock);
3622 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3623 		*iter->trace = *tr->current_trace;
3624 	mutex_unlock(&trace_types_lock);
3625 
3626 #ifdef CONFIG_TRACER_MAX_TRACE
3627 	if (iter->snapshot && iter->trace->use_max_tr)
3628 		return ERR_PTR(-EBUSY);
3629 #endif
3630 
3631 	if (!iter->snapshot)
3632 		atomic_inc(&trace_record_taskinfo_disabled);
3633 
3634 	if (*pos != iter->pos) {
3635 		iter->ent = NULL;
3636 		iter->cpu = 0;
3637 		iter->idx = -1;
3638 
3639 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3640 			for_each_tracing_cpu(cpu)
3641 				tracing_iter_reset(iter, cpu);
3642 		} else
3643 			tracing_iter_reset(iter, cpu_file);
3644 
3645 		iter->leftover = 0;
3646 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3647 			;
3648 
3649 	} else {
3650 		/*
3651 		 * If we overflowed the seq_file before, then we want
3652 		 * to just reuse the trace_seq buffer again.
3653 		 */
3654 		if (iter->leftover)
3655 			p = iter;
3656 		else {
3657 			l = *pos - 1;
3658 			p = s_next(m, p, &l);
3659 		}
3660 	}
3661 
3662 	trace_event_read_lock();
3663 	trace_access_lock(cpu_file);
3664 	return p;
3665 }
3666 
3667 static void s_stop(struct seq_file *m, void *p)
3668 {
3669 	struct trace_iterator *iter = m->private;
3670 
3671 #ifdef CONFIG_TRACER_MAX_TRACE
3672 	if (iter->snapshot && iter->trace->use_max_tr)
3673 		return;
3674 #endif
3675 
3676 	if (!iter->snapshot)
3677 		atomic_dec(&trace_record_taskinfo_disabled);
3678 
3679 	trace_access_unlock(iter->cpu_file);
3680 	trace_event_read_unlock();
3681 }
3682 
3683 static void
3684 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3685 		      unsigned long *entries, int cpu)
3686 {
3687 	unsigned long count;
3688 
3689 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3690 	/*
3691 	 * If this buffer has skipped entries, then we hold all
3692 	 * entries for the trace and we need to ignore the
3693 	 * ones before the time stamp.
3694 	 */
3695 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3696 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3697 		/* total is the same as the entries */
3698 		*total = count;
3699 	} else
3700 		*total = count +
3701 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3702 	*entries = count;
3703 }
3704 
3705 static void
3706 get_total_entries(struct array_buffer *buf,
3707 		  unsigned long *total, unsigned long *entries)
3708 {
3709 	unsigned long t, e;
3710 	int cpu;
3711 
3712 	*total = 0;
3713 	*entries = 0;
3714 
3715 	for_each_tracing_cpu(cpu) {
3716 		get_total_entries_cpu(buf, &t, &e, cpu);
3717 		*total += t;
3718 		*entries += e;
3719 	}
3720 }
3721 
3722 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3723 {
3724 	unsigned long total, entries;
3725 
3726 	if (!tr)
3727 		tr = &global_trace;
3728 
3729 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3730 
3731 	return entries;
3732 }
3733 
3734 unsigned long trace_total_entries(struct trace_array *tr)
3735 {
3736 	unsigned long total, entries;
3737 
3738 	if (!tr)
3739 		tr = &global_trace;
3740 
3741 	get_total_entries(&tr->array_buffer, &total, &entries);
3742 
3743 	return entries;
3744 }
3745 
3746 static void print_lat_help_header(struct seq_file *m)
3747 {
3748 	seq_puts(m, "#                  _------=> CPU#            \n"
3749 		    "#                 / _-----=> irqs-off        \n"
3750 		    "#                | / _----=> need-resched    \n"
3751 		    "#                || / _---=> hardirq/softirq \n"
3752 		    "#                ||| / _--=> preempt-depth   \n"
3753 		    "#                |||| /     delay            \n"
3754 		    "#  cmd     pid   ||||| time  |   caller      \n"
3755 		    "#     \\   /      |||||  \\    |   /         \n");
3756 }
3757 
3758 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3759 {
3760 	unsigned long total;
3761 	unsigned long entries;
3762 
3763 	get_total_entries(buf, &total, &entries);
3764 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3765 		   entries, total, num_online_cpus());
3766 	seq_puts(m, "#\n");
3767 }
3768 
3769 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3770 				   unsigned int flags)
3771 {
3772 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3773 
3774 	print_event_info(buf, m);
3775 
3776 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3777 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3778 }
3779 
3780 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3781 				       unsigned int flags)
3782 {
3783 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3784 	const char *space = "          ";
3785 	int prec = tgid ? 10 : 2;
3786 
3787 	print_event_info(buf, m);
3788 
3789 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3790 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3791 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3792 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3793 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3794 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3795 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3796 }
3797 
3798 void
3799 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3800 {
3801 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3802 	struct array_buffer *buf = iter->array_buffer;
3803 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3804 	struct tracer *type = iter->trace;
3805 	unsigned long entries;
3806 	unsigned long total;
3807 	const char *name = "preemption";
3808 
3809 	name = type->name;
3810 
3811 	get_total_entries(buf, &total, &entries);
3812 
3813 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3814 		   name, UTS_RELEASE);
3815 	seq_puts(m, "# -----------------------------------"
3816 		 "---------------------------------\n");
3817 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3818 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3819 		   nsecs_to_usecs(data->saved_latency),
3820 		   entries,
3821 		   total,
3822 		   buf->cpu,
3823 #if defined(CONFIG_PREEMPT_NONE)
3824 		   "server",
3825 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3826 		   "desktop",
3827 #elif defined(CONFIG_PREEMPT)
3828 		   "preempt",
3829 #elif defined(CONFIG_PREEMPT_RT)
3830 		   "preempt_rt",
3831 #else
3832 		   "unknown",
3833 #endif
3834 		   /* These are reserved for later use */
3835 		   0, 0, 0, 0);
3836 #ifdef CONFIG_SMP
3837 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3838 #else
3839 	seq_puts(m, ")\n");
3840 #endif
3841 	seq_puts(m, "#    -----------------\n");
3842 	seq_printf(m, "#    | task: %.16s-%d "
3843 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3844 		   data->comm, data->pid,
3845 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3846 		   data->policy, data->rt_priority);
3847 	seq_puts(m, "#    -----------------\n");
3848 
3849 	if (data->critical_start) {
3850 		seq_puts(m, "#  => started at: ");
3851 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3852 		trace_print_seq(m, &iter->seq);
3853 		seq_puts(m, "\n#  => ended at:   ");
3854 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3855 		trace_print_seq(m, &iter->seq);
3856 		seq_puts(m, "\n#\n");
3857 	}
3858 
3859 	seq_puts(m, "#\n");
3860 }
3861 
3862 static void test_cpu_buff_start(struct trace_iterator *iter)
3863 {
3864 	struct trace_seq *s = &iter->seq;
3865 	struct trace_array *tr = iter->tr;
3866 
3867 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3868 		return;
3869 
3870 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3871 		return;
3872 
3873 	if (cpumask_available(iter->started) &&
3874 	    cpumask_test_cpu(iter->cpu, iter->started))
3875 		return;
3876 
3877 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3878 		return;
3879 
3880 	if (cpumask_available(iter->started))
3881 		cpumask_set_cpu(iter->cpu, iter->started);
3882 
3883 	/* Don't print started cpu buffer for the first entry of the trace */
3884 	if (iter->idx > 1)
3885 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3886 				iter->cpu);
3887 }
3888 
3889 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3890 {
3891 	struct trace_array *tr = iter->tr;
3892 	struct trace_seq *s = &iter->seq;
3893 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3894 	struct trace_entry *entry;
3895 	struct trace_event *event;
3896 
3897 	entry = iter->ent;
3898 
3899 	test_cpu_buff_start(iter);
3900 
3901 	event = ftrace_find_event(entry->type);
3902 
3903 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3904 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3905 			trace_print_lat_context(iter);
3906 		else
3907 			trace_print_context(iter);
3908 	}
3909 
3910 	if (trace_seq_has_overflowed(s))
3911 		return TRACE_TYPE_PARTIAL_LINE;
3912 
3913 	if (event)
3914 		return event->funcs->trace(iter, sym_flags, event);
3915 
3916 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3917 
3918 	return trace_handle_return(s);
3919 }
3920 
3921 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3922 {
3923 	struct trace_array *tr = iter->tr;
3924 	struct trace_seq *s = &iter->seq;
3925 	struct trace_entry *entry;
3926 	struct trace_event *event;
3927 
3928 	entry = iter->ent;
3929 
3930 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3931 		trace_seq_printf(s, "%d %d %llu ",
3932 				 entry->pid, iter->cpu, iter->ts);
3933 
3934 	if (trace_seq_has_overflowed(s))
3935 		return TRACE_TYPE_PARTIAL_LINE;
3936 
3937 	event = ftrace_find_event(entry->type);
3938 	if (event)
3939 		return event->funcs->raw(iter, 0, event);
3940 
3941 	trace_seq_printf(s, "%d ?\n", entry->type);
3942 
3943 	return trace_handle_return(s);
3944 }
3945 
3946 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3947 {
3948 	struct trace_array *tr = iter->tr;
3949 	struct trace_seq *s = &iter->seq;
3950 	unsigned char newline = '\n';
3951 	struct trace_entry *entry;
3952 	struct trace_event *event;
3953 
3954 	entry = iter->ent;
3955 
3956 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3957 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3958 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3959 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3960 		if (trace_seq_has_overflowed(s))
3961 			return TRACE_TYPE_PARTIAL_LINE;
3962 	}
3963 
3964 	event = ftrace_find_event(entry->type);
3965 	if (event) {
3966 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3967 		if (ret != TRACE_TYPE_HANDLED)
3968 			return ret;
3969 	}
3970 
3971 	SEQ_PUT_FIELD(s, newline);
3972 
3973 	return trace_handle_return(s);
3974 }
3975 
3976 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3977 {
3978 	struct trace_array *tr = iter->tr;
3979 	struct trace_seq *s = &iter->seq;
3980 	struct trace_entry *entry;
3981 	struct trace_event *event;
3982 
3983 	entry = iter->ent;
3984 
3985 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3986 		SEQ_PUT_FIELD(s, entry->pid);
3987 		SEQ_PUT_FIELD(s, iter->cpu);
3988 		SEQ_PUT_FIELD(s, iter->ts);
3989 		if (trace_seq_has_overflowed(s))
3990 			return TRACE_TYPE_PARTIAL_LINE;
3991 	}
3992 
3993 	event = ftrace_find_event(entry->type);
3994 	return event ? event->funcs->binary(iter, 0, event) :
3995 		TRACE_TYPE_HANDLED;
3996 }
3997 
3998 int trace_empty(struct trace_iterator *iter)
3999 {
4000 	struct ring_buffer_iter *buf_iter;
4001 	int cpu;
4002 
4003 	/* If we are looking at one CPU buffer, only check that one */
4004 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4005 		cpu = iter->cpu_file;
4006 		buf_iter = trace_buffer_iter(iter, cpu);
4007 		if (buf_iter) {
4008 			if (!ring_buffer_iter_empty(buf_iter))
4009 				return 0;
4010 		} else {
4011 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4012 				return 0;
4013 		}
4014 		return 1;
4015 	}
4016 
4017 	for_each_tracing_cpu(cpu) {
4018 		buf_iter = trace_buffer_iter(iter, cpu);
4019 		if (buf_iter) {
4020 			if (!ring_buffer_iter_empty(buf_iter))
4021 				return 0;
4022 		} else {
4023 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4024 				return 0;
4025 		}
4026 	}
4027 
4028 	return 1;
4029 }
4030 
4031 /*  Called with trace_event_read_lock() held. */
4032 enum print_line_t print_trace_line(struct trace_iterator *iter)
4033 {
4034 	struct trace_array *tr = iter->tr;
4035 	unsigned long trace_flags = tr->trace_flags;
4036 	enum print_line_t ret;
4037 
4038 	if (iter->lost_events) {
4039 		if (iter->lost_events == (unsigned long)-1)
4040 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4041 					 iter->cpu);
4042 		else
4043 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4044 					 iter->cpu, iter->lost_events);
4045 		if (trace_seq_has_overflowed(&iter->seq))
4046 			return TRACE_TYPE_PARTIAL_LINE;
4047 	}
4048 
4049 	if (iter->trace && iter->trace->print_line) {
4050 		ret = iter->trace->print_line(iter);
4051 		if (ret != TRACE_TYPE_UNHANDLED)
4052 			return ret;
4053 	}
4054 
4055 	if (iter->ent->type == TRACE_BPUTS &&
4056 			trace_flags & TRACE_ITER_PRINTK &&
4057 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4058 		return trace_print_bputs_msg_only(iter);
4059 
4060 	if (iter->ent->type == TRACE_BPRINT &&
4061 			trace_flags & TRACE_ITER_PRINTK &&
4062 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4063 		return trace_print_bprintk_msg_only(iter);
4064 
4065 	if (iter->ent->type == TRACE_PRINT &&
4066 			trace_flags & TRACE_ITER_PRINTK &&
4067 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4068 		return trace_print_printk_msg_only(iter);
4069 
4070 	if (trace_flags & TRACE_ITER_BIN)
4071 		return print_bin_fmt(iter);
4072 
4073 	if (trace_flags & TRACE_ITER_HEX)
4074 		return print_hex_fmt(iter);
4075 
4076 	if (trace_flags & TRACE_ITER_RAW)
4077 		return print_raw_fmt(iter);
4078 
4079 	return print_trace_fmt(iter);
4080 }
4081 
4082 void trace_latency_header(struct seq_file *m)
4083 {
4084 	struct trace_iterator *iter = m->private;
4085 	struct trace_array *tr = iter->tr;
4086 
4087 	/* print nothing if the buffers are empty */
4088 	if (trace_empty(iter))
4089 		return;
4090 
4091 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4092 		print_trace_header(m, iter);
4093 
4094 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4095 		print_lat_help_header(m);
4096 }
4097 
4098 void trace_default_header(struct seq_file *m)
4099 {
4100 	struct trace_iterator *iter = m->private;
4101 	struct trace_array *tr = iter->tr;
4102 	unsigned long trace_flags = tr->trace_flags;
4103 
4104 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4105 		return;
4106 
4107 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4108 		/* print nothing if the buffers are empty */
4109 		if (trace_empty(iter))
4110 			return;
4111 		print_trace_header(m, iter);
4112 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4113 			print_lat_help_header(m);
4114 	} else {
4115 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4116 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4117 				print_func_help_header_irq(iter->array_buffer,
4118 							   m, trace_flags);
4119 			else
4120 				print_func_help_header(iter->array_buffer, m,
4121 						       trace_flags);
4122 		}
4123 	}
4124 }
4125 
4126 static void test_ftrace_alive(struct seq_file *m)
4127 {
4128 	if (!ftrace_is_dead())
4129 		return;
4130 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4131 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4132 }
4133 
4134 #ifdef CONFIG_TRACER_MAX_TRACE
4135 static void show_snapshot_main_help(struct seq_file *m)
4136 {
4137 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4138 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4139 		    "#                      Takes a snapshot of the main buffer.\n"
4140 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4141 		    "#                      (Doesn't have to be '2' works with any number that\n"
4142 		    "#                       is not a '0' or '1')\n");
4143 }
4144 
4145 static void show_snapshot_percpu_help(struct seq_file *m)
4146 {
4147 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4148 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4149 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4150 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4151 #else
4152 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4153 		    "#                     Must use main snapshot file to allocate.\n");
4154 #endif
4155 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4156 		    "#                      (Doesn't have to be '2' works with any number that\n"
4157 		    "#                       is not a '0' or '1')\n");
4158 }
4159 
4160 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4161 {
4162 	if (iter->tr->allocated_snapshot)
4163 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4164 	else
4165 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4166 
4167 	seq_puts(m, "# Snapshot commands:\n");
4168 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4169 		show_snapshot_main_help(m);
4170 	else
4171 		show_snapshot_percpu_help(m);
4172 }
4173 #else
4174 /* Should never be called */
4175 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4176 #endif
4177 
4178 static int s_show(struct seq_file *m, void *v)
4179 {
4180 	struct trace_iterator *iter = v;
4181 	int ret;
4182 
4183 	if (iter->ent == NULL) {
4184 		if (iter->tr) {
4185 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4186 			seq_puts(m, "#\n");
4187 			test_ftrace_alive(m);
4188 		}
4189 		if (iter->snapshot && trace_empty(iter))
4190 			print_snapshot_help(m, iter);
4191 		else if (iter->trace && iter->trace->print_header)
4192 			iter->trace->print_header(m);
4193 		else
4194 			trace_default_header(m);
4195 
4196 	} else if (iter->leftover) {
4197 		/*
4198 		 * If we filled the seq_file buffer earlier, we
4199 		 * want to just show it now.
4200 		 */
4201 		ret = trace_print_seq(m, &iter->seq);
4202 
4203 		/* ret should this time be zero, but you never know */
4204 		iter->leftover = ret;
4205 
4206 	} else {
4207 		print_trace_line(iter);
4208 		ret = trace_print_seq(m, &iter->seq);
4209 		/*
4210 		 * If we overflow the seq_file buffer, then it will
4211 		 * ask us for this data again at start up.
4212 		 * Use that instead.
4213 		 *  ret is 0 if seq_file write succeeded.
4214 		 *        -1 otherwise.
4215 		 */
4216 		iter->leftover = ret;
4217 	}
4218 
4219 	return 0;
4220 }
4221 
4222 /*
4223  * Should be used after trace_array_get(), trace_types_lock
4224  * ensures that i_cdev was already initialized.
4225  */
4226 static inline int tracing_get_cpu(struct inode *inode)
4227 {
4228 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4229 		return (long)inode->i_cdev - 1;
4230 	return RING_BUFFER_ALL_CPUS;
4231 }
4232 
4233 static const struct seq_operations tracer_seq_ops = {
4234 	.start		= s_start,
4235 	.next		= s_next,
4236 	.stop		= s_stop,
4237 	.show		= s_show,
4238 };
4239 
4240 static struct trace_iterator *
4241 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4242 {
4243 	struct trace_array *tr = inode->i_private;
4244 	struct trace_iterator *iter;
4245 	int cpu;
4246 
4247 	if (tracing_disabled)
4248 		return ERR_PTR(-ENODEV);
4249 
4250 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4251 	if (!iter)
4252 		return ERR_PTR(-ENOMEM);
4253 
4254 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4255 				    GFP_KERNEL);
4256 	if (!iter->buffer_iter)
4257 		goto release;
4258 
4259 	/*
4260 	 * trace_find_next_entry() may need to save off iter->ent.
4261 	 * It will place it into the iter->temp buffer. As most
4262 	 * events are less than 128, allocate a buffer of that size.
4263 	 * If one is greater, then trace_find_next_entry() will
4264 	 * allocate a new buffer to adjust for the bigger iter->ent.
4265 	 * It's not critical if it fails to get allocated here.
4266 	 */
4267 	iter->temp = kmalloc(128, GFP_KERNEL);
4268 	if (iter->temp)
4269 		iter->temp_size = 128;
4270 
4271 	/*
4272 	 * We make a copy of the current tracer to avoid concurrent
4273 	 * changes on it while we are reading.
4274 	 */
4275 	mutex_lock(&trace_types_lock);
4276 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4277 	if (!iter->trace)
4278 		goto fail;
4279 
4280 	*iter->trace = *tr->current_trace;
4281 
4282 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4283 		goto fail;
4284 
4285 	iter->tr = tr;
4286 
4287 #ifdef CONFIG_TRACER_MAX_TRACE
4288 	/* Currently only the top directory has a snapshot */
4289 	if (tr->current_trace->print_max || snapshot)
4290 		iter->array_buffer = &tr->max_buffer;
4291 	else
4292 #endif
4293 		iter->array_buffer = &tr->array_buffer;
4294 	iter->snapshot = snapshot;
4295 	iter->pos = -1;
4296 	iter->cpu_file = tracing_get_cpu(inode);
4297 	mutex_init(&iter->mutex);
4298 
4299 	/* Notify the tracer early; before we stop tracing. */
4300 	if (iter->trace->open)
4301 		iter->trace->open(iter);
4302 
4303 	/* Annotate start of buffers if we had overruns */
4304 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4305 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4306 
4307 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4308 	if (trace_clocks[tr->clock_id].in_ns)
4309 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4310 
4311 	/*
4312 	 * If pause-on-trace is enabled, then stop the trace while
4313 	 * dumping, unless this is the "snapshot" file
4314 	 */
4315 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4316 		tracing_stop_tr(tr);
4317 
4318 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4319 		for_each_tracing_cpu(cpu) {
4320 			iter->buffer_iter[cpu] =
4321 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4322 							 cpu, GFP_KERNEL);
4323 		}
4324 		ring_buffer_read_prepare_sync();
4325 		for_each_tracing_cpu(cpu) {
4326 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4327 			tracing_iter_reset(iter, cpu);
4328 		}
4329 	} else {
4330 		cpu = iter->cpu_file;
4331 		iter->buffer_iter[cpu] =
4332 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4333 						 cpu, GFP_KERNEL);
4334 		ring_buffer_read_prepare_sync();
4335 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4336 		tracing_iter_reset(iter, cpu);
4337 	}
4338 
4339 	mutex_unlock(&trace_types_lock);
4340 
4341 	return iter;
4342 
4343  fail:
4344 	mutex_unlock(&trace_types_lock);
4345 	kfree(iter->trace);
4346 	kfree(iter->temp);
4347 	kfree(iter->buffer_iter);
4348 release:
4349 	seq_release_private(inode, file);
4350 	return ERR_PTR(-ENOMEM);
4351 }
4352 
4353 int tracing_open_generic(struct inode *inode, struct file *filp)
4354 {
4355 	int ret;
4356 
4357 	ret = tracing_check_open_get_tr(NULL);
4358 	if (ret)
4359 		return ret;
4360 
4361 	filp->private_data = inode->i_private;
4362 	return 0;
4363 }
4364 
4365 bool tracing_is_disabled(void)
4366 {
4367 	return (tracing_disabled) ? true: false;
4368 }
4369 
4370 /*
4371  * Open and update trace_array ref count.
4372  * Must have the current trace_array passed to it.
4373  */
4374 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4375 {
4376 	struct trace_array *tr = inode->i_private;
4377 	int ret;
4378 
4379 	ret = tracing_check_open_get_tr(tr);
4380 	if (ret)
4381 		return ret;
4382 
4383 	filp->private_data = inode->i_private;
4384 
4385 	return 0;
4386 }
4387 
4388 static int tracing_release(struct inode *inode, struct file *file)
4389 {
4390 	struct trace_array *tr = inode->i_private;
4391 	struct seq_file *m = file->private_data;
4392 	struct trace_iterator *iter;
4393 	int cpu;
4394 
4395 	if (!(file->f_mode & FMODE_READ)) {
4396 		trace_array_put(tr);
4397 		return 0;
4398 	}
4399 
4400 	/* Writes do not use seq_file */
4401 	iter = m->private;
4402 	mutex_lock(&trace_types_lock);
4403 
4404 	for_each_tracing_cpu(cpu) {
4405 		if (iter->buffer_iter[cpu])
4406 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4407 	}
4408 
4409 	if (iter->trace && iter->trace->close)
4410 		iter->trace->close(iter);
4411 
4412 	if (!iter->snapshot && tr->stop_count)
4413 		/* reenable tracing if it was previously enabled */
4414 		tracing_start_tr(tr);
4415 
4416 	__trace_array_put(tr);
4417 
4418 	mutex_unlock(&trace_types_lock);
4419 
4420 	mutex_destroy(&iter->mutex);
4421 	free_cpumask_var(iter->started);
4422 	kfree(iter->temp);
4423 	kfree(iter->trace);
4424 	kfree(iter->buffer_iter);
4425 	seq_release_private(inode, file);
4426 
4427 	return 0;
4428 }
4429 
4430 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4431 {
4432 	struct trace_array *tr = inode->i_private;
4433 
4434 	trace_array_put(tr);
4435 	return 0;
4436 }
4437 
4438 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4439 {
4440 	struct trace_array *tr = inode->i_private;
4441 
4442 	trace_array_put(tr);
4443 
4444 	return single_release(inode, file);
4445 }
4446 
4447 static int tracing_open(struct inode *inode, struct file *file)
4448 {
4449 	struct trace_array *tr = inode->i_private;
4450 	struct trace_iterator *iter;
4451 	int ret;
4452 
4453 	ret = tracing_check_open_get_tr(tr);
4454 	if (ret)
4455 		return ret;
4456 
4457 	/* If this file was open for write, then erase contents */
4458 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4459 		int cpu = tracing_get_cpu(inode);
4460 		struct array_buffer *trace_buf = &tr->array_buffer;
4461 
4462 #ifdef CONFIG_TRACER_MAX_TRACE
4463 		if (tr->current_trace->print_max)
4464 			trace_buf = &tr->max_buffer;
4465 #endif
4466 
4467 		if (cpu == RING_BUFFER_ALL_CPUS)
4468 			tracing_reset_online_cpus(trace_buf);
4469 		else
4470 			tracing_reset_cpu(trace_buf, cpu);
4471 	}
4472 
4473 	if (file->f_mode & FMODE_READ) {
4474 		iter = __tracing_open(inode, file, false);
4475 		if (IS_ERR(iter))
4476 			ret = PTR_ERR(iter);
4477 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4478 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4479 	}
4480 
4481 	if (ret < 0)
4482 		trace_array_put(tr);
4483 
4484 	return ret;
4485 }
4486 
4487 /*
4488  * Some tracers are not suitable for instance buffers.
4489  * A tracer is always available for the global array (toplevel)
4490  * or if it explicitly states that it is.
4491  */
4492 static bool
4493 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4494 {
4495 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4496 }
4497 
4498 /* Find the next tracer that this trace array may use */
4499 static struct tracer *
4500 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4501 {
4502 	while (t && !trace_ok_for_array(t, tr))
4503 		t = t->next;
4504 
4505 	return t;
4506 }
4507 
4508 static void *
4509 t_next(struct seq_file *m, void *v, loff_t *pos)
4510 {
4511 	struct trace_array *tr = m->private;
4512 	struct tracer *t = v;
4513 
4514 	(*pos)++;
4515 
4516 	if (t)
4517 		t = get_tracer_for_array(tr, t->next);
4518 
4519 	return t;
4520 }
4521 
4522 static void *t_start(struct seq_file *m, loff_t *pos)
4523 {
4524 	struct trace_array *tr = m->private;
4525 	struct tracer *t;
4526 	loff_t l = 0;
4527 
4528 	mutex_lock(&trace_types_lock);
4529 
4530 	t = get_tracer_for_array(tr, trace_types);
4531 	for (; t && l < *pos; t = t_next(m, t, &l))
4532 			;
4533 
4534 	return t;
4535 }
4536 
4537 static void t_stop(struct seq_file *m, void *p)
4538 {
4539 	mutex_unlock(&trace_types_lock);
4540 }
4541 
4542 static int t_show(struct seq_file *m, void *v)
4543 {
4544 	struct tracer *t = v;
4545 
4546 	if (!t)
4547 		return 0;
4548 
4549 	seq_puts(m, t->name);
4550 	if (t->next)
4551 		seq_putc(m, ' ');
4552 	else
4553 		seq_putc(m, '\n');
4554 
4555 	return 0;
4556 }
4557 
4558 static const struct seq_operations show_traces_seq_ops = {
4559 	.start		= t_start,
4560 	.next		= t_next,
4561 	.stop		= t_stop,
4562 	.show		= t_show,
4563 };
4564 
4565 static int show_traces_open(struct inode *inode, struct file *file)
4566 {
4567 	struct trace_array *tr = inode->i_private;
4568 	struct seq_file *m;
4569 	int ret;
4570 
4571 	ret = tracing_check_open_get_tr(tr);
4572 	if (ret)
4573 		return ret;
4574 
4575 	ret = seq_open(file, &show_traces_seq_ops);
4576 	if (ret) {
4577 		trace_array_put(tr);
4578 		return ret;
4579 	}
4580 
4581 	m = file->private_data;
4582 	m->private = tr;
4583 
4584 	return 0;
4585 }
4586 
4587 static int show_traces_release(struct inode *inode, struct file *file)
4588 {
4589 	struct trace_array *tr = inode->i_private;
4590 
4591 	trace_array_put(tr);
4592 	return seq_release(inode, file);
4593 }
4594 
4595 static ssize_t
4596 tracing_write_stub(struct file *filp, const char __user *ubuf,
4597 		   size_t count, loff_t *ppos)
4598 {
4599 	return count;
4600 }
4601 
4602 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4603 {
4604 	int ret;
4605 
4606 	if (file->f_mode & FMODE_READ)
4607 		ret = seq_lseek(file, offset, whence);
4608 	else
4609 		file->f_pos = ret = 0;
4610 
4611 	return ret;
4612 }
4613 
4614 static const struct file_operations tracing_fops = {
4615 	.open		= tracing_open,
4616 	.read		= seq_read,
4617 	.write		= tracing_write_stub,
4618 	.llseek		= tracing_lseek,
4619 	.release	= tracing_release,
4620 };
4621 
4622 static const struct file_operations show_traces_fops = {
4623 	.open		= show_traces_open,
4624 	.read		= seq_read,
4625 	.llseek		= seq_lseek,
4626 	.release	= show_traces_release,
4627 };
4628 
4629 static ssize_t
4630 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4631 		     size_t count, loff_t *ppos)
4632 {
4633 	struct trace_array *tr = file_inode(filp)->i_private;
4634 	char *mask_str;
4635 	int len;
4636 
4637 	len = snprintf(NULL, 0, "%*pb\n",
4638 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4639 	mask_str = kmalloc(len, GFP_KERNEL);
4640 	if (!mask_str)
4641 		return -ENOMEM;
4642 
4643 	len = snprintf(mask_str, len, "%*pb\n",
4644 		       cpumask_pr_args(tr->tracing_cpumask));
4645 	if (len >= count) {
4646 		count = -EINVAL;
4647 		goto out_err;
4648 	}
4649 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4650 
4651 out_err:
4652 	kfree(mask_str);
4653 
4654 	return count;
4655 }
4656 
4657 int tracing_set_cpumask(struct trace_array *tr,
4658 			cpumask_var_t tracing_cpumask_new)
4659 {
4660 	int cpu;
4661 
4662 	if (!tr)
4663 		return -EINVAL;
4664 
4665 	local_irq_disable();
4666 	arch_spin_lock(&tr->max_lock);
4667 	for_each_tracing_cpu(cpu) {
4668 		/*
4669 		 * Increase/decrease the disabled counter if we are
4670 		 * about to flip a bit in the cpumask:
4671 		 */
4672 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4673 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4674 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4675 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4676 		}
4677 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4678 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4679 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4680 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4681 		}
4682 	}
4683 	arch_spin_unlock(&tr->max_lock);
4684 	local_irq_enable();
4685 
4686 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4687 
4688 	return 0;
4689 }
4690 
4691 static ssize_t
4692 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4693 		      size_t count, loff_t *ppos)
4694 {
4695 	struct trace_array *tr = file_inode(filp)->i_private;
4696 	cpumask_var_t tracing_cpumask_new;
4697 	int err;
4698 
4699 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4700 		return -ENOMEM;
4701 
4702 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4703 	if (err)
4704 		goto err_free;
4705 
4706 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4707 	if (err)
4708 		goto err_free;
4709 
4710 	free_cpumask_var(tracing_cpumask_new);
4711 
4712 	return count;
4713 
4714 err_free:
4715 	free_cpumask_var(tracing_cpumask_new);
4716 
4717 	return err;
4718 }
4719 
4720 static const struct file_operations tracing_cpumask_fops = {
4721 	.open		= tracing_open_generic_tr,
4722 	.read		= tracing_cpumask_read,
4723 	.write		= tracing_cpumask_write,
4724 	.release	= tracing_release_generic_tr,
4725 	.llseek		= generic_file_llseek,
4726 };
4727 
4728 static int tracing_trace_options_show(struct seq_file *m, void *v)
4729 {
4730 	struct tracer_opt *trace_opts;
4731 	struct trace_array *tr = m->private;
4732 	u32 tracer_flags;
4733 	int i;
4734 
4735 	mutex_lock(&trace_types_lock);
4736 	tracer_flags = tr->current_trace->flags->val;
4737 	trace_opts = tr->current_trace->flags->opts;
4738 
4739 	for (i = 0; trace_options[i]; i++) {
4740 		if (tr->trace_flags & (1 << i))
4741 			seq_printf(m, "%s\n", trace_options[i]);
4742 		else
4743 			seq_printf(m, "no%s\n", trace_options[i]);
4744 	}
4745 
4746 	for (i = 0; trace_opts[i].name; i++) {
4747 		if (tracer_flags & trace_opts[i].bit)
4748 			seq_printf(m, "%s\n", trace_opts[i].name);
4749 		else
4750 			seq_printf(m, "no%s\n", trace_opts[i].name);
4751 	}
4752 	mutex_unlock(&trace_types_lock);
4753 
4754 	return 0;
4755 }
4756 
4757 static int __set_tracer_option(struct trace_array *tr,
4758 			       struct tracer_flags *tracer_flags,
4759 			       struct tracer_opt *opts, int neg)
4760 {
4761 	struct tracer *trace = tracer_flags->trace;
4762 	int ret;
4763 
4764 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4765 	if (ret)
4766 		return ret;
4767 
4768 	if (neg)
4769 		tracer_flags->val &= ~opts->bit;
4770 	else
4771 		tracer_flags->val |= opts->bit;
4772 	return 0;
4773 }
4774 
4775 /* Try to assign a tracer specific option */
4776 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4777 {
4778 	struct tracer *trace = tr->current_trace;
4779 	struct tracer_flags *tracer_flags = trace->flags;
4780 	struct tracer_opt *opts = NULL;
4781 	int i;
4782 
4783 	for (i = 0; tracer_flags->opts[i].name; i++) {
4784 		opts = &tracer_flags->opts[i];
4785 
4786 		if (strcmp(cmp, opts->name) == 0)
4787 			return __set_tracer_option(tr, trace->flags, opts, neg);
4788 	}
4789 
4790 	return -EINVAL;
4791 }
4792 
4793 /* Some tracers require overwrite to stay enabled */
4794 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4795 {
4796 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4797 		return -1;
4798 
4799 	return 0;
4800 }
4801 
4802 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4803 {
4804 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4805 	    (mask == TRACE_ITER_RECORD_CMD))
4806 		lockdep_assert_held(&event_mutex);
4807 
4808 	/* do nothing if flag is already set */
4809 	if (!!(tr->trace_flags & mask) == !!enabled)
4810 		return 0;
4811 
4812 	/* Give the tracer a chance to approve the change */
4813 	if (tr->current_trace->flag_changed)
4814 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4815 			return -EINVAL;
4816 
4817 	if (enabled)
4818 		tr->trace_flags |= mask;
4819 	else
4820 		tr->trace_flags &= ~mask;
4821 
4822 	if (mask == TRACE_ITER_RECORD_CMD)
4823 		trace_event_enable_cmd_record(enabled);
4824 
4825 	if (mask == TRACE_ITER_RECORD_TGID) {
4826 		if (!tgid_map)
4827 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4828 					   sizeof(*tgid_map),
4829 					   GFP_KERNEL);
4830 		if (!tgid_map) {
4831 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4832 			return -ENOMEM;
4833 		}
4834 
4835 		trace_event_enable_tgid_record(enabled);
4836 	}
4837 
4838 	if (mask == TRACE_ITER_EVENT_FORK)
4839 		trace_event_follow_fork(tr, enabled);
4840 
4841 	if (mask == TRACE_ITER_FUNC_FORK)
4842 		ftrace_pid_follow_fork(tr, enabled);
4843 
4844 	if (mask == TRACE_ITER_OVERWRITE) {
4845 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4846 #ifdef CONFIG_TRACER_MAX_TRACE
4847 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4848 #endif
4849 	}
4850 
4851 	if (mask == TRACE_ITER_PRINTK) {
4852 		trace_printk_start_stop_comm(enabled);
4853 		trace_printk_control(enabled);
4854 	}
4855 
4856 	return 0;
4857 }
4858 
4859 int trace_set_options(struct trace_array *tr, char *option)
4860 {
4861 	char *cmp;
4862 	int neg = 0;
4863 	int ret;
4864 	size_t orig_len = strlen(option);
4865 	int len;
4866 
4867 	cmp = strstrip(option);
4868 
4869 	len = str_has_prefix(cmp, "no");
4870 	if (len)
4871 		neg = 1;
4872 
4873 	cmp += len;
4874 
4875 	mutex_lock(&event_mutex);
4876 	mutex_lock(&trace_types_lock);
4877 
4878 	ret = match_string(trace_options, -1, cmp);
4879 	/* If no option could be set, test the specific tracer options */
4880 	if (ret < 0)
4881 		ret = set_tracer_option(tr, cmp, neg);
4882 	else
4883 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4884 
4885 	mutex_unlock(&trace_types_lock);
4886 	mutex_unlock(&event_mutex);
4887 
4888 	/*
4889 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4890 	 * turn it back into a space.
4891 	 */
4892 	if (orig_len > strlen(option))
4893 		option[strlen(option)] = ' ';
4894 
4895 	return ret;
4896 }
4897 
4898 static void __init apply_trace_boot_options(void)
4899 {
4900 	char *buf = trace_boot_options_buf;
4901 	char *option;
4902 
4903 	while (true) {
4904 		option = strsep(&buf, ",");
4905 
4906 		if (!option)
4907 			break;
4908 
4909 		if (*option)
4910 			trace_set_options(&global_trace, option);
4911 
4912 		/* Put back the comma to allow this to be called again */
4913 		if (buf)
4914 			*(buf - 1) = ',';
4915 	}
4916 }
4917 
4918 static ssize_t
4919 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4920 			size_t cnt, loff_t *ppos)
4921 {
4922 	struct seq_file *m = filp->private_data;
4923 	struct trace_array *tr = m->private;
4924 	char buf[64];
4925 	int ret;
4926 
4927 	if (cnt >= sizeof(buf))
4928 		return -EINVAL;
4929 
4930 	if (copy_from_user(buf, ubuf, cnt))
4931 		return -EFAULT;
4932 
4933 	buf[cnt] = 0;
4934 
4935 	ret = trace_set_options(tr, buf);
4936 	if (ret < 0)
4937 		return ret;
4938 
4939 	*ppos += cnt;
4940 
4941 	return cnt;
4942 }
4943 
4944 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4945 {
4946 	struct trace_array *tr = inode->i_private;
4947 	int ret;
4948 
4949 	ret = tracing_check_open_get_tr(tr);
4950 	if (ret)
4951 		return ret;
4952 
4953 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4954 	if (ret < 0)
4955 		trace_array_put(tr);
4956 
4957 	return ret;
4958 }
4959 
4960 static const struct file_operations tracing_iter_fops = {
4961 	.open		= tracing_trace_options_open,
4962 	.read		= seq_read,
4963 	.llseek		= seq_lseek,
4964 	.release	= tracing_single_release_tr,
4965 	.write		= tracing_trace_options_write,
4966 };
4967 
4968 static const char readme_msg[] =
4969 	"tracing mini-HOWTO:\n\n"
4970 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4971 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4972 	" Important files:\n"
4973 	"  trace\t\t\t- The static contents of the buffer\n"
4974 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4975 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4976 	"  current_tracer\t- function and latency tracers\n"
4977 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4978 	"  error_log\t- error log for failed commands (that support it)\n"
4979 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4980 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4981 	"  trace_clock\t\t-change the clock used to order events\n"
4982 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4983 	"      global:   Synced across CPUs but slows tracing down.\n"
4984 	"     counter:   Not a clock, but just an increment\n"
4985 	"      uptime:   Jiffy counter from time of boot\n"
4986 	"        perf:   Same clock that perf events use\n"
4987 #ifdef CONFIG_X86_64
4988 	"     x86-tsc:   TSC cycle counter\n"
4989 #endif
4990 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4991 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4992 	"    absolute:   Absolute (standalone) timestamp\n"
4993 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4994 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4995 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4996 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4997 	"\t\t\t  Remove sub-buffer with rmdir\n"
4998 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4999 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5000 	"\t\t\t  option name\n"
5001 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5002 #ifdef CONFIG_DYNAMIC_FTRACE
5003 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5004 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5005 	"\t\t\t  functions\n"
5006 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5007 	"\t     modules: Can select a group via module\n"
5008 	"\t      Format: :mod:<module-name>\n"
5009 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5010 	"\t    triggers: a command to perform when function is hit\n"
5011 	"\t      Format: <function>:<trigger>[:count]\n"
5012 	"\t     trigger: traceon, traceoff\n"
5013 	"\t\t      enable_event:<system>:<event>\n"
5014 	"\t\t      disable_event:<system>:<event>\n"
5015 #ifdef CONFIG_STACKTRACE
5016 	"\t\t      stacktrace\n"
5017 #endif
5018 #ifdef CONFIG_TRACER_SNAPSHOT
5019 	"\t\t      snapshot\n"
5020 #endif
5021 	"\t\t      dump\n"
5022 	"\t\t      cpudump\n"
5023 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5024 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5025 	"\t     The first one will disable tracing every time do_fault is hit\n"
5026 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5027 	"\t       The first time do trap is hit and it disables tracing, the\n"
5028 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5029 	"\t       the counter will not decrement. It only decrements when the\n"
5030 	"\t       trigger did work\n"
5031 	"\t     To remove trigger without count:\n"
5032 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5033 	"\t     To remove trigger with a count:\n"
5034 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5035 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5036 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5037 	"\t    modules: Can select a group via module command :mod:\n"
5038 	"\t    Does not accept triggers\n"
5039 #endif /* CONFIG_DYNAMIC_FTRACE */
5040 #ifdef CONFIG_FUNCTION_TRACER
5041 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5042 	"\t\t    (function)\n"
5043 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5044 	"\t\t    (function)\n"
5045 #endif
5046 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5047 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5048 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5049 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5050 #endif
5051 #ifdef CONFIG_TRACER_SNAPSHOT
5052 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5053 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5054 	"\t\t\t  information\n"
5055 #endif
5056 #ifdef CONFIG_STACK_TRACER
5057 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5058 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5059 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5060 	"\t\t\t  new trace)\n"
5061 #ifdef CONFIG_DYNAMIC_FTRACE
5062 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5063 	"\t\t\t  traces\n"
5064 #endif
5065 #endif /* CONFIG_STACK_TRACER */
5066 #ifdef CONFIG_DYNAMIC_EVENTS
5067 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5068 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5069 #endif
5070 #ifdef CONFIG_KPROBE_EVENTS
5071 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5072 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5073 #endif
5074 #ifdef CONFIG_UPROBE_EVENTS
5075 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5076 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5077 #endif
5078 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5079 	"\t  accepts: event-definitions (one definition per line)\n"
5080 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5081 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5082 #ifdef CONFIG_HIST_TRIGGERS
5083 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5084 #endif
5085 	"\t           -:[<group>/]<event>\n"
5086 #ifdef CONFIG_KPROBE_EVENTS
5087 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5088   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5089 #endif
5090 #ifdef CONFIG_UPROBE_EVENTS
5091   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5092 #endif
5093 	"\t     args: <name>=fetcharg[:type]\n"
5094 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5095 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5096 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5097 #else
5098 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5099 #endif
5100 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5101 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5102 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5103 	"\t           <type>\\[<array-size>\\]\n"
5104 #ifdef CONFIG_HIST_TRIGGERS
5105 	"\t    field: <stype> <name>;\n"
5106 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5107 	"\t           [unsigned] char/int/long\n"
5108 #endif
5109 #endif
5110 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5111 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5112 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5113 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5114 	"\t\t\t  events\n"
5115 	"      filter\t\t- If set, only events passing filter are traced\n"
5116 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5117 	"\t\t\t  <event>:\n"
5118 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5119 	"      filter\t\t- If set, only events passing filter are traced\n"
5120 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5121 	"\t    Format: <trigger>[:count][if <filter>]\n"
5122 	"\t   trigger: traceon, traceoff\n"
5123 	"\t            enable_event:<system>:<event>\n"
5124 	"\t            disable_event:<system>:<event>\n"
5125 #ifdef CONFIG_HIST_TRIGGERS
5126 	"\t            enable_hist:<system>:<event>\n"
5127 	"\t            disable_hist:<system>:<event>\n"
5128 #endif
5129 #ifdef CONFIG_STACKTRACE
5130 	"\t\t    stacktrace\n"
5131 #endif
5132 #ifdef CONFIG_TRACER_SNAPSHOT
5133 	"\t\t    snapshot\n"
5134 #endif
5135 #ifdef CONFIG_HIST_TRIGGERS
5136 	"\t\t    hist (see below)\n"
5137 #endif
5138 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5139 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5140 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5141 	"\t                  events/block/block_unplug/trigger\n"
5142 	"\t   The first disables tracing every time block_unplug is hit.\n"
5143 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5144 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5145 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5146 	"\t   Like function triggers, the counter is only decremented if it\n"
5147 	"\t    enabled or disabled tracing.\n"
5148 	"\t   To remove a trigger without a count:\n"
5149 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5150 	"\t   To remove a trigger with a count:\n"
5151 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5152 	"\t   Filters can be ignored when removing a trigger.\n"
5153 #ifdef CONFIG_HIST_TRIGGERS
5154 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5155 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5156 	"\t            [:values=<field1[,field2,...]>]\n"
5157 	"\t            [:sort=<field1[,field2,...]>]\n"
5158 	"\t            [:size=#entries]\n"
5159 	"\t            [:pause][:continue][:clear]\n"
5160 	"\t            [:name=histname1]\n"
5161 	"\t            [:<handler>.<action>]\n"
5162 	"\t            [if <filter>]\n\n"
5163 	"\t    When a matching event is hit, an entry is added to a hash\n"
5164 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5165 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5166 	"\t    correspond to fields in the event's format description.  Keys\n"
5167 	"\t    can be any field, or the special string 'stacktrace'.\n"
5168 	"\t    Compound keys consisting of up to two fields can be specified\n"
5169 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5170 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5171 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5172 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5173 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5174 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5175 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5176 	"\t    its histogram data will be shared with other triggers of the\n"
5177 	"\t    same name, and trigger hits will update this common data.\n\n"
5178 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5179 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5180 	"\t    triggers attached to an event, there will be a table for each\n"
5181 	"\t    trigger in the output.  The table displayed for a named\n"
5182 	"\t    trigger will be the same as any other instance having the\n"
5183 	"\t    same name.  The default format used to display a given field\n"
5184 	"\t    can be modified by appending any of the following modifiers\n"
5185 	"\t    to the field name, as applicable:\n\n"
5186 	"\t            .hex        display a number as a hex value\n"
5187 	"\t            .sym        display an address as a symbol\n"
5188 	"\t            .sym-offset display an address as a symbol and offset\n"
5189 	"\t            .execname   display a common_pid as a program name\n"
5190 	"\t            .syscall    display a syscall id as a syscall name\n"
5191 	"\t            .log2       display log2 value rather than raw number\n"
5192 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5193 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5194 	"\t    trigger or to start a hist trigger but not log any events\n"
5195 	"\t    until told to do so.  'continue' can be used to start or\n"
5196 	"\t    restart a paused hist trigger.\n\n"
5197 	"\t    The 'clear' parameter will clear the contents of a running\n"
5198 	"\t    hist trigger and leave its current paused/active state\n"
5199 	"\t    unchanged.\n\n"
5200 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5201 	"\t    have one event conditionally start and stop another event's\n"
5202 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5203 	"\t    the enable_event and disable_event triggers.\n\n"
5204 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5205 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5206 	"\t        <handler>.<action>\n\n"
5207 	"\t    The available handlers are:\n\n"
5208 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5209 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5210 	"\t        onchange(var)            - invoke action if var changes\n\n"
5211 	"\t    The available actions are:\n\n"
5212 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5213 	"\t        save(field,...)                      - save current event fields\n"
5214 #ifdef CONFIG_TRACER_SNAPSHOT
5215 	"\t        snapshot()                           - snapshot the trace buffer\n"
5216 #endif
5217 #endif
5218 ;
5219 
5220 static ssize_t
5221 tracing_readme_read(struct file *filp, char __user *ubuf,
5222 		       size_t cnt, loff_t *ppos)
5223 {
5224 	return simple_read_from_buffer(ubuf, cnt, ppos,
5225 					readme_msg, strlen(readme_msg));
5226 }
5227 
5228 static const struct file_operations tracing_readme_fops = {
5229 	.open		= tracing_open_generic,
5230 	.read		= tracing_readme_read,
5231 	.llseek		= generic_file_llseek,
5232 };
5233 
5234 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5235 {
5236 	int *ptr = v;
5237 
5238 	if (*pos || m->count)
5239 		ptr++;
5240 
5241 	(*pos)++;
5242 
5243 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5244 		if (trace_find_tgid(*ptr))
5245 			return ptr;
5246 	}
5247 
5248 	return NULL;
5249 }
5250 
5251 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5252 {
5253 	void *v;
5254 	loff_t l = 0;
5255 
5256 	if (!tgid_map)
5257 		return NULL;
5258 
5259 	v = &tgid_map[0];
5260 	while (l <= *pos) {
5261 		v = saved_tgids_next(m, v, &l);
5262 		if (!v)
5263 			return NULL;
5264 	}
5265 
5266 	return v;
5267 }
5268 
5269 static void saved_tgids_stop(struct seq_file *m, void *v)
5270 {
5271 }
5272 
5273 static int saved_tgids_show(struct seq_file *m, void *v)
5274 {
5275 	int pid = (int *)v - tgid_map;
5276 
5277 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5278 	return 0;
5279 }
5280 
5281 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5282 	.start		= saved_tgids_start,
5283 	.stop		= saved_tgids_stop,
5284 	.next		= saved_tgids_next,
5285 	.show		= saved_tgids_show,
5286 };
5287 
5288 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5289 {
5290 	int ret;
5291 
5292 	ret = tracing_check_open_get_tr(NULL);
5293 	if (ret)
5294 		return ret;
5295 
5296 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5297 }
5298 
5299 
5300 static const struct file_operations tracing_saved_tgids_fops = {
5301 	.open		= tracing_saved_tgids_open,
5302 	.read		= seq_read,
5303 	.llseek		= seq_lseek,
5304 	.release	= seq_release,
5305 };
5306 
5307 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5308 {
5309 	unsigned int *ptr = v;
5310 
5311 	if (*pos || m->count)
5312 		ptr++;
5313 
5314 	(*pos)++;
5315 
5316 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5317 	     ptr++) {
5318 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5319 			continue;
5320 
5321 		return ptr;
5322 	}
5323 
5324 	return NULL;
5325 }
5326 
5327 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5328 {
5329 	void *v;
5330 	loff_t l = 0;
5331 
5332 	preempt_disable();
5333 	arch_spin_lock(&trace_cmdline_lock);
5334 
5335 	v = &savedcmd->map_cmdline_to_pid[0];
5336 	while (l <= *pos) {
5337 		v = saved_cmdlines_next(m, v, &l);
5338 		if (!v)
5339 			return NULL;
5340 	}
5341 
5342 	return v;
5343 }
5344 
5345 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5346 {
5347 	arch_spin_unlock(&trace_cmdline_lock);
5348 	preempt_enable();
5349 }
5350 
5351 static int saved_cmdlines_show(struct seq_file *m, void *v)
5352 {
5353 	char buf[TASK_COMM_LEN];
5354 	unsigned int *pid = v;
5355 
5356 	__trace_find_cmdline(*pid, buf);
5357 	seq_printf(m, "%d %s\n", *pid, buf);
5358 	return 0;
5359 }
5360 
5361 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5362 	.start		= saved_cmdlines_start,
5363 	.next		= saved_cmdlines_next,
5364 	.stop		= saved_cmdlines_stop,
5365 	.show		= saved_cmdlines_show,
5366 };
5367 
5368 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5369 {
5370 	int ret;
5371 
5372 	ret = tracing_check_open_get_tr(NULL);
5373 	if (ret)
5374 		return ret;
5375 
5376 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5377 }
5378 
5379 static const struct file_operations tracing_saved_cmdlines_fops = {
5380 	.open		= tracing_saved_cmdlines_open,
5381 	.read		= seq_read,
5382 	.llseek		= seq_lseek,
5383 	.release	= seq_release,
5384 };
5385 
5386 static ssize_t
5387 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5388 				 size_t cnt, loff_t *ppos)
5389 {
5390 	char buf[64];
5391 	int r;
5392 
5393 	arch_spin_lock(&trace_cmdline_lock);
5394 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5395 	arch_spin_unlock(&trace_cmdline_lock);
5396 
5397 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5398 }
5399 
5400 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5401 {
5402 	kfree(s->saved_cmdlines);
5403 	kfree(s->map_cmdline_to_pid);
5404 	kfree(s);
5405 }
5406 
5407 static int tracing_resize_saved_cmdlines(unsigned int val)
5408 {
5409 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5410 
5411 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5412 	if (!s)
5413 		return -ENOMEM;
5414 
5415 	if (allocate_cmdlines_buffer(val, s) < 0) {
5416 		kfree(s);
5417 		return -ENOMEM;
5418 	}
5419 
5420 	arch_spin_lock(&trace_cmdline_lock);
5421 	savedcmd_temp = savedcmd;
5422 	savedcmd = s;
5423 	arch_spin_unlock(&trace_cmdline_lock);
5424 	free_saved_cmdlines_buffer(savedcmd_temp);
5425 
5426 	return 0;
5427 }
5428 
5429 static ssize_t
5430 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5431 				  size_t cnt, loff_t *ppos)
5432 {
5433 	unsigned long val;
5434 	int ret;
5435 
5436 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5437 	if (ret)
5438 		return ret;
5439 
5440 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5441 	if (!val || val > PID_MAX_DEFAULT)
5442 		return -EINVAL;
5443 
5444 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5445 	if (ret < 0)
5446 		return ret;
5447 
5448 	*ppos += cnt;
5449 
5450 	return cnt;
5451 }
5452 
5453 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5454 	.open		= tracing_open_generic,
5455 	.read		= tracing_saved_cmdlines_size_read,
5456 	.write		= tracing_saved_cmdlines_size_write,
5457 };
5458 
5459 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5460 static union trace_eval_map_item *
5461 update_eval_map(union trace_eval_map_item *ptr)
5462 {
5463 	if (!ptr->map.eval_string) {
5464 		if (ptr->tail.next) {
5465 			ptr = ptr->tail.next;
5466 			/* Set ptr to the next real item (skip head) */
5467 			ptr++;
5468 		} else
5469 			return NULL;
5470 	}
5471 	return ptr;
5472 }
5473 
5474 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5475 {
5476 	union trace_eval_map_item *ptr = v;
5477 
5478 	/*
5479 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5480 	 * This really should never happen.
5481 	 */
5482 	(*pos)++;
5483 	ptr = update_eval_map(ptr);
5484 	if (WARN_ON_ONCE(!ptr))
5485 		return NULL;
5486 
5487 	ptr++;
5488 	ptr = update_eval_map(ptr);
5489 
5490 	return ptr;
5491 }
5492 
5493 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5494 {
5495 	union trace_eval_map_item *v;
5496 	loff_t l = 0;
5497 
5498 	mutex_lock(&trace_eval_mutex);
5499 
5500 	v = trace_eval_maps;
5501 	if (v)
5502 		v++;
5503 
5504 	while (v && l < *pos) {
5505 		v = eval_map_next(m, v, &l);
5506 	}
5507 
5508 	return v;
5509 }
5510 
5511 static void eval_map_stop(struct seq_file *m, void *v)
5512 {
5513 	mutex_unlock(&trace_eval_mutex);
5514 }
5515 
5516 static int eval_map_show(struct seq_file *m, void *v)
5517 {
5518 	union trace_eval_map_item *ptr = v;
5519 
5520 	seq_printf(m, "%s %ld (%s)\n",
5521 		   ptr->map.eval_string, ptr->map.eval_value,
5522 		   ptr->map.system);
5523 
5524 	return 0;
5525 }
5526 
5527 static const struct seq_operations tracing_eval_map_seq_ops = {
5528 	.start		= eval_map_start,
5529 	.next		= eval_map_next,
5530 	.stop		= eval_map_stop,
5531 	.show		= eval_map_show,
5532 };
5533 
5534 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5535 {
5536 	int ret;
5537 
5538 	ret = tracing_check_open_get_tr(NULL);
5539 	if (ret)
5540 		return ret;
5541 
5542 	return seq_open(filp, &tracing_eval_map_seq_ops);
5543 }
5544 
5545 static const struct file_operations tracing_eval_map_fops = {
5546 	.open		= tracing_eval_map_open,
5547 	.read		= seq_read,
5548 	.llseek		= seq_lseek,
5549 	.release	= seq_release,
5550 };
5551 
5552 static inline union trace_eval_map_item *
5553 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5554 {
5555 	/* Return tail of array given the head */
5556 	return ptr + ptr->head.length + 1;
5557 }
5558 
5559 static void
5560 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5561 			   int len)
5562 {
5563 	struct trace_eval_map **stop;
5564 	struct trace_eval_map **map;
5565 	union trace_eval_map_item *map_array;
5566 	union trace_eval_map_item *ptr;
5567 
5568 	stop = start + len;
5569 
5570 	/*
5571 	 * The trace_eval_maps contains the map plus a head and tail item,
5572 	 * where the head holds the module and length of array, and the
5573 	 * tail holds a pointer to the next list.
5574 	 */
5575 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5576 	if (!map_array) {
5577 		pr_warn("Unable to allocate trace eval mapping\n");
5578 		return;
5579 	}
5580 
5581 	mutex_lock(&trace_eval_mutex);
5582 
5583 	if (!trace_eval_maps)
5584 		trace_eval_maps = map_array;
5585 	else {
5586 		ptr = trace_eval_maps;
5587 		for (;;) {
5588 			ptr = trace_eval_jmp_to_tail(ptr);
5589 			if (!ptr->tail.next)
5590 				break;
5591 			ptr = ptr->tail.next;
5592 
5593 		}
5594 		ptr->tail.next = map_array;
5595 	}
5596 	map_array->head.mod = mod;
5597 	map_array->head.length = len;
5598 	map_array++;
5599 
5600 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5601 		map_array->map = **map;
5602 		map_array++;
5603 	}
5604 	memset(map_array, 0, sizeof(*map_array));
5605 
5606 	mutex_unlock(&trace_eval_mutex);
5607 }
5608 
5609 static void trace_create_eval_file(struct dentry *d_tracer)
5610 {
5611 	trace_create_file("eval_map", 0444, d_tracer,
5612 			  NULL, &tracing_eval_map_fops);
5613 }
5614 
5615 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5616 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5617 static inline void trace_insert_eval_map_file(struct module *mod,
5618 			      struct trace_eval_map **start, int len) { }
5619 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5620 
5621 static void trace_insert_eval_map(struct module *mod,
5622 				  struct trace_eval_map **start, int len)
5623 {
5624 	struct trace_eval_map **map;
5625 
5626 	if (len <= 0)
5627 		return;
5628 
5629 	map = start;
5630 
5631 	trace_event_eval_update(map, len);
5632 
5633 	trace_insert_eval_map_file(mod, start, len);
5634 }
5635 
5636 static ssize_t
5637 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5638 		       size_t cnt, loff_t *ppos)
5639 {
5640 	struct trace_array *tr = filp->private_data;
5641 	char buf[MAX_TRACER_SIZE+2];
5642 	int r;
5643 
5644 	mutex_lock(&trace_types_lock);
5645 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5646 	mutex_unlock(&trace_types_lock);
5647 
5648 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5649 }
5650 
5651 int tracer_init(struct tracer *t, struct trace_array *tr)
5652 {
5653 	tracing_reset_online_cpus(&tr->array_buffer);
5654 	return t->init(tr);
5655 }
5656 
5657 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5658 {
5659 	int cpu;
5660 
5661 	for_each_tracing_cpu(cpu)
5662 		per_cpu_ptr(buf->data, cpu)->entries = val;
5663 }
5664 
5665 #ifdef CONFIG_TRACER_MAX_TRACE
5666 /* resize @tr's buffer to the size of @size_tr's entries */
5667 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5668 					struct array_buffer *size_buf, int cpu_id)
5669 {
5670 	int cpu, ret = 0;
5671 
5672 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5673 		for_each_tracing_cpu(cpu) {
5674 			ret = ring_buffer_resize(trace_buf->buffer,
5675 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5676 			if (ret < 0)
5677 				break;
5678 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5679 				per_cpu_ptr(size_buf->data, cpu)->entries;
5680 		}
5681 	} else {
5682 		ret = ring_buffer_resize(trace_buf->buffer,
5683 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5684 		if (ret == 0)
5685 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5686 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5687 	}
5688 
5689 	return ret;
5690 }
5691 #endif /* CONFIG_TRACER_MAX_TRACE */
5692 
5693 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5694 					unsigned long size, int cpu)
5695 {
5696 	int ret;
5697 
5698 	/*
5699 	 * If kernel or user changes the size of the ring buffer
5700 	 * we use the size that was given, and we can forget about
5701 	 * expanding it later.
5702 	 */
5703 	ring_buffer_expanded = true;
5704 
5705 	/* May be called before buffers are initialized */
5706 	if (!tr->array_buffer.buffer)
5707 		return 0;
5708 
5709 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5710 	if (ret < 0)
5711 		return ret;
5712 
5713 #ifdef CONFIG_TRACER_MAX_TRACE
5714 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5715 	    !tr->current_trace->use_max_tr)
5716 		goto out;
5717 
5718 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5719 	if (ret < 0) {
5720 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5721 						     &tr->array_buffer, cpu);
5722 		if (r < 0) {
5723 			/*
5724 			 * AARGH! We are left with different
5725 			 * size max buffer!!!!
5726 			 * The max buffer is our "snapshot" buffer.
5727 			 * When a tracer needs a snapshot (one of the
5728 			 * latency tracers), it swaps the max buffer
5729 			 * with the saved snap shot. We succeeded to
5730 			 * update the size of the main buffer, but failed to
5731 			 * update the size of the max buffer. But when we tried
5732 			 * to reset the main buffer to the original size, we
5733 			 * failed there too. This is very unlikely to
5734 			 * happen, but if it does, warn and kill all
5735 			 * tracing.
5736 			 */
5737 			WARN_ON(1);
5738 			tracing_disabled = 1;
5739 		}
5740 		return ret;
5741 	}
5742 
5743 	if (cpu == RING_BUFFER_ALL_CPUS)
5744 		set_buffer_entries(&tr->max_buffer, size);
5745 	else
5746 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5747 
5748  out:
5749 #endif /* CONFIG_TRACER_MAX_TRACE */
5750 
5751 	if (cpu == RING_BUFFER_ALL_CPUS)
5752 		set_buffer_entries(&tr->array_buffer, size);
5753 	else
5754 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5755 
5756 	return ret;
5757 }
5758 
5759 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5760 				  unsigned long size, int cpu_id)
5761 {
5762 	int ret = size;
5763 
5764 	mutex_lock(&trace_types_lock);
5765 
5766 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5767 		/* make sure, this cpu is enabled in the mask */
5768 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5769 			ret = -EINVAL;
5770 			goto out;
5771 		}
5772 	}
5773 
5774 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5775 	if (ret < 0)
5776 		ret = -ENOMEM;
5777 
5778 out:
5779 	mutex_unlock(&trace_types_lock);
5780 
5781 	return ret;
5782 }
5783 
5784 
5785 /**
5786  * tracing_update_buffers - used by tracing facility to expand ring buffers
5787  *
5788  * To save on memory when the tracing is never used on a system with it
5789  * configured in. The ring buffers are set to a minimum size. But once
5790  * a user starts to use the tracing facility, then they need to grow
5791  * to their default size.
5792  *
5793  * This function is to be called when a tracer is about to be used.
5794  */
5795 int tracing_update_buffers(void)
5796 {
5797 	int ret = 0;
5798 
5799 	mutex_lock(&trace_types_lock);
5800 	if (!ring_buffer_expanded)
5801 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5802 						RING_BUFFER_ALL_CPUS);
5803 	mutex_unlock(&trace_types_lock);
5804 
5805 	return ret;
5806 }
5807 
5808 struct trace_option_dentry;
5809 
5810 static void
5811 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5812 
5813 /*
5814  * Used to clear out the tracer before deletion of an instance.
5815  * Must have trace_types_lock held.
5816  */
5817 static void tracing_set_nop(struct trace_array *tr)
5818 {
5819 	if (tr->current_trace == &nop_trace)
5820 		return;
5821 
5822 	tr->current_trace->enabled--;
5823 
5824 	if (tr->current_trace->reset)
5825 		tr->current_trace->reset(tr);
5826 
5827 	tr->current_trace = &nop_trace;
5828 }
5829 
5830 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5831 {
5832 	/* Only enable if the directory has been created already. */
5833 	if (!tr->dir)
5834 		return;
5835 
5836 	create_trace_option_files(tr, t);
5837 }
5838 
5839 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5840 {
5841 	struct tracer *t;
5842 #ifdef CONFIG_TRACER_MAX_TRACE
5843 	bool had_max_tr;
5844 #endif
5845 	int ret = 0;
5846 
5847 	mutex_lock(&trace_types_lock);
5848 
5849 	if (!ring_buffer_expanded) {
5850 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5851 						RING_BUFFER_ALL_CPUS);
5852 		if (ret < 0)
5853 			goto out;
5854 		ret = 0;
5855 	}
5856 
5857 	for (t = trace_types; t; t = t->next) {
5858 		if (strcmp(t->name, buf) == 0)
5859 			break;
5860 	}
5861 	if (!t) {
5862 		ret = -EINVAL;
5863 		goto out;
5864 	}
5865 	if (t == tr->current_trace)
5866 		goto out;
5867 
5868 #ifdef CONFIG_TRACER_SNAPSHOT
5869 	if (t->use_max_tr) {
5870 		arch_spin_lock(&tr->max_lock);
5871 		if (tr->cond_snapshot)
5872 			ret = -EBUSY;
5873 		arch_spin_unlock(&tr->max_lock);
5874 		if (ret)
5875 			goto out;
5876 	}
5877 #endif
5878 	/* Some tracers won't work on kernel command line */
5879 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5880 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5881 			t->name);
5882 		goto out;
5883 	}
5884 
5885 	/* Some tracers are only allowed for the top level buffer */
5886 	if (!trace_ok_for_array(t, tr)) {
5887 		ret = -EINVAL;
5888 		goto out;
5889 	}
5890 
5891 	/* If trace pipe files are being read, we can't change the tracer */
5892 	if (tr->trace_ref) {
5893 		ret = -EBUSY;
5894 		goto out;
5895 	}
5896 
5897 	trace_branch_disable();
5898 
5899 	tr->current_trace->enabled--;
5900 
5901 	if (tr->current_trace->reset)
5902 		tr->current_trace->reset(tr);
5903 
5904 	/* Current trace needs to be nop_trace before synchronize_rcu */
5905 	tr->current_trace = &nop_trace;
5906 
5907 #ifdef CONFIG_TRACER_MAX_TRACE
5908 	had_max_tr = tr->allocated_snapshot;
5909 
5910 	if (had_max_tr && !t->use_max_tr) {
5911 		/*
5912 		 * We need to make sure that the update_max_tr sees that
5913 		 * current_trace changed to nop_trace to keep it from
5914 		 * swapping the buffers after we resize it.
5915 		 * The update_max_tr is called from interrupts disabled
5916 		 * so a synchronized_sched() is sufficient.
5917 		 */
5918 		synchronize_rcu();
5919 		free_snapshot(tr);
5920 	}
5921 #endif
5922 
5923 #ifdef CONFIG_TRACER_MAX_TRACE
5924 	if (t->use_max_tr && !had_max_tr) {
5925 		ret = tracing_alloc_snapshot_instance(tr);
5926 		if (ret < 0)
5927 			goto out;
5928 	}
5929 #endif
5930 
5931 	if (t->init) {
5932 		ret = tracer_init(t, tr);
5933 		if (ret)
5934 			goto out;
5935 	}
5936 
5937 	tr->current_trace = t;
5938 	tr->current_trace->enabled++;
5939 	trace_branch_enable(tr);
5940  out:
5941 	mutex_unlock(&trace_types_lock);
5942 
5943 	return ret;
5944 }
5945 
5946 static ssize_t
5947 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5948 			size_t cnt, loff_t *ppos)
5949 {
5950 	struct trace_array *tr = filp->private_data;
5951 	char buf[MAX_TRACER_SIZE+1];
5952 	int i;
5953 	size_t ret;
5954 	int err;
5955 
5956 	ret = cnt;
5957 
5958 	if (cnt > MAX_TRACER_SIZE)
5959 		cnt = MAX_TRACER_SIZE;
5960 
5961 	if (copy_from_user(buf, ubuf, cnt))
5962 		return -EFAULT;
5963 
5964 	buf[cnt] = 0;
5965 
5966 	/* strip ending whitespace. */
5967 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5968 		buf[i] = 0;
5969 
5970 	err = tracing_set_tracer(tr, buf);
5971 	if (err)
5972 		return err;
5973 
5974 	*ppos += ret;
5975 
5976 	return ret;
5977 }
5978 
5979 static ssize_t
5980 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5981 		   size_t cnt, loff_t *ppos)
5982 {
5983 	char buf[64];
5984 	int r;
5985 
5986 	r = snprintf(buf, sizeof(buf), "%ld\n",
5987 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5988 	if (r > sizeof(buf))
5989 		r = sizeof(buf);
5990 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5991 }
5992 
5993 static ssize_t
5994 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5995 		    size_t cnt, loff_t *ppos)
5996 {
5997 	unsigned long val;
5998 	int ret;
5999 
6000 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6001 	if (ret)
6002 		return ret;
6003 
6004 	*ptr = val * 1000;
6005 
6006 	return cnt;
6007 }
6008 
6009 static ssize_t
6010 tracing_thresh_read(struct file *filp, char __user *ubuf,
6011 		    size_t cnt, loff_t *ppos)
6012 {
6013 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6014 }
6015 
6016 static ssize_t
6017 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6018 		     size_t cnt, loff_t *ppos)
6019 {
6020 	struct trace_array *tr = filp->private_data;
6021 	int ret;
6022 
6023 	mutex_lock(&trace_types_lock);
6024 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6025 	if (ret < 0)
6026 		goto out;
6027 
6028 	if (tr->current_trace->update_thresh) {
6029 		ret = tr->current_trace->update_thresh(tr);
6030 		if (ret < 0)
6031 			goto out;
6032 	}
6033 
6034 	ret = cnt;
6035 out:
6036 	mutex_unlock(&trace_types_lock);
6037 
6038 	return ret;
6039 }
6040 
6041 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6042 
6043 static ssize_t
6044 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6045 		     size_t cnt, loff_t *ppos)
6046 {
6047 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6048 }
6049 
6050 static ssize_t
6051 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6052 		      size_t cnt, loff_t *ppos)
6053 {
6054 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6055 }
6056 
6057 #endif
6058 
6059 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6060 {
6061 	struct trace_array *tr = inode->i_private;
6062 	struct trace_iterator *iter;
6063 	int ret;
6064 
6065 	ret = tracing_check_open_get_tr(tr);
6066 	if (ret)
6067 		return ret;
6068 
6069 	mutex_lock(&trace_types_lock);
6070 
6071 	/* create a buffer to store the information to pass to userspace */
6072 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6073 	if (!iter) {
6074 		ret = -ENOMEM;
6075 		__trace_array_put(tr);
6076 		goto out;
6077 	}
6078 
6079 	trace_seq_init(&iter->seq);
6080 	iter->trace = tr->current_trace;
6081 
6082 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6083 		ret = -ENOMEM;
6084 		goto fail;
6085 	}
6086 
6087 	/* trace pipe does not show start of buffer */
6088 	cpumask_setall(iter->started);
6089 
6090 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6091 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6092 
6093 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6094 	if (trace_clocks[tr->clock_id].in_ns)
6095 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6096 
6097 	iter->tr = tr;
6098 	iter->array_buffer = &tr->array_buffer;
6099 	iter->cpu_file = tracing_get_cpu(inode);
6100 	mutex_init(&iter->mutex);
6101 	filp->private_data = iter;
6102 
6103 	if (iter->trace->pipe_open)
6104 		iter->trace->pipe_open(iter);
6105 
6106 	nonseekable_open(inode, filp);
6107 
6108 	tr->trace_ref++;
6109 out:
6110 	mutex_unlock(&trace_types_lock);
6111 	return ret;
6112 
6113 fail:
6114 	kfree(iter);
6115 	__trace_array_put(tr);
6116 	mutex_unlock(&trace_types_lock);
6117 	return ret;
6118 }
6119 
6120 static int tracing_release_pipe(struct inode *inode, struct file *file)
6121 {
6122 	struct trace_iterator *iter = file->private_data;
6123 	struct trace_array *tr = inode->i_private;
6124 
6125 	mutex_lock(&trace_types_lock);
6126 
6127 	tr->trace_ref--;
6128 
6129 	if (iter->trace->pipe_close)
6130 		iter->trace->pipe_close(iter);
6131 
6132 	mutex_unlock(&trace_types_lock);
6133 
6134 	free_cpumask_var(iter->started);
6135 	mutex_destroy(&iter->mutex);
6136 	kfree(iter);
6137 
6138 	trace_array_put(tr);
6139 
6140 	return 0;
6141 }
6142 
6143 static __poll_t
6144 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6145 {
6146 	struct trace_array *tr = iter->tr;
6147 
6148 	/* Iterators are static, they should be filled or empty */
6149 	if (trace_buffer_iter(iter, iter->cpu_file))
6150 		return EPOLLIN | EPOLLRDNORM;
6151 
6152 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6153 		/*
6154 		 * Always select as readable when in blocking mode
6155 		 */
6156 		return EPOLLIN | EPOLLRDNORM;
6157 	else
6158 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6159 					     filp, poll_table);
6160 }
6161 
6162 static __poll_t
6163 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6164 {
6165 	struct trace_iterator *iter = filp->private_data;
6166 
6167 	return trace_poll(iter, filp, poll_table);
6168 }
6169 
6170 /* Must be called with iter->mutex held. */
6171 static int tracing_wait_pipe(struct file *filp)
6172 {
6173 	struct trace_iterator *iter = filp->private_data;
6174 	int ret;
6175 
6176 	while (trace_empty(iter)) {
6177 
6178 		if ((filp->f_flags & O_NONBLOCK)) {
6179 			return -EAGAIN;
6180 		}
6181 
6182 		/*
6183 		 * We block until we read something and tracing is disabled.
6184 		 * We still block if tracing is disabled, but we have never
6185 		 * read anything. This allows a user to cat this file, and
6186 		 * then enable tracing. But after we have read something,
6187 		 * we give an EOF when tracing is again disabled.
6188 		 *
6189 		 * iter->pos will be 0 if we haven't read anything.
6190 		 */
6191 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6192 			break;
6193 
6194 		mutex_unlock(&iter->mutex);
6195 
6196 		ret = wait_on_pipe(iter, 0);
6197 
6198 		mutex_lock(&iter->mutex);
6199 
6200 		if (ret)
6201 			return ret;
6202 	}
6203 
6204 	return 1;
6205 }
6206 
6207 /*
6208  * Consumer reader.
6209  */
6210 static ssize_t
6211 tracing_read_pipe(struct file *filp, char __user *ubuf,
6212 		  size_t cnt, loff_t *ppos)
6213 {
6214 	struct trace_iterator *iter = filp->private_data;
6215 	ssize_t sret;
6216 
6217 	/*
6218 	 * Avoid more than one consumer on a single file descriptor
6219 	 * This is just a matter of traces coherency, the ring buffer itself
6220 	 * is protected.
6221 	 */
6222 	mutex_lock(&iter->mutex);
6223 
6224 	/* return any leftover data */
6225 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6226 	if (sret != -EBUSY)
6227 		goto out;
6228 
6229 	trace_seq_init(&iter->seq);
6230 
6231 	if (iter->trace->read) {
6232 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6233 		if (sret)
6234 			goto out;
6235 	}
6236 
6237 waitagain:
6238 	sret = tracing_wait_pipe(filp);
6239 	if (sret <= 0)
6240 		goto out;
6241 
6242 	/* stop when tracing is finished */
6243 	if (trace_empty(iter)) {
6244 		sret = 0;
6245 		goto out;
6246 	}
6247 
6248 	if (cnt >= PAGE_SIZE)
6249 		cnt = PAGE_SIZE - 1;
6250 
6251 	/* reset all but tr, trace, and overruns */
6252 	memset(&iter->seq, 0,
6253 	       sizeof(struct trace_iterator) -
6254 	       offsetof(struct trace_iterator, seq));
6255 	cpumask_clear(iter->started);
6256 	trace_seq_init(&iter->seq);
6257 	iter->pos = -1;
6258 
6259 	trace_event_read_lock();
6260 	trace_access_lock(iter->cpu_file);
6261 	while (trace_find_next_entry_inc(iter) != NULL) {
6262 		enum print_line_t ret;
6263 		int save_len = iter->seq.seq.len;
6264 
6265 		ret = print_trace_line(iter);
6266 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6267 			/* don't print partial lines */
6268 			iter->seq.seq.len = save_len;
6269 			break;
6270 		}
6271 		if (ret != TRACE_TYPE_NO_CONSUME)
6272 			trace_consume(iter);
6273 
6274 		if (trace_seq_used(&iter->seq) >= cnt)
6275 			break;
6276 
6277 		/*
6278 		 * Setting the full flag means we reached the trace_seq buffer
6279 		 * size and we should leave by partial output condition above.
6280 		 * One of the trace_seq_* functions is not used properly.
6281 		 */
6282 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6283 			  iter->ent->type);
6284 	}
6285 	trace_access_unlock(iter->cpu_file);
6286 	trace_event_read_unlock();
6287 
6288 	/* Now copy what we have to the user */
6289 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6290 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6291 		trace_seq_init(&iter->seq);
6292 
6293 	/*
6294 	 * If there was nothing to send to user, in spite of consuming trace
6295 	 * entries, go back to wait for more entries.
6296 	 */
6297 	if (sret == -EBUSY)
6298 		goto waitagain;
6299 
6300 out:
6301 	mutex_unlock(&iter->mutex);
6302 
6303 	return sret;
6304 }
6305 
6306 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6307 				     unsigned int idx)
6308 {
6309 	__free_page(spd->pages[idx]);
6310 }
6311 
6312 static size_t
6313 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6314 {
6315 	size_t count;
6316 	int save_len;
6317 	int ret;
6318 
6319 	/* Seq buffer is page-sized, exactly what we need. */
6320 	for (;;) {
6321 		save_len = iter->seq.seq.len;
6322 		ret = print_trace_line(iter);
6323 
6324 		if (trace_seq_has_overflowed(&iter->seq)) {
6325 			iter->seq.seq.len = save_len;
6326 			break;
6327 		}
6328 
6329 		/*
6330 		 * This should not be hit, because it should only
6331 		 * be set if the iter->seq overflowed. But check it
6332 		 * anyway to be safe.
6333 		 */
6334 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6335 			iter->seq.seq.len = save_len;
6336 			break;
6337 		}
6338 
6339 		count = trace_seq_used(&iter->seq) - save_len;
6340 		if (rem < count) {
6341 			rem = 0;
6342 			iter->seq.seq.len = save_len;
6343 			break;
6344 		}
6345 
6346 		if (ret != TRACE_TYPE_NO_CONSUME)
6347 			trace_consume(iter);
6348 		rem -= count;
6349 		if (!trace_find_next_entry_inc(iter))	{
6350 			rem = 0;
6351 			iter->ent = NULL;
6352 			break;
6353 		}
6354 	}
6355 
6356 	return rem;
6357 }
6358 
6359 static ssize_t tracing_splice_read_pipe(struct file *filp,
6360 					loff_t *ppos,
6361 					struct pipe_inode_info *pipe,
6362 					size_t len,
6363 					unsigned int flags)
6364 {
6365 	struct page *pages_def[PIPE_DEF_BUFFERS];
6366 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6367 	struct trace_iterator *iter = filp->private_data;
6368 	struct splice_pipe_desc spd = {
6369 		.pages		= pages_def,
6370 		.partial	= partial_def,
6371 		.nr_pages	= 0, /* This gets updated below. */
6372 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6373 		.ops		= &default_pipe_buf_ops,
6374 		.spd_release	= tracing_spd_release_pipe,
6375 	};
6376 	ssize_t ret;
6377 	size_t rem;
6378 	unsigned int i;
6379 
6380 	if (splice_grow_spd(pipe, &spd))
6381 		return -ENOMEM;
6382 
6383 	mutex_lock(&iter->mutex);
6384 
6385 	if (iter->trace->splice_read) {
6386 		ret = iter->trace->splice_read(iter, filp,
6387 					       ppos, pipe, len, flags);
6388 		if (ret)
6389 			goto out_err;
6390 	}
6391 
6392 	ret = tracing_wait_pipe(filp);
6393 	if (ret <= 0)
6394 		goto out_err;
6395 
6396 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6397 		ret = -EFAULT;
6398 		goto out_err;
6399 	}
6400 
6401 	trace_event_read_lock();
6402 	trace_access_lock(iter->cpu_file);
6403 
6404 	/* Fill as many pages as possible. */
6405 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6406 		spd.pages[i] = alloc_page(GFP_KERNEL);
6407 		if (!spd.pages[i])
6408 			break;
6409 
6410 		rem = tracing_fill_pipe_page(rem, iter);
6411 
6412 		/* Copy the data into the page, so we can start over. */
6413 		ret = trace_seq_to_buffer(&iter->seq,
6414 					  page_address(spd.pages[i]),
6415 					  trace_seq_used(&iter->seq));
6416 		if (ret < 0) {
6417 			__free_page(spd.pages[i]);
6418 			break;
6419 		}
6420 		spd.partial[i].offset = 0;
6421 		spd.partial[i].len = trace_seq_used(&iter->seq);
6422 
6423 		trace_seq_init(&iter->seq);
6424 	}
6425 
6426 	trace_access_unlock(iter->cpu_file);
6427 	trace_event_read_unlock();
6428 	mutex_unlock(&iter->mutex);
6429 
6430 	spd.nr_pages = i;
6431 
6432 	if (i)
6433 		ret = splice_to_pipe(pipe, &spd);
6434 	else
6435 		ret = 0;
6436 out:
6437 	splice_shrink_spd(&spd);
6438 	return ret;
6439 
6440 out_err:
6441 	mutex_unlock(&iter->mutex);
6442 	goto out;
6443 }
6444 
6445 static ssize_t
6446 tracing_entries_read(struct file *filp, char __user *ubuf,
6447 		     size_t cnt, loff_t *ppos)
6448 {
6449 	struct inode *inode = file_inode(filp);
6450 	struct trace_array *tr = inode->i_private;
6451 	int cpu = tracing_get_cpu(inode);
6452 	char buf[64];
6453 	int r = 0;
6454 	ssize_t ret;
6455 
6456 	mutex_lock(&trace_types_lock);
6457 
6458 	if (cpu == RING_BUFFER_ALL_CPUS) {
6459 		int cpu, buf_size_same;
6460 		unsigned long size;
6461 
6462 		size = 0;
6463 		buf_size_same = 1;
6464 		/* check if all cpu sizes are same */
6465 		for_each_tracing_cpu(cpu) {
6466 			/* fill in the size from first enabled cpu */
6467 			if (size == 0)
6468 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6469 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6470 				buf_size_same = 0;
6471 				break;
6472 			}
6473 		}
6474 
6475 		if (buf_size_same) {
6476 			if (!ring_buffer_expanded)
6477 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6478 					    size >> 10,
6479 					    trace_buf_size >> 10);
6480 			else
6481 				r = sprintf(buf, "%lu\n", size >> 10);
6482 		} else
6483 			r = sprintf(buf, "X\n");
6484 	} else
6485 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6486 
6487 	mutex_unlock(&trace_types_lock);
6488 
6489 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6490 	return ret;
6491 }
6492 
6493 static ssize_t
6494 tracing_entries_write(struct file *filp, const char __user *ubuf,
6495 		      size_t cnt, loff_t *ppos)
6496 {
6497 	struct inode *inode = file_inode(filp);
6498 	struct trace_array *tr = inode->i_private;
6499 	unsigned long val;
6500 	int ret;
6501 
6502 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503 	if (ret)
6504 		return ret;
6505 
6506 	/* must have at least 1 entry */
6507 	if (!val)
6508 		return -EINVAL;
6509 
6510 	/* value is in KB */
6511 	val <<= 10;
6512 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6513 	if (ret < 0)
6514 		return ret;
6515 
6516 	*ppos += cnt;
6517 
6518 	return cnt;
6519 }
6520 
6521 static ssize_t
6522 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6523 				size_t cnt, loff_t *ppos)
6524 {
6525 	struct trace_array *tr = filp->private_data;
6526 	char buf[64];
6527 	int r, cpu;
6528 	unsigned long size = 0, expanded_size = 0;
6529 
6530 	mutex_lock(&trace_types_lock);
6531 	for_each_tracing_cpu(cpu) {
6532 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6533 		if (!ring_buffer_expanded)
6534 			expanded_size += trace_buf_size >> 10;
6535 	}
6536 	if (ring_buffer_expanded)
6537 		r = sprintf(buf, "%lu\n", size);
6538 	else
6539 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6540 	mutex_unlock(&trace_types_lock);
6541 
6542 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6543 }
6544 
6545 static ssize_t
6546 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6547 			  size_t cnt, loff_t *ppos)
6548 {
6549 	/*
6550 	 * There is no need to read what the user has written, this function
6551 	 * is just to make sure that there is no error when "echo" is used
6552 	 */
6553 
6554 	*ppos += cnt;
6555 
6556 	return cnt;
6557 }
6558 
6559 static int
6560 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6561 {
6562 	struct trace_array *tr = inode->i_private;
6563 
6564 	/* disable tracing ? */
6565 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6566 		tracer_tracing_off(tr);
6567 	/* resize the ring buffer to 0 */
6568 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6569 
6570 	trace_array_put(tr);
6571 
6572 	return 0;
6573 }
6574 
6575 static ssize_t
6576 tracing_mark_write(struct file *filp, const char __user *ubuf,
6577 					size_t cnt, loff_t *fpos)
6578 {
6579 	struct trace_array *tr = filp->private_data;
6580 	struct ring_buffer_event *event;
6581 	enum event_trigger_type tt = ETT_NONE;
6582 	struct trace_buffer *buffer;
6583 	struct print_entry *entry;
6584 	unsigned long irq_flags;
6585 	ssize_t written;
6586 	int size;
6587 	int len;
6588 
6589 /* Used in tracing_mark_raw_write() as well */
6590 #define FAULTED_STR "<faulted>"
6591 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6592 
6593 	if (tracing_disabled)
6594 		return -EINVAL;
6595 
6596 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6597 		return -EINVAL;
6598 
6599 	if (cnt > TRACE_BUF_SIZE)
6600 		cnt = TRACE_BUF_SIZE;
6601 
6602 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6603 
6604 	local_save_flags(irq_flags);
6605 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6606 
6607 	/* If less than "<faulted>", then make sure we can still add that */
6608 	if (cnt < FAULTED_SIZE)
6609 		size += FAULTED_SIZE - cnt;
6610 
6611 	buffer = tr->array_buffer.buffer;
6612 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6613 					    irq_flags, preempt_count());
6614 	if (unlikely(!event))
6615 		/* Ring buffer disabled, return as if not open for write */
6616 		return -EBADF;
6617 
6618 	entry = ring_buffer_event_data(event);
6619 	entry->ip = _THIS_IP_;
6620 
6621 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6622 	if (len) {
6623 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6624 		cnt = FAULTED_SIZE;
6625 		written = -EFAULT;
6626 	} else
6627 		written = cnt;
6628 	len = cnt;
6629 
6630 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6631 		/* do not add \n before testing triggers, but add \0 */
6632 		entry->buf[cnt] = '\0';
6633 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6634 	}
6635 
6636 	if (entry->buf[cnt - 1] != '\n') {
6637 		entry->buf[cnt] = '\n';
6638 		entry->buf[cnt + 1] = '\0';
6639 	} else
6640 		entry->buf[cnt] = '\0';
6641 
6642 	__buffer_unlock_commit(buffer, event);
6643 
6644 	if (tt)
6645 		event_triggers_post_call(tr->trace_marker_file, tt);
6646 
6647 	if (written > 0)
6648 		*fpos += written;
6649 
6650 	return written;
6651 }
6652 
6653 /* Limit it for now to 3K (including tag) */
6654 #define RAW_DATA_MAX_SIZE (1024*3)
6655 
6656 static ssize_t
6657 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6658 					size_t cnt, loff_t *fpos)
6659 {
6660 	struct trace_array *tr = filp->private_data;
6661 	struct ring_buffer_event *event;
6662 	struct trace_buffer *buffer;
6663 	struct raw_data_entry *entry;
6664 	unsigned long irq_flags;
6665 	ssize_t written;
6666 	int size;
6667 	int len;
6668 
6669 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6670 
6671 	if (tracing_disabled)
6672 		return -EINVAL;
6673 
6674 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6675 		return -EINVAL;
6676 
6677 	/* The marker must at least have a tag id */
6678 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6679 		return -EINVAL;
6680 
6681 	if (cnt > TRACE_BUF_SIZE)
6682 		cnt = TRACE_BUF_SIZE;
6683 
6684 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6685 
6686 	local_save_flags(irq_flags);
6687 	size = sizeof(*entry) + cnt;
6688 	if (cnt < FAULT_SIZE_ID)
6689 		size += FAULT_SIZE_ID - cnt;
6690 
6691 	buffer = tr->array_buffer.buffer;
6692 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6693 					    irq_flags, preempt_count());
6694 	if (!event)
6695 		/* Ring buffer disabled, return as if not open for write */
6696 		return -EBADF;
6697 
6698 	entry = ring_buffer_event_data(event);
6699 
6700 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6701 	if (len) {
6702 		entry->id = -1;
6703 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6704 		written = -EFAULT;
6705 	} else
6706 		written = cnt;
6707 
6708 	__buffer_unlock_commit(buffer, event);
6709 
6710 	if (written > 0)
6711 		*fpos += written;
6712 
6713 	return written;
6714 }
6715 
6716 static int tracing_clock_show(struct seq_file *m, void *v)
6717 {
6718 	struct trace_array *tr = m->private;
6719 	int i;
6720 
6721 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6722 		seq_printf(m,
6723 			"%s%s%s%s", i ? " " : "",
6724 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6725 			i == tr->clock_id ? "]" : "");
6726 	seq_putc(m, '\n');
6727 
6728 	return 0;
6729 }
6730 
6731 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6732 {
6733 	int i;
6734 
6735 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6736 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6737 			break;
6738 	}
6739 	if (i == ARRAY_SIZE(trace_clocks))
6740 		return -EINVAL;
6741 
6742 	mutex_lock(&trace_types_lock);
6743 
6744 	tr->clock_id = i;
6745 
6746 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6747 
6748 	/*
6749 	 * New clock may not be consistent with the previous clock.
6750 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6751 	 */
6752 	tracing_reset_online_cpus(&tr->array_buffer);
6753 
6754 #ifdef CONFIG_TRACER_MAX_TRACE
6755 	if (tr->max_buffer.buffer)
6756 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6757 	tracing_reset_online_cpus(&tr->max_buffer);
6758 #endif
6759 
6760 	mutex_unlock(&trace_types_lock);
6761 
6762 	return 0;
6763 }
6764 
6765 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6766 				   size_t cnt, loff_t *fpos)
6767 {
6768 	struct seq_file *m = filp->private_data;
6769 	struct trace_array *tr = m->private;
6770 	char buf[64];
6771 	const char *clockstr;
6772 	int ret;
6773 
6774 	if (cnt >= sizeof(buf))
6775 		return -EINVAL;
6776 
6777 	if (copy_from_user(buf, ubuf, cnt))
6778 		return -EFAULT;
6779 
6780 	buf[cnt] = 0;
6781 
6782 	clockstr = strstrip(buf);
6783 
6784 	ret = tracing_set_clock(tr, clockstr);
6785 	if (ret)
6786 		return ret;
6787 
6788 	*fpos += cnt;
6789 
6790 	return cnt;
6791 }
6792 
6793 static int tracing_clock_open(struct inode *inode, struct file *file)
6794 {
6795 	struct trace_array *tr = inode->i_private;
6796 	int ret;
6797 
6798 	ret = tracing_check_open_get_tr(tr);
6799 	if (ret)
6800 		return ret;
6801 
6802 	ret = single_open(file, tracing_clock_show, inode->i_private);
6803 	if (ret < 0)
6804 		trace_array_put(tr);
6805 
6806 	return ret;
6807 }
6808 
6809 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6810 {
6811 	struct trace_array *tr = m->private;
6812 
6813 	mutex_lock(&trace_types_lock);
6814 
6815 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6816 		seq_puts(m, "delta [absolute]\n");
6817 	else
6818 		seq_puts(m, "[delta] absolute\n");
6819 
6820 	mutex_unlock(&trace_types_lock);
6821 
6822 	return 0;
6823 }
6824 
6825 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6826 {
6827 	struct trace_array *tr = inode->i_private;
6828 	int ret;
6829 
6830 	ret = tracing_check_open_get_tr(tr);
6831 	if (ret)
6832 		return ret;
6833 
6834 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6835 	if (ret < 0)
6836 		trace_array_put(tr);
6837 
6838 	return ret;
6839 }
6840 
6841 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6842 {
6843 	int ret = 0;
6844 
6845 	mutex_lock(&trace_types_lock);
6846 
6847 	if (abs && tr->time_stamp_abs_ref++)
6848 		goto out;
6849 
6850 	if (!abs) {
6851 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6852 			ret = -EINVAL;
6853 			goto out;
6854 		}
6855 
6856 		if (--tr->time_stamp_abs_ref)
6857 			goto out;
6858 	}
6859 
6860 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6861 
6862 #ifdef CONFIG_TRACER_MAX_TRACE
6863 	if (tr->max_buffer.buffer)
6864 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6865 #endif
6866  out:
6867 	mutex_unlock(&trace_types_lock);
6868 
6869 	return ret;
6870 }
6871 
6872 struct ftrace_buffer_info {
6873 	struct trace_iterator	iter;
6874 	void			*spare;
6875 	unsigned int		spare_cpu;
6876 	unsigned int		read;
6877 };
6878 
6879 #ifdef CONFIG_TRACER_SNAPSHOT
6880 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6881 {
6882 	struct trace_array *tr = inode->i_private;
6883 	struct trace_iterator *iter;
6884 	struct seq_file *m;
6885 	int ret;
6886 
6887 	ret = tracing_check_open_get_tr(tr);
6888 	if (ret)
6889 		return ret;
6890 
6891 	if (file->f_mode & FMODE_READ) {
6892 		iter = __tracing_open(inode, file, true);
6893 		if (IS_ERR(iter))
6894 			ret = PTR_ERR(iter);
6895 	} else {
6896 		/* Writes still need the seq_file to hold the private data */
6897 		ret = -ENOMEM;
6898 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6899 		if (!m)
6900 			goto out;
6901 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6902 		if (!iter) {
6903 			kfree(m);
6904 			goto out;
6905 		}
6906 		ret = 0;
6907 
6908 		iter->tr = tr;
6909 		iter->array_buffer = &tr->max_buffer;
6910 		iter->cpu_file = tracing_get_cpu(inode);
6911 		m->private = iter;
6912 		file->private_data = m;
6913 	}
6914 out:
6915 	if (ret < 0)
6916 		trace_array_put(tr);
6917 
6918 	return ret;
6919 }
6920 
6921 static ssize_t
6922 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6923 		       loff_t *ppos)
6924 {
6925 	struct seq_file *m = filp->private_data;
6926 	struct trace_iterator *iter = m->private;
6927 	struct trace_array *tr = iter->tr;
6928 	unsigned long val;
6929 	int ret;
6930 
6931 	ret = tracing_update_buffers();
6932 	if (ret < 0)
6933 		return ret;
6934 
6935 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6936 	if (ret)
6937 		return ret;
6938 
6939 	mutex_lock(&trace_types_lock);
6940 
6941 	if (tr->current_trace->use_max_tr) {
6942 		ret = -EBUSY;
6943 		goto out;
6944 	}
6945 
6946 	arch_spin_lock(&tr->max_lock);
6947 	if (tr->cond_snapshot)
6948 		ret = -EBUSY;
6949 	arch_spin_unlock(&tr->max_lock);
6950 	if (ret)
6951 		goto out;
6952 
6953 	switch (val) {
6954 	case 0:
6955 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6956 			ret = -EINVAL;
6957 			break;
6958 		}
6959 		if (tr->allocated_snapshot)
6960 			free_snapshot(tr);
6961 		break;
6962 	case 1:
6963 /* Only allow per-cpu swap if the ring buffer supports it */
6964 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6965 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6966 			ret = -EINVAL;
6967 			break;
6968 		}
6969 #endif
6970 		if (tr->allocated_snapshot)
6971 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6972 					&tr->array_buffer, iter->cpu_file);
6973 		else
6974 			ret = tracing_alloc_snapshot_instance(tr);
6975 		if (ret < 0)
6976 			break;
6977 		local_irq_disable();
6978 		/* Now, we're going to swap */
6979 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6980 			update_max_tr(tr, current, smp_processor_id(), NULL);
6981 		else
6982 			update_max_tr_single(tr, current, iter->cpu_file);
6983 		local_irq_enable();
6984 		break;
6985 	default:
6986 		if (tr->allocated_snapshot) {
6987 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6988 				tracing_reset_online_cpus(&tr->max_buffer);
6989 			else
6990 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6991 		}
6992 		break;
6993 	}
6994 
6995 	if (ret >= 0) {
6996 		*ppos += cnt;
6997 		ret = cnt;
6998 	}
6999 out:
7000 	mutex_unlock(&trace_types_lock);
7001 	return ret;
7002 }
7003 
7004 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7005 {
7006 	struct seq_file *m = file->private_data;
7007 	int ret;
7008 
7009 	ret = tracing_release(inode, file);
7010 
7011 	if (file->f_mode & FMODE_READ)
7012 		return ret;
7013 
7014 	/* If write only, the seq_file is just a stub */
7015 	if (m)
7016 		kfree(m->private);
7017 	kfree(m);
7018 
7019 	return 0;
7020 }
7021 
7022 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7023 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7024 				    size_t count, loff_t *ppos);
7025 static int tracing_buffers_release(struct inode *inode, struct file *file);
7026 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7027 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7028 
7029 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7030 {
7031 	struct ftrace_buffer_info *info;
7032 	int ret;
7033 
7034 	/* The following checks for tracefs lockdown */
7035 	ret = tracing_buffers_open(inode, filp);
7036 	if (ret < 0)
7037 		return ret;
7038 
7039 	info = filp->private_data;
7040 
7041 	if (info->iter.trace->use_max_tr) {
7042 		tracing_buffers_release(inode, filp);
7043 		return -EBUSY;
7044 	}
7045 
7046 	info->iter.snapshot = true;
7047 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7048 
7049 	return ret;
7050 }
7051 
7052 #endif /* CONFIG_TRACER_SNAPSHOT */
7053 
7054 
7055 static const struct file_operations tracing_thresh_fops = {
7056 	.open		= tracing_open_generic,
7057 	.read		= tracing_thresh_read,
7058 	.write		= tracing_thresh_write,
7059 	.llseek		= generic_file_llseek,
7060 };
7061 
7062 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7063 static const struct file_operations tracing_max_lat_fops = {
7064 	.open		= tracing_open_generic,
7065 	.read		= tracing_max_lat_read,
7066 	.write		= tracing_max_lat_write,
7067 	.llseek		= generic_file_llseek,
7068 };
7069 #endif
7070 
7071 static const struct file_operations set_tracer_fops = {
7072 	.open		= tracing_open_generic,
7073 	.read		= tracing_set_trace_read,
7074 	.write		= tracing_set_trace_write,
7075 	.llseek		= generic_file_llseek,
7076 };
7077 
7078 static const struct file_operations tracing_pipe_fops = {
7079 	.open		= tracing_open_pipe,
7080 	.poll		= tracing_poll_pipe,
7081 	.read		= tracing_read_pipe,
7082 	.splice_read	= tracing_splice_read_pipe,
7083 	.release	= tracing_release_pipe,
7084 	.llseek		= no_llseek,
7085 };
7086 
7087 static const struct file_operations tracing_entries_fops = {
7088 	.open		= tracing_open_generic_tr,
7089 	.read		= tracing_entries_read,
7090 	.write		= tracing_entries_write,
7091 	.llseek		= generic_file_llseek,
7092 	.release	= tracing_release_generic_tr,
7093 };
7094 
7095 static const struct file_operations tracing_total_entries_fops = {
7096 	.open		= tracing_open_generic_tr,
7097 	.read		= tracing_total_entries_read,
7098 	.llseek		= generic_file_llseek,
7099 	.release	= tracing_release_generic_tr,
7100 };
7101 
7102 static const struct file_operations tracing_free_buffer_fops = {
7103 	.open		= tracing_open_generic_tr,
7104 	.write		= tracing_free_buffer_write,
7105 	.release	= tracing_free_buffer_release,
7106 };
7107 
7108 static const struct file_operations tracing_mark_fops = {
7109 	.open		= tracing_open_generic_tr,
7110 	.write		= tracing_mark_write,
7111 	.llseek		= generic_file_llseek,
7112 	.release	= tracing_release_generic_tr,
7113 };
7114 
7115 static const struct file_operations tracing_mark_raw_fops = {
7116 	.open		= tracing_open_generic_tr,
7117 	.write		= tracing_mark_raw_write,
7118 	.llseek		= generic_file_llseek,
7119 	.release	= tracing_release_generic_tr,
7120 };
7121 
7122 static const struct file_operations trace_clock_fops = {
7123 	.open		= tracing_clock_open,
7124 	.read		= seq_read,
7125 	.llseek		= seq_lseek,
7126 	.release	= tracing_single_release_tr,
7127 	.write		= tracing_clock_write,
7128 };
7129 
7130 static const struct file_operations trace_time_stamp_mode_fops = {
7131 	.open		= tracing_time_stamp_mode_open,
7132 	.read		= seq_read,
7133 	.llseek		= seq_lseek,
7134 	.release	= tracing_single_release_tr,
7135 };
7136 
7137 #ifdef CONFIG_TRACER_SNAPSHOT
7138 static const struct file_operations snapshot_fops = {
7139 	.open		= tracing_snapshot_open,
7140 	.read		= seq_read,
7141 	.write		= tracing_snapshot_write,
7142 	.llseek		= tracing_lseek,
7143 	.release	= tracing_snapshot_release,
7144 };
7145 
7146 static const struct file_operations snapshot_raw_fops = {
7147 	.open		= snapshot_raw_open,
7148 	.read		= tracing_buffers_read,
7149 	.release	= tracing_buffers_release,
7150 	.splice_read	= tracing_buffers_splice_read,
7151 	.llseek		= no_llseek,
7152 };
7153 
7154 #endif /* CONFIG_TRACER_SNAPSHOT */
7155 
7156 #define TRACING_LOG_ERRS_MAX	8
7157 #define TRACING_LOG_LOC_MAX	128
7158 
7159 #define CMD_PREFIX "  Command: "
7160 
7161 struct err_info {
7162 	const char	**errs;	/* ptr to loc-specific array of err strings */
7163 	u8		type;	/* index into errs -> specific err string */
7164 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7165 	u64		ts;
7166 };
7167 
7168 struct tracing_log_err {
7169 	struct list_head	list;
7170 	struct err_info		info;
7171 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7172 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7173 };
7174 
7175 static DEFINE_MUTEX(tracing_err_log_lock);
7176 
7177 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7178 {
7179 	struct tracing_log_err *err;
7180 
7181 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7182 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7183 		if (!err)
7184 			err = ERR_PTR(-ENOMEM);
7185 		tr->n_err_log_entries++;
7186 
7187 		return err;
7188 	}
7189 
7190 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7191 	list_del(&err->list);
7192 
7193 	return err;
7194 }
7195 
7196 /**
7197  * err_pos - find the position of a string within a command for error careting
7198  * @cmd: The tracing command that caused the error
7199  * @str: The string to position the caret at within @cmd
7200  *
7201  * Finds the position of the first occurence of @str within @cmd.  The
7202  * return value can be passed to tracing_log_err() for caret placement
7203  * within @cmd.
7204  *
7205  * Returns the index within @cmd of the first occurence of @str or 0
7206  * if @str was not found.
7207  */
7208 unsigned int err_pos(char *cmd, const char *str)
7209 {
7210 	char *found;
7211 
7212 	if (WARN_ON(!strlen(cmd)))
7213 		return 0;
7214 
7215 	found = strstr(cmd, str);
7216 	if (found)
7217 		return found - cmd;
7218 
7219 	return 0;
7220 }
7221 
7222 /**
7223  * tracing_log_err - write an error to the tracing error log
7224  * @tr: The associated trace array for the error (NULL for top level array)
7225  * @loc: A string describing where the error occurred
7226  * @cmd: The tracing command that caused the error
7227  * @errs: The array of loc-specific static error strings
7228  * @type: The index into errs[], which produces the specific static err string
7229  * @pos: The position the caret should be placed in the cmd
7230  *
7231  * Writes an error into tracing/error_log of the form:
7232  *
7233  * <loc>: error: <text>
7234  *   Command: <cmd>
7235  *              ^
7236  *
7237  * tracing/error_log is a small log file containing the last
7238  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7239  * unless there has been a tracing error, and the error log can be
7240  * cleared and have its memory freed by writing the empty string in
7241  * truncation mode to it i.e. echo > tracing/error_log.
7242  *
7243  * NOTE: the @errs array along with the @type param are used to
7244  * produce a static error string - this string is not copied and saved
7245  * when the error is logged - only a pointer to it is saved.  See
7246  * existing callers for examples of how static strings are typically
7247  * defined for use with tracing_log_err().
7248  */
7249 void tracing_log_err(struct trace_array *tr,
7250 		     const char *loc, const char *cmd,
7251 		     const char **errs, u8 type, u8 pos)
7252 {
7253 	struct tracing_log_err *err;
7254 
7255 	if (!tr)
7256 		tr = &global_trace;
7257 
7258 	mutex_lock(&tracing_err_log_lock);
7259 	err = get_tracing_log_err(tr);
7260 	if (PTR_ERR(err) == -ENOMEM) {
7261 		mutex_unlock(&tracing_err_log_lock);
7262 		return;
7263 	}
7264 
7265 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7266 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7267 
7268 	err->info.errs = errs;
7269 	err->info.type = type;
7270 	err->info.pos = pos;
7271 	err->info.ts = local_clock();
7272 
7273 	list_add_tail(&err->list, &tr->err_log);
7274 	mutex_unlock(&tracing_err_log_lock);
7275 }
7276 
7277 static void clear_tracing_err_log(struct trace_array *tr)
7278 {
7279 	struct tracing_log_err *err, *next;
7280 
7281 	mutex_lock(&tracing_err_log_lock);
7282 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7283 		list_del(&err->list);
7284 		kfree(err);
7285 	}
7286 
7287 	tr->n_err_log_entries = 0;
7288 	mutex_unlock(&tracing_err_log_lock);
7289 }
7290 
7291 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7292 {
7293 	struct trace_array *tr = m->private;
7294 
7295 	mutex_lock(&tracing_err_log_lock);
7296 
7297 	return seq_list_start(&tr->err_log, *pos);
7298 }
7299 
7300 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7301 {
7302 	struct trace_array *tr = m->private;
7303 
7304 	return seq_list_next(v, &tr->err_log, pos);
7305 }
7306 
7307 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7308 {
7309 	mutex_unlock(&tracing_err_log_lock);
7310 }
7311 
7312 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7313 {
7314 	u8 i;
7315 
7316 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7317 		seq_putc(m, ' ');
7318 	for (i = 0; i < pos; i++)
7319 		seq_putc(m, ' ');
7320 	seq_puts(m, "^\n");
7321 }
7322 
7323 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7324 {
7325 	struct tracing_log_err *err = v;
7326 
7327 	if (err) {
7328 		const char *err_text = err->info.errs[err->info.type];
7329 		u64 sec = err->info.ts;
7330 		u32 nsec;
7331 
7332 		nsec = do_div(sec, NSEC_PER_SEC);
7333 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7334 			   err->loc, err_text);
7335 		seq_printf(m, "%s", err->cmd);
7336 		tracing_err_log_show_pos(m, err->info.pos);
7337 	}
7338 
7339 	return 0;
7340 }
7341 
7342 static const struct seq_operations tracing_err_log_seq_ops = {
7343 	.start  = tracing_err_log_seq_start,
7344 	.next   = tracing_err_log_seq_next,
7345 	.stop   = tracing_err_log_seq_stop,
7346 	.show   = tracing_err_log_seq_show
7347 };
7348 
7349 static int tracing_err_log_open(struct inode *inode, struct file *file)
7350 {
7351 	struct trace_array *tr = inode->i_private;
7352 	int ret = 0;
7353 
7354 	ret = tracing_check_open_get_tr(tr);
7355 	if (ret)
7356 		return ret;
7357 
7358 	/* If this file was opened for write, then erase contents */
7359 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7360 		clear_tracing_err_log(tr);
7361 
7362 	if (file->f_mode & FMODE_READ) {
7363 		ret = seq_open(file, &tracing_err_log_seq_ops);
7364 		if (!ret) {
7365 			struct seq_file *m = file->private_data;
7366 			m->private = tr;
7367 		} else {
7368 			trace_array_put(tr);
7369 		}
7370 	}
7371 	return ret;
7372 }
7373 
7374 static ssize_t tracing_err_log_write(struct file *file,
7375 				     const char __user *buffer,
7376 				     size_t count, loff_t *ppos)
7377 {
7378 	return count;
7379 }
7380 
7381 static int tracing_err_log_release(struct inode *inode, struct file *file)
7382 {
7383 	struct trace_array *tr = inode->i_private;
7384 
7385 	trace_array_put(tr);
7386 
7387 	if (file->f_mode & FMODE_READ)
7388 		seq_release(inode, file);
7389 
7390 	return 0;
7391 }
7392 
7393 static const struct file_operations tracing_err_log_fops = {
7394 	.open           = tracing_err_log_open,
7395 	.write		= tracing_err_log_write,
7396 	.read           = seq_read,
7397 	.llseek         = seq_lseek,
7398 	.release        = tracing_err_log_release,
7399 };
7400 
7401 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7402 {
7403 	struct trace_array *tr = inode->i_private;
7404 	struct ftrace_buffer_info *info;
7405 	int ret;
7406 
7407 	ret = tracing_check_open_get_tr(tr);
7408 	if (ret)
7409 		return ret;
7410 
7411 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7412 	if (!info) {
7413 		trace_array_put(tr);
7414 		return -ENOMEM;
7415 	}
7416 
7417 	mutex_lock(&trace_types_lock);
7418 
7419 	info->iter.tr		= tr;
7420 	info->iter.cpu_file	= tracing_get_cpu(inode);
7421 	info->iter.trace	= tr->current_trace;
7422 	info->iter.array_buffer = &tr->array_buffer;
7423 	info->spare		= NULL;
7424 	/* Force reading ring buffer for first read */
7425 	info->read		= (unsigned int)-1;
7426 
7427 	filp->private_data = info;
7428 
7429 	tr->trace_ref++;
7430 
7431 	mutex_unlock(&trace_types_lock);
7432 
7433 	ret = nonseekable_open(inode, filp);
7434 	if (ret < 0)
7435 		trace_array_put(tr);
7436 
7437 	return ret;
7438 }
7439 
7440 static __poll_t
7441 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7442 {
7443 	struct ftrace_buffer_info *info = filp->private_data;
7444 	struct trace_iterator *iter = &info->iter;
7445 
7446 	return trace_poll(iter, filp, poll_table);
7447 }
7448 
7449 static ssize_t
7450 tracing_buffers_read(struct file *filp, char __user *ubuf,
7451 		     size_t count, loff_t *ppos)
7452 {
7453 	struct ftrace_buffer_info *info = filp->private_data;
7454 	struct trace_iterator *iter = &info->iter;
7455 	ssize_t ret = 0;
7456 	ssize_t size;
7457 
7458 	if (!count)
7459 		return 0;
7460 
7461 #ifdef CONFIG_TRACER_MAX_TRACE
7462 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7463 		return -EBUSY;
7464 #endif
7465 
7466 	if (!info->spare) {
7467 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7468 							  iter->cpu_file);
7469 		if (IS_ERR(info->spare)) {
7470 			ret = PTR_ERR(info->spare);
7471 			info->spare = NULL;
7472 		} else {
7473 			info->spare_cpu = iter->cpu_file;
7474 		}
7475 	}
7476 	if (!info->spare)
7477 		return ret;
7478 
7479 	/* Do we have previous read data to read? */
7480 	if (info->read < PAGE_SIZE)
7481 		goto read;
7482 
7483  again:
7484 	trace_access_lock(iter->cpu_file);
7485 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7486 				    &info->spare,
7487 				    count,
7488 				    iter->cpu_file, 0);
7489 	trace_access_unlock(iter->cpu_file);
7490 
7491 	if (ret < 0) {
7492 		if (trace_empty(iter)) {
7493 			if ((filp->f_flags & O_NONBLOCK))
7494 				return -EAGAIN;
7495 
7496 			ret = wait_on_pipe(iter, 0);
7497 			if (ret)
7498 				return ret;
7499 
7500 			goto again;
7501 		}
7502 		return 0;
7503 	}
7504 
7505 	info->read = 0;
7506  read:
7507 	size = PAGE_SIZE - info->read;
7508 	if (size > count)
7509 		size = count;
7510 
7511 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7512 	if (ret == size)
7513 		return -EFAULT;
7514 
7515 	size -= ret;
7516 
7517 	*ppos += size;
7518 	info->read += size;
7519 
7520 	return size;
7521 }
7522 
7523 static int tracing_buffers_release(struct inode *inode, struct file *file)
7524 {
7525 	struct ftrace_buffer_info *info = file->private_data;
7526 	struct trace_iterator *iter = &info->iter;
7527 
7528 	mutex_lock(&trace_types_lock);
7529 
7530 	iter->tr->trace_ref--;
7531 
7532 	__trace_array_put(iter->tr);
7533 
7534 	if (info->spare)
7535 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7536 					   info->spare_cpu, info->spare);
7537 	kfree(info);
7538 
7539 	mutex_unlock(&trace_types_lock);
7540 
7541 	return 0;
7542 }
7543 
7544 struct buffer_ref {
7545 	struct trace_buffer	*buffer;
7546 	void			*page;
7547 	int			cpu;
7548 	refcount_t		refcount;
7549 };
7550 
7551 static void buffer_ref_release(struct buffer_ref *ref)
7552 {
7553 	if (!refcount_dec_and_test(&ref->refcount))
7554 		return;
7555 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7556 	kfree(ref);
7557 }
7558 
7559 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7560 				    struct pipe_buffer *buf)
7561 {
7562 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7563 
7564 	buffer_ref_release(ref);
7565 	buf->private = 0;
7566 }
7567 
7568 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7569 				struct pipe_buffer *buf)
7570 {
7571 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7572 
7573 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7574 		return false;
7575 
7576 	refcount_inc(&ref->refcount);
7577 	return true;
7578 }
7579 
7580 /* Pipe buffer operations for a buffer. */
7581 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7582 	.release		= buffer_pipe_buf_release,
7583 	.get			= buffer_pipe_buf_get,
7584 };
7585 
7586 /*
7587  * Callback from splice_to_pipe(), if we need to release some pages
7588  * at the end of the spd in case we error'ed out in filling the pipe.
7589  */
7590 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7591 {
7592 	struct buffer_ref *ref =
7593 		(struct buffer_ref *)spd->partial[i].private;
7594 
7595 	buffer_ref_release(ref);
7596 	spd->partial[i].private = 0;
7597 }
7598 
7599 static ssize_t
7600 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7601 			    struct pipe_inode_info *pipe, size_t len,
7602 			    unsigned int flags)
7603 {
7604 	struct ftrace_buffer_info *info = file->private_data;
7605 	struct trace_iterator *iter = &info->iter;
7606 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7607 	struct page *pages_def[PIPE_DEF_BUFFERS];
7608 	struct splice_pipe_desc spd = {
7609 		.pages		= pages_def,
7610 		.partial	= partial_def,
7611 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7612 		.ops		= &buffer_pipe_buf_ops,
7613 		.spd_release	= buffer_spd_release,
7614 	};
7615 	struct buffer_ref *ref;
7616 	int entries, i;
7617 	ssize_t ret = 0;
7618 
7619 #ifdef CONFIG_TRACER_MAX_TRACE
7620 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7621 		return -EBUSY;
7622 #endif
7623 
7624 	if (*ppos & (PAGE_SIZE - 1))
7625 		return -EINVAL;
7626 
7627 	if (len & (PAGE_SIZE - 1)) {
7628 		if (len < PAGE_SIZE)
7629 			return -EINVAL;
7630 		len &= PAGE_MASK;
7631 	}
7632 
7633 	if (splice_grow_spd(pipe, &spd))
7634 		return -ENOMEM;
7635 
7636  again:
7637 	trace_access_lock(iter->cpu_file);
7638 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7639 
7640 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7641 		struct page *page;
7642 		int r;
7643 
7644 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7645 		if (!ref) {
7646 			ret = -ENOMEM;
7647 			break;
7648 		}
7649 
7650 		refcount_set(&ref->refcount, 1);
7651 		ref->buffer = iter->array_buffer->buffer;
7652 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7653 		if (IS_ERR(ref->page)) {
7654 			ret = PTR_ERR(ref->page);
7655 			ref->page = NULL;
7656 			kfree(ref);
7657 			break;
7658 		}
7659 		ref->cpu = iter->cpu_file;
7660 
7661 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7662 					  len, iter->cpu_file, 1);
7663 		if (r < 0) {
7664 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7665 						   ref->page);
7666 			kfree(ref);
7667 			break;
7668 		}
7669 
7670 		page = virt_to_page(ref->page);
7671 
7672 		spd.pages[i] = page;
7673 		spd.partial[i].len = PAGE_SIZE;
7674 		spd.partial[i].offset = 0;
7675 		spd.partial[i].private = (unsigned long)ref;
7676 		spd.nr_pages++;
7677 		*ppos += PAGE_SIZE;
7678 
7679 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7680 	}
7681 
7682 	trace_access_unlock(iter->cpu_file);
7683 	spd.nr_pages = i;
7684 
7685 	/* did we read anything? */
7686 	if (!spd.nr_pages) {
7687 		if (ret)
7688 			goto out;
7689 
7690 		ret = -EAGAIN;
7691 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7692 			goto out;
7693 
7694 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7695 		if (ret)
7696 			goto out;
7697 
7698 		goto again;
7699 	}
7700 
7701 	ret = splice_to_pipe(pipe, &spd);
7702 out:
7703 	splice_shrink_spd(&spd);
7704 
7705 	return ret;
7706 }
7707 
7708 static const struct file_operations tracing_buffers_fops = {
7709 	.open		= tracing_buffers_open,
7710 	.read		= tracing_buffers_read,
7711 	.poll		= tracing_buffers_poll,
7712 	.release	= tracing_buffers_release,
7713 	.splice_read	= tracing_buffers_splice_read,
7714 	.llseek		= no_llseek,
7715 };
7716 
7717 static ssize_t
7718 tracing_stats_read(struct file *filp, char __user *ubuf,
7719 		   size_t count, loff_t *ppos)
7720 {
7721 	struct inode *inode = file_inode(filp);
7722 	struct trace_array *tr = inode->i_private;
7723 	struct array_buffer *trace_buf = &tr->array_buffer;
7724 	int cpu = tracing_get_cpu(inode);
7725 	struct trace_seq *s;
7726 	unsigned long cnt;
7727 	unsigned long long t;
7728 	unsigned long usec_rem;
7729 
7730 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7731 	if (!s)
7732 		return -ENOMEM;
7733 
7734 	trace_seq_init(s);
7735 
7736 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7737 	trace_seq_printf(s, "entries: %ld\n", cnt);
7738 
7739 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7740 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7741 
7742 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7743 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7744 
7745 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7746 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7747 
7748 	if (trace_clocks[tr->clock_id].in_ns) {
7749 		/* local or global for trace_clock */
7750 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7751 		usec_rem = do_div(t, USEC_PER_SEC);
7752 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7753 								t, usec_rem);
7754 
7755 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7756 		usec_rem = do_div(t, USEC_PER_SEC);
7757 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7758 	} else {
7759 		/* counter or tsc mode for trace_clock */
7760 		trace_seq_printf(s, "oldest event ts: %llu\n",
7761 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7762 
7763 		trace_seq_printf(s, "now ts: %llu\n",
7764 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7765 	}
7766 
7767 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7768 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7769 
7770 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7771 	trace_seq_printf(s, "read events: %ld\n", cnt);
7772 
7773 	count = simple_read_from_buffer(ubuf, count, ppos,
7774 					s->buffer, trace_seq_used(s));
7775 
7776 	kfree(s);
7777 
7778 	return count;
7779 }
7780 
7781 static const struct file_operations tracing_stats_fops = {
7782 	.open		= tracing_open_generic_tr,
7783 	.read		= tracing_stats_read,
7784 	.llseek		= generic_file_llseek,
7785 	.release	= tracing_release_generic_tr,
7786 };
7787 
7788 #ifdef CONFIG_DYNAMIC_FTRACE
7789 
7790 static ssize_t
7791 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7792 		  size_t cnt, loff_t *ppos)
7793 {
7794 	ssize_t ret;
7795 	char *buf;
7796 	int r;
7797 
7798 	/* 256 should be plenty to hold the amount needed */
7799 	buf = kmalloc(256, GFP_KERNEL);
7800 	if (!buf)
7801 		return -ENOMEM;
7802 
7803 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7804 		      ftrace_update_tot_cnt,
7805 		      ftrace_number_of_pages,
7806 		      ftrace_number_of_groups);
7807 
7808 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7809 	kfree(buf);
7810 	return ret;
7811 }
7812 
7813 static const struct file_operations tracing_dyn_info_fops = {
7814 	.open		= tracing_open_generic,
7815 	.read		= tracing_read_dyn_info,
7816 	.llseek		= generic_file_llseek,
7817 };
7818 #endif /* CONFIG_DYNAMIC_FTRACE */
7819 
7820 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7821 static void
7822 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7823 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7824 		void *data)
7825 {
7826 	tracing_snapshot_instance(tr);
7827 }
7828 
7829 static void
7830 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7831 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7832 		      void *data)
7833 {
7834 	struct ftrace_func_mapper *mapper = data;
7835 	long *count = NULL;
7836 
7837 	if (mapper)
7838 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7839 
7840 	if (count) {
7841 
7842 		if (*count <= 0)
7843 			return;
7844 
7845 		(*count)--;
7846 	}
7847 
7848 	tracing_snapshot_instance(tr);
7849 }
7850 
7851 static int
7852 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7853 		      struct ftrace_probe_ops *ops, void *data)
7854 {
7855 	struct ftrace_func_mapper *mapper = data;
7856 	long *count = NULL;
7857 
7858 	seq_printf(m, "%ps:", (void *)ip);
7859 
7860 	seq_puts(m, "snapshot");
7861 
7862 	if (mapper)
7863 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7864 
7865 	if (count)
7866 		seq_printf(m, ":count=%ld\n", *count);
7867 	else
7868 		seq_puts(m, ":unlimited\n");
7869 
7870 	return 0;
7871 }
7872 
7873 static int
7874 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7875 		     unsigned long ip, void *init_data, void **data)
7876 {
7877 	struct ftrace_func_mapper *mapper = *data;
7878 
7879 	if (!mapper) {
7880 		mapper = allocate_ftrace_func_mapper();
7881 		if (!mapper)
7882 			return -ENOMEM;
7883 		*data = mapper;
7884 	}
7885 
7886 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7887 }
7888 
7889 static void
7890 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7891 		     unsigned long ip, void *data)
7892 {
7893 	struct ftrace_func_mapper *mapper = data;
7894 
7895 	if (!ip) {
7896 		if (!mapper)
7897 			return;
7898 		free_ftrace_func_mapper(mapper, NULL);
7899 		return;
7900 	}
7901 
7902 	ftrace_func_mapper_remove_ip(mapper, ip);
7903 }
7904 
7905 static struct ftrace_probe_ops snapshot_probe_ops = {
7906 	.func			= ftrace_snapshot,
7907 	.print			= ftrace_snapshot_print,
7908 };
7909 
7910 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7911 	.func			= ftrace_count_snapshot,
7912 	.print			= ftrace_snapshot_print,
7913 	.init			= ftrace_snapshot_init,
7914 	.free			= ftrace_snapshot_free,
7915 };
7916 
7917 static int
7918 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7919 			       char *glob, char *cmd, char *param, int enable)
7920 {
7921 	struct ftrace_probe_ops *ops;
7922 	void *count = (void *)-1;
7923 	char *number;
7924 	int ret;
7925 
7926 	if (!tr)
7927 		return -ENODEV;
7928 
7929 	/* hash funcs only work with set_ftrace_filter */
7930 	if (!enable)
7931 		return -EINVAL;
7932 
7933 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7934 
7935 	if (glob[0] == '!')
7936 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7937 
7938 	if (!param)
7939 		goto out_reg;
7940 
7941 	number = strsep(&param, ":");
7942 
7943 	if (!strlen(number))
7944 		goto out_reg;
7945 
7946 	/*
7947 	 * We use the callback data field (which is a pointer)
7948 	 * as our counter.
7949 	 */
7950 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7951 	if (ret)
7952 		return ret;
7953 
7954  out_reg:
7955 	ret = tracing_alloc_snapshot_instance(tr);
7956 	if (ret < 0)
7957 		goto out;
7958 
7959 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7960 
7961  out:
7962 	return ret < 0 ? ret : 0;
7963 }
7964 
7965 static struct ftrace_func_command ftrace_snapshot_cmd = {
7966 	.name			= "snapshot",
7967 	.func			= ftrace_trace_snapshot_callback,
7968 };
7969 
7970 static __init int register_snapshot_cmd(void)
7971 {
7972 	return register_ftrace_command(&ftrace_snapshot_cmd);
7973 }
7974 #else
7975 static inline __init int register_snapshot_cmd(void) { return 0; }
7976 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7977 
7978 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7979 {
7980 	if (WARN_ON(!tr->dir))
7981 		return ERR_PTR(-ENODEV);
7982 
7983 	/* Top directory uses NULL as the parent */
7984 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7985 		return NULL;
7986 
7987 	/* All sub buffers have a descriptor */
7988 	return tr->dir;
7989 }
7990 
7991 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7992 {
7993 	struct dentry *d_tracer;
7994 
7995 	if (tr->percpu_dir)
7996 		return tr->percpu_dir;
7997 
7998 	d_tracer = tracing_get_dentry(tr);
7999 	if (IS_ERR(d_tracer))
8000 		return NULL;
8001 
8002 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8003 
8004 	MEM_FAIL(!tr->percpu_dir,
8005 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8006 
8007 	return tr->percpu_dir;
8008 }
8009 
8010 static struct dentry *
8011 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8012 		      void *data, long cpu, const struct file_operations *fops)
8013 {
8014 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8015 
8016 	if (ret) /* See tracing_get_cpu() */
8017 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8018 	return ret;
8019 }
8020 
8021 static void
8022 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8023 {
8024 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8025 	struct dentry *d_cpu;
8026 	char cpu_dir[30]; /* 30 characters should be more than enough */
8027 
8028 	if (!d_percpu)
8029 		return;
8030 
8031 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8032 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8033 	if (!d_cpu) {
8034 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8035 		return;
8036 	}
8037 
8038 	/* per cpu trace_pipe */
8039 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8040 				tr, cpu, &tracing_pipe_fops);
8041 
8042 	/* per cpu trace */
8043 	trace_create_cpu_file("trace", 0644, d_cpu,
8044 				tr, cpu, &tracing_fops);
8045 
8046 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8047 				tr, cpu, &tracing_buffers_fops);
8048 
8049 	trace_create_cpu_file("stats", 0444, d_cpu,
8050 				tr, cpu, &tracing_stats_fops);
8051 
8052 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8053 				tr, cpu, &tracing_entries_fops);
8054 
8055 #ifdef CONFIG_TRACER_SNAPSHOT
8056 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8057 				tr, cpu, &snapshot_fops);
8058 
8059 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8060 				tr, cpu, &snapshot_raw_fops);
8061 #endif
8062 }
8063 
8064 #ifdef CONFIG_FTRACE_SELFTEST
8065 /* Let selftest have access to static functions in this file */
8066 #include "trace_selftest.c"
8067 #endif
8068 
8069 static ssize_t
8070 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8071 			loff_t *ppos)
8072 {
8073 	struct trace_option_dentry *topt = filp->private_data;
8074 	char *buf;
8075 
8076 	if (topt->flags->val & topt->opt->bit)
8077 		buf = "1\n";
8078 	else
8079 		buf = "0\n";
8080 
8081 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8082 }
8083 
8084 static ssize_t
8085 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8086 			 loff_t *ppos)
8087 {
8088 	struct trace_option_dentry *topt = filp->private_data;
8089 	unsigned long val;
8090 	int ret;
8091 
8092 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8093 	if (ret)
8094 		return ret;
8095 
8096 	if (val != 0 && val != 1)
8097 		return -EINVAL;
8098 
8099 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8100 		mutex_lock(&trace_types_lock);
8101 		ret = __set_tracer_option(topt->tr, topt->flags,
8102 					  topt->opt, !val);
8103 		mutex_unlock(&trace_types_lock);
8104 		if (ret)
8105 			return ret;
8106 	}
8107 
8108 	*ppos += cnt;
8109 
8110 	return cnt;
8111 }
8112 
8113 
8114 static const struct file_operations trace_options_fops = {
8115 	.open = tracing_open_generic,
8116 	.read = trace_options_read,
8117 	.write = trace_options_write,
8118 	.llseek	= generic_file_llseek,
8119 };
8120 
8121 /*
8122  * In order to pass in both the trace_array descriptor as well as the index
8123  * to the flag that the trace option file represents, the trace_array
8124  * has a character array of trace_flags_index[], which holds the index
8125  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8126  * The address of this character array is passed to the flag option file
8127  * read/write callbacks.
8128  *
8129  * In order to extract both the index and the trace_array descriptor,
8130  * get_tr_index() uses the following algorithm.
8131  *
8132  *   idx = *ptr;
8133  *
8134  * As the pointer itself contains the address of the index (remember
8135  * index[1] == 1).
8136  *
8137  * Then to get the trace_array descriptor, by subtracting that index
8138  * from the ptr, we get to the start of the index itself.
8139  *
8140  *   ptr - idx == &index[0]
8141  *
8142  * Then a simple container_of() from that pointer gets us to the
8143  * trace_array descriptor.
8144  */
8145 static void get_tr_index(void *data, struct trace_array **ptr,
8146 			 unsigned int *pindex)
8147 {
8148 	*pindex = *(unsigned char *)data;
8149 
8150 	*ptr = container_of(data - *pindex, struct trace_array,
8151 			    trace_flags_index);
8152 }
8153 
8154 static ssize_t
8155 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8156 			loff_t *ppos)
8157 {
8158 	void *tr_index = filp->private_data;
8159 	struct trace_array *tr;
8160 	unsigned int index;
8161 	char *buf;
8162 
8163 	get_tr_index(tr_index, &tr, &index);
8164 
8165 	if (tr->trace_flags & (1 << index))
8166 		buf = "1\n";
8167 	else
8168 		buf = "0\n";
8169 
8170 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8171 }
8172 
8173 static ssize_t
8174 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8175 			 loff_t *ppos)
8176 {
8177 	void *tr_index = filp->private_data;
8178 	struct trace_array *tr;
8179 	unsigned int index;
8180 	unsigned long val;
8181 	int ret;
8182 
8183 	get_tr_index(tr_index, &tr, &index);
8184 
8185 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8186 	if (ret)
8187 		return ret;
8188 
8189 	if (val != 0 && val != 1)
8190 		return -EINVAL;
8191 
8192 	mutex_lock(&event_mutex);
8193 	mutex_lock(&trace_types_lock);
8194 	ret = set_tracer_flag(tr, 1 << index, val);
8195 	mutex_unlock(&trace_types_lock);
8196 	mutex_unlock(&event_mutex);
8197 
8198 	if (ret < 0)
8199 		return ret;
8200 
8201 	*ppos += cnt;
8202 
8203 	return cnt;
8204 }
8205 
8206 static const struct file_operations trace_options_core_fops = {
8207 	.open = tracing_open_generic,
8208 	.read = trace_options_core_read,
8209 	.write = trace_options_core_write,
8210 	.llseek = generic_file_llseek,
8211 };
8212 
8213 struct dentry *trace_create_file(const char *name,
8214 				 umode_t mode,
8215 				 struct dentry *parent,
8216 				 void *data,
8217 				 const struct file_operations *fops)
8218 {
8219 	struct dentry *ret;
8220 
8221 	ret = tracefs_create_file(name, mode, parent, data, fops);
8222 	if (!ret)
8223 		pr_warn("Could not create tracefs '%s' entry\n", name);
8224 
8225 	return ret;
8226 }
8227 
8228 
8229 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8230 {
8231 	struct dentry *d_tracer;
8232 
8233 	if (tr->options)
8234 		return tr->options;
8235 
8236 	d_tracer = tracing_get_dentry(tr);
8237 	if (IS_ERR(d_tracer))
8238 		return NULL;
8239 
8240 	tr->options = tracefs_create_dir("options", d_tracer);
8241 	if (!tr->options) {
8242 		pr_warn("Could not create tracefs directory 'options'\n");
8243 		return NULL;
8244 	}
8245 
8246 	return tr->options;
8247 }
8248 
8249 static void
8250 create_trace_option_file(struct trace_array *tr,
8251 			 struct trace_option_dentry *topt,
8252 			 struct tracer_flags *flags,
8253 			 struct tracer_opt *opt)
8254 {
8255 	struct dentry *t_options;
8256 
8257 	t_options = trace_options_init_dentry(tr);
8258 	if (!t_options)
8259 		return;
8260 
8261 	topt->flags = flags;
8262 	topt->opt = opt;
8263 	topt->tr = tr;
8264 
8265 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8266 				    &trace_options_fops);
8267 
8268 }
8269 
8270 static void
8271 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8272 {
8273 	struct trace_option_dentry *topts;
8274 	struct trace_options *tr_topts;
8275 	struct tracer_flags *flags;
8276 	struct tracer_opt *opts;
8277 	int cnt;
8278 	int i;
8279 
8280 	if (!tracer)
8281 		return;
8282 
8283 	flags = tracer->flags;
8284 
8285 	if (!flags || !flags->opts)
8286 		return;
8287 
8288 	/*
8289 	 * If this is an instance, only create flags for tracers
8290 	 * the instance may have.
8291 	 */
8292 	if (!trace_ok_for_array(tracer, tr))
8293 		return;
8294 
8295 	for (i = 0; i < tr->nr_topts; i++) {
8296 		/* Make sure there's no duplicate flags. */
8297 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8298 			return;
8299 	}
8300 
8301 	opts = flags->opts;
8302 
8303 	for (cnt = 0; opts[cnt].name; cnt++)
8304 		;
8305 
8306 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8307 	if (!topts)
8308 		return;
8309 
8310 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8311 			    GFP_KERNEL);
8312 	if (!tr_topts) {
8313 		kfree(topts);
8314 		return;
8315 	}
8316 
8317 	tr->topts = tr_topts;
8318 	tr->topts[tr->nr_topts].tracer = tracer;
8319 	tr->topts[tr->nr_topts].topts = topts;
8320 	tr->nr_topts++;
8321 
8322 	for (cnt = 0; opts[cnt].name; cnt++) {
8323 		create_trace_option_file(tr, &topts[cnt], flags,
8324 					 &opts[cnt]);
8325 		MEM_FAIL(topts[cnt].entry == NULL,
8326 			  "Failed to create trace option: %s",
8327 			  opts[cnt].name);
8328 	}
8329 }
8330 
8331 static struct dentry *
8332 create_trace_option_core_file(struct trace_array *tr,
8333 			      const char *option, long index)
8334 {
8335 	struct dentry *t_options;
8336 
8337 	t_options = trace_options_init_dentry(tr);
8338 	if (!t_options)
8339 		return NULL;
8340 
8341 	return trace_create_file(option, 0644, t_options,
8342 				 (void *)&tr->trace_flags_index[index],
8343 				 &trace_options_core_fops);
8344 }
8345 
8346 static void create_trace_options_dir(struct trace_array *tr)
8347 {
8348 	struct dentry *t_options;
8349 	bool top_level = tr == &global_trace;
8350 	int i;
8351 
8352 	t_options = trace_options_init_dentry(tr);
8353 	if (!t_options)
8354 		return;
8355 
8356 	for (i = 0; trace_options[i]; i++) {
8357 		if (top_level ||
8358 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8359 			create_trace_option_core_file(tr, trace_options[i], i);
8360 	}
8361 }
8362 
8363 static ssize_t
8364 rb_simple_read(struct file *filp, char __user *ubuf,
8365 	       size_t cnt, loff_t *ppos)
8366 {
8367 	struct trace_array *tr = filp->private_data;
8368 	char buf[64];
8369 	int r;
8370 
8371 	r = tracer_tracing_is_on(tr);
8372 	r = sprintf(buf, "%d\n", r);
8373 
8374 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8375 }
8376 
8377 static ssize_t
8378 rb_simple_write(struct file *filp, const char __user *ubuf,
8379 		size_t cnt, loff_t *ppos)
8380 {
8381 	struct trace_array *tr = filp->private_data;
8382 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8383 	unsigned long val;
8384 	int ret;
8385 
8386 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8387 	if (ret)
8388 		return ret;
8389 
8390 	if (buffer) {
8391 		mutex_lock(&trace_types_lock);
8392 		if (!!val == tracer_tracing_is_on(tr)) {
8393 			val = 0; /* do nothing */
8394 		} else if (val) {
8395 			tracer_tracing_on(tr);
8396 			if (tr->current_trace->start)
8397 				tr->current_trace->start(tr);
8398 		} else {
8399 			tracer_tracing_off(tr);
8400 			if (tr->current_trace->stop)
8401 				tr->current_trace->stop(tr);
8402 		}
8403 		mutex_unlock(&trace_types_lock);
8404 	}
8405 
8406 	(*ppos)++;
8407 
8408 	return cnt;
8409 }
8410 
8411 static const struct file_operations rb_simple_fops = {
8412 	.open		= tracing_open_generic_tr,
8413 	.read		= rb_simple_read,
8414 	.write		= rb_simple_write,
8415 	.release	= tracing_release_generic_tr,
8416 	.llseek		= default_llseek,
8417 };
8418 
8419 static ssize_t
8420 buffer_percent_read(struct file *filp, char __user *ubuf,
8421 		    size_t cnt, loff_t *ppos)
8422 {
8423 	struct trace_array *tr = filp->private_data;
8424 	char buf[64];
8425 	int r;
8426 
8427 	r = tr->buffer_percent;
8428 	r = sprintf(buf, "%d\n", r);
8429 
8430 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8431 }
8432 
8433 static ssize_t
8434 buffer_percent_write(struct file *filp, const char __user *ubuf,
8435 		     size_t cnt, loff_t *ppos)
8436 {
8437 	struct trace_array *tr = filp->private_data;
8438 	unsigned long val;
8439 	int ret;
8440 
8441 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8442 	if (ret)
8443 		return ret;
8444 
8445 	if (val > 100)
8446 		return -EINVAL;
8447 
8448 	if (!val)
8449 		val = 1;
8450 
8451 	tr->buffer_percent = val;
8452 
8453 	(*ppos)++;
8454 
8455 	return cnt;
8456 }
8457 
8458 static const struct file_operations buffer_percent_fops = {
8459 	.open		= tracing_open_generic_tr,
8460 	.read		= buffer_percent_read,
8461 	.write		= buffer_percent_write,
8462 	.release	= tracing_release_generic_tr,
8463 	.llseek		= default_llseek,
8464 };
8465 
8466 static struct dentry *trace_instance_dir;
8467 
8468 static void
8469 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8470 
8471 static int
8472 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8473 {
8474 	enum ring_buffer_flags rb_flags;
8475 
8476 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8477 
8478 	buf->tr = tr;
8479 
8480 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8481 	if (!buf->buffer)
8482 		return -ENOMEM;
8483 
8484 	buf->data = alloc_percpu(struct trace_array_cpu);
8485 	if (!buf->data) {
8486 		ring_buffer_free(buf->buffer);
8487 		buf->buffer = NULL;
8488 		return -ENOMEM;
8489 	}
8490 
8491 	/* Allocate the first page for all buffers */
8492 	set_buffer_entries(&tr->array_buffer,
8493 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8494 
8495 	return 0;
8496 }
8497 
8498 static int allocate_trace_buffers(struct trace_array *tr, int size)
8499 {
8500 	int ret;
8501 
8502 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8503 	if (ret)
8504 		return ret;
8505 
8506 #ifdef CONFIG_TRACER_MAX_TRACE
8507 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8508 				    allocate_snapshot ? size : 1);
8509 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8510 		ring_buffer_free(tr->array_buffer.buffer);
8511 		tr->array_buffer.buffer = NULL;
8512 		free_percpu(tr->array_buffer.data);
8513 		tr->array_buffer.data = NULL;
8514 		return -ENOMEM;
8515 	}
8516 	tr->allocated_snapshot = allocate_snapshot;
8517 
8518 	/*
8519 	 * Only the top level trace array gets its snapshot allocated
8520 	 * from the kernel command line.
8521 	 */
8522 	allocate_snapshot = false;
8523 #endif
8524 
8525 	return 0;
8526 }
8527 
8528 static void free_trace_buffer(struct array_buffer *buf)
8529 {
8530 	if (buf->buffer) {
8531 		ring_buffer_free(buf->buffer);
8532 		buf->buffer = NULL;
8533 		free_percpu(buf->data);
8534 		buf->data = NULL;
8535 	}
8536 }
8537 
8538 static void free_trace_buffers(struct trace_array *tr)
8539 {
8540 	if (!tr)
8541 		return;
8542 
8543 	free_trace_buffer(&tr->array_buffer);
8544 
8545 #ifdef CONFIG_TRACER_MAX_TRACE
8546 	free_trace_buffer(&tr->max_buffer);
8547 #endif
8548 }
8549 
8550 static void init_trace_flags_index(struct trace_array *tr)
8551 {
8552 	int i;
8553 
8554 	/* Used by the trace options files */
8555 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8556 		tr->trace_flags_index[i] = i;
8557 }
8558 
8559 static void __update_tracer_options(struct trace_array *tr)
8560 {
8561 	struct tracer *t;
8562 
8563 	for (t = trace_types; t; t = t->next)
8564 		add_tracer_options(tr, t);
8565 }
8566 
8567 static void update_tracer_options(struct trace_array *tr)
8568 {
8569 	mutex_lock(&trace_types_lock);
8570 	__update_tracer_options(tr);
8571 	mutex_unlock(&trace_types_lock);
8572 }
8573 
8574 /* Must have trace_types_lock held */
8575 struct trace_array *trace_array_find(const char *instance)
8576 {
8577 	struct trace_array *tr, *found = NULL;
8578 
8579 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8580 		if (tr->name && strcmp(tr->name, instance) == 0) {
8581 			found = tr;
8582 			break;
8583 		}
8584 	}
8585 
8586 	return found;
8587 }
8588 
8589 struct trace_array *trace_array_find_get(const char *instance)
8590 {
8591 	struct trace_array *tr;
8592 
8593 	mutex_lock(&trace_types_lock);
8594 	tr = trace_array_find(instance);
8595 	if (tr)
8596 		tr->ref++;
8597 	mutex_unlock(&trace_types_lock);
8598 
8599 	return tr;
8600 }
8601 
8602 static struct trace_array *trace_array_create(const char *name)
8603 {
8604 	struct trace_array *tr;
8605 	int ret;
8606 
8607 	ret = -ENOMEM;
8608 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8609 	if (!tr)
8610 		return ERR_PTR(ret);
8611 
8612 	tr->name = kstrdup(name, GFP_KERNEL);
8613 	if (!tr->name)
8614 		goto out_free_tr;
8615 
8616 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8617 		goto out_free_tr;
8618 
8619 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8620 
8621 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8622 
8623 	raw_spin_lock_init(&tr->start_lock);
8624 
8625 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8626 
8627 	tr->current_trace = &nop_trace;
8628 
8629 	INIT_LIST_HEAD(&tr->systems);
8630 	INIT_LIST_HEAD(&tr->events);
8631 	INIT_LIST_HEAD(&tr->hist_vars);
8632 	INIT_LIST_HEAD(&tr->err_log);
8633 
8634 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8635 		goto out_free_tr;
8636 
8637 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8638 	if (!tr->dir)
8639 		goto out_free_tr;
8640 
8641 	ret = event_trace_add_tracer(tr->dir, tr);
8642 	if (ret) {
8643 		tracefs_remove(tr->dir);
8644 		goto out_free_tr;
8645 	}
8646 
8647 	ftrace_init_trace_array(tr);
8648 
8649 	init_tracer_tracefs(tr, tr->dir);
8650 	init_trace_flags_index(tr);
8651 	__update_tracer_options(tr);
8652 
8653 	list_add(&tr->list, &ftrace_trace_arrays);
8654 
8655 	tr->ref++;
8656 
8657 
8658 	return tr;
8659 
8660  out_free_tr:
8661 	free_trace_buffers(tr);
8662 	free_cpumask_var(tr->tracing_cpumask);
8663 	kfree(tr->name);
8664 	kfree(tr);
8665 
8666 	return ERR_PTR(ret);
8667 }
8668 
8669 static int instance_mkdir(const char *name)
8670 {
8671 	struct trace_array *tr;
8672 	int ret;
8673 
8674 	mutex_lock(&event_mutex);
8675 	mutex_lock(&trace_types_lock);
8676 
8677 	ret = -EEXIST;
8678 	if (trace_array_find(name))
8679 		goto out_unlock;
8680 
8681 	tr = trace_array_create(name);
8682 
8683 	ret = PTR_ERR_OR_ZERO(tr);
8684 
8685 out_unlock:
8686 	mutex_unlock(&trace_types_lock);
8687 	mutex_unlock(&event_mutex);
8688 	return ret;
8689 }
8690 
8691 /**
8692  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8693  * @name: The name of the trace array to be looked up/created.
8694  *
8695  * Returns pointer to trace array with given name.
8696  * NULL, if it cannot be created.
8697  *
8698  * NOTE: This function increments the reference counter associated with the
8699  * trace array returned. This makes sure it cannot be freed while in use.
8700  * Use trace_array_put() once the trace array is no longer needed.
8701  * If the trace_array is to be freed, trace_array_destroy() needs to
8702  * be called after the trace_array_put(), or simply let user space delete
8703  * it from the tracefs instances directory. But until the
8704  * trace_array_put() is called, user space can not delete it.
8705  *
8706  */
8707 struct trace_array *trace_array_get_by_name(const char *name)
8708 {
8709 	struct trace_array *tr;
8710 
8711 	mutex_lock(&event_mutex);
8712 	mutex_lock(&trace_types_lock);
8713 
8714 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8715 		if (tr->name && strcmp(tr->name, name) == 0)
8716 			goto out_unlock;
8717 	}
8718 
8719 	tr = trace_array_create(name);
8720 
8721 	if (IS_ERR(tr))
8722 		tr = NULL;
8723 out_unlock:
8724 	if (tr)
8725 		tr->ref++;
8726 
8727 	mutex_unlock(&trace_types_lock);
8728 	mutex_unlock(&event_mutex);
8729 	return tr;
8730 }
8731 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8732 
8733 static int __remove_instance(struct trace_array *tr)
8734 {
8735 	int i;
8736 
8737 	/* Reference counter for a newly created trace array = 1. */
8738 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8739 		return -EBUSY;
8740 
8741 	list_del(&tr->list);
8742 
8743 	/* Disable all the flags that were enabled coming in */
8744 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8745 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8746 			set_tracer_flag(tr, 1 << i, 0);
8747 	}
8748 
8749 	tracing_set_nop(tr);
8750 	clear_ftrace_function_probes(tr);
8751 	event_trace_del_tracer(tr);
8752 	ftrace_clear_pids(tr);
8753 	ftrace_destroy_function_files(tr);
8754 	tracefs_remove(tr->dir);
8755 	free_trace_buffers(tr);
8756 
8757 	for (i = 0; i < tr->nr_topts; i++) {
8758 		kfree(tr->topts[i].topts);
8759 	}
8760 	kfree(tr->topts);
8761 
8762 	free_cpumask_var(tr->tracing_cpumask);
8763 	kfree(tr->name);
8764 	kfree(tr);
8765 	tr = NULL;
8766 
8767 	return 0;
8768 }
8769 
8770 int trace_array_destroy(struct trace_array *this_tr)
8771 {
8772 	struct trace_array *tr;
8773 	int ret;
8774 
8775 	if (!this_tr)
8776 		return -EINVAL;
8777 
8778 	mutex_lock(&event_mutex);
8779 	mutex_lock(&trace_types_lock);
8780 
8781 	ret = -ENODEV;
8782 
8783 	/* Making sure trace array exists before destroying it. */
8784 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8785 		if (tr == this_tr) {
8786 			ret = __remove_instance(tr);
8787 			break;
8788 		}
8789 	}
8790 
8791 	mutex_unlock(&trace_types_lock);
8792 	mutex_unlock(&event_mutex);
8793 
8794 	return ret;
8795 }
8796 EXPORT_SYMBOL_GPL(trace_array_destroy);
8797 
8798 static int instance_rmdir(const char *name)
8799 {
8800 	struct trace_array *tr;
8801 	int ret;
8802 
8803 	mutex_lock(&event_mutex);
8804 	mutex_lock(&trace_types_lock);
8805 
8806 	ret = -ENODEV;
8807 	tr = trace_array_find(name);
8808 	if (tr)
8809 		ret = __remove_instance(tr);
8810 
8811 	mutex_unlock(&trace_types_lock);
8812 	mutex_unlock(&event_mutex);
8813 
8814 	return ret;
8815 }
8816 
8817 static __init void create_trace_instances(struct dentry *d_tracer)
8818 {
8819 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8820 							 instance_mkdir,
8821 							 instance_rmdir);
8822 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8823 		return;
8824 }
8825 
8826 static void
8827 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8828 {
8829 	struct trace_event_file *file;
8830 	int cpu;
8831 
8832 	trace_create_file("available_tracers", 0444, d_tracer,
8833 			tr, &show_traces_fops);
8834 
8835 	trace_create_file("current_tracer", 0644, d_tracer,
8836 			tr, &set_tracer_fops);
8837 
8838 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8839 			  tr, &tracing_cpumask_fops);
8840 
8841 	trace_create_file("trace_options", 0644, d_tracer,
8842 			  tr, &tracing_iter_fops);
8843 
8844 	trace_create_file("trace", 0644, d_tracer,
8845 			  tr, &tracing_fops);
8846 
8847 	trace_create_file("trace_pipe", 0444, d_tracer,
8848 			  tr, &tracing_pipe_fops);
8849 
8850 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8851 			  tr, &tracing_entries_fops);
8852 
8853 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8854 			  tr, &tracing_total_entries_fops);
8855 
8856 	trace_create_file("free_buffer", 0200, d_tracer,
8857 			  tr, &tracing_free_buffer_fops);
8858 
8859 	trace_create_file("trace_marker", 0220, d_tracer,
8860 			  tr, &tracing_mark_fops);
8861 
8862 	file = __find_event_file(tr, "ftrace", "print");
8863 	if (file && file->dir)
8864 		trace_create_file("trigger", 0644, file->dir, file,
8865 				  &event_trigger_fops);
8866 	tr->trace_marker_file = file;
8867 
8868 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8869 			  tr, &tracing_mark_raw_fops);
8870 
8871 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8872 			  &trace_clock_fops);
8873 
8874 	trace_create_file("tracing_on", 0644, d_tracer,
8875 			  tr, &rb_simple_fops);
8876 
8877 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8878 			  &trace_time_stamp_mode_fops);
8879 
8880 	tr->buffer_percent = 50;
8881 
8882 	trace_create_file("buffer_percent", 0444, d_tracer,
8883 			tr, &buffer_percent_fops);
8884 
8885 	create_trace_options_dir(tr);
8886 
8887 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8888 	trace_create_maxlat_file(tr, d_tracer);
8889 #endif
8890 
8891 	if (ftrace_create_function_files(tr, d_tracer))
8892 		MEM_FAIL(1, "Could not allocate function filter files");
8893 
8894 #ifdef CONFIG_TRACER_SNAPSHOT
8895 	trace_create_file("snapshot", 0644, d_tracer,
8896 			  tr, &snapshot_fops);
8897 #endif
8898 
8899 	trace_create_file("error_log", 0644, d_tracer,
8900 			  tr, &tracing_err_log_fops);
8901 
8902 	for_each_tracing_cpu(cpu)
8903 		tracing_init_tracefs_percpu(tr, cpu);
8904 
8905 	ftrace_init_tracefs(tr, d_tracer);
8906 }
8907 
8908 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8909 {
8910 	struct vfsmount *mnt;
8911 	struct file_system_type *type;
8912 
8913 	/*
8914 	 * To maintain backward compatibility for tools that mount
8915 	 * debugfs to get to the tracing facility, tracefs is automatically
8916 	 * mounted to the debugfs/tracing directory.
8917 	 */
8918 	type = get_fs_type("tracefs");
8919 	if (!type)
8920 		return NULL;
8921 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8922 	put_filesystem(type);
8923 	if (IS_ERR(mnt))
8924 		return NULL;
8925 	mntget(mnt);
8926 
8927 	return mnt;
8928 }
8929 
8930 /**
8931  * tracing_init_dentry - initialize top level trace array
8932  *
8933  * This is called when creating files or directories in the tracing
8934  * directory. It is called via fs_initcall() by any of the boot up code
8935  * and expects to return the dentry of the top level tracing directory.
8936  */
8937 struct dentry *tracing_init_dentry(void)
8938 {
8939 	struct trace_array *tr = &global_trace;
8940 
8941 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8942 		pr_warn("Tracing disabled due to lockdown\n");
8943 		return ERR_PTR(-EPERM);
8944 	}
8945 
8946 	/* The top level trace array uses  NULL as parent */
8947 	if (tr->dir)
8948 		return NULL;
8949 
8950 	if (WARN_ON(!tracefs_initialized()) ||
8951 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8952 		 WARN_ON(!debugfs_initialized())))
8953 		return ERR_PTR(-ENODEV);
8954 
8955 	/*
8956 	 * As there may still be users that expect the tracing
8957 	 * files to exist in debugfs/tracing, we must automount
8958 	 * the tracefs file system there, so older tools still
8959 	 * work with the newer kerenl.
8960 	 */
8961 	tr->dir = debugfs_create_automount("tracing", NULL,
8962 					   trace_automount, NULL);
8963 
8964 	return NULL;
8965 }
8966 
8967 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8968 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8969 
8970 static void __init trace_eval_init(void)
8971 {
8972 	int len;
8973 
8974 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8975 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8976 }
8977 
8978 #ifdef CONFIG_MODULES
8979 static void trace_module_add_evals(struct module *mod)
8980 {
8981 	if (!mod->num_trace_evals)
8982 		return;
8983 
8984 	/*
8985 	 * Modules with bad taint do not have events created, do
8986 	 * not bother with enums either.
8987 	 */
8988 	if (trace_module_has_bad_taint(mod))
8989 		return;
8990 
8991 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8992 }
8993 
8994 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8995 static void trace_module_remove_evals(struct module *mod)
8996 {
8997 	union trace_eval_map_item *map;
8998 	union trace_eval_map_item **last = &trace_eval_maps;
8999 
9000 	if (!mod->num_trace_evals)
9001 		return;
9002 
9003 	mutex_lock(&trace_eval_mutex);
9004 
9005 	map = trace_eval_maps;
9006 
9007 	while (map) {
9008 		if (map->head.mod == mod)
9009 			break;
9010 		map = trace_eval_jmp_to_tail(map);
9011 		last = &map->tail.next;
9012 		map = map->tail.next;
9013 	}
9014 	if (!map)
9015 		goto out;
9016 
9017 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9018 	kfree(map);
9019  out:
9020 	mutex_unlock(&trace_eval_mutex);
9021 }
9022 #else
9023 static inline void trace_module_remove_evals(struct module *mod) { }
9024 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9025 
9026 static int trace_module_notify(struct notifier_block *self,
9027 			       unsigned long val, void *data)
9028 {
9029 	struct module *mod = data;
9030 
9031 	switch (val) {
9032 	case MODULE_STATE_COMING:
9033 		trace_module_add_evals(mod);
9034 		break;
9035 	case MODULE_STATE_GOING:
9036 		trace_module_remove_evals(mod);
9037 		break;
9038 	}
9039 
9040 	return 0;
9041 }
9042 
9043 static struct notifier_block trace_module_nb = {
9044 	.notifier_call = trace_module_notify,
9045 	.priority = 0,
9046 };
9047 #endif /* CONFIG_MODULES */
9048 
9049 static __init int tracer_init_tracefs(void)
9050 {
9051 	struct dentry *d_tracer;
9052 
9053 	trace_access_lock_init();
9054 
9055 	d_tracer = tracing_init_dentry();
9056 	if (IS_ERR(d_tracer))
9057 		return 0;
9058 
9059 	event_trace_init();
9060 
9061 	init_tracer_tracefs(&global_trace, d_tracer);
9062 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9063 
9064 	trace_create_file("tracing_thresh", 0644, d_tracer,
9065 			&global_trace, &tracing_thresh_fops);
9066 
9067 	trace_create_file("README", 0444, d_tracer,
9068 			NULL, &tracing_readme_fops);
9069 
9070 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9071 			NULL, &tracing_saved_cmdlines_fops);
9072 
9073 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9074 			  NULL, &tracing_saved_cmdlines_size_fops);
9075 
9076 	trace_create_file("saved_tgids", 0444, d_tracer,
9077 			NULL, &tracing_saved_tgids_fops);
9078 
9079 	trace_eval_init();
9080 
9081 	trace_create_eval_file(d_tracer);
9082 
9083 #ifdef CONFIG_MODULES
9084 	register_module_notifier(&trace_module_nb);
9085 #endif
9086 
9087 #ifdef CONFIG_DYNAMIC_FTRACE
9088 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9089 			NULL, &tracing_dyn_info_fops);
9090 #endif
9091 
9092 	create_trace_instances(d_tracer);
9093 
9094 	update_tracer_options(&global_trace);
9095 
9096 	return 0;
9097 }
9098 
9099 static int trace_panic_handler(struct notifier_block *this,
9100 			       unsigned long event, void *unused)
9101 {
9102 	if (ftrace_dump_on_oops)
9103 		ftrace_dump(ftrace_dump_on_oops);
9104 	return NOTIFY_OK;
9105 }
9106 
9107 static struct notifier_block trace_panic_notifier = {
9108 	.notifier_call  = trace_panic_handler,
9109 	.next           = NULL,
9110 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9111 };
9112 
9113 static int trace_die_handler(struct notifier_block *self,
9114 			     unsigned long val,
9115 			     void *data)
9116 {
9117 	switch (val) {
9118 	case DIE_OOPS:
9119 		if (ftrace_dump_on_oops)
9120 			ftrace_dump(ftrace_dump_on_oops);
9121 		break;
9122 	default:
9123 		break;
9124 	}
9125 	return NOTIFY_OK;
9126 }
9127 
9128 static struct notifier_block trace_die_notifier = {
9129 	.notifier_call = trace_die_handler,
9130 	.priority = 200
9131 };
9132 
9133 /*
9134  * printk is set to max of 1024, we really don't need it that big.
9135  * Nothing should be printing 1000 characters anyway.
9136  */
9137 #define TRACE_MAX_PRINT		1000
9138 
9139 /*
9140  * Define here KERN_TRACE so that we have one place to modify
9141  * it if we decide to change what log level the ftrace dump
9142  * should be at.
9143  */
9144 #define KERN_TRACE		KERN_EMERG
9145 
9146 void
9147 trace_printk_seq(struct trace_seq *s)
9148 {
9149 	/* Probably should print a warning here. */
9150 	if (s->seq.len >= TRACE_MAX_PRINT)
9151 		s->seq.len = TRACE_MAX_PRINT;
9152 
9153 	/*
9154 	 * More paranoid code. Although the buffer size is set to
9155 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9156 	 * an extra layer of protection.
9157 	 */
9158 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9159 		s->seq.len = s->seq.size - 1;
9160 
9161 	/* should be zero ended, but we are paranoid. */
9162 	s->buffer[s->seq.len] = 0;
9163 
9164 	printk(KERN_TRACE "%s", s->buffer);
9165 
9166 	trace_seq_init(s);
9167 }
9168 
9169 void trace_init_global_iter(struct trace_iterator *iter)
9170 {
9171 	iter->tr = &global_trace;
9172 	iter->trace = iter->tr->current_trace;
9173 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9174 	iter->array_buffer = &global_trace.array_buffer;
9175 
9176 	if (iter->trace && iter->trace->open)
9177 		iter->trace->open(iter);
9178 
9179 	/* Annotate start of buffers if we had overruns */
9180 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9181 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9182 
9183 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9184 	if (trace_clocks[iter->tr->clock_id].in_ns)
9185 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9186 }
9187 
9188 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9189 {
9190 	/* use static because iter can be a bit big for the stack */
9191 	static struct trace_iterator iter;
9192 	static atomic_t dump_running;
9193 	struct trace_array *tr = &global_trace;
9194 	unsigned int old_userobj;
9195 	unsigned long flags;
9196 	int cnt = 0, cpu;
9197 
9198 	/* Only allow one dump user at a time. */
9199 	if (atomic_inc_return(&dump_running) != 1) {
9200 		atomic_dec(&dump_running);
9201 		return;
9202 	}
9203 
9204 	/*
9205 	 * Always turn off tracing when we dump.
9206 	 * We don't need to show trace output of what happens
9207 	 * between multiple crashes.
9208 	 *
9209 	 * If the user does a sysrq-z, then they can re-enable
9210 	 * tracing with echo 1 > tracing_on.
9211 	 */
9212 	tracing_off();
9213 
9214 	local_irq_save(flags);
9215 	printk_nmi_direct_enter();
9216 
9217 	/* Simulate the iterator */
9218 	trace_init_global_iter(&iter);
9219 	/* Can not use kmalloc for iter.temp */
9220 	iter.temp = static_temp_buf;
9221 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9222 
9223 	for_each_tracing_cpu(cpu) {
9224 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9225 	}
9226 
9227 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9228 
9229 	/* don't look at user memory in panic mode */
9230 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9231 
9232 	switch (oops_dump_mode) {
9233 	case DUMP_ALL:
9234 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9235 		break;
9236 	case DUMP_ORIG:
9237 		iter.cpu_file = raw_smp_processor_id();
9238 		break;
9239 	case DUMP_NONE:
9240 		goto out_enable;
9241 	default:
9242 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9243 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9244 	}
9245 
9246 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9247 
9248 	/* Did function tracer already get disabled? */
9249 	if (ftrace_is_dead()) {
9250 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9251 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9252 	}
9253 
9254 	/*
9255 	 * We need to stop all tracing on all CPUS to read the
9256 	 * the next buffer. This is a bit expensive, but is
9257 	 * not done often. We fill all what we can read,
9258 	 * and then release the locks again.
9259 	 */
9260 
9261 	while (!trace_empty(&iter)) {
9262 
9263 		if (!cnt)
9264 			printk(KERN_TRACE "---------------------------------\n");
9265 
9266 		cnt++;
9267 
9268 		trace_iterator_reset(&iter);
9269 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9270 
9271 		if (trace_find_next_entry_inc(&iter) != NULL) {
9272 			int ret;
9273 
9274 			ret = print_trace_line(&iter);
9275 			if (ret != TRACE_TYPE_NO_CONSUME)
9276 				trace_consume(&iter);
9277 		}
9278 		touch_nmi_watchdog();
9279 
9280 		trace_printk_seq(&iter.seq);
9281 	}
9282 
9283 	if (!cnt)
9284 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9285 	else
9286 		printk(KERN_TRACE "---------------------------------\n");
9287 
9288  out_enable:
9289 	tr->trace_flags |= old_userobj;
9290 
9291 	for_each_tracing_cpu(cpu) {
9292 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9293 	}
9294 	atomic_dec(&dump_running);
9295 	printk_nmi_direct_exit();
9296 	local_irq_restore(flags);
9297 }
9298 EXPORT_SYMBOL_GPL(ftrace_dump);
9299 
9300 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9301 {
9302 	char **argv;
9303 	int argc, ret;
9304 
9305 	argc = 0;
9306 	ret = 0;
9307 	argv = argv_split(GFP_KERNEL, buf, &argc);
9308 	if (!argv)
9309 		return -ENOMEM;
9310 
9311 	if (argc)
9312 		ret = createfn(argc, argv);
9313 
9314 	argv_free(argv);
9315 
9316 	return ret;
9317 }
9318 
9319 #define WRITE_BUFSIZE  4096
9320 
9321 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9322 				size_t count, loff_t *ppos,
9323 				int (*createfn)(int, char **))
9324 {
9325 	char *kbuf, *buf, *tmp;
9326 	int ret = 0;
9327 	size_t done = 0;
9328 	size_t size;
9329 
9330 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9331 	if (!kbuf)
9332 		return -ENOMEM;
9333 
9334 	while (done < count) {
9335 		size = count - done;
9336 
9337 		if (size >= WRITE_BUFSIZE)
9338 			size = WRITE_BUFSIZE - 1;
9339 
9340 		if (copy_from_user(kbuf, buffer + done, size)) {
9341 			ret = -EFAULT;
9342 			goto out;
9343 		}
9344 		kbuf[size] = '\0';
9345 		buf = kbuf;
9346 		do {
9347 			tmp = strchr(buf, '\n');
9348 			if (tmp) {
9349 				*tmp = '\0';
9350 				size = tmp - buf + 1;
9351 			} else {
9352 				size = strlen(buf);
9353 				if (done + size < count) {
9354 					if (buf != kbuf)
9355 						break;
9356 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9357 					pr_warn("Line length is too long: Should be less than %d\n",
9358 						WRITE_BUFSIZE - 2);
9359 					ret = -EINVAL;
9360 					goto out;
9361 				}
9362 			}
9363 			done += size;
9364 
9365 			/* Remove comments */
9366 			tmp = strchr(buf, '#');
9367 
9368 			if (tmp)
9369 				*tmp = '\0';
9370 
9371 			ret = trace_run_command(buf, createfn);
9372 			if (ret)
9373 				goto out;
9374 			buf += size;
9375 
9376 		} while (done < count);
9377 	}
9378 	ret = done;
9379 
9380 out:
9381 	kfree(kbuf);
9382 
9383 	return ret;
9384 }
9385 
9386 __init static int tracer_alloc_buffers(void)
9387 {
9388 	int ring_buf_size;
9389 	int ret = -ENOMEM;
9390 
9391 
9392 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9393 		pr_warn("Tracing disabled due to lockdown\n");
9394 		return -EPERM;
9395 	}
9396 
9397 	/*
9398 	 * Make sure we don't accidently add more trace options
9399 	 * than we have bits for.
9400 	 */
9401 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9402 
9403 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9404 		goto out;
9405 
9406 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9407 		goto out_free_buffer_mask;
9408 
9409 	/* Only allocate trace_printk buffers if a trace_printk exists */
9410 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9411 		/* Must be called before global_trace.buffer is allocated */
9412 		trace_printk_init_buffers();
9413 
9414 	/* To save memory, keep the ring buffer size to its minimum */
9415 	if (ring_buffer_expanded)
9416 		ring_buf_size = trace_buf_size;
9417 	else
9418 		ring_buf_size = 1;
9419 
9420 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9421 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9422 
9423 	raw_spin_lock_init(&global_trace.start_lock);
9424 
9425 	/*
9426 	 * The prepare callbacks allocates some memory for the ring buffer. We
9427 	 * don't free the buffer if the if the CPU goes down. If we were to free
9428 	 * the buffer, then the user would lose any trace that was in the
9429 	 * buffer. The memory will be removed once the "instance" is removed.
9430 	 */
9431 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9432 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9433 				      NULL);
9434 	if (ret < 0)
9435 		goto out_free_cpumask;
9436 	/* Used for event triggers */
9437 	ret = -ENOMEM;
9438 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9439 	if (!temp_buffer)
9440 		goto out_rm_hp_state;
9441 
9442 	if (trace_create_savedcmd() < 0)
9443 		goto out_free_temp_buffer;
9444 
9445 	/* TODO: make the number of buffers hot pluggable with CPUS */
9446 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9447 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9448 		goto out_free_savedcmd;
9449 	}
9450 
9451 	if (global_trace.buffer_disabled)
9452 		tracing_off();
9453 
9454 	if (trace_boot_clock) {
9455 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9456 		if (ret < 0)
9457 			pr_warn("Trace clock %s not defined, going back to default\n",
9458 				trace_boot_clock);
9459 	}
9460 
9461 	/*
9462 	 * register_tracer() might reference current_trace, so it
9463 	 * needs to be set before we register anything. This is
9464 	 * just a bootstrap of current_trace anyway.
9465 	 */
9466 	global_trace.current_trace = &nop_trace;
9467 
9468 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9469 
9470 	ftrace_init_global_array_ops(&global_trace);
9471 
9472 	init_trace_flags_index(&global_trace);
9473 
9474 	register_tracer(&nop_trace);
9475 
9476 	/* Function tracing may start here (via kernel command line) */
9477 	init_function_trace();
9478 
9479 	/* All seems OK, enable tracing */
9480 	tracing_disabled = 0;
9481 
9482 	atomic_notifier_chain_register(&panic_notifier_list,
9483 				       &trace_panic_notifier);
9484 
9485 	register_die_notifier(&trace_die_notifier);
9486 
9487 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9488 
9489 	INIT_LIST_HEAD(&global_trace.systems);
9490 	INIT_LIST_HEAD(&global_trace.events);
9491 	INIT_LIST_HEAD(&global_trace.hist_vars);
9492 	INIT_LIST_HEAD(&global_trace.err_log);
9493 	list_add(&global_trace.list, &ftrace_trace_arrays);
9494 
9495 	apply_trace_boot_options();
9496 
9497 	register_snapshot_cmd();
9498 
9499 	return 0;
9500 
9501 out_free_savedcmd:
9502 	free_saved_cmdlines_buffer(savedcmd);
9503 out_free_temp_buffer:
9504 	ring_buffer_free(temp_buffer);
9505 out_rm_hp_state:
9506 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9507 out_free_cpumask:
9508 	free_cpumask_var(global_trace.tracing_cpumask);
9509 out_free_buffer_mask:
9510 	free_cpumask_var(tracing_buffer_mask);
9511 out:
9512 	return ret;
9513 }
9514 
9515 void __init early_trace_init(void)
9516 {
9517 	if (tracepoint_printk) {
9518 		tracepoint_print_iter =
9519 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9520 		if (MEM_FAIL(!tracepoint_print_iter,
9521 			     "Failed to allocate trace iterator\n"))
9522 			tracepoint_printk = 0;
9523 		else
9524 			static_key_enable(&tracepoint_printk_key.key);
9525 	}
9526 	tracer_alloc_buffers();
9527 }
9528 
9529 void __init trace_init(void)
9530 {
9531 	trace_event_init();
9532 }
9533 
9534 __init static int clear_boot_tracer(void)
9535 {
9536 	/*
9537 	 * The default tracer at boot buffer is an init section.
9538 	 * This function is called in lateinit. If we did not
9539 	 * find the boot tracer, then clear it out, to prevent
9540 	 * later registration from accessing the buffer that is
9541 	 * about to be freed.
9542 	 */
9543 	if (!default_bootup_tracer)
9544 		return 0;
9545 
9546 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9547 	       default_bootup_tracer);
9548 	default_bootup_tracer = NULL;
9549 
9550 	return 0;
9551 }
9552 
9553 fs_initcall(tracer_init_tracefs);
9554 late_initcall_sync(clear_boot_tracer);
9555 
9556 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9557 __init static int tracing_set_default_clock(void)
9558 {
9559 	/* sched_clock_stable() is determined in late_initcall */
9560 	if (!trace_boot_clock && !sched_clock_stable()) {
9561 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9562 			pr_warn("Can not set tracing clock due to lockdown\n");
9563 			return -EPERM;
9564 		}
9565 
9566 		printk(KERN_WARNING
9567 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9568 		       "If you want to keep using the local clock, then add:\n"
9569 		       "  \"trace_clock=local\"\n"
9570 		       "on the kernel command line\n");
9571 		tracing_set_clock(&global_trace, "global");
9572 	}
9573 
9574 	return 0;
9575 }
9576 late_initcall_sync(tracing_set_default_clock);
9577 #endif
9578