xref: /openbmc/linux/kernel/trace/trace.c (revision 55fd7e02)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79 
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82 	{ }
83 };
84 
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88 	return 0;
89 }
90 
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97 
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105 
106 cpumask_var_t __read_mostly	tracing_buffer_mask;
107 
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123 
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125 
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128 
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132 	struct module			*mod;
133 	unsigned long			length;
134 };
135 
136 union trace_eval_map_item;
137 
138 struct trace_eval_map_tail {
139 	/*
140 	 * "end" is first and points to NULL as it must be different
141 	 * than "mod" or "eval_string"
142 	 */
143 	union trace_eval_map_item	*next;
144 	const char			*end;	/* points to NULL */
145 };
146 
147 static DEFINE_MUTEX(trace_eval_mutex);
148 
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157 	struct trace_eval_map		map;
158 	struct trace_eval_map_head	head;
159 	struct trace_eval_map_tail	tail;
160 };
161 
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164 
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 				   unsigned long flags, int pc);
168 
169 #define MAX_TRACER_SIZE		100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172 
173 static bool allocate_snapshot;
174 
175 static int __init set_cmdline_ftrace(char *str)
176 {
177 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 	default_bootup_tracer = bootup_tracer_buf;
179 	/* We are using ftrace early, expand it */
180 	ring_buffer_expanded = true;
181 	return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184 
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187 	if (*str++ != '=' || !*str) {
188 		ftrace_dump_on_oops = DUMP_ALL;
189 		return 1;
190 	}
191 
192 	if (!strcmp("orig_cpu", str)) {
193 		ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196 
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200 
201 static int __init stop_trace_on_warning(char *str)
202 {
203 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 		__disable_trace_on_warning = 1;
205 	return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208 
209 static int __init boot_alloc_snapshot(char *str)
210 {
211 	allocate_snapshot = true;
212 	/* We also need the main ring buffer expanded */
213 	ring_buffer_expanded = true;
214 	return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217 
218 
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220 
221 static int __init set_trace_boot_options(char *str)
222 {
223 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224 	return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227 
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230 
231 static int __init set_trace_boot_clock(char *str)
232 {
233 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 	trace_boot_clock = trace_boot_clock_buf;
235 	return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238 
239 static int __init set_tracepoint_printk(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		tracepoint_printk = 1;
243 	return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246 
247 unsigned long long ns2usecs(u64 nsec)
248 {
249 	nsec += 500;
250 	do_div(nsec, 1000);
251 	return nsec;
252 }
253 
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS						\
256 	(FUNCTION_DEFAULT_FLAGS |					\
257 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
258 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
259 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
260 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261 
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
264 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265 
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269 
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275 	.trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282 	struct trace_array *tr;
283 	int ret = -ENODEV;
284 
285 	mutex_lock(&trace_types_lock);
286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287 		if (tr == this_tr) {
288 			tr->ref++;
289 			ret = 0;
290 			break;
291 		}
292 	}
293 	mutex_unlock(&trace_types_lock);
294 
295 	return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300 	WARN_ON(!this_tr->ref);
301 	this_tr->ref--;
302 }
303 
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314 	if (!this_tr)
315 		return;
316 
317 	mutex_lock(&trace_types_lock);
318 	__trace_array_put(this_tr);
319 	mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322 
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325 	int ret;
326 
327 	ret = security_locked_down(LOCKDOWN_TRACEFS);
328 	if (ret)
329 		return ret;
330 
331 	if (tracing_disabled)
332 		return -ENODEV;
333 
334 	if (tr && trace_array_get(tr) < 0)
335 		return -ENODEV;
336 
337 	return 0;
338 }
339 
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 			      struct trace_buffer *buffer,
342 			      struct ring_buffer_event *event)
343 {
344 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 	    !filter_match_preds(call->filter, rec)) {
346 		__trace_event_discard_commit(buffer, event);
347 		return 1;
348 	}
349 
350 	return 0;
351 }
352 
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355 	vfree(pid_list->pids);
356 	kfree(pid_list);
357 }
358 
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369 	/*
370 	 * If pid_max changed after filtered_pids was created, we
371 	 * by default ignore all pids greater than the previous pid_max.
372 	 */
373 	if (search_pid >= filtered_pids->pid_max)
374 		return false;
375 
376 	return test_bit(search_pid, filtered_pids->pids);
377 }
378 
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 		       struct trace_pid_list *filtered_no_pids,
391 		       struct task_struct *task)
392 {
393 	/*
394 	 * If filterd_no_pids is not empty, and the task's pid is listed
395 	 * in filtered_no_pids, then return true.
396 	 * Otherwise, if filtered_pids is empty, that means we can
397 	 * trace all tasks. If it has content, then only trace pids
398 	 * within filtered_pids.
399 	 */
400 
401 	return (filtered_pids &&
402 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
403 		(filtered_no_pids &&
404 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406 
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 				  struct task_struct *self,
421 				  struct task_struct *task)
422 {
423 	if (!pid_list)
424 		return;
425 
426 	/* For forks, we only add if the forking task is listed */
427 	if (self) {
428 		if (!trace_find_filtered_pid(pid_list, self->pid))
429 			return;
430 	}
431 
432 	/* Sorry, but we don't support pid_max changing after setting */
433 	if (task->pid >= pid_list->pid_max)
434 		return;
435 
436 	/* "self" is set for forks, and NULL for exits */
437 	if (self)
438 		set_bit(task->pid, pid_list->pids);
439 	else
440 		clear_bit(task->pid, pid_list->pids);
441 }
442 
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457 	unsigned long pid = (unsigned long)v;
458 
459 	(*pos)++;
460 
461 	/* pid already is +1 of the actual prevous bit */
462 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463 
464 	/* Return pid + 1 to allow zero to be represented */
465 	if (pid < pid_list->pid_max)
466 		return (void *)(pid + 1);
467 
468 	return NULL;
469 }
470 
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484 	unsigned long pid;
485 	loff_t l = 0;
486 
487 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 	if (pid >= pid_list->pid_max)
489 		return NULL;
490 
491 	/* Return pid + 1 so that zero can be the exit value */
492 	for (pid++; pid && l < *pos;
493 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494 		;
495 	return (void *)pid;
496 }
497 
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508 	unsigned long pid = (unsigned long)v - 1;
509 
510 	seq_printf(m, "%lu\n", pid);
511 	return 0;
512 }
513 
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE		127
516 
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 		    struct trace_pid_list **new_pid_list,
519 		    const char __user *ubuf, size_t cnt)
520 {
521 	struct trace_pid_list *pid_list;
522 	struct trace_parser parser;
523 	unsigned long val;
524 	int nr_pids = 0;
525 	ssize_t read = 0;
526 	ssize_t ret = 0;
527 	loff_t pos;
528 	pid_t pid;
529 
530 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531 		return -ENOMEM;
532 
533 	/*
534 	 * Always recreate a new array. The write is an all or nothing
535 	 * operation. Always create a new array when adding new pids by
536 	 * the user. If the operation fails, then the current list is
537 	 * not modified.
538 	 */
539 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540 	if (!pid_list) {
541 		trace_parser_put(&parser);
542 		return -ENOMEM;
543 	}
544 
545 	pid_list->pid_max = READ_ONCE(pid_max);
546 
547 	/* Only truncating will shrink pid_max */
548 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 		pid_list->pid_max = filtered_pids->pid_max;
550 
551 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 	if (!pid_list->pids) {
553 		trace_parser_put(&parser);
554 		kfree(pid_list);
555 		return -ENOMEM;
556 	}
557 
558 	if (filtered_pids) {
559 		/* copy the current bits to the new max */
560 		for_each_set_bit(pid, filtered_pids->pids,
561 				 filtered_pids->pid_max) {
562 			set_bit(pid, pid_list->pids);
563 			nr_pids++;
564 		}
565 	}
566 
567 	while (cnt > 0) {
568 
569 		pos = 0;
570 
571 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 		if (ret < 0 || !trace_parser_loaded(&parser))
573 			break;
574 
575 		read += ret;
576 		ubuf += ret;
577 		cnt -= ret;
578 
579 		ret = -EINVAL;
580 		if (kstrtoul(parser.buffer, 0, &val))
581 			break;
582 		if (val >= pid_list->pid_max)
583 			break;
584 
585 		pid = (pid_t)val;
586 
587 		set_bit(pid, pid_list->pids);
588 		nr_pids++;
589 
590 		trace_parser_clear(&parser);
591 		ret = 0;
592 	}
593 	trace_parser_put(&parser);
594 
595 	if (ret < 0) {
596 		trace_free_pid_list(pid_list);
597 		return ret;
598 	}
599 
600 	if (!nr_pids) {
601 		/* Cleared the list of pids */
602 		trace_free_pid_list(pid_list);
603 		read = ret;
604 		pid_list = NULL;
605 	}
606 
607 	*new_pid_list = pid_list;
608 
609 	return read;
610 }
611 
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614 	u64 ts;
615 
616 	/* Early boot up does not have a buffer yet */
617 	if (!buf->buffer)
618 		return trace_clock_local();
619 
620 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622 
623 	return ts;
624 }
625 
626 u64 ftrace_now(int cpu)
627 {
628 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630 
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642 	/*
643 	 * For quick access (irqsoff uses this in fast path), just
644 	 * return the mirror variable of the state of the ring buffer.
645 	 * It's a little racy, but we don't really care.
646 	 */
647 	smp_rmb();
648 	return !global_trace.buffer_disabled;
649 }
650 
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway this can be
659  * boot time and run time configurable.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
662 
663 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664 
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer		*trace_types __read_mostly;
667 
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672 
673 /*
674  * serialize the access of the ring buffer
675  *
676  * ring buffer serializes readers, but it is low level protection.
677  * The validity of the events (which returns by ring_buffer_peek() ..etc)
678  * are not protected by ring buffer.
679  *
680  * The content of events may become garbage if we allow other process consumes
681  * these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not reader page) in ring buffer, and this page will be rewrited
684  *      by events producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to system.
687  *
688  * These primitives allow multi process access to different cpu ring buffer
689  * concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multi read-only access are also serialized.
693  */
694 
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	if (cpu == RING_BUFFER_ALL_CPUS) {
702 		/* gain it for accessing the whole ring buffer. */
703 		down_write(&all_cpu_access_lock);
704 	} else {
705 		/* gain it for accessing a cpu ring buffer. */
706 
707 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 		down_read(&all_cpu_access_lock);
709 
710 		/* Secondly block other access to this @cpu ring buffer. */
711 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
712 	}
713 }
714 
715 static inline void trace_access_unlock(int cpu)
716 {
717 	if (cpu == RING_BUFFER_ALL_CPUS) {
718 		up_write(&all_cpu_access_lock);
719 	} else {
720 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 		up_read(&all_cpu_access_lock);
722 	}
723 }
724 
725 static inline void trace_access_lock_init(void)
726 {
727 	int cpu;
728 
729 	for_each_possible_cpu(cpu)
730 		mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732 
733 #else
734 
735 static DEFINE_MUTEX(access_lock);
736 
737 static inline void trace_access_lock(int cpu)
738 {
739 	(void)cpu;
740 	mutex_lock(&access_lock);
741 }
742 
743 static inline void trace_access_unlock(int cpu)
744 {
745 	(void)cpu;
746 	mutex_unlock(&access_lock);
747 }
748 
749 static inline void trace_access_lock_init(void)
750 {
751 }
752 
753 #endif
754 
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757 				 unsigned long flags,
758 				 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 				      struct trace_buffer *buffer,
761 				      unsigned long flags,
762 				      int skip, int pc, struct pt_regs *regs);
763 
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766 					unsigned long flags,
767 					int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 				      struct trace_buffer *buffer,
772 				      unsigned long flags,
773 				      int skip, int pc, struct pt_regs *regs)
774 {
775 }
776 
777 #endif
778 
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 		  int type, unsigned long flags, int pc)
782 {
783 	struct trace_entry *ent = ring_buffer_event_data(event);
784 
785 	tracing_generic_entry_update(ent, type, flags, pc);
786 }
787 
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790 			  int type,
791 			  unsigned long len,
792 			  unsigned long flags, int pc)
793 {
794 	struct ring_buffer_event *event;
795 
796 	event = ring_buffer_lock_reserve(buffer, len);
797 	if (event != NULL)
798 		trace_event_setup(event, type, flags, pc);
799 
800 	return event;
801 }
802 
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805 	if (tr->array_buffer.buffer)
806 		ring_buffer_record_on(tr->array_buffer.buffer);
807 	/*
808 	 * This flag is looked at when buffers haven't been allocated
809 	 * yet, or by some tracers (like irqsoff), that just want to
810 	 * know if the ring buffer has been disabled, but it can handle
811 	 * races of where it gets disabled but we still do a record.
812 	 * As the check is in the fast path of the tracers, it is more
813 	 * important to be fast than accurate.
814 	 */
815 	tr->buffer_disabled = 0;
816 	/* Make the flag seen by readers */
817 	smp_wmb();
818 }
819 
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828 	tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831 
832 
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836 	__this_cpu_write(trace_taskinfo_save, true);
837 
838 	/* If this is the temp buffer, we need to commit fully */
839 	if (this_cpu_read(trace_buffered_event) == event) {
840 		/* Length is in event->array[0] */
841 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 		/* Release the temp buffer */
843 		this_cpu_dec(trace_buffered_event_cnt);
844 	} else
845 		ring_buffer_unlock_commit(buffer, event);
846 }
847 
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:	   The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856 	struct ring_buffer_event *event;
857 	struct trace_buffer *buffer;
858 	struct print_entry *entry;
859 	unsigned long irq_flags;
860 	int alloc;
861 	int pc;
862 
863 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864 		return 0;
865 
866 	pc = preempt_count();
867 
868 	if (unlikely(tracing_selftest_running || tracing_disabled))
869 		return 0;
870 
871 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
872 
873 	local_save_flags(irq_flags);
874 	buffer = global_trace.array_buffer.buffer;
875 	ring_buffer_nest_start(buffer);
876 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877 					    irq_flags, pc);
878 	if (!event) {
879 		size = 0;
880 		goto out;
881 	}
882 
883 	entry = ring_buffer_event_data(event);
884 	entry->ip = ip;
885 
886 	memcpy(&entry->buf, str, size);
887 
888 	/* Add a newline if necessary */
889 	if (entry->buf[size - 1] != '\n') {
890 		entry->buf[size] = '\n';
891 		entry->buf[size + 1] = '\0';
892 	} else
893 		entry->buf[size] = '\0';
894 
895 	__buffer_unlock_commit(buffer, event);
896 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898 	ring_buffer_nest_end(buffer);
899 	return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902 
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:	   The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910 	struct ring_buffer_event *event;
911 	struct trace_buffer *buffer;
912 	struct bputs_entry *entry;
913 	unsigned long irq_flags;
914 	int size = sizeof(struct bputs_entry);
915 	int ret = 0;
916 	int pc;
917 
918 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919 		return 0;
920 
921 	pc = preempt_count();
922 
923 	if (unlikely(tracing_selftest_running || tracing_disabled))
924 		return 0;
925 
926 	local_save_flags(irq_flags);
927 	buffer = global_trace.array_buffer.buffer;
928 
929 	ring_buffer_nest_start(buffer);
930 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931 					    irq_flags, pc);
932 	if (!event)
933 		goto out;
934 
935 	entry = ring_buffer_event_data(event);
936 	entry->ip			= ip;
937 	entry->str			= str;
938 
939 	__buffer_unlock_commit(buffer, event);
940 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941 
942 	ret = 1;
943  out:
944 	ring_buffer_nest_end(buffer);
945 	return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948 
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951 					   void *cond_data)
952 {
953 	struct tracer *tracer = tr->current_trace;
954 	unsigned long flags;
955 
956 	if (in_nmi()) {
957 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 		internal_trace_puts("*** snapshot is being ignored        ***\n");
959 		return;
960 	}
961 
962 	if (!tr->allocated_snapshot) {
963 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 		internal_trace_puts("*** stopping trace here!   ***\n");
965 		tracing_off();
966 		return;
967 	}
968 
969 	/* Note, snapshot can not be used when the tracer uses it */
970 	if (tracer->use_max_tr) {
971 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973 		return;
974 	}
975 
976 	local_irq_save(flags);
977 	update_max_tr(tr, current, smp_processor_id(), cond_data);
978 	local_irq_restore(flags);
979 }
980 
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983 	tracing_snapshot_instance_cond(tr, NULL);
984 }
985 
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002 	struct trace_array *tr = &global_trace;
1003 
1004 	tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007 
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:		The tracing instance to snapshot
1011  * @cond_data:	The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023 	tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026 
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:		The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043 	void *cond_data = NULL;
1044 
1045 	arch_spin_lock(&tr->max_lock);
1046 
1047 	if (tr->cond_snapshot)
1048 		cond_data = tr->cond_snapshot->cond_data;
1049 
1050 	arch_spin_unlock(&tr->max_lock);
1051 
1052 	return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055 
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 					struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059 
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062 	int ret;
1063 
1064 	if (!tr->allocated_snapshot) {
1065 
1066 		/* allocate spare buffer */
1067 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069 		if (ret < 0)
1070 			return ret;
1071 
1072 		tr->allocated_snapshot = true;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080 	/*
1081 	 * We don't free the ring buffer. instead, resize it because
1082 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1083 	 * we want preserve it.
1084 	 */
1085 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 	set_buffer_entries(&tr->max_buffer, 1);
1087 	tracing_reset_online_cpus(&tr->max_buffer);
1088 	tr->allocated_snapshot = false;
1089 }
1090 
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103 	struct trace_array *tr = &global_trace;
1104 	int ret;
1105 
1106 	ret = tracing_alloc_snapshot_instance(tr);
1107 	WARN_ON(ret < 0);
1108 
1109 	return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112 
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126 	int ret;
1127 
1128 	ret = tracing_alloc_snapshot();
1129 	if (ret < 0)
1130 		return;
1131 
1132 	tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135 
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:		The tracing instance
1139  * @cond_data:	User data to associate with the snapshot
1140  * @update:	Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 				 cond_update_fn_t update)
1151 {
1152 	struct cond_snapshot *cond_snapshot;
1153 	int ret = 0;
1154 
1155 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156 	if (!cond_snapshot)
1157 		return -ENOMEM;
1158 
1159 	cond_snapshot->cond_data = cond_data;
1160 	cond_snapshot->update = update;
1161 
1162 	mutex_lock(&trace_types_lock);
1163 
1164 	ret = tracing_alloc_snapshot_instance(tr);
1165 	if (ret)
1166 		goto fail_unlock;
1167 
1168 	if (tr->current_trace->use_max_tr) {
1169 		ret = -EBUSY;
1170 		goto fail_unlock;
1171 	}
1172 
1173 	/*
1174 	 * The cond_snapshot can only change to NULL without the
1175 	 * trace_types_lock. We don't care if we race with it going
1176 	 * to NULL, but we want to make sure that it's not set to
1177 	 * something other than NULL when we get here, which we can
1178 	 * do safely with only holding the trace_types_lock and not
1179 	 * having to take the max_lock.
1180 	 */
1181 	if (tr->cond_snapshot) {
1182 		ret = -EBUSY;
1183 		goto fail_unlock;
1184 	}
1185 
1186 	arch_spin_lock(&tr->max_lock);
1187 	tr->cond_snapshot = cond_snapshot;
1188 	arch_spin_unlock(&tr->max_lock);
1189 
1190 	mutex_unlock(&trace_types_lock);
1191 
1192 	return ret;
1193 
1194  fail_unlock:
1195 	mutex_unlock(&trace_types_lock);
1196 	kfree(cond_snapshot);
1197 	return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:		The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213 	int ret = 0;
1214 
1215 	arch_spin_lock(&tr->max_lock);
1216 
1217 	if (!tr->cond_snapshot)
1218 		ret = -EINVAL;
1219 	else {
1220 		kfree(tr->cond_snapshot);
1221 		tr->cond_snapshot = NULL;
1222 	}
1223 
1224 	arch_spin_unlock(&tr->max_lock);
1225 
1226 	return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243 	return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248 	/* Give warning */
1249 	tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254 	return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259 	return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264 	return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268 
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271 	if (tr->array_buffer.buffer)
1272 		ring_buffer_record_off(tr->array_buffer.buffer);
1273 	/*
1274 	 * This flag is looked at when buffers haven't been allocated
1275 	 * yet, or by some tracers (like irqsoff), that just want to
1276 	 * know if the ring buffer has been disabled, but it can handle
1277 	 * races of where it gets disabled but we still do a record.
1278 	 * As the check is in the fast path of the tracers, it is more
1279 	 * important to be fast than accurate.
1280 	 */
1281 	tr->buffer_disabled = 1;
1282 	/* Make the flag seen by readers */
1283 	smp_wmb();
1284 }
1285 
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296 	tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299 
1300 void disable_trace_on_warning(void)
1301 {
1302 	if (__disable_trace_on_warning) {
1303 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 			"Disabling tracing due to warning\n");
1305 		tracing_off();
1306 	}
1307 }
1308 
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317 	if (tr->array_buffer.buffer)
1318 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 	return !tr->buffer_disabled;
1320 }
1321 
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327 	return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330 
1331 static int __init set_buf_size(char *str)
1332 {
1333 	unsigned long buf_size;
1334 
1335 	if (!str)
1336 		return 0;
1337 	buf_size = memparse(str, &str);
1338 	/* nr_entries can not be zero */
1339 	if (buf_size == 0)
1340 		return 0;
1341 	trace_buf_size = buf_size;
1342 	return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345 
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348 	unsigned long threshold;
1349 	int ret;
1350 
1351 	if (!str)
1352 		return 0;
1353 	ret = kstrtoul(str, 0, &threshold);
1354 	if (ret < 0)
1355 		return 0;
1356 	tracing_thresh = threshold * 1000;
1357 	return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360 
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363 	return nsecs / 1000;
1364 }
1365 
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374 
1375 /* These must match the bit postions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377 	TRACE_FLAGS
1378 	NULL
1379 };
1380 
1381 static struct {
1382 	u64 (*func)(void);
1383 	const char *name;
1384 	int in_ns;		/* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 	{ trace_clock_local,		"local",	1 },
1387 	{ trace_clock_global,		"global",	1 },
1388 	{ trace_clock_counter,		"counter",	0 },
1389 	{ trace_clock_jiffies,		"uptime",	0 },
1390 	{ trace_clock,			"perf",		1 },
1391 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1392 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1393 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1394 	ARCH_TRACE_CLOCKS
1395 };
1396 
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399 	if (trace_clocks[tr->clock_id].in_ns)
1400 		return true;
1401 
1402 	return false;
1403 }
1404 
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410 	memset(parser, 0, sizeof(*parser));
1411 
1412 	parser->buffer = kmalloc(size, GFP_KERNEL);
1413 	if (!parser->buffer)
1414 		return 1;
1415 
1416 	parser->size = size;
1417 	return 0;
1418 }
1419 
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425 	kfree(parser->buffer);
1426 	parser->buffer = NULL;
1427 }
1428 
1429 /*
1430  * trace_get_user - reads the user input string separated by  space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 	size_t cnt, loff_t *ppos)
1442 {
1443 	char ch;
1444 	size_t read = 0;
1445 	ssize_t ret;
1446 
1447 	if (!*ppos)
1448 		trace_parser_clear(parser);
1449 
1450 	ret = get_user(ch, ubuf++);
1451 	if (ret)
1452 		goto out;
1453 
1454 	read++;
1455 	cnt--;
1456 
1457 	/*
1458 	 * The parser is not finished with the last write,
1459 	 * continue reading the user input without skipping spaces.
1460 	 */
1461 	if (!parser->cont) {
1462 		/* skip white space */
1463 		while (cnt && isspace(ch)) {
1464 			ret = get_user(ch, ubuf++);
1465 			if (ret)
1466 				goto out;
1467 			read++;
1468 			cnt--;
1469 		}
1470 
1471 		parser->idx = 0;
1472 
1473 		/* only spaces were written */
1474 		if (isspace(ch) || !ch) {
1475 			*ppos += read;
1476 			ret = read;
1477 			goto out;
1478 		}
1479 	}
1480 
1481 	/* read the non-space input */
1482 	while (cnt && !isspace(ch) && ch) {
1483 		if (parser->idx < parser->size - 1)
1484 			parser->buffer[parser->idx++] = ch;
1485 		else {
1486 			ret = -EINVAL;
1487 			goto out;
1488 		}
1489 		ret = get_user(ch, ubuf++);
1490 		if (ret)
1491 			goto out;
1492 		read++;
1493 		cnt--;
1494 	}
1495 
1496 	/* We either got finished input or we have to wait for another call. */
1497 	if (isspace(ch) || !ch) {
1498 		parser->buffer[parser->idx] = 0;
1499 		parser->cont = false;
1500 	} else if (parser->idx < parser->size - 1) {
1501 		parser->cont = true;
1502 		parser->buffer[parser->idx++] = ch;
1503 		/* Make sure the parsed string always terminates with '\0'. */
1504 		parser->buffer[parser->idx] = 0;
1505 	} else {
1506 		ret = -EINVAL;
1507 		goto out;
1508 	}
1509 
1510 	*ppos += read;
1511 	ret = read;
1512 
1513 out:
1514 	return ret;
1515 }
1516 
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520 	int len;
1521 
1522 	if (trace_seq_used(s) <= s->seq.readpos)
1523 		return -EBUSY;
1524 
1525 	len = trace_seq_used(s) - s->seq.readpos;
1526 	if (cnt > len)
1527 		cnt = len;
1528 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529 
1530 	s->seq.readpos += cnt;
1531 	return cnt;
1532 }
1533 
1534 unsigned long __read_mostly	tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536 
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 	defined(CONFIG_FSNOTIFY)
1539 
1540 static struct workqueue_struct *fsnotify_wq;
1541 
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544 	struct trace_array *tr = container_of(work, struct trace_array,
1545 					      fsnotify_work);
1546 	fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547 		 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549 
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552 	struct trace_array *tr = container_of(iwork, struct trace_array,
1553 					      fsnotify_irqwork);
1554 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556 
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558 				     struct dentry *d_tracer)
1559 {
1560 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563 					      d_tracer, &tr->max_latency,
1564 					      &tracing_max_lat_fops);
1565 }
1566 
1567 __init static int latency_fsnotify_init(void)
1568 {
1569 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 	if (!fsnotify_wq) {
1572 		pr_err("Unable to allocate tr_max_lat_wq\n");
1573 		return -ENOMEM;
1574 	}
1575 	return 0;
1576 }
1577 
1578 late_initcall_sync(latency_fsnotify_init);
1579 
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582 	if (!fsnotify_wq)
1583 		return;
1584 	/*
1585 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586 	 * possible that we are called from __schedule() or do_idle(), which
1587 	 * could cause a deadlock.
1588 	 */
1589 	irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591 
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597 
1598 #define trace_create_maxlat_file(tr, d_tracer)				\
1599 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1600 			  &tr->max_latency, &tracing_max_lat_fops)
1601 
1602 #endif
1603 
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613 	struct array_buffer *trace_buf = &tr->array_buffer;
1614 	struct array_buffer *max_buf = &tr->max_buffer;
1615 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617 
1618 	max_buf->cpu = cpu;
1619 	max_buf->time_start = data->preempt_timestamp;
1620 
1621 	max_data->saved_latency = tr->max_latency;
1622 	max_data->critical_start = data->critical_start;
1623 	max_data->critical_end = data->critical_end;
1624 
1625 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626 	max_data->pid = tsk->pid;
1627 	/*
1628 	 * If tsk == current, then use current_uid(), as that does not use
1629 	 * RCU. The irq tracer can be called out of RCU scope.
1630 	 */
1631 	if (tsk == current)
1632 		max_data->uid = current_uid();
1633 	else
1634 		max_data->uid = task_uid(tsk);
1635 
1636 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637 	max_data->policy = tsk->policy;
1638 	max_data->rt_priority = tsk->rt_priority;
1639 
1640 	/* record this tasks comm */
1641 	tracing_record_cmdline(tsk);
1642 	latency_fsnotify(tr);
1643 }
1644 
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657 	      void *cond_data)
1658 {
1659 	if (tr->stop_count)
1660 		return;
1661 
1662 	WARN_ON_ONCE(!irqs_disabled());
1663 
1664 	if (!tr->allocated_snapshot) {
1665 		/* Only the nop tracer should hit this when disabling */
1666 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667 		return;
1668 	}
1669 
1670 	arch_spin_lock(&tr->max_lock);
1671 
1672 	/* Inherit the recordable setting from array_buffer */
1673 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674 		ring_buffer_record_on(tr->max_buffer.buffer);
1675 	else
1676 		ring_buffer_record_off(tr->max_buffer.buffer);
1677 
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680 		goto out_unlock;
1681 #endif
1682 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 
1684 	__update_max_tr(tr, tsk, cpu);
1685 
1686  out_unlock:
1687 	arch_spin_unlock(&tr->max_lock);
1688 }
1689 
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701 	int ret;
1702 
1703 	if (tr->stop_count)
1704 		return;
1705 
1706 	WARN_ON_ONCE(!irqs_disabled());
1707 	if (!tr->allocated_snapshot) {
1708 		/* Only the nop tracer should hit this when disabling */
1709 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710 		return;
1711 	}
1712 
1713 	arch_spin_lock(&tr->max_lock);
1714 
1715 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 
1717 	if (ret == -EBUSY) {
1718 		/*
1719 		 * We failed to swap the buffer due to a commit taking
1720 		 * place on this CPU. We fail to record, but we reset
1721 		 * the max trace buffer (no one writes directly to it)
1722 		 * and flag that it failed.
1723 		 */
1724 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725 			"Failed to swap buffers due to commit in progress\n");
1726 	}
1727 
1728 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 
1730 	__update_max_tr(tr, tsk, cpu);
1731 	arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737 	/* Iterators are static, they should be filled or empty */
1738 	if (trace_buffer_iter(iter, iter->cpu_file))
1739 		return 0;
1740 
1741 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742 				full);
1743 }
1744 
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747 
1748 struct trace_selftests {
1749 	struct list_head		list;
1750 	struct tracer			*type;
1751 };
1752 
1753 static LIST_HEAD(postponed_selftests);
1754 
1755 static int save_selftest(struct tracer *type)
1756 {
1757 	struct trace_selftests *selftest;
1758 
1759 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760 	if (!selftest)
1761 		return -ENOMEM;
1762 
1763 	selftest->type = type;
1764 	list_add(&selftest->list, &postponed_selftests);
1765 	return 0;
1766 }
1767 
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770 	struct trace_array *tr = &global_trace;
1771 	struct tracer *saved_tracer = tr->current_trace;
1772 	int ret;
1773 
1774 	if (!type->selftest || tracing_selftest_disabled)
1775 		return 0;
1776 
1777 	/*
1778 	 * If a tracer registers early in boot up (before scheduling is
1779 	 * initialized and such), then do not run its selftests yet.
1780 	 * Instead, run it a little later in the boot process.
1781 	 */
1782 	if (!selftests_can_run)
1783 		return save_selftest(type);
1784 
1785 	/*
1786 	 * Run a selftest on this tracer.
1787 	 * Here we reset the trace buffer, and set the current
1788 	 * tracer to be this tracer. The tracer can then run some
1789 	 * internal tracing to verify that everything is in order.
1790 	 * If we fail, we do not register this tracer.
1791 	 */
1792 	tracing_reset_online_cpus(&tr->array_buffer);
1793 
1794 	tr->current_trace = type;
1795 
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797 	if (type->use_max_tr) {
1798 		/* If we expanded the buffers, make sure the max is expanded too */
1799 		if (ring_buffer_expanded)
1800 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801 					   RING_BUFFER_ALL_CPUS);
1802 		tr->allocated_snapshot = true;
1803 	}
1804 #endif
1805 
1806 	/* the test is responsible for initializing and enabling */
1807 	pr_info("Testing tracer %s: ", type->name);
1808 	ret = type->selftest(type, tr);
1809 	/* the test is responsible for resetting too */
1810 	tr->current_trace = saved_tracer;
1811 	if (ret) {
1812 		printk(KERN_CONT "FAILED!\n");
1813 		/* Add the warning after printing 'FAILED' */
1814 		WARN_ON(1);
1815 		return -1;
1816 	}
1817 	/* Only reset on passing, to avoid touching corrupted buffers */
1818 	tracing_reset_online_cpus(&tr->array_buffer);
1819 
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821 	if (type->use_max_tr) {
1822 		tr->allocated_snapshot = false;
1823 
1824 		/* Shrink the max buffer again */
1825 		if (ring_buffer_expanded)
1826 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1827 					   RING_BUFFER_ALL_CPUS);
1828 	}
1829 #endif
1830 
1831 	printk(KERN_CONT "PASSED\n");
1832 	return 0;
1833 }
1834 
1835 static __init int init_trace_selftests(void)
1836 {
1837 	struct trace_selftests *p, *n;
1838 	struct tracer *t, **last;
1839 	int ret;
1840 
1841 	selftests_can_run = true;
1842 
1843 	mutex_lock(&trace_types_lock);
1844 
1845 	if (list_empty(&postponed_selftests))
1846 		goto out;
1847 
1848 	pr_info("Running postponed tracer tests:\n");
1849 
1850 	tracing_selftest_running = true;
1851 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852 		/* This loop can take minutes when sanitizers are enabled, so
1853 		 * lets make sure we allow RCU processing.
1854 		 */
1855 		cond_resched();
1856 		ret = run_tracer_selftest(p->type);
1857 		/* If the test fails, then warn and remove from available_tracers */
1858 		if (ret < 0) {
1859 			WARN(1, "tracer: %s failed selftest, disabling\n",
1860 			     p->type->name);
1861 			last = &trace_types;
1862 			for (t = trace_types; t; t = t->next) {
1863 				if (t == p->type) {
1864 					*last = t->next;
1865 					break;
1866 				}
1867 				last = &t->next;
1868 			}
1869 		}
1870 		list_del(&p->list);
1871 		kfree(p);
1872 	}
1873 	tracing_selftest_running = false;
1874 
1875  out:
1876 	mutex_unlock(&trace_types_lock);
1877 
1878 	return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884 	return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 
1890 static void __init apply_trace_boot_options(void);
1891 
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900 	struct tracer *t;
1901 	int ret = 0;
1902 
1903 	if (!type->name) {
1904 		pr_info("Tracer must have a name\n");
1905 		return -1;
1906 	}
1907 
1908 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910 		return -1;
1911 	}
1912 
1913 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914 		pr_warn("Can not register tracer %s due to lockdown\n",
1915 			   type->name);
1916 		return -EPERM;
1917 	}
1918 
1919 	mutex_lock(&trace_types_lock);
1920 
1921 	tracing_selftest_running = true;
1922 
1923 	for (t = trace_types; t; t = t->next) {
1924 		if (strcmp(type->name, t->name) == 0) {
1925 			/* already found */
1926 			pr_info("Tracer %s already registered\n",
1927 				type->name);
1928 			ret = -1;
1929 			goto out;
1930 		}
1931 	}
1932 
1933 	if (!type->set_flag)
1934 		type->set_flag = &dummy_set_flag;
1935 	if (!type->flags) {
1936 		/*allocate a dummy tracer_flags*/
1937 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938 		if (!type->flags) {
1939 			ret = -ENOMEM;
1940 			goto out;
1941 		}
1942 		type->flags->val = 0;
1943 		type->flags->opts = dummy_tracer_opt;
1944 	} else
1945 		if (!type->flags->opts)
1946 			type->flags->opts = dummy_tracer_opt;
1947 
1948 	/* store the tracer for __set_tracer_option */
1949 	type->flags->trace = type;
1950 
1951 	ret = run_tracer_selftest(type);
1952 	if (ret < 0)
1953 		goto out;
1954 
1955 	type->next = trace_types;
1956 	trace_types = type;
1957 	add_tracer_options(&global_trace, type);
1958 
1959  out:
1960 	tracing_selftest_running = false;
1961 	mutex_unlock(&trace_types_lock);
1962 
1963 	if (ret || !default_bootup_tracer)
1964 		goto out_unlock;
1965 
1966 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967 		goto out_unlock;
1968 
1969 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970 	/* Do we want this tracer to start on bootup? */
1971 	tracing_set_tracer(&global_trace, type->name);
1972 	default_bootup_tracer = NULL;
1973 
1974 	apply_trace_boot_options();
1975 
1976 	/* disable other selftests, since this will break it. */
1977 	tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980 	       type->name);
1981 #endif
1982 
1983  out_unlock:
1984 	return ret;
1985 }
1986 
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989 	struct trace_buffer *buffer = buf->buffer;
1990 
1991 	if (!buffer)
1992 		return;
1993 
1994 	ring_buffer_record_disable(buffer);
1995 
1996 	/* Make sure all commits have finished */
1997 	synchronize_rcu();
1998 	ring_buffer_reset_cpu(buffer, cpu);
1999 
2000 	ring_buffer_record_enable(buffer);
2001 }
2002 
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005 	struct trace_buffer *buffer = buf->buffer;
2006 	int cpu;
2007 
2008 	if (!buffer)
2009 		return;
2010 
2011 	ring_buffer_record_disable(buffer);
2012 
2013 	/* Make sure all commits have finished */
2014 	synchronize_rcu();
2015 
2016 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017 
2018 	for_each_online_cpu(cpu)
2019 		ring_buffer_reset_cpu(buffer, cpu);
2020 
2021 	ring_buffer_record_enable(buffer);
2022 }
2023 
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027 	struct trace_array *tr;
2028 
2029 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030 		if (!tr->clear_trace)
2031 			continue;
2032 		tr->clear_trace = false;
2033 		tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035 		tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037 	}
2038 }
2039 
2040 static int *tgid_map;
2041 
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2045 struct saved_cmdlines_buffer {
2046 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047 	unsigned *map_cmdline_to_pid;
2048 	unsigned cmdline_num;
2049 	int cmdline_idx;
2050 	char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053 
2054 /* temporary disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056 
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061 
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066 
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068 				    struct saved_cmdlines_buffer *s)
2069 {
2070 	s->map_cmdline_to_pid = kmalloc_array(val,
2071 					      sizeof(*s->map_cmdline_to_pid),
2072 					      GFP_KERNEL);
2073 	if (!s->map_cmdline_to_pid)
2074 		return -ENOMEM;
2075 
2076 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077 	if (!s->saved_cmdlines) {
2078 		kfree(s->map_cmdline_to_pid);
2079 		return -ENOMEM;
2080 	}
2081 
2082 	s->cmdline_idx = 0;
2083 	s->cmdline_num = val;
2084 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085 	       sizeof(s->map_pid_to_cmdline));
2086 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087 	       val * sizeof(*s->map_cmdline_to_pid));
2088 
2089 	return 0;
2090 }
2091 
2092 static int trace_create_savedcmd(void)
2093 {
2094 	int ret;
2095 
2096 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097 	if (!savedcmd)
2098 		return -ENOMEM;
2099 
2100 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101 	if (ret < 0) {
2102 		kfree(savedcmd);
2103 		savedcmd = NULL;
2104 		return -ENOMEM;
2105 	}
2106 
2107 	return 0;
2108 }
2109 
2110 int is_tracing_stopped(void)
2111 {
2112 	return global_trace.stop_count;
2113 }
2114 
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123 	struct trace_buffer *buffer;
2124 	unsigned long flags;
2125 
2126 	if (tracing_disabled)
2127 		return;
2128 
2129 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130 	if (--global_trace.stop_count) {
2131 		if (global_trace.stop_count < 0) {
2132 			/* Someone screwed up their debugging */
2133 			WARN_ON_ONCE(1);
2134 			global_trace.stop_count = 0;
2135 		}
2136 		goto out;
2137 	}
2138 
2139 	/* Prevent the buffers from switching */
2140 	arch_spin_lock(&global_trace.max_lock);
2141 
2142 	buffer = global_trace.array_buffer.buffer;
2143 	if (buffer)
2144 		ring_buffer_record_enable(buffer);
2145 
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147 	buffer = global_trace.max_buffer.buffer;
2148 	if (buffer)
2149 		ring_buffer_record_enable(buffer);
2150 #endif
2151 
2152 	arch_spin_unlock(&global_trace.max_lock);
2153 
2154  out:
2155 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157 
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160 	struct trace_buffer *buffer;
2161 	unsigned long flags;
2162 
2163 	if (tracing_disabled)
2164 		return;
2165 
2166 	/* If global, we need to also start the max tracer */
2167 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168 		return tracing_start();
2169 
2170 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2171 
2172 	if (--tr->stop_count) {
2173 		if (tr->stop_count < 0) {
2174 			/* Someone screwed up their debugging */
2175 			WARN_ON_ONCE(1);
2176 			tr->stop_count = 0;
2177 		}
2178 		goto out;
2179 	}
2180 
2181 	buffer = tr->array_buffer.buffer;
2182 	if (buffer)
2183 		ring_buffer_record_enable(buffer);
2184 
2185  out:
2186 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188 
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Light weight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197 	struct trace_buffer *buffer;
2198 	unsigned long flags;
2199 
2200 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201 	if (global_trace.stop_count++)
2202 		goto out;
2203 
2204 	/* Prevent the buffers from switching */
2205 	arch_spin_lock(&global_trace.max_lock);
2206 
2207 	buffer = global_trace.array_buffer.buffer;
2208 	if (buffer)
2209 		ring_buffer_record_disable(buffer);
2210 
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212 	buffer = global_trace.max_buffer.buffer;
2213 	if (buffer)
2214 		ring_buffer_record_disable(buffer);
2215 #endif
2216 
2217 	arch_spin_unlock(&global_trace.max_lock);
2218 
2219  out:
2220 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
2222 
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225 	struct trace_buffer *buffer;
2226 	unsigned long flags;
2227 
2228 	/* If global, we need to also stop the max tracer */
2229 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230 		return tracing_stop();
2231 
2232 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2233 	if (tr->stop_count++)
2234 		goto out;
2235 
2236 	buffer = tr->array_buffer.buffer;
2237 	if (buffer)
2238 		ring_buffer_record_disable(buffer);
2239 
2240  out:
2241 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243 
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246 	unsigned pid, idx;
2247 
2248 	/* treat recording of idle task as a success */
2249 	if (!tsk->pid)
2250 		return 1;
2251 
2252 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253 		return 0;
2254 
2255 	/*
2256 	 * It's not the end of the world if we don't get
2257 	 * the lock, but we also don't want to spin
2258 	 * nor do we want to disable interrupts,
2259 	 * so if we miss here, then better luck next time.
2260 	 */
2261 	if (!arch_spin_trylock(&trace_cmdline_lock))
2262 		return 0;
2263 
2264 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265 	if (idx == NO_CMDLINE_MAP) {
2266 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267 
2268 		/*
2269 		 * Check whether the cmdline buffer at idx has a pid
2270 		 * mapped. We are going to overwrite that entry so we
2271 		 * need to clear the map_pid_to_cmdline. Otherwise we
2272 		 * would read the new comm for the old pid.
2273 		 */
2274 		pid = savedcmd->map_cmdline_to_pid[idx];
2275 		if (pid != NO_CMDLINE_MAP)
2276 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277 
2278 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280 
2281 		savedcmd->cmdline_idx = idx;
2282 	}
2283 
2284 	set_cmdline(idx, tsk->comm);
2285 
2286 	arch_spin_unlock(&trace_cmdline_lock);
2287 
2288 	return 1;
2289 }
2290 
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293 	unsigned map;
2294 
2295 	if (!pid) {
2296 		strcpy(comm, "<idle>");
2297 		return;
2298 	}
2299 
2300 	if (WARN_ON_ONCE(pid < 0)) {
2301 		strcpy(comm, "<XXX>");
2302 		return;
2303 	}
2304 
2305 	if (pid > PID_MAX_DEFAULT) {
2306 		strcpy(comm, "<...>");
2307 		return;
2308 	}
2309 
2310 	map = savedcmd->map_pid_to_cmdline[pid];
2311 	if (map != NO_CMDLINE_MAP)
2312 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313 	else
2314 		strcpy(comm, "<...>");
2315 }
2316 
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319 	preempt_disable();
2320 	arch_spin_lock(&trace_cmdline_lock);
2321 
2322 	__trace_find_cmdline(pid, comm);
2323 
2324 	arch_spin_unlock(&trace_cmdline_lock);
2325 	preempt_enable();
2326 }
2327 
2328 int trace_find_tgid(int pid)
2329 {
2330 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331 		return 0;
2332 
2333 	return tgid_map[pid];
2334 }
2335 
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338 	/* treat recording of idle task as a success */
2339 	if (!tsk->pid)
2340 		return 1;
2341 
2342 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343 		return 0;
2344 
2345 	tgid_map[tsk->pid] = tsk->tgid;
2346 	return 1;
2347 }
2348 
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352 		return true;
2353 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354 		return true;
2355 	if (!__this_cpu_read(trace_taskinfo_save))
2356 		return true;
2357 	return false;
2358 }
2359 
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369 	bool done;
2370 
2371 	if (tracing_record_taskinfo_skip(flags))
2372 		return;
2373 
2374 	/*
2375 	 * Record as much task information as possible. If some fail, continue
2376 	 * to try to record the others.
2377 	 */
2378 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380 
2381 	/* If recording any information failed, retry again soon. */
2382 	if (!done)
2383 		return;
2384 
2385 	__this_cpu_write(trace_taskinfo_save, false);
2386 }
2387 
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397 					  struct task_struct *next, int flags)
2398 {
2399 	bool done;
2400 
2401 	if (tracing_record_taskinfo_skip(flags))
2402 		return;
2403 
2404 	/*
2405 	 * Record as much task information as possible. If some fail, continue
2406 	 * to try to record the others.
2407 	 */
2408 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412 
2413 	/* If recording any information failed, retry again soon. */
2414 	if (!done)
2415 		return;
2416 
2417 	__this_cpu_write(trace_taskinfo_save, false);
2418 }
2419 
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425 
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430 
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438 	return trace_seq_has_overflowed(s) ?
2439 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
2442 
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445 			     unsigned long flags, int pc)
2446 {
2447 	struct task_struct *tsk = current;
2448 
2449 	entry->preempt_count		= pc & 0xff;
2450 	entry->pid			= (tsk) ? tsk->pid : 0;
2451 	entry->type			= type;
2452 	entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456 		TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465 
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468 			  int type,
2469 			  unsigned long len,
2470 			  unsigned long flags, int pc)
2471 {
2472 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474 
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478 
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to use a temporary
2483  * buffer to write the event data into if there's a likely chance
2484  * that it will not be committed. The discard of the ring buffer
2485  * is not as fast as committing, and is much slower than copying
2486  * a commit.
2487  *
2488  * When an event is to be filtered, allocate per cpu buffers to
2489  * write the event data into, and if the event is filtered and discarded
2490  * it is simply dropped, otherwise, the entire data is to be committed
2491  * in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495 	struct ring_buffer_event *event;
2496 	struct page *page;
2497 	int cpu;
2498 
2499 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500 
2501 	if (trace_buffered_event_ref++)
2502 		return;
2503 
2504 	for_each_tracing_cpu(cpu) {
2505 		page = alloc_pages_node(cpu_to_node(cpu),
2506 					GFP_KERNEL | __GFP_NORETRY, 0);
2507 		if (!page)
2508 			goto failed;
2509 
2510 		event = page_address(page);
2511 		memset(event, 0, sizeof(*event));
2512 
2513 		per_cpu(trace_buffered_event, cpu) = event;
2514 
2515 		preempt_disable();
2516 		if (cpu == smp_processor_id() &&
2517 		    this_cpu_read(trace_buffered_event) !=
2518 		    per_cpu(trace_buffered_event, cpu))
2519 			WARN_ON_ONCE(1);
2520 		preempt_enable();
2521 	}
2522 
2523 	return;
2524  failed:
2525 	trace_buffered_event_disable();
2526 }
2527 
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530 	/* Probably not needed, but do it anyway */
2531 	smp_rmb();
2532 	this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534 
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537 	this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539 
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550 	int cpu;
2551 
2552 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553 
2554 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555 		return;
2556 
2557 	if (--trace_buffered_event_ref)
2558 		return;
2559 
2560 	preempt_disable();
2561 	/* For each CPU, set the buffer as used. */
2562 	smp_call_function_many(tracing_buffer_mask,
2563 			       disable_trace_buffered_event, NULL, 1);
2564 	preempt_enable();
2565 
2566 	/* Wait for all current users to finish */
2567 	synchronize_rcu();
2568 
2569 	for_each_tracing_cpu(cpu) {
2570 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571 		per_cpu(trace_buffered_event, cpu) = NULL;
2572 	}
2573 	/*
2574 	 * Make sure trace_buffered_event is NULL before clearing
2575 	 * trace_buffered_event_cnt.
2576 	 */
2577 	smp_wmb();
2578 
2579 	preempt_disable();
2580 	/* Do the work on each cpu */
2581 	smp_call_function_many(tracing_buffer_mask,
2582 			       enable_trace_buffered_event, NULL, 1);
2583 	preempt_enable();
2584 }
2585 
2586 static struct trace_buffer *temp_buffer;
2587 
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590 			  struct trace_event_file *trace_file,
2591 			  int type, unsigned long len,
2592 			  unsigned long flags, int pc)
2593 {
2594 	struct ring_buffer_event *entry;
2595 	int val;
2596 
2597 	*current_rb = trace_file->tr->array_buffer.buffer;
2598 
2599 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601 	    (entry = this_cpu_read(trace_buffered_event))) {
2602 		/* Try to use the per cpu buffer first */
2603 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2604 		if (val == 1) {
2605 			trace_event_setup(entry, type, flags, pc);
2606 			entry->array[0] = len;
2607 			return entry;
2608 		}
2609 		this_cpu_dec(trace_buffered_event_cnt);
2610 	}
2611 
2612 	entry = __trace_buffer_lock_reserve(*current_rb,
2613 					    type, len, flags, pc);
2614 	/*
2615 	 * If tracing is off, but we have triggers enabled
2616 	 * we still need to look at the event data. Use the temp_buffer
2617 	 * to store the trace event for the tigger to use. It's recusive
2618 	 * safe and will not be recorded anywhere.
2619 	 */
2620 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621 		*current_rb = temp_buffer;
2622 		entry = __trace_buffer_lock_reserve(*current_rb,
2623 						    type, len, flags, pc);
2624 	}
2625 	return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628 
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631 
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634 	struct trace_event_call *event_call;
2635 	struct trace_event_file *file;
2636 	struct trace_event *event;
2637 	unsigned long flags;
2638 	struct trace_iterator *iter = tracepoint_print_iter;
2639 
2640 	/* We should never get here if iter is NULL */
2641 	if (WARN_ON_ONCE(!iter))
2642 		return;
2643 
2644 	event_call = fbuffer->trace_file->event_call;
2645 	if (!event_call || !event_call->event.funcs ||
2646 	    !event_call->event.funcs->trace)
2647 		return;
2648 
2649 	file = fbuffer->trace_file;
2650 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652 	     !filter_match_preds(file->filter, fbuffer->entry)))
2653 		return;
2654 
2655 	event = &fbuffer->trace_file->event_call->event;
2656 
2657 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658 	trace_seq_init(&iter->seq);
2659 	iter->ent = fbuffer->entry;
2660 	event_call->event.funcs->trace(iter, 0, event);
2661 	trace_seq_putc(&iter->seq, 0);
2662 	printk("%s", iter->seq.buffer);
2663 
2664 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666 
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668 			     void *buffer, size_t *lenp,
2669 			     loff_t *ppos)
2670 {
2671 	int save_tracepoint_printk;
2672 	int ret;
2673 
2674 	mutex_lock(&tracepoint_printk_mutex);
2675 	save_tracepoint_printk = tracepoint_printk;
2676 
2677 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678 
2679 	/*
2680 	 * This will force exiting early, as tracepoint_printk
2681 	 * is always zero when tracepoint_printk_iter is not allocated
2682 	 */
2683 	if (!tracepoint_print_iter)
2684 		tracepoint_printk = 0;
2685 
2686 	if (save_tracepoint_printk == tracepoint_printk)
2687 		goto out;
2688 
2689 	if (tracepoint_printk)
2690 		static_key_enable(&tracepoint_printk_key.key);
2691 	else
2692 		static_key_disable(&tracepoint_printk_key.key);
2693 
2694  out:
2695 	mutex_unlock(&tracepoint_printk_mutex);
2696 
2697 	return ret;
2698 }
2699 
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702 	if (static_key_false(&tracepoint_printk_key.key))
2703 		output_printk(fbuffer);
2704 
2705 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706 				    fbuffer->event, fbuffer->entry,
2707 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710 
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719 
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721 				     struct trace_buffer *buffer,
2722 				     struct ring_buffer_event *event,
2723 				     unsigned long flags, int pc,
2724 				     struct pt_regs *regs)
2725 {
2726 	__buffer_unlock_commit(buffer, event);
2727 
2728 	/*
2729 	 * If regs is not set, then skip the necessary functions.
2730 	 * Note, we can still get here via blktrace, wakeup tracer
2731 	 * and mmiotrace, but that's ok if they lose a function or
2732 	 * two. They are not that meaningful.
2733 	 */
2734 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735 	ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737 
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743 				   struct ring_buffer_event *event)
2744 {
2745 	__buffer_unlock_commit(buffer, event);
2746 }
2747 
2748 static void
2749 trace_process_export(struct trace_export *export,
2750 	       struct ring_buffer_event *event)
2751 {
2752 	struct trace_entry *entry;
2753 	unsigned int size = 0;
2754 
2755 	entry = ring_buffer_event_data(event);
2756 	size = ring_buffer_event_length(event);
2757 	export->write(export, entry, size);
2758 }
2759 
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761 
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763 
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765 
2766 static inline void ftrace_exports_enable(void)
2767 {
2768 	static_branch_enable(&ftrace_exports_enabled);
2769 }
2770 
2771 static inline void ftrace_exports_disable(void)
2772 {
2773 	static_branch_disable(&ftrace_exports_enabled);
2774 }
2775 
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778 	struct trace_export *export;
2779 
2780 	preempt_disable_notrace();
2781 
2782 	export = rcu_dereference_raw_check(ftrace_exports_list);
2783 	while (export) {
2784 		trace_process_export(export, event);
2785 		export = rcu_dereference_raw_check(export->next);
2786 	}
2787 
2788 	preempt_enable_notrace();
2789 }
2790 
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794 	rcu_assign_pointer(export->next, *list);
2795 	/*
2796 	 * We are entering export into the list but another
2797 	 * CPU might be walking that list. We need to make sure
2798 	 * the export->next pointer is valid before another CPU sees
2799 	 * the export pointer included into the list.
2800 	 */
2801 	rcu_assign_pointer(*list, export);
2802 }
2803 
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807 	struct trace_export **p;
2808 
2809 	for (p = list; *p != NULL; p = &(*p)->next)
2810 		if (*p == export)
2811 			break;
2812 
2813 	if (*p != export)
2814 		return -1;
2815 
2816 	rcu_assign_pointer(*p, (*p)->next);
2817 
2818 	return 0;
2819 }
2820 
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824 	if (*list == NULL)
2825 		ftrace_exports_enable();
2826 
2827 	add_trace_export(list, export);
2828 }
2829 
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833 	int ret;
2834 
2835 	ret = rm_trace_export(list, export);
2836 	if (*list == NULL)
2837 		ftrace_exports_disable();
2838 
2839 	return ret;
2840 }
2841 
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844 	if (WARN_ON_ONCE(!export->write))
2845 		return -1;
2846 
2847 	mutex_lock(&ftrace_export_lock);
2848 
2849 	add_ftrace_export(&ftrace_exports_list, export);
2850 
2851 	mutex_unlock(&ftrace_export_lock);
2852 
2853 	return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856 
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859 	int ret;
2860 
2861 	mutex_lock(&ftrace_export_lock);
2862 
2863 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2864 
2865 	mutex_unlock(&ftrace_export_lock);
2866 
2867 	return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2870 
2871 void
2872 trace_function(struct trace_array *tr,
2873 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874 	       int pc)
2875 {
2876 	struct trace_event_call *call = &event_function;
2877 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2878 	struct ring_buffer_event *event;
2879 	struct ftrace_entry *entry;
2880 
2881 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882 					    flags, pc);
2883 	if (!event)
2884 		return;
2885 	entry	= ring_buffer_event_data(event);
2886 	entry->ip			= ip;
2887 	entry->parent_ip		= parent_ip;
2888 
2889 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2890 		if (static_branch_unlikely(&ftrace_exports_enabled))
2891 			ftrace_exports(event);
2892 		__buffer_unlock_commit(buffer, event);
2893 	}
2894 }
2895 
2896 #ifdef CONFIG_STACKTRACE
2897 
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING	4
2900 
2901 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902 
2903 struct ftrace_stack {
2904 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906 
2907 
2908 struct ftrace_stacks {
2909 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2910 };
2911 
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914 
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916 				 unsigned long flags,
2917 				 int skip, int pc, struct pt_regs *regs)
2918 {
2919 	struct trace_event_call *call = &event_kernel_stack;
2920 	struct ring_buffer_event *event;
2921 	unsigned int size, nr_entries;
2922 	struct ftrace_stack *fstack;
2923 	struct stack_entry *entry;
2924 	int stackidx;
2925 
2926 	/*
2927 	 * Add one, for this function and the call to save_stack_trace()
2928 	 * If regs is set, then these functions will not be in the way.
2929 	 */
2930 #ifndef CONFIG_UNWINDER_ORC
2931 	if (!regs)
2932 		skip++;
2933 #endif
2934 
2935 	/*
2936 	 * Since events can happen in NMIs there's no safe way to
2937 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2938 	 * or NMI comes in, it will just have to use the default
2939 	 * FTRACE_STACK_SIZE.
2940 	 */
2941 	preempt_disable_notrace();
2942 
2943 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944 
2945 	/* This should never happen. If it does, yell once and skip */
2946 	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2947 		goto out;
2948 
2949 	/*
2950 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951 	 * interrupt will either see the value pre increment or post
2952 	 * increment. If the interrupt happens pre increment it will have
2953 	 * restored the counter when it returns.  We just need a barrier to
2954 	 * keep gcc from moving things around.
2955 	 */
2956 	barrier();
2957 
2958 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959 	size = ARRAY_SIZE(fstack->calls);
2960 
2961 	if (regs) {
2962 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963 						   size, skip);
2964 	} else {
2965 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2966 	}
2967 
2968 	size = nr_entries * sizeof(unsigned long);
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970 					    sizeof(*entry) + size, flags, pc);
2971 	if (!event)
2972 		goto out;
2973 	entry = ring_buffer_event_data(event);
2974 
2975 	memcpy(&entry->caller, fstack->calls, size);
2976 	entry->size = nr_entries;
2977 
2978 	if (!call_filter_check_discard(call, entry, buffer, event))
2979 		__buffer_unlock_commit(buffer, event);
2980 
2981  out:
2982 	/* Again, don't let gcc optimize things here */
2983 	barrier();
2984 	__this_cpu_dec(ftrace_stack_reserve);
2985 	preempt_enable_notrace();
2986 
2987 }
2988 
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990 				      struct trace_buffer *buffer,
2991 				      unsigned long flags,
2992 				      int skip, int pc, struct pt_regs *regs)
2993 {
2994 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995 		return;
2996 
2997 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999 
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001 		   int pc)
3002 {
3003 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3004 
3005 	if (rcu_is_watching()) {
3006 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007 		return;
3008 	}
3009 
3010 	/*
3011 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012 	 * but if the above rcu_is_watching() failed, then the NMI
3013 	 * triggered someplace critical, and rcu_irq_enter() should
3014 	 * not be called from NMI.
3015 	 */
3016 	if (unlikely(in_nmi()))
3017 		return;
3018 
3019 	rcu_irq_enter_irqson();
3020 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021 	rcu_irq_exit_irqson();
3022 }
3023 
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030 	unsigned long flags;
3031 
3032 	if (tracing_disabled || tracing_selftest_running)
3033 		return;
3034 
3035 	local_save_flags(flags);
3036 
3037 #ifndef CONFIG_UNWINDER_ORC
3038 	/* Skip 1 to skip this function. */
3039 	skip++;
3040 #endif
3041 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3042 			     flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
3045 
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048 
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052 	struct trace_event_call *call = &event_user_stack;
3053 	struct ring_buffer_event *event;
3054 	struct userstack_entry *entry;
3055 
3056 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057 		return;
3058 
3059 	/*
3060 	 * NMIs can not handle page faults, even with fix ups.
3061 	 * The save user stack can (and often does) fault.
3062 	 */
3063 	if (unlikely(in_nmi()))
3064 		return;
3065 
3066 	/*
3067 	 * prevent recursion, since the user stack tracing may
3068 	 * trigger other kernel events.
3069 	 */
3070 	preempt_disable();
3071 	if (__this_cpu_read(user_stack_count))
3072 		goto out;
3073 
3074 	__this_cpu_inc(user_stack_count);
3075 
3076 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077 					    sizeof(*entry), flags, pc);
3078 	if (!event)
3079 		goto out_drop_count;
3080 	entry	= ring_buffer_event_data(event);
3081 
3082 	entry->tgid		= current->tgid;
3083 	memset(&entry->caller, 0, sizeof(entry->caller));
3084 
3085 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086 	if (!call_filter_check_discard(call, entry, buffer, event))
3087 		__buffer_unlock_commit(buffer, event);
3088 
3089  out_drop_count:
3090 	__this_cpu_dec(user_stack_count);
3091  out:
3092 	preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096 				   unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100 
3101 #endif /* CONFIG_STACKTRACE */
3102 
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105 	int nesting;
3106 	char buffer[4][TRACE_BUF_SIZE];
3107 };
3108 
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110 
3111 /*
3112  * Thise allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118 
3119 	if (!buffer || buffer->nesting >= 4)
3120 		return NULL;
3121 
3122 	buffer->nesting++;
3123 
3124 	/* Interrupts must see nesting incremented before we use the buffer */
3125 	barrier();
3126 	return &buffer->buffer[buffer->nesting][0];
3127 }
3128 
3129 static void put_trace_buf(void)
3130 {
3131 	/* Don't let the decrement of nesting leak before this */
3132 	barrier();
3133 	this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135 
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138 	struct trace_buffer_struct *buffers;
3139 
3140 	buffers = alloc_percpu(struct trace_buffer_struct);
3141 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142 		return -ENOMEM;
3143 
3144 	trace_percpu_buffer = buffers;
3145 	return 0;
3146 }
3147 
3148 static int buffers_allocated;
3149 
3150 void trace_printk_init_buffers(void)
3151 {
3152 	if (buffers_allocated)
3153 		return;
3154 
3155 	if (alloc_percpu_trace_buffer())
3156 		return;
3157 
3158 	/* trace_printk() is for debug use only. Don't use it in production. */
3159 
3160 	pr_warn("\n");
3161 	pr_warn("**********************************************************\n");
3162 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163 	pr_warn("**                                                      **\n");
3164 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165 	pr_warn("**                                                      **\n");
3166 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167 	pr_warn("** unsafe for production use.                           **\n");
3168 	pr_warn("**                                                      **\n");
3169 	pr_warn("** If you see this message and you are not debugging    **\n");
3170 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171 	pr_warn("**                                                      **\n");
3172 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173 	pr_warn("**********************************************************\n");
3174 
3175 	/* Expand the buffers to set size */
3176 	tracing_update_buffers();
3177 
3178 	buffers_allocated = 1;
3179 
3180 	/*
3181 	 * trace_printk_init_buffers() can be called by modules.
3182 	 * If that happens, then we need to start cmdline recording
3183 	 * directly here. If the global_trace.buffer is already
3184 	 * allocated here, then this was called by module code.
3185 	 */
3186 	if (global_trace.array_buffer.buffer)
3187 		tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3190 
3191 void trace_printk_start_comm(void)
3192 {
3193 	/* Start tracing comms if trace printk is set */
3194 	if (!buffers_allocated)
3195 		return;
3196 	tracing_start_cmdline_record();
3197 }
3198 
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201 	if (!buffers_allocated)
3202 		return;
3203 
3204 	if (enabled)
3205 		tracing_start_cmdline_record();
3206 	else
3207 		tracing_stop_cmdline_record();
3208 }
3209 
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218 	struct trace_event_call *call = &event_bprint;
3219 	struct ring_buffer_event *event;
3220 	struct trace_buffer *buffer;
3221 	struct trace_array *tr = &global_trace;
3222 	struct bprint_entry *entry;
3223 	unsigned long flags;
3224 	char *tbuffer;
3225 	int len = 0, size, pc;
3226 
3227 	if (unlikely(tracing_selftest_running || tracing_disabled))
3228 		return 0;
3229 
3230 	/* Don't pollute graph traces with trace_vprintk internals */
3231 	pause_graph_tracing();
3232 
3233 	pc = preempt_count();
3234 	preempt_disable_notrace();
3235 
3236 	tbuffer = get_trace_buf();
3237 	if (!tbuffer) {
3238 		len = 0;
3239 		goto out_nobuffer;
3240 	}
3241 
3242 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243 
3244 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245 		goto out_put;
3246 
3247 	local_save_flags(flags);
3248 	size = sizeof(*entry) + sizeof(u32) * len;
3249 	buffer = tr->array_buffer.buffer;
3250 	ring_buffer_nest_start(buffer);
3251 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252 					    flags, pc);
3253 	if (!event)
3254 		goto out;
3255 	entry = ring_buffer_event_data(event);
3256 	entry->ip			= ip;
3257 	entry->fmt			= fmt;
3258 
3259 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3261 		__buffer_unlock_commit(buffer, event);
3262 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263 	}
3264 
3265 out:
3266 	ring_buffer_nest_end(buffer);
3267 out_put:
3268 	put_trace_buf();
3269 
3270 out_nobuffer:
3271 	preempt_enable_notrace();
3272 	unpause_graph_tracing();
3273 
3274 	return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
3277 
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281 		      unsigned long ip, const char *fmt, va_list args)
3282 {
3283 	struct trace_event_call *call = &event_print;
3284 	struct ring_buffer_event *event;
3285 	int len = 0, size, pc;
3286 	struct print_entry *entry;
3287 	unsigned long flags;
3288 	char *tbuffer;
3289 
3290 	if (tracing_disabled || tracing_selftest_running)
3291 		return 0;
3292 
3293 	/* Don't pollute graph traces with trace_vprintk internals */
3294 	pause_graph_tracing();
3295 
3296 	pc = preempt_count();
3297 	preempt_disable_notrace();
3298 
3299 
3300 	tbuffer = get_trace_buf();
3301 	if (!tbuffer) {
3302 		len = 0;
3303 		goto out_nobuffer;
3304 	}
3305 
3306 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307 
3308 	local_save_flags(flags);
3309 	size = sizeof(*entry) + len + 1;
3310 	ring_buffer_nest_start(buffer);
3311 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312 					    flags, pc);
3313 	if (!event)
3314 		goto out;
3315 	entry = ring_buffer_event_data(event);
3316 	entry->ip = ip;
3317 
3318 	memcpy(&entry->buf, tbuffer, len + 1);
3319 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3320 		__buffer_unlock_commit(buffer, event);
3321 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322 	}
3323 
3324 out:
3325 	ring_buffer_nest_end(buffer);
3326 	put_trace_buf();
3327 
3328 out_nobuffer:
3329 	preempt_enable_notrace();
3330 	unpause_graph_tracing();
3331 
3332 	return len;
3333 }
3334 
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337 			unsigned long ip, const char *fmt, va_list args)
3338 {
3339 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341 
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344 		       unsigned long ip, const char *fmt, ...)
3345 {
3346 	int ret;
3347 	va_list ap;
3348 
3349 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3350 		return 0;
3351 
3352 	if (!tr)
3353 		return -ENOENT;
3354 
3355 	va_start(ap, fmt);
3356 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3357 	va_end(ap);
3358 	return ret;
3359 }
3360 EXPORT_SYMBOL_GPL(trace_array_printk);
3361 
3362 __printf(3, 4)
3363 int trace_array_printk_buf(struct trace_buffer *buffer,
3364 			   unsigned long ip, const char *fmt, ...)
3365 {
3366 	int ret;
3367 	va_list ap;
3368 
3369 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3370 		return 0;
3371 
3372 	va_start(ap, fmt);
3373 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3374 	va_end(ap);
3375 	return ret;
3376 }
3377 
3378 __printf(2, 0)
3379 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3382 }
3383 EXPORT_SYMBOL_GPL(trace_vprintk);
3384 
3385 static void trace_iterator_increment(struct trace_iterator *iter)
3386 {
3387 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3388 
3389 	iter->idx++;
3390 	if (buf_iter)
3391 		ring_buffer_iter_advance(buf_iter);
3392 }
3393 
3394 static struct trace_entry *
3395 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3396 		unsigned long *lost_events)
3397 {
3398 	struct ring_buffer_event *event;
3399 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3400 
3401 	if (buf_iter) {
3402 		event = ring_buffer_iter_peek(buf_iter, ts);
3403 		if (lost_events)
3404 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3405 				(unsigned long)-1 : 0;
3406 	} else {
3407 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3408 					 lost_events);
3409 	}
3410 
3411 	if (event) {
3412 		iter->ent_size = ring_buffer_event_length(event);
3413 		return ring_buffer_event_data(event);
3414 	}
3415 	iter->ent_size = 0;
3416 	return NULL;
3417 }
3418 
3419 static struct trace_entry *
3420 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3421 		  unsigned long *missing_events, u64 *ent_ts)
3422 {
3423 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3424 	struct trace_entry *ent, *next = NULL;
3425 	unsigned long lost_events = 0, next_lost = 0;
3426 	int cpu_file = iter->cpu_file;
3427 	u64 next_ts = 0, ts;
3428 	int next_cpu = -1;
3429 	int next_size = 0;
3430 	int cpu;
3431 
3432 	/*
3433 	 * If we are in a per_cpu trace file, don't bother by iterating over
3434 	 * all cpu and peek directly.
3435 	 */
3436 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3437 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3438 			return NULL;
3439 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3440 		if (ent_cpu)
3441 			*ent_cpu = cpu_file;
3442 
3443 		return ent;
3444 	}
3445 
3446 	for_each_tracing_cpu(cpu) {
3447 
3448 		if (ring_buffer_empty_cpu(buffer, cpu))
3449 			continue;
3450 
3451 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3452 
3453 		/*
3454 		 * Pick the entry with the smallest timestamp:
3455 		 */
3456 		if (ent && (!next || ts < next_ts)) {
3457 			next = ent;
3458 			next_cpu = cpu;
3459 			next_ts = ts;
3460 			next_lost = lost_events;
3461 			next_size = iter->ent_size;
3462 		}
3463 	}
3464 
3465 	iter->ent_size = next_size;
3466 
3467 	if (ent_cpu)
3468 		*ent_cpu = next_cpu;
3469 
3470 	if (ent_ts)
3471 		*ent_ts = next_ts;
3472 
3473 	if (missing_events)
3474 		*missing_events = next_lost;
3475 
3476 	return next;
3477 }
3478 
3479 #define STATIC_TEMP_BUF_SIZE	128
3480 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3481 
3482 /* Find the next real entry, without updating the iterator itself */
3483 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3484 					  int *ent_cpu, u64 *ent_ts)
3485 {
3486 	/* __find_next_entry will reset ent_size */
3487 	int ent_size = iter->ent_size;
3488 	struct trace_entry *entry;
3489 
3490 	/*
3491 	 * If called from ftrace_dump(), then the iter->temp buffer
3492 	 * will be the static_temp_buf and not created from kmalloc.
3493 	 * If the entry size is greater than the buffer, we can
3494 	 * not save it. Just return NULL in that case. This is only
3495 	 * used to add markers when two consecutive events' time
3496 	 * stamps have a large delta. See trace_print_lat_context()
3497 	 */
3498 	if (iter->temp == static_temp_buf &&
3499 	    STATIC_TEMP_BUF_SIZE < ent_size)
3500 		return NULL;
3501 
3502 	/*
3503 	 * The __find_next_entry() may call peek_next_entry(), which may
3504 	 * call ring_buffer_peek() that may make the contents of iter->ent
3505 	 * undefined. Need to copy iter->ent now.
3506 	 */
3507 	if (iter->ent && iter->ent != iter->temp) {
3508 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3509 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3510 			kfree(iter->temp);
3511 			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3512 			if (!iter->temp)
3513 				return NULL;
3514 		}
3515 		memcpy(iter->temp, iter->ent, iter->ent_size);
3516 		iter->temp_size = iter->ent_size;
3517 		iter->ent = iter->temp;
3518 	}
3519 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3520 	/* Put back the original ent_size */
3521 	iter->ent_size = ent_size;
3522 
3523 	return entry;
3524 }
3525 
3526 /* Find the next real entry, and increment the iterator to the next entry */
3527 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3528 {
3529 	iter->ent = __find_next_entry(iter, &iter->cpu,
3530 				      &iter->lost_events, &iter->ts);
3531 
3532 	if (iter->ent)
3533 		trace_iterator_increment(iter);
3534 
3535 	return iter->ent ? iter : NULL;
3536 }
3537 
3538 static void trace_consume(struct trace_iterator *iter)
3539 {
3540 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3541 			    &iter->lost_events);
3542 }
3543 
3544 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3545 {
3546 	struct trace_iterator *iter = m->private;
3547 	int i = (int)*pos;
3548 	void *ent;
3549 
3550 	WARN_ON_ONCE(iter->leftover);
3551 
3552 	(*pos)++;
3553 
3554 	/* can't go backwards */
3555 	if (iter->idx > i)
3556 		return NULL;
3557 
3558 	if (iter->idx < 0)
3559 		ent = trace_find_next_entry_inc(iter);
3560 	else
3561 		ent = iter;
3562 
3563 	while (ent && iter->idx < i)
3564 		ent = trace_find_next_entry_inc(iter);
3565 
3566 	iter->pos = *pos;
3567 
3568 	return ent;
3569 }
3570 
3571 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3572 {
3573 	struct ring_buffer_iter *buf_iter;
3574 	unsigned long entries = 0;
3575 	u64 ts;
3576 
3577 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3578 
3579 	buf_iter = trace_buffer_iter(iter, cpu);
3580 	if (!buf_iter)
3581 		return;
3582 
3583 	ring_buffer_iter_reset(buf_iter);
3584 
3585 	/*
3586 	 * We could have the case with the max latency tracers
3587 	 * that a reset never took place on a cpu. This is evident
3588 	 * by the timestamp being before the start of the buffer.
3589 	 */
3590 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3591 		if (ts >= iter->array_buffer->time_start)
3592 			break;
3593 		entries++;
3594 		ring_buffer_iter_advance(buf_iter);
3595 	}
3596 
3597 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3598 }
3599 
3600 /*
3601  * The current tracer is copied to avoid a global locking
3602  * all around.
3603  */
3604 static void *s_start(struct seq_file *m, loff_t *pos)
3605 {
3606 	struct trace_iterator *iter = m->private;
3607 	struct trace_array *tr = iter->tr;
3608 	int cpu_file = iter->cpu_file;
3609 	void *p = NULL;
3610 	loff_t l = 0;
3611 	int cpu;
3612 
3613 	/*
3614 	 * copy the tracer to avoid using a global lock all around.
3615 	 * iter->trace is a copy of current_trace, the pointer to the
3616 	 * name may be used instead of a strcmp(), as iter->trace->name
3617 	 * will point to the same string as current_trace->name.
3618 	 */
3619 	mutex_lock(&trace_types_lock);
3620 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3621 		*iter->trace = *tr->current_trace;
3622 	mutex_unlock(&trace_types_lock);
3623 
3624 #ifdef CONFIG_TRACER_MAX_TRACE
3625 	if (iter->snapshot && iter->trace->use_max_tr)
3626 		return ERR_PTR(-EBUSY);
3627 #endif
3628 
3629 	if (!iter->snapshot)
3630 		atomic_inc(&trace_record_taskinfo_disabled);
3631 
3632 	if (*pos != iter->pos) {
3633 		iter->ent = NULL;
3634 		iter->cpu = 0;
3635 		iter->idx = -1;
3636 
3637 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3638 			for_each_tracing_cpu(cpu)
3639 				tracing_iter_reset(iter, cpu);
3640 		} else
3641 			tracing_iter_reset(iter, cpu_file);
3642 
3643 		iter->leftover = 0;
3644 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3645 			;
3646 
3647 	} else {
3648 		/*
3649 		 * If we overflowed the seq_file before, then we want
3650 		 * to just reuse the trace_seq buffer again.
3651 		 */
3652 		if (iter->leftover)
3653 			p = iter;
3654 		else {
3655 			l = *pos - 1;
3656 			p = s_next(m, p, &l);
3657 		}
3658 	}
3659 
3660 	trace_event_read_lock();
3661 	trace_access_lock(cpu_file);
3662 	return p;
3663 }
3664 
3665 static void s_stop(struct seq_file *m, void *p)
3666 {
3667 	struct trace_iterator *iter = m->private;
3668 
3669 #ifdef CONFIG_TRACER_MAX_TRACE
3670 	if (iter->snapshot && iter->trace->use_max_tr)
3671 		return;
3672 #endif
3673 
3674 	if (!iter->snapshot)
3675 		atomic_dec(&trace_record_taskinfo_disabled);
3676 
3677 	trace_access_unlock(iter->cpu_file);
3678 	trace_event_read_unlock();
3679 }
3680 
3681 static void
3682 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3683 		      unsigned long *entries, int cpu)
3684 {
3685 	unsigned long count;
3686 
3687 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3688 	/*
3689 	 * If this buffer has skipped entries, then we hold all
3690 	 * entries for the trace and we need to ignore the
3691 	 * ones before the time stamp.
3692 	 */
3693 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3694 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3695 		/* total is the same as the entries */
3696 		*total = count;
3697 	} else
3698 		*total = count +
3699 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3700 	*entries = count;
3701 }
3702 
3703 static void
3704 get_total_entries(struct array_buffer *buf,
3705 		  unsigned long *total, unsigned long *entries)
3706 {
3707 	unsigned long t, e;
3708 	int cpu;
3709 
3710 	*total = 0;
3711 	*entries = 0;
3712 
3713 	for_each_tracing_cpu(cpu) {
3714 		get_total_entries_cpu(buf, &t, &e, cpu);
3715 		*total += t;
3716 		*entries += e;
3717 	}
3718 }
3719 
3720 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3721 {
3722 	unsigned long total, entries;
3723 
3724 	if (!tr)
3725 		tr = &global_trace;
3726 
3727 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3728 
3729 	return entries;
3730 }
3731 
3732 unsigned long trace_total_entries(struct trace_array *tr)
3733 {
3734 	unsigned long total, entries;
3735 
3736 	if (!tr)
3737 		tr = &global_trace;
3738 
3739 	get_total_entries(&tr->array_buffer, &total, &entries);
3740 
3741 	return entries;
3742 }
3743 
3744 static void print_lat_help_header(struct seq_file *m)
3745 {
3746 	seq_puts(m, "#                  _------=> CPU#            \n"
3747 		    "#                 / _-----=> irqs-off        \n"
3748 		    "#                | / _----=> need-resched    \n"
3749 		    "#                || / _---=> hardirq/softirq \n"
3750 		    "#                ||| / _--=> preempt-depth   \n"
3751 		    "#                |||| /     delay            \n"
3752 		    "#  cmd     pid   ||||| time  |   caller      \n"
3753 		    "#     \\   /      |||||  \\    |   /         \n");
3754 }
3755 
3756 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3757 {
3758 	unsigned long total;
3759 	unsigned long entries;
3760 
3761 	get_total_entries(buf, &total, &entries);
3762 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3763 		   entries, total, num_online_cpus());
3764 	seq_puts(m, "#\n");
3765 }
3766 
3767 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3768 				   unsigned int flags)
3769 {
3770 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3771 
3772 	print_event_info(buf, m);
3773 
3774 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3775 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3776 }
3777 
3778 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3779 				       unsigned int flags)
3780 {
3781 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3782 	const char *space = "          ";
3783 	int prec = tgid ? 10 : 2;
3784 
3785 	print_event_info(buf, m);
3786 
3787 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3788 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3789 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3790 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3791 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3792 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3793 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3794 }
3795 
3796 void
3797 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3798 {
3799 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3800 	struct array_buffer *buf = iter->array_buffer;
3801 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3802 	struct tracer *type = iter->trace;
3803 	unsigned long entries;
3804 	unsigned long total;
3805 	const char *name = "preemption";
3806 
3807 	name = type->name;
3808 
3809 	get_total_entries(buf, &total, &entries);
3810 
3811 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3812 		   name, UTS_RELEASE);
3813 	seq_puts(m, "# -----------------------------------"
3814 		 "---------------------------------\n");
3815 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3816 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3817 		   nsecs_to_usecs(data->saved_latency),
3818 		   entries,
3819 		   total,
3820 		   buf->cpu,
3821 #if defined(CONFIG_PREEMPT_NONE)
3822 		   "server",
3823 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3824 		   "desktop",
3825 #elif defined(CONFIG_PREEMPT)
3826 		   "preempt",
3827 #elif defined(CONFIG_PREEMPT_RT)
3828 		   "preempt_rt",
3829 #else
3830 		   "unknown",
3831 #endif
3832 		   /* These are reserved for later use */
3833 		   0, 0, 0, 0);
3834 #ifdef CONFIG_SMP
3835 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3836 #else
3837 	seq_puts(m, ")\n");
3838 #endif
3839 	seq_puts(m, "#    -----------------\n");
3840 	seq_printf(m, "#    | task: %.16s-%d "
3841 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3842 		   data->comm, data->pid,
3843 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3844 		   data->policy, data->rt_priority);
3845 	seq_puts(m, "#    -----------------\n");
3846 
3847 	if (data->critical_start) {
3848 		seq_puts(m, "#  => started at: ");
3849 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3850 		trace_print_seq(m, &iter->seq);
3851 		seq_puts(m, "\n#  => ended at:   ");
3852 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3853 		trace_print_seq(m, &iter->seq);
3854 		seq_puts(m, "\n#\n");
3855 	}
3856 
3857 	seq_puts(m, "#\n");
3858 }
3859 
3860 static void test_cpu_buff_start(struct trace_iterator *iter)
3861 {
3862 	struct trace_seq *s = &iter->seq;
3863 	struct trace_array *tr = iter->tr;
3864 
3865 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3866 		return;
3867 
3868 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3869 		return;
3870 
3871 	if (cpumask_available(iter->started) &&
3872 	    cpumask_test_cpu(iter->cpu, iter->started))
3873 		return;
3874 
3875 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3876 		return;
3877 
3878 	if (cpumask_available(iter->started))
3879 		cpumask_set_cpu(iter->cpu, iter->started);
3880 
3881 	/* Don't print started cpu buffer for the first entry of the trace */
3882 	if (iter->idx > 1)
3883 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3884 				iter->cpu);
3885 }
3886 
3887 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3888 {
3889 	struct trace_array *tr = iter->tr;
3890 	struct trace_seq *s = &iter->seq;
3891 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3892 	struct trace_entry *entry;
3893 	struct trace_event *event;
3894 
3895 	entry = iter->ent;
3896 
3897 	test_cpu_buff_start(iter);
3898 
3899 	event = ftrace_find_event(entry->type);
3900 
3901 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3902 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3903 			trace_print_lat_context(iter);
3904 		else
3905 			trace_print_context(iter);
3906 	}
3907 
3908 	if (trace_seq_has_overflowed(s))
3909 		return TRACE_TYPE_PARTIAL_LINE;
3910 
3911 	if (event)
3912 		return event->funcs->trace(iter, sym_flags, event);
3913 
3914 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3915 
3916 	return trace_handle_return(s);
3917 }
3918 
3919 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3920 {
3921 	struct trace_array *tr = iter->tr;
3922 	struct trace_seq *s = &iter->seq;
3923 	struct trace_entry *entry;
3924 	struct trace_event *event;
3925 
3926 	entry = iter->ent;
3927 
3928 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3929 		trace_seq_printf(s, "%d %d %llu ",
3930 				 entry->pid, iter->cpu, iter->ts);
3931 
3932 	if (trace_seq_has_overflowed(s))
3933 		return TRACE_TYPE_PARTIAL_LINE;
3934 
3935 	event = ftrace_find_event(entry->type);
3936 	if (event)
3937 		return event->funcs->raw(iter, 0, event);
3938 
3939 	trace_seq_printf(s, "%d ?\n", entry->type);
3940 
3941 	return trace_handle_return(s);
3942 }
3943 
3944 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3945 {
3946 	struct trace_array *tr = iter->tr;
3947 	struct trace_seq *s = &iter->seq;
3948 	unsigned char newline = '\n';
3949 	struct trace_entry *entry;
3950 	struct trace_event *event;
3951 
3952 	entry = iter->ent;
3953 
3954 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3955 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3956 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3957 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3958 		if (trace_seq_has_overflowed(s))
3959 			return TRACE_TYPE_PARTIAL_LINE;
3960 	}
3961 
3962 	event = ftrace_find_event(entry->type);
3963 	if (event) {
3964 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3965 		if (ret != TRACE_TYPE_HANDLED)
3966 			return ret;
3967 	}
3968 
3969 	SEQ_PUT_FIELD(s, newline);
3970 
3971 	return trace_handle_return(s);
3972 }
3973 
3974 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3975 {
3976 	struct trace_array *tr = iter->tr;
3977 	struct trace_seq *s = &iter->seq;
3978 	struct trace_entry *entry;
3979 	struct trace_event *event;
3980 
3981 	entry = iter->ent;
3982 
3983 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3984 		SEQ_PUT_FIELD(s, entry->pid);
3985 		SEQ_PUT_FIELD(s, iter->cpu);
3986 		SEQ_PUT_FIELD(s, iter->ts);
3987 		if (trace_seq_has_overflowed(s))
3988 			return TRACE_TYPE_PARTIAL_LINE;
3989 	}
3990 
3991 	event = ftrace_find_event(entry->type);
3992 	return event ? event->funcs->binary(iter, 0, event) :
3993 		TRACE_TYPE_HANDLED;
3994 }
3995 
3996 int trace_empty(struct trace_iterator *iter)
3997 {
3998 	struct ring_buffer_iter *buf_iter;
3999 	int cpu;
4000 
4001 	/* If we are looking at one CPU buffer, only check that one */
4002 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4003 		cpu = iter->cpu_file;
4004 		buf_iter = trace_buffer_iter(iter, cpu);
4005 		if (buf_iter) {
4006 			if (!ring_buffer_iter_empty(buf_iter))
4007 				return 0;
4008 		} else {
4009 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4010 				return 0;
4011 		}
4012 		return 1;
4013 	}
4014 
4015 	for_each_tracing_cpu(cpu) {
4016 		buf_iter = trace_buffer_iter(iter, cpu);
4017 		if (buf_iter) {
4018 			if (!ring_buffer_iter_empty(buf_iter))
4019 				return 0;
4020 		} else {
4021 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4022 				return 0;
4023 		}
4024 	}
4025 
4026 	return 1;
4027 }
4028 
4029 /*  Called with trace_event_read_lock() held. */
4030 enum print_line_t print_trace_line(struct trace_iterator *iter)
4031 {
4032 	struct trace_array *tr = iter->tr;
4033 	unsigned long trace_flags = tr->trace_flags;
4034 	enum print_line_t ret;
4035 
4036 	if (iter->lost_events) {
4037 		if (iter->lost_events == (unsigned long)-1)
4038 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4039 					 iter->cpu);
4040 		else
4041 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4042 					 iter->cpu, iter->lost_events);
4043 		if (trace_seq_has_overflowed(&iter->seq))
4044 			return TRACE_TYPE_PARTIAL_LINE;
4045 	}
4046 
4047 	if (iter->trace && iter->trace->print_line) {
4048 		ret = iter->trace->print_line(iter);
4049 		if (ret != TRACE_TYPE_UNHANDLED)
4050 			return ret;
4051 	}
4052 
4053 	if (iter->ent->type == TRACE_BPUTS &&
4054 			trace_flags & TRACE_ITER_PRINTK &&
4055 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4056 		return trace_print_bputs_msg_only(iter);
4057 
4058 	if (iter->ent->type == TRACE_BPRINT &&
4059 			trace_flags & TRACE_ITER_PRINTK &&
4060 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4061 		return trace_print_bprintk_msg_only(iter);
4062 
4063 	if (iter->ent->type == TRACE_PRINT &&
4064 			trace_flags & TRACE_ITER_PRINTK &&
4065 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4066 		return trace_print_printk_msg_only(iter);
4067 
4068 	if (trace_flags & TRACE_ITER_BIN)
4069 		return print_bin_fmt(iter);
4070 
4071 	if (trace_flags & TRACE_ITER_HEX)
4072 		return print_hex_fmt(iter);
4073 
4074 	if (trace_flags & TRACE_ITER_RAW)
4075 		return print_raw_fmt(iter);
4076 
4077 	return print_trace_fmt(iter);
4078 }
4079 
4080 void trace_latency_header(struct seq_file *m)
4081 {
4082 	struct trace_iterator *iter = m->private;
4083 	struct trace_array *tr = iter->tr;
4084 
4085 	/* print nothing if the buffers are empty */
4086 	if (trace_empty(iter))
4087 		return;
4088 
4089 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4090 		print_trace_header(m, iter);
4091 
4092 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4093 		print_lat_help_header(m);
4094 }
4095 
4096 void trace_default_header(struct seq_file *m)
4097 {
4098 	struct trace_iterator *iter = m->private;
4099 	struct trace_array *tr = iter->tr;
4100 	unsigned long trace_flags = tr->trace_flags;
4101 
4102 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4103 		return;
4104 
4105 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4106 		/* print nothing if the buffers are empty */
4107 		if (trace_empty(iter))
4108 			return;
4109 		print_trace_header(m, iter);
4110 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4111 			print_lat_help_header(m);
4112 	} else {
4113 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4114 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4115 				print_func_help_header_irq(iter->array_buffer,
4116 							   m, trace_flags);
4117 			else
4118 				print_func_help_header(iter->array_buffer, m,
4119 						       trace_flags);
4120 		}
4121 	}
4122 }
4123 
4124 static void test_ftrace_alive(struct seq_file *m)
4125 {
4126 	if (!ftrace_is_dead())
4127 		return;
4128 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4129 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4130 }
4131 
4132 #ifdef CONFIG_TRACER_MAX_TRACE
4133 static void show_snapshot_main_help(struct seq_file *m)
4134 {
4135 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4136 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4137 		    "#                      Takes a snapshot of the main buffer.\n"
4138 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4139 		    "#                      (Doesn't have to be '2' works with any number that\n"
4140 		    "#                       is not a '0' or '1')\n");
4141 }
4142 
4143 static void show_snapshot_percpu_help(struct seq_file *m)
4144 {
4145 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4146 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4147 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4148 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4149 #else
4150 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4151 		    "#                     Must use main snapshot file to allocate.\n");
4152 #endif
4153 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4154 		    "#                      (Doesn't have to be '2' works with any number that\n"
4155 		    "#                       is not a '0' or '1')\n");
4156 }
4157 
4158 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160 	if (iter->tr->allocated_snapshot)
4161 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4162 	else
4163 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4164 
4165 	seq_puts(m, "# Snapshot commands:\n");
4166 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4167 		show_snapshot_main_help(m);
4168 	else
4169 		show_snapshot_percpu_help(m);
4170 }
4171 #else
4172 /* Should never be called */
4173 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4174 #endif
4175 
4176 static int s_show(struct seq_file *m, void *v)
4177 {
4178 	struct trace_iterator *iter = v;
4179 	int ret;
4180 
4181 	if (iter->ent == NULL) {
4182 		if (iter->tr) {
4183 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4184 			seq_puts(m, "#\n");
4185 			test_ftrace_alive(m);
4186 		}
4187 		if (iter->snapshot && trace_empty(iter))
4188 			print_snapshot_help(m, iter);
4189 		else if (iter->trace && iter->trace->print_header)
4190 			iter->trace->print_header(m);
4191 		else
4192 			trace_default_header(m);
4193 
4194 	} else if (iter->leftover) {
4195 		/*
4196 		 * If we filled the seq_file buffer earlier, we
4197 		 * want to just show it now.
4198 		 */
4199 		ret = trace_print_seq(m, &iter->seq);
4200 
4201 		/* ret should this time be zero, but you never know */
4202 		iter->leftover = ret;
4203 
4204 	} else {
4205 		print_trace_line(iter);
4206 		ret = trace_print_seq(m, &iter->seq);
4207 		/*
4208 		 * If we overflow the seq_file buffer, then it will
4209 		 * ask us for this data again at start up.
4210 		 * Use that instead.
4211 		 *  ret is 0 if seq_file write succeeded.
4212 		 *        -1 otherwise.
4213 		 */
4214 		iter->leftover = ret;
4215 	}
4216 
4217 	return 0;
4218 }
4219 
4220 /*
4221  * Should be used after trace_array_get(), trace_types_lock
4222  * ensures that i_cdev was already initialized.
4223  */
4224 static inline int tracing_get_cpu(struct inode *inode)
4225 {
4226 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4227 		return (long)inode->i_cdev - 1;
4228 	return RING_BUFFER_ALL_CPUS;
4229 }
4230 
4231 static const struct seq_operations tracer_seq_ops = {
4232 	.start		= s_start,
4233 	.next		= s_next,
4234 	.stop		= s_stop,
4235 	.show		= s_show,
4236 };
4237 
4238 static struct trace_iterator *
4239 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4240 {
4241 	struct trace_array *tr = inode->i_private;
4242 	struct trace_iterator *iter;
4243 	int cpu;
4244 
4245 	if (tracing_disabled)
4246 		return ERR_PTR(-ENODEV);
4247 
4248 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4249 	if (!iter)
4250 		return ERR_PTR(-ENOMEM);
4251 
4252 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4253 				    GFP_KERNEL);
4254 	if (!iter->buffer_iter)
4255 		goto release;
4256 
4257 	/*
4258 	 * trace_find_next_entry() may need to save off iter->ent.
4259 	 * It will place it into the iter->temp buffer. As most
4260 	 * events are less than 128, allocate a buffer of that size.
4261 	 * If one is greater, then trace_find_next_entry() will
4262 	 * allocate a new buffer to adjust for the bigger iter->ent.
4263 	 * It's not critical if it fails to get allocated here.
4264 	 */
4265 	iter->temp = kmalloc(128, GFP_KERNEL);
4266 	if (iter->temp)
4267 		iter->temp_size = 128;
4268 
4269 	/*
4270 	 * We make a copy of the current tracer to avoid concurrent
4271 	 * changes on it while we are reading.
4272 	 */
4273 	mutex_lock(&trace_types_lock);
4274 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4275 	if (!iter->trace)
4276 		goto fail;
4277 
4278 	*iter->trace = *tr->current_trace;
4279 
4280 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4281 		goto fail;
4282 
4283 	iter->tr = tr;
4284 
4285 #ifdef CONFIG_TRACER_MAX_TRACE
4286 	/* Currently only the top directory has a snapshot */
4287 	if (tr->current_trace->print_max || snapshot)
4288 		iter->array_buffer = &tr->max_buffer;
4289 	else
4290 #endif
4291 		iter->array_buffer = &tr->array_buffer;
4292 	iter->snapshot = snapshot;
4293 	iter->pos = -1;
4294 	iter->cpu_file = tracing_get_cpu(inode);
4295 	mutex_init(&iter->mutex);
4296 
4297 	/* Notify the tracer early; before we stop tracing. */
4298 	if (iter->trace->open)
4299 		iter->trace->open(iter);
4300 
4301 	/* Annotate start of buffers if we had overruns */
4302 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4303 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4304 
4305 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4306 	if (trace_clocks[tr->clock_id].in_ns)
4307 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4308 
4309 	/*
4310 	 * If pause-on-trace is enabled, then stop the trace while
4311 	 * dumping, unless this is the "snapshot" file
4312 	 */
4313 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4314 		tracing_stop_tr(tr);
4315 
4316 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4317 		for_each_tracing_cpu(cpu) {
4318 			iter->buffer_iter[cpu] =
4319 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4320 							 cpu, GFP_KERNEL);
4321 		}
4322 		ring_buffer_read_prepare_sync();
4323 		for_each_tracing_cpu(cpu) {
4324 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4325 			tracing_iter_reset(iter, cpu);
4326 		}
4327 	} else {
4328 		cpu = iter->cpu_file;
4329 		iter->buffer_iter[cpu] =
4330 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4331 						 cpu, GFP_KERNEL);
4332 		ring_buffer_read_prepare_sync();
4333 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4334 		tracing_iter_reset(iter, cpu);
4335 	}
4336 
4337 	mutex_unlock(&trace_types_lock);
4338 
4339 	return iter;
4340 
4341  fail:
4342 	mutex_unlock(&trace_types_lock);
4343 	kfree(iter->trace);
4344 	kfree(iter->temp);
4345 	kfree(iter->buffer_iter);
4346 release:
4347 	seq_release_private(inode, file);
4348 	return ERR_PTR(-ENOMEM);
4349 }
4350 
4351 int tracing_open_generic(struct inode *inode, struct file *filp)
4352 {
4353 	int ret;
4354 
4355 	ret = tracing_check_open_get_tr(NULL);
4356 	if (ret)
4357 		return ret;
4358 
4359 	filp->private_data = inode->i_private;
4360 	return 0;
4361 }
4362 
4363 bool tracing_is_disabled(void)
4364 {
4365 	return (tracing_disabled) ? true: false;
4366 }
4367 
4368 /*
4369  * Open and update trace_array ref count.
4370  * Must have the current trace_array passed to it.
4371  */
4372 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4373 {
4374 	struct trace_array *tr = inode->i_private;
4375 	int ret;
4376 
4377 	ret = tracing_check_open_get_tr(tr);
4378 	if (ret)
4379 		return ret;
4380 
4381 	filp->private_data = inode->i_private;
4382 
4383 	return 0;
4384 }
4385 
4386 static int tracing_release(struct inode *inode, struct file *file)
4387 {
4388 	struct trace_array *tr = inode->i_private;
4389 	struct seq_file *m = file->private_data;
4390 	struct trace_iterator *iter;
4391 	int cpu;
4392 
4393 	if (!(file->f_mode & FMODE_READ)) {
4394 		trace_array_put(tr);
4395 		return 0;
4396 	}
4397 
4398 	/* Writes do not use seq_file */
4399 	iter = m->private;
4400 	mutex_lock(&trace_types_lock);
4401 
4402 	for_each_tracing_cpu(cpu) {
4403 		if (iter->buffer_iter[cpu])
4404 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4405 	}
4406 
4407 	if (iter->trace && iter->trace->close)
4408 		iter->trace->close(iter);
4409 
4410 	if (!iter->snapshot && tr->stop_count)
4411 		/* reenable tracing if it was previously enabled */
4412 		tracing_start_tr(tr);
4413 
4414 	__trace_array_put(tr);
4415 
4416 	mutex_unlock(&trace_types_lock);
4417 
4418 	mutex_destroy(&iter->mutex);
4419 	free_cpumask_var(iter->started);
4420 	kfree(iter->temp);
4421 	kfree(iter->trace);
4422 	kfree(iter->buffer_iter);
4423 	seq_release_private(inode, file);
4424 
4425 	return 0;
4426 }
4427 
4428 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4429 {
4430 	struct trace_array *tr = inode->i_private;
4431 
4432 	trace_array_put(tr);
4433 	return 0;
4434 }
4435 
4436 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4437 {
4438 	struct trace_array *tr = inode->i_private;
4439 
4440 	trace_array_put(tr);
4441 
4442 	return single_release(inode, file);
4443 }
4444 
4445 static int tracing_open(struct inode *inode, struct file *file)
4446 {
4447 	struct trace_array *tr = inode->i_private;
4448 	struct trace_iterator *iter;
4449 	int ret;
4450 
4451 	ret = tracing_check_open_get_tr(tr);
4452 	if (ret)
4453 		return ret;
4454 
4455 	/* If this file was open for write, then erase contents */
4456 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4457 		int cpu = tracing_get_cpu(inode);
4458 		struct array_buffer *trace_buf = &tr->array_buffer;
4459 
4460 #ifdef CONFIG_TRACER_MAX_TRACE
4461 		if (tr->current_trace->print_max)
4462 			trace_buf = &tr->max_buffer;
4463 #endif
4464 
4465 		if (cpu == RING_BUFFER_ALL_CPUS)
4466 			tracing_reset_online_cpus(trace_buf);
4467 		else
4468 			tracing_reset_cpu(trace_buf, cpu);
4469 	}
4470 
4471 	if (file->f_mode & FMODE_READ) {
4472 		iter = __tracing_open(inode, file, false);
4473 		if (IS_ERR(iter))
4474 			ret = PTR_ERR(iter);
4475 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4476 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4477 	}
4478 
4479 	if (ret < 0)
4480 		trace_array_put(tr);
4481 
4482 	return ret;
4483 }
4484 
4485 /*
4486  * Some tracers are not suitable for instance buffers.
4487  * A tracer is always available for the global array (toplevel)
4488  * or if it explicitly states that it is.
4489  */
4490 static bool
4491 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4492 {
4493 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4494 }
4495 
4496 /* Find the next tracer that this trace array may use */
4497 static struct tracer *
4498 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4499 {
4500 	while (t && !trace_ok_for_array(t, tr))
4501 		t = t->next;
4502 
4503 	return t;
4504 }
4505 
4506 static void *
4507 t_next(struct seq_file *m, void *v, loff_t *pos)
4508 {
4509 	struct trace_array *tr = m->private;
4510 	struct tracer *t = v;
4511 
4512 	(*pos)++;
4513 
4514 	if (t)
4515 		t = get_tracer_for_array(tr, t->next);
4516 
4517 	return t;
4518 }
4519 
4520 static void *t_start(struct seq_file *m, loff_t *pos)
4521 {
4522 	struct trace_array *tr = m->private;
4523 	struct tracer *t;
4524 	loff_t l = 0;
4525 
4526 	mutex_lock(&trace_types_lock);
4527 
4528 	t = get_tracer_for_array(tr, trace_types);
4529 	for (; t && l < *pos; t = t_next(m, t, &l))
4530 			;
4531 
4532 	return t;
4533 }
4534 
4535 static void t_stop(struct seq_file *m, void *p)
4536 {
4537 	mutex_unlock(&trace_types_lock);
4538 }
4539 
4540 static int t_show(struct seq_file *m, void *v)
4541 {
4542 	struct tracer *t = v;
4543 
4544 	if (!t)
4545 		return 0;
4546 
4547 	seq_puts(m, t->name);
4548 	if (t->next)
4549 		seq_putc(m, ' ');
4550 	else
4551 		seq_putc(m, '\n');
4552 
4553 	return 0;
4554 }
4555 
4556 static const struct seq_operations show_traces_seq_ops = {
4557 	.start		= t_start,
4558 	.next		= t_next,
4559 	.stop		= t_stop,
4560 	.show		= t_show,
4561 };
4562 
4563 static int show_traces_open(struct inode *inode, struct file *file)
4564 {
4565 	struct trace_array *tr = inode->i_private;
4566 	struct seq_file *m;
4567 	int ret;
4568 
4569 	ret = tracing_check_open_get_tr(tr);
4570 	if (ret)
4571 		return ret;
4572 
4573 	ret = seq_open(file, &show_traces_seq_ops);
4574 	if (ret) {
4575 		trace_array_put(tr);
4576 		return ret;
4577 	}
4578 
4579 	m = file->private_data;
4580 	m->private = tr;
4581 
4582 	return 0;
4583 }
4584 
4585 static int show_traces_release(struct inode *inode, struct file *file)
4586 {
4587 	struct trace_array *tr = inode->i_private;
4588 
4589 	trace_array_put(tr);
4590 	return seq_release(inode, file);
4591 }
4592 
4593 static ssize_t
4594 tracing_write_stub(struct file *filp, const char __user *ubuf,
4595 		   size_t count, loff_t *ppos)
4596 {
4597 	return count;
4598 }
4599 
4600 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4601 {
4602 	int ret;
4603 
4604 	if (file->f_mode & FMODE_READ)
4605 		ret = seq_lseek(file, offset, whence);
4606 	else
4607 		file->f_pos = ret = 0;
4608 
4609 	return ret;
4610 }
4611 
4612 static const struct file_operations tracing_fops = {
4613 	.open		= tracing_open,
4614 	.read		= seq_read,
4615 	.write		= tracing_write_stub,
4616 	.llseek		= tracing_lseek,
4617 	.release	= tracing_release,
4618 };
4619 
4620 static const struct file_operations show_traces_fops = {
4621 	.open		= show_traces_open,
4622 	.read		= seq_read,
4623 	.llseek		= seq_lseek,
4624 	.release	= show_traces_release,
4625 };
4626 
4627 static ssize_t
4628 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4629 		     size_t count, loff_t *ppos)
4630 {
4631 	struct trace_array *tr = file_inode(filp)->i_private;
4632 	char *mask_str;
4633 	int len;
4634 
4635 	len = snprintf(NULL, 0, "%*pb\n",
4636 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4637 	mask_str = kmalloc(len, GFP_KERNEL);
4638 	if (!mask_str)
4639 		return -ENOMEM;
4640 
4641 	len = snprintf(mask_str, len, "%*pb\n",
4642 		       cpumask_pr_args(tr->tracing_cpumask));
4643 	if (len >= count) {
4644 		count = -EINVAL;
4645 		goto out_err;
4646 	}
4647 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4648 
4649 out_err:
4650 	kfree(mask_str);
4651 
4652 	return count;
4653 }
4654 
4655 int tracing_set_cpumask(struct trace_array *tr,
4656 			cpumask_var_t tracing_cpumask_new)
4657 {
4658 	int cpu;
4659 
4660 	if (!tr)
4661 		return -EINVAL;
4662 
4663 	local_irq_disable();
4664 	arch_spin_lock(&tr->max_lock);
4665 	for_each_tracing_cpu(cpu) {
4666 		/*
4667 		 * Increase/decrease the disabled counter if we are
4668 		 * about to flip a bit in the cpumask:
4669 		 */
4670 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4671 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4672 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4673 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4674 		}
4675 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4676 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4677 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4678 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4679 		}
4680 	}
4681 	arch_spin_unlock(&tr->max_lock);
4682 	local_irq_enable();
4683 
4684 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4685 
4686 	return 0;
4687 }
4688 
4689 static ssize_t
4690 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4691 		      size_t count, loff_t *ppos)
4692 {
4693 	struct trace_array *tr = file_inode(filp)->i_private;
4694 	cpumask_var_t tracing_cpumask_new;
4695 	int err;
4696 
4697 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4698 		return -ENOMEM;
4699 
4700 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4701 	if (err)
4702 		goto err_free;
4703 
4704 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4705 	if (err)
4706 		goto err_free;
4707 
4708 	free_cpumask_var(tracing_cpumask_new);
4709 
4710 	return count;
4711 
4712 err_free:
4713 	free_cpumask_var(tracing_cpumask_new);
4714 
4715 	return err;
4716 }
4717 
4718 static const struct file_operations tracing_cpumask_fops = {
4719 	.open		= tracing_open_generic_tr,
4720 	.read		= tracing_cpumask_read,
4721 	.write		= tracing_cpumask_write,
4722 	.release	= tracing_release_generic_tr,
4723 	.llseek		= generic_file_llseek,
4724 };
4725 
4726 static int tracing_trace_options_show(struct seq_file *m, void *v)
4727 {
4728 	struct tracer_opt *trace_opts;
4729 	struct trace_array *tr = m->private;
4730 	u32 tracer_flags;
4731 	int i;
4732 
4733 	mutex_lock(&trace_types_lock);
4734 	tracer_flags = tr->current_trace->flags->val;
4735 	trace_opts = tr->current_trace->flags->opts;
4736 
4737 	for (i = 0; trace_options[i]; i++) {
4738 		if (tr->trace_flags & (1 << i))
4739 			seq_printf(m, "%s\n", trace_options[i]);
4740 		else
4741 			seq_printf(m, "no%s\n", trace_options[i]);
4742 	}
4743 
4744 	for (i = 0; trace_opts[i].name; i++) {
4745 		if (tracer_flags & trace_opts[i].bit)
4746 			seq_printf(m, "%s\n", trace_opts[i].name);
4747 		else
4748 			seq_printf(m, "no%s\n", trace_opts[i].name);
4749 	}
4750 	mutex_unlock(&trace_types_lock);
4751 
4752 	return 0;
4753 }
4754 
4755 static int __set_tracer_option(struct trace_array *tr,
4756 			       struct tracer_flags *tracer_flags,
4757 			       struct tracer_opt *opts, int neg)
4758 {
4759 	struct tracer *trace = tracer_flags->trace;
4760 	int ret;
4761 
4762 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4763 	if (ret)
4764 		return ret;
4765 
4766 	if (neg)
4767 		tracer_flags->val &= ~opts->bit;
4768 	else
4769 		tracer_flags->val |= opts->bit;
4770 	return 0;
4771 }
4772 
4773 /* Try to assign a tracer specific option */
4774 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4775 {
4776 	struct tracer *trace = tr->current_trace;
4777 	struct tracer_flags *tracer_flags = trace->flags;
4778 	struct tracer_opt *opts = NULL;
4779 	int i;
4780 
4781 	for (i = 0; tracer_flags->opts[i].name; i++) {
4782 		opts = &tracer_flags->opts[i];
4783 
4784 		if (strcmp(cmp, opts->name) == 0)
4785 			return __set_tracer_option(tr, trace->flags, opts, neg);
4786 	}
4787 
4788 	return -EINVAL;
4789 }
4790 
4791 /* Some tracers require overwrite to stay enabled */
4792 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4793 {
4794 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4795 		return -1;
4796 
4797 	return 0;
4798 }
4799 
4800 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4801 {
4802 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4803 	    (mask == TRACE_ITER_RECORD_CMD))
4804 		lockdep_assert_held(&event_mutex);
4805 
4806 	/* do nothing if flag is already set */
4807 	if (!!(tr->trace_flags & mask) == !!enabled)
4808 		return 0;
4809 
4810 	/* Give the tracer a chance to approve the change */
4811 	if (tr->current_trace->flag_changed)
4812 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4813 			return -EINVAL;
4814 
4815 	if (enabled)
4816 		tr->trace_flags |= mask;
4817 	else
4818 		tr->trace_flags &= ~mask;
4819 
4820 	if (mask == TRACE_ITER_RECORD_CMD)
4821 		trace_event_enable_cmd_record(enabled);
4822 
4823 	if (mask == TRACE_ITER_RECORD_TGID) {
4824 		if (!tgid_map)
4825 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4826 					   sizeof(*tgid_map),
4827 					   GFP_KERNEL);
4828 		if (!tgid_map) {
4829 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4830 			return -ENOMEM;
4831 		}
4832 
4833 		trace_event_enable_tgid_record(enabled);
4834 	}
4835 
4836 	if (mask == TRACE_ITER_EVENT_FORK)
4837 		trace_event_follow_fork(tr, enabled);
4838 
4839 	if (mask == TRACE_ITER_FUNC_FORK)
4840 		ftrace_pid_follow_fork(tr, enabled);
4841 
4842 	if (mask == TRACE_ITER_OVERWRITE) {
4843 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4844 #ifdef CONFIG_TRACER_MAX_TRACE
4845 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4846 #endif
4847 	}
4848 
4849 	if (mask == TRACE_ITER_PRINTK) {
4850 		trace_printk_start_stop_comm(enabled);
4851 		trace_printk_control(enabled);
4852 	}
4853 
4854 	return 0;
4855 }
4856 
4857 int trace_set_options(struct trace_array *tr, char *option)
4858 {
4859 	char *cmp;
4860 	int neg = 0;
4861 	int ret;
4862 	size_t orig_len = strlen(option);
4863 	int len;
4864 
4865 	cmp = strstrip(option);
4866 
4867 	len = str_has_prefix(cmp, "no");
4868 	if (len)
4869 		neg = 1;
4870 
4871 	cmp += len;
4872 
4873 	mutex_lock(&event_mutex);
4874 	mutex_lock(&trace_types_lock);
4875 
4876 	ret = match_string(trace_options, -1, cmp);
4877 	/* If no option could be set, test the specific tracer options */
4878 	if (ret < 0)
4879 		ret = set_tracer_option(tr, cmp, neg);
4880 	else
4881 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4882 
4883 	mutex_unlock(&trace_types_lock);
4884 	mutex_unlock(&event_mutex);
4885 
4886 	/*
4887 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4888 	 * turn it back into a space.
4889 	 */
4890 	if (orig_len > strlen(option))
4891 		option[strlen(option)] = ' ';
4892 
4893 	return ret;
4894 }
4895 
4896 static void __init apply_trace_boot_options(void)
4897 {
4898 	char *buf = trace_boot_options_buf;
4899 	char *option;
4900 
4901 	while (true) {
4902 		option = strsep(&buf, ",");
4903 
4904 		if (!option)
4905 			break;
4906 
4907 		if (*option)
4908 			trace_set_options(&global_trace, option);
4909 
4910 		/* Put back the comma to allow this to be called again */
4911 		if (buf)
4912 			*(buf - 1) = ',';
4913 	}
4914 }
4915 
4916 static ssize_t
4917 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4918 			size_t cnt, loff_t *ppos)
4919 {
4920 	struct seq_file *m = filp->private_data;
4921 	struct trace_array *tr = m->private;
4922 	char buf[64];
4923 	int ret;
4924 
4925 	if (cnt >= sizeof(buf))
4926 		return -EINVAL;
4927 
4928 	if (copy_from_user(buf, ubuf, cnt))
4929 		return -EFAULT;
4930 
4931 	buf[cnt] = 0;
4932 
4933 	ret = trace_set_options(tr, buf);
4934 	if (ret < 0)
4935 		return ret;
4936 
4937 	*ppos += cnt;
4938 
4939 	return cnt;
4940 }
4941 
4942 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4943 {
4944 	struct trace_array *tr = inode->i_private;
4945 	int ret;
4946 
4947 	ret = tracing_check_open_get_tr(tr);
4948 	if (ret)
4949 		return ret;
4950 
4951 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4952 	if (ret < 0)
4953 		trace_array_put(tr);
4954 
4955 	return ret;
4956 }
4957 
4958 static const struct file_operations tracing_iter_fops = {
4959 	.open		= tracing_trace_options_open,
4960 	.read		= seq_read,
4961 	.llseek		= seq_lseek,
4962 	.release	= tracing_single_release_tr,
4963 	.write		= tracing_trace_options_write,
4964 };
4965 
4966 static const char readme_msg[] =
4967 	"tracing mini-HOWTO:\n\n"
4968 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4969 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4970 	" Important files:\n"
4971 	"  trace\t\t\t- The static contents of the buffer\n"
4972 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4973 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4974 	"  current_tracer\t- function and latency tracers\n"
4975 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4976 	"  error_log\t- error log for failed commands (that support it)\n"
4977 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4978 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4979 	"  trace_clock\t\t-change the clock used to order events\n"
4980 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4981 	"      global:   Synced across CPUs but slows tracing down.\n"
4982 	"     counter:   Not a clock, but just an increment\n"
4983 	"      uptime:   Jiffy counter from time of boot\n"
4984 	"        perf:   Same clock that perf events use\n"
4985 #ifdef CONFIG_X86_64
4986 	"     x86-tsc:   TSC cycle counter\n"
4987 #endif
4988 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4989 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4990 	"    absolute:   Absolute (standalone) timestamp\n"
4991 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4992 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4993 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4994 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4995 	"\t\t\t  Remove sub-buffer with rmdir\n"
4996 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4997 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4998 	"\t\t\t  option name\n"
4999 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5000 #ifdef CONFIG_DYNAMIC_FTRACE
5001 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5002 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5003 	"\t\t\t  functions\n"
5004 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5005 	"\t     modules: Can select a group via module\n"
5006 	"\t      Format: :mod:<module-name>\n"
5007 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5008 	"\t    triggers: a command to perform when function is hit\n"
5009 	"\t      Format: <function>:<trigger>[:count]\n"
5010 	"\t     trigger: traceon, traceoff\n"
5011 	"\t\t      enable_event:<system>:<event>\n"
5012 	"\t\t      disable_event:<system>:<event>\n"
5013 #ifdef CONFIG_STACKTRACE
5014 	"\t\t      stacktrace\n"
5015 #endif
5016 #ifdef CONFIG_TRACER_SNAPSHOT
5017 	"\t\t      snapshot\n"
5018 #endif
5019 	"\t\t      dump\n"
5020 	"\t\t      cpudump\n"
5021 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5022 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5023 	"\t     The first one will disable tracing every time do_fault is hit\n"
5024 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5025 	"\t       The first time do trap is hit and it disables tracing, the\n"
5026 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5027 	"\t       the counter will not decrement. It only decrements when the\n"
5028 	"\t       trigger did work\n"
5029 	"\t     To remove trigger without count:\n"
5030 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5031 	"\t     To remove trigger with a count:\n"
5032 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5033 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5034 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5035 	"\t    modules: Can select a group via module command :mod:\n"
5036 	"\t    Does not accept triggers\n"
5037 #endif /* CONFIG_DYNAMIC_FTRACE */
5038 #ifdef CONFIG_FUNCTION_TRACER
5039 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5040 	"\t\t    (function)\n"
5041 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5042 	"\t\t    (function)\n"
5043 #endif
5044 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5045 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5046 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5047 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5048 #endif
5049 #ifdef CONFIG_TRACER_SNAPSHOT
5050 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5051 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5052 	"\t\t\t  information\n"
5053 #endif
5054 #ifdef CONFIG_STACK_TRACER
5055 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5056 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5057 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5058 	"\t\t\t  new trace)\n"
5059 #ifdef CONFIG_DYNAMIC_FTRACE
5060 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5061 	"\t\t\t  traces\n"
5062 #endif
5063 #endif /* CONFIG_STACK_TRACER */
5064 #ifdef CONFIG_DYNAMIC_EVENTS
5065 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5066 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5067 #endif
5068 #ifdef CONFIG_KPROBE_EVENTS
5069 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5070 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5071 #endif
5072 #ifdef CONFIG_UPROBE_EVENTS
5073 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5074 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5075 #endif
5076 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5077 	"\t  accepts: event-definitions (one definition per line)\n"
5078 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5079 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5080 #ifdef CONFIG_HIST_TRIGGERS
5081 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5082 #endif
5083 	"\t           -:[<group>/]<event>\n"
5084 #ifdef CONFIG_KPROBE_EVENTS
5085 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5086   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5087 #endif
5088 #ifdef CONFIG_UPROBE_EVENTS
5089   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5090 #endif
5091 	"\t     args: <name>=fetcharg[:type]\n"
5092 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5093 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5094 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5095 #else
5096 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5097 #endif
5098 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5099 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5100 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5101 	"\t           <type>\\[<array-size>\\]\n"
5102 #ifdef CONFIG_HIST_TRIGGERS
5103 	"\t    field: <stype> <name>;\n"
5104 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5105 	"\t           [unsigned] char/int/long\n"
5106 #endif
5107 #endif
5108 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5109 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5110 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5111 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5112 	"\t\t\t  events\n"
5113 	"      filter\t\t- If set, only events passing filter are traced\n"
5114 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5115 	"\t\t\t  <event>:\n"
5116 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5117 	"      filter\t\t- If set, only events passing filter are traced\n"
5118 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5119 	"\t    Format: <trigger>[:count][if <filter>]\n"
5120 	"\t   trigger: traceon, traceoff\n"
5121 	"\t            enable_event:<system>:<event>\n"
5122 	"\t            disable_event:<system>:<event>\n"
5123 #ifdef CONFIG_HIST_TRIGGERS
5124 	"\t            enable_hist:<system>:<event>\n"
5125 	"\t            disable_hist:<system>:<event>\n"
5126 #endif
5127 #ifdef CONFIG_STACKTRACE
5128 	"\t\t    stacktrace\n"
5129 #endif
5130 #ifdef CONFIG_TRACER_SNAPSHOT
5131 	"\t\t    snapshot\n"
5132 #endif
5133 #ifdef CONFIG_HIST_TRIGGERS
5134 	"\t\t    hist (see below)\n"
5135 #endif
5136 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5137 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5138 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5139 	"\t                  events/block/block_unplug/trigger\n"
5140 	"\t   The first disables tracing every time block_unplug is hit.\n"
5141 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5142 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5143 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5144 	"\t   Like function triggers, the counter is only decremented if it\n"
5145 	"\t    enabled or disabled tracing.\n"
5146 	"\t   To remove a trigger without a count:\n"
5147 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5148 	"\t   To remove a trigger with a count:\n"
5149 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5150 	"\t   Filters can be ignored when removing a trigger.\n"
5151 #ifdef CONFIG_HIST_TRIGGERS
5152 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5153 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5154 	"\t            [:values=<field1[,field2,...]>]\n"
5155 	"\t            [:sort=<field1[,field2,...]>]\n"
5156 	"\t            [:size=#entries]\n"
5157 	"\t            [:pause][:continue][:clear]\n"
5158 	"\t            [:name=histname1]\n"
5159 	"\t            [:<handler>.<action>]\n"
5160 	"\t            [if <filter>]\n\n"
5161 	"\t    When a matching event is hit, an entry is added to a hash\n"
5162 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5163 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5164 	"\t    correspond to fields in the event's format description.  Keys\n"
5165 	"\t    can be any field, or the special string 'stacktrace'.\n"
5166 	"\t    Compound keys consisting of up to two fields can be specified\n"
5167 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5168 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5169 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5170 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5171 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5172 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5173 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5174 	"\t    its histogram data will be shared with other triggers of the\n"
5175 	"\t    same name, and trigger hits will update this common data.\n\n"
5176 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5177 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5178 	"\t    triggers attached to an event, there will be a table for each\n"
5179 	"\t    trigger in the output.  The table displayed for a named\n"
5180 	"\t    trigger will be the same as any other instance having the\n"
5181 	"\t    same name.  The default format used to display a given field\n"
5182 	"\t    can be modified by appending any of the following modifiers\n"
5183 	"\t    to the field name, as applicable:\n\n"
5184 	"\t            .hex        display a number as a hex value\n"
5185 	"\t            .sym        display an address as a symbol\n"
5186 	"\t            .sym-offset display an address as a symbol and offset\n"
5187 	"\t            .execname   display a common_pid as a program name\n"
5188 	"\t            .syscall    display a syscall id as a syscall name\n"
5189 	"\t            .log2       display log2 value rather than raw number\n"
5190 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5191 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5192 	"\t    trigger or to start a hist trigger but not log any events\n"
5193 	"\t    until told to do so.  'continue' can be used to start or\n"
5194 	"\t    restart a paused hist trigger.\n\n"
5195 	"\t    The 'clear' parameter will clear the contents of a running\n"
5196 	"\t    hist trigger and leave its current paused/active state\n"
5197 	"\t    unchanged.\n\n"
5198 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5199 	"\t    have one event conditionally start and stop another event's\n"
5200 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5201 	"\t    the enable_event and disable_event triggers.\n\n"
5202 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5203 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5204 	"\t        <handler>.<action>\n\n"
5205 	"\t    The available handlers are:\n\n"
5206 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5207 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5208 	"\t        onchange(var)            - invoke action if var changes\n\n"
5209 	"\t    The available actions are:\n\n"
5210 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5211 	"\t        save(field,...)                      - save current event fields\n"
5212 #ifdef CONFIG_TRACER_SNAPSHOT
5213 	"\t        snapshot()                           - snapshot the trace buffer\n"
5214 #endif
5215 #endif
5216 ;
5217 
5218 static ssize_t
5219 tracing_readme_read(struct file *filp, char __user *ubuf,
5220 		       size_t cnt, loff_t *ppos)
5221 {
5222 	return simple_read_from_buffer(ubuf, cnt, ppos,
5223 					readme_msg, strlen(readme_msg));
5224 }
5225 
5226 static const struct file_operations tracing_readme_fops = {
5227 	.open		= tracing_open_generic,
5228 	.read		= tracing_readme_read,
5229 	.llseek		= generic_file_llseek,
5230 };
5231 
5232 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5233 {
5234 	int *ptr = v;
5235 
5236 	if (*pos || m->count)
5237 		ptr++;
5238 
5239 	(*pos)++;
5240 
5241 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5242 		if (trace_find_tgid(*ptr))
5243 			return ptr;
5244 	}
5245 
5246 	return NULL;
5247 }
5248 
5249 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5250 {
5251 	void *v;
5252 	loff_t l = 0;
5253 
5254 	if (!tgid_map)
5255 		return NULL;
5256 
5257 	v = &tgid_map[0];
5258 	while (l <= *pos) {
5259 		v = saved_tgids_next(m, v, &l);
5260 		if (!v)
5261 			return NULL;
5262 	}
5263 
5264 	return v;
5265 }
5266 
5267 static void saved_tgids_stop(struct seq_file *m, void *v)
5268 {
5269 }
5270 
5271 static int saved_tgids_show(struct seq_file *m, void *v)
5272 {
5273 	int pid = (int *)v - tgid_map;
5274 
5275 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5276 	return 0;
5277 }
5278 
5279 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5280 	.start		= saved_tgids_start,
5281 	.stop		= saved_tgids_stop,
5282 	.next		= saved_tgids_next,
5283 	.show		= saved_tgids_show,
5284 };
5285 
5286 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5287 {
5288 	int ret;
5289 
5290 	ret = tracing_check_open_get_tr(NULL);
5291 	if (ret)
5292 		return ret;
5293 
5294 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5295 }
5296 
5297 
5298 static const struct file_operations tracing_saved_tgids_fops = {
5299 	.open		= tracing_saved_tgids_open,
5300 	.read		= seq_read,
5301 	.llseek		= seq_lseek,
5302 	.release	= seq_release,
5303 };
5304 
5305 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5306 {
5307 	unsigned int *ptr = v;
5308 
5309 	if (*pos || m->count)
5310 		ptr++;
5311 
5312 	(*pos)++;
5313 
5314 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5315 	     ptr++) {
5316 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5317 			continue;
5318 
5319 		return ptr;
5320 	}
5321 
5322 	return NULL;
5323 }
5324 
5325 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5326 {
5327 	void *v;
5328 	loff_t l = 0;
5329 
5330 	preempt_disable();
5331 	arch_spin_lock(&trace_cmdline_lock);
5332 
5333 	v = &savedcmd->map_cmdline_to_pid[0];
5334 	while (l <= *pos) {
5335 		v = saved_cmdlines_next(m, v, &l);
5336 		if (!v)
5337 			return NULL;
5338 	}
5339 
5340 	return v;
5341 }
5342 
5343 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5344 {
5345 	arch_spin_unlock(&trace_cmdline_lock);
5346 	preempt_enable();
5347 }
5348 
5349 static int saved_cmdlines_show(struct seq_file *m, void *v)
5350 {
5351 	char buf[TASK_COMM_LEN];
5352 	unsigned int *pid = v;
5353 
5354 	__trace_find_cmdline(*pid, buf);
5355 	seq_printf(m, "%d %s\n", *pid, buf);
5356 	return 0;
5357 }
5358 
5359 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5360 	.start		= saved_cmdlines_start,
5361 	.next		= saved_cmdlines_next,
5362 	.stop		= saved_cmdlines_stop,
5363 	.show		= saved_cmdlines_show,
5364 };
5365 
5366 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5367 {
5368 	int ret;
5369 
5370 	ret = tracing_check_open_get_tr(NULL);
5371 	if (ret)
5372 		return ret;
5373 
5374 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5375 }
5376 
5377 static const struct file_operations tracing_saved_cmdlines_fops = {
5378 	.open		= tracing_saved_cmdlines_open,
5379 	.read		= seq_read,
5380 	.llseek		= seq_lseek,
5381 	.release	= seq_release,
5382 };
5383 
5384 static ssize_t
5385 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5386 				 size_t cnt, loff_t *ppos)
5387 {
5388 	char buf[64];
5389 	int r;
5390 
5391 	arch_spin_lock(&trace_cmdline_lock);
5392 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5393 	arch_spin_unlock(&trace_cmdline_lock);
5394 
5395 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5396 }
5397 
5398 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5399 {
5400 	kfree(s->saved_cmdlines);
5401 	kfree(s->map_cmdline_to_pid);
5402 	kfree(s);
5403 }
5404 
5405 static int tracing_resize_saved_cmdlines(unsigned int val)
5406 {
5407 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5408 
5409 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5410 	if (!s)
5411 		return -ENOMEM;
5412 
5413 	if (allocate_cmdlines_buffer(val, s) < 0) {
5414 		kfree(s);
5415 		return -ENOMEM;
5416 	}
5417 
5418 	arch_spin_lock(&trace_cmdline_lock);
5419 	savedcmd_temp = savedcmd;
5420 	savedcmd = s;
5421 	arch_spin_unlock(&trace_cmdline_lock);
5422 	free_saved_cmdlines_buffer(savedcmd_temp);
5423 
5424 	return 0;
5425 }
5426 
5427 static ssize_t
5428 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5429 				  size_t cnt, loff_t *ppos)
5430 {
5431 	unsigned long val;
5432 	int ret;
5433 
5434 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5435 	if (ret)
5436 		return ret;
5437 
5438 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5439 	if (!val || val > PID_MAX_DEFAULT)
5440 		return -EINVAL;
5441 
5442 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5443 	if (ret < 0)
5444 		return ret;
5445 
5446 	*ppos += cnt;
5447 
5448 	return cnt;
5449 }
5450 
5451 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5452 	.open		= tracing_open_generic,
5453 	.read		= tracing_saved_cmdlines_size_read,
5454 	.write		= tracing_saved_cmdlines_size_write,
5455 };
5456 
5457 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5458 static union trace_eval_map_item *
5459 update_eval_map(union trace_eval_map_item *ptr)
5460 {
5461 	if (!ptr->map.eval_string) {
5462 		if (ptr->tail.next) {
5463 			ptr = ptr->tail.next;
5464 			/* Set ptr to the next real item (skip head) */
5465 			ptr++;
5466 		} else
5467 			return NULL;
5468 	}
5469 	return ptr;
5470 }
5471 
5472 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5473 {
5474 	union trace_eval_map_item *ptr = v;
5475 
5476 	/*
5477 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5478 	 * This really should never happen.
5479 	 */
5480 	(*pos)++;
5481 	ptr = update_eval_map(ptr);
5482 	if (WARN_ON_ONCE(!ptr))
5483 		return NULL;
5484 
5485 	ptr++;
5486 	ptr = update_eval_map(ptr);
5487 
5488 	return ptr;
5489 }
5490 
5491 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5492 {
5493 	union trace_eval_map_item *v;
5494 	loff_t l = 0;
5495 
5496 	mutex_lock(&trace_eval_mutex);
5497 
5498 	v = trace_eval_maps;
5499 	if (v)
5500 		v++;
5501 
5502 	while (v && l < *pos) {
5503 		v = eval_map_next(m, v, &l);
5504 	}
5505 
5506 	return v;
5507 }
5508 
5509 static void eval_map_stop(struct seq_file *m, void *v)
5510 {
5511 	mutex_unlock(&trace_eval_mutex);
5512 }
5513 
5514 static int eval_map_show(struct seq_file *m, void *v)
5515 {
5516 	union trace_eval_map_item *ptr = v;
5517 
5518 	seq_printf(m, "%s %ld (%s)\n",
5519 		   ptr->map.eval_string, ptr->map.eval_value,
5520 		   ptr->map.system);
5521 
5522 	return 0;
5523 }
5524 
5525 static const struct seq_operations tracing_eval_map_seq_ops = {
5526 	.start		= eval_map_start,
5527 	.next		= eval_map_next,
5528 	.stop		= eval_map_stop,
5529 	.show		= eval_map_show,
5530 };
5531 
5532 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5533 {
5534 	int ret;
5535 
5536 	ret = tracing_check_open_get_tr(NULL);
5537 	if (ret)
5538 		return ret;
5539 
5540 	return seq_open(filp, &tracing_eval_map_seq_ops);
5541 }
5542 
5543 static const struct file_operations tracing_eval_map_fops = {
5544 	.open		= tracing_eval_map_open,
5545 	.read		= seq_read,
5546 	.llseek		= seq_lseek,
5547 	.release	= seq_release,
5548 };
5549 
5550 static inline union trace_eval_map_item *
5551 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5552 {
5553 	/* Return tail of array given the head */
5554 	return ptr + ptr->head.length + 1;
5555 }
5556 
5557 static void
5558 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5559 			   int len)
5560 {
5561 	struct trace_eval_map **stop;
5562 	struct trace_eval_map **map;
5563 	union trace_eval_map_item *map_array;
5564 	union trace_eval_map_item *ptr;
5565 
5566 	stop = start + len;
5567 
5568 	/*
5569 	 * The trace_eval_maps contains the map plus a head and tail item,
5570 	 * where the head holds the module and length of array, and the
5571 	 * tail holds a pointer to the next list.
5572 	 */
5573 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5574 	if (!map_array) {
5575 		pr_warn("Unable to allocate trace eval mapping\n");
5576 		return;
5577 	}
5578 
5579 	mutex_lock(&trace_eval_mutex);
5580 
5581 	if (!trace_eval_maps)
5582 		trace_eval_maps = map_array;
5583 	else {
5584 		ptr = trace_eval_maps;
5585 		for (;;) {
5586 			ptr = trace_eval_jmp_to_tail(ptr);
5587 			if (!ptr->tail.next)
5588 				break;
5589 			ptr = ptr->tail.next;
5590 
5591 		}
5592 		ptr->tail.next = map_array;
5593 	}
5594 	map_array->head.mod = mod;
5595 	map_array->head.length = len;
5596 	map_array++;
5597 
5598 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5599 		map_array->map = **map;
5600 		map_array++;
5601 	}
5602 	memset(map_array, 0, sizeof(*map_array));
5603 
5604 	mutex_unlock(&trace_eval_mutex);
5605 }
5606 
5607 static void trace_create_eval_file(struct dentry *d_tracer)
5608 {
5609 	trace_create_file("eval_map", 0444, d_tracer,
5610 			  NULL, &tracing_eval_map_fops);
5611 }
5612 
5613 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5614 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5615 static inline void trace_insert_eval_map_file(struct module *mod,
5616 			      struct trace_eval_map **start, int len) { }
5617 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5618 
5619 static void trace_insert_eval_map(struct module *mod,
5620 				  struct trace_eval_map **start, int len)
5621 {
5622 	struct trace_eval_map **map;
5623 
5624 	if (len <= 0)
5625 		return;
5626 
5627 	map = start;
5628 
5629 	trace_event_eval_update(map, len);
5630 
5631 	trace_insert_eval_map_file(mod, start, len);
5632 }
5633 
5634 static ssize_t
5635 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5636 		       size_t cnt, loff_t *ppos)
5637 {
5638 	struct trace_array *tr = filp->private_data;
5639 	char buf[MAX_TRACER_SIZE+2];
5640 	int r;
5641 
5642 	mutex_lock(&trace_types_lock);
5643 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5644 	mutex_unlock(&trace_types_lock);
5645 
5646 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5647 }
5648 
5649 int tracer_init(struct tracer *t, struct trace_array *tr)
5650 {
5651 	tracing_reset_online_cpus(&tr->array_buffer);
5652 	return t->init(tr);
5653 }
5654 
5655 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5656 {
5657 	int cpu;
5658 
5659 	for_each_tracing_cpu(cpu)
5660 		per_cpu_ptr(buf->data, cpu)->entries = val;
5661 }
5662 
5663 #ifdef CONFIG_TRACER_MAX_TRACE
5664 /* resize @tr's buffer to the size of @size_tr's entries */
5665 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5666 					struct array_buffer *size_buf, int cpu_id)
5667 {
5668 	int cpu, ret = 0;
5669 
5670 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5671 		for_each_tracing_cpu(cpu) {
5672 			ret = ring_buffer_resize(trace_buf->buffer,
5673 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5674 			if (ret < 0)
5675 				break;
5676 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5677 				per_cpu_ptr(size_buf->data, cpu)->entries;
5678 		}
5679 	} else {
5680 		ret = ring_buffer_resize(trace_buf->buffer,
5681 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5682 		if (ret == 0)
5683 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5684 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5685 	}
5686 
5687 	return ret;
5688 }
5689 #endif /* CONFIG_TRACER_MAX_TRACE */
5690 
5691 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5692 					unsigned long size, int cpu)
5693 {
5694 	int ret;
5695 
5696 	/*
5697 	 * If kernel or user changes the size of the ring buffer
5698 	 * we use the size that was given, and we can forget about
5699 	 * expanding it later.
5700 	 */
5701 	ring_buffer_expanded = true;
5702 
5703 	/* May be called before buffers are initialized */
5704 	if (!tr->array_buffer.buffer)
5705 		return 0;
5706 
5707 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5708 	if (ret < 0)
5709 		return ret;
5710 
5711 #ifdef CONFIG_TRACER_MAX_TRACE
5712 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5713 	    !tr->current_trace->use_max_tr)
5714 		goto out;
5715 
5716 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5717 	if (ret < 0) {
5718 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5719 						     &tr->array_buffer, cpu);
5720 		if (r < 0) {
5721 			/*
5722 			 * AARGH! We are left with different
5723 			 * size max buffer!!!!
5724 			 * The max buffer is our "snapshot" buffer.
5725 			 * When a tracer needs a snapshot (one of the
5726 			 * latency tracers), it swaps the max buffer
5727 			 * with the saved snap shot. We succeeded to
5728 			 * update the size of the main buffer, but failed to
5729 			 * update the size of the max buffer. But when we tried
5730 			 * to reset the main buffer to the original size, we
5731 			 * failed there too. This is very unlikely to
5732 			 * happen, but if it does, warn and kill all
5733 			 * tracing.
5734 			 */
5735 			WARN_ON(1);
5736 			tracing_disabled = 1;
5737 		}
5738 		return ret;
5739 	}
5740 
5741 	if (cpu == RING_BUFFER_ALL_CPUS)
5742 		set_buffer_entries(&tr->max_buffer, size);
5743 	else
5744 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5745 
5746  out:
5747 #endif /* CONFIG_TRACER_MAX_TRACE */
5748 
5749 	if (cpu == RING_BUFFER_ALL_CPUS)
5750 		set_buffer_entries(&tr->array_buffer, size);
5751 	else
5752 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5753 
5754 	return ret;
5755 }
5756 
5757 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5758 				  unsigned long size, int cpu_id)
5759 {
5760 	int ret = size;
5761 
5762 	mutex_lock(&trace_types_lock);
5763 
5764 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5765 		/* make sure, this cpu is enabled in the mask */
5766 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5767 			ret = -EINVAL;
5768 			goto out;
5769 		}
5770 	}
5771 
5772 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5773 	if (ret < 0)
5774 		ret = -ENOMEM;
5775 
5776 out:
5777 	mutex_unlock(&trace_types_lock);
5778 
5779 	return ret;
5780 }
5781 
5782 
5783 /**
5784  * tracing_update_buffers - used by tracing facility to expand ring buffers
5785  *
5786  * To save on memory when the tracing is never used on a system with it
5787  * configured in. The ring buffers are set to a minimum size. But once
5788  * a user starts to use the tracing facility, then they need to grow
5789  * to their default size.
5790  *
5791  * This function is to be called when a tracer is about to be used.
5792  */
5793 int tracing_update_buffers(void)
5794 {
5795 	int ret = 0;
5796 
5797 	mutex_lock(&trace_types_lock);
5798 	if (!ring_buffer_expanded)
5799 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5800 						RING_BUFFER_ALL_CPUS);
5801 	mutex_unlock(&trace_types_lock);
5802 
5803 	return ret;
5804 }
5805 
5806 struct trace_option_dentry;
5807 
5808 static void
5809 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5810 
5811 /*
5812  * Used to clear out the tracer before deletion of an instance.
5813  * Must have trace_types_lock held.
5814  */
5815 static void tracing_set_nop(struct trace_array *tr)
5816 {
5817 	if (tr->current_trace == &nop_trace)
5818 		return;
5819 
5820 	tr->current_trace->enabled--;
5821 
5822 	if (tr->current_trace->reset)
5823 		tr->current_trace->reset(tr);
5824 
5825 	tr->current_trace = &nop_trace;
5826 }
5827 
5828 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5829 {
5830 	/* Only enable if the directory has been created already. */
5831 	if (!tr->dir)
5832 		return;
5833 
5834 	create_trace_option_files(tr, t);
5835 }
5836 
5837 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5838 {
5839 	struct tracer *t;
5840 #ifdef CONFIG_TRACER_MAX_TRACE
5841 	bool had_max_tr;
5842 #endif
5843 	int ret = 0;
5844 
5845 	mutex_lock(&trace_types_lock);
5846 
5847 	if (!ring_buffer_expanded) {
5848 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5849 						RING_BUFFER_ALL_CPUS);
5850 		if (ret < 0)
5851 			goto out;
5852 		ret = 0;
5853 	}
5854 
5855 	for (t = trace_types; t; t = t->next) {
5856 		if (strcmp(t->name, buf) == 0)
5857 			break;
5858 	}
5859 	if (!t) {
5860 		ret = -EINVAL;
5861 		goto out;
5862 	}
5863 	if (t == tr->current_trace)
5864 		goto out;
5865 
5866 #ifdef CONFIG_TRACER_SNAPSHOT
5867 	if (t->use_max_tr) {
5868 		arch_spin_lock(&tr->max_lock);
5869 		if (tr->cond_snapshot)
5870 			ret = -EBUSY;
5871 		arch_spin_unlock(&tr->max_lock);
5872 		if (ret)
5873 			goto out;
5874 	}
5875 #endif
5876 	/* Some tracers won't work on kernel command line */
5877 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5878 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5879 			t->name);
5880 		goto out;
5881 	}
5882 
5883 	/* Some tracers are only allowed for the top level buffer */
5884 	if (!trace_ok_for_array(t, tr)) {
5885 		ret = -EINVAL;
5886 		goto out;
5887 	}
5888 
5889 	/* If trace pipe files are being read, we can't change the tracer */
5890 	if (tr->current_trace->ref) {
5891 		ret = -EBUSY;
5892 		goto out;
5893 	}
5894 
5895 	trace_branch_disable();
5896 
5897 	tr->current_trace->enabled--;
5898 
5899 	if (tr->current_trace->reset)
5900 		tr->current_trace->reset(tr);
5901 
5902 	/* Current trace needs to be nop_trace before synchronize_rcu */
5903 	tr->current_trace = &nop_trace;
5904 
5905 #ifdef CONFIG_TRACER_MAX_TRACE
5906 	had_max_tr = tr->allocated_snapshot;
5907 
5908 	if (had_max_tr && !t->use_max_tr) {
5909 		/*
5910 		 * We need to make sure that the update_max_tr sees that
5911 		 * current_trace changed to nop_trace to keep it from
5912 		 * swapping the buffers after we resize it.
5913 		 * The update_max_tr is called from interrupts disabled
5914 		 * so a synchronized_sched() is sufficient.
5915 		 */
5916 		synchronize_rcu();
5917 		free_snapshot(tr);
5918 	}
5919 #endif
5920 
5921 #ifdef CONFIG_TRACER_MAX_TRACE
5922 	if (t->use_max_tr && !had_max_tr) {
5923 		ret = tracing_alloc_snapshot_instance(tr);
5924 		if (ret < 0)
5925 			goto out;
5926 	}
5927 #endif
5928 
5929 	if (t->init) {
5930 		ret = tracer_init(t, tr);
5931 		if (ret)
5932 			goto out;
5933 	}
5934 
5935 	tr->current_trace = t;
5936 	tr->current_trace->enabled++;
5937 	trace_branch_enable(tr);
5938  out:
5939 	mutex_unlock(&trace_types_lock);
5940 
5941 	return ret;
5942 }
5943 
5944 static ssize_t
5945 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5946 			size_t cnt, loff_t *ppos)
5947 {
5948 	struct trace_array *tr = filp->private_data;
5949 	char buf[MAX_TRACER_SIZE+1];
5950 	int i;
5951 	size_t ret;
5952 	int err;
5953 
5954 	ret = cnt;
5955 
5956 	if (cnt > MAX_TRACER_SIZE)
5957 		cnt = MAX_TRACER_SIZE;
5958 
5959 	if (copy_from_user(buf, ubuf, cnt))
5960 		return -EFAULT;
5961 
5962 	buf[cnt] = 0;
5963 
5964 	/* strip ending whitespace. */
5965 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5966 		buf[i] = 0;
5967 
5968 	err = tracing_set_tracer(tr, buf);
5969 	if (err)
5970 		return err;
5971 
5972 	*ppos += ret;
5973 
5974 	return ret;
5975 }
5976 
5977 static ssize_t
5978 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5979 		   size_t cnt, loff_t *ppos)
5980 {
5981 	char buf[64];
5982 	int r;
5983 
5984 	r = snprintf(buf, sizeof(buf), "%ld\n",
5985 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5986 	if (r > sizeof(buf))
5987 		r = sizeof(buf);
5988 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5989 }
5990 
5991 static ssize_t
5992 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5993 		    size_t cnt, loff_t *ppos)
5994 {
5995 	unsigned long val;
5996 	int ret;
5997 
5998 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999 	if (ret)
6000 		return ret;
6001 
6002 	*ptr = val * 1000;
6003 
6004 	return cnt;
6005 }
6006 
6007 static ssize_t
6008 tracing_thresh_read(struct file *filp, char __user *ubuf,
6009 		    size_t cnt, loff_t *ppos)
6010 {
6011 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6012 }
6013 
6014 static ssize_t
6015 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6016 		     size_t cnt, loff_t *ppos)
6017 {
6018 	struct trace_array *tr = filp->private_data;
6019 	int ret;
6020 
6021 	mutex_lock(&trace_types_lock);
6022 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6023 	if (ret < 0)
6024 		goto out;
6025 
6026 	if (tr->current_trace->update_thresh) {
6027 		ret = tr->current_trace->update_thresh(tr);
6028 		if (ret < 0)
6029 			goto out;
6030 	}
6031 
6032 	ret = cnt;
6033 out:
6034 	mutex_unlock(&trace_types_lock);
6035 
6036 	return ret;
6037 }
6038 
6039 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6040 
6041 static ssize_t
6042 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6043 		     size_t cnt, loff_t *ppos)
6044 {
6045 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6046 }
6047 
6048 static ssize_t
6049 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6050 		      size_t cnt, loff_t *ppos)
6051 {
6052 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6053 }
6054 
6055 #endif
6056 
6057 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6058 {
6059 	struct trace_array *tr = inode->i_private;
6060 	struct trace_iterator *iter;
6061 	int ret;
6062 
6063 	ret = tracing_check_open_get_tr(tr);
6064 	if (ret)
6065 		return ret;
6066 
6067 	mutex_lock(&trace_types_lock);
6068 
6069 	/* create a buffer to store the information to pass to userspace */
6070 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6071 	if (!iter) {
6072 		ret = -ENOMEM;
6073 		__trace_array_put(tr);
6074 		goto out;
6075 	}
6076 
6077 	trace_seq_init(&iter->seq);
6078 	iter->trace = tr->current_trace;
6079 
6080 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6081 		ret = -ENOMEM;
6082 		goto fail;
6083 	}
6084 
6085 	/* trace pipe does not show start of buffer */
6086 	cpumask_setall(iter->started);
6087 
6088 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6089 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6090 
6091 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6092 	if (trace_clocks[tr->clock_id].in_ns)
6093 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6094 
6095 	iter->tr = tr;
6096 	iter->array_buffer = &tr->array_buffer;
6097 	iter->cpu_file = tracing_get_cpu(inode);
6098 	mutex_init(&iter->mutex);
6099 	filp->private_data = iter;
6100 
6101 	if (iter->trace->pipe_open)
6102 		iter->trace->pipe_open(iter);
6103 
6104 	nonseekable_open(inode, filp);
6105 
6106 	tr->current_trace->ref++;
6107 out:
6108 	mutex_unlock(&trace_types_lock);
6109 	return ret;
6110 
6111 fail:
6112 	kfree(iter);
6113 	__trace_array_put(tr);
6114 	mutex_unlock(&trace_types_lock);
6115 	return ret;
6116 }
6117 
6118 static int tracing_release_pipe(struct inode *inode, struct file *file)
6119 {
6120 	struct trace_iterator *iter = file->private_data;
6121 	struct trace_array *tr = inode->i_private;
6122 
6123 	mutex_lock(&trace_types_lock);
6124 
6125 	tr->current_trace->ref--;
6126 
6127 	if (iter->trace->pipe_close)
6128 		iter->trace->pipe_close(iter);
6129 
6130 	mutex_unlock(&trace_types_lock);
6131 
6132 	free_cpumask_var(iter->started);
6133 	mutex_destroy(&iter->mutex);
6134 	kfree(iter);
6135 
6136 	trace_array_put(tr);
6137 
6138 	return 0;
6139 }
6140 
6141 static __poll_t
6142 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6143 {
6144 	struct trace_array *tr = iter->tr;
6145 
6146 	/* Iterators are static, they should be filled or empty */
6147 	if (trace_buffer_iter(iter, iter->cpu_file))
6148 		return EPOLLIN | EPOLLRDNORM;
6149 
6150 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6151 		/*
6152 		 * Always select as readable when in blocking mode
6153 		 */
6154 		return EPOLLIN | EPOLLRDNORM;
6155 	else
6156 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6157 					     filp, poll_table);
6158 }
6159 
6160 static __poll_t
6161 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6162 {
6163 	struct trace_iterator *iter = filp->private_data;
6164 
6165 	return trace_poll(iter, filp, poll_table);
6166 }
6167 
6168 /* Must be called with iter->mutex held. */
6169 static int tracing_wait_pipe(struct file *filp)
6170 {
6171 	struct trace_iterator *iter = filp->private_data;
6172 	int ret;
6173 
6174 	while (trace_empty(iter)) {
6175 
6176 		if ((filp->f_flags & O_NONBLOCK)) {
6177 			return -EAGAIN;
6178 		}
6179 
6180 		/*
6181 		 * We block until we read something and tracing is disabled.
6182 		 * We still block if tracing is disabled, but we have never
6183 		 * read anything. This allows a user to cat this file, and
6184 		 * then enable tracing. But after we have read something,
6185 		 * we give an EOF when tracing is again disabled.
6186 		 *
6187 		 * iter->pos will be 0 if we haven't read anything.
6188 		 */
6189 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6190 			break;
6191 
6192 		mutex_unlock(&iter->mutex);
6193 
6194 		ret = wait_on_pipe(iter, 0);
6195 
6196 		mutex_lock(&iter->mutex);
6197 
6198 		if (ret)
6199 			return ret;
6200 	}
6201 
6202 	return 1;
6203 }
6204 
6205 /*
6206  * Consumer reader.
6207  */
6208 static ssize_t
6209 tracing_read_pipe(struct file *filp, char __user *ubuf,
6210 		  size_t cnt, loff_t *ppos)
6211 {
6212 	struct trace_iterator *iter = filp->private_data;
6213 	ssize_t sret;
6214 
6215 	/*
6216 	 * Avoid more than one consumer on a single file descriptor
6217 	 * This is just a matter of traces coherency, the ring buffer itself
6218 	 * is protected.
6219 	 */
6220 	mutex_lock(&iter->mutex);
6221 
6222 	/* return any leftover data */
6223 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6224 	if (sret != -EBUSY)
6225 		goto out;
6226 
6227 	trace_seq_init(&iter->seq);
6228 
6229 	if (iter->trace->read) {
6230 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6231 		if (sret)
6232 			goto out;
6233 	}
6234 
6235 waitagain:
6236 	sret = tracing_wait_pipe(filp);
6237 	if (sret <= 0)
6238 		goto out;
6239 
6240 	/* stop when tracing is finished */
6241 	if (trace_empty(iter)) {
6242 		sret = 0;
6243 		goto out;
6244 	}
6245 
6246 	if (cnt >= PAGE_SIZE)
6247 		cnt = PAGE_SIZE - 1;
6248 
6249 	/* reset all but tr, trace, and overruns */
6250 	memset(&iter->seq, 0,
6251 	       sizeof(struct trace_iterator) -
6252 	       offsetof(struct trace_iterator, seq));
6253 	cpumask_clear(iter->started);
6254 	trace_seq_init(&iter->seq);
6255 	iter->pos = -1;
6256 
6257 	trace_event_read_lock();
6258 	trace_access_lock(iter->cpu_file);
6259 	while (trace_find_next_entry_inc(iter) != NULL) {
6260 		enum print_line_t ret;
6261 		int save_len = iter->seq.seq.len;
6262 
6263 		ret = print_trace_line(iter);
6264 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6265 			/* don't print partial lines */
6266 			iter->seq.seq.len = save_len;
6267 			break;
6268 		}
6269 		if (ret != TRACE_TYPE_NO_CONSUME)
6270 			trace_consume(iter);
6271 
6272 		if (trace_seq_used(&iter->seq) >= cnt)
6273 			break;
6274 
6275 		/*
6276 		 * Setting the full flag means we reached the trace_seq buffer
6277 		 * size and we should leave by partial output condition above.
6278 		 * One of the trace_seq_* functions is not used properly.
6279 		 */
6280 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6281 			  iter->ent->type);
6282 	}
6283 	trace_access_unlock(iter->cpu_file);
6284 	trace_event_read_unlock();
6285 
6286 	/* Now copy what we have to the user */
6287 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6288 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6289 		trace_seq_init(&iter->seq);
6290 
6291 	/*
6292 	 * If there was nothing to send to user, in spite of consuming trace
6293 	 * entries, go back to wait for more entries.
6294 	 */
6295 	if (sret == -EBUSY)
6296 		goto waitagain;
6297 
6298 out:
6299 	mutex_unlock(&iter->mutex);
6300 
6301 	return sret;
6302 }
6303 
6304 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6305 				     unsigned int idx)
6306 {
6307 	__free_page(spd->pages[idx]);
6308 }
6309 
6310 static size_t
6311 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6312 {
6313 	size_t count;
6314 	int save_len;
6315 	int ret;
6316 
6317 	/* Seq buffer is page-sized, exactly what we need. */
6318 	for (;;) {
6319 		save_len = iter->seq.seq.len;
6320 		ret = print_trace_line(iter);
6321 
6322 		if (trace_seq_has_overflowed(&iter->seq)) {
6323 			iter->seq.seq.len = save_len;
6324 			break;
6325 		}
6326 
6327 		/*
6328 		 * This should not be hit, because it should only
6329 		 * be set if the iter->seq overflowed. But check it
6330 		 * anyway to be safe.
6331 		 */
6332 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6333 			iter->seq.seq.len = save_len;
6334 			break;
6335 		}
6336 
6337 		count = trace_seq_used(&iter->seq) - save_len;
6338 		if (rem < count) {
6339 			rem = 0;
6340 			iter->seq.seq.len = save_len;
6341 			break;
6342 		}
6343 
6344 		if (ret != TRACE_TYPE_NO_CONSUME)
6345 			trace_consume(iter);
6346 		rem -= count;
6347 		if (!trace_find_next_entry_inc(iter))	{
6348 			rem = 0;
6349 			iter->ent = NULL;
6350 			break;
6351 		}
6352 	}
6353 
6354 	return rem;
6355 }
6356 
6357 static ssize_t tracing_splice_read_pipe(struct file *filp,
6358 					loff_t *ppos,
6359 					struct pipe_inode_info *pipe,
6360 					size_t len,
6361 					unsigned int flags)
6362 {
6363 	struct page *pages_def[PIPE_DEF_BUFFERS];
6364 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6365 	struct trace_iterator *iter = filp->private_data;
6366 	struct splice_pipe_desc spd = {
6367 		.pages		= pages_def,
6368 		.partial	= partial_def,
6369 		.nr_pages	= 0, /* This gets updated below. */
6370 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6371 		.ops		= &default_pipe_buf_ops,
6372 		.spd_release	= tracing_spd_release_pipe,
6373 	};
6374 	ssize_t ret;
6375 	size_t rem;
6376 	unsigned int i;
6377 
6378 	if (splice_grow_spd(pipe, &spd))
6379 		return -ENOMEM;
6380 
6381 	mutex_lock(&iter->mutex);
6382 
6383 	if (iter->trace->splice_read) {
6384 		ret = iter->trace->splice_read(iter, filp,
6385 					       ppos, pipe, len, flags);
6386 		if (ret)
6387 			goto out_err;
6388 	}
6389 
6390 	ret = tracing_wait_pipe(filp);
6391 	if (ret <= 0)
6392 		goto out_err;
6393 
6394 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6395 		ret = -EFAULT;
6396 		goto out_err;
6397 	}
6398 
6399 	trace_event_read_lock();
6400 	trace_access_lock(iter->cpu_file);
6401 
6402 	/* Fill as many pages as possible. */
6403 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6404 		spd.pages[i] = alloc_page(GFP_KERNEL);
6405 		if (!spd.pages[i])
6406 			break;
6407 
6408 		rem = tracing_fill_pipe_page(rem, iter);
6409 
6410 		/* Copy the data into the page, so we can start over. */
6411 		ret = trace_seq_to_buffer(&iter->seq,
6412 					  page_address(spd.pages[i]),
6413 					  trace_seq_used(&iter->seq));
6414 		if (ret < 0) {
6415 			__free_page(spd.pages[i]);
6416 			break;
6417 		}
6418 		spd.partial[i].offset = 0;
6419 		spd.partial[i].len = trace_seq_used(&iter->seq);
6420 
6421 		trace_seq_init(&iter->seq);
6422 	}
6423 
6424 	trace_access_unlock(iter->cpu_file);
6425 	trace_event_read_unlock();
6426 	mutex_unlock(&iter->mutex);
6427 
6428 	spd.nr_pages = i;
6429 
6430 	if (i)
6431 		ret = splice_to_pipe(pipe, &spd);
6432 	else
6433 		ret = 0;
6434 out:
6435 	splice_shrink_spd(&spd);
6436 	return ret;
6437 
6438 out_err:
6439 	mutex_unlock(&iter->mutex);
6440 	goto out;
6441 }
6442 
6443 static ssize_t
6444 tracing_entries_read(struct file *filp, char __user *ubuf,
6445 		     size_t cnt, loff_t *ppos)
6446 {
6447 	struct inode *inode = file_inode(filp);
6448 	struct trace_array *tr = inode->i_private;
6449 	int cpu = tracing_get_cpu(inode);
6450 	char buf[64];
6451 	int r = 0;
6452 	ssize_t ret;
6453 
6454 	mutex_lock(&trace_types_lock);
6455 
6456 	if (cpu == RING_BUFFER_ALL_CPUS) {
6457 		int cpu, buf_size_same;
6458 		unsigned long size;
6459 
6460 		size = 0;
6461 		buf_size_same = 1;
6462 		/* check if all cpu sizes are same */
6463 		for_each_tracing_cpu(cpu) {
6464 			/* fill in the size from first enabled cpu */
6465 			if (size == 0)
6466 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6467 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6468 				buf_size_same = 0;
6469 				break;
6470 			}
6471 		}
6472 
6473 		if (buf_size_same) {
6474 			if (!ring_buffer_expanded)
6475 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6476 					    size >> 10,
6477 					    trace_buf_size >> 10);
6478 			else
6479 				r = sprintf(buf, "%lu\n", size >> 10);
6480 		} else
6481 			r = sprintf(buf, "X\n");
6482 	} else
6483 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6484 
6485 	mutex_unlock(&trace_types_lock);
6486 
6487 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6488 	return ret;
6489 }
6490 
6491 static ssize_t
6492 tracing_entries_write(struct file *filp, const char __user *ubuf,
6493 		      size_t cnt, loff_t *ppos)
6494 {
6495 	struct inode *inode = file_inode(filp);
6496 	struct trace_array *tr = inode->i_private;
6497 	unsigned long val;
6498 	int ret;
6499 
6500 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6501 	if (ret)
6502 		return ret;
6503 
6504 	/* must have at least 1 entry */
6505 	if (!val)
6506 		return -EINVAL;
6507 
6508 	/* value is in KB */
6509 	val <<= 10;
6510 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6511 	if (ret < 0)
6512 		return ret;
6513 
6514 	*ppos += cnt;
6515 
6516 	return cnt;
6517 }
6518 
6519 static ssize_t
6520 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6521 				size_t cnt, loff_t *ppos)
6522 {
6523 	struct trace_array *tr = filp->private_data;
6524 	char buf[64];
6525 	int r, cpu;
6526 	unsigned long size = 0, expanded_size = 0;
6527 
6528 	mutex_lock(&trace_types_lock);
6529 	for_each_tracing_cpu(cpu) {
6530 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6531 		if (!ring_buffer_expanded)
6532 			expanded_size += trace_buf_size >> 10;
6533 	}
6534 	if (ring_buffer_expanded)
6535 		r = sprintf(buf, "%lu\n", size);
6536 	else
6537 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6538 	mutex_unlock(&trace_types_lock);
6539 
6540 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6541 }
6542 
6543 static ssize_t
6544 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6545 			  size_t cnt, loff_t *ppos)
6546 {
6547 	/*
6548 	 * There is no need to read what the user has written, this function
6549 	 * is just to make sure that there is no error when "echo" is used
6550 	 */
6551 
6552 	*ppos += cnt;
6553 
6554 	return cnt;
6555 }
6556 
6557 static int
6558 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6559 {
6560 	struct trace_array *tr = inode->i_private;
6561 
6562 	/* disable tracing ? */
6563 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6564 		tracer_tracing_off(tr);
6565 	/* resize the ring buffer to 0 */
6566 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6567 
6568 	trace_array_put(tr);
6569 
6570 	return 0;
6571 }
6572 
6573 static ssize_t
6574 tracing_mark_write(struct file *filp, const char __user *ubuf,
6575 					size_t cnt, loff_t *fpos)
6576 {
6577 	struct trace_array *tr = filp->private_data;
6578 	struct ring_buffer_event *event;
6579 	enum event_trigger_type tt = ETT_NONE;
6580 	struct trace_buffer *buffer;
6581 	struct print_entry *entry;
6582 	unsigned long irq_flags;
6583 	ssize_t written;
6584 	int size;
6585 	int len;
6586 
6587 /* Used in tracing_mark_raw_write() as well */
6588 #define FAULTED_STR "<faulted>"
6589 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6590 
6591 	if (tracing_disabled)
6592 		return -EINVAL;
6593 
6594 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6595 		return -EINVAL;
6596 
6597 	if (cnt > TRACE_BUF_SIZE)
6598 		cnt = TRACE_BUF_SIZE;
6599 
6600 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6601 
6602 	local_save_flags(irq_flags);
6603 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6604 
6605 	/* If less than "<faulted>", then make sure we can still add that */
6606 	if (cnt < FAULTED_SIZE)
6607 		size += FAULTED_SIZE - cnt;
6608 
6609 	buffer = tr->array_buffer.buffer;
6610 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6611 					    irq_flags, preempt_count());
6612 	if (unlikely(!event))
6613 		/* Ring buffer disabled, return as if not open for write */
6614 		return -EBADF;
6615 
6616 	entry = ring_buffer_event_data(event);
6617 	entry->ip = _THIS_IP_;
6618 
6619 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6620 	if (len) {
6621 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6622 		cnt = FAULTED_SIZE;
6623 		written = -EFAULT;
6624 	} else
6625 		written = cnt;
6626 	len = cnt;
6627 
6628 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6629 		/* do not add \n before testing triggers, but add \0 */
6630 		entry->buf[cnt] = '\0';
6631 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6632 	}
6633 
6634 	if (entry->buf[cnt - 1] != '\n') {
6635 		entry->buf[cnt] = '\n';
6636 		entry->buf[cnt + 1] = '\0';
6637 	} else
6638 		entry->buf[cnt] = '\0';
6639 
6640 	__buffer_unlock_commit(buffer, event);
6641 
6642 	if (tt)
6643 		event_triggers_post_call(tr->trace_marker_file, tt);
6644 
6645 	if (written > 0)
6646 		*fpos += written;
6647 
6648 	return written;
6649 }
6650 
6651 /* Limit it for now to 3K (including tag) */
6652 #define RAW_DATA_MAX_SIZE (1024*3)
6653 
6654 static ssize_t
6655 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6656 					size_t cnt, loff_t *fpos)
6657 {
6658 	struct trace_array *tr = filp->private_data;
6659 	struct ring_buffer_event *event;
6660 	struct trace_buffer *buffer;
6661 	struct raw_data_entry *entry;
6662 	unsigned long irq_flags;
6663 	ssize_t written;
6664 	int size;
6665 	int len;
6666 
6667 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6668 
6669 	if (tracing_disabled)
6670 		return -EINVAL;
6671 
6672 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6673 		return -EINVAL;
6674 
6675 	/* The marker must at least have a tag id */
6676 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6677 		return -EINVAL;
6678 
6679 	if (cnt > TRACE_BUF_SIZE)
6680 		cnt = TRACE_BUF_SIZE;
6681 
6682 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6683 
6684 	local_save_flags(irq_flags);
6685 	size = sizeof(*entry) + cnt;
6686 	if (cnt < FAULT_SIZE_ID)
6687 		size += FAULT_SIZE_ID - cnt;
6688 
6689 	buffer = tr->array_buffer.buffer;
6690 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6691 					    irq_flags, preempt_count());
6692 	if (!event)
6693 		/* Ring buffer disabled, return as if not open for write */
6694 		return -EBADF;
6695 
6696 	entry = ring_buffer_event_data(event);
6697 
6698 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6699 	if (len) {
6700 		entry->id = -1;
6701 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6702 		written = -EFAULT;
6703 	} else
6704 		written = cnt;
6705 
6706 	__buffer_unlock_commit(buffer, event);
6707 
6708 	if (written > 0)
6709 		*fpos += written;
6710 
6711 	return written;
6712 }
6713 
6714 static int tracing_clock_show(struct seq_file *m, void *v)
6715 {
6716 	struct trace_array *tr = m->private;
6717 	int i;
6718 
6719 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6720 		seq_printf(m,
6721 			"%s%s%s%s", i ? " " : "",
6722 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6723 			i == tr->clock_id ? "]" : "");
6724 	seq_putc(m, '\n');
6725 
6726 	return 0;
6727 }
6728 
6729 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6730 {
6731 	int i;
6732 
6733 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6734 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6735 			break;
6736 	}
6737 	if (i == ARRAY_SIZE(trace_clocks))
6738 		return -EINVAL;
6739 
6740 	mutex_lock(&trace_types_lock);
6741 
6742 	tr->clock_id = i;
6743 
6744 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6745 
6746 	/*
6747 	 * New clock may not be consistent with the previous clock.
6748 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6749 	 */
6750 	tracing_reset_online_cpus(&tr->array_buffer);
6751 
6752 #ifdef CONFIG_TRACER_MAX_TRACE
6753 	if (tr->max_buffer.buffer)
6754 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6755 	tracing_reset_online_cpus(&tr->max_buffer);
6756 #endif
6757 
6758 	mutex_unlock(&trace_types_lock);
6759 
6760 	return 0;
6761 }
6762 
6763 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6764 				   size_t cnt, loff_t *fpos)
6765 {
6766 	struct seq_file *m = filp->private_data;
6767 	struct trace_array *tr = m->private;
6768 	char buf[64];
6769 	const char *clockstr;
6770 	int ret;
6771 
6772 	if (cnt >= sizeof(buf))
6773 		return -EINVAL;
6774 
6775 	if (copy_from_user(buf, ubuf, cnt))
6776 		return -EFAULT;
6777 
6778 	buf[cnt] = 0;
6779 
6780 	clockstr = strstrip(buf);
6781 
6782 	ret = tracing_set_clock(tr, clockstr);
6783 	if (ret)
6784 		return ret;
6785 
6786 	*fpos += cnt;
6787 
6788 	return cnt;
6789 }
6790 
6791 static int tracing_clock_open(struct inode *inode, struct file *file)
6792 {
6793 	struct trace_array *tr = inode->i_private;
6794 	int ret;
6795 
6796 	ret = tracing_check_open_get_tr(tr);
6797 	if (ret)
6798 		return ret;
6799 
6800 	ret = single_open(file, tracing_clock_show, inode->i_private);
6801 	if (ret < 0)
6802 		trace_array_put(tr);
6803 
6804 	return ret;
6805 }
6806 
6807 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6808 {
6809 	struct trace_array *tr = m->private;
6810 
6811 	mutex_lock(&trace_types_lock);
6812 
6813 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6814 		seq_puts(m, "delta [absolute]\n");
6815 	else
6816 		seq_puts(m, "[delta] absolute\n");
6817 
6818 	mutex_unlock(&trace_types_lock);
6819 
6820 	return 0;
6821 }
6822 
6823 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6824 {
6825 	struct trace_array *tr = inode->i_private;
6826 	int ret;
6827 
6828 	ret = tracing_check_open_get_tr(tr);
6829 	if (ret)
6830 		return ret;
6831 
6832 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6833 	if (ret < 0)
6834 		trace_array_put(tr);
6835 
6836 	return ret;
6837 }
6838 
6839 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6840 {
6841 	int ret = 0;
6842 
6843 	mutex_lock(&trace_types_lock);
6844 
6845 	if (abs && tr->time_stamp_abs_ref++)
6846 		goto out;
6847 
6848 	if (!abs) {
6849 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6850 			ret = -EINVAL;
6851 			goto out;
6852 		}
6853 
6854 		if (--tr->time_stamp_abs_ref)
6855 			goto out;
6856 	}
6857 
6858 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6859 
6860 #ifdef CONFIG_TRACER_MAX_TRACE
6861 	if (tr->max_buffer.buffer)
6862 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6863 #endif
6864  out:
6865 	mutex_unlock(&trace_types_lock);
6866 
6867 	return ret;
6868 }
6869 
6870 struct ftrace_buffer_info {
6871 	struct trace_iterator	iter;
6872 	void			*spare;
6873 	unsigned int		spare_cpu;
6874 	unsigned int		read;
6875 };
6876 
6877 #ifdef CONFIG_TRACER_SNAPSHOT
6878 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6879 {
6880 	struct trace_array *tr = inode->i_private;
6881 	struct trace_iterator *iter;
6882 	struct seq_file *m;
6883 	int ret;
6884 
6885 	ret = tracing_check_open_get_tr(tr);
6886 	if (ret)
6887 		return ret;
6888 
6889 	if (file->f_mode & FMODE_READ) {
6890 		iter = __tracing_open(inode, file, true);
6891 		if (IS_ERR(iter))
6892 			ret = PTR_ERR(iter);
6893 	} else {
6894 		/* Writes still need the seq_file to hold the private data */
6895 		ret = -ENOMEM;
6896 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6897 		if (!m)
6898 			goto out;
6899 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6900 		if (!iter) {
6901 			kfree(m);
6902 			goto out;
6903 		}
6904 		ret = 0;
6905 
6906 		iter->tr = tr;
6907 		iter->array_buffer = &tr->max_buffer;
6908 		iter->cpu_file = tracing_get_cpu(inode);
6909 		m->private = iter;
6910 		file->private_data = m;
6911 	}
6912 out:
6913 	if (ret < 0)
6914 		trace_array_put(tr);
6915 
6916 	return ret;
6917 }
6918 
6919 static ssize_t
6920 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6921 		       loff_t *ppos)
6922 {
6923 	struct seq_file *m = filp->private_data;
6924 	struct trace_iterator *iter = m->private;
6925 	struct trace_array *tr = iter->tr;
6926 	unsigned long val;
6927 	int ret;
6928 
6929 	ret = tracing_update_buffers();
6930 	if (ret < 0)
6931 		return ret;
6932 
6933 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6934 	if (ret)
6935 		return ret;
6936 
6937 	mutex_lock(&trace_types_lock);
6938 
6939 	if (tr->current_trace->use_max_tr) {
6940 		ret = -EBUSY;
6941 		goto out;
6942 	}
6943 
6944 	arch_spin_lock(&tr->max_lock);
6945 	if (tr->cond_snapshot)
6946 		ret = -EBUSY;
6947 	arch_spin_unlock(&tr->max_lock);
6948 	if (ret)
6949 		goto out;
6950 
6951 	switch (val) {
6952 	case 0:
6953 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6954 			ret = -EINVAL;
6955 			break;
6956 		}
6957 		if (tr->allocated_snapshot)
6958 			free_snapshot(tr);
6959 		break;
6960 	case 1:
6961 /* Only allow per-cpu swap if the ring buffer supports it */
6962 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6963 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6964 			ret = -EINVAL;
6965 			break;
6966 		}
6967 #endif
6968 		if (tr->allocated_snapshot)
6969 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6970 					&tr->array_buffer, iter->cpu_file);
6971 		else
6972 			ret = tracing_alloc_snapshot_instance(tr);
6973 		if (ret < 0)
6974 			break;
6975 		local_irq_disable();
6976 		/* Now, we're going to swap */
6977 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6978 			update_max_tr(tr, current, smp_processor_id(), NULL);
6979 		else
6980 			update_max_tr_single(tr, current, iter->cpu_file);
6981 		local_irq_enable();
6982 		break;
6983 	default:
6984 		if (tr->allocated_snapshot) {
6985 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6986 				tracing_reset_online_cpus(&tr->max_buffer);
6987 			else
6988 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6989 		}
6990 		break;
6991 	}
6992 
6993 	if (ret >= 0) {
6994 		*ppos += cnt;
6995 		ret = cnt;
6996 	}
6997 out:
6998 	mutex_unlock(&trace_types_lock);
6999 	return ret;
7000 }
7001 
7002 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7003 {
7004 	struct seq_file *m = file->private_data;
7005 	int ret;
7006 
7007 	ret = tracing_release(inode, file);
7008 
7009 	if (file->f_mode & FMODE_READ)
7010 		return ret;
7011 
7012 	/* If write only, the seq_file is just a stub */
7013 	if (m)
7014 		kfree(m->private);
7015 	kfree(m);
7016 
7017 	return 0;
7018 }
7019 
7020 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7021 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7022 				    size_t count, loff_t *ppos);
7023 static int tracing_buffers_release(struct inode *inode, struct file *file);
7024 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7025 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7026 
7027 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7028 {
7029 	struct ftrace_buffer_info *info;
7030 	int ret;
7031 
7032 	/* The following checks for tracefs lockdown */
7033 	ret = tracing_buffers_open(inode, filp);
7034 	if (ret < 0)
7035 		return ret;
7036 
7037 	info = filp->private_data;
7038 
7039 	if (info->iter.trace->use_max_tr) {
7040 		tracing_buffers_release(inode, filp);
7041 		return -EBUSY;
7042 	}
7043 
7044 	info->iter.snapshot = true;
7045 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7046 
7047 	return ret;
7048 }
7049 
7050 #endif /* CONFIG_TRACER_SNAPSHOT */
7051 
7052 
7053 static const struct file_operations tracing_thresh_fops = {
7054 	.open		= tracing_open_generic,
7055 	.read		= tracing_thresh_read,
7056 	.write		= tracing_thresh_write,
7057 	.llseek		= generic_file_llseek,
7058 };
7059 
7060 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7061 static const struct file_operations tracing_max_lat_fops = {
7062 	.open		= tracing_open_generic,
7063 	.read		= tracing_max_lat_read,
7064 	.write		= tracing_max_lat_write,
7065 	.llseek		= generic_file_llseek,
7066 };
7067 #endif
7068 
7069 static const struct file_operations set_tracer_fops = {
7070 	.open		= tracing_open_generic,
7071 	.read		= tracing_set_trace_read,
7072 	.write		= tracing_set_trace_write,
7073 	.llseek		= generic_file_llseek,
7074 };
7075 
7076 static const struct file_operations tracing_pipe_fops = {
7077 	.open		= tracing_open_pipe,
7078 	.poll		= tracing_poll_pipe,
7079 	.read		= tracing_read_pipe,
7080 	.splice_read	= tracing_splice_read_pipe,
7081 	.release	= tracing_release_pipe,
7082 	.llseek		= no_llseek,
7083 };
7084 
7085 static const struct file_operations tracing_entries_fops = {
7086 	.open		= tracing_open_generic_tr,
7087 	.read		= tracing_entries_read,
7088 	.write		= tracing_entries_write,
7089 	.llseek		= generic_file_llseek,
7090 	.release	= tracing_release_generic_tr,
7091 };
7092 
7093 static const struct file_operations tracing_total_entries_fops = {
7094 	.open		= tracing_open_generic_tr,
7095 	.read		= tracing_total_entries_read,
7096 	.llseek		= generic_file_llseek,
7097 	.release	= tracing_release_generic_tr,
7098 };
7099 
7100 static const struct file_operations tracing_free_buffer_fops = {
7101 	.open		= tracing_open_generic_tr,
7102 	.write		= tracing_free_buffer_write,
7103 	.release	= tracing_free_buffer_release,
7104 };
7105 
7106 static const struct file_operations tracing_mark_fops = {
7107 	.open		= tracing_open_generic_tr,
7108 	.write		= tracing_mark_write,
7109 	.llseek		= generic_file_llseek,
7110 	.release	= tracing_release_generic_tr,
7111 };
7112 
7113 static const struct file_operations tracing_mark_raw_fops = {
7114 	.open		= tracing_open_generic_tr,
7115 	.write		= tracing_mark_raw_write,
7116 	.llseek		= generic_file_llseek,
7117 	.release	= tracing_release_generic_tr,
7118 };
7119 
7120 static const struct file_operations trace_clock_fops = {
7121 	.open		= tracing_clock_open,
7122 	.read		= seq_read,
7123 	.llseek		= seq_lseek,
7124 	.release	= tracing_single_release_tr,
7125 	.write		= tracing_clock_write,
7126 };
7127 
7128 static const struct file_operations trace_time_stamp_mode_fops = {
7129 	.open		= tracing_time_stamp_mode_open,
7130 	.read		= seq_read,
7131 	.llseek		= seq_lseek,
7132 	.release	= tracing_single_release_tr,
7133 };
7134 
7135 #ifdef CONFIG_TRACER_SNAPSHOT
7136 static const struct file_operations snapshot_fops = {
7137 	.open		= tracing_snapshot_open,
7138 	.read		= seq_read,
7139 	.write		= tracing_snapshot_write,
7140 	.llseek		= tracing_lseek,
7141 	.release	= tracing_snapshot_release,
7142 };
7143 
7144 static const struct file_operations snapshot_raw_fops = {
7145 	.open		= snapshot_raw_open,
7146 	.read		= tracing_buffers_read,
7147 	.release	= tracing_buffers_release,
7148 	.splice_read	= tracing_buffers_splice_read,
7149 	.llseek		= no_llseek,
7150 };
7151 
7152 #endif /* CONFIG_TRACER_SNAPSHOT */
7153 
7154 #define TRACING_LOG_ERRS_MAX	8
7155 #define TRACING_LOG_LOC_MAX	128
7156 
7157 #define CMD_PREFIX "  Command: "
7158 
7159 struct err_info {
7160 	const char	**errs;	/* ptr to loc-specific array of err strings */
7161 	u8		type;	/* index into errs -> specific err string */
7162 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7163 	u64		ts;
7164 };
7165 
7166 struct tracing_log_err {
7167 	struct list_head	list;
7168 	struct err_info		info;
7169 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7170 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7171 };
7172 
7173 static DEFINE_MUTEX(tracing_err_log_lock);
7174 
7175 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7176 {
7177 	struct tracing_log_err *err;
7178 
7179 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7180 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7181 		if (!err)
7182 			err = ERR_PTR(-ENOMEM);
7183 		tr->n_err_log_entries++;
7184 
7185 		return err;
7186 	}
7187 
7188 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7189 	list_del(&err->list);
7190 
7191 	return err;
7192 }
7193 
7194 /**
7195  * err_pos - find the position of a string within a command for error careting
7196  * @cmd: The tracing command that caused the error
7197  * @str: The string to position the caret at within @cmd
7198  *
7199  * Finds the position of the first occurence of @str within @cmd.  The
7200  * return value can be passed to tracing_log_err() for caret placement
7201  * within @cmd.
7202  *
7203  * Returns the index within @cmd of the first occurence of @str or 0
7204  * if @str was not found.
7205  */
7206 unsigned int err_pos(char *cmd, const char *str)
7207 {
7208 	char *found;
7209 
7210 	if (WARN_ON(!strlen(cmd)))
7211 		return 0;
7212 
7213 	found = strstr(cmd, str);
7214 	if (found)
7215 		return found - cmd;
7216 
7217 	return 0;
7218 }
7219 
7220 /**
7221  * tracing_log_err - write an error to the tracing error log
7222  * @tr: The associated trace array for the error (NULL for top level array)
7223  * @loc: A string describing where the error occurred
7224  * @cmd: The tracing command that caused the error
7225  * @errs: The array of loc-specific static error strings
7226  * @type: The index into errs[], which produces the specific static err string
7227  * @pos: The position the caret should be placed in the cmd
7228  *
7229  * Writes an error into tracing/error_log of the form:
7230  *
7231  * <loc>: error: <text>
7232  *   Command: <cmd>
7233  *              ^
7234  *
7235  * tracing/error_log is a small log file containing the last
7236  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7237  * unless there has been a tracing error, and the error log can be
7238  * cleared and have its memory freed by writing the empty string in
7239  * truncation mode to it i.e. echo > tracing/error_log.
7240  *
7241  * NOTE: the @errs array along with the @type param are used to
7242  * produce a static error string - this string is not copied and saved
7243  * when the error is logged - only a pointer to it is saved.  See
7244  * existing callers for examples of how static strings are typically
7245  * defined for use with tracing_log_err().
7246  */
7247 void tracing_log_err(struct trace_array *tr,
7248 		     const char *loc, const char *cmd,
7249 		     const char **errs, u8 type, u8 pos)
7250 {
7251 	struct tracing_log_err *err;
7252 
7253 	if (!tr)
7254 		tr = &global_trace;
7255 
7256 	mutex_lock(&tracing_err_log_lock);
7257 	err = get_tracing_log_err(tr);
7258 	if (PTR_ERR(err) == -ENOMEM) {
7259 		mutex_unlock(&tracing_err_log_lock);
7260 		return;
7261 	}
7262 
7263 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7264 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7265 
7266 	err->info.errs = errs;
7267 	err->info.type = type;
7268 	err->info.pos = pos;
7269 	err->info.ts = local_clock();
7270 
7271 	list_add_tail(&err->list, &tr->err_log);
7272 	mutex_unlock(&tracing_err_log_lock);
7273 }
7274 
7275 static void clear_tracing_err_log(struct trace_array *tr)
7276 {
7277 	struct tracing_log_err *err, *next;
7278 
7279 	mutex_lock(&tracing_err_log_lock);
7280 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7281 		list_del(&err->list);
7282 		kfree(err);
7283 	}
7284 
7285 	tr->n_err_log_entries = 0;
7286 	mutex_unlock(&tracing_err_log_lock);
7287 }
7288 
7289 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7290 {
7291 	struct trace_array *tr = m->private;
7292 
7293 	mutex_lock(&tracing_err_log_lock);
7294 
7295 	return seq_list_start(&tr->err_log, *pos);
7296 }
7297 
7298 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7299 {
7300 	struct trace_array *tr = m->private;
7301 
7302 	return seq_list_next(v, &tr->err_log, pos);
7303 }
7304 
7305 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7306 {
7307 	mutex_unlock(&tracing_err_log_lock);
7308 }
7309 
7310 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7311 {
7312 	u8 i;
7313 
7314 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7315 		seq_putc(m, ' ');
7316 	for (i = 0; i < pos; i++)
7317 		seq_putc(m, ' ');
7318 	seq_puts(m, "^\n");
7319 }
7320 
7321 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7322 {
7323 	struct tracing_log_err *err = v;
7324 
7325 	if (err) {
7326 		const char *err_text = err->info.errs[err->info.type];
7327 		u64 sec = err->info.ts;
7328 		u32 nsec;
7329 
7330 		nsec = do_div(sec, NSEC_PER_SEC);
7331 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7332 			   err->loc, err_text);
7333 		seq_printf(m, "%s", err->cmd);
7334 		tracing_err_log_show_pos(m, err->info.pos);
7335 	}
7336 
7337 	return 0;
7338 }
7339 
7340 static const struct seq_operations tracing_err_log_seq_ops = {
7341 	.start  = tracing_err_log_seq_start,
7342 	.next   = tracing_err_log_seq_next,
7343 	.stop   = tracing_err_log_seq_stop,
7344 	.show   = tracing_err_log_seq_show
7345 };
7346 
7347 static int tracing_err_log_open(struct inode *inode, struct file *file)
7348 {
7349 	struct trace_array *tr = inode->i_private;
7350 	int ret = 0;
7351 
7352 	ret = tracing_check_open_get_tr(tr);
7353 	if (ret)
7354 		return ret;
7355 
7356 	/* If this file was opened for write, then erase contents */
7357 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7358 		clear_tracing_err_log(tr);
7359 
7360 	if (file->f_mode & FMODE_READ) {
7361 		ret = seq_open(file, &tracing_err_log_seq_ops);
7362 		if (!ret) {
7363 			struct seq_file *m = file->private_data;
7364 			m->private = tr;
7365 		} else {
7366 			trace_array_put(tr);
7367 		}
7368 	}
7369 	return ret;
7370 }
7371 
7372 static ssize_t tracing_err_log_write(struct file *file,
7373 				     const char __user *buffer,
7374 				     size_t count, loff_t *ppos)
7375 {
7376 	return count;
7377 }
7378 
7379 static int tracing_err_log_release(struct inode *inode, struct file *file)
7380 {
7381 	struct trace_array *tr = inode->i_private;
7382 
7383 	trace_array_put(tr);
7384 
7385 	if (file->f_mode & FMODE_READ)
7386 		seq_release(inode, file);
7387 
7388 	return 0;
7389 }
7390 
7391 static const struct file_operations tracing_err_log_fops = {
7392 	.open           = tracing_err_log_open,
7393 	.write		= tracing_err_log_write,
7394 	.read           = seq_read,
7395 	.llseek         = seq_lseek,
7396 	.release        = tracing_err_log_release,
7397 };
7398 
7399 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7400 {
7401 	struct trace_array *tr = inode->i_private;
7402 	struct ftrace_buffer_info *info;
7403 	int ret;
7404 
7405 	ret = tracing_check_open_get_tr(tr);
7406 	if (ret)
7407 		return ret;
7408 
7409 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7410 	if (!info) {
7411 		trace_array_put(tr);
7412 		return -ENOMEM;
7413 	}
7414 
7415 	mutex_lock(&trace_types_lock);
7416 
7417 	info->iter.tr		= tr;
7418 	info->iter.cpu_file	= tracing_get_cpu(inode);
7419 	info->iter.trace	= tr->current_trace;
7420 	info->iter.array_buffer = &tr->array_buffer;
7421 	info->spare		= NULL;
7422 	/* Force reading ring buffer for first read */
7423 	info->read		= (unsigned int)-1;
7424 
7425 	filp->private_data = info;
7426 
7427 	tr->current_trace->ref++;
7428 
7429 	mutex_unlock(&trace_types_lock);
7430 
7431 	ret = nonseekable_open(inode, filp);
7432 	if (ret < 0)
7433 		trace_array_put(tr);
7434 
7435 	return ret;
7436 }
7437 
7438 static __poll_t
7439 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7440 {
7441 	struct ftrace_buffer_info *info = filp->private_data;
7442 	struct trace_iterator *iter = &info->iter;
7443 
7444 	return trace_poll(iter, filp, poll_table);
7445 }
7446 
7447 static ssize_t
7448 tracing_buffers_read(struct file *filp, char __user *ubuf,
7449 		     size_t count, loff_t *ppos)
7450 {
7451 	struct ftrace_buffer_info *info = filp->private_data;
7452 	struct trace_iterator *iter = &info->iter;
7453 	ssize_t ret = 0;
7454 	ssize_t size;
7455 
7456 	if (!count)
7457 		return 0;
7458 
7459 #ifdef CONFIG_TRACER_MAX_TRACE
7460 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7461 		return -EBUSY;
7462 #endif
7463 
7464 	if (!info->spare) {
7465 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7466 							  iter->cpu_file);
7467 		if (IS_ERR(info->spare)) {
7468 			ret = PTR_ERR(info->spare);
7469 			info->spare = NULL;
7470 		} else {
7471 			info->spare_cpu = iter->cpu_file;
7472 		}
7473 	}
7474 	if (!info->spare)
7475 		return ret;
7476 
7477 	/* Do we have previous read data to read? */
7478 	if (info->read < PAGE_SIZE)
7479 		goto read;
7480 
7481  again:
7482 	trace_access_lock(iter->cpu_file);
7483 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7484 				    &info->spare,
7485 				    count,
7486 				    iter->cpu_file, 0);
7487 	trace_access_unlock(iter->cpu_file);
7488 
7489 	if (ret < 0) {
7490 		if (trace_empty(iter)) {
7491 			if ((filp->f_flags & O_NONBLOCK))
7492 				return -EAGAIN;
7493 
7494 			ret = wait_on_pipe(iter, 0);
7495 			if (ret)
7496 				return ret;
7497 
7498 			goto again;
7499 		}
7500 		return 0;
7501 	}
7502 
7503 	info->read = 0;
7504  read:
7505 	size = PAGE_SIZE - info->read;
7506 	if (size > count)
7507 		size = count;
7508 
7509 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7510 	if (ret == size)
7511 		return -EFAULT;
7512 
7513 	size -= ret;
7514 
7515 	*ppos += size;
7516 	info->read += size;
7517 
7518 	return size;
7519 }
7520 
7521 static int tracing_buffers_release(struct inode *inode, struct file *file)
7522 {
7523 	struct ftrace_buffer_info *info = file->private_data;
7524 	struct trace_iterator *iter = &info->iter;
7525 
7526 	mutex_lock(&trace_types_lock);
7527 
7528 	iter->tr->current_trace->ref--;
7529 
7530 	__trace_array_put(iter->tr);
7531 
7532 	if (info->spare)
7533 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7534 					   info->spare_cpu, info->spare);
7535 	kfree(info);
7536 
7537 	mutex_unlock(&trace_types_lock);
7538 
7539 	return 0;
7540 }
7541 
7542 struct buffer_ref {
7543 	struct trace_buffer	*buffer;
7544 	void			*page;
7545 	int			cpu;
7546 	refcount_t		refcount;
7547 };
7548 
7549 static void buffer_ref_release(struct buffer_ref *ref)
7550 {
7551 	if (!refcount_dec_and_test(&ref->refcount))
7552 		return;
7553 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7554 	kfree(ref);
7555 }
7556 
7557 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7558 				    struct pipe_buffer *buf)
7559 {
7560 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7561 
7562 	buffer_ref_release(ref);
7563 	buf->private = 0;
7564 }
7565 
7566 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7567 				struct pipe_buffer *buf)
7568 {
7569 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7570 
7571 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7572 		return false;
7573 
7574 	refcount_inc(&ref->refcount);
7575 	return true;
7576 }
7577 
7578 /* Pipe buffer operations for a buffer. */
7579 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7580 	.release		= buffer_pipe_buf_release,
7581 	.get			= buffer_pipe_buf_get,
7582 };
7583 
7584 /*
7585  * Callback from splice_to_pipe(), if we need to release some pages
7586  * at the end of the spd in case we error'ed out in filling the pipe.
7587  */
7588 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7589 {
7590 	struct buffer_ref *ref =
7591 		(struct buffer_ref *)spd->partial[i].private;
7592 
7593 	buffer_ref_release(ref);
7594 	spd->partial[i].private = 0;
7595 }
7596 
7597 static ssize_t
7598 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7599 			    struct pipe_inode_info *pipe, size_t len,
7600 			    unsigned int flags)
7601 {
7602 	struct ftrace_buffer_info *info = file->private_data;
7603 	struct trace_iterator *iter = &info->iter;
7604 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7605 	struct page *pages_def[PIPE_DEF_BUFFERS];
7606 	struct splice_pipe_desc spd = {
7607 		.pages		= pages_def,
7608 		.partial	= partial_def,
7609 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7610 		.ops		= &buffer_pipe_buf_ops,
7611 		.spd_release	= buffer_spd_release,
7612 	};
7613 	struct buffer_ref *ref;
7614 	int entries, i;
7615 	ssize_t ret = 0;
7616 
7617 #ifdef CONFIG_TRACER_MAX_TRACE
7618 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7619 		return -EBUSY;
7620 #endif
7621 
7622 	if (*ppos & (PAGE_SIZE - 1))
7623 		return -EINVAL;
7624 
7625 	if (len & (PAGE_SIZE - 1)) {
7626 		if (len < PAGE_SIZE)
7627 			return -EINVAL;
7628 		len &= PAGE_MASK;
7629 	}
7630 
7631 	if (splice_grow_spd(pipe, &spd))
7632 		return -ENOMEM;
7633 
7634  again:
7635 	trace_access_lock(iter->cpu_file);
7636 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7637 
7638 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7639 		struct page *page;
7640 		int r;
7641 
7642 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7643 		if (!ref) {
7644 			ret = -ENOMEM;
7645 			break;
7646 		}
7647 
7648 		refcount_set(&ref->refcount, 1);
7649 		ref->buffer = iter->array_buffer->buffer;
7650 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7651 		if (IS_ERR(ref->page)) {
7652 			ret = PTR_ERR(ref->page);
7653 			ref->page = NULL;
7654 			kfree(ref);
7655 			break;
7656 		}
7657 		ref->cpu = iter->cpu_file;
7658 
7659 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7660 					  len, iter->cpu_file, 1);
7661 		if (r < 0) {
7662 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7663 						   ref->page);
7664 			kfree(ref);
7665 			break;
7666 		}
7667 
7668 		page = virt_to_page(ref->page);
7669 
7670 		spd.pages[i] = page;
7671 		spd.partial[i].len = PAGE_SIZE;
7672 		spd.partial[i].offset = 0;
7673 		spd.partial[i].private = (unsigned long)ref;
7674 		spd.nr_pages++;
7675 		*ppos += PAGE_SIZE;
7676 
7677 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7678 	}
7679 
7680 	trace_access_unlock(iter->cpu_file);
7681 	spd.nr_pages = i;
7682 
7683 	/* did we read anything? */
7684 	if (!spd.nr_pages) {
7685 		if (ret)
7686 			goto out;
7687 
7688 		ret = -EAGAIN;
7689 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7690 			goto out;
7691 
7692 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7693 		if (ret)
7694 			goto out;
7695 
7696 		goto again;
7697 	}
7698 
7699 	ret = splice_to_pipe(pipe, &spd);
7700 out:
7701 	splice_shrink_spd(&spd);
7702 
7703 	return ret;
7704 }
7705 
7706 static const struct file_operations tracing_buffers_fops = {
7707 	.open		= tracing_buffers_open,
7708 	.read		= tracing_buffers_read,
7709 	.poll		= tracing_buffers_poll,
7710 	.release	= tracing_buffers_release,
7711 	.splice_read	= tracing_buffers_splice_read,
7712 	.llseek		= no_llseek,
7713 };
7714 
7715 static ssize_t
7716 tracing_stats_read(struct file *filp, char __user *ubuf,
7717 		   size_t count, loff_t *ppos)
7718 {
7719 	struct inode *inode = file_inode(filp);
7720 	struct trace_array *tr = inode->i_private;
7721 	struct array_buffer *trace_buf = &tr->array_buffer;
7722 	int cpu = tracing_get_cpu(inode);
7723 	struct trace_seq *s;
7724 	unsigned long cnt;
7725 	unsigned long long t;
7726 	unsigned long usec_rem;
7727 
7728 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7729 	if (!s)
7730 		return -ENOMEM;
7731 
7732 	trace_seq_init(s);
7733 
7734 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7735 	trace_seq_printf(s, "entries: %ld\n", cnt);
7736 
7737 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7738 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7739 
7740 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7741 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7742 
7743 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7744 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7745 
7746 	if (trace_clocks[tr->clock_id].in_ns) {
7747 		/* local or global for trace_clock */
7748 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7749 		usec_rem = do_div(t, USEC_PER_SEC);
7750 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7751 								t, usec_rem);
7752 
7753 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7754 		usec_rem = do_div(t, USEC_PER_SEC);
7755 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7756 	} else {
7757 		/* counter or tsc mode for trace_clock */
7758 		trace_seq_printf(s, "oldest event ts: %llu\n",
7759 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7760 
7761 		trace_seq_printf(s, "now ts: %llu\n",
7762 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7763 	}
7764 
7765 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7766 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7767 
7768 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7769 	trace_seq_printf(s, "read events: %ld\n", cnt);
7770 
7771 	count = simple_read_from_buffer(ubuf, count, ppos,
7772 					s->buffer, trace_seq_used(s));
7773 
7774 	kfree(s);
7775 
7776 	return count;
7777 }
7778 
7779 static const struct file_operations tracing_stats_fops = {
7780 	.open		= tracing_open_generic_tr,
7781 	.read		= tracing_stats_read,
7782 	.llseek		= generic_file_llseek,
7783 	.release	= tracing_release_generic_tr,
7784 };
7785 
7786 #ifdef CONFIG_DYNAMIC_FTRACE
7787 
7788 static ssize_t
7789 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7790 		  size_t cnt, loff_t *ppos)
7791 {
7792 	ssize_t ret;
7793 	char *buf;
7794 	int r;
7795 
7796 	/* 256 should be plenty to hold the amount needed */
7797 	buf = kmalloc(256, GFP_KERNEL);
7798 	if (!buf)
7799 		return -ENOMEM;
7800 
7801 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7802 		      ftrace_update_tot_cnt,
7803 		      ftrace_number_of_pages,
7804 		      ftrace_number_of_groups);
7805 
7806 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7807 	kfree(buf);
7808 	return ret;
7809 }
7810 
7811 static const struct file_operations tracing_dyn_info_fops = {
7812 	.open		= tracing_open_generic,
7813 	.read		= tracing_read_dyn_info,
7814 	.llseek		= generic_file_llseek,
7815 };
7816 #endif /* CONFIG_DYNAMIC_FTRACE */
7817 
7818 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7819 static void
7820 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7821 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7822 		void *data)
7823 {
7824 	tracing_snapshot_instance(tr);
7825 }
7826 
7827 static void
7828 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7829 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7830 		      void *data)
7831 {
7832 	struct ftrace_func_mapper *mapper = data;
7833 	long *count = NULL;
7834 
7835 	if (mapper)
7836 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7837 
7838 	if (count) {
7839 
7840 		if (*count <= 0)
7841 			return;
7842 
7843 		(*count)--;
7844 	}
7845 
7846 	tracing_snapshot_instance(tr);
7847 }
7848 
7849 static int
7850 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7851 		      struct ftrace_probe_ops *ops, void *data)
7852 {
7853 	struct ftrace_func_mapper *mapper = data;
7854 	long *count = NULL;
7855 
7856 	seq_printf(m, "%ps:", (void *)ip);
7857 
7858 	seq_puts(m, "snapshot");
7859 
7860 	if (mapper)
7861 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7862 
7863 	if (count)
7864 		seq_printf(m, ":count=%ld\n", *count);
7865 	else
7866 		seq_puts(m, ":unlimited\n");
7867 
7868 	return 0;
7869 }
7870 
7871 static int
7872 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7873 		     unsigned long ip, void *init_data, void **data)
7874 {
7875 	struct ftrace_func_mapper *mapper = *data;
7876 
7877 	if (!mapper) {
7878 		mapper = allocate_ftrace_func_mapper();
7879 		if (!mapper)
7880 			return -ENOMEM;
7881 		*data = mapper;
7882 	}
7883 
7884 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7885 }
7886 
7887 static void
7888 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7889 		     unsigned long ip, void *data)
7890 {
7891 	struct ftrace_func_mapper *mapper = data;
7892 
7893 	if (!ip) {
7894 		if (!mapper)
7895 			return;
7896 		free_ftrace_func_mapper(mapper, NULL);
7897 		return;
7898 	}
7899 
7900 	ftrace_func_mapper_remove_ip(mapper, ip);
7901 }
7902 
7903 static struct ftrace_probe_ops snapshot_probe_ops = {
7904 	.func			= ftrace_snapshot,
7905 	.print			= ftrace_snapshot_print,
7906 };
7907 
7908 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7909 	.func			= ftrace_count_snapshot,
7910 	.print			= ftrace_snapshot_print,
7911 	.init			= ftrace_snapshot_init,
7912 	.free			= ftrace_snapshot_free,
7913 };
7914 
7915 static int
7916 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7917 			       char *glob, char *cmd, char *param, int enable)
7918 {
7919 	struct ftrace_probe_ops *ops;
7920 	void *count = (void *)-1;
7921 	char *number;
7922 	int ret;
7923 
7924 	if (!tr)
7925 		return -ENODEV;
7926 
7927 	/* hash funcs only work with set_ftrace_filter */
7928 	if (!enable)
7929 		return -EINVAL;
7930 
7931 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7932 
7933 	if (glob[0] == '!')
7934 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7935 
7936 	if (!param)
7937 		goto out_reg;
7938 
7939 	number = strsep(&param, ":");
7940 
7941 	if (!strlen(number))
7942 		goto out_reg;
7943 
7944 	/*
7945 	 * We use the callback data field (which is a pointer)
7946 	 * as our counter.
7947 	 */
7948 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7949 	if (ret)
7950 		return ret;
7951 
7952  out_reg:
7953 	ret = tracing_alloc_snapshot_instance(tr);
7954 	if (ret < 0)
7955 		goto out;
7956 
7957 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7958 
7959  out:
7960 	return ret < 0 ? ret : 0;
7961 }
7962 
7963 static struct ftrace_func_command ftrace_snapshot_cmd = {
7964 	.name			= "snapshot",
7965 	.func			= ftrace_trace_snapshot_callback,
7966 };
7967 
7968 static __init int register_snapshot_cmd(void)
7969 {
7970 	return register_ftrace_command(&ftrace_snapshot_cmd);
7971 }
7972 #else
7973 static inline __init int register_snapshot_cmd(void) { return 0; }
7974 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7975 
7976 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7977 {
7978 	if (WARN_ON(!tr->dir))
7979 		return ERR_PTR(-ENODEV);
7980 
7981 	/* Top directory uses NULL as the parent */
7982 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7983 		return NULL;
7984 
7985 	/* All sub buffers have a descriptor */
7986 	return tr->dir;
7987 }
7988 
7989 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7990 {
7991 	struct dentry *d_tracer;
7992 
7993 	if (tr->percpu_dir)
7994 		return tr->percpu_dir;
7995 
7996 	d_tracer = tracing_get_dentry(tr);
7997 	if (IS_ERR(d_tracer))
7998 		return NULL;
7999 
8000 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8001 
8002 	MEM_FAIL(!tr->percpu_dir,
8003 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8004 
8005 	return tr->percpu_dir;
8006 }
8007 
8008 static struct dentry *
8009 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8010 		      void *data, long cpu, const struct file_operations *fops)
8011 {
8012 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8013 
8014 	if (ret) /* See tracing_get_cpu() */
8015 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8016 	return ret;
8017 }
8018 
8019 static void
8020 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8021 {
8022 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8023 	struct dentry *d_cpu;
8024 	char cpu_dir[30]; /* 30 characters should be more than enough */
8025 
8026 	if (!d_percpu)
8027 		return;
8028 
8029 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8030 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8031 	if (!d_cpu) {
8032 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8033 		return;
8034 	}
8035 
8036 	/* per cpu trace_pipe */
8037 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8038 				tr, cpu, &tracing_pipe_fops);
8039 
8040 	/* per cpu trace */
8041 	trace_create_cpu_file("trace", 0644, d_cpu,
8042 				tr, cpu, &tracing_fops);
8043 
8044 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8045 				tr, cpu, &tracing_buffers_fops);
8046 
8047 	trace_create_cpu_file("stats", 0444, d_cpu,
8048 				tr, cpu, &tracing_stats_fops);
8049 
8050 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8051 				tr, cpu, &tracing_entries_fops);
8052 
8053 #ifdef CONFIG_TRACER_SNAPSHOT
8054 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8055 				tr, cpu, &snapshot_fops);
8056 
8057 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8058 				tr, cpu, &snapshot_raw_fops);
8059 #endif
8060 }
8061 
8062 #ifdef CONFIG_FTRACE_SELFTEST
8063 /* Let selftest have access to static functions in this file */
8064 #include "trace_selftest.c"
8065 #endif
8066 
8067 static ssize_t
8068 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8069 			loff_t *ppos)
8070 {
8071 	struct trace_option_dentry *topt = filp->private_data;
8072 	char *buf;
8073 
8074 	if (topt->flags->val & topt->opt->bit)
8075 		buf = "1\n";
8076 	else
8077 		buf = "0\n";
8078 
8079 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8080 }
8081 
8082 static ssize_t
8083 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8084 			 loff_t *ppos)
8085 {
8086 	struct trace_option_dentry *topt = filp->private_data;
8087 	unsigned long val;
8088 	int ret;
8089 
8090 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8091 	if (ret)
8092 		return ret;
8093 
8094 	if (val != 0 && val != 1)
8095 		return -EINVAL;
8096 
8097 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8098 		mutex_lock(&trace_types_lock);
8099 		ret = __set_tracer_option(topt->tr, topt->flags,
8100 					  topt->opt, !val);
8101 		mutex_unlock(&trace_types_lock);
8102 		if (ret)
8103 			return ret;
8104 	}
8105 
8106 	*ppos += cnt;
8107 
8108 	return cnt;
8109 }
8110 
8111 
8112 static const struct file_operations trace_options_fops = {
8113 	.open = tracing_open_generic,
8114 	.read = trace_options_read,
8115 	.write = trace_options_write,
8116 	.llseek	= generic_file_llseek,
8117 };
8118 
8119 /*
8120  * In order to pass in both the trace_array descriptor as well as the index
8121  * to the flag that the trace option file represents, the trace_array
8122  * has a character array of trace_flags_index[], which holds the index
8123  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8124  * The address of this character array is passed to the flag option file
8125  * read/write callbacks.
8126  *
8127  * In order to extract both the index and the trace_array descriptor,
8128  * get_tr_index() uses the following algorithm.
8129  *
8130  *   idx = *ptr;
8131  *
8132  * As the pointer itself contains the address of the index (remember
8133  * index[1] == 1).
8134  *
8135  * Then to get the trace_array descriptor, by subtracting that index
8136  * from the ptr, we get to the start of the index itself.
8137  *
8138  *   ptr - idx == &index[0]
8139  *
8140  * Then a simple container_of() from that pointer gets us to the
8141  * trace_array descriptor.
8142  */
8143 static void get_tr_index(void *data, struct trace_array **ptr,
8144 			 unsigned int *pindex)
8145 {
8146 	*pindex = *(unsigned char *)data;
8147 
8148 	*ptr = container_of(data - *pindex, struct trace_array,
8149 			    trace_flags_index);
8150 }
8151 
8152 static ssize_t
8153 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8154 			loff_t *ppos)
8155 {
8156 	void *tr_index = filp->private_data;
8157 	struct trace_array *tr;
8158 	unsigned int index;
8159 	char *buf;
8160 
8161 	get_tr_index(tr_index, &tr, &index);
8162 
8163 	if (tr->trace_flags & (1 << index))
8164 		buf = "1\n";
8165 	else
8166 		buf = "0\n";
8167 
8168 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8169 }
8170 
8171 static ssize_t
8172 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8173 			 loff_t *ppos)
8174 {
8175 	void *tr_index = filp->private_data;
8176 	struct trace_array *tr;
8177 	unsigned int index;
8178 	unsigned long val;
8179 	int ret;
8180 
8181 	get_tr_index(tr_index, &tr, &index);
8182 
8183 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8184 	if (ret)
8185 		return ret;
8186 
8187 	if (val != 0 && val != 1)
8188 		return -EINVAL;
8189 
8190 	mutex_lock(&event_mutex);
8191 	mutex_lock(&trace_types_lock);
8192 	ret = set_tracer_flag(tr, 1 << index, val);
8193 	mutex_unlock(&trace_types_lock);
8194 	mutex_unlock(&event_mutex);
8195 
8196 	if (ret < 0)
8197 		return ret;
8198 
8199 	*ppos += cnt;
8200 
8201 	return cnt;
8202 }
8203 
8204 static const struct file_operations trace_options_core_fops = {
8205 	.open = tracing_open_generic,
8206 	.read = trace_options_core_read,
8207 	.write = trace_options_core_write,
8208 	.llseek = generic_file_llseek,
8209 };
8210 
8211 struct dentry *trace_create_file(const char *name,
8212 				 umode_t mode,
8213 				 struct dentry *parent,
8214 				 void *data,
8215 				 const struct file_operations *fops)
8216 {
8217 	struct dentry *ret;
8218 
8219 	ret = tracefs_create_file(name, mode, parent, data, fops);
8220 	if (!ret)
8221 		pr_warn("Could not create tracefs '%s' entry\n", name);
8222 
8223 	return ret;
8224 }
8225 
8226 
8227 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8228 {
8229 	struct dentry *d_tracer;
8230 
8231 	if (tr->options)
8232 		return tr->options;
8233 
8234 	d_tracer = tracing_get_dentry(tr);
8235 	if (IS_ERR(d_tracer))
8236 		return NULL;
8237 
8238 	tr->options = tracefs_create_dir("options", d_tracer);
8239 	if (!tr->options) {
8240 		pr_warn("Could not create tracefs directory 'options'\n");
8241 		return NULL;
8242 	}
8243 
8244 	return tr->options;
8245 }
8246 
8247 static void
8248 create_trace_option_file(struct trace_array *tr,
8249 			 struct trace_option_dentry *topt,
8250 			 struct tracer_flags *flags,
8251 			 struct tracer_opt *opt)
8252 {
8253 	struct dentry *t_options;
8254 
8255 	t_options = trace_options_init_dentry(tr);
8256 	if (!t_options)
8257 		return;
8258 
8259 	topt->flags = flags;
8260 	topt->opt = opt;
8261 	topt->tr = tr;
8262 
8263 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8264 				    &trace_options_fops);
8265 
8266 }
8267 
8268 static void
8269 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8270 {
8271 	struct trace_option_dentry *topts;
8272 	struct trace_options *tr_topts;
8273 	struct tracer_flags *flags;
8274 	struct tracer_opt *opts;
8275 	int cnt;
8276 	int i;
8277 
8278 	if (!tracer)
8279 		return;
8280 
8281 	flags = tracer->flags;
8282 
8283 	if (!flags || !flags->opts)
8284 		return;
8285 
8286 	/*
8287 	 * If this is an instance, only create flags for tracers
8288 	 * the instance may have.
8289 	 */
8290 	if (!trace_ok_for_array(tracer, tr))
8291 		return;
8292 
8293 	for (i = 0; i < tr->nr_topts; i++) {
8294 		/* Make sure there's no duplicate flags. */
8295 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8296 			return;
8297 	}
8298 
8299 	opts = flags->opts;
8300 
8301 	for (cnt = 0; opts[cnt].name; cnt++)
8302 		;
8303 
8304 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8305 	if (!topts)
8306 		return;
8307 
8308 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8309 			    GFP_KERNEL);
8310 	if (!tr_topts) {
8311 		kfree(topts);
8312 		return;
8313 	}
8314 
8315 	tr->topts = tr_topts;
8316 	tr->topts[tr->nr_topts].tracer = tracer;
8317 	tr->topts[tr->nr_topts].topts = topts;
8318 	tr->nr_topts++;
8319 
8320 	for (cnt = 0; opts[cnt].name; cnt++) {
8321 		create_trace_option_file(tr, &topts[cnt], flags,
8322 					 &opts[cnt]);
8323 		MEM_FAIL(topts[cnt].entry == NULL,
8324 			  "Failed to create trace option: %s",
8325 			  opts[cnt].name);
8326 	}
8327 }
8328 
8329 static struct dentry *
8330 create_trace_option_core_file(struct trace_array *tr,
8331 			      const char *option, long index)
8332 {
8333 	struct dentry *t_options;
8334 
8335 	t_options = trace_options_init_dentry(tr);
8336 	if (!t_options)
8337 		return NULL;
8338 
8339 	return trace_create_file(option, 0644, t_options,
8340 				 (void *)&tr->trace_flags_index[index],
8341 				 &trace_options_core_fops);
8342 }
8343 
8344 static void create_trace_options_dir(struct trace_array *tr)
8345 {
8346 	struct dentry *t_options;
8347 	bool top_level = tr == &global_trace;
8348 	int i;
8349 
8350 	t_options = trace_options_init_dentry(tr);
8351 	if (!t_options)
8352 		return;
8353 
8354 	for (i = 0; trace_options[i]; i++) {
8355 		if (top_level ||
8356 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8357 			create_trace_option_core_file(tr, trace_options[i], i);
8358 	}
8359 }
8360 
8361 static ssize_t
8362 rb_simple_read(struct file *filp, char __user *ubuf,
8363 	       size_t cnt, loff_t *ppos)
8364 {
8365 	struct trace_array *tr = filp->private_data;
8366 	char buf[64];
8367 	int r;
8368 
8369 	r = tracer_tracing_is_on(tr);
8370 	r = sprintf(buf, "%d\n", r);
8371 
8372 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8373 }
8374 
8375 static ssize_t
8376 rb_simple_write(struct file *filp, const char __user *ubuf,
8377 		size_t cnt, loff_t *ppos)
8378 {
8379 	struct trace_array *tr = filp->private_data;
8380 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8381 	unsigned long val;
8382 	int ret;
8383 
8384 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8385 	if (ret)
8386 		return ret;
8387 
8388 	if (buffer) {
8389 		mutex_lock(&trace_types_lock);
8390 		if (!!val == tracer_tracing_is_on(tr)) {
8391 			val = 0; /* do nothing */
8392 		} else if (val) {
8393 			tracer_tracing_on(tr);
8394 			if (tr->current_trace->start)
8395 				tr->current_trace->start(tr);
8396 		} else {
8397 			tracer_tracing_off(tr);
8398 			if (tr->current_trace->stop)
8399 				tr->current_trace->stop(tr);
8400 		}
8401 		mutex_unlock(&trace_types_lock);
8402 	}
8403 
8404 	(*ppos)++;
8405 
8406 	return cnt;
8407 }
8408 
8409 static const struct file_operations rb_simple_fops = {
8410 	.open		= tracing_open_generic_tr,
8411 	.read		= rb_simple_read,
8412 	.write		= rb_simple_write,
8413 	.release	= tracing_release_generic_tr,
8414 	.llseek		= default_llseek,
8415 };
8416 
8417 static ssize_t
8418 buffer_percent_read(struct file *filp, char __user *ubuf,
8419 		    size_t cnt, loff_t *ppos)
8420 {
8421 	struct trace_array *tr = filp->private_data;
8422 	char buf[64];
8423 	int r;
8424 
8425 	r = tr->buffer_percent;
8426 	r = sprintf(buf, "%d\n", r);
8427 
8428 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8429 }
8430 
8431 static ssize_t
8432 buffer_percent_write(struct file *filp, const char __user *ubuf,
8433 		     size_t cnt, loff_t *ppos)
8434 {
8435 	struct trace_array *tr = filp->private_data;
8436 	unsigned long val;
8437 	int ret;
8438 
8439 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8440 	if (ret)
8441 		return ret;
8442 
8443 	if (val > 100)
8444 		return -EINVAL;
8445 
8446 	if (!val)
8447 		val = 1;
8448 
8449 	tr->buffer_percent = val;
8450 
8451 	(*ppos)++;
8452 
8453 	return cnt;
8454 }
8455 
8456 static const struct file_operations buffer_percent_fops = {
8457 	.open		= tracing_open_generic_tr,
8458 	.read		= buffer_percent_read,
8459 	.write		= buffer_percent_write,
8460 	.release	= tracing_release_generic_tr,
8461 	.llseek		= default_llseek,
8462 };
8463 
8464 static struct dentry *trace_instance_dir;
8465 
8466 static void
8467 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8468 
8469 static int
8470 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8471 {
8472 	enum ring_buffer_flags rb_flags;
8473 
8474 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8475 
8476 	buf->tr = tr;
8477 
8478 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8479 	if (!buf->buffer)
8480 		return -ENOMEM;
8481 
8482 	buf->data = alloc_percpu(struct trace_array_cpu);
8483 	if (!buf->data) {
8484 		ring_buffer_free(buf->buffer);
8485 		buf->buffer = NULL;
8486 		return -ENOMEM;
8487 	}
8488 
8489 	/* Allocate the first page for all buffers */
8490 	set_buffer_entries(&tr->array_buffer,
8491 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8492 
8493 	return 0;
8494 }
8495 
8496 static int allocate_trace_buffers(struct trace_array *tr, int size)
8497 {
8498 	int ret;
8499 
8500 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8501 	if (ret)
8502 		return ret;
8503 
8504 #ifdef CONFIG_TRACER_MAX_TRACE
8505 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8506 				    allocate_snapshot ? size : 1);
8507 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8508 		ring_buffer_free(tr->array_buffer.buffer);
8509 		tr->array_buffer.buffer = NULL;
8510 		free_percpu(tr->array_buffer.data);
8511 		tr->array_buffer.data = NULL;
8512 		return -ENOMEM;
8513 	}
8514 	tr->allocated_snapshot = allocate_snapshot;
8515 
8516 	/*
8517 	 * Only the top level trace array gets its snapshot allocated
8518 	 * from the kernel command line.
8519 	 */
8520 	allocate_snapshot = false;
8521 #endif
8522 
8523 	return 0;
8524 }
8525 
8526 static void free_trace_buffer(struct array_buffer *buf)
8527 {
8528 	if (buf->buffer) {
8529 		ring_buffer_free(buf->buffer);
8530 		buf->buffer = NULL;
8531 		free_percpu(buf->data);
8532 		buf->data = NULL;
8533 	}
8534 }
8535 
8536 static void free_trace_buffers(struct trace_array *tr)
8537 {
8538 	if (!tr)
8539 		return;
8540 
8541 	free_trace_buffer(&tr->array_buffer);
8542 
8543 #ifdef CONFIG_TRACER_MAX_TRACE
8544 	free_trace_buffer(&tr->max_buffer);
8545 #endif
8546 }
8547 
8548 static void init_trace_flags_index(struct trace_array *tr)
8549 {
8550 	int i;
8551 
8552 	/* Used by the trace options files */
8553 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8554 		tr->trace_flags_index[i] = i;
8555 }
8556 
8557 static void __update_tracer_options(struct trace_array *tr)
8558 {
8559 	struct tracer *t;
8560 
8561 	for (t = trace_types; t; t = t->next)
8562 		add_tracer_options(tr, t);
8563 }
8564 
8565 static void update_tracer_options(struct trace_array *tr)
8566 {
8567 	mutex_lock(&trace_types_lock);
8568 	__update_tracer_options(tr);
8569 	mutex_unlock(&trace_types_lock);
8570 }
8571 
8572 /* Must have trace_types_lock held */
8573 struct trace_array *trace_array_find(const char *instance)
8574 {
8575 	struct trace_array *tr, *found = NULL;
8576 
8577 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8578 		if (tr->name && strcmp(tr->name, instance) == 0) {
8579 			found = tr;
8580 			break;
8581 		}
8582 	}
8583 
8584 	return found;
8585 }
8586 
8587 struct trace_array *trace_array_find_get(const char *instance)
8588 {
8589 	struct trace_array *tr;
8590 
8591 	mutex_lock(&trace_types_lock);
8592 	tr = trace_array_find(instance);
8593 	if (tr)
8594 		tr->ref++;
8595 	mutex_unlock(&trace_types_lock);
8596 
8597 	return tr;
8598 }
8599 
8600 static struct trace_array *trace_array_create(const char *name)
8601 {
8602 	struct trace_array *tr;
8603 	int ret;
8604 
8605 	ret = -ENOMEM;
8606 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8607 	if (!tr)
8608 		return ERR_PTR(ret);
8609 
8610 	tr->name = kstrdup(name, GFP_KERNEL);
8611 	if (!tr->name)
8612 		goto out_free_tr;
8613 
8614 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8615 		goto out_free_tr;
8616 
8617 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8618 
8619 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8620 
8621 	raw_spin_lock_init(&tr->start_lock);
8622 
8623 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8624 
8625 	tr->current_trace = &nop_trace;
8626 
8627 	INIT_LIST_HEAD(&tr->systems);
8628 	INIT_LIST_HEAD(&tr->events);
8629 	INIT_LIST_HEAD(&tr->hist_vars);
8630 	INIT_LIST_HEAD(&tr->err_log);
8631 
8632 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8633 		goto out_free_tr;
8634 
8635 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8636 	if (!tr->dir)
8637 		goto out_free_tr;
8638 
8639 	ret = event_trace_add_tracer(tr->dir, tr);
8640 	if (ret) {
8641 		tracefs_remove(tr->dir);
8642 		goto out_free_tr;
8643 	}
8644 
8645 	ftrace_init_trace_array(tr);
8646 
8647 	init_tracer_tracefs(tr, tr->dir);
8648 	init_trace_flags_index(tr);
8649 	__update_tracer_options(tr);
8650 
8651 	list_add(&tr->list, &ftrace_trace_arrays);
8652 
8653 	tr->ref++;
8654 
8655 
8656 	return tr;
8657 
8658  out_free_tr:
8659 	free_trace_buffers(tr);
8660 	free_cpumask_var(tr->tracing_cpumask);
8661 	kfree(tr->name);
8662 	kfree(tr);
8663 
8664 	return ERR_PTR(ret);
8665 }
8666 
8667 static int instance_mkdir(const char *name)
8668 {
8669 	struct trace_array *tr;
8670 	int ret;
8671 
8672 	mutex_lock(&event_mutex);
8673 	mutex_lock(&trace_types_lock);
8674 
8675 	ret = -EEXIST;
8676 	if (trace_array_find(name))
8677 		goto out_unlock;
8678 
8679 	tr = trace_array_create(name);
8680 
8681 	ret = PTR_ERR_OR_ZERO(tr);
8682 
8683 out_unlock:
8684 	mutex_unlock(&trace_types_lock);
8685 	mutex_unlock(&event_mutex);
8686 	return ret;
8687 }
8688 
8689 /**
8690  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8691  * @name: The name of the trace array to be looked up/created.
8692  *
8693  * Returns pointer to trace array with given name.
8694  * NULL, if it cannot be created.
8695  *
8696  * NOTE: This function increments the reference counter associated with the
8697  * trace array returned. This makes sure it cannot be freed while in use.
8698  * Use trace_array_put() once the trace array is no longer needed.
8699  * If the trace_array is to be freed, trace_array_destroy() needs to
8700  * be called after the trace_array_put(), or simply let user space delete
8701  * it from the tracefs instances directory. But until the
8702  * trace_array_put() is called, user space can not delete it.
8703  *
8704  */
8705 struct trace_array *trace_array_get_by_name(const char *name)
8706 {
8707 	struct trace_array *tr;
8708 
8709 	mutex_lock(&event_mutex);
8710 	mutex_lock(&trace_types_lock);
8711 
8712 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8713 		if (tr->name && strcmp(tr->name, name) == 0)
8714 			goto out_unlock;
8715 	}
8716 
8717 	tr = trace_array_create(name);
8718 
8719 	if (IS_ERR(tr))
8720 		tr = NULL;
8721 out_unlock:
8722 	if (tr)
8723 		tr->ref++;
8724 
8725 	mutex_unlock(&trace_types_lock);
8726 	mutex_unlock(&event_mutex);
8727 	return tr;
8728 }
8729 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8730 
8731 static int __remove_instance(struct trace_array *tr)
8732 {
8733 	int i;
8734 
8735 	/* Reference counter for a newly created trace array = 1. */
8736 	if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8737 		return -EBUSY;
8738 
8739 	list_del(&tr->list);
8740 
8741 	/* Disable all the flags that were enabled coming in */
8742 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8743 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8744 			set_tracer_flag(tr, 1 << i, 0);
8745 	}
8746 
8747 	tracing_set_nop(tr);
8748 	clear_ftrace_function_probes(tr);
8749 	event_trace_del_tracer(tr);
8750 	ftrace_clear_pids(tr);
8751 	ftrace_destroy_function_files(tr);
8752 	tracefs_remove(tr->dir);
8753 	free_trace_buffers(tr);
8754 
8755 	for (i = 0; i < tr->nr_topts; i++) {
8756 		kfree(tr->topts[i].topts);
8757 	}
8758 	kfree(tr->topts);
8759 
8760 	free_cpumask_var(tr->tracing_cpumask);
8761 	kfree(tr->name);
8762 	kfree(tr);
8763 	tr = NULL;
8764 
8765 	return 0;
8766 }
8767 
8768 int trace_array_destroy(struct trace_array *this_tr)
8769 {
8770 	struct trace_array *tr;
8771 	int ret;
8772 
8773 	if (!this_tr)
8774 		return -EINVAL;
8775 
8776 	mutex_lock(&event_mutex);
8777 	mutex_lock(&trace_types_lock);
8778 
8779 	ret = -ENODEV;
8780 
8781 	/* Making sure trace array exists before destroying it. */
8782 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8783 		if (tr == this_tr) {
8784 			ret = __remove_instance(tr);
8785 			break;
8786 		}
8787 	}
8788 
8789 	mutex_unlock(&trace_types_lock);
8790 	mutex_unlock(&event_mutex);
8791 
8792 	return ret;
8793 }
8794 EXPORT_SYMBOL_GPL(trace_array_destroy);
8795 
8796 static int instance_rmdir(const char *name)
8797 {
8798 	struct trace_array *tr;
8799 	int ret;
8800 
8801 	mutex_lock(&event_mutex);
8802 	mutex_lock(&trace_types_lock);
8803 
8804 	ret = -ENODEV;
8805 	tr = trace_array_find(name);
8806 	if (tr)
8807 		ret = __remove_instance(tr);
8808 
8809 	mutex_unlock(&trace_types_lock);
8810 	mutex_unlock(&event_mutex);
8811 
8812 	return ret;
8813 }
8814 
8815 static __init void create_trace_instances(struct dentry *d_tracer)
8816 {
8817 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8818 							 instance_mkdir,
8819 							 instance_rmdir);
8820 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8821 		return;
8822 }
8823 
8824 static void
8825 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8826 {
8827 	struct trace_event_file *file;
8828 	int cpu;
8829 
8830 	trace_create_file("available_tracers", 0444, d_tracer,
8831 			tr, &show_traces_fops);
8832 
8833 	trace_create_file("current_tracer", 0644, d_tracer,
8834 			tr, &set_tracer_fops);
8835 
8836 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8837 			  tr, &tracing_cpumask_fops);
8838 
8839 	trace_create_file("trace_options", 0644, d_tracer,
8840 			  tr, &tracing_iter_fops);
8841 
8842 	trace_create_file("trace", 0644, d_tracer,
8843 			  tr, &tracing_fops);
8844 
8845 	trace_create_file("trace_pipe", 0444, d_tracer,
8846 			  tr, &tracing_pipe_fops);
8847 
8848 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8849 			  tr, &tracing_entries_fops);
8850 
8851 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8852 			  tr, &tracing_total_entries_fops);
8853 
8854 	trace_create_file("free_buffer", 0200, d_tracer,
8855 			  tr, &tracing_free_buffer_fops);
8856 
8857 	trace_create_file("trace_marker", 0220, d_tracer,
8858 			  tr, &tracing_mark_fops);
8859 
8860 	file = __find_event_file(tr, "ftrace", "print");
8861 	if (file && file->dir)
8862 		trace_create_file("trigger", 0644, file->dir, file,
8863 				  &event_trigger_fops);
8864 	tr->trace_marker_file = file;
8865 
8866 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8867 			  tr, &tracing_mark_raw_fops);
8868 
8869 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8870 			  &trace_clock_fops);
8871 
8872 	trace_create_file("tracing_on", 0644, d_tracer,
8873 			  tr, &rb_simple_fops);
8874 
8875 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8876 			  &trace_time_stamp_mode_fops);
8877 
8878 	tr->buffer_percent = 50;
8879 
8880 	trace_create_file("buffer_percent", 0444, d_tracer,
8881 			tr, &buffer_percent_fops);
8882 
8883 	create_trace_options_dir(tr);
8884 
8885 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8886 	trace_create_maxlat_file(tr, d_tracer);
8887 #endif
8888 
8889 	if (ftrace_create_function_files(tr, d_tracer))
8890 		MEM_FAIL(1, "Could not allocate function filter files");
8891 
8892 #ifdef CONFIG_TRACER_SNAPSHOT
8893 	trace_create_file("snapshot", 0644, d_tracer,
8894 			  tr, &snapshot_fops);
8895 #endif
8896 
8897 	trace_create_file("error_log", 0644, d_tracer,
8898 			  tr, &tracing_err_log_fops);
8899 
8900 	for_each_tracing_cpu(cpu)
8901 		tracing_init_tracefs_percpu(tr, cpu);
8902 
8903 	ftrace_init_tracefs(tr, d_tracer);
8904 }
8905 
8906 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8907 {
8908 	struct vfsmount *mnt;
8909 	struct file_system_type *type;
8910 
8911 	/*
8912 	 * To maintain backward compatibility for tools that mount
8913 	 * debugfs to get to the tracing facility, tracefs is automatically
8914 	 * mounted to the debugfs/tracing directory.
8915 	 */
8916 	type = get_fs_type("tracefs");
8917 	if (!type)
8918 		return NULL;
8919 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8920 	put_filesystem(type);
8921 	if (IS_ERR(mnt))
8922 		return NULL;
8923 	mntget(mnt);
8924 
8925 	return mnt;
8926 }
8927 
8928 /**
8929  * tracing_init_dentry - initialize top level trace array
8930  *
8931  * This is called when creating files or directories in the tracing
8932  * directory. It is called via fs_initcall() by any of the boot up code
8933  * and expects to return the dentry of the top level tracing directory.
8934  */
8935 struct dentry *tracing_init_dentry(void)
8936 {
8937 	struct trace_array *tr = &global_trace;
8938 
8939 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8940 		pr_warn("Tracing disabled due to lockdown\n");
8941 		return ERR_PTR(-EPERM);
8942 	}
8943 
8944 	/* The top level trace array uses  NULL as parent */
8945 	if (tr->dir)
8946 		return NULL;
8947 
8948 	if (WARN_ON(!tracefs_initialized()) ||
8949 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8950 		 WARN_ON(!debugfs_initialized())))
8951 		return ERR_PTR(-ENODEV);
8952 
8953 	/*
8954 	 * As there may still be users that expect the tracing
8955 	 * files to exist in debugfs/tracing, we must automount
8956 	 * the tracefs file system there, so older tools still
8957 	 * work with the newer kerenl.
8958 	 */
8959 	tr->dir = debugfs_create_automount("tracing", NULL,
8960 					   trace_automount, NULL);
8961 
8962 	return NULL;
8963 }
8964 
8965 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8966 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8967 
8968 static void __init trace_eval_init(void)
8969 {
8970 	int len;
8971 
8972 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8973 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8974 }
8975 
8976 #ifdef CONFIG_MODULES
8977 static void trace_module_add_evals(struct module *mod)
8978 {
8979 	if (!mod->num_trace_evals)
8980 		return;
8981 
8982 	/*
8983 	 * Modules with bad taint do not have events created, do
8984 	 * not bother with enums either.
8985 	 */
8986 	if (trace_module_has_bad_taint(mod))
8987 		return;
8988 
8989 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8990 }
8991 
8992 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8993 static void trace_module_remove_evals(struct module *mod)
8994 {
8995 	union trace_eval_map_item *map;
8996 	union trace_eval_map_item **last = &trace_eval_maps;
8997 
8998 	if (!mod->num_trace_evals)
8999 		return;
9000 
9001 	mutex_lock(&trace_eval_mutex);
9002 
9003 	map = trace_eval_maps;
9004 
9005 	while (map) {
9006 		if (map->head.mod == mod)
9007 			break;
9008 		map = trace_eval_jmp_to_tail(map);
9009 		last = &map->tail.next;
9010 		map = map->tail.next;
9011 	}
9012 	if (!map)
9013 		goto out;
9014 
9015 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9016 	kfree(map);
9017  out:
9018 	mutex_unlock(&trace_eval_mutex);
9019 }
9020 #else
9021 static inline void trace_module_remove_evals(struct module *mod) { }
9022 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9023 
9024 static int trace_module_notify(struct notifier_block *self,
9025 			       unsigned long val, void *data)
9026 {
9027 	struct module *mod = data;
9028 
9029 	switch (val) {
9030 	case MODULE_STATE_COMING:
9031 		trace_module_add_evals(mod);
9032 		break;
9033 	case MODULE_STATE_GOING:
9034 		trace_module_remove_evals(mod);
9035 		break;
9036 	}
9037 
9038 	return 0;
9039 }
9040 
9041 static struct notifier_block trace_module_nb = {
9042 	.notifier_call = trace_module_notify,
9043 	.priority = 0,
9044 };
9045 #endif /* CONFIG_MODULES */
9046 
9047 static __init int tracer_init_tracefs(void)
9048 {
9049 	struct dentry *d_tracer;
9050 
9051 	trace_access_lock_init();
9052 
9053 	d_tracer = tracing_init_dentry();
9054 	if (IS_ERR(d_tracer))
9055 		return 0;
9056 
9057 	event_trace_init();
9058 
9059 	init_tracer_tracefs(&global_trace, d_tracer);
9060 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9061 
9062 	trace_create_file("tracing_thresh", 0644, d_tracer,
9063 			&global_trace, &tracing_thresh_fops);
9064 
9065 	trace_create_file("README", 0444, d_tracer,
9066 			NULL, &tracing_readme_fops);
9067 
9068 	trace_create_file("saved_cmdlines", 0444, d_tracer,
9069 			NULL, &tracing_saved_cmdlines_fops);
9070 
9071 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9072 			  NULL, &tracing_saved_cmdlines_size_fops);
9073 
9074 	trace_create_file("saved_tgids", 0444, d_tracer,
9075 			NULL, &tracing_saved_tgids_fops);
9076 
9077 	trace_eval_init();
9078 
9079 	trace_create_eval_file(d_tracer);
9080 
9081 #ifdef CONFIG_MODULES
9082 	register_module_notifier(&trace_module_nb);
9083 #endif
9084 
9085 #ifdef CONFIG_DYNAMIC_FTRACE
9086 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9087 			NULL, &tracing_dyn_info_fops);
9088 #endif
9089 
9090 	create_trace_instances(d_tracer);
9091 
9092 	update_tracer_options(&global_trace);
9093 
9094 	return 0;
9095 }
9096 
9097 static int trace_panic_handler(struct notifier_block *this,
9098 			       unsigned long event, void *unused)
9099 {
9100 	if (ftrace_dump_on_oops)
9101 		ftrace_dump(ftrace_dump_on_oops);
9102 	return NOTIFY_OK;
9103 }
9104 
9105 static struct notifier_block trace_panic_notifier = {
9106 	.notifier_call  = trace_panic_handler,
9107 	.next           = NULL,
9108 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9109 };
9110 
9111 static int trace_die_handler(struct notifier_block *self,
9112 			     unsigned long val,
9113 			     void *data)
9114 {
9115 	switch (val) {
9116 	case DIE_OOPS:
9117 		if (ftrace_dump_on_oops)
9118 			ftrace_dump(ftrace_dump_on_oops);
9119 		break;
9120 	default:
9121 		break;
9122 	}
9123 	return NOTIFY_OK;
9124 }
9125 
9126 static struct notifier_block trace_die_notifier = {
9127 	.notifier_call = trace_die_handler,
9128 	.priority = 200
9129 };
9130 
9131 /*
9132  * printk is set to max of 1024, we really don't need it that big.
9133  * Nothing should be printing 1000 characters anyway.
9134  */
9135 #define TRACE_MAX_PRINT		1000
9136 
9137 /*
9138  * Define here KERN_TRACE so that we have one place to modify
9139  * it if we decide to change what log level the ftrace dump
9140  * should be at.
9141  */
9142 #define KERN_TRACE		KERN_EMERG
9143 
9144 void
9145 trace_printk_seq(struct trace_seq *s)
9146 {
9147 	/* Probably should print a warning here. */
9148 	if (s->seq.len >= TRACE_MAX_PRINT)
9149 		s->seq.len = TRACE_MAX_PRINT;
9150 
9151 	/*
9152 	 * More paranoid code. Although the buffer size is set to
9153 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9154 	 * an extra layer of protection.
9155 	 */
9156 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9157 		s->seq.len = s->seq.size - 1;
9158 
9159 	/* should be zero ended, but we are paranoid. */
9160 	s->buffer[s->seq.len] = 0;
9161 
9162 	printk(KERN_TRACE "%s", s->buffer);
9163 
9164 	trace_seq_init(s);
9165 }
9166 
9167 void trace_init_global_iter(struct trace_iterator *iter)
9168 {
9169 	iter->tr = &global_trace;
9170 	iter->trace = iter->tr->current_trace;
9171 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9172 	iter->array_buffer = &global_trace.array_buffer;
9173 
9174 	if (iter->trace && iter->trace->open)
9175 		iter->trace->open(iter);
9176 
9177 	/* Annotate start of buffers if we had overruns */
9178 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9179 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9180 
9181 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9182 	if (trace_clocks[iter->tr->clock_id].in_ns)
9183 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9184 }
9185 
9186 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9187 {
9188 	/* use static because iter can be a bit big for the stack */
9189 	static struct trace_iterator iter;
9190 	static atomic_t dump_running;
9191 	struct trace_array *tr = &global_trace;
9192 	unsigned int old_userobj;
9193 	unsigned long flags;
9194 	int cnt = 0, cpu;
9195 
9196 	/* Only allow one dump user at a time. */
9197 	if (atomic_inc_return(&dump_running) != 1) {
9198 		atomic_dec(&dump_running);
9199 		return;
9200 	}
9201 
9202 	/*
9203 	 * Always turn off tracing when we dump.
9204 	 * We don't need to show trace output of what happens
9205 	 * between multiple crashes.
9206 	 *
9207 	 * If the user does a sysrq-z, then they can re-enable
9208 	 * tracing with echo 1 > tracing_on.
9209 	 */
9210 	tracing_off();
9211 
9212 	local_irq_save(flags);
9213 	printk_nmi_direct_enter();
9214 
9215 	/* Simulate the iterator */
9216 	trace_init_global_iter(&iter);
9217 	/* Can not use kmalloc for iter.temp */
9218 	iter.temp = static_temp_buf;
9219 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9220 
9221 	for_each_tracing_cpu(cpu) {
9222 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9223 	}
9224 
9225 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9226 
9227 	/* don't look at user memory in panic mode */
9228 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9229 
9230 	switch (oops_dump_mode) {
9231 	case DUMP_ALL:
9232 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9233 		break;
9234 	case DUMP_ORIG:
9235 		iter.cpu_file = raw_smp_processor_id();
9236 		break;
9237 	case DUMP_NONE:
9238 		goto out_enable;
9239 	default:
9240 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9241 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9242 	}
9243 
9244 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9245 
9246 	/* Did function tracer already get disabled? */
9247 	if (ftrace_is_dead()) {
9248 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9249 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9250 	}
9251 
9252 	/*
9253 	 * We need to stop all tracing on all CPUS to read the
9254 	 * the next buffer. This is a bit expensive, but is
9255 	 * not done often. We fill all what we can read,
9256 	 * and then release the locks again.
9257 	 */
9258 
9259 	while (!trace_empty(&iter)) {
9260 
9261 		if (!cnt)
9262 			printk(KERN_TRACE "---------------------------------\n");
9263 
9264 		cnt++;
9265 
9266 		trace_iterator_reset(&iter);
9267 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9268 
9269 		if (trace_find_next_entry_inc(&iter) != NULL) {
9270 			int ret;
9271 
9272 			ret = print_trace_line(&iter);
9273 			if (ret != TRACE_TYPE_NO_CONSUME)
9274 				trace_consume(&iter);
9275 		}
9276 		touch_nmi_watchdog();
9277 
9278 		trace_printk_seq(&iter.seq);
9279 	}
9280 
9281 	if (!cnt)
9282 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9283 	else
9284 		printk(KERN_TRACE "---------------------------------\n");
9285 
9286  out_enable:
9287 	tr->trace_flags |= old_userobj;
9288 
9289 	for_each_tracing_cpu(cpu) {
9290 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9291 	}
9292 	atomic_dec(&dump_running);
9293 	printk_nmi_direct_exit();
9294 	local_irq_restore(flags);
9295 }
9296 EXPORT_SYMBOL_GPL(ftrace_dump);
9297 
9298 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9299 {
9300 	char **argv;
9301 	int argc, ret;
9302 
9303 	argc = 0;
9304 	ret = 0;
9305 	argv = argv_split(GFP_KERNEL, buf, &argc);
9306 	if (!argv)
9307 		return -ENOMEM;
9308 
9309 	if (argc)
9310 		ret = createfn(argc, argv);
9311 
9312 	argv_free(argv);
9313 
9314 	return ret;
9315 }
9316 
9317 #define WRITE_BUFSIZE  4096
9318 
9319 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9320 				size_t count, loff_t *ppos,
9321 				int (*createfn)(int, char **))
9322 {
9323 	char *kbuf, *buf, *tmp;
9324 	int ret = 0;
9325 	size_t done = 0;
9326 	size_t size;
9327 
9328 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9329 	if (!kbuf)
9330 		return -ENOMEM;
9331 
9332 	while (done < count) {
9333 		size = count - done;
9334 
9335 		if (size >= WRITE_BUFSIZE)
9336 			size = WRITE_BUFSIZE - 1;
9337 
9338 		if (copy_from_user(kbuf, buffer + done, size)) {
9339 			ret = -EFAULT;
9340 			goto out;
9341 		}
9342 		kbuf[size] = '\0';
9343 		buf = kbuf;
9344 		do {
9345 			tmp = strchr(buf, '\n');
9346 			if (tmp) {
9347 				*tmp = '\0';
9348 				size = tmp - buf + 1;
9349 			} else {
9350 				size = strlen(buf);
9351 				if (done + size < count) {
9352 					if (buf != kbuf)
9353 						break;
9354 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9355 					pr_warn("Line length is too long: Should be less than %d\n",
9356 						WRITE_BUFSIZE - 2);
9357 					ret = -EINVAL;
9358 					goto out;
9359 				}
9360 			}
9361 			done += size;
9362 
9363 			/* Remove comments */
9364 			tmp = strchr(buf, '#');
9365 
9366 			if (tmp)
9367 				*tmp = '\0';
9368 
9369 			ret = trace_run_command(buf, createfn);
9370 			if (ret)
9371 				goto out;
9372 			buf += size;
9373 
9374 		} while (done < count);
9375 	}
9376 	ret = done;
9377 
9378 out:
9379 	kfree(kbuf);
9380 
9381 	return ret;
9382 }
9383 
9384 __init static int tracer_alloc_buffers(void)
9385 {
9386 	int ring_buf_size;
9387 	int ret = -ENOMEM;
9388 
9389 
9390 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9391 		pr_warn("Tracing disabled due to lockdown\n");
9392 		return -EPERM;
9393 	}
9394 
9395 	/*
9396 	 * Make sure we don't accidently add more trace options
9397 	 * than we have bits for.
9398 	 */
9399 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9400 
9401 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9402 		goto out;
9403 
9404 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9405 		goto out_free_buffer_mask;
9406 
9407 	/* Only allocate trace_printk buffers if a trace_printk exists */
9408 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9409 		/* Must be called before global_trace.buffer is allocated */
9410 		trace_printk_init_buffers();
9411 
9412 	/* To save memory, keep the ring buffer size to its minimum */
9413 	if (ring_buffer_expanded)
9414 		ring_buf_size = trace_buf_size;
9415 	else
9416 		ring_buf_size = 1;
9417 
9418 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9419 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9420 
9421 	raw_spin_lock_init(&global_trace.start_lock);
9422 
9423 	/*
9424 	 * The prepare callbacks allocates some memory for the ring buffer. We
9425 	 * don't free the buffer if the if the CPU goes down. If we were to free
9426 	 * the buffer, then the user would lose any trace that was in the
9427 	 * buffer. The memory will be removed once the "instance" is removed.
9428 	 */
9429 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9430 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9431 				      NULL);
9432 	if (ret < 0)
9433 		goto out_free_cpumask;
9434 	/* Used for event triggers */
9435 	ret = -ENOMEM;
9436 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9437 	if (!temp_buffer)
9438 		goto out_rm_hp_state;
9439 
9440 	if (trace_create_savedcmd() < 0)
9441 		goto out_free_temp_buffer;
9442 
9443 	/* TODO: make the number of buffers hot pluggable with CPUS */
9444 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9445 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9446 		goto out_free_savedcmd;
9447 	}
9448 
9449 	if (global_trace.buffer_disabled)
9450 		tracing_off();
9451 
9452 	if (trace_boot_clock) {
9453 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9454 		if (ret < 0)
9455 			pr_warn("Trace clock %s not defined, going back to default\n",
9456 				trace_boot_clock);
9457 	}
9458 
9459 	/*
9460 	 * register_tracer() might reference current_trace, so it
9461 	 * needs to be set before we register anything. This is
9462 	 * just a bootstrap of current_trace anyway.
9463 	 */
9464 	global_trace.current_trace = &nop_trace;
9465 
9466 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9467 
9468 	ftrace_init_global_array_ops(&global_trace);
9469 
9470 	init_trace_flags_index(&global_trace);
9471 
9472 	register_tracer(&nop_trace);
9473 
9474 	/* Function tracing may start here (via kernel command line) */
9475 	init_function_trace();
9476 
9477 	/* All seems OK, enable tracing */
9478 	tracing_disabled = 0;
9479 
9480 	atomic_notifier_chain_register(&panic_notifier_list,
9481 				       &trace_panic_notifier);
9482 
9483 	register_die_notifier(&trace_die_notifier);
9484 
9485 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9486 
9487 	INIT_LIST_HEAD(&global_trace.systems);
9488 	INIT_LIST_HEAD(&global_trace.events);
9489 	INIT_LIST_HEAD(&global_trace.hist_vars);
9490 	INIT_LIST_HEAD(&global_trace.err_log);
9491 	list_add(&global_trace.list, &ftrace_trace_arrays);
9492 
9493 	apply_trace_boot_options();
9494 
9495 	register_snapshot_cmd();
9496 
9497 	return 0;
9498 
9499 out_free_savedcmd:
9500 	free_saved_cmdlines_buffer(savedcmd);
9501 out_free_temp_buffer:
9502 	ring_buffer_free(temp_buffer);
9503 out_rm_hp_state:
9504 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9505 out_free_cpumask:
9506 	free_cpumask_var(global_trace.tracing_cpumask);
9507 out_free_buffer_mask:
9508 	free_cpumask_var(tracing_buffer_mask);
9509 out:
9510 	return ret;
9511 }
9512 
9513 void __init early_trace_init(void)
9514 {
9515 	if (tracepoint_printk) {
9516 		tracepoint_print_iter =
9517 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9518 		if (MEM_FAIL(!tracepoint_print_iter,
9519 			     "Failed to allocate trace iterator\n"))
9520 			tracepoint_printk = 0;
9521 		else
9522 			static_key_enable(&tracepoint_printk_key.key);
9523 	}
9524 	tracer_alloc_buffers();
9525 }
9526 
9527 void __init trace_init(void)
9528 {
9529 	trace_event_init();
9530 }
9531 
9532 __init static int clear_boot_tracer(void)
9533 {
9534 	/*
9535 	 * The default tracer at boot buffer is an init section.
9536 	 * This function is called in lateinit. If we did not
9537 	 * find the boot tracer, then clear it out, to prevent
9538 	 * later registration from accessing the buffer that is
9539 	 * about to be freed.
9540 	 */
9541 	if (!default_bootup_tracer)
9542 		return 0;
9543 
9544 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9545 	       default_bootup_tracer);
9546 	default_bootup_tracer = NULL;
9547 
9548 	return 0;
9549 }
9550 
9551 fs_initcall(tracer_init_tracefs);
9552 late_initcall_sync(clear_boot_tracer);
9553 
9554 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9555 __init static int tracing_set_default_clock(void)
9556 {
9557 	/* sched_clock_stable() is determined in late_initcall */
9558 	if (!trace_boot_clock && !sched_clock_stable()) {
9559 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9560 			pr_warn("Can not set tracing clock due to lockdown\n");
9561 			return -EPERM;
9562 		}
9563 
9564 		printk(KERN_WARNING
9565 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9566 		       "If you want to keep using the local clock, then add:\n"
9567 		       "  \"trace_clock=local\"\n"
9568 		       "on the kernel command line\n");
9569 		tracing_set_clock(&global_trace, "global");
9570 	}
9571 
9572 	return 0;
9573 }
9574 late_initcall_sync(tracing_set_default_clock);
9575 #endif
9576