xref: /openbmc/linux/kernel/trace/trace.c (revision babbdf5b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and cleared to zero when tracer
112  * initialization succeeds; that is the only place that ever
113  * clears it.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" points to NULL so it can be told apart from a valid
152 	 * "mod" or "eval_string" pointer in the union that overlays it
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206 		return 1;
207 	}
208 
209 	return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
254 		tracepoint_printk = 1;
255 	return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258 
259 unsigned long long ns2usecs(u64 nsec)
260 {
261 	nsec += 500;
262 	do_div(nsec, 1000);
263 	return nsec;
264 }
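
/*
 * For reference, the +500 makes ns2usecs() round to the nearest
 * microsecond: e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */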
265 
266 static void
267 trace_process_export(struct trace_export *export,
268 	       struct ring_buffer_event *event, int flag)
269 {
270 	struct trace_entry *entry;
271 	unsigned int size = 0;
272 
273 	if (export->flags & flag) {
274 		entry = ring_buffer_event_data(event);
275 		size = ring_buffer_event_length(event);
276 		export->write(export, entry, size);
277 	}
278 }
279 
280 static DEFINE_MUTEX(ftrace_export_lock);
281 
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283 
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287 
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290 	if (export->flags & TRACE_EXPORT_FUNCTION)
291 		static_branch_inc(&trace_function_exports_enabled);
292 
293 	if (export->flags & TRACE_EXPORT_EVENT)
294 		static_branch_inc(&trace_event_exports_enabled);
295 
296 	if (export->flags & TRACE_EXPORT_MARKER)
297 		static_branch_inc(&trace_marker_exports_enabled);
298 }
299 
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302 	if (export->flags & TRACE_EXPORT_FUNCTION)
303 		static_branch_dec(&trace_function_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_EVENT)
306 		static_branch_dec(&trace_event_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_MARKER)
309 		static_branch_dec(&trace_marker_exports_enabled);
310 }
311 
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314 	struct trace_export *export;
315 
316 	preempt_disable_notrace();
317 
318 	export = rcu_dereference_raw_check(ftrace_exports_list);
319 	while (export) {
320 		trace_process_export(export, event, flag);
321 		export = rcu_dereference_raw_check(export->next);
322 	}
323 
324 	preempt_enable_notrace();
325 }
326 
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330 	rcu_assign_pointer(export->next, *list);
331 	/*
332 	 * We are inserting export into the list, but another
333 	 * CPU might be walking that list. We need to make sure
334 	 * the export->next pointer is valid before another CPU sees
335 	 * the export pointer itself inserted into the list.
336 	 */
337 	rcu_assign_pointer(*list, export);
338 }
339 
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	struct trace_export **p;
344 
345 	for (p = list; *p != NULL; p = &(*p)->next)
346 		if (*p == export)
347 			break;
348 
349 	if (*p != export)
350 		return -1;
351 
352 	rcu_assign_pointer(*p, (*p)->next);
353 
354 	return 0;
355 }
356 
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360 	ftrace_exports_enable(export);
361 
362 	add_trace_export(list, export);
363 }
364 
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	int ret;
369 
370 	ret = rm_trace_export(list, export);
371 	ftrace_exports_disable(export);
372 
373 	return ret;
374 }
375 
376 int register_ftrace_export(struct trace_export *export)
377 {
378 	if (WARN_ON_ONCE(!export->write))
379 		return -1;
380 
381 	mutex_lock(&ftrace_export_lock);
382 
383 	add_ftrace_export(&ftrace_exports_list, export);
384 
385 	mutex_unlock(&ftrace_export_lock);
386 
387 	return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390 
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393 	int ret;
394 
395 	mutex_lock(&ftrace_export_lock);
396 
397 	ret = rm_ftrace_export(&ftrace_exports_list, export);
398 
399 	mutex_unlock(&ftrace_export_lock);
400 
401 	return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
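
/*
 * Example usage (illustrative sketch only, not part of this file): a
 * module can hook the export list to forward raw trace entries
 * elsewhere. The my_export_write()/my_export names are hypothetical;
 * register_ftrace_export(), unregister_ftrace_export() and the
 * TRACE_EXPORT_* flags are the real API.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		... push the raw entry to a device, buffer, etc. ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */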
404 
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS						\
407 	(FUNCTION_DEFAULT_FLAGS |					\
408 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
409 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
410 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
411 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
412 	 TRACE_ITER_HASH_PTR)
413 
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
416 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417 
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421 
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427 	.trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429 
430 LIST_HEAD(ftrace_trace_arrays);
431 
432 int trace_array_get(struct trace_array *this_tr)
433 {
434 	struct trace_array *tr;
435 	int ret = -ENODEV;
436 
437 	mutex_lock(&trace_types_lock);
438 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439 		if (tr == this_tr) {
440 			tr->ref++;
441 			ret = 0;
442 			break;
443 		}
444 	}
445 	mutex_unlock(&trace_types_lock);
446 
447 	return ret;
448 }
449 
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452 	WARN_ON(!this_tr->ref);
453 	this_tr->ref--;
454 }
455 
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr: pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467 	if (!this_tr)
468 		return;
469 
470 	mutex_lock(&trace_types_lock);
471 	__trace_array_put(this_tr);
472 	mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475 
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478 	int ret;
479 
480 	ret = security_locked_down(LOCKDOWN_TRACEFS);
481 	if (ret)
482 		return ret;
483 
484 	if (tracing_disabled)
485 		return -ENODEV;
486 
487 	if (tr && trace_array_get(tr) < 0)
488 		return -ENODEV;
489 
490 	return 0;
491 }
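
/*
 * Typical usage (sketch): a tracefs file's open() callback calls this
 * helper, and the matching release() drops the reference again with
 * trace_array_put(). The foo_open() name below is made up for
 * illustration only.
 *
 *	static int foo_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 */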
492 
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 			      struct trace_buffer *buffer,
495 			      struct ring_buffer_event *event)
496 {
497 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 	    !filter_match_preds(call->filter, rec)) {
499 		__trace_event_discard_commit(buffer, event);
500 		return 1;
501 	}
502 
503 	return 0;
504 }
505 
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508 	vfree(pid_list->pids);
509 	kfree(pid_list);
510 }
511 
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522 	/*
523 	 * If pid_max changed after filtered_pids was created, we
524 	 * by default ignore all pids greater than the previous pid_max.
525 	 */
526 	if (search_pid >= filtered_pids->pid_max)
527 		return false;
528 
529 	return test_bit(search_pid, filtered_pids->pids);
530 }
531 
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 		       struct trace_pid_list *filtered_no_pids,
545 		       struct task_struct *task)
546 {
547 	/*
548 	 * If filtered_no_pids is not empty, and the task's pid is listed
549 	 * in filtered_no_pids, then return true.
550 	 * Otherwise, if filtered_pids is empty, that means we can
551 	 * trace all tasks. If it has content, then only trace pids
552 	 * within filtered_pids.
553 	 */
554 
555 	return (filtered_pids &&
556 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 		(filtered_no_pids &&
558 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560 
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 				  struct task_struct *self,
575 				  struct task_struct *task)
576 {
577 	if (!pid_list)
578 		return;
579 
580 	/* For forks, we only add if the forking task is listed */
581 	if (self) {
582 		if (!trace_find_filtered_pid(pid_list, self->pid))
583 			return;
584 	}
585 
586 	/* Sorry, but we don't support pid_max changing after setting */
587 	if (task->pid >= pid_list->pid_max)
588 		return;
589 
590 	/* "self" is set for forks, and NULL for exits */
591 	if (self)
592 		set_bit(task->pid, pid_list->pids);
593 	else
594 		clear_bit(task->pid, pid_list->pids);
595 }
596 
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611 	unsigned long pid = (unsigned long)v;
612 
613 	(*pos)++;
614 
615 	/* pid already is +1 of the actual previous bit */
616 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617 
618 	/* Return pid + 1 to allow zero to be represented */
619 	if (pid < pid_list->pid_max)
620 		return (void *)(pid + 1);
621 
622 	return NULL;
623 }
624 
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638 	unsigned long pid;
639 	loff_t l = 0;
640 
641 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 	if (pid >= pid_list->pid_max)
643 		return NULL;
644 
645 	/* Return pid + 1 so that zero can be the exit value */
646 	for (pid++; pid && l < *pos;
647 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648 		;
649 	return (void *)pid;
650 }
651 
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662 	unsigned long pid = (unsigned long)v - 1;
663 
664 	seq_printf(m, "%lu\n", pid);
665 	return 0;
666 }
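
/*
 * These helpers are designed to be wired straight into a
 * struct seq_operations (sketch; the p_start/p_next/p_stop wrapper
 * names are hypothetical, and real users also take whatever locking
 * protects their pid_list in start/stop):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */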
667 
668 /* 127 chars plus the terminating nul (128 bytes) should be much more than enough */
669 #define PID_BUF_SIZE		127
670 
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 		    struct trace_pid_list **new_pid_list,
673 		    const char __user *ubuf, size_t cnt)
674 {
675 	struct trace_pid_list *pid_list;
676 	struct trace_parser parser;
677 	unsigned long val;
678 	int nr_pids = 0;
679 	ssize_t read = 0;
680 	ssize_t ret = 0;
681 	loff_t pos;
682 	pid_t pid;
683 
684 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685 		return -ENOMEM;
686 
687 	/*
688 	 * Always create a new array when the user adds new pids. The
689 	 * write is an all-or-nothing operation: if anything fails, the
690 	 * current list is left unmodified. The new array replaces the
691 	 * old one only when the whole write succeeds.
692 	 */
693 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694 	if (!pid_list) {
695 		trace_parser_put(&parser);
696 		return -ENOMEM;
697 	}
698 
699 	pid_list->pid_max = READ_ONCE(pid_max);
700 
701 	/* Only truncating will shrink pid_max */
702 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 		pid_list->pid_max = filtered_pids->pid_max;
704 
705 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 	if (!pid_list->pids) {
707 		trace_parser_put(&parser);
708 		kfree(pid_list);
709 		return -ENOMEM;
710 	}
711 
712 	if (filtered_pids) {
713 		/* copy the current bits to the new max */
714 		for_each_set_bit(pid, filtered_pids->pids,
715 				 filtered_pids->pid_max) {
716 			set_bit(pid, pid_list->pids);
717 			nr_pids++;
718 		}
719 	}
720 
721 	while (cnt > 0) {
722 
723 		pos = 0;
724 
725 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 		if (ret < 0 || !trace_parser_loaded(&parser))
727 			break;
728 
729 		read += ret;
730 		ubuf += ret;
731 		cnt -= ret;
732 
733 		ret = -EINVAL;
734 		if (kstrtoul(parser.buffer, 0, &val))
735 			break;
736 		if (val >= pid_list->pid_max)
737 			break;
738 
739 		pid = (pid_t)val;
740 
741 		set_bit(pid, pid_list->pids);
742 		nr_pids++;
743 
744 		trace_parser_clear(&parser);
745 		ret = 0;
746 	}
747 	trace_parser_put(&parser);
748 
749 	if (ret < 0) {
750 		trace_free_pid_list(pid_list);
751 		return ret;
752 	}
753 
754 	if (!nr_pids) {
755 		/* Cleared the list of pids */
756 		trace_free_pid_list(pid_list);
757 		read = ret;
758 		pid_list = NULL;
759 	}
760 
761 	*new_pid_list = pid_list;
762 
763 	return read;
764 }
765 
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768 	u64 ts;
769 
770 	/* Early boot up does not have a buffer yet */
771 	if (!buf->buffer)
772 		return trace_clock_local();
773 
774 	ts = ring_buffer_time_stamp(buf->buffer);
775 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776 
777 	return ts;
778 }
779 
780 u64 ftrace_now(int cpu)
781 {
782 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784 
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" so that fast paths, such as the
790  * irqsoff tracer, can check it cheaply. But it may be inaccurate due
791  * to races. If you need to know the accurate state, use
792  * tracing_is_on(), which is a little slower but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796 	/*
797 	 * For quick access (irqsoff uses this in fast path), just
798 	 * return the mirror variable of the state of the ring buffer.
799 	 * It's a little racy, but we don't really care.
800 	 */
801 	smp_rmb();
802 	return !global_trace.buffer_disabled;
803 }
804 
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to the low value of 16384 so that,
811  * if a dump on oops happens, you do not have to wait for an
812  * enormous amount of output. It is configurable at both boot
813  * time and run time anyway.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
816 
817 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818 
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer		*trace_types __read_mostly;
821 
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826 
827 /*
828  * serialize the access of the ring buffer
829  *
830  * The ring buffer serializes readers, but that is only low level
831  * protection. The validity of the events (returned by ring_buffer_peek()
832  * etc.) is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow other processes
835  * to consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the events producer.
839  *   B) the page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different per-CPU
843  * ring buffers concurrently.
844  *
845  * They do not distinguish read-only from read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848 
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852 
853 static inline void trace_access_lock(int cpu)
854 {
855 	if (cpu == RING_BUFFER_ALL_CPUS) {
856 		/* gain it for accessing the whole ring buffer. */
857 		down_write(&all_cpu_access_lock);
858 	} else {
859 		/* gain it for accessing a cpu ring buffer. */
860 
861 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 		down_read(&all_cpu_access_lock);
863 
864 		/* Secondly block other access to this @cpu ring buffer. */
865 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
866 	}
867 }
868 
869 static inline void trace_access_unlock(int cpu)
870 {
871 	if (cpu == RING_BUFFER_ALL_CPUS) {
872 		up_write(&all_cpu_access_lock);
873 	} else {
874 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 		up_read(&all_cpu_access_lock);
876 	}
877 }
878 
879 static inline void trace_access_lock_init(void)
880 {
881 	int cpu;
882 
883 	for_each_possible_cpu(cpu)
884 		mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886 
887 #else
888 
889 static DEFINE_MUTEX(access_lock);
890 
891 static inline void trace_access_lock(int cpu)
892 {
893 	(void)cpu;
894 	mutex_lock(&access_lock);
895 }
896 
897 static inline void trace_access_unlock(int cpu)
898 {
899 	(void)cpu;
900 	mutex_unlock(&access_lock);
901 }
902 
903 static inline void trace_access_lock_init(void)
904 {
905 }
906 
907 #endif
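
/*
 * Usage sketch: readers that consume events bracket their access with
 * these helpers, roughly as the pipe read path later in this file does:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... consume / print the events ...
 *	trace_access_unlock(iter->cpu_file);
 */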
908 
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 				 unsigned int trace_ctx,
912 				 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 				      struct trace_buffer *buffer,
915 				      unsigned int trace_ctx,
916 				      int skip, struct pt_regs *regs);
917 
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 					unsigned int trace_ctx,
921 					int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 				      struct trace_buffer *buffer,
926 				      unsigned int trace_ctx,
927 				      int skip, struct pt_regs *regs)
928 {
929 }
930 
931 #endif
932 
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 		  int type, unsigned int trace_ctx)
936 {
937 	struct trace_entry *ent = ring_buffer_event_data(event);
938 
939 	tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941 
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944 			  int type,
945 			  unsigned long len,
946 			  unsigned int trace_ctx)
947 {
948 	struct ring_buffer_event *event;
949 
950 	event = ring_buffer_lock_reserve(buffer, len);
951 	if (event != NULL)
952 		trace_event_setup(event, type, trace_ctx);
953 
954 	return event;
955 }
956 
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959 	if (tr->array_buffer.buffer)
960 		ring_buffer_record_on(tr->array_buffer.buffer);
961 	/*
962 	 * This flag is looked at when buffers haven't been allocated
963 	 * yet, or by some tracers (like irqsoff) that just want to
964 	 * know if the ring buffer has been disabled, but can handle
965 	 * races where it gets disabled while we still do a record.
966 	 * As the check is in the fast path of the tracers, it is more
967 	 * important to be fast than accurate.
968 	 */
969 	tr->buffer_disabled = 0;
970 	/* Make the flag seen by readers */
971 	smp_wmb();
972 }
973 
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982 	tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985 
986 
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990 	__this_cpu_write(trace_taskinfo_save, true);
991 
992 	/* If this is the temp buffer, we need to commit fully */
993 	if (this_cpu_read(trace_buffered_event) == event) {
994 		/* Length is in event->array[0] */
995 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 		/* Release the temp buffer */
997 		this_cpu_dec(trace_buffered_event_cnt);
998 	} else
999 		ring_buffer_unlock_commit(buffer, event);
1000 }
1001 
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:	   The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010 	struct ring_buffer_event *event;
1011 	struct trace_buffer *buffer;
1012 	struct print_entry *entry;
1013 	unsigned int trace_ctx;
1014 	int alloc;
1015 
1016 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017 		return 0;
1018 
1019 	if (unlikely(tracing_selftest_running || tracing_disabled))
1020 		return 0;
1021 
1022 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 
1024 	trace_ctx = tracing_gen_ctx();
1025 	buffer = global_trace.array_buffer.buffer;
1026 	ring_buffer_nest_start(buffer);
1027 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028 					    trace_ctx);
1029 	if (!event) {
1030 		size = 0;
1031 		goto out;
1032 	}
1033 
1034 	entry = ring_buffer_event_data(event);
1035 	entry->ip = ip;
1036 
1037 	memcpy(&entry->buf, str, size);
1038 
1039 	/* Add a newline if necessary */
1040 	if (entry->buf[size - 1] != '\n') {
1041 		entry->buf[size] = '\n';
1042 		entry->buf[size + 1] = '\0';
1043 	} else
1044 		entry->buf[size] = '\0';
1045 
1046 	__buffer_unlock_commit(buffer, event);
1047 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049 	ring_buffer_nest_end(buffer);
1050 	return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
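
/*
 * Callers normally do not use __trace_puts() directly but go through
 * the trace_puts() macro, which picks __trace_puts() or __trace_bputs()
 * depending on whether the string is a built-in constant, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */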
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 					   void *cond_data)
1100 {
1101 	struct tracer *tracer = tr->current_trace;
1102 	unsigned long flags;
1103 
1104 	if (in_nmi()) {
1105 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1107 		return;
1108 	}
1109 
1110 	if (!tr->allocated_snapshot) {
1111 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 		internal_trace_puts("*** stopping trace here!   ***\n");
1113 		tracing_off();
1114 		return;
1115 	}
1116 
1117 	/* Note, the snapshot cannot be used while the tracer itself is using it */
1118 	if (tracer->use_max_tr) {
1119 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121 		return;
1122 	}
1123 
1124 	local_irq_save(flags);
1125 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 	local_irq_restore(flags);
1127 }
1128 
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 	tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:		The tracing instance to snapshot
1159  * @cond_data:	The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional: the snapshot only happens if the cond_snapshot.update()
1163  * implementation receiving the cond_data returns true.
1164  * A true return means that the trace array's cond_snapshot update()
1165  * operation used the cond_data to determine whether the snapshot
1166  * should be taken, and if it was, presumably saved the cond_data
1167  * along with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 	tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:		The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already taken.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 	void *cond_data = NULL;
1192 
1193 	arch_spin_lock(&tr->max_lock);
1194 
1195 	if (tr->cond_snapshot)
1196 		cond_data = tr->cond_snapshot->cond_data;
1197 
1198 	arch_spin_unlock(&tr->max_lock);
1199 
1200 	return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 					struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207 
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210 	int ret;
1211 
1212 	if (!tr->allocated_snapshot) {
1213 
1214 		/* allocate spare buffer */
1215 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217 		if (ret < 0)
1218 			return ret;
1219 
1220 		tr->allocated_snapshot = true;
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228 	/*
1229 	 * We don't free the ring buffer; instead, we resize it because
1230 	 * the max_tr ring buffer has some state (e.g. ring->clock) that
1231 	 * we want to preserve.
1232 	 */
1233 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 	set_buffer_entries(&tr->max_buffer, 1);
1235 	tracing_reset_online_cpus(&tr->max_buffer);
1236 	tr->allocated_snapshot = false;
1237 }
1238 
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251 	struct trace_array *tr = &global_trace;
1252 	int ret;
1253 
1254 	ret = tracing_alloc_snapshot_instance(tr);
1255 	WARN_ON(ret < 0);
1256 
1257 	return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260 
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274 	int ret;
1275 
1276 	ret = tracing_alloc_snapshot();
1277 	if (ret < 0)
1278 		return;
1279 
1280 	tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
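
/*
 * Example (sketch): from a context that may sleep, a debugging hook can
 * call tracing_snapshot_alloc() once to both allocate the spare buffer
 * and take a first snapshot; later snapshots can then be taken from any
 * context with tracing_snapshot(). The captured data is read back
 * through the "snapshot" file in tracefs.
 */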
1283 
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:		The tracing instance
1287  * @cond_data:	User data to associate with the snapshot
1288  * @update:	Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 				 cond_update_fn_t update)
1299 {
1300 	struct cond_snapshot *cond_snapshot;
1301 	int ret = 0;
1302 
1303 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304 	if (!cond_snapshot)
1305 		return -ENOMEM;
1306 
1307 	cond_snapshot->cond_data = cond_data;
1308 	cond_snapshot->update = update;
1309 
1310 	mutex_lock(&trace_types_lock);
1311 
1312 	ret = tracing_alloc_snapshot_instance(tr);
1313 	if (ret)
1314 		goto fail_unlock;
1315 
1316 	if (tr->current_trace->use_max_tr) {
1317 		ret = -EBUSY;
1318 		goto fail_unlock;
1319 	}
1320 
1321 	/*
1322 	 * The cond_snapshot can only change to NULL without the
1323 	 * trace_types_lock. We don't care if we race with it going
1324 	 * to NULL, but we want to make sure that it's not set to
1325 	 * something other than NULL when we get here, which we can
1326 	 * do safely with only holding the trace_types_lock and not
1327 	 * having to take the max_lock.
1328 	 */
1329 	if (tr->cond_snapshot) {
1330 		ret = -EBUSY;
1331 		goto fail_unlock;
1332 	}
1333 
1334 	arch_spin_lock(&tr->max_lock);
1335 	tr->cond_snapshot = cond_snapshot;
1336 	arch_spin_unlock(&tr->max_lock);
1337 
1338 	mutex_unlock(&trace_types_lock);
1339 
1340 	return ret;
1341 
1342  fail_unlock:
1343 	mutex_unlock(&trace_types_lock);
1344 	kfree(cond_snapshot);
1345 	return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
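
/*
 * Sketch of a conditional-snapshot user (the my_* names are made up):
 * the update callback decides, on each tracing_snapshot_cond() call,
 * whether the buffer swap actually happens.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->seen > s->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */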
1348 
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:		The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361 	int ret = 0;
1362 
1363 	arch_spin_lock(&tr->max_lock);
1364 
1365 	if (!tr->cond_snapshot)
1366 		ret = -EINVAL;
1367 	else {
1368 		kfree(tr->cond_snapshot);
1369 		tr->cond_snapshot = NULL;
1370 	}
1371 
1372 	arch_spin_unlock(&tr->max_lock);
1373 
1374 	return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391 	return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396 	/* Give warning */
1397 	tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402 	return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407 	return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412 	return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416 
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419 	if (tr->array_buffer.buffer)
1420 		ring_buffer_record_off(tr->array_buffer.buffer);
1421 	/*
1422 	 * This flag is looked at when buffers haven't been allocated
1423 	 * yet, or by some tracers (like irqsoff) that just want to
1424 	 * know if the ring buffer has been disabled, but can handle
1425 	 * races where it gets disabled while we still do a record.
1426 	 * As the check is in the fast path of the tracers, it is more
1427 	 * important to be fast than accurate.
1428 	 */
1429 	tr->buffer_disabled = 1;
1430 	/* Make the flag seen by readers */
1431 	smp_wmb();
1432 }
1433 
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444 	tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447 
1448 void disable_trace_on_warning(void)
1449 {
1450 	if (__disable_trace_on_warning) {
1451 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 			"Disabling tracing due to warning\n");
1453 		tracing_off();
1454 	}
1455 }
1456 
1457 /**
1458  * tracer_tracing_is_on - show the real state of the ring buffer
1459  * @tr: the trace array to check
1460  *
1461  * Shows the real state of the ring buffer: whether it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465 	if (tr->array_buffer.buffer)
1466 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 	return !tr->buffer_disabled;
1468 }
1469 
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475 	return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478 
1479 static int __init set_buf_size(char *str)
1480 {
1481 	unsigned long buf_size;
1482 
1483 	if (!str)
1484 		return 0;
1485 	buf_size = memparse(str, &str);
1486 	/* nr_entries can not be zero */
1487 	if (buf_size == 0)
1488 		return 0;
1489 	trace_buf_size = buf_size;
1490 	return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493 
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496 	unsigned long threshold;
1497 	int ret;
1498 
1499 	if (!str)
1500 		return 0;
1501 	ret = kstrtoul(str, 0, &threshold);
1502 	if (ret < 0)
1503 		return 0;
1504 	tracing_thresh = threshold * 1000;
1505 	return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508 
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511 	return nsecs / 1000;
1512 }
1513 
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522 
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525 	TRACE_FLAGS
1526 	NULL
1527 };
1528 
1529 static struct {
1530 	u64 (*func)(void);
1531 	const char *name;
1532 	int in_ns;		/* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 	{ trace_clock_local,		"local",	1 },
1535 	{ trace_clock_global,		"global",	1 },
1536 	{ trace_clock_counter,		"counter",	0 },
1537 	{ trace_clock_jiffies,		"uptime",	0 },
1538 	{ trace_clock,			"perf",		1 },
1539 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1540 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1541 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1542 	ARCH_TRACE_CLOCKS
1543 };
1544 
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547 	if (trace_clocks[tr->clock_id].in_ns)
1548 		return true;
1549 
1550 	return false;
1551 }
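
/*
 * The clock in use is selected at run time through tracefs, e.g.:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * (assuming tracefs is mounted at /sys/kernel/tracing; architectures
 * may list extra clocks via ARCH_TRACE_CLOCKS).
 */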
1552 
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558 	memset(parser, 0, sizeof(*parser));
1559 
1560 	parser->buffer = kmalloc(size, GFP_KERNEL);
1561 	if (!parser->buffer)
1562 		return 1;
1563 
1564 	parser->size = size;
1565 	return 0;
1566 }
1567 
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573 	kfree(parser->buffer);
1574 	parser->buffer = NULL;
1575 }
1576 
1577 /*
1578  * trace_get_user - reads the user input string separated by  space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 	size_t cnt, loff_t *ppos)
1590 {
1591 	char ch;
1592 	size_t read = 0;
1593 	ssize_t ret;
1594 
1595 	if (!*ppos)
1596 		trace_parser_clear(parser);
1597 
1598 	ret = get_user(ch, ubuf++);
1599 	if (ret)
1600 		goto out;
1601 
1602 	read++;
1603 	cnt--;
1604 
1605 	/*
1606 	 * If the parser has not finished with the last write,
1607 	 * continue reading the user input without skipping spaces.
1608 	 */
1609 	if (!parser->cont) {
1610 		/* skip white space */
1611 		while (cnt && isspace(ch)) {
1612 			ret = get_user(ch, ubuf++);
1613 			if (ret)
1614 				goto out;
1615 			read++;
1616 			cnt--;
1617 		}
1618 
1619 		parser->idx = 0;
1620 
1621 		/* only spaces were written */
1622 		if (isspace(ch) || !ch) {
1623 			*ppos += read;
1624 			ret = read;
1625 			goto out;
1626 		}
1627 	}
1628 
1629 	/* read the non-space input */
1630 	while (cnt && !isspace(ch) && ch) {
1631 		if (parser->idx < parser->size - 1)
1632 			parser->buffer[parser->idx++] = ch;
1633 		else {
1634 			ret = -EINVAL;
1635 			goto out;
1636 		}
1637 		ret = get_user(ch, ubuf++);
1638 		if (ret)
1639 			goto out;
1640 		read++;
1641 		cnt--;
1642 	}
1643 
1644 	/* We either got finished input or we have to wait for another call. */
1645 	if (isspace(ch) || !ch) {
1646 		parser->buffer[parser->idx] = 0;
1647 		parser->cont = false;
1648 	} else if (parser->idx < parser->size - 1) {
1649 		parser->cont = true;
1650 		parser->buffer[parser->idx++] = ch;
1651 		/* Make sure the parsed string always terminates with '\0'. */
1652 		parser->buffer[parser->idx] = 0;
1653 	} else {
1654 		ret = -EINVAL;
1655 		goto out;
1656 	}
1657 
1658 	*ppos += read;
1659 	ret = read;
1660 
1661 out:
1662 	return ret;
1663 }
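
/*
 * Typical calling pattern (sketch, mirroring trace_pid_write() above):
 * call trace_get_user() in a loop, handling one whitespace-separated
 * token per iteration until the user buffer is drained:
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... use parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 */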
1664 
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668 	int len;
1669 
1670 	if (trace_seq_used(s) <= s->seq.readpos)
1671 		return -EBUSY;
1672 
1673 	len = trace_seq_used(s) - s->seq.readpos;
1674 	if (cnt > len)
1675 		cnt = len;
1676 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677 
1678 	s->seq.readpos += cnt;
1679 	return cnt;
1680 }
1681 
1682 unsigned long __read_mostly	tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684 
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 	defined(CONFIG_FSNOTIFY)
1687 
1688 static struct workqueue_struct *fsnotify_wq;
1689 
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692 	struct trace_array *tr = container_of(work, struct trace_array,
1693 					      fsnotify_work);
1694 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696 
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699 	struct trace_array *tr = container_of(iwork, struct trace_array,
1700 					      fsnotify_irqwork);
1701 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703 
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 				     struct dentry *d_tracer)
1706 {
1707 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 					      d_tracer, &tr->max_latency,
1711 					      &tracing_max_lat_fops);
1712 }
1713 
1714 __init static int latency_fsnotify_init(void)
1715 {
1716 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1718 	if (!fsnotify_wq) {
1719 		pr_err("Unable to allocate tr_max_lat_wq\n");
1720 		return -ENOMEM;
1721 	}
1722 	return 0;
1723 }
1724 
1725 late_initcall_sync(latency_fsnotify_init);
1726 
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729 	if (!fsnotify_wq)
1730 		return;
1731 	/*
1732 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 	 * possible that we are called from __schedule() or do_idle(), which
1734 	 * could cause a deadlock.
1735 	 */
1736 	irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738 
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744 
1745 #define trace_create_maxlat_file(tr, d_tracer)				\
1746 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1747 			  &tr->max_latency, &tracing_max_lat_fops)
1748 
1749 #endif
1750 
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760 	struct array_buffer *trace_buf = &tr->array_buffer;
1761 	struct array_buffer *max_buf = &tr->max_buffer;
1762 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764 
1765 	max_buf->cpu = cpu;
1766 	max_buf->time_start = data->preempt_timestamp;
1767 
1768 	max_data->saved_latency = tr->max_latency;
1769 	max_data->critical_start = data->critical_start;
1770 	max_data->critical_end = data->critical_end;
1771 
1772 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 	max_data->pid = tsk->pid;
1774 	/*
1775 	 * If tsk == current, then use current_uid(), as that does not use
1776 	 * RCU. The irq tracer can be called out of RCU scope.
1777 	 */
1778 	if (tsk == current)
1779 		max_data->uid = current_uid();
1780 	else
1781 		max_data->uid = task_uid(tsk);
1782 
1783 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 	max_data->policy = tsk->policy;
1785 	max_data->rt_priority = tsk->rt_priority;
1786 
1787 	/* record this tasks comm */
1788 	tracing_record_cmdline(tsk);
1789 	latency_fsnotify(tr);
1790 }
1791 
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804 	      void *cond_data)
1805 {
1806 	if (tr->stop_count)
1807 		return;
1808 
1809 	WARN_ON_ONCE(!irqs_disabled());
1810 
1811 	if (!tr->allocated_snapshot) {
1812 		/* Only the nop tracer should hit this when disabling */
1813 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814 		return;
1815 	}
1816 
1817 	arch_spin_lock(&tr->max_lock);
1818 
1819 	/* Inherit the recordable setting from array_buffer */
1820 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 		ring_buffer_record_on(tr->max_buffer.buffer);
1822 	else
1823 		ring_buffer_record_off(tr->max_buffer.buffer);
1824 
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827 		goto out_unlock;
1828 #endif
1829 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830 
1831 	__update_max_tr(tr, tsk, cpu);
1832 
1833  out_unlock:
1834 	arch_spin_unlock(&tr->max_lock);
1835 }
1836 
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848 	int ret;
1849 
1850 	if (tr->stop_count)
1851 		return;
1852 
1853 	WARN_ON_ONCE(!irqs_disabled());
1854 	if (!tr->allocated_snapshot) {
1855 		/* Only the nop tracer should hit this when disabling */
1856 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857 		return;
1858 	}
1859 
1860 	arch_spin_lock(&tr->max_lock);
1861 
1862 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863 
1864 	if (ret == -EBUSY) {
1865 		/*
1866 		 * We failed to swap the buffer due to a commit taking
1867 		 * place on this CPU. We fail to record, but we reset
1868 		 * the max trace buffer (no one writes directly to it)
1869 		 * and flag that it failed.
1870 		 */
1871 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 			"Failed to swap buffers due to commit in progress\n");
1873 	}
1874 
1875 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876 
1877 	__update_max_tr(tr, tsk, cpu);
1878 	arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881 
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884 	/* Iterators are static, they should be filled or empty */
1885 	if (trace_buffer_iter(iter, iter->cpu_file))
1886 		return 0;
1887 
1888 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889 				full);
1890 }
1891 
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894 
1895 struct trace_selftests {
1896 	struct list_head		list;
1897 	struct tracer			*type;
1898 };
1899 
1900 static LIST_HEAD(postponed_selftests);
1901 
1902 static int save_selftest(struct tracer *type)
1903 {
1904 	struct trace_selftests *selftest;
1905 
1906 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907 	if (!selftest)
1908 		return -ENOMEM;
1909 
1910 	selftest->type = type;
1911 	list_add(&selftest->list, &postponed_selftests);
1912 	return 0;
1913 }
1914 
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917 	struct trace_array *tr = &global_trace;
1918 	struct tracer *saved_tracer = tr->current_trace;
1919 	int ret;
1920 
1921 	if (!type->selftest || tracing_selftest_disabled)
1922 		return 0;
1923 
1924 	/*
1925 	 * If a tracer registers early in boot up (before scheduling is
1926 	 * initialized and such), then do not run its selftests yet.
1927 	 * Instead, run it a little later in the boot process.
1928 	 */
1929 	if (!selftests_can_run)
1930 		return save_selftest(type);
1931 
1932 	if (!tracing_is_on()) {
1933 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934 			type->name);
1935 		return 0;
1936 	}
1937 
1938 	/*
1939 	 * Run a selftest on this tracer.
1940 	 * Here we reset the trace buffer, and set the current
1941 	 * tracer to be this tracer. The tracer can then run some
1942 	 * internal tracing to verify that everything is in order.
1943 	 * If we fail, we do not register this tracer.
1944 	 */
1945 	tracing_reset_online_cpus(&tr->array_buffer);
1946 
1947 	tr->current_trace = type;
1948 
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950 	if (type->use_max_tr) {
1951 		/* If we expanded the buffers, make sure the max is expanded too */
1952 		if (ring_buffer_expanded)
1953 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954 					   RING_BUFFER_ALL_CPUS);
1955 		tr->allocated_snapshot = true;
1956 	}
1957 #endif
1958 
1959 	/* the test is responsible for initializing and enabling */
1960 	pr_info("Testing tracer %s: ", type->name);
1961 	ret = type->selftest(type, tr);
1962 	/* the test is responsible for resetting too */
1963 	tr->current_trace = saved_tracer;
1964 	if (ret) {
1965 		printk(KERN_CONT "FAILED!\n");
1966 		/* Add the warning after printing 'FAILED' */
1967 		WARN_ON(1);
1968 		return -1;
1969 	}
1970 	/* Only reset on passing, to avoid touching corrupted buffers */
1971 	tracing_reset_online_cpus(&tr->array_buffer);
1972 
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 	if (type->use_max_tr) {
1975 		tr->allocated_snapshot = false;
1976 
1977 		/* Shrink the max buffer again */
1978 		if (ring_buffer_expanded)
1979 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1980 					   RING_BUFFER_ALL_CPUS);
1981 	}
1982 #endif
1983 
1984 	printk(KERN_CONT "PASSED\n");
1985 	return 0;
1986 }
1987 
1988 static __init int init_trace_selftests(void)
1989 {
1990 	struct trace_selftests *p, *n;
1991 	struct tracer *t, **last;
1992 	int ret;
1993 
1994 	selftests_can_run = true;
1995 
1996 	mutex_lock(&trace_types_lock);
1997 
1998 	if (list_empty(&postponed_selftests))
1999 		goto out;
2000 
2001 	pr_info("Running postponed tracer tests:\n");
2002 
2003 	tracing_selftest_running = true;
2004 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005 		/* This loop can take minutes when sanitizers are enabled, so
2006 		 * let's make sure we allow RCU processing.
2007 		 */
2008 		cond_resched();
2009 		ret = run_tracer_selftest(p->type);
2010 		/* If the test fails, then warn and remove from available_tracers */
2011 		if (ret < 0) {
2012 			WARN(1, "tracer: %s failed selftest, disabling\n",
2013 			     p->type->name);
2014 			last = &trace_types;
2015 			for (t = trace_types; t; t = t->next) {
2016 				if (t == p->type) {
2017 					*last = t->next;
2018 					break;
2019 				}
2020 				last = &t->next;
2021 			}
2022 		}
2023 		list_del(&p->list);
2024 		kfree(p);
2025 	}
2026 	tracing_selftest_running = false;
2027 
2028  out:
2029 	mutex_unlock(&trace_types_lock);
2030 
2031 	return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037 	return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040 
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042 
2043 static void __init apply_trace_boot_options(void);
2044 
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053 	struct tracer *t;
2054 	int ret = 0;
2055 
2056 	if (!type->name) {
2057 		pr_info("Tracer must have a name\n");
2058 		return -1;
2059 	}
2060 
2061 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063 		return -1;
2064 	}
2065 
2066 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067 		pr_warn("Can not register tracer %s due to lockdown\n",
2068 			   type->name);
2069 		return -EPERM;
2070 	}
2071 
2072 	mutex_lock(&trace_types_lock);
2073 
2074 	tracing_selftest_running = true;
2075 
2076 	for (t = trace_types; t; t = t->next) {
2077 		if (strcmp(type->name, t->name) == 0) {
2078 			/* already found */
2079 			pr_info("Tracer %s already registered\n",
2080 				type->name);
2081 			ret = -1;
2082 			goto out;
2083 		}
2084 	}
2085 
2086 	if (!type->set_flag)
2087 		type->set_flag = &dummy_set_flag;
2088 	if (!type->flags) {
2089 		/* allocate a dummy tracer_flags */
2090 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091 		if (!type->flags) {
2092 			ret = -ENOMEM;
2093 			goto out;
2094 		}
2095 		type->flags->val = 0;
2096 		type->flags->opts = dummy_tracer_opt;
2097 	} else
2098 		if (!type->flags->opts)
2099 			type->flags->opts = dummy_tracer_opt;
2100 
2101 	/* store the tracer for __set_tracer_option */
2102 	type->flags->trace = type;
2103 
2104 	ret = run_tracer_selftest(type);
2105 	if (ret < 0)
2106 		goto out;
2107 
2108 	type->next = trace_types;
2109 	trace_types = type;
2110 	add_tracer_options(&global_trace, type);
2111 
2112  out:
2113 	tracing_selftest_running = false;
2114 	mutex_unlock(&trace_types_lock);
2115 
2116 	if (ret || !default_bootup_tracer)
2117 		goto out_unlock;
2118 
2119 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120 		goto out_unlock;
2121 
2122 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123 	/* Do we want this tracer to start on bootup? */
2124 	tracing_set_tracer(&global_trace, type->name);
2125 	default_bootup_tracer = NULL;
2126 
2127 	apply_trace_boot_options();
2128 
2129 	/* Disable other selftests, since running this tracer would break them. */
2130 	disable_tracing_selftest("running a tracer");
2131 
2132  out_unlock:
2133 	return ret;
2134 }
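
/*
 * Example (illustrative sketch, not part of the original file): a minimal
 * tracer registration from an __init context. The name "example" and the
 * init/reset callbacks are hypothetical; real tracers usually also supply
 * a selftest and option flags.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */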
2135 
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138 	struct trace_buffer *buffer = buf->buffer;
2139 
2140 	if (!buffer)
2141 		return;
2142 
2143 	ring_buffer_record_disable(buffer);
2144 
2145 	/* Make sure all commits have finished */
2146 	synchronize_rcu();
2147 	ring_buffer_reset_cpu(buffer, cpu);
2148 
2149 	ring_buffer_record_enable(buffer);
2150 }
2151 
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154 	struct trace_buffer *buffer = buf->buffer;
2155 
2156 	if (!buffer)
2157 		return;
2158 
2159 	ring_buffer_record_disable(buffer);
2160 
2161 	/* Make sure all commits have finished */
2162 	synchronize_rcu();
2163 
2164 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165 
2166 	ring_buffer_reset_online_cpus(buffer);
2167 
2168 	ring_buffer_record_enable(buffer);
2169 }
2170 
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174 	struct trace_array *tr;
2175 
2176 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177 		if (!tr->clear_trace)
2178 			continue;
2179 		tr->clear_trace = false;
2180 		tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182 		tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184 	}
2185 }
2186 
2187 static int *tgid_map;
2188 
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194 	unsigned *map_cmdline_to_pid;
2195 	unsigned cmdline_num;
2196 	int cmdline_idx;
2197 	char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200 
2201 static inline char *get_saved_cmdlines(int idx)
2202 {
2203 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2204 }
2205 
2206 static inline void set_cmdline(int idx, const char *cmdline)
2207 {
2208 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2209 }
2210 
2211 static int allocate_cmdlines_buffer(unsigned int val,
2212 				    struct saved_cmdlines_buffer *s)
2213 {
2214 	s->map_cmdline_to_pid = kmalloc_array(val,
2215 					      sizeof(*s->map_cmdline_to_pid),
2216 					      GFP_KERNEL);
2217 	if (!s->map_cmdline_to_pid)
2218 		return -ENOMEM;
2219 
2220 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2221 	if (!s->saved_cmdlines) {
2222 		kfree(s->map_cmdline_to_pid);
2223 		return -ENOMEM;
2224 	}
2225 
2226 	s->cmdline_idx = 0;
2227 	s->cmdline_num = val;
2228 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2229 	       sizeof(s->map_pid_to_cmdline));
2230 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2231 	       val * sizeof(*s->map_cmdline_to_pid));
2232 
2233 	return 0;
2234 }
2235 
2236 static int trace_create_savedcmd(void)
2237 {
2238 	int ret;
2239 
2240 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2241 	if (!savedcmd)
2242 		return -ENOMEM;
2243 
2244 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2245 	if (ret < 0) {
2246 		kfree(savedcmd);
2247 		savedcmd = NULL;
2248 		return -ENOMEM;
2249 	}
2250 
2251 	return 0;
2252 }
2253 
2254 int is_tracing_stopped(void)
2255 {
2256 	return global_trace.stop_count;
2257 }
2258 
2259 /**
2260  * tracing_start - quick start of the tracer
2261  *
2262  * If tracing is enabled but was stopped by tracing_stop,
2263  * this will start the tracer back up.
2264  */
2265 void tracing_start(void)
2266 {
2267 	struct trace_buffer *buffer;
2268 	unsigned long flags;
2269 
2270 	if (tracing_disabled)
2271 		return;
2272 
2273 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2274 	if (--global_trace.stop_count) {
2275 		if (global_trace.stop_count < 0) {
2276 			/* Someone screwed up their debugging */
2277 			WARN_ON_ONCE(1);
2278 			global_trace.stop_count = 0;
2279 		}
2280 		goto out;
2281 	}
2282 
2283 	/* Prevent the buffers from switching */
2284 	arch_spin_lock(&global_trace.max_lock);
2285 
2286 	buffer = global_trace.array_buffer.buffer;
2287 	if (buffer)
2288 		ring_buffer_record_enable(buffer);
2289 
2290 #ifdef CONFIG_TRACER_MAX_TRACE
2291 	buffer = global_trace.max_buffer.buffer;
2292 	if (buffer)
2293 		ring_buffer_record_enable(buffer);
2294 #endif
2295 
2296 	arch_spin_unlock(&global_trace.max_lock);
2297 
2298  out:
2299 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2300 }
2301 
2302 static void tracing_start_tr(struct trace_array *tr)
2303 {
2304 	struct trace_buffer *buffer;
2305 	unsigned long flags;
2306 
2307 	if (tracing_disabled)
2308 		return;
2309 
2310 	/* If global, we need to also start the max tracer */
2311 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2312 		return tracing_start();
2313 
2314 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2315 
2316 	if (--tr->stop_count) {
2317 		if (tr->stop_count < 0) {
2318 			/* Someone screwed up their debugging */
2319 			WARN_ON_ONCE(1);
2320 			tr->stop_count = 0;
2321 		}
2322 		goto out;
2323 	}
2324 
2325 	buffer = tr->array_buffer.buffer;
2326 	if (buffer)
2327 		ring_buffer_record_enable(buffer);
2328 
2329  out:
2330 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2331 }
2332 
2333 /**
2334  * tracing_stop - quick stop of the tracer
2335  *
2336  * Light weight way to stop tracing. Use in conjunction with
2337  * tracing_start.
2338  */
2339 void tracing_stop(void)
2340 {
2341 	struct trace_buffer *buffer;
2342 	unsigned long flags;
2343 
2344 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2345 	if (global_trace.stop_count++)
2346 		goto out;
2347 
2348 	/* Prevent the buffers from switching */
2349 	arch_spin_lock(&global_trace.max_lock);
2350 
2351 	buffer = global_trace.array_buffer.buffer;
2352 	if (buffer)
2353 		ring_buffer_record_disable(buffer);
2354 
2355 #ifdef CONFIG_TRACER_MAX_TRACE
2356 	buffer = global_trace.max_buffer.buffer;
2357 	if (buffer)
2358 		ring_buffer_record_disable(buffer);
2359 #endif
2360 
2361 	arch_spin_unlock(&global_trace.max_lock);
2362 
2363  out:
2364 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2365 }
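
/*
 * Example (illustrative sketch, not part of the original file): stop_count
 * makes tracing_stop()/tracing_start() nest, so a debugging path can
 * bracket a region to freeze the buffers while they are inspected:
 *
 *	tracing_stop();
 *	... dump or inspect the trace buffers ...
 *	tracing_start();
 */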
2366 
2367 static void tracing_stop_tr(struct trace_array *tr)
2368 {
2369 	struct trace_buffer *buffer;
2370 	unsigned long flags;
2371 
2372 	/* If global, we need to also stop the max tracer */
2373 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2374 		return tracing_stop();
2375 
2376 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2377 	if (tr->stop_count++)
2378 		goto out;
2379 
2380 	buffer = tr->array_buffer.buffer;
2381 	if (buffer)
2382 		ring_buffer_record_disable(buffer);
2383 
2384  out:
2385 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2386 }
2387 
2388 static int trace_save_cmdline(struct task_struct *tsk)
2389 {
2390 	unsigned tpid, idx;
2391 
2392 	/* treat recording of idle task as a success */
2393 	if (!tsk->pid)
2394 		return 1;
2395 
2396 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2397 
2398 	/*
2399 	 * It's not the end of the world if we don't get
2400 	 * the lock, but we also don't want to spin
2401 	 * nor do we want to disable interrupts,
2402 	 * so if we miss here, then better luck next time.
2403 	 */
2404 	if (!arch_spin_trylock(&trace_cmdline_lock))
2405 		return 0;
2406 
2407 	idx = savedcmd->map_pid_to_cmdline[tpid];
2408 	if (idx == NO_CMDLINE_MAP) {
2409 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2410 
2411 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2412 		savedcmd->cmdline_idx = idx;
2413 	}
2414 
2415 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416 	set_cmdline(idx, tsk->comm);
2417 
2418 	arch_spin_unlock(&trace_cmdline_lock);
2419 
2420 	return 1;
2421 }
2422 
2423 static void __trace_find_cmdline(int pid, char comm[])
2424 {
2425 	unsigned map;
2426 	int tpid;
2427 
2428 	if (!pid) {
2429 		strcpy(comm, "<idle>");
2430 		return;
2431 	}
2432 
2433 	if (WARN_ON_ONCE(pid < 0)) {
2434 		strcpy(comm, "<XXX>");
2435 		return;
2436 	}
2437 
2438 	tpid = pid & (PID_MAX_DEFAULT - 1);
2439 	map = savedcmd->map_pid_to_cmdline[tpid];
2440 	if (map != NO_CMDLINE_MAP) {
2441 		tpid = savedcmd->map_cmdline_to_pid[map];
2442 		if (tpid == pid) {
2443 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2444 			return;
2445 		}
2446 	}
2447 	strcpy(comm, "<...>");
2448 }
2449 
2450 void trace_find_cmdline(int pid, char comm[])
2451 {
2452 	preempt_disable();
2453 	arch_spin_lock(&trace_cmdline_lock);
2454 
2455 	__trace_find_cmdline(pid, comm);
2456 
2457 	arch_spin_unlock(&trace_cmdline_lock);
2458 	preempt_enable();
2459 }
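
/*
 * Example (illustrative sketch, not part of the original file): output code
 * resolves a recorded PID back to a comm using a caller-supplied buffer of
 * TASK_COMM_LEN bytes; "<...>" comes back if the mapping was overwritten.
 * "entry" and "s" below are hypothetical trace_entry and trace_seq pointers.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%s-%d", comm, entry->pid);
 */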
2460 
2461 int trace_find_tgid(int pid)
2462 {
2463 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2464 		return 0;
2465 
2466 	return tgid_map[pid];
2467 }
2468 
2469 static int trace_save_tgid(struct task_struct *tsk)
2470 {
2471 	/* treat recording of idle task as a success */
2472 	if (!tsk->pid)
2473 		return 1;
2474 
2475 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2476 		return 0;
2477 
2478 	tgid_map[tsk->pid] = tsk->tgid;
2479 	return 1;
2480 }
2481 
2482 static bool tracing_record_taskinfo_skip(int flags)
2483 {
2484 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2485 		return true;
2486 	if (!__this_cpu_read(trace_taskinfo_save))
2487 		return true;
2488 	return false;
2489 }
2490 
2491 /**
2492  * tracing_record_taskinfo - record the task info of a task
2493  *
2494  * @task:  task to record
2495  * @flags: TRACE_RECORD_CMDLINE for recording comm
2496  *         TRACE_RECORD_TGID for recording tgid
2497  */
2498 void tracing_record_taskinfo(struct task_struct *task, int flags)
2499 {
2500 	bool done;
2501 
2502 	if (tracing_record_taskinfo_skip(flags))
2503 		return;
2504 
2505 	/*
2506 	 * Record as much task information as possible. If some fail, continue
2507 	 * to try to record the others.
2508 	 */
2509 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2510 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2511 
2512 	/* If recording any information failed, retry again soon. */
2513 	if (!done)
2514 		return;
2515 
2516 	__this_cpu_write(trace_taskinfo_save, false);
2517 }
2518 
2519 /**
2520  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2521  *
2522  * @prev: previous task during sched_switch
2523  * @next: next task during sched_switch
2524  * @flags: TRACE_RECORD_CMDLINE for recording comm
2525  *         TRACE_RECORD_TGID for recording tgid
2526  */
2527 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2528 					  struct task_struct *next, int flags)
2529 {
2530 	bool done;
2531 
2532 	if (tracing_record_taskinfo_skip(flags))
2533 		return;
2534 
2535 	/*
2536 	 * Record as much task information as possible. If some fail, continue
2537 	 * to try to record the others.
2538 	 */
2539 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2540 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2541 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2543 
2544 	/* If recording any information failed, retry again soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /* Helpers to record a specific task information */
2552 void tracing_record_cmdline(struct task_struct *task)
2553 {
2554 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2555 }
2556 
2557 void tracing_record_tgid(struct task_struct *task)
2558 {
2559 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2560 }
2561 
2562 /*
2563  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2564  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2565  * simplifies those functions and keeps them in sync.
2566  */
2567 enum print_line_t trace_handle_return(struct trace_seq *s)
2568 {
2569 	return trace_seq_has_overflowed(s) ?
2570 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2571 }
2572 EXPORT_SYMBOL_GPL(trace_handle_return);
2573 
2574 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2575 {
2576 	unsigned int trace_flags = irqs_status;
2577 	unsigned int pc;
2578 
2579 	pc = preempt_count();
2580 
2581 	if (pc & NMI_MASK)
2582 		trace_flags |= TRACE_FLAG_NMI;
2583 	if (pc & HARDIRQ_MASK)
2584 		trace_flags |= TRACE_FLAG_HARDIRQ;
2585 	if (in_serving_softirq())
2586 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2587 
2588 	if (tif_need_resched())
2589 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2590 	if (test_preempt_need_resched())
2591 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2592 	return (trace_flags << 16) | (pc & 0xff);
2593 }
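
/*
 * Illustration (not part of the original file): the returned value packs
 * the TRACE_FLAG_* bits into bits 16 and above and the low byte of
 * preempt_count() into the bottom byte, so a consumer can unpack it as:
 *
 *	unsigned int trace_ctx = tracing_gen_ctx();
 *	unsigned short flags = trace_ctx >> 16;
 *	unsigned char pc = trace_ctx & 0xff;
 */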
2594 
2595 struct ring_buffer_event *
2596 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2597 			  int type,
2598 			  unsigned long len,
2599 			  unsigned int trace_ctx)
2600 {
2601 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2602 }
2603 
2604 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2605 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2606 static int trace_buffered_event_ref;
2607 
2608 /**
2609  * trace_buffered_event_enable - enable buffering events
2610  *
2611  * When events are being filtered, it is quicker to write the event
2612  * data into a temporary buffer if there is a likely chance that it
2613  * will not be committed. Discarding an event from the ring buffer is
2614  * not as fast as committing it, and is much slower than copying the
2615  * data into a commit.
2616  *
2617  * When an event is to be filtered, allocate per-CPU buffers to write
2618  * the event data into. If the event is filtered and discarded, it is
2619  * simply dropped; otherwise, the entire data is committed
2620  * in one shot.
2621  */
2622 void trace_buffered_event_enable(void)
2623 {
2624 	struct ring_buffer_event *event;
2625 	struct page *page;
2626 	int cpu;
2627 
2628 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2629 
2630 	if (trace_buffered_event_ref++)
2631 		return;
2632 
2633 	for_each_tracing_cpu(cpu) {
2634 		page = alloc_pages_node(cpu_to_node(cpu),
2635 					GFP_KERNEL | __GFP_NORETRY, 0);
2636 		if (!page)
2637 			goto failed;
2638 
2639 		event = page_address(page);
2640 		memset(event, 0, sizeof(*event));
2641 
2642 		per_cpu(trace_buffered_event, cpu) = event;
2643 
2644 		preempt_disable();
2645 		if (cpu == smp_processor_id() &&
2646 		    __this_cpu_read(trace_buffered_event) !=
2647 		    per_cpu(trace_buffered_event, cpu))
2648 			WARN_ON_ONCE(1);
2649 		preempt_enable();
2650 	}
2651 
2652 	return;
2653  failed:
2654 	trace_buffered_event_disable();
2655 }
2656 
2657 static void enable_trace_buffered_event(void *data)
2658 {
2659 	/* Probably not needed, but do it anyway */
2660 	smp_rmb();
2661 	this_cpu_dec(trace_buffered_event_cnt);
2662 }
2663 
2664 static void disable_trace_buffered_event(void *data)
2665 {
2666 	this_cpu_inc(trace_buffered_event_cnt);
2667 }
2668 
2669 /**
2670  * trace_buffered_event_disable - disable buffering events
2671  *
2672  * When a filter is removed, it is faster to not use the buffered
2673  * events, and to commit directly into the ring buffer. Free up
2674  * the temp buffers when there are no more users. This requires
2675  * special synchronization with current events.
2676  */
2677 void trace_buffered_event_disable(void)
2678 {
2679 	int cpu;
2680 
2681 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2682 
2683 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2684 		return;
2685 
2686 	if (--trace_buffered_event_ref)
2687 		return;
2688 
2689 	preempt_disable();
2690 	/* For each CPU, set the buffer as used. */
2691 	smp_call_function_many(tracing_buffer_mask,
2692 			       disable_trace_buffered_event, NULL, 1);
2693 	preempt_enable();
2694 
2695 	/* Wait for all current users to finish */
2696 	synchronize_rcu();
2697 
2698 	for_each_tracing_cpu(cpu) {
2699 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2700 		per_cpu(trace_buffered_event, cpu) = NULL;
2701 	}
2702 	/*
2703 	 * Make sure trace_buffered_event is NULL before clearing
2704 	 * trace_buffered_event_cnt.
2705 	 */
2706 	smp_wmb();
2707 
2708 	preempt_disable();
2709 	/* Do the work on each cpu */
2710 	smp_call_function_many(tracing_buffer_mask,
2711 			       enable_trace_buffered_event, NULL, 1);
2712 	preempt_enable();
2713 }
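
/*
 * Example (illustrative sketch, not part of the original file): the
 * enable/disable pair is reference counted and must be called with
 * event_mutex held, as the WARN_ON_ONCE() checks above require:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install a filter that is expected to discard most events ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */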
2714 
2715 static struct trace_buffer *temp_buffer;
2716 
2717 struct ring_buffer_event *
2718 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2719 			  struct trace_event_file *trace_file,
2720 			  int type, unsigned long len,
2721 			  unsigned int trace_ctx)
2722 {
2723 	struct ring_buffer_event *entry;
2724 	struct trace_array *tr = trace_file->tr;
2725 	int val;
2726 
2727 	*current_rb = tr->array_buffer.buffer;
2728 
2729 	if (!tr->no_filter_buffering_ref &&
2730 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2731 	    (entry = this_cpu_read(trace_buffered_event))) {
2732 		/* Try to use the per cpu buffer first */
2733 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2734 		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2735 			trace_event_setup(entry, type, trace_ctx);
2736 			entry->array[0] = len;
2737 			return entry;
2738 		}
2739 		this_cpu_dec(trace_buffered_event_cnt);
2740 	}
2741 
2742 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2743 					    trace_ctx);
2744 	/*
2745 	 * If tracing is off, but we have triggers enabled,
2746 	 * we still need to look at the event data. Use the temp_buffer
2747 	 * to store the trace event for the trigger to use. It is recursion
2748 	 * safe and will not be recorded anywhere.
2749 	 */
2750 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2751 		*current_rb = temp_buffer;
2752 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2753 						    trace_ctx);
2754 	}
2755 	return entry;
2756 }
2757 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2758 
2759 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2760 static DEFINE_MUTEX(tracepoint_printk_mutex);
2761 
2762 static void output_printk(struct trace_event_buffer *fbuffer)
2763 {
2764 	struct trace_event_call *event_call;
2765 	struct trace_event_file *file;
2766 	struct trace_event *event;
2767 	unsigned long flags;
2768 	struct trace_iterator *iter = tracepoint_print_iter;
2769 
2770 	/* We should never get here if iter is NULL */
2771 	if (WARN_ON_ONCE(!iter))
2772 		return;
2773 
2774 	event_call = fbuffer->trace_file->event_call;
2775 	if (!event_call || !event_call->event.funcs ||
2776 	    !event_call->event.funcs->trace)
2777 		return;
2778 
2779 	file = fbuffer->trace_file;
2780 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2781 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2782 	     !filter_match_preds(file->filter, fbuffer->entry)))
2783 		return;
2784 
2785 	event = &fbuffer->trace_file->event_call->event;
2786 
2787 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2788 	trace_seq_init(&iter->seq);
2789 	iter->ent = fbuffer->entry;
2790 	event_call->event.funcs->trace(iter, 0, event);
2791 	trace_seq_putc(&iter->seq, 0);
2792 	printk("%s", iter->seq.buffer);
2793 
2794 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2795 }
2796 
2797 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2798 			     void *buffer, size_t *lenp,
2799 			     loff_t *ppos)
2800 {
2801 	int save_tracepoint_printk;
2802 	int ret;
2803 
2804 	mutex_lock(&tracepoint_printk_mutex);
2805 	save_tracepoint_printk = tracepoint_printk;
2806 
2807 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2808 
2809 	/*
2810 	 * This will force exiting early, as tracepoint_printk
2811 	 * is always zero when tracepoint_print_iter is not allocated.
2812 	 */
2813 	if (!tracepoint_print_iter)
2814 		tracepoint_printk = 0;
2815 
2816 	if (save_tracepoint_printk == tracepoint_printk)
2817 		goto out;
2818 
2819 	if (tracepoint_printk)
2820 		static_key_enable(&tracepoint_printk_key.key);
2821 	else
2822 		static_key_disable(&tracepoint_printk_key.key);
2823 
2824  out:
2825 	mutex_unlock(&tracepoint_printk_mutex);
2826 
2827 	return ret;
2828 }
2829 
2830 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2831 {
2832 	if (static_key_false(&tracepoint_printk_key.key))
2833 		output_printk(fbuffer);
2834 
2835 	if (static_branch_unlikely(&trace_event_exports_enabled))
2836 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2837 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2838 				    fbuffer->event, fbuffer->entry,
2839 				    fbuffer->trace_ctx, fbuffer->regs);
2840 }
2841 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2842 
2843 /*
2844  * Skip 3:
2845  *
2846  *   trace_buffer_unlock_commit_regs()
2847  *   trace_event_buffer_commit()
2848  *   trace_event_raw_event_xxx()
2849  */
2850 # define STACK_SKIP 3
2851 
2852 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2853 				     struct trace_buffer *buffer,
2854 				     struct ring_buffer_event *event,
2855 				     unsigned int trace_ctx,
2856 				     struct pt_regs *regs)
2857 {
2858 	__buffer_unlock_commit(buffer, event);
2859 
2860 	/*
2861 	 * If regs is not set, then skip the necessary functions.
2862 	 * Note, we can still get here via blktrace, wakeup tracer
2863 	 * and mmiotrace, but that's ok if they lose a function or
2864 	 * two. They are not that meaningful.
2865 	 */
2866 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2867 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2868 }
2869 
2870 /*
2871  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2872  */
2873 void
2874 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2875 				   struct ring_buffer_event *event)
2876 {
2877 	__buffer_unlock_commit(buffer, event);
2878 }
2879 
2880 void
2881 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2882 	       parent_ip, unsigned int trace_ctx)
2883 {
2884 	struct trace_event_call *call = &event_function;
2885 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2886 	struct ring_buffer_event *event;
2887 	struct ftrace_entry *entry;
2888 
2889 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2890 					    trace_ctx);
2891 	if (!event)
2892 		return;
2893 	entry	= ring_buffer_event_data(event);
2894 	entry->ip			= ip;
2895 	entry->parent_ip		= parent_ip;
2896 
2897 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2898 		if (static_branch_unlikely(&trace_function_exports_enabled))
2899 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2900 		__buffer_unlock_commit(buffer, event);
2901 	}
2902 }
2903 
2904 #ifdef CONFIG_STACKTRACE
2905 
2906 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2907 #define FTRACE_KSTACK_NESTING	4
2908 
2909 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2910 
2911 struct ftrace_stack {
2912 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2913 };
2914 
2915 
2916 struct ftrace_stacks {
2917 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2918 };
2919 
2920 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2921 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2922 
2923 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2924 				 unsigned int trace_ctx,
2925 				 int skip, struct pt_regs *regs)
2926 {
2927 	struct trace_event_call *call = &event_kernel_stack;
2928 	struct ring_buffer_event *event;
2929 	unsigned int size, nr_entries;
2930 	struct ftrace_stack *fstack;
2931 	struct stack_entry *entry;
2932 	int stackidx;
2933 
2934 	/*
2935 	 * Add one for this function and the call to stack_trace_save().
2936 	 * If regs is set, then these functions will not be in the way.
2937 	 */
2938 #ifndef CONFIG_UNWINDER_ORC
2939 	if (!regs)
2940 		skip++;
2941 #endif
2942 
2943 	preempt_disable_notrace();
2944 
2945 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2946 
2947 	/* This should never happen. If it does, yell once and skip */
2948 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2949 		goto out;
2950 
2951 	/*
2952 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2953 	 * interrupt will either see the value pre increment or post
2954 	 * increment. If the interrupt happens pre increment it will have
2955 	 * restored the counter when it returns.  We just need a barrier to
2956 	 * keep gcc from moving things around.
2957 	 */
2958 	barrier();
2959 
2960 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2961 	size = ARRAY_SIZE(fstack->calls);
2962 
2963 	if (regs) {
2964 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2965 						   size, skip);
2966 	} else {
2967 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2968 	}
2969 
2970 	size = nr_entries * sizeof(unsigned long);
2971 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2972 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2973 				    trace_ctx);
2974 	if (!event)
2975 		goto out;
2976 	entry = ring_buffer_event_data(event);
2977 
2978 	memcpy(&entry->caller, fstack->calls, size);
2979 	entry->size = nr_entries;
2980 
2981 	if (!call_filter_check_discard(call, entry, buffer, event))
2982 		__buffer_unlock_commit(buffer, event);
2983 
2984  out:
2985 	/* Again, don't let gcc optimize things here */
2986 	barrier();
2987 	__this_cpu_dec(ftrace_stack_reserve);
2988 	preempt_enable_notrace();
2989 
2990 }
2991 
2992 static inline void ftrace_trace_stack(struct trace_array *tr,
2993 				      struct trace_buffer *buffer,
2994 				      unsigned int trace_ctx,
2995 				      int skip, struct pt_regs *regs)
2996 {
2997 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2998 		return;
2999 
3000 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3001 }
3002 
3003 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3004 		   int skip)
3005 {
3006 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3007 
3008 	if (rcu_is_watching()) {
3009 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3010 		return;
3011 	}
3012 
3013 	/*
3014 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3015 	 * but if the above rcu_is_watching() failed, then the NMI
3016 	 * triggered someplace critical, and rcu_irq_enter() should
3017 	 * not be called from NMI.
3018 	 */
3019 	if (unlikely(in_nmi()))
3020 		return;
3021 
3022 	rcu_irq_enter_irqson();
3023 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3024 	rcu_irq_exit_irqson();
3025 }
3026 
3027 /**
3028  * trace_dump_stack - record a stack back trace in the trace buffer
3029  * @skip: Number of functions to skip (helper handlers)
3030  */
3031 void trace_dump_stack(int skip)
3032 {
3033 	if (tracing_disabled || tracing_selftest_running)
3034 		return;
3035 
3036 #ifndef CONFIG_UNWINDER_ORC
3037 	/* Skip 1 to skip this function. */
3038 	skip++;
3039 #endif
3040 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3041 			     tracing_gen_ctx(), skip, NULL);
3042 }
3043 EXPORT_SYMBOL_GPL(trace_dump_stack);
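
/*
 * Example (illustrative sketch, not part of the original file): a debug
 * hook can record its caller's stack in the trace buffer; @skip is the
 * number of helper frames to leave out. "suspicious_condition" is a
 * hypothetical predicate.
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);
 */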
3044 
3045 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3046 static DEFINE_PER_CPU(int, user_stack_count);
3047 
3048 static void
3049 ftrace_trace_userstack(struct trace_array *tr,
3050 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3051 {
3052 	struct trace_event_call *call = &event_user_stack;
3053 	struct ring_buffer_event *event;
3054 	struct userstack_entry *entry;
3055 
3056 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3057 		return;
3058 
3059 	/*
3060 	 * NMIs can not handle page faults, even with fixups.
3061 	 * Saving the user stack can (and often does) fault.
3062 	 */
3063 	if (unlikely(in_nmi()))
3064 		return;
3065 
3066 	/*
3067 	 * prevent recursion, since the user stack tracing may
3068 	 * trigger other kernel events.
3069 	 */
3070 	preempt_disable();
3071 	if (__this_cpu_read(user_stack_count))
3072 		goto out;
3073 
3074 	__this_cpu_inc(user_stack_count);
3075 
3076 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077 					    sizeof(*entry), trace_ctx);
3078 	if (!event)
3079 		goto out_drop_count;
3080 	entry	= ring_buffer_event_data(event);
3081 
3082 	entry->tgid		= current->tgid;
3083 	memset(&entry->caller, 0, sizeof(entry->caller));
3084 
3085 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086 	if (!call_filter_check_discard(call, entry, buffer, event))
3087 		__buffer_unlock_commit(buffer, event);
3088 
3089  out_drop_count:
3090 	__this_cpu_dec(user_stack_count);
3091  out:
3092 	preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_array *tr,
3096 				   struct trace_buffer *buffer,
3097 				   unsigned int trace_ctx)
3098 {
3099 }
3100 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3101 
3102 #endif /* CONFIG_STACKTRACE */
3103 
3104 static inline void
3105 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3106 			  unsigned long long delta)
3107 {
3108 	entry->bottom_delta_ts = delta & U32_MAX;
3109 	entry->top_delta_ts = (delta >> 32);
3110 }
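
/*
 * Illustration (not part of the original file): the 64-bit delta is split
 * into two 32-bit fields, so a reader reconstructs it as:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */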
3111 
3112 void trace_last_func_repeats(struct trace_array *tr,
3113 			     struct trace_func_repeats *last_info,
3114 			     unsigned int trace_ctx)
3115 {
3116 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3117 	struct func_repeats_entry *entry;
3118 	struct ring_buffer_event *event;
3119 	u64 delta;
3120 
3121 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3122 					    sizeof(*entry), trace_ctx);
3123 	if (!event)
3124 		return;
3125 
3126 	delta = ring_buffer_event_time_stamp(buffer, event) -
3127 		last_info->ts_last_call;
3128 
3129 	entry = ring_buffer_event_data(event);
3130 	entry->ip = last_info->ip;
3131 	entry->parent_ip = last_info->parent_ip;
3132 	entry->count = last_info->count;
3133 	func_repeats_set_delta_ts(entry, delta);
3134 
3135 	__buffer_unlock_commit(buffer, event);
3136 }
3137 
3138 /* created for use with alloc_percpu */
3139 struct trace_buffer_struct {
3140 	int nesting;
3141 	char buffer[4][TRACE_BUF_SIZE];
3142 };
3143 
3144 static struct trace_buffer_struct *trace_percpu_buffer;
3145 
3146 /*
3147  * This allows for lockless recording.  If we're nested too deeply, then
3148  * this returns NULL.
3149  */
3150 static char *get_trace_buf(void)
3151 {
3152 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3153 
3154 	if (!buffer || buffer->nesting >= 4)
3155 		return NULL;
3156 
3157 	buffer->nesting++;
3158 
3159 	/* Interrupts must see nesting incremented before we use the buffer */
3160 	barrier();
3161 	return &buffer->buffer[buffer->nesting - 1][0];
3162 }
3163 
3164 static void put_trace_buf(void)
3165 {
3166 	/* Don't let the decrement of nesting leak before this */
3167 	barrier();
3168 	this_cpu_dec(trace_percpu_buffer->nesting);
3169 }
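
/*
 * Example (illustrative sketch, not part of the original file): the per-CPU
 * buffer must be claimed and released with preemption disabled, as
 * trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */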
3170 
3171 static int alloc_percpu_trace_buffer(void)
3172 {
3173 	struct trace_buffer_struct *buffers;
3174 
3175 	if (trace_percpu_buffer)
3176 		return 0;
3177 
3178 	buffers = alloc_percpu(struct trace_buffer_struct);
3179 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3180 		return -ENOMEM;
3181 
3182 	trace_percpu_buffer = buffers;
3183 	return 0;
3184 }
3185 
3186 static int buffers_allocated;
3187 
3188 void trace_printk_init_buffers(void)
3189 {
3190 	if (buffers_allocated)
3191 		return;
3192 
3193 	if (alloc_percpu_trace_buffer())
3194 		return;
3195 
3196 	/* trace_printk() is for debug use only. Don't use it in production. */
3197 
3198 	pr_warn("\n");
3199 	pr_warn("**********************************************************\n");
3200 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3201 	pr_warn("**                                                      **\n");
3202 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3203 	pr_warn("**                                                      **\n");
3204 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3205 	pr_warn("** unsafe for production use.                           **\n");
3206 	pr_warn("**                                                      **\n");
3207 	pr_warn("** If you see this message and you are not debugging    **\n");
3208 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3209 	pr_warn("**                                                      **\n");
3210 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3211 	pr_warn("**********************************************************\n");
3212 
3213 	/* Expand the buffers to set size */
3214 	tracing_update_buffers();
3215 
3216 	buffers_allocated = 1;
3217 
3218 	/*
3219 	 * trace_printk_init_buffers() can be called by modules.
3220 	 * If that happens, then we need to start cmdline recording
3221 	 * directly here. If the global_trace.array_buffer.buffer is already
3222 	 * allocated here, then this was called by module code.
3223 	 */
3224 	if (global_trace.array_buffer.buffer)
3225 		tracing_start_cmdline_record();
3226 }
3227 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
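
/*
 * Example (illustrative sketch, not part of the original file): the
 * user-facing trace_printk() macro funnels into the vbprintk/vprintk
 * paths below. "val" is a hypothetical variable.
 *
 *	trace_printk("reached %s, val=%d\n", __func__, val);
 */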
3228 
3229 void trace_printk_start_comm(void)
3230 {
3231 	/* Start tracing comms if trace printk is set */
3232 	if (!buffers_allocated)
3233 		return;
3234 	tracing_start_cmdline_record();
3235 }
3236 
3237 static void trace_printk_start_stop_comm(int enabled)
3238 {
3239 	if (!buffers_allocated)
3240 		return;
3241 
3242 	if (enabled)
3243 		tracing_start_cmdline_record();
3244 	else
3245 		tracing_stop_cmdline_record();
3246 }
3247 
3248 /**
3249  * trace_vbprintk - write binary msg to tracing buffer
3250  * @ip:    The address of the caller
3251  * @fmt:   The string format to write to the buffer
3252  * @args:  Arguments for @fmt
3253  */
3254 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3255 {
3256 	struct trace_event_call *call = &event_bprint;
3257 	struct ring_buffer_event *event;
3258 	struct trace_buffer *buffer;
3259 	struct trace_array *tr = &global_trace;
3260 	struct bprint_entry *entry;
3261 	unsigned int trace_ctx;
3262 	char *tbuffer;
3263 	int len = 0, size;
3264 
3265 	if (unlikely(tracing_selftest_running || tracing_disabled))
3266 		return 0;
3267 
3268 	/* Don't pollute graph traces with trace_vprintk internals */
3269 	pause_graph_tracing();
3270 
3271 	trace_ctx = tracing_gen_ctx();
3272 	preempt_disable_notrace();
3273 
3274 	tbuffer = get_trace_buf();
3275 	if (!tbuffer) {
3276 		len = 0;
3277 		goto out_nobuffer;
3278 	}
3279 
3280 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3281 
3282 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3283 		goto out_put;
3284 
3285 	size = sizeof(*entry) + sizeof(u32) * len;
3286 	buffer = tr->array_buffer.buffer;
3287 	ring_buffer_nest_start(buffer);
3288 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3289 					    trace_ctx);
3290 	if (!event)
3291 		goto out;
3292 	entry = ring_buffer_event_data(event);
3293 	entry->ip			= ip;
3294 	entry->fmt			= fmt;
3295 
3296 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3297 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3298 		__buffer_unlock_commit(buffer, event);
3299 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3300 	}
3301 
3302 out:
3303 	ring_buffer_nest_end(buffer);
3304 out_put:
3305 	put_trace_buf();
3306 
3307 out_nobuffer:
3308 	preempt_enable_notrace();
3309 	unpause_graph_tracing();
3310 
3311 	return len;
3312 }
3313 EXPORT_SYMBOL_GPL(trace_vbprintk);
3314 
3315 __printf(3, 0)
3316 static int
3317 __trace_array_vprintk(struct trace_buffer *buffer,
3318 		      unsigned long ip, const char *fmt, va_list args)
3319 {
3320 	struct trace_event_call *call = &event_print;
3321 	struct ring_buffer_event *event;
3322 	int len = 0, size;
3323 	struct print_entry *entry;
3324 	unsigned int trace_ctx;
3325 	char *tbuffer;
3326 
3327 	if (tracing_disabled || tracing_selftest_running)
3328 		return 0;
3329 
3330 	/* Don't pollute graph traces with trace_vprintk internals */
3331 	pause_graph_tracing();
3332 
3333 	trace_ctx = tracing_gen_ctx();
3334 	preempt_disable_notrace();
3335 
3336 
3337 	tbuffer = get_trace_buf();
3338 	if (!tbuffer) {
3339 		len = 0;
3340 		goto out_nobuffer;
3341 	}
3342 
3343 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3344 
3345 	size = sizeof(*entry) + len + 1;
3346 	ring_buffer_nest_start(buffer);
3347 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3348 					    trace_ctx);
3349 	if (!event)
3350 		goto out;
3351 	entry = ring_buffer_event_data(event);
3352 	entry->ip = ip;
3353 
3354 	memcpy(&entry->buf, tbuffer, len + 1);
3355 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3356 		__buffer_unlock_commit(buffer, event);
3357 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3358 	}
3359 
3360 out:
3361 	ring_buffer_nest_end(buffer);
3362 	put_trace_buf();
3363 
3364 out_nobuffer:
3365 	preempt_enable_notrace();
3366 	unpause_graph_tracing();
3367 
3368 	return len;
3369 }
3370 
3371 __printf(3, 0)
3372 int trace_array_vprintk(struct trace_array *tr,
3373 			unsigned long ip, const char *fmt, va_list args)
3374 {
3375 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3376 }
3377 
3378 /**
3379  * trace_array_printk - Print a message to a specific instance
3380  * @tr: The instance trace_array descriptor
3381  * @ip: The instruction pointer that this is called from.
3382  * @fmt: The format to print (printf format)
3383  *
3384  * If a subsystem sets up its own instance, they have the right to
3385  * printk strings into their tracing instance buffer using this
3386  * function. Note, this function will not write into the top level
3387  * function. Note, this function will not write into the top level buffer
3388  * (use trace_printk() for that), as the top level buffer should only
3389  * contain events that can be individually disabled. trace_printk() is
3390  * only used for debugging a kernel and should never appear in normal use.
3391  *
3392  * trace_array_printk() can be used, as it will not add noise to the
3393  * top level tracing buffer.
3394  *
3395  * Note, trace_array_init_printk() must be called on @tr before this
3396  * can be used.
3397  */
3398 __printf(3, 0)
3399 int trace_array_printk(struct trace_array *tr,
3400 		       unsigned long ip, const char *fmt, ...)
3401 {
3402 	int ret;
3403 	va_list ap;
3404 
3405 	if (!tr)
3406 		return -ENOENT;
3407 
3408 	/* This is only allowed for created instances */
3409 	if (tr == &global_trace)
3410 		return 0;
3411 
3412 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3413 		return 0;
3414 
3415 	va_start(ap, fmt);
3416 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3417 	va_end(ap);
3418 	return ret;
3419 }
3420 EXPORT_SYMBOL_GPL(trace_array_printk);
3421 
3422 /**
3423  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3424  * @tr: The trace array to initialize the buffers for
3425  *
3426  * As trace_array_printk() only writes into instances, they are OK to
3427  * have in the kernel (unlike trace_printk()). This needs to be called
3428  * before trace_array_printk() can be used on a trace_array.
3429  */
3430 int trace_array_init_printk(struct trace_array *tr)
3431 {
3432 	if (!tr)
3433 		return -ENOENT;
3434 
3435 	/* This is only allowed for created instances */
3436 	if (tr == &global_trace)
3437 		return -EINVAL;
3438 
3439 	return alloc_percpu_trace_buffer();
3440 }
3441 EXPORT_SYMBOL_GPL(trace_array_init_printk);
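
/*
 * Example (illustrative sketch, not part of the original file): a module
 * printing into its own instance. The instance name "example" is
 * hypothetical; the instance is looked up (or created) with
 * trace_array_get_by_name() and released with trace_array_put().
 *
 *	struct trace_array *tr = trace_array_get_by_name("example");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", __func__);
 *	if (tr)
 *		trace_array_put(tr);
 */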
3442 
3443 __printf(3, 4)
3444 int trace_array_printk_buf(struct trace_buffer *buffer,
3445 			   unsigned long ip, const char *fmt, ...)
3446 {
3447 	int ret;
3448 	va_list ap;
3449 
3450 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3451 		return 0;
3452 
3453 	va_start(ap, fmt);
3454 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3455 	va_end(ap);
3456 	return ret;
3457 }
3458 
3459 __printf(2, 0)
3460 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3461 {
3462 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3463 }
3464 EXPORT_SYMBOL_GPL(trace_vprintk);
3465 
3466 static void trace_iterator_increment(struct trace_iterator *iter)
3467 {
3468 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3469 
3470 	iter->idx++;
3471 	if (buf_iter)
3472 		ring_buffer_iter_advance(buf_iter);
3473 }
3474 
3475 static struct trace_entry *
3476 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3477 		unsigned long *lost_events)
3478 {
3479 	struct ring_buffer_event *event;
3480 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3481 
3482 	if (buf_iter) {
3483 		event = ring_buffer_iter_peek(buf_iter, ts);
3484 		if (lost_events)
3485 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3486 				(unsigned long)-1 : 0;
3487 	} else {
3488 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3489 					 lost_events);
3490 	}
3491 
3492 	if (event) {
3493 		iter->ent_size = ring_buffer_event_length(event);
3494 		return ring_buffer_event_data(event);
3495 	}
3496 	iter->ent_size = 0;
3497 	return NULL;
3498 }
3499 
3500 static struct trace_entry *
3501 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3502 		  unsigned long *missing_events, u64 *ent_ts)
3503 {
3504 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3505 	struct trace_entry *ent, *next = NULL;
3506 	unsigned long lost_events = 0, next_lost = 0;
3507 	int cpu_file = iter->cpu_file;
3508 	u64 next_ts = 0, ts;
3509 	int next_cpu = -1;
3510 	int next_size = 0;
3511 	int cpu;
3512 
3513 	/*
3514 	 * If we are in a per_cpu trace file, don't bother iterating over
3515 	 * all CPUs; peek at that CPU directly.
3516 	 */
3517 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3518 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3519 			return NULL;
3520 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3521 		if (ent_cpu)
3522 			*ent_cpu = cpu_file;
3523 
3524 		return ent;
3525 	}
3526 
3527 	for_each_tracing_cpu(cpu) {
3528 
3529 		if (ring_buffer_empty_cpu(buffer, cpu))
3530 			continue;
3531 
3532 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3533 
3534 		/*
3535 		 * Pick the entry with the smallest timestamp:
3536 		 */
3537 		if (ent && (!next || ts < next_ts)) {
3538 			next = ent;
3539 			next_cpu = cpu;
3540 			next_ts = ts;
3541 			next_lost = lost_events;
3542 			next_size = iter->ent_size;
3543 		}
3544 	}
3545 
3546 	iter->ent_size = next_size;
3547 
3548 	if (ent_cpu)
3549 		*ent_cpu = next_cpu;
3550 
3551 	if (ent_ts)
3552 		*ent_ts = next_ts;
3553 
3554 	if (missing_events)
3555 		*missing_events = next_lost;
3556 
3557 	return next;
3558 }
3559 
3560 #define STATIC_FMT_BUF_SIZE	128
3561 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3562 
3563 static char *trace_iter_expand_format(struct trace_iterator *iter)
3564 {
3565 	char *tmp;
3566 
3567 	/*
3568 	 * iter->tr is NULL when used with tp_printk, which means this can
3569 	 * be called where it is not safe to call krealloc().
3570 	 */
3571 	if (!iter->tr || iter->fmt == static_fmt_buf)
3572 		return NULL;
3573 
3574 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3575 		       GFP_KERNEL);
3576 	if (tmp) {
3577 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3578 		iter->fmt = tmp;
3579 	}
3580 
3581 	return tmp;
3582 }
3583 
3584 /* Returns true if the string is safe to dereference from an event */
3585 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3586 {
3587 	unsigned long addr = (unsigned long)str;
3588 	struct trace_event *trace_event;
3589 	struct trace_event_call *event;
3590 
3591 	/* OK if part of the event data */
3592 	if ((addr >= (unsigned long)iter->ent) &&
3593 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3594 		return true;
3595 
3596 	/* OK if part of the temp seq buffer */
3597 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3598 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3599 		return true;
3600 
3601 	/* Core rodata can not be freed */
3602 	if (is_kernel_rodata(addr))
3603 		return true;
3604 
3605 	if (trace_is_tracepoint_string(str))
3606 		return true;
3607 
3608 	/*
3609 	 * Now this could be a module event, referencing core module
3610 	 * data, which is OK.
3611 	 */
3612 	if (!iter->ent)
3613 		return false;
3614 
3615 	trace_event = ftrace_find_event(iter->ent->type);
3616 	if (!trace_event)
3617 		return false;
3618 
3619 	event = container_of(trace_event, struct trace_event_call, event);
3620 	if (!event->mod)
3621 		return false;
3622 
3623 	/* Would rather have rodata, but this will suffice */
3624 	if (within_module_core(addr, event->mod))
3625 		return true;
3626 
3627 	return false;
3628 }
3629 
3630 static const char *show_buffer(struct trace_seq *s)
3631 {
3632 	struct seq_buf *seq = &s->seq;
3633 
3634 	seq_buf_terminate(seq);
3635 
3636 	return seq->buffer;
3637 }
3638 
3639 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3640 
3641 static int test_can_verify_check(const char *fmt, ...)
3642 {
3643 	char buf[16];
3644 	va_list ap;
3645 	int ret;
3646 
3647 	/*
3648 	 * The verifier depends on vsnprintf() modifying the va_list
3649 	 * passed to it, where it is sent by reference. Some architectures
3650 	 * (like x86_32) pass it by value, which means that vsnprintf()
3651 	 * does not modify the va_list passed to it, and the verifier
3652 	 * would then need to be able to understand all the values that
3653 	 * vsnprintf can use. If it is passed by value, then the verifier
3654 	 * is disabled.
3655 	 */
3656 	va_start(ap, fmt);
3657 	vsnprintf(buf, 16, "%d", ap);
3658 	ret = va_arg(ap, int);
3659 	va_end(ap);
3660 
3661 	return ret;
3662 }
3663 
3664 static void test_can_verify(void)
3665 {
3666 	if (!test_can_verify_check("%d %d", 0, 1)) {
3667 		pr_info("trace event string verifier disabled\n");
3668 		static_branch_inc(&trace_no_verify);
3669 	}
3670 }
3671 
3672 /**
3673  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3674  * @iter: The iterator that holds the seq buffer and the event being printed
3675  * @fmt: The format used to print the event
3676  * @ap: The va_list holding the data to print from @fmt.
3677  *
3678  * This writes the data into the @iter->seq buffer using the data from
3679  * @fmt and @ap. If the format has a %s, then the source of the string
3680  * is examined to make sure it is safe to print, otherwise it will
3681  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3682  * pointer.
3683  */
3684 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3685 			 va_list ap)
3686 {
3687 	const char *p = fmt;
3688 	const char *str;
3689 	int i, j;
3690 
3691 	if (WARN_ON_ONCE(!fmt))
3692 		return;
3693 
3694 	if (static_branch_unlikely(&trace_no_verify))
3695 		goto print;
3696 
3697 	/* Don't bother checking when doing a ftrace_dump() */
3698 	if (iter->fmt == static_fmt_buf)
3699 		goto print;
3700 
3701 	while (*p) {
3702 		bool star = false;
3703 		int len = 0;
3704 
3705 		j = 0;
3706 
3707 		/* We only care about %s and variants */
3708 		for (i = 0; p[i]; i++) {
3709 			if (i + 1 >= iter->fmt_size) {
3710 				/*
3711 				 * If we can't expand the copy buffer,
3712 				 * just print it.
3713 				 */
3714 				if (!trace_iter_expand_format(iter))
3715 					goto print;
3716 			}
3717 
3718 			if (p[i] == '\\' && p[i+1]) {
3719 				i++;
3720 				continue;
3721 			}
3722 			if (p[i] == '%') {
3723 				/* Need to test cases like %08.*s */
3724 				for (j = 1; p[i+j]; j++) {
3725 					if (isdigit(p[i+j]) ||
3726 					    p[i+j] == '.')
3727 						continue;
3728 					if (p[i+j] == '*') {
3729 						star = true;
3730 						continue;
3731 					}
3732 					break;
3733 				}
3734 				if (p[i+j] == 's')
3735 					break;
3736 				star = false;
3737 			}
3738 			j = 0;
3739 		}
3740 		/* If no %s found then just print normally */
3741 		if (!p[i])
3742 			break;
3743 
3744 		/* Copy up to the %s, and print that */
3745 		strncpy(iter->fmt, p, i);
3746 		iter->fmt[i] = '\0';
3747 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3748 
3749 		if (star)
3750 			len = va_arg(ap, int);
3751 
3752 		/* The ap now points to the string data of the %s */
3753 		str = va_arg(ap, const char *);
3754 
3755 		/*
3756 		 * If you hit this warning, it is likely that the
3757 		 * trace event in question used %s on a string that
3758 		 * was saved at the time of the event, but may not be
3759 		 * around when the trace is read. Use __string(),
3760 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3761 		 * instead. See samples/trace_events/trace-events-sample.h
3762 		 * for reference.
3763 		 */
3764 		if (WARN_ONCE(!trace_safe_str(iter, str),
3765 			      "fmt: '%s' current_buffer: '%s'",
3766 			      fmt, show_buffer(&iter->seq))) {
3767 			int ret;
3768 
3769 			/* Try to safely read the string */
3770 			if (star) {
3771 				if (len + 1 > iter->fmt_size)
3772 					len = iter->fmt_size - 1;
3773 				if (len < 0)
3774 					len = 0;
3775 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3776 				iter->fmt[len] = 0;
3777 				star = false;
3778 			} else {
3779 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3780 								  iter->fmt_size);
3781 			}
3782 			if (ret < 0)
3783 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3784 			else
3785 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3786 						 str, iter->fmt);
3787 			str = "[UNSAFE-MEMORY]";
3788 			strcpy(iter->fmt, "%s");
3789 		} else {
3790 			strncpy(iter->fmt, p + i, j + 1);
3791 			iter->fmt[j+1] = '\0';
3792 		}
3793 		if (star)
3794 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3795 		else
3796 			trace_seq_printf(&iter->seq, iter->fmt, str);
3797 
3798 		p += i + j + 1;
3799 	}
3800  print:
3801 	if (*p)
3802 		trace_seq_vprintf(&iter->seq, p, ap);
3803 }
3804 
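/*
 * Illustrative sketch of the pattern referred to in the warning above;
 * the event and field names here are hypothetical, and the canonical
 * version lives in samples/trace_events/trace-events-sample.h.
 * __string()/__assign_str() copy the string into the ring buffer at the
 * time of the event, so it is still valid when the trace is read and
 * trace_check_vprintf() never has to flag it as unsafe memory:
 *
 *	TRACE_EVENT(sample_str_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */
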
3805 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3806 {
3807 	const char *p, *new_fmt;
3808 	char *q;
3809 
3810 	if (WARN_ON_ONCE(!fmt))
3811 		return fmt;
3812 
3813 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3814 		return fmt;
3815 
3816 	p = fmt;
3817 	new_fmt = q = iter->fmt;
3818 	while (*p) {
3819 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3820 			if (!trace_iter_expand_format(iter))
3821 				return fmt;
3822 
3823 			q += iter->fmt - new_fmt;
3824 			new_fmt = iter->fmt;
3825 		}
3826 
3827 		*q++ = *p++;
3828 
3829 		/* Replace %p with %px */
3830 		if (p[-1] == '%') {
3831 			if (p[0] == '%') {
3832 				*q++ = *p++;
3833 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3834 				*q++ = *p++;
3835 				*q++ = 'x';
3836 			}
3837 		}
3838 	}
3839 	*q = '\0';
3840 
3841 	return new_fmt;
3842 }
3843 
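/*
 * Illustrative example of the rewrite done above (the format string is
 * made up). With the "hash-ptr" trace option cleared, a format such as
 *
 *	"skb=%p len=%d caller=%pS"
 *
 * is copied into iter->fmt as
 *
 *	"skb=%px len=%d caller=%pS"
 *
 * Only a bare %p gains the 'x'; extended specifiers like %pS are left
 * alone because the character after 'p' is alphanumeric, and "%%p" is
 * skipped since it prints a literal "%p".
 */
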
3844 #define STATIC_TEMP_BUF_SIZE	128
3845 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3846 
3847 /* Find the next real entry, without updating the iterator itself */
3848 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3849 					  int *ent_cpu, u64 *ent_ts)
3850 {
3851 	/* __find_next_entry will reset ent_size */
3852 	int ent_size = iter->ent_size;
3853 	struct trace_entry *entry;
3854 
3855 	/*
3856 	 * If called from ftrace_dump(), then the iter->temp buffer
3857 	 * will be the static_temp_buf and not created from kmalloc.
3858 	 * If the entry size is greater than the buffer, we cannot
3859 	 * save it. Just return NULL in that case. This is only
3860 	 * used to add markers when two consecutive events' time
3861 	 * stamps have a large delta. See trace_print_lat_context().
3862 	 */
3863 	if (iter->temp == static_temp_buf &&
3864 	    STATIC_TEMP_BUF_SIZE < ent_size)
3865 		return NULL;
3866 
3867 	/*
3868 	 * The __find_next_entry() may call peek_next_entry(), which may
3869 	 * call ring_buffer_peek() that may make the contents of iter->ent
3870 	 * undefined. Need to copy iter->ent now.
3871 	 */
3872 	if (iter->ent && iter->ent != iter->temp) {
3873 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3874 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3875 			void *temp;
3876 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3877 			if (!temp)
3878 				return NULL;
3879 			kfree(iter->temp);
3880 			iter->temp = temp;
3881 			iter->temp_size = iter->ent_size;
3882 		}
3883 		memcpy(iter->temp, iter->ent, iter->ent_size);
3884 		iter->ent = iter->temp;
3885 	}
3886 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3887 	/* Put back the original ent_size */
3888 	iter->ent_size = ent_size;
3889 
3890 	return entry;
3891 }
3892 
3893 /* Find the next real entry, and increment the iterator to the next entry */
3894 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3895 {
3896 	iter->ent = __find_next_entry(iter, &iter->cpu,
3897 				      &iter->lost_events, &iter->ts);
3898 
3899 	if (iter->ent)
3900 		trace_iterator_increment(iter);
3901 
3902 	return iter->ent ? iter : NULL;
3903 }
3904 
3905 static void trace_consume(struct trace_iterator *iter)
3906 {
3907 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3908 			    &iter->lost_events);
3909 }
3910 
3911 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3912 {
3913 	struct trace_iterator *iter = m->private;
3914 	int i = (int)*pos;
3915 	void *ent;
3916 
3917 	WARN_ON_ONCE(iter->leftover);
3918 
3919 	(*pos)++;
3920 
3921 	/* can't go backwards */
3922 	if (iter->idx > i)
3923 		return NULL;
3924 
3925 	if (iter->idx < 0)
3926 		ent = trace_find_next_entry_inc(iter);
3927 	else
3928 		ent = iter;
3929 
3930 	while (ent && iter->idx < i)
3931 		ent = trace_find_next_entry_inc(iter);
3932 
3933 	iter->pos = *pos;
3934 
3935 	return ent;
3936 }
3937 
3938 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3939 {
3940 	struct ring_buffer_iter *buf_iter;
3941 	unsigned long entries = 0;
3942 	u64 ts;
3943 
3944 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3945 
3946 	buf_iter = trace_buffer_iter(iter, cpu);
3947 	if (!buf_iter)
3948 		return;
3949 
3950 	ring_buffer_iter_reset(buf_iter);
3951 
3952 	/*
3953 	 * With the max latency tracers, a reset may never have taken
3954 	 * place on a CPU. This is evident when the timestamp is
3955 	 * before the start of the buffer.
3956 	 */
3957 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3958 		if (ts >= iter->array_buffer->time_start)
3959 			break;
3960 		entries++;
3961 		ring_buffer_iter_advance(buf_iter);
3962 	}
3963 
3964 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3965 }
3966 
3967 /*
3968  * The current tracer is copied to avoid taking a global lock
3969  * all around.
3970  */
3971 static void *s_start(struct seq_file *m, loff_t *pos)
3972 {
3973 	struct trace_iterator *iter = m->private;
3974 	struct trace_array *tr = iter->tr;
3975 	int cpu_file = iter->cpu_file;
3976 	void *p = NULL;
3977 	loff_t l = 0;
3978 	int cpu;
3979 
3980 	/*
3981 	 * Copy the tracer to avoid using a global lock all around.
3982 	 * iter->trace is a copy of current_trace; the name pointer
3983 	 * may be compared instead of using strcmp(), as iter->trace->name
3984 	 * points to the same string as current_trace->name.
3985 	 */
3986 	mutex_lock(&trace_types_lock);
3987 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3988 		*iter->trace = *tr->current_trace;
3989 	mutex_unlock(&trace_types_lock);
3990 
3991 #ifdef CONFIG_TRACER_MAX_TRACE
3992 	if (iter->snapshot && iter->trace->use_max_tr)
3993 		return ERR_PTR(-EBUSY);
3994 #endif
3995 
3996 	if (*pos != iter->pos) {
3997 		iter->ent = NULL;
3998 		iter->cpu = 0;
3999 		iter->idx = -1;
4000 
4001 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4002 			for_each_tracing_cpu(cpu)
4003 				tracing_iter_reset(iter, cpu);
4004 		} else
4005 			tracing_iter_reset(iter, cpu_file);
4006 
4007 		iter->leftover = 0;
4008 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4009 			;
4010 
4011 	} else {
4012 		/*
4013 		 * If we overflowed the seq_file before, then we want
4014 		 * to just reuse the trace_seq buffer again.
4015 		 */
4016 		if (iter->leftover)
4017 			p = iter;
4018 		else {
4019 			l = *pos - 1;
4020 			p = s_next(m, p, &l);
4021 		}
4022 	}
4023 
4024 	trace_event_read_lock();
4025 	trace_access_lock(cpu_file);
4026 	return p;
4027 }
4028 
4029 static void s_stop(struct seq_file *m, void *p)
4030 {
4031 	struct trace_iterator *iter = m->private;
4032 
4033 #ifdef CONFIG_TRACER_MAX_TRACE
4034 	if (iter->snapshot && iter->trace->use_max_tr)
4035 		return;
4036 #endif
4037 
4038 	trace_access_unlock(iter->cpu_file);
4039 	trace_event_read_unlock();
4040 }
4041 
4042 static void
4043 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4044 		      unsigned long *entries, int cpu)
4045 {
4046 	unsigned long count;
4047 
4048 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4049 	/*
4050 	 * If this buffer has skipped entries, then we hold all
4051 	 * entries for the trace and we need to ignore the
4052 	 * ones before the time stamp.
4053 	 */
4054 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4055 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4056 		/* total is the same as the entries */
4057 		*total = count;
4058 	} else
4059 		*total = count +
4060 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4061 	*entries = count;
4062 }
4063 
4064 static void
4065 get_total_entries(struct array_buffer *buf,
4066 		  unsigned long *total, unsigned long *entries)
4067 {
4068 	unsigned long t, e;
4069 	int cpu;
4070 
4071 	*total = 0;
4072 	*entries = 0;
4073 
4074 	for_each_tracing_cpu(cpu) {
4075 		get_total_entries_cpu(buf, &t, &e, cpu);
4076 		*total += t;
4077 		*entries += e;
4078 	}
4079 }
4080 
4081 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4082 {
4083 	unsigned long total, entries;
4084 
4085 	if (!tr)
4086 		tr = &global_trace;
4087 
4088 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4089 
4090 	return entries;
4091 }
4092 
4093 unsigned long trace_total_entries(struct trace_array *tr)
4094 {
4095 	unsigned long total, entries;
4096 
4097 	if (!tr)
4098 		tr = &global_trace;
4099 
4100 	get_total_entries(&tr->array_buffer, &total, &entries);
4101 
4102 	return entries;
4103 }
4104 
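/*
 * Minimal usage sketch (hypothetical caller): passing NULL selects the
 * global trace array, so a quick occupancy check could look like
 *
 *	pr_info("ftrace: %lu events buffered (cpu0: %lu)\n",
 *		trace_total_entries(NULL),
 *		trace_total_entries_cpu(NULL, 0));
 */
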
4105 static void print_lat_help_header(struct seq_file *m)
4106 {
4107 	seq_puts(m, "#                    _------=> CPU#            \n"
4108 		    "#                   / _-----=> irqs-off        \n"
4109 		    "#                  | / _----=> need-resched    \n"
4110 		    "#                  || / _---=> hardirq/softirq \n"
4111 		    "#                  ||| / _--=> preempt-depth   \n"
4112 		    "#                  |||| /     delay            \n"
4113 		    "#  cmd     pid     ||||| time  |   caller      \n"
4114 		    "#     \\   /        |||||  \\    |   /         \n");
4115 }
4116 
4117 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4118 {
4119 	unsigned long total;
4120 	unsigned long entries;
4121 
4122 	get_total_entries(buf, &total, &entries);
4123 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4124 		   entries, total, num_online_cpus());
4125 	seq_puts(m, "#\n");
4126 }
4127 
4128 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4129 				   unsigned int flags)
4130 {
4131 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4132 
4133 	print_event_info(buf, m);
4134 
4135 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4136 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4137 }
4138 
4139 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4140 				       unsigned int flags)
4141 {
4142 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4143 	const char *space = "            ";
4144 	int prec = tgid ? 12 : 2;
4145 
4146 	print_event_info(buf, m);
4147 
4148 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4149 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4150 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4151 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4152 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4153 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4154 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4155 }
4156 
4157 void
4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4161 	struct array_buffer *buf = iter->array_buffer;
4162 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4163 	struct tracer *type = iter->trace;
4164 	unsigned long entries;
4165 	unsigned long total;
4166 	const char *name = "preemption";
4167 
4168 	name = type->name;
4169 
4170 	get_total_entries(buf, &total, &entries);
4171 
4172 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4173 		   name, UTS_RELEASE);
4174 	seq_puts(m, "# -----------------------------------"
4175 		 "---------------------------------\n");
4176 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4177 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4178 		   nsecs_to_usecs(data->saved_latency),
4179 		   entries,
4180 		   total,
4181 		   buf->cpu,
4182 #if defined(CONFIG_PREEMPT_NONE)
4183 		   "server",
4184 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4185 		   "desktop",
4186 #elif defined(CONFIG_PREEMPT)
4187 		   "preempt",
4188 #elif defined(CONFIG_PREEMPT_RT)
4189 		   "preempt_rt",
4190 #else
4191 		   "unknown",
4192 #endif
4193 		   /* These are reserved for later use */
4194 		   0, 0, 0, 0);
4195 #ifdef CONFIG_SMP
4196 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4197 #else
4198 	seq_puts(m, ")\n");
4199 #endif
4200 	seq_puts(m, "#    -----------------\n");
4201 	seq_printf(m, "#    | task: %.16s-%d "
4202 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4203 		   data->comm, data->pid,
4204 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4205 		   data->policy, data->rt_priority);
4206 	seq_puts(m, "#    -----------------\n");
4207 
4208 	if (data->critical_start) {
4209 		seq_puts(m, "#  => started at: ");
4210 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4211 		trace_print_seq(m, &iter->seq);
4212 		seq_puts(m, "\n#  => ended at:   ");
4213 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4214 		trace_print_seq(m, &iter->seq);
4215 		seq_puts(m, "\n#\n");
4216 	}
4217 
4218 	seq_puts(m, "#\n");
4219 }
4220 
4221 static void test_cpu_buff_start(struct trace_iterator *iter)
4222 {
4223 	struct trace_seq *s = &iter->seq;
4224 	struct trace_array *tr = iter->tr;
4225 
4226 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4227 		return;
4228 
4229 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4230 		return;
4231 
4232 	if (cpumask_available(iter->started) &&
4233 	    cpumask_test_cpu(iter->cpu, iter->started))
4234 		return;
4235 
4236 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4237 		return;
4238 
4239 	if (cpumask_available(iter->started))
4240 		cpumask_set_cpu(iter->cpu, iter->started);
4241 
4242 	/* Don't print started cpu buffer for the first entry of the trace */
4243 	if (iter->idx > 1)
4244 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4245 				iter->cpu);
4246 }
4247 
4248 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4249 {
4250 	struct trace_array *tr = iter->tr;
4251 	struct trace_seq *s = &iter->seq;
4252 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4253 	struct trace_entry *entry;
4254 	struct trace_event *event;
4255 
4256 	entry = iter->ent;
4257 
4258 	test_cpu_buff_start(iter);
4259 
4260 	event = ftrace_find_event(entry->type);
4261 
4262 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4263 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4264 			trace_print_lat_context(iter);
4265 		else
4266 			trace_print_context(iter);
4267 	}
4268 
4269 	if (trace_seq_has_overflowed(s))
4270 		return TRACE_TYPE_PARTIAL_LINE;
4271 
4272 	if (event)
4273 		return event->funcs->trace(iter, sym_flags, event);
4274 
4275 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4276 
4277 	return trace_handle_return(s);
4278 }
4279 
4280 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4281 {
4282 	struct trace_array *tr = iter->tr;
4283 	struct trace_seq *s = &iter->seq;
4284 	struct trace_entry *entry;
4285 	struct trace_event *event;
4286 
4287 	entry = iter->ent;
4288 
4289 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4290 		trace_seq_printf(s, "%d %d %llu ",
4291 				 entry->pid, iter->cpu, iter->ts);
4292 
4293 	if (trace_seq_has_overflowed(s))
4294 		return TRACE_TYPE_PARTIAL_LINE;
4295 
4296 	event = ftrace_find_event(entry->type);
4297 	if (event)
4298 		return event->funcs->raw(iter, 0, event);
4299 
4300 	trace_seq_printf(s, "%d ?\n", entry->type);
4301 
4302 	return trace_handle_return(s);
4303 }
4304 
4305 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4306 {
4307 	struct trace_array *tr = iter->tr;
4308 	struct trace_seq *s = &iter->seq;
4309 	unsigned char newline = '\n';
4310 	struct trace_entry *entry;
4311 	struct trace_event *event;
4312 
4313 	entry = iter->ent;
4314 
4315 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4316 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4317 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4318 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4319 		if (trace_seq_has_overflowed(s))
4320 			return TRACE_TYPE_PARTIAL_LINE;
4321 	}
4322 
4323 	event = ftrace_find_event(entry->type);
4324 	if (event) {
4325 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4326 		if (ret != TRACE_TYPE_HANDLED)
4327 			return ret;
4328 	}
4329 
4330 	SEQ_PUT_FIELD(s, newline);
4331 
4332 	return trace_handle_return(s);
4333 }
4334 
4335 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4336 {
4337 	struct trace_array *tr = iter->tr;
4338 	struct trace_seq *s = &iter->seq;
4339 	struct trace_entry *entry;
4340 	struct trace_event *event;
4341 
4342 	entry = iter->ent;
4343 
4344 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4345 		SEQ_PUT_FIELD(s, entry->pid);
4346 		SEQ_PUT_FIELD(s, iter->cpu);
4347 		SEQ_PUT_FIELD(s, iter->ts);
4348 		if (trace_seq_has_overflowed(s))
4349 			return TRACE_TYPE_PARTIAL_LINE;
4350 	}
4351 
4352 	event = ftrace_find_event(entry->type);
4353 	return event ? event->funcs->binary(iter, 0, event) :
4354 		TRACE_TYPE_HANDLED;
4355 }
4356 
4357 int trace_empty(struct trace_iterator *iter)
4358 {
4359 	struct ring_buffer_iter *buf_iter;
4360 	int cpu;
4361 
4362 	/* If we are looking at one CPU buffer, only check that one */
4363 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4364 		cpu = iter->cpu_file;
4365 		buf_iter = trace_buffer_iter(iter, cpu);
4366 		if (buf_iter) {
4367 			if (!ring_buffer_iter_empty(buf_iter))
4368 				return 0;
4369 		} else {
4370 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4371 				return 0;
4372 		}
4373 		return 1;
4374 	}
4375 
4376 	for_each_tracing_cpu(cpu) {
4377 		buf_iter = trace_buffer_iter(iter, cpu);
4378 		if (buf_iter) {
4379 			if (!ring_buffer_iter_empty(buf_iter))
4380 				return 0;
4381 		} else {
4382 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4383 				return 0;
4384 		}
4385 	}
4386 
4387 	return 1;
4388 }
4389 
4390 /*  Called with trace_event_read_lock() held. */
4391 enum print_line_t print_trace_line(struct trace_iterator *iter)
4392 {
4393 	struct trace_array *tr = iter->tr;
4394 	unsigned long trace_flags = tr->trace_flags;
4395 	enum print_line_t ret;
4396 
4397 	if (iter->lost_events) {
4398 		if (iter->lost_events == (unsigned long)-1)
4399 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4400 					 iter->cpu);
4401 		else
4402 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4403 					 iter->cpu, iter->lost_events);
4404 		if (trace_seq_has_overflowed(&iter->seq))
4405 			return TRACE_TYPE_PARTIAL_LINE;
4406 	}
4407 
4408 	if (iter->trace && iter->trace->print_line) {
4409 		ret = iter->trace->print_line(iter);
4410 		if (ret != TRACE_TYPE_UNHANDLED)
4411 			return ret;
4412 	}
4413 
4414 	if (iter->ent->type == TRACE_BPUTS &&
4415 			trace_flags & TRACE_ITER_PRINTK &&
4416 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4417 		return trace_print_bputs_msg_only(iter);
4418 
4419 	if (iter->ent->type == TRACE_BPRINT &&
4420 			trace_flags & TRACE_ITER_PRINTK &&
4421 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4422 		return trace_print_bprintk_msg_only(iter);
4423 
4424 	if (iter->ent->type == TRACE_PRINT &&
4425 			trace_flags & TRACE_ITER_PRINTK &&
4426 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4427 		return trace_print_printk_msg_only(iter);
4428 
4429 	if (trace_flags & TRACE_ITER_BIN)
4430 		return print_bin_fmt(iter);
4431 
4432 	if (trace_flags & TRACE_ITER_HEX)
4433 		return print_hex_fmt(iter);
4434 
4435 	if (trace_flags & TRACE_ITER_RAW)
4436 		return print_raw_fmt(iter);
4437 
4438 	return print_trace_fmt(iter);
4439 }
4440 
4441 void trace_latency_header(struct seq_file *m)
4442 {
4443 	struct trace_iterator *iter = m->private;
4444 	struct trace_array *tr = iter->tr;
4445 
4446 	/* print nothing if the buffers are empty */
4447 	if (trace_empty(iter))
4448 		return;
4449 
4450 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4451 		print_trace_header(m, iter);
4452 
4453 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4454 		print_lat_help_header(m);
4455 }
4456 
4457 void trace_default_header(struct seq_file *m)
4458 {
4459 	struct trace_iterator *iter = m->private;
4460 	struct trace_array *tr = iter->tr;
4461 	unsigned long trace_flags = tr->trace_flags;
4462 
4463 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4464 		return;
4465 
4466 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4467 		/* print nothing if the buffers are empty */
4468 		if (trace_empty(iter))
4469 			return;
4470 		print_trace_header(m, iter);
4471 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4472 			print_lat_help_header(m);
4473 	} else {
4474 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4475 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4476 				print_func_help_header_irq(iter->array_buffer,
4477 							   m, trace_flags);
4478 			else
4479 				print_func_help_header(iter->array_buffer, m,
4480 						       trace_flags);
4481 		}
4482 	}
4483 }
4484 
4485 static void test_ftrace_alive(struct seq_file *m)
4486 {
4487 	if (!ftrace_is_dead())
4488 		return;
4489 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4490 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4491 }
4492 
4493 #ifdef CONFIG_TRACER_MAX_TRACE
4494 static void show_snapshot_main_help(struct seq_file *m)
4495 {
4496 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4497 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4498 		    "#                      Takes a snapshot of the main buffer.\n"
4499 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4500 		    "#                      (Doesn't have to be '2', works with any number that\n"
4501 		    "#                       is not a '0' or '1')\n");
4502 }
4503 
4504 static void show_snapshot_percpu_help(struct seq_file *m)
4505 {
4506 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4507 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4508 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4509 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4510 #else
4511 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4512 		    "#                     Must use main snapshot file to allocate.\n");
4513 #endif
4514 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4515 		    "#                      (Doesn't have to be '2', works with any number that\n"
4516 		    "#                       is not a '0' or '1')\n");
4517 }
4518 
4519 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4520 {
4521 	if (iter->tr->allocated_snapshot)
4522 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4523 	else
4524 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4525 
4526 	seq_puts(m, "# Snapshot commands:\n");
4527 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4528 		show_snapshot_main_help(m);
4529 	else
4530 		show_snapshot_percpu_help(m);
4531 }
4532 #else
4533 /* Should never be called */
4534 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4535 #endif
4536 
4537 static int s_show(struct seq_file *m, void *v)
4538 {
4539 	struct trace_iterator *iter = v;
4540 	int ret;
4541 
4542 	if (iter->ent == NULL) {
4543 		if (iter->tr) {
4544 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4545 			seq_puts(m, "#\n");
4546 			test_ftrace_alive(m);
4547 		}
4548 		if (iter->snapshot && trace_empty(iter))
4549 			print_snapshot_help(m, iter);
4550 		else if (iter->trace && iter->trace->print_header)
4551 			iter->trace->print_header(m);
4552 		else
4553 			trace_default_header(m);
4554 
4555 	} else if (iter->leftover) {
4556 		/*
4557 		 * If we filled the seq_file buffer earlier, we
4558 		 * want to just show it now.
4559 		 */
4560 		ret = trace_print_seq(m, &iter->seq);
4561 
4562 		/* ret should this time be zero, but you never know */
4563 		iter->leftover = ret;
4564 
4565 	} else {
4566 		print_trace_line(iter);
4567 		ret = trace_print_seq(m, &iter->seq);
4568 		/*
4569 		 * If we overflow the seq_file buffer, then it will
4570 		 * ask us for this data again at start up.
4571 		 * Use that instead.
4572 		 *  ret is 0 if seq_file write succeeded.
4573 		 *        -1 otherwise.
4574 		 */
4575 		iter->leftover = ret;
4576 	}
4577 
4578 	return 0;
4579 }
4580 
4581 /*
4582  * Should be used after trace_array_get(); trace_types_lock
4583  * ensures that i_cdev was already initialized.
4584  */
4585 static inline int tracing_get_cpu(struct inode *inode)
4586 {
4587 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4588 		return (long)inode->i_cdev - 1;
4589 	return RING_BUFFER_ALL_CPUS;
4590 }
4591 
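/*
 * Sketch of the expected pairing, inferred from the comment above (the
 * creator side, trace_create_cpu_file(), is not shown here): the per-CPU
 * file creator is assumed to store "cpu + 1" in i_cdev so that a NULL
 * i_cdev can still mean "all CPUs":
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *
 * which tracing_get_cpu() reverses with "(long)i_cdev - 1".
 */
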
4592 static const struct seq_operations tracer_seq_ops = {
4593 	.start		= s_start,
4594 	.next		= s_next,
4595 	.stop		= s_stop,
4596 	.show		= s_show,
4597 };
4598 
4599 static struct trace_iterator *
4600 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4601 {
4602 	struct trace_array *tr = inode->i_private;
4603 	struct trace_iterator *iter;
4604 	int cpu;
4605 
4606 	if (tracing_disabled)
4607 		return ERR_PTR(-ENODEV);
4608 
4609 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4610 	if (!iter)
4611 		return ERR_PTR(-ENOMEM);
4612 
4613 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4614 				    GFP_KERNEL);
4615 	if (!iter->buffer_iter)
4616 		goto release;
4617 
4618 	/*
4619 	 * trace_find_next_entry() may need to save off iter->ent.
4620 	 * It will place it into the iter->temp buffer. As most
4621 	 * events are less than 128 bytes, allocate a buffer of that size.
4622 	 * If one is greater, then trace_find_next_entry() will
4623 	 * allocate a new buffer to adjust for the bigger iter->ent.
4624 	 * It's not critical if it fails to get allocated here.
4625 	 */
4626 	iter->temp = kmalloc(128, GFP_KERNEL);
4627 	if (iter->temp)
4628 		iter->temp_size = 128;
4629 
4630 	/*
4631 	 * trace_event_printf() may need to modify the given format
4632 	 * string to replace %p with %px so that it shows the real address
4633 	 * instead of a hash value. However, that is only needed for event
4634 	 * tracing; other tracers may not need it. Defer the allocation
4635 	 * until it is needed.
4636 	 */
4637 	iter->fmt = NULL;
4638 	iter->fmt_size = 0;
4639 
4640 	/*
4641 	 * We make a copy of the current tracer to avoid concurrent
4642 	 * changes on it while we are reading.
4643 	 */
4644 	mutex_lock(&trace_types_lock);
4645 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4646 	if (!iter->trace)
4647 		goto fail;
4648 
4649 	*iter->trace = *tr->current_trace;
4650 
4651 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4652 		goto fail;
4653 
4654 	iter->tr = tr;
4655 
4656 #ifdef CONFIG_TRACER_MAX_TRACE
4657 	/* Currently only the top directory has a snapshot */
4658 	if (tr->current_trace->print_max || snapshot)
4659 		iter->array_buffer = &tr->max_buffer;
4660 	else
4661 #endif
4662 		iter->array_buffer = &tr->array_buffer;
4663 	iter->snapshot = snapshot;
4664 	iter->pos = -1;
4665 	iter->cpu_file = tracing_get_cpu(inode);
4666 	mutex_init(&iter->mutex);
4667 
4668 	/* Notify the tracer early; before we stop tracing. */
4669 	if (iter->trace->open)
4670 		iter->trace->open(iter);
4671 
4672 	/* Annotate start of buffers if we had overruns */
4673 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4674 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4675 
4676 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4677 	if (trace_clocks[tr->clock_id].in_ns)
4678 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4679 
4680 	/*
4681 	 * If pause-on-trace is enabled, then stop the trace while
4682 	 * dumping, unless this is the "snapshot" file
4683 	 */
4684 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4685 		tracing_stop_tr(tr);
4686 
4687 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4688 		for_each_tracing_cpu(cpu) {
4689 			iter->buffer_iter[cpu] =
4690 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4691 							 cpu, GFP_KERNEL);
4692 		}
4693 		ring_buffer_read_prepare_sync();
4694 		for_each_tracing_cpu(cpu) {
4695 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4696 			tracing_iter_reset(iter, cpu);
4697 		}
4698 	} else {
4699 		cpu = iter->cpu_file;
4700 		iter->buffer_iter[cpu] =
4701 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4702 						 cpu, GFP_KERNEL);
4703 		ring_buffer_read_prepare_sync();
4704 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4705 		tracing_iter_reset(iter, cpu);
4706 	}
4707 
4708 	mutex_unlock(&trace_types_lock);
4709 
4710 	return iter;
4711 
4712  fail:
4713 	mutex_unlock(&trace_types_lock);
4714 	kfree(iter->trace);
4715 	kfree(iter->temp);
4716 	kfree(iter->buffer_iter);
4717 release:
4718 	seq_release_private(inode, file);
4719 	return ERR_PTR(-ENOMEM);
4720 }
4721 
4722 int tracing_open_generic(struct inode *inode, struct file *filp)
4723 {
4724 	int ret;
4725 
4726 	ret = tracing_check_open_get_tr(NULL);
4727 	if (ret)
4728 		return ret;
4729 
4730 	filp->private_data = inode->i_private;
4731 	return 0;
4732 }
4733 
4734 bool tracing_is_disabled(void)
4735 {
4736 	return (tracing_disabled) ? true : false;
4737 }
4738 
4739 /*
4740  * Open and update trace_array ref count.
4741  * Must have the current trace_array passed to it.
4742  */
4743 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4744 {
4745 	struct trace_array *tr = inode->i_private;
4746 	int ret;
4747 
4748 	ret = tracing_check_open_get_tr(tr);
4749 	if (ret)
4750 		return ret;
4751 
4752 	filp->private_data = inode->i_private;
4753 
4754 	return 0;
4755 }
4756 
4757 static int tracing_release(struct inode *inode, struct file *file)
4758 {
4759 	struct trace_array *tr = inode->i_private;
4760 	struct seq_file *m = file->private_data;
4761 	struct trace_iterator *iter;
4762 	int cpu;
4763 
4764 	if (!(file->f_mode & FMODE_READ)) {
4765 		trace_array_put(tr);
4766 		return 0;
4767 	}
4768 
4769 	/* Writes do not use seq_file */
4770 	iter = m->private;
4771 	mutex_lock(&trace_types_lock);
4772 
4773 	for_each_tracing_cpu(cpu) {
4774 		if (iter->buffer_iter[cpu])
4775 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4776 	}
4777 
4778 	if (iter->trace && iter->trace->close)
4779 		iter->trace->close(iter);
4780 
4781 	if (!iter->snapshot && tr->stop_count)
4782 		/* reenable tracing if it was previously enabled */
4783 		tracing_start_tr(tr);
4784 
4785 	__trace_array_put(tr);
4786 
4787 	mutex_unlock(&trace_types_lock);
4788 
4789 	mutex_destroy(&iter->mutex);
4790 	free_cpumask_var(iter->started);
4791 	kfree(iter->fmt);
4792 	kfree(iter->temp);
4793 	kfree(iter->trace);
4794 	kfree(iter->buffer_iter);
4795 	seq_release_private(inode, file);
4796 
4797 	return 0;
4798 }
4799 
4800 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4801 {
4802 	struct trace_array *tr = inode->i_private;
4803 
4804 	trace_array_put(tr);
4805 	return 0;
4806 }
4807 
4808 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4809 {
4810 	struct trace_array *tr = inode->i_private;
4811 
4812 	trace_array_put(tr);
4813 
4814 	return single_release(inode, file);
4815 }
4816 
4817 static int tracing_open(struct inode *inode, struct file *file)
4818 {
4819 	struct trace_array *tr = inode->i_private;
4820 	struct trace_iterator *iter;
4821 	int ret;
4822 
4823 	ret = tracing_check_open_get_tr(tr);
4824 	if (ret)
4825 		return ret;
4826 
4827 	/* If this file was open for write, then erase contents */
4828 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4829 		int cpu = tracing_get_cpu(inode);
4830 		struct array_buffer *trace_buf = &tr->array_buffer;
4831 
4832 #ifdef CONFIG_TRACER_MAX_TRACE
4833 		if (tr->current_trace->print_max)
4834 			trace_buf = &tr->max_buffer;
4835 #endif
4836 
4837 		if (cpu == RING_BUFFER_ALL_CPUS)
4838 			tracing_reset_online_cpus(trace_buf);
4839 		else
4840 			tracing_reset_cpu(trace_buf, cpu);
4841 	}
4842 
4843 	if (file->f_mode & FMODE_READ) {
4844 		iter = __tracing_open(inode, file, false);
4845 		if (IS_ERR(iter))
4846 			ret = PTR_ERR(iter);
4847 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4848 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4849 	}
4850 
4851 	if (ret < 0)
4852 		trace_array_put(tr);
4853 
4854 	return ret;
4855 }
4856 
4857 /*
4858  * Some tracers are not suitable for instance buffers.
4859  * A tracer is always available for the global array (toplevel),
4860  * or for an instance if it explicitly states that it is.
4861  */
4862 static bool
4863 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4864 {
4865 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4866 }
4867 
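/*
 * Hypothetical registration sketch (names made up) showing how a tracer
 * opts in to instance buffers by setting .allow_instances:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name		 = "example",
 *		.init		 = example_tracer_init,
 *		.reset		 = example_tracer_reset,
 *		.allow_instances = true,
 *	};
 *
 *	register_tracer(&example_tracer);
 *
 * Without .allow_instances, trace_ok_for_array() only accepts the tracer
 * for the top-level (global) trace array.
 */
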
4868 /* Find the next tracer that this trace array may use */
4869 static struct tracer *
4870 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4871 {
4872 	while (t && !trace_ok_for_array(t, tr))
4873 		t = t->next;
4874 
4875 	return t;
4876 }
4877 
4878 static void *
4879 t_next(struct seq_file *m, void *v, loff_t *pos)
4880 {
4881 	struct trace_array *tr = m->private;
4882 	struct tracer *t = v;
4883 
4884 	(*pos)++;
4885 
4886 	if (t)
4887 		t = get_tracer_for_array(tr, t->next);
4888 
4889 	return t;
4890 }
4891 
4892 static void *t_start(struct seq_file *m, loff_t *pos)
4893 {
4894 	struct trace_array *tr = m->private;
4895 	struct tracer *t;
4896 	loff_t l = 0;
4897 
4898 	mutex_lock(&trace_types_lock);
4899 
4900 	t = get_tracer_for_array(tr, trace_types);
4901 	for (; t && l < *pos; t = t_next(m, t, &l))
4902 		;
4903 
4904 	return t;
4905 }
4906 
4907 static void t_stop(struct seq_file *m, void *p)
4908 {
4909 	mutex_unlock(&trace_types_lock);
4910 }
4911 
4912 static int t_show(struct seq_file *m, void *v)
4913 {
4914 	struct tracer *t = v;
4915 
4916 	if (!t)
4917 		return 0;
4918 
4919 	seq_puts(m, t->name);
4920 	if (t->next)
4921 		seq_putc(m, ' ');
4922 	else
4923 		seq_putc(m, '\n');
4924 
4925 	return 0;
4926 }
4927 
4928 static const struct seq_operations show_traces_seq_ops = {
4929 	.start		= t_start,
4930 	.next		= t_next,
4931 	.stop		= t_stop,
4932 	.show		= t_show,
4933 };
4934 
4935 static int show_traces_open(struct inode *inode, struct file *file)
4936 {
4937 	struct trace_array *tr = inode->i_private;
4938 	struct seq_file *m;
4939 	int ret;
4940 
4941 	ret = tracing_check_open_get_tr(tr);
4942 	if (ret)
4943 		return ret;
4944 
4945 	ret = seq_open(file, &show_traces_seq_ops);
4946 	if (ret) {
4947 		trace_array_put(tr);
4948 		return ret;
4949 	}
4950 
4951 	m = file->private_data;
4952 	m->private = tr;
4953 
4954 	return 0;
4955 }
4956 
4957 static int show_traces_release(struct inode *inode, struct file *file)
4958 {
4959 	struct trace_array *tr = inode->i_private;
4960 
4961 	trace_array_put(tr);
4962 	return seq_release(inode, file);
4963 }
4964 
4965 static ssize_t
4966 tracing_write_stub(struct file *filp, const char __user *ubuf,
4967 		   size_t count, loff_t *ppos)
4968 {
4969 	return count;
4970 }
4971 
4972 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4973 {
4974 	int ret;
4975 
4976 	if (file->f_mode & FMODE_READ)
4977 		ret = seq_lseek(file, offset, whence);
4978 	else
4979 		file->f_pos = ret = 0;
4980 
4981 	return ret;
4982 }
4983 
4984 static const struct file_operations tracing_fops = {
4985 	.open		= tracing_open,
4986 	.read		= seq_read,
4987 	.write		= tracing_write_stub,
4988 	.llseek		= tracing_lseek,
4989 	.release	= tracing_release,
4990 };
4991 
4992 static const struct file_operations show_traces_fops = {
4993 	.open		= show_traces_open,
4994 	.read		= seq_read,
4995 	.llseek		= seq_lseek,
4996 	.release	= show_traces_release,
4997 };
4998 
4999 static ssize_t
5000 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5001 		     size_t count, loff_t *ppos)
5002 {
5003 	struct trace_array *tr = file_inode(filp)->i_private;
5004 	char *mask_str;
5005 	int len;
5006 
5007 	len = snprintf(NULL, 0, "%*pb\n",
5008 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5009 	mask_str = kmalloc(len, GFP_KERNEL);
5010 	if (!mask_str)
5011 		return -ENOMEM;
5012 
5013 	len = snprintf(mask_str, len, "%*pb\n",
5014 		       cpumask_pr_args(tr->tracing_cpumask));
5015 	if (len >= count) {
5016 		count = -EINVAL;
5017 		goto out_err;
5018 	}
5019 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5020 
5021 out_err:
5022 	kfree(mask_str);
5023 
5024 	return count;
5025 }
5026 
5027 int tracing_set_cpumask(struct trace_array *tr,
5028 			cpumask_var_t tracing_cpumask_new)
5029 {
5030 	int cpu;
5031 
5032 	if (!tr)
5033 		return -EINVAL;
5034 
5035 	local_irq_disable();
5036 	arch_spin_lock(&tr->max_lock);
5037 	for_each_tracing_cpu(cpu) {
5038 		/*
5039 		 * Increase/decrease the disabled counter if we are
5040 		 * about to flip a bit in the cpumask:
5041 		 */
5042 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5043 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5044 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5045 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5046 		}
5047 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5048 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5049 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5050 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5051 		}
5052 	}
5053 	arch_spin_unlock(&tr->max_lock);
5054 	local_irq_enable();
5055 
5056 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5057 
5058 	return 0;
5059 }
5060 
5061 static ssize_t
5062 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5063 		      size_t count, loff_t *ppos)
5064 {
5065 	struct trace_array *tr = file_inode(filp)->i_private;
5066 	cpumask_var_t tracing_cpumask_new;
5067 	int err;
5068 
5069 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5070 		return -ENOMEM;
5071 
5072 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5073 	if (err)
5074 		goto err_free;
5075 
5076 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5077 	if (err)
5078 		goto err_free;
5079 
5080 	free_cpumask_var(tracing_cpumask_new);
5081 
5082 	return count;
5083 
5084 err_free:
5085 	free_cpumask_var(tracing_cpumask_new);
5086 
5087 	return err;
5088 }
5089 
5090 static const struct file_operations tracing_cpumask_fops = {
5091 	.open		= tracing_open_generic_tr,
5092 	.read		= tracing_cpumask_read,
5093 	.write		= tracing_cpumask_write,
5094 	.release	= tracing_release_generic_tr,
5095 	.llseek		= generic_file_llseek,
5096 };
5097 
5098 static int tracing_trace_options_show(struct seq_file *m, void *v)
5099 {
5100 	struct tracer_opt *trace_opts;
5101 	struct trace_array *tr = m->private;
5102 	u32 tracer_flags;
5103 	int i;
5104 
5105 	mutex_lock(&trace_types_lock);
5106 	tracer_flags = tr->current_trace->flags->val;
5107 	trace_opts = tr->current_trace->flags->opts;
5108 
5109 	for (i = 0; trace_options[i]; i++) {
5110 		if (tr->trace_flags & (1 << i))
5111 			seq_printf(m, "%s\n", trace_options[i]);
5112 		else
5113 			seq_printf(m, "no%s\n", trace_options[i]);
5114 	}
5115 
5116 	for (i = 0; trace_opts[i].name; i++) {
5117 		if (tracer_flags & trace_opts[i].bit)
5118 			seq_printf(m, "%s\n", trace_opts[i].name);
5119 		else
5120 			seq_printf(m, "no%s\n", trace_opts[i].name);
5121 	}
5122 	mutex_unlock(&trace_types_lock);
5123 
5124 	return 0;
5125 }
5126 
5127 static int __set_tracer_option(struct trace_array *tr,
5128 			       struct tracer_flags *tracer_flags,
5129 			       struct tracer_opt *opts, int neg)
5130 {
5131 	struct tracer *trace = tracer_flags->trace;
5132 	int ret;
5133 
5134 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5135 	if (ret)
5136 		return ret;
5137 
5138 	if (neg)
5139 		tracer_flags->val &= ~opts->bit;
5140 	else
5141 		tracer_flags->val |= opts->bit;
5142 	return 0;
5143 }
5144 
5145 /* Try to assign a tracer specific option */
5146 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5147 {
5148 	struct tracer *trace = tr->current_trace;
5149 	struct tracer_flags *tracer_flags = trace->flags;
5150 	struct tracer_opt *opts = NULL;
5151 	int i;
5152 
5153 	for (i = 0; tracer_flags->opts[i].name; i++) {
5154 		opts = &tracer_flags->opts[i];
5155 
5156 		if (strcmp(cmp, opts->name) == 0)
5157 			return __set_tracer_option(tr, trace->flags, opts, neg);
5158 	}
5159 
5160 	return -EINVAL;
5161 }
5162 
5163 /* Some tracers require overwrite to stay enabled */
5164 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5165 {
5166 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5167 		return -1;
5168 
5169 	return 0;
5170 }
5171 
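/*
 * Hedged sketch of how a latency tracer is expected to wire this helper
 * up (the function below is illustrative, not copied from any tracer):
 *
 *	static int example_flag_changed(struct trace_array *tr,
 *					u32 mask, int set)
 *	{
 *		struct tracer *tracer = tr->current_trace;
 *
 *		return trace_keep_overwrite(tracer, mask, set);
 *	}
 *
 * With .flag_changed = example_flag_changed in its struct tracer,
 * set_tracer_flag() below lets the tracer veto clearing
 * TRACE_ITER_OVERWRITE while it is enabled.
 */
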
5172 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5173 {
5174 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5175 	    (mask == TRACE_ITER_RECORD_CMD))
5176 		lockdep_assert_held(&event_mutex);
5177 
5178 	/* do nothing if flag is already set */
5179 	if (!!(tr->trace_flags & mask) == !!enabled)
5180 		return 0;
5181 
5182 	/* Give the tracer a chance to approve the change */
5183 	if (tr->current_trace->flag_changed)
5184 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5185 			return -EINVAL;
5186 
5187 	if (enabled)
5188 		tr->trace_flags |= mask;
5189 	else
5190 		tr->trace_flags &= ~mask;
5191 
5192 	if (mask == TRACE_ITER_RECORD_CMD)
5193 		trace_event_enable_cmd_record(enabled);
5194 
5195 	if (mask == TRACE_ITER_RECORD_TGID) {
5196 		if (!tgid_map)
5197 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5198 					   sizeof(*tgid_map),
5199 					   GFP_KERNEL);
5200 		if (!tgid_map) {
5201 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5202 			return -ENOMEM;
5203 		}
5204 
5205 		trace_event_enable_tgid_record(enabled);
5206 	}
5207 
5208 	if (mask == TRACE_ITER_EVENT_FORK)
5209 		trace_event_follow_fork(tr, enabled);
5210 
5211 	if (mask == TRACE_ITER_FUNC_FORK)
5212 		ftrace_pid_follow_fork(tr, enabled);
5213 
5214 	if (mask == TRACE_ITER_OVERWRITE) {
5215 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5216 #ifdef CONFIG_TRACER_MAX_TRACE
5217 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5218 #endif
5219 	}
5220 
5221 	if (mask == TRACE_ITER_PRINTK) {
5222 		trace_printk_start_stop_comm(enabled);
5223 		trace_printk_control(enabled);
5224 	}
5225 
5226 	return 0;
5227 }
5228 
5229 int trace_set_options(struct trace_array *tr, char *option)
5230 {
5231 	char *cmp;
5232 	int neg = 0;
5233 	int ret;
5234 	size_t orig_len = strlen(option);
5235 	int len;
5236 
5237 	cmp = strstrip(option);
5238 
5239 	len = str_has_prefix(cmp, "no");
5240 	if (len)
5241 		neg = 1;
5242 
5243 	cmp += len;
5244 
5245 	mutex_lock(&event_mutex);
5246 	mutex_lock(&trace_types_lock);
5247 
5248 	ret = match_string(trace_options, -1, cmp);
5249 	/* If it is not a core option, test the tracer-specific options */
5250 	if (ret < 0)
5251 		ret = set_tracer_option(tr, cmp, neg);
5252 	else
5253 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5254 
5255 	mutex_unlock(&trace_types_lock);
5256 	mutex_unlock(&event_mutex);
5257 
5258 	/*
5259 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5260 	 * turn it back into a space.
5261 	 */
5262 	if (orig_len > strlen(option))
5263 		option[strlen(option)] = ' ';
5264 
5265 	return ret;
5266 }
5267 
5268 static void __init apply_trace_boot_options(void)
5269 {
5270 	char *buf = trace_boot_options_buf;
5271 	char *option;
5272 
5273 	while (true) {
5274 		option = strsep(&buf, ",");
5275 
5276 		if (!option)
5277 			break;
5278 
5279 		if (*option)
5280 			trace_set_options(&global_trace, option);
5281 
5282 		/* Put back the comma to allow this to be called again */
5283 		if (buf)
5284 			*(buf - 1) = ',';
5285 	}
5286 }
5287 
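/*
 * Illustrative example (the command line below is hypothetical): booting
 * with
 *
 *	trace_options=sym-offset,noirq-info
 *
 * leaves "sym-offset,noirq-info" in trace_boot_options_buf, and the loop
 * above hands each token to trace_set_options(&global_trace, ...). The
 * comma is written back so the buffer can be parsed again if needed.
 */
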
5288 static ssize_t
5289 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5290 			size_t cnt, loff_t *ppos)
5291 {
5292 	struct seq_file *m = filp->private_data;
5293 	struct trace_array *tr = m->private;
5294 	char buf[64];
5295 	int ret;
5296 
5297 	if (cnt >= sizeof(buf))
5298 		return -EINVAL;
5299 
5300 	if (copy_from_user(buf, ubuf, cnt))
5301 		return -EFAULT;
5302 
5303 	buf[cnt] = 0;
5304 
5305 	ret = trace_set_options(tr, buf);
5306 	if (ret < 0)
5307 		return ret;
5308 
5309 	*ppos += cnt;
5310 
5311 	return cnt;
5312 }
5313 
5314 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5315 {
5316 	struct trace_array *tr = inode->i_private;
5317 	int ret;
5318 
5319 	ret = tracing_check_open_get_tr(tr);
5320 	if (ret)
5321 		return ret;
5322 
5323 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5324 	if (ret < 0)
5325 		trace_array_put(tr);
5326 
5327 	return ret;
5328 }
5329 
5330 static const struct file_operations tracing_iter_fops = {
5331 	.open		= tracing_trace_options_open,
5332 	.read		= seq_read,
5333 	.llseek		= seq_lseek,
5334 	.release	= tracing_single_release_tr,
5335 	.write		= tracing_trace_options_write,
5336 };
5337 
5338 static const char readme_msg[] =
5339 	"tracing mini-HOWTO:\n\n"
5340 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5341 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5342 	" Important files:\n"
5343 	"  trace\t\t\t- The static contents of the buffer\n"
5344 	"\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5345 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5346 	"  current_tracer\t- function and latency tracers\n"
5347 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5348 	"  error_log\t- error log for failed commands (that support it)\n"
5349 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5350 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5351 	"  trace_clock\t\t- change the clock used to order events\n"
5352 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5353 	"      global:   Synced across CPUs but slows tracing down.\n"
5354 	"     counter:   Not a clock, but just an increment\n"
5355 	"      uptime:   Jiffy counter from time of boot\n"
5356 	"        perf:   Same clock that perf events use\n"
5357 #ifdef CONFIG_X86_64
5358 	"     x86-tsc:   TSC cycle counter\n"
5359 #endif
5360 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5361 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5362 	"    absolute:   Absolute (standalone) timestamp\n"
5363 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5364 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5365 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5366 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5367 	"\t\t\t  Remove sub-buffer with rmdir\n"
5368 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5369 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5370 	"\t\t\t  option name\n"
5371 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5372 #ifdef CONFIG_DYNAMIC_FTRACE
5373 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5374 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5375 	"\t\t\t  functions\n"
5376 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5377 	"\t     modules: Can select a group via module\n"
5378 	"\t      Format: :mod:<module-name>\n"
5379 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5380 	"\t    triggers: a command to perform when function is hit\n"
5381 	"\t      Format: <function>:<trigger>[:count]\n"
5382 	"\t     trigger: traceon, traceoff\n"
5383 	"\t\t      enable_event:<system>:<event>\n"
5384 	"\t\t      disable_event:<system>:<event>\n"
5385 #ifdef CONFIG_STACKTRACE
5386 	"\t\t      stacktrace\n"
5387 #endif
5388 #ifdef CONFIG_TRACER_SNAPSHOT
5389 	"\t\t      snapshot\n"
5390 #endif
5391 	"\t\t      dump\n"
5392 	"\t\t      cpudump\n"
5393 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5394 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5395 	"\t     The first one will disable tracing every time do_fault is hit\n"
5396 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5397 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5398 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5399 	"\t       the counter will not decrement. It only decrements when the\n"
5400 	"\t       trigger did work\n"
5401 	"\t     To remove trigger without count:\n"
5402 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5403 	"\t     To remove trigger with a count:\n"
5404 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5405 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5406 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5407 	"\t    modules: Can select a group via module command :mod:\n"
5408 	"\t    Does not accept triggers\n"
5409 #endif /* CONFIG_DYNAMIC_FTRACE */
5410 #ifdef CONFIG_FUNCTION_TRACER
5411 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5412 	"\t\t    (function)\n"
5413 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5414 	"\t\t    (function)\n"
5415 #endif
5416 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5417 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5418 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5419 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5420 #endif
5421 #ifdef CONFIG_TRACER_SNAPSHOT
5422 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5423 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5424 	"\t\t\t  information\n"
5425 #endif
5426 #ifdef CONFIG_STACK_TRACER
5427 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5428 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5429 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5430 	"\t\t\t  new trace)\n"
5431 #ifdef CONFIG_DYNAMIC_FTRACE
5432 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5433 	"\t\t\t  traces\n"
5434 #endif
5435 #endif /* CONFIG_STACK_TRACER */
5436 #ifdef CONFIG_DYNAMIC_EVENTS
5437 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5438 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5439 #endif
5440 #ifdef CONFIG_KPROBE_EVENTS
5441 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5442 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5443 #endif
5444 #ifdef CONFIG_UPROBE_EVENTS
5445 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5446 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5447 #endif
5448 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5449 	"\t  accepts: event-definitions (one definition per line)\n"
5450 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5451 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5452 #ifdef CONFIG_HIST_TRIGGERS
5453 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5454 #endif
5455 	"\t           -:[<group>/]<event>\n"
5456 #ifdef CONFIG_KPROBE_EVENTS
5457 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5458 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5459 #endif
5460 #ifdef CONFIG_UPROBE_EVENTS
5461 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5462 #endif
5463 	"\t     args: <name>=fetcharg[:type]\n"
5464 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5465 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5466 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5467 #else
5468 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5469 #endif
5470 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5471 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5472 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5473 	"\t           <type>\\[<array-size>\\]\n"
5474 #ifdef CONFIG_HIST_TRIGGERS
5475 	"\t    field: <stype> <name>;\n"
5476 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5477 	"\t           [unsigned] char/int/long\n"
5478 #endif
5479 #endif
5480 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5481 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5482 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5483 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5484 	"\t\t\t  events\n"
5485 	"      filter\t\t- If set, only events passing filter are traced\n"
5486 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5487 	"\t\t\t  <event>:\n"
5488 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5489 	"      filter\t\t- If set, only events passing filter are traced\n"
5490 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5491 	"\t    Format: <trigger>[:count][if <filter>]\n"
5492 	"\t   trigger: traceon, traceoff\n"
5493 	"\t            enable_event:<system>:<event>\n"
5494 	"\t            disable_event:<system>:<event>\n"
5495 #ifdef CONFIG_HIST_TRIGGERS
5496 	"\t            enable_hist:<system>:<event>\n"
5497 	"\t            disable_hist:<system>:<event>\n"
5498 #endif
5499 #ifdef CONFIG_STACKTRACE
5500 	"\t\t    stacktrace\n"
5501 #endif
5502 #ifdef CONFIG_TRACER_SNAPSHOT
5503 	"\t\t    snapshot\n"
5504 #endif
5505 #ifdef CONFIG_HIST_TRIGGERS
5506 	"\t\t    hist (see below)\n"
5507 #endif
5508 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5509 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5510 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5511 	"\t                  events/block/block_unplug/trigger\n"
5512 	"\t   The first disables tracing every time block_unplug is hit.\n"
5513 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5514 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5515 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5516 	"\t   Like function triggers, the counter is only decremented if the\n"
5517 	"\t    trigger actually enabled or disabled tracing.\n"
5518 	"\t   To remove a trigger without a count:\n"
5519 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5520 	"\t   To remove a trigger with a count:\n"
5521 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5522 	"\t   Filters can be ignored when removing a trigger.\n"
5523 #ifdef CONFIG_HIST_TRIGGERS
5524 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5525 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5526 	"\t            [:values=<field1[,field2,...]>]\n"
5527 	"\t            [:sort=<field1[,field2,...]>]\n"
5528 	"\t            [:size=#entries]\n"
5529 	"\t            [:pause][:continue][:clear]\n"
5530 	"\t            [:name=histname1]\n"
5531 	"\t            [:<handler>.<action>]\n"
5532 	"\t            [if <filter>]\n\n"
5533 	"\t    When a matching event is hit, an entry is added to a hash\n"
5534 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5535 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5536 	"\t    correspond to fields in the event's format description.  Keys\n"
5537 	"\t    can be any field, or the special string 'stacktrace'.\n"
5538 	"\t    Compound keys consisting of up to two fields can be specified\n"
5539 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5540 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5541 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5542 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5543 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5544 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5545 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5546 	"\t    its histogram data will be shared with other triggers of the\n"
5547 	"\t    same name, and trigger hits will update this common data.\n\n"
5548 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5549 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5550 	"\t    triggers attached to an event, there will be a table for each\n"
5551 	"\t    trigger in the output.  The table displayed for a named\n"
5552 	"\t    trigger will be the same as any other instance having the\n"
5553 	"\t    same name.  The default format used to display a given field\n"
5554 	"\t    can be modified by appending any of the following modifiers\n"
5555 	"\t    to the field name, as applicable:\n\n"
5556 	"\t            .hex        display a number as a hex value\n"
5557 	"\t            .sym        display an address as a symbol\n"
5558 	"\t            .sym-offset display an address as a symbol and offset\n"
5559 	"\t            .execname   display a common_pid as a program name\n"
5560 	"\t            .syscall    display a syscall id as a syscall name\n"
5561 	"\t            .log2       display log2 value rather than raw number\n"
5562 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5563 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5564 	"\t    trigger or to start a hist trigger but not log any events\n"
5565 	"\t    until told to do so.  'continue' can be used to start or\n"
5566 	"\t    restart a paused hist trigger.\n\n"
5567 	"\t    The 'clear' parameter will clear the contents of a running\n"
5568 	"\t    hist trigger and leave its current paused/active state\n"
5569 	"\t    unchanged.\n\n"
5570 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5571 	"\t    have one event conditionally start and stop another event's\n"
5572 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5573 	"\t    the enable_event and disable_event triggers.\n\n"
5574 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5575 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5576 	"\t        <handler>.<action>\n\n"
5577 	"\t    The available handlers are:\n\n"
5578 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5579 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5580 	"\t        onchange(var)            - invoke action if var changes\n\n"
5581 	"\t    The available actions are:\n\n"
5582 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5583 	"\t        save(field,...)                      - save current event fields\n"
5584 #ifdef CONFIG_TRACER_SNAPSHOT
5585 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5586 #endif
5587 #ifdef CONFIG_SYNTH_EVENTS
5588 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5589 	"\t  Write into this file to define/undefine new synthetic events.\n"
5590 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5591 #endif
5592 #endif
5593 ;
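/*
 * A concrete (illustrative) hist trigger, assuming tracefs is mounted at
 * /sys/kernel/tracing and the kmem:kmalloc event is available:
 *
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 *
 * See Documentation/trace/histogram.rst for the full syntax summarized in
 * readme_msg above.
 */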
5594 
5595 static ssize_t
5596 tracing_readme_read(struct file *filp, char __user *ubuf,
5597 		       size_t cnt, loff_t *ppos)
5598 {
5599 	return simple_read_from_buffer(ubuf, cnt, ppos,
5600 					readme_msg, strlen(readme_msg));
5601 }
5602 
5603 static const struct file_operations tracing_readme_fops = {
5604 	.open		= tracing_open_generic,
5605 	.read		= tracing_readme_read,
5606 	.llseek		= generic_file_llseek,
5607 };
5608 
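/*
 * seq_file iterator for the "saved_tgids" file: walks tgid_map and emits
 * one "<pid> <tgid>" line for each PID that has a recorded thread group id
 * (recorded while the "record-tgid" trace option is set).
 */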
5609 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5610 {
5611 	int *ptr = v;
5612 
5613 	if (*pos || m->count)
5614 		ptr++;
5615 
5616 	(*pos)++;
5617 
5618 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5619 		if (trace_find_tgid(*ptr))
5620 			return ptr;
5621 	}
5622 
5623 	return NULL;
5624 }
5625 
5626 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5627 {
5628 	void *v;
5629 	loff_t l = 0;
5630 
5631 	if (!tgid_map)
5632 		return NULL;
5633 
5634 	v = &tgid_map[0];
5635 	while (l <= *pos) {
5636 		v = saved_tgids_next(m, v, &l);
5637 		if (!v)
5638 			return NULL;
5639 	}
5640 
5641 	return v;
5642 }
5643 
5644 static void saved_tgids_stop(struct seq_file *m, void *v)
5645 {
5646 }
5647 
5648 static int saved_tgids_show(struct seq_file *m, void *v)
5649 {
5650 	int pid = (int *)v - tgid_map;
5651 
5652 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5653 	return 0;
5654 }
5655 
5656 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5657 	.start		= saved_tgids_start,
5658 	.stop		= saved_tgids_stop,
5659 	.next		= saved_tgids_next,
5660 	.show		= saved_tgids_show,
5661 };
5662 
5663 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5664 {
5665 	int ret;
5666 
5667 	ret = tracing_check_open_get_tr(NULL);
5668 	if (ret)
5669 		return ret;
5670 
5671 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5672 }
5673 
5674 
5675 static const struct file_operations tracing_saved_tgids_fops = {
5676 	.open		= tracing_saved_tgids_open,
5677 	.read		= seq_read,
5678 	.llseek		= seq_lseek,
5679 	.release	= seq_release,
5680 };
5681 
5682 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5683 {
5684 	unsigned int *ptr = v;
5685 
5686 	if (*pos || m->count)
5687 		ptr++;
5688 
5689 	(*pos)++;
5690 
5691 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5692 	     ptr++) {
5693 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5694 			continue;
5695 
5696 		return ptr;
5697 	}
5698 
5699 	return NULL;
5700 }
5701 
5702 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5703 {
5704 	void *v;
5705 	loff_t l = 0;
5706 
5707 	preempt_disable();
5708 	arch_spin_lock(&trace_cmdline_lock);
5709 
5710 	v = &savedcmd->map_cmdline_to_pid[0];
5711 	while (l <= *pos) {
5712 		v = saved_cmdlines_next(m, v, &l);
5713 		if (!v)
5714 			return NULL;
5715 	}
5716 
5717 	return v;
5718 }
5719 
5720 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5721 {
5722 	arch_spin_unlock(&trace_cmdline_lock);
5723 	preempt_enable();
5724 }
5725 
5726 static int saved_cmdlines_show(struct seq_file *m, void *v)
5727 {
5728 	char buf[TASK_COMM_LEN];
5729 	unsigned int *pid = v;
5730 
5731 	__trace_find_cmdline(*pid, buf);
5732 	seq_printf(m, "%d %s\n", *pid, buf);
5733 	return 0;
5734 }
5735 
5736 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5737 	.start		= saved_cmdlines_start,
5738 	.next		= saved_cmdlines_next,
5739 	.stop		= saved_cmdlines_stop,
5740 	.show		= saved_cmdlines_show,
5741 };
5742 
5743 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5744 {
5745 	int ret;
5746 
5747 	ret = tracing_check_open_get_tr(NULL);
5748 	if (ret)
5749 		return ret;
5750 
5751 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5752 }
5753 
5754 static const struct file_operations tracing_saved_cmdlines_fops = {
5755 	.open		= tracing_saved_cmdlines_open,
5756 	.read		= seq_read,
5757 	.llseek		= seq_lseek,
5758 	.release	= seq_release,
5759 };
5760 
5761 static ssize_t
5762 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5763 				 size_t cnt, loff_t *ppos)
5764 {
5765 	char buf[64];
5766 	int r;
5767 
5768 	arch_spin_lock(&trace_cmdline_lock);
5769 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5770 	arch_spin_unlock(&trace_cmdline_lock);
5771 
5772 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5773 }
5774 
5775 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5776 {
5777 	kfree(s->saved_cmdlines);
5778 	kfree(s->map_cmdline_to_pid);
5779 	kfree(s);
5780 }
5781 
5782 static int tracing_resize_saved_cmdlines(unsigned int val)
5783 {
5784 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5785 
5786 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5787 	if (!s)
5788 		return -ENOMEM;
5789 
5790 	if (allocate_cmdlines_buffer(val, s) < 0) {
5791 		kfree(s);
5792 		return -ENOMEM;
5793 	}
5794 
5795 	arch_spin_lock(&trace_cmdline_lock);
5796 	savedcmd_temp = savedcmd;
5797 	savedcmd = s;
5798 	arch_spin_unlock(&trace_cmdline_lock);
5799 	free_saved_cmdlines_buffer(savedcmd_temp);
5800 
5801 	return 0;
5802 }
5803 
5804 static ssize_t
5805 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5806 				  size_t cnt, loff_t *ppos)
5807 {
5808 	unsigned long val;
5809 	int ret;
5810 
5811 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5812 	if (ret)
5813 		return ret;
5814 
5815 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5816 	if (!val || val > PID_MAX_DEFAULT)
5817 		return -EINVAL;
5818 
5819 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5820 	if (ret < 0)
5821 		return ret;
5822 
5823 	*ppos += cnt;
5824 
5825 	return cnt;
5826 }
5827 
5828 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5829 	.open		= tracing_open_generic,
5830 	.read		= tracing_saved_cmdlines_size_read,
5831 	.write		= tracing_saved_cmdlines_size_write,
5832 };
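/*
 * Illustrative use of "saved_cmdlines_size" (tracefs mounted at
 * /sys/kernel/tracing):
 *
 *   cat saved_cmdlines_size
 *   echo 1024 > saved_cmdlines_size
 *
 * Resizing allocates a fresh pid<->comm cache, so previously saved comms
 * are dropped.
 */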
5833 
5834 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5835 static union trace_eval_map_item *
5836 update_eval_map(union trace_eval_map_item *ptr)
5837 {
5838 	if (!ptr->map.eval_string) {
5839 		if (ptr->tail.next) {
5840 			ptr = ptr->tail.next;
5841 			/* Set ptr to the next real item (skip head) */
5842 			ptr++;
5843 		} else
5844 			return NULL;
5845 	}
5846 	return ptr;
5847 }
5848 
5849 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5850 {
5851 	union trace_eval_map_item *ptr = v;
5852 
5853 	/*
5854 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5855 	 * This really should never happen.
5856 	 */
5857 	(*pos)++;
5858 	ptr = update_eval_map(ptr);
5859 	if (WARN_ON_ONCE(!ptr))
5860 		return NULL;
5861 
5862 	ptr++;
5863 	ptr = update_eval_map(ptr);
5864 
5865 	return ptr;
5866 }
5867 
5868 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5869 {
5870 	union trace_eval_map_item *v;
5871 	loff_t l = 0;
5872 
5873 	mutex_lock(&trace_eval_mutex);
5874 
5875 	v = trace_eval_maps;
5876 	if (v)
5877 		v++;
5878 
5879 	while (v && l < *pos) {
5880 		v = eval_map_next(m, v, &l);
5881 	}
5882 
5883 	return v;
5884 }
5885 
5886 static void eval_map_stop(struct seq_file *m, void *v)
5887 {
5888 	mutex_unlock(&trace_eval_mutex);
5889 }
5890 
5891 static int eval_map_show(struct seq_file *m, void *v)
5892 {
5893 	union trace_eval_map_item *ptr = v;
5894 
5895 	seq_printf(m, "%s %ld (%s)\n",
5896 		   ptr->map.eval_string, ptr->map.eval_value,
5897 		   ptr->map.system);
5898 
5899 	return 0;
5900 }
5901 
5902 static const struct seq_operations tracing_eval_map_seq_ops = {
5903 	.start		= eval_map_start,
5904 	.next		= eval_map_next,
5905 	.stop		= eval_map_stop,
5906 	.show		= eval_map_show,
5907 };
5908 
5909 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5910 {
5911 	int ret;
5912 
5913 	ret = tracing_check_open_get_tr(NULL);
5914 	if (ret)
5915 		return ret;
5916 
5917 	return seq_open(filp, &tracing_eval_map_seq_ops);
5918 }
5919 
5920 static const struct file_operations tracing_eval_map_fops = {
5921 	.open		= tracing_eval_map_open,
5922 	.read		= seq_read,
5923 	.llseek		= seq_lseek,
5924 	.release	= seq_release,
5925 };
5926 
5927 static inline union trace_eval_map_item *
5928 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5929 {
5930 	/* Return tail of array given the head */
5931 	return ptr + ptr->head.length + 1;
5932 }
5933 
5934 static void
5935 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5936 			   int len)
5937 {
5938 	struct trace_eval_map **stop;
5939 	struct trace_eval_map **map;
5940 	union trace_eval_map_item *map_array;
5941 	union trace_eval_map_item *ptr;
5942 
5943 	stop = start + len;
5944 
5945 	/*
5946 	 * The trace_eval_maps contains the map plus a head and tail item,
5947 	 * where the head holds the module and length of array, and the
5948 	 * tail holds a pointer to the next list.
5949 	 */
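	/*
	 * Schematically, for an array of @len maps the layout built below is:
	 *
	 *   [ head ][ map 0 ][ map 1 ] ... [ map len-1 ][ tail ]
	 *
	 * i.e. len + 2 union trace_eval_map_item entries, with the tail
	 * zeroed at the end of this function.
	 */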
5950 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5951 	if (!map_array) {
5952 		pr_warn("Unable to allocate trace eval mapping\n");
5953 		return;
5954 	}
5955 
5956 	mutex_lock(&trace_eval_mutex);
5957 
5958 	if (!trace_eval_maps)
5959 		trace_eval_maps = map_array;
5960 	else {
5961 		ptr = trace_eval_maps;
5962 		for (;;) {
5963 			ptr = trace_eval_jmp_to_tail(ptr);
5964 			if (!ptr->tail.next)
5965 				break;
5966 			ptr = ptr->tail.next;
5967 			ptr = ptr->tail.next;
5969 		ptr->tail.next = map_array;
5970 	}
5971 	map_array->head.mod = mod;
5972 	map_array->head.length = len;
5973 	map_array++;
5974 
5975 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5976 		map_array->map = **map;
5977 		map_array++;
5978 	}
5979 	memset(map_array, 0, sizeof(*map_array));
5980 
5981 	mutex_unlock(&trace_eval_mutex);
5982 }
5983 
5984 static void trace_create_eval_file(struct dentry *d_tracer)
5985 {
5986 	trace_create_file("eval_map", 0444, d_tracer,
5987 			  NULL, &tracing_eval_map_fops);
5988 }
5989 
5990 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5991 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5992 static inline void trace_insert_eval_map_file(struct module *mod,
5993 			      struct trace_eval_map **start, int len) { }
5994 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5995 
5996 static void trace_insert_eval_map(struct module *mod,
5997 				  struct trace_eval_map **start, int len)
5998 {
5999 	struct trace_eval_map **map;
6000 
6001 	if (len <= 0)
6002 		return;
6003 
6004 	map = start;
6005 
6006 	trace_event_eval_update(map, len);
6007 
6008 	trace_insert_eval_map_file(mod, start, len);
6009 }
6010 
6011 static ssize_t
6012 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6013 		       size_t cnt, loff_t *ppos)
6014 {
6015 	struct trace_array *tr = filp->private_data;
6016 	char buf[MAX_TRACER_SIZE+2];
6017 	int r;
6018 
6019 	mutex_lock(&trace_types_lock);
6020 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6021 	mutex_unlock(&trace_types_lock);
6022 
6023 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6024 }
6025 
6026 int tracer_init(struct tracer *t, struct trace_array *tr)
6027 {
6028 	tracing_reset_online_cpus(&tr->array_buffer);
6029 	return t->init(tr);
6030 }
6031 
6032 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6033 {
6034 	int cpu;
6035 
6036 	for_each_tracing_cpu(cpu)
6037 		per_cpu_ptr(buf->data, cpu)->entries = val;
6038 }
6039 
6040 #ifdef CONFIG_TRACER_MAX_TRACE
6041 /* resize @trace_buf's per-CPU entry counts to match @size_buf's entries */
6042 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6043 					struct array_buffer *size_buf, int cpu_id)
6044 {
6045 	int cpu, ret = 0;
6046 
6047 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6048 		for_each_tracing_cpu(cpu) {
6049 			ret = ring_buffer_resize(trace_buf->buffer,
6050 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6051 			if (ret < 0)
6052 				break;
6053 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6054 				per_cpu_ptr(size_buf->data, cpu)->entries;
6055 		}
6056 	} else {
6057 		ret = ring_buffer_resize(trace_buf->buffer,
6058 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6059 		if (ret == 0)
6060 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6061 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6062 	}
6063 
6064 	return ret;
6065 }
6066 #endif /* CONFIG_TRACER_MAX_TRACE */
6067 
6068 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6069 					unsigned long size, int cpu)
6070 {
6071 	int ret;
6072 
6073 	/*
6074 	 * If kernel or user changes the size of the ring buffer
6075 	 * we use the size that was given, and we can forget about
6076 	 * expanding it later.
6077 	 */
6078 	ring_buffer_expanded = true;
6079 
6080 	/* May be called before buffers are initialized */
6081 	if (!tr->array_buffer.buffer)
6082 		return 0;
6083 
6084 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6085 	if (ret < 0)
6086 		return ret;
6087 
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6090 	    !tr->current_trace->use_max_tr)
6091 		goto out;
6092 
6093 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6094 	if (ret < 0) {
6095 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6096 						     &tr->array_buffer, cpu);
6097 		if (r < 0) {
6098 			/*
6099 			 * AARGH! We are left with a max buffer of a
6100 			 * different size!
6101 			 * The max buffer is our "snapshot" buffer.
6102 			 * When a tracer needs a snapshot (one of the
6103 			 * latency tracers), it swaps the max buffer
6104 			 * with the saved snapshot. We succeeded in
6105 			 * updating the size of the main buffer, but failed
6106 			 * to update the size of the max buffer. And when we
6107 			 * tried to reset the main buffer to its original
6108 			 * size, we failed there too. This is very unlikely
6109 			 * to happen, but if it does, warn and kill all
6110 			 * tracing.
6111 			 */
6112 			WARN_ON(1);
6113 			tracing_disabled = 1;
6114 		}
6115 		return ret;
6116 	}
6117 
6118 	if (cpu == RING_BUFFER_ALL_CPUS)
6119 		set_buffer_entries(&tr->max_buffer, size);
6120 	else
6121 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6122 
6123  out:
6124 #endif /* CONFIG_TRACER_MAX_TRACE */
6125 
6126 	if (cpu == RING_BUFFER_ALL_CPUS)
6127 		set_buffer_entries(&tr->array_buffer, size);
6128 	else
6129 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6130 
6131 	return ret;
6132 }
6133 
6134 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6135 				  unsigned long size, int cpu_id)
6136 {
6137 	int ret = size;
6138 
6139 	mutex_lock(&trace_types_lock);
6140 
6141 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6142 		/* make sure this cpu is enabled in the mask */
6143 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6144 			ret = -EINVAL;
6145 			goto out;
6146 		}
6147 	}
6148 
6149 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6150 	if (ret < 0)
6151 		ret = -ENOMEM;
6152 
6153 out:
6154 	mutex_unlock(&trace_types_lock);
6155 
6156 	return ret;
6157 }
6158 
6159 
6160 /**
6161  * tracing_update_buffers - used by tracing facility to expand ring buffers
6162  *
6163  * To save memory on systems where tracing is configured in but never
6164  * used, the ring buffers are initially set to a minimum size. Once a
6165  * user starts to use the tracing facility, they need to grow to their
6166  * default size.
6167  *
6168  * This function is to be called when a tracer is about to be used.
6169  */
6170 int tracing_update_buffers(void)
6171 {
6172 	int ret = 0;
6173 
6174 	mutex_lock(&trace_types_lock);
6175 	if (!ring_buffer_expanded)
6176 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6177 						RING_BUFFER_ALL_CPUS);
6178 	mutex_unlock(&trace_types_lock);
6179 
6180 	return ret;
6181 }
6182 
6183 struct trace_option_dentry;
6184 
6185 static void
6186 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6187 
6188 /*
6189  * Used to clear out the tracer before deletion of an instance.
6190  * Must have trace_types_lock held.
6191  */
6192 static void tracing_set_nop(struct trace_array *tr)
6193 {
6194 	if (tr->current_trace == &nop_trace)
6195 		return;
6196 
6197 	tr->current_trace->enabled--;
6198 
6199 	if (tr->current_trace->reset)
6200 		tr->current_trace->reset(tr);
6201 
6202 	tr->current_trace = &nop_trace;
6203 }
6204 
6205 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6206 {
6207 	/* Only enable if the directory has been created already. */
6208 	if (!tr->dir)
6209 		return;
6210 
6211 	create_trace_option_files(tr, t);
6212 }
6213 
6214 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6215 {
6216 	struct tracer *t;
6217 #ifdef CONFIG_TRACER_MAX_TRACE
6218 	bool had_max_tr;
6219 #endif
6220 	int ret = 0;
6221 
6222 	mutex_lock(&trace_types_lock);
6223 
6224 	if (!ring_buffer_expanded) {
6225 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6226 						RING_BUFFER_ALL_CPUS);
6227 		if (ret < 0)
6228 			goto out;
6229 		ret = 0;
6230 	}
6231 
6232 	for (t = trace_types; t; t = t->next) {
6233 		if (strcmp(t->name, buf) == 0)
6234 			break;
6235 	}
6236 	if (!t) {
6237 		ret = -EINVAL;
6238 		goto out;
6239 	}
6240 	if (t == tr->current_trace)
6241 		goto out;
6242 
6243 #ifdef CONFIG_TRACER_SNAPSHOT
6244 	if (t->use_max_tr) {
6245 		arch_spin_lock(&tr->max_lock);
6246 		if (tr->cond_snapshot)
6247 			ret = -EBUSY;
6248 		arch_spin_unlock(&tr->max_lock);
6249 		if (ret)
6250 			goto out;
6251 	}
6252 #endif
6253 	/* Some tracers won't work when enabled from the kernel command line */
6254 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6255 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6256 			t->name);
6257 		goto out;
6258 	}
6259 
6260 	/* Some tracers are only allowed for the top level buffer */
6261 	if (!trace_ok_for_array(t, tr)) {
6262 		ret = -EINVAL;
6263 		goto out;
6264 	}
6265 
6266 	/* If trace pipe files are being read, we can't change the tracer */
6267 	if (tr->trace_ref) {
6268 		ret = -EBUSY;
6269 		goto out;
6270 	}
6271 
6272 	trace_branch_disable();
6273 
6274 	tr->current_trace->enabled--;
6275 
6276 	if (tr->current_trace->reset)
6277 		tr->current_trace->reset(tr);
6278 
6279 	/* Current trace needs to be nop_trace before synchronize_rcu */
6280 	tr->current_trace = &nop_trace;
6281 
6282 #ifdef CONFIG_TRACER_MAX_TRACE
6283 	had_max_tr = tr->allocated_snapshot;
6284 
6285 	if (had_max_tr && !t->use_max_tr) {
6286 		/*
6287 		 * We need to make sure that the update_max_tr sees that
6288 		 * current_trace changed to nop_trace to keep it from
6289 		 * swapping the buffers after we resize it.
6290 		 * update_max_tr() is called with interrupts disabled,
6291 		 * so a synchronize_rcu() is sufficient.
6292 		 */
6293 		synchronize_rcu();
6294 		free_snapshot(tr);
6295 	}
6296 #endif
6297 
6298 #ifdef CONFIG_TRACER_MAX_TRACE
6299 	if (t->use_max_tr && !had_max_tr) {
6300 		ret = tracing_alloc_snapshot_instance(tr);
6301 		if (ret < 0)
6302 			goto out;
6303 	}
6304 #endif
6305 
6306 	if (t->init) {
6307 		ret = tracer_init(t, tr);
6308 		if (ret)
6309 			goto out;
6310 	}
6311 
6312 	tr->current_trace = t;
6313 	tr->current_trace->enabled++;
6314 	trace_branch_enable(tr);
6315  out:
6316 	mutex_unlock(&trace_types_lock);
6317 
6318 	return ret;
6319 }
6320 
6321 static ssize_t
6322 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6323 			size_t cnt, loff_t *ppos)
6324 {
6325 	struct trace_array *tr = filp->private_data;
6326 	char buf[MAX_TRACER_SIZE+1];
6327 	int i;
6328 	size_t ret;
6329 	int err;
6330 
6331 	ret = cnt;
6332 
6333 	if (cnt > MAX_TRACER_SIZE)
6334 		cnt = MAX_TRACER_SIZE;
6335 
6336 	if (copy_from_user(buf, ubuf, cnt))
6337 		return -EFAULT;
6338 
6339 	buf[cnt] = 0;
6340 
6341 	/* strip ending whitespace. */
6342 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6343 		buf[i] = 0;
6344 
6345 	err = tracing_set_tracer(tr, buf);
6346 	if (err)
6347 		return err;
6348 
6349 	*ppos += ret;
6350 
6351 	return ret;
6352 }
6353 
6354 static ssize_t
6355 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6356 		   size_t cnt, loff_t *ppos)
6357 {
6358 	char buf[64];
6359 	int r;
6360 
6361 	r = snprintf(buf, sizeof(buf), "%ld\n",
6362 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6363 	if (r > sizeof(buf))
6364 		r = sizeof(buf);
6365 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6366 }
6367 
6368 static ssize_t
6369 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6370 		    size_t cnt, loff_t *ppos)
6371 {
6372 	unsigned long val;
6373 	int ret;
6374 
6375 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6376 	if (ret)
6377 		return ret;
6378 
6379 	*ptr = val * 1000;
6380 
6381 	return cnt;
6382 }
6383 
6384 static ssize_t
6385 tracing_thresh_read(struct file *filp, char __user *ubuf,
6386 		    size_t cnt, loff_t *ppos)
6387 {
6388 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6389 }
6390 
6391 static ssize_t
6392 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6393 		     size_t cnt, loff_t *ppos)
6394 {
6395 	struct trace_array *tr = filp->private_data;
6396 	int ret;
6397 
6398 	mutex_lock(&trace_types_lock);
6399 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6400 	if (ret < 0)
6401 		goto out;
6402 
6403 	if (tr->current_trace->update_thresh) {
6404 		ret = tr->current_trace->update_thresh(tr);
6405 		if (ret < 0)
6406 			goto out;
6407 	}
6408 
6409 	ret = cnt;
6410 out:
6411 	mutex_unlock(&trace_types_lock);
6412 
6413 	return ret;
6414 }
6415 
6416 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6417 
6418 static ssize_t
6419 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6420 		     size_t cnt, loff_t *ppos)
6421 {
6422 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6423 }
6424 
6425 static ssize_t
6426 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6427 		      size_t cnt, loff_t *ppos)
6428 {
6429 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6430 }
6431 
6432 #endif
6433 
6434 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6435 {
6436 	struct trace_array *tr = inode->i_private;
6437 	struct trace_iterator *iter;
6438 	int ret;
6439 
6440 	ret = tracing_check_open_get_tr(tr);
6441 	if (ret)
6442 		return ret;
6443 
6444 	mutex_lock(&trace_types_lock);
6445 
6446 	/* create a buffer to store the information to pass to userspace */
6447 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6448 	if (!iter) {
6449 		ret = -ENOMEM;
6450 		__trace_array_put(tr);
6451 		goto out;
6452 	}
6453 
6454 	trace_seq_init(&iter->seq);
6455 	iter->trace = tr->current_trace;
6456 
6457 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6458 		ret = -ENOMEM;
6459 		goto fail;
6460 	}
6461 
6462 	/* trace pipe does not show start of buffer */
6463 	cpumask_setall(iter->started);
6464 
6465 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6466 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6467 
6468 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6469 	if (trace_clocks[tr->clock_id].in_ns)
6470 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6471 
6472 	iter->tr = tr;
6473 	iter->array_buffer = &tr->array_buffer;
6474 	iter->cpu_file = tracing_get_cpu(inode);
6475 	mutex_init(&iter->mutex);
6476 	filp->private_data = iter;
6477 
6478 	if (iter->trace->pipe_open)
6479 		iter->trace->pipe_open(iter);
6480 
6481 	nonseekable_open(inode, filp);
6482 
6483 	tr->trace_ref++;
6484 out:
6485 	mutex_unlock(&trace_types_lock);
6486 	return ret;
6487 
6488 fail:
6489 	kfree(iter);
6490 	__trace_array_put(tr);
6491 	mutex_unlock(&trace_types_lock);
6492 	return ret;
6493 }
6494 
6495 static int tracing_release_pipe(struct inode *inode, struct file *file)
6496 {
6497 	struct trace_iterator *iter = file->private_data;
6498 	struct trace_array *tr = inode->i_private;
6499 
6500 	mutex_lock(&trace_types_lock);
6501 
6502 	tr->trace_ref--;
6503 
6504 	if (iter->trace->pipe_close)
6505 		iter->trace->pipe_close(iter);
6506 
6507 	mutex_unlock(&trace_types_lock);
6508 
6509 	free_cpumask_var(iter->started);
6510 	mutex_destroy(&iter->mutex);
6511 	kfree(iter);
6512 
6513 	trace_array_put(tr);
6514 
6515 	return 0;
6516 }
6517 
6518 static __poll_t
6519 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6520 {
6521 	struct trace_array *tr = iter->tr;
6522 
6523 	/* Iterators are static, they should be filled or empty */
6524 	if (trace_buffer_iter(iter, iter->cpu_file))
6525 		return EPOLLIN | EPOLLRDNORM;
6526 
6527 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6528 		/*
6529 		 * Always select as readable when in blocking mode
6530 		 */
6531 		return EPOLLIN | EPOLLRDNORM;
6532 	else
6533 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6534 					     filp, poll_table);
6535 }
6536 
6537 static __poll_t
6538 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6539 {
6540 	struct trace_iterator *iter = filp->private_data;
6541 
6542 	return trace_poll(iter, filp, poll_table);
6543 }
6544 
6545 /* Must be called with iter->mutex held. */
6546 static int tracing_wait_pipe(struct file *filp)
6547 {
6548 	struct trace_iterator *iter = filp->private_data;
6549 	int ret;
6550 
6551 	while (trace_empty(iter)) {
6552 
6553 		if ((filp->f_flags & O_NONBLOCK)) {
6554 			return -EAGAIN;
6555 		}
6556 
6557 		/*
6558 		 * We block until we read something and tracing is disabled.
6559 		 * We still block if tracing is disabled, but we have never
6560 		 * read anything. This allows a user to cat this file, and
6561 		 * then enable tracing. But after we have read something,
6562 		 * we give an EOF when tracing is again disabled.
6563 		 *
6564 		 * iter->pos will be 0 if we haven't read anything.
6565 		 */
6566 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6567 			break;
6568 
6569 		mutex_unlock(&iter->mutex);
6570 
6571 		ret = wait_on_pipe(iter, 0);
6572 
6573 		mutex_lock(&iter->mutex);
6574 
6575 		if (ret)
6576 			return ret;
6577 	}
6578 
6579 	return 1;
6580 }
6581 
6582 /*
6583  * Consumer reader.
6584  */
6585 static ssize_t
6586 tracing_read_pipe(struct file *filp, char __user *ubuf,
6587 		  size_t cnt, loff_t *ppos)
6588 {
6589 	struct trace_iterator *iter = filp->private_data;
6590 	ssize_t sret;
6591 
6592 	/*
6593 	 * Avoid more than one consumer on a single file descriptor
6594 	 * This is just a matter of trace coherency; the ring buffer itself
6595 	 * is protected.
6596 	 */
6597 	mutex_lock(&iter->mutex);
6598 
6599 	/* return any leftover data */
6600 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6601 	if (sret != -EBUSY)
6602 		goto out;
6603 
6604 	trace_seq_init(&iter->seq);
6605 
6606 	if (iter->trace->read) {
6607 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6608 		if (sret)
6609 			goto out;
6610 	}
6611 
6612 waitagain:
6613 	sret = tracing_wait_pipe(filp);
6614 	if (sret <= 0)
6615 		goto out;
6616 
6617 	/* stop when tracing is finished */
6618 	if (trace_empty(iter)) {
6619 		sret = 0;
6620 		goto out;
6621 	}
6622 
6623 	if (cnt >= PAGE_SIZE)
6624 		cnt = PAGE_SIZE - 1;
6625 
6626 	/* reset all but tr, trace, and overruns */
6627 	memset(&iter->seq, 0,
6628 	       sizeof(struct trace_iterator) -
6629 	       offsetof(struct trace_iterator, seq));
6630 	cpumask_clear(iter->started);
6631 	trace_seq_init(&iter->seq);
6632 	iter->pos = -1;
6633 
6634 	trace_event_read_lock();
6635 	trace_access_lock(iter->cpu_file);
6636 	while (trace_find_next_entry_inc(iter) != NULL) {
6637 		enum print_line_t ret;
6638 		int save_len = iter->seq.seq.len;
6639 
6640 		ret = print_trace_line(iter);
6641 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6642 			/* don't print partial lines */
6643 			iter->seq.seq.len = save_len;
6644 			break;
6645 		}
6646 		if (ret != TRACE_TYPE_NO_CONSUME)
6647 			trace_consume(iter);
6648 
6649 		if (trace_seq_used(&iter->seq) >= cnt)
6650 			break;
6651 
6652 		/*
6653 		 * The full flag being set means we hit the trace_seq buffer
6654 		 * size and should have left via the partial-line condition
6655 		 * above; one of the trace_seq_* functions is not used properly.
6656 		 */
6657 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6658 			  iter->ent->type);
6659 	}
6660 	trace_access_unlock(iter->cpu_file);
6661 	trace_event_read_unlock();
6662 
6663 	/* Now copy what we have to the user */
6664 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6665 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6666 		trace_seq_init(&iter->seq);
6667 
6668 	/*
6669 	 * If there was nothing to send to user, in spite of consuming trace
6670 	 * entries, go back to wait for more entries.
6671 	 */
6672 	if (sret == -EBUSY)
6673 		goto waitagain;
6674 
6675 out:
6676 	mutex_unlock(&iter->mutex);
6677 
6678 	return sret;
6679 }
6680 
6681 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6682 				     unsigned int idx)
6683 {
6684 	__free_page(spd->pages[idx]);
6685 }
6686 
6687 static size_t
6688 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6689 {
6690 	size_t count;
6691 	int save_len;
6692 	int ret;
6693 
6694 	/* Seq buffer is page-sized, exactly what we need. */
6695 	for (;;) {
6696 		save_len = iter->seq.seq.len;
6697 		ret = print_trace_line(iter);
6698 
6699 		if (trace_seq_has_overflowed(&iter->seq)) {
6700 			iter->seq.seq.len = save_len;
6701 			break;
6702 		}
6703 
6704 		/*
6705 		 * This should not be hit, because it should only
6706 		 * be set if the iter->seq overflowed. But check it
6707 		 * anyway to be safe.
6708 		 */
6709 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6710 			iter->seq.seq.len = save_len;
6711 			break;
6712 		}
6713 
6714 		count = trace_seq_used(&iter->seq) - save_len;
6715 		if (rem < count) {
6716 			rem = 0;
6717 			iter->seq.seq.len = save_len;
6718 			break;
6719 		}
6720 
6721 		if (ret != TRACE_TYPE_NO_CONSUME)
6722 			trace_consume(iter);
6723 		rem -= count;
6724 		if (!trace_find_next_entry_inc(iter))	{
6725 			rem = 0;
6726 			iter->ent = NULL;
6727 			break;
6728 		}
6729 	}
6730 
6731 	return rem;
6732 }
6733 
6734 static ssize_t tracing_splice_read_pipe(struct file *filp,
6735 					loff_t *ppos,
6736 					struct pipe_inode_info *pipe,
6737 					size_t len,
6738 					unsigned int flags)
6739 {
6740 	struct page *pages_def[PIPE_DEF_BUFFERS];
6741 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6742 	struct trace_iterator *iter = filp->private_data;
6743 	struct splice_pipe_desc spd = {
6744 		.pages		= pages_def,
6745 		.partial	= partial_def,
6746 		.nr_pages	= 0, /* This gets updated below. */
6747 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6748 		.ops		= &default_pipe_buf_ops,
6749 		.spd_release	= tracing_spd_release_pipe,
6750 	};
6751 	ssize_t ret;
6752 	size_t rem;
6753 	unsigned int i;
6754 
6755 	if (splice_grow_spd(pipe, &spd))
6756 		return -ENOMEM;
6757 
6758 	mutex_lock(&iter->mutex);
6759 
6760 	if (iter->trace->splice_read) {
6761 		ret = iter->trace->splice_read(iter, filp,
6762 					       ppos, pipe, len, flags);
6763 		if (ret)
6764 			goto out_err;
6765 	}
6766 
6767 	ret = tracing_wait_pipe(filp);
6768 	if (ret <= 0)
6769 		goto out_err;
6770 
6771 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6772 		ret = -EFAULT;
6773 		goto out_err;
6774 	}
6775 
6776 	trace_event_read_lock();
6777 	trace_access_lock(iter->cpu_file);
6778 
6779 	/* Fill as many pages as possible. */
6780 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6781 		spd.pages[i] = alloc_page(GFP_KERNEL);
6782 		if (!spd.pages[i])
6783 			break;
6784 
6785 		rem = tracing_fill_pipe_page(rem, iter);
6786 
6787 		/* Copy the data into the page, so we can start over. */
6788 		ret = trace_seq_to_buffer(&iter->seq,
6789 					  page_address(spd.pages[i]),
6790 					  trace_seq_used(&iter->seq));
6791 		if (ret < 0) {
6792 			__free_page(spd.pages[i]);
6793 			break;
6794 		}
6795 		spd.partial[i].offset = 0;
6796 		spd.partial[i].len = trace_seq_used(&iter->seq);
6797 
6798 		trace_seq_init(&iter->seq);
6799 	}
6800 
6801 	trace_access_unlock(iter->cpu_file);
6802 	trace_event_read_unlock();
6803 	mutex_unlock(&iter->mutex);
6804 
6805 	spd.nr_pages = i;
6806 
6807 	if (i)
6808 		ret = splice_to_pipe(pipe, &spd);
6809 	else
6810 		ret = 0;
6811 out:
6812 	splice_shrink_spd(&spd);
6813 	return ret;
6814 
6815 out_err:
6816 	mutex_unlock(&iter->mutex);
6817 	goto out;
6818 }
6819 
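/*
 * "buffer_size_kb" read/write handlers.  The value is the per-CPU ring
 * buffer size in kilobytes.  Illustrative usage (tracefs mounted at
 * /sys/kernel/tracing):
 *
 *   echo 4096 > buffer_size_kb              # 4 MiB per CPU
 *   echo 8192 > per_cpu/cpu1/buffer_size_kb # resize only CPU 1
 */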
6820 static ssize_t
6821 tracing_entries_read(struct file *filp, char __user *ubuf,
6822 		     size_t cnt, loff_t *ppos)
6823 {
6824 	struct inode *inode = file_inode(filp);
6825 	struct trace_array *tr = inode->i_private;
6826 	int cpu = tracing_get_cpu(inode);
6827 	char buf[64];
6828 	int r = 0;
6829 	ssize_t ret;
6830 
6831 	mutex_lock(&trace_types_lock);
6832 
6833 	if (cpu == RING_BUFFER_ALL_CPUS) {
6834 		int cpu, buf_size_same;
6835 		unsigned long size;
6836 
6837 		size = 0;
6838 		buf_size_same = 1;
6839 		/* check if all cpu sizes are same */
6840 		for_each_tracing_cpu(cpu) {
6841 			/* fill in the size from first enabled cpu */
6842 			if (size == 0)
6843 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6844 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6845 				buf_size_same = 0;
6846 				break;
6847 			}
6848 		}
6849 
6850 		if (buf_size_same) {
6851 			if (!ring_buffer_expanded)
6852 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6853 					    size >> 10,
6854 					    trace_buf_size >> 10);
6855 			else
6856 				r = sprintf(buf, "%lu\n", size >> 10);
6857 		} else
6858 			r = sprintf(buf, "X\n");
6859 	} else
6860 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6861 
6862 	mutex_unlock(&trace_types_lock);
6863 
6864 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6865 	return ret;
6866 }
6867 
6868 static ssize_t
6869 tracing_entries_write(struct file *filp, const char __user *ubuf,
6870 		      size_t cnt, loff_t *ppos)
6871 {
6872 	struct inode *inode = file_inode(filp);
6873 	struct trace_array *tr = inode->i_private;
6874 	unsigned long val;
6875 	int ret;
6876 
6877 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6878 	if (ret)
6879 		return ret;
6880 
6881 	/* must have at least 1 entry */
6882 	if (!val)
6883 		return -EINVAL;
6884 
6885 	/* value is in KB */
6886 	val <<= 10;
6887 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6888 	if (ret < 0)
6889 		return ret;
6890 
6891 	*ppos += cnt;
6892 
6893 	return cnt;
6894 }
6895 
6896 static ssize_t
6897 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6898 				size_t cnt, loff_t *ppos)
6899 {
6900 	struct trace_array *tr = filp->private_data;
6901 	char buf[64];
6902 	int r, cpu;
6903 	unsigned long size = 0, expanded_size = 0;
6904 
6905 	mutex_lock(&trace_types_lock);
6906 	for_each_tracing_cpu(cpu) {
6907 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6908 		if (!ring_buffer_expanded)
6909 			expanded_size += trace_buf_size >> 10;
6910 	}
6911 	if (ring_buffer_expanded)
6912 		r = sprintf(buf, "%lu\n", size);
6913 	else
6914 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6915 	mutex_unlock(&trace_types_lock);
6916 
6917 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6918 }
6919 
6920 static ssize_t
6921 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6922 			  size_t cnt, loff_t *ppos)
6923 {
6924 	/*
6925 	 * There is no need to read what the user has written; this function
6926 	 * only exists so that "echo" into this file does not return an error.
6927 	 */
6928 
6929 	*ppos += cnt;
6930 
6931 	return cnt;
6932 }
6933 
6934 static int
6935 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6936 {
6937 	struct trace_array *tr = inode->i_private;
6938 
6939 	/* disable tracing ? */
6940 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6941 		tracer_tracing_off(tr);
6942 	/* resize the ring buffer to 0 */
6943 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6944 
6945 	trace_array_put(tr);
6946 
6947 	return 0;
6948 }
6949 
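/*
 * "trace_marker" write handler: injects the user-supplied string into the
 * ring buffer as a TRACE_PRINT event.  Illustrative usage (tracefs mounted
 * at /sys/kernel/tracing):
 *
 *   echo "hello from userspace" > trace_marker
 *
 * The text then appears inline in the "trace" output.  Writes longer than
 * TRACE_BUF_SIZE are truncated.
 */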
6950 static ssize_t
6951 tracing_mark_write(struct file *filp, const char __user *ubuf,
6952 					size_t cnt, loff_t *fpos)
6953 {
6954 	struct trace_array *tr = filp->private_data;
6955 	struct ring_buffer_event *event;
6956 	enum event_trigger_type tt = ETT_NONE;
6957 	struct trace_buffer *buffer;
6958 	struct print_entry *entry;
6959 	ssize_t written;
6960 	int size;
6961 	int len;
6962 
6963 /* Used in tracing_mark_raw_write() as well */
6964 #define FAULTED_STR "<faulted>"
6965 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6966 
6967 	if (tracing_disabled)
6968 		return -EINVAL;
6969 
6970 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6971 		return -EINVAL;
6972 
6973 	if (cnt > TRACE_BUF_SIZE)
6974 		cnt = TRACE_BUF_SIZE;
6975 
6976 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6977 
6978 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6979 
6980 	/* If less than "<faulted>", then make sure we can still add that */
6981 	if (cnt < FAULTED_SIZE)
6982 		size += FAULTED_SIZE - cnt;
6983 
6984 	buffer = tr->array_buffer.buffer;
6985 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6986 					    tracing_gen_ctx());
6987 	if (unlikely(!event))
6988 		/* Ring buffer disabled, return as if not open for write */
6989 		return -EBADF;
6990 
6991 	entry = ring_buffer_event_data(event);
6992 	entry->ip = _THIS_IP_;
6993 
6994 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6995 	if (len) {
6996 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6997 		cnt = FAULTED_SIZE;
6998 		written = -EFAULT;
6999 	} else
7000 		written = cnt;
7001 
7002 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7003 		/* do not add \n before testing triggers, but add \0 */
7004 		entry->buf[cnt] = '\0';
7005 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7006 	}
7007 
7008 	if (entry->buf[cnt - 1] != '\n') {
7009 		entry->buf[cnt] = '\n';
7010 		entry->buf[cnt + 1] = '\0';
7011 	} else
7012 		entry->buf[cnt] = '\0';
7013 
7014 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7015 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7016 	__buffer_unlock_commit(buffer, event);
7017 
7018 	if (tt)
7019 		event_triggers_post_call(tr->trace_marker_file, tt);
7020 
7021 	if (written > 0)
7022 		*fpos += written;
7023 
7024 	return written;
7025 }
7026 
7027 /* Limit it for now to 3K (including tag) */
7028 #define RAW_DATA_MAX_SIZE (1024*3)
7029 
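/*
 * "trace_marker_raw" expects binary data whose first sizeof(unsigned int)
 * bytes are a tag id, followed by an arbitrary payload, bounded by
 * RAW_DATA_MAX_SIZE (and TRACE_BUF_SIZE).  A minimal illustrative writer:
 *
 *   struct { unsigned int id; char data[8]; } rec = { 1, "payload" };
 *   write(fd, &rec, sizeof(rec));  // fd open on trace_marker_raw
 */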
7030 static ssize_t
7031 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7032 					size_t cnt, loff_t *fpos)
7033 {
7034 	struct trace_array *tr = filp->private_data;
7035 	struct ring_buffer_event *event;
7036 	struct trace_buffer *buffer;
7037 	struct raw_data_entry *entry;
7038 	ssize_t written;
7039 	int size;
7040 	int len;
7041 
7042 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7043 
7044 	if (tracing_disabled)
7045 		return -EINVAL;
7046 
7047 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7048 		return -EINVAL;
7049 
7050 	/* The marker must at least have a tag id */
7051 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7052 		return -EINVAL;
7053 
7054 	if (cnt > TRACE_BUF_SIZE)
7055 		cnt = TRACE_BUF_SIZE;
7056 
7057 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7058 
7059 	size = sizeof(*entry) + cnt;
7060 	if (cnt < FAULT_SIZE_ID)
7061 		size += FAULT_SIZE_ID - cnt;
7062 
7063 	buffer = tr->array_buffer.buffer;
7064 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7065 					    tracing_gen_ctx());
7066 	if (!event)
7067 		/* Ring buffer disabled, return as if not open for write */
7068 		return -EBADF;
7069 
7070 	entry = ring_buffer_event_data(event);
7071 
7072 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7073 	if (len) {
7074 		entry->id = -1;
7075 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7076 		written = -EFAULT;
7077 	} else
7078 		written = cnt;
7079 
7080 	__buffer_unlock_commit(buffer, event);
7081 
7082 	if (written > 0)
7083 		*fpos += written;
7084 
7085 	return written;
7086 }
7087 
7088 static int tracing_clock_show(struct seq_file *m, void *v)
7089 {
7090 	struct trace_array *tr = m->private;
7091 	int i;
7092 
7093 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7094 		seq_printf(m,
7095 			"%s%s%s%s", i ? " " : "",
7096 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7097 			i == tr->clock_id ? "]" : "");
7098 	seq_putc(m, '\n');
7099 
7100 	return 0;
7101 }
7102 
7103 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7104 {
7105 	int i;
7106 
7107 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7108 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7109 			break;
7110 	}
7111 	if (i == ARRAY_SIZE(trace_clocks))
7112 		return -EINVAL;
7113 
7114 	mutex_lock(&trace_types_lock);
7115 
7116 	tr->clock_id = i;
7117 
7118 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7119 
7120 	/*
7121 	 * New clock may not be consistent with the previous clock.
7122 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7123 	 */
7124 	tracing_reset_online_cpus(&tr->array_buffer);
7125 
7126 #ifdef CONFIG_TRACER_MAX_TRACE
7127 	if (tr->max_buffer.buffer)
7128 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7129 	tracing_reset_online_cpus(&tr->max_buffer);
7130 #endif
7131 
7132 	mutex_unlock(&trace_types_lock);
7133 
7134 	return 0;
7135 }
7136 
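/*
 * Illustrative trace_clock usage (tracefs mounted at /sys/kernel/tracing):
 *
 *   cat trace_clock          # the current clock is shown in [brackets]
 *   echo mono > trace_clock
 *
 * Note that switching clocks resets the ring buffers (see tracing_set_clock()
 * above), so any existing trace data is discarded.
 */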
7137 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7138 				   size_t cnt, loff_t *fpos)
7139 {
7140 	struct seq_file *m = filp->private_data;
7141 	struct trace_array *tr = m->private;
7142 	char buf[64];
7143 	const char *clockstr;
7144 	int ret;
7145 
7146 	if (cnt >= sizeof(buf))
7147 		return -EINVAL;
7148 
7149 	if (copy_from_user(buf, ubuf, cnt))
7150 		return -EFAULT;
7151 
7152 	buf[cnt] = 0;
7153 
7154 	clockstr = strstrip(buf);
7155 
7156 	ret = tracing_set_clock(tr, clockstr);
7157 	if (ret)
7158 		return ret;
7159 
7160 	*fpos += cnt;
7161 
7162 	return cnt;
7163 }
7164 
7165 static int tracing_clock_open(struct inode *inode, struct file *file)
7166 {
7167 	struct trace_array *tr = inode->i_private;
7168 	int ret;
7169 
7170 	ret = tracing_check_open_get_tr(tr);
7171 	if (ret)
7172 		return ret;
7173 
7174 	ret = single_open(file, tracing_clock_show, inode->i_private);
7175 	if (ret < 0)
7176 		trace_array_put(tr);
7177 
7178 	return ret;
7179 }
7180 
7181 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7182 {
7183 	struct trace_array *tr = m->private;
7184 
7185 	mutex_lock(&trace_types_lock);
7186 
7187 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7188 		seq_puts(m, "delta [absolute]\n");
7189 	else
7190 		seq_puts(m, "[delta] absolute\n");
7191 
7192 	mutex_unlock(&trace_types_lock);
7193 
7194 	return 0;
7195 }
7196 
7197 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7198 {
7199 	struct trace_array *tr = inode->i_private;
7200 	int ret;
7201 
7202 	ret = tracing_check_open_get_tr(tr);
7203 	if (ret)
7204 		return ret;
7205 
7206 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7207 	if (ret < 0)
7208 		trace_array_put(tr);
7209 
7210 	return ret;
7211 }
7212 
7213 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7214 {
7215 	if (rbe == this_cpu_read(trace_buffered_event))
7216 		return ring_buffer_time_stamp(buffer);
7217 
7218 	return ring_buffer_event_time_stamp(buffer, rbe);
7219 }
7220 
7221 /*
7222  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7223  */
7224 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7225 {
7226 	int ret = 0;
7227 
7228 	mutex_lock(&trace_types_lock);
7229 
7230 	if (set && tr->no_filter_buffering_ref++)
7231 		goto out;
7232 
7233 	if (!set) {
7234 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7235 			ret = -EINVAL;
7236 			goto out;
7237 		}
7238 
7239 		--tr->no_filter_buffering_ref;
7240 	}
7241  out:
7242 	mutex_unlock(&trace_types_lock);
7243 
7244 	return ret;
7245 }
7246 
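/* Per-open state for the binary per-cpu buffer files (e.g. trace_pipe_raw). */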
7247 struct ftrace_buffer_info {
7248 	struct trace_iterator	iter;
7249 	void			*spare;
7250 	unsigned int		spare_cpu;
7251 	unsigned int		read;
7252 };
7253 
7254 #ifdef CONFIG_TRACER_SNAPSHOT
7255 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7256 {
7257 	struct trace_array *tr = inode->i_private;
7258 	struct trace_iterator *iter;
7259 	struct seq_file *m;
7260 	int ret;
7261 
7262 	ret = tracing_check_open_get_tr(tr);
7263 	if (ret)
7264 		return ret;
7265 
7266 	if (file->f_mode & FMODE_READ) {
7267 		iter = __tracing_open(inode, file, true);
7268 		if (IS_ERR(iter))
7269 			ret = PTR_ERR(iter);
7270 	} else {
7271 		/* Writes still need the seq_file to hold the private data */
7272 		ret = -ENOMEM;
7273 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7274 		if (!m)
7275 			goto out;
7276 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7277 		if (!iter) {
7278 			kfree(m);
7279 			goto out;
7280 		}
7281 		ret = 0;
7282 
7283 		iter->tr = tr;
7284 		iter->array_buffer = &tr->max_buffer;
7285 		iter->cpu_file = tracing_get_cpu(inode);
7286 		m->private = iter;
7287 		file->private_data = m;
7288 	}
7289 out:
7290 	if (ret < 0)
7291 		trace_array_put(tr);
7292 
7293 	return ret;
7294 }
7295 
7296 static ssize_t
7297 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7298 		       loff_t *ppos)
7299 {
7300 	struct seq_file *m = filp->private_data;
7301 	struct trace_iterator *iter = m->private;
7302 	struct trace_array *tr = iter->tr;
7303 	unsigned long val;
7304 	int ret;
7305 
7306 	ret = tracing_update_buffers();
7307 	if (ret < 0)
7308 		return ret;
7309 
7310 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7311 	if (ret)
7312 		return ret;
7313 
7314 	mutex_lock(&trace_types_lock);
7315 
7316 	if (tr->current_trace->use_max_tr) {
7317 		ret = -EBUSY;
7318 		goto out;
7319 	}
7320 
7321 	arch_spin_lock(&tr->max_lock);
7322 	if (tr->cond_snapshot)
7323 		ret = -EBUSY;
7324 	arch_spin_unlock(&tr->max_lock);
7325 	if (ret)
7326 		goto out;
7327 
7328 	switch (val) {
7329 	case 0:
7330 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7331 			ret = -EINVAL;
7332 			break;
7333 		}
7334 		if (tr->allocated_snapshot)
7335 			free_snapshot(tr);
7336 		break;
7337 	case 1:
7338 /* Only allow per-cpu swap if the ring buffer supports it */
7339 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7340 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7341 			ret = -EINVAL;
7342 			break;
7343 		}
7344 #endif
7345 		if (tr->allocated_snapshot)
7346 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7347 					&tr->array_buffer, iter->cpu_file);
7348 		else
7349 			ret = tracing_alloc_snapshot_instance(tr);
7350 		if (ret < 0)
7351 			break;
7352 		local_irq_disable();
7353 		/* Now, we're going to swap */
7354 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7355 			update_max_tr(tr, current, smp_processor_id(), NULL);
7356 		else
7357 			update_max_tr_single(tr, current, iter->cpu_file);
7358 		local_irq_enable();
7359 		break;
7360 	default:
7361 		if (tr->allocated_snapshot) {
7362 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7363 				tracing_reset_online_cpus(&tr->max_buffer);
7364 			else
7365 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7366 		}
7367 		break;
7368 	}
7369 
7370 	if (ret >= 0) {
7371 		*ppos += cnt;
7372 		ret = cnt;
7373 	}
7374 out:
7375 	mutex_unlock(&trace_types_lock);
7376 	return ret;
7377 }
7378 
7379 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7380 {
7381 	struct seq_file *m = file->private_data;
7382 	int ret;
7383 
7384 	ret = tracing_release(inode, file);
7385 
7386 	if (file->f_mode & FMODE_READ)
7387 		return ret;
7388 
7389 	/* If write only, the seq_file is just a stub */
7390 	if (m)
7391 		kfree(m->private);
7392 	kfree(m);
7393 
7394 	return 0;
7395 }
7396 
7397 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7398 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7399 				    size_t count, loff_t *ppos);
7400 static int tracing_buffers_release(struct inode *inode, struct file *file);
7401 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7402 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7403 
7404 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7405 {
7406 	struct ftrace_buffer_info *info;
7407 	int ret;
7408 
7409 	/* The following checks for tracefs lockdown */
7410 	ret = tracing_buffers_open(inode, filp);
7411 	if (ret < 0)
7412 		return ret;
7413 
7414 	info = filp->private_data;
7415 
7416 	if (info->iter.trace->use_max_tr) {
7417 		tracing_buffers_release(inode, filp);
7418 		return -EBUSY;
7419 	}
7420 
7421 	info->iter.snapshot = true;
7422 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7423 
7424 	return ret;
7425 }
7426 
7427 #endif /* CONFIG_TRACER_SNAPSHOT */
7428 
7429 
7430 static const struct file_operations tracing_thresh_fops = {
7431 	.open		= tracing_open_generic,
7432 	.read		= tracing_thresh_read,
7433 	.write		= tracing_thresh_write,
7434 	.llseek		= generic_file_llseek,
7435 };
7436 
7437 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7438 static const struct file_operations tracing_max_lat_fops = {
7439 	.open		= tracing_open_generic,
7440 	.read		= tracing_max_lat_read,
7441 	.write		= tracing_max_lat_write,
7442 	.llseek		= generic_file_llseek,
7443 };
7444 #endif
7445 
7446 static const struct file_operations set_tracer_fops = {
7447 	.open		= tracing_open_generic,
7448 	.read		= tracing_set_trace_read,
7449 	.write		= tracing_set_trace_write,
7450 	.llseek		= generic_file_llseek,
7451 };
7452 
7453 static const struct file_operations tracing_pipe_fops = {
7454 	.open		= tracing_open_pipe,
7455 	.poll		= tracing_poll_pipe,
7456 	.read		= tracing_read_pipe,
7457 	.splice_read	= tracing_splice_read_pipe,
7458 	.release	= tracing_release_pipe,
7459 	.llseek		= no_llseek,
7460 };
7461 
7462 static const struct file_operations tracing_entries_fops = {
7463 	.open		= tracing_open_generic_tr,
7464 	.read		= tracing_entries_read,
7465 	.write		= tracing_entries_write,
7466 	.llseek		= generic_file_llseek,
7467 	.release	= tracing_release_generic_tr,
7468 };
7469 
7470 static const struct file_operations tracing_total_entries_fops = {
7471 	.open		= tracing_open_generic_tr,
7472 	.read		= tracing_total_entries_read,
7473 	.llseek		= generic_file_llseek,
7474 	.release	= tracing_release_generic_tr,
7475 };
7476 
7477 static const struct file_operations tracing_free_buffer_fops = {
7478 	.open		= tracing_open_generic_tr,
7479 	.write		= tracing_free_buffer_write,
7480 	.release	= tracing_free_buffer_release,
7481 };
7482 
7483 static const struct file_operations tracing_mark_fops = {
7484 	.open		= tracing_open_generic_tr,
7485 	.write		= tracing_mark_write,
7486 	.llseek		= generic_file_llseek,
7487 	.release	= tracing_release_generic_tr,
7488 };
7489 
7490 static const struct file_operations tracing_mark_raw_fops = {
7491 	.open		= tracing_open_generic_tr,
7492 	.write		= tracing_mark_raw_write,
7493 	.llseek		= generic_file_llseek,
7494 	.release	= tracing_release_generic_tr,
7495 };
7496 
7497 static const struct file_operations trace_clock_fops = {
7498 	.open		= tracing_clock_open,
7499 	.read		= seq_read,
7500 	.llseek		= seq_lseek,
7501 	.release	= tracing_single_release_tr,
7502 	.write		= tracing_clock_write,
7503 };
7504 
7505 static const struct file_operations trace_time_stamp_mode_fops = {
7506 	.open		= tracing_time_stamp_mode_open,
7507 	.read		= seq_read,
7508 	.llseek		= seq_lseek,
7509 	.release	= tracing_single_release_tr,
7510 };
7511 
7512 #ifdef CONFIG_TRACER_SNAPSHOT
7513 static const struct file_operations snapshot_fops = {
7514 	.open		= tracing_snapshot_open,
7515 	.read		= seq_read,
7516 	.write		= tracing_snapshot_write,
7517 	.llseek		= tracing_lseek,
7518 	.release	= tracing_snapshot_release,
7519 };
7520 
7521 static const struct file_operations snapshot_raw_fops = {
7522 	.open		= snapshot_raw_open,
7523 	.read		= tracing_buffers_read,
7524 	.release	= tracing_buffers_release,
7525 	.splice_read	= tracing_buffers_splice_read,
7526 	.llseek		= no_llseek,
7527 };
7528 
7529 #endif /* CONFIG_TRACER_SNAPSHOT */
7530 
7531 #define TRACING_LOG_ERRS_MAX	8
7532 #define TRACING_LOG_LOC_MAX	128
7533 
7534 #define CMD_PREFIX "  Command: "
7535 
7536 struct err_info {
7537 	const char	**errs;	/* ptr to loc-specific array of err strings */
7538 	u8		type;	/* index into errs -> specific err string */
7539 	u8		pos;	/* caret position; cmd is capped at MAX_FILTER_STR_VAL (256) */
7540 	u64		ts;
7541 };
7542 
7543 struct tracing_log_err {
7544 	struct list_head	list;
7545 	struct err_info		info;
7546 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7547 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7548 };
7549 
7550 static DEFINE_MUTEX(tracing_err_log_lock);
7551 
7552 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7553 {
7554 	struct tracing_log_err *err;
7555 
7556 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7557 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7558 		if (!err)
7559 			err = ERR_PTR(-ENOMEM);
7560 		tr->n_err_log_entries++;
7561 
7562 		return err;
7563 	}
7564 
7565 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7566 	list_del(&err->list);
7567 
7568 	return err;
7569 }
7570 
7571 /**
7572  * err_pos - find the position of a string within a command for error careting
7573  * @cmd: The tracing command that caused the error
7574  * @str: The string to position the caret at within @cmd
7575  *
7576  * Finds the position of the first occurrence of @str within @cmd.  The
7577  * return value can be passed to tracing_log_err() for caret placement
7578  * within @cmd.
7579  *
7580  * Returns the index within @cmd of the first occurrence of @str or 0
7581  * if @str was not found.
7582  */
7583 unsigned int err_pos(char *cmd, const char *str)
7584 {
7585 	char *found;
7586 
7587 	if (WARN_ON(!strlen(cmd)))
7588 		return 0;
7589 
7590 	found = strstr(cmd, str);
7591 	if (found)
7592 		return found - cmd;
7593 
7594 	return 0;
7595 }
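
/*
 * Usage sketch (the command string is hypothetical, for illustration only):
 * with cmd = "keys=pid,bad_field", err_pos(cmd, "bad_field") returns 9,
 * which a caller then passes as @pos to tracing_log_err() so that the
 * caret is drawn under "bad_field" in the logged command.
 */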
7596 
7597 /**
7598  * tracing_log_err - write an error to the tracing error log
7599  * @tr: The associated trace array for the error (NULL for top level array)
7600  * @loc: A string describing where the error occurred
7601  * @cmd: The tracing command that caused the error
7602  * @errs: The array of loc-specific static error strings
7603  * @type: The index into errs[], which produces the specific static err string
7604  * @pos: The position the caret should be placed in the cmd
7605  *
7606  * Writes an error into tracing/error_log of the form:
7607  *
7608  * <loc>: error: <text>
7609  *   Command: <cmd>
7610  *              ^
7611  *
7612  * tracing/error_log is a small log file containing the last
7613  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7614  * unless there has been a tracing error, and the error log can be
7615  * cleared and have its memory freed by writing the empty string in
7616  * truncation mode to it i.e. echo > tracing/error_log.
7617  *
7618  * NOTE: the @errs array along with the @type param are used to
7619  * produce a static error string - this string is not copied and saved
7620  * when the error is logged - only a pointer to it is saved.  See
7621  * existing callers for examples of how static strings are typically
7622  * defined for use with tracing_log_err().
7623  */
7624 void tracing_log_err(struct trace_array *tr,
7625 		     const char *loc, const char *cmd,
7626 		     const char **errs, u8 type, u8 pos)
7627 {
7628 	struct tracing_log_err *err;
7629 
7630 	if (!tr)
7631 		tr = &global_trace;
7632 
7633 	mutex_lock(&tracing_err_log_lock);
7634 	err = get_tracing_log_err(tr);
7635 	if (PTR_ERR(err) == -ENOMEM) {
7636 		mutex_unlock(&tracing_err_log_lock);
7637 		return;
7638 	}
7639 
7640 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7641 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7642 
7643 	err->info.errs = errs;
7644 	err->info.type = type;
7645 	err->info.pos = pos;
7646 	err->info.ts = local_clock();
7647 
7648 	list_add_tail(&err->list, &tr->err_log);
7649 	mutex_unlock(&tracing_err_log_lock);
7650 }
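
/*
 * Example of what ends up in tracing/error_log (the location, command and
 * error text below are illustrative only, not taken from a real caller):
 *
 *   [  123.456789] hist:sched:sched_switch: error: Couldn't find field
 *     Command: keys=pid,bad_field
 *                       ^
 *
 * The caret column comes from @pos; the whole log is cleared with
 * "echo > tracing/error_log".
 */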
7651 
7652 static void clear_tracing_err_log(struct trace_array *tr)
7653 {
7654 	struct tracing_log_err *err, *next;
7655 
7656 	mutex_lock(&tracing_err_log_lock);
7657 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7658 		list_del(&err->list);
7659 		kfree(err);
7660 	}
7661 
7662 	tr->n_err_log_entries = 0;
7663 	mutex_unlock(&tracing_err_log_lock);
7664 }
7665 
7666 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7667 {
7668 	struct trace_array *tr = m->private;
7669 
7670 	mutex_lock(&tracing_err_log_lock);
7671 
7672 	return seq_list_start(&tr->err_log, *pos);
7673 }
7674 
7675 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7676 {
7677 	struct trace_array *tr = m->private;
7678 
7679 	return seq_list_next(v, &tr->err_log, pos);
7680 }
7681 
7682 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7683 {
7684 	mutex_unlock(&tracing_err_log_lock);
7685 }
7686 
7687 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7688 {
7689 	u8 i;
7690 
7691 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7692 		seq_putc(m, ' ');
7693 	for (i = 0; i < pos; i++)
7694 		seq_putc(m, ' ');
7695 	seq_puts(m, "^\n");
7696 }
7697 
7698 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7699 {
7700 	struct tracing_log_err *err = v;
7701 
7702 	if (err) {
7703 		const char *err_text = err->info.errs[err->info.type];
7704 		u64 sec = err->info.ts;
7705 		u32 nsec;
7706 
7707 		nsec = do_div(sec, NSEC_PER_SEC);
7708 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7709 			   err->loc, err_text);
7710 		seq_printf(m, "%s", err->cmd);
7711 		tracing_err_log_show_pos(m, err->info.pos);
7712 	}
7713 
7714 	return 0;
7715 }
7716 
7717 static const struct seq_operations tracing_err_log_seq_ops = {
7718 	.start  = tracing_err_log_seq_start,
7719 	.next   = tracing_err_log_seq_next,
7720 	.stop   = tracing_err_log_seq_stop,
7721 	.show   = tracing_err_log_seq_show
7722 };
7723 
7724 static int tracing_err_log_open(struct inode *inode, struct file *file)
7725 {
7726 	struct trace_array *tr = inode->i_private;
7727 	int ret = 0;
7728 
7729 	ret = tracing_check_open_get_tr(tr);
7730 	if (ret)
7731 		return ret;
7732 
7733 	/* If this file was opened for write, then erase contents */
7734 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7735 		clear_tracing_err_log(tr);
7736 
7737 	if (file->f_mode & FMODE_READ) {
7738 		ret = seq_open(file, &tracing_err_log_seq_ops);
7739 		if (!ret) {
7740 			struct seq_file *m = file->private_data;
7741 			m->private = tr;
7742 		} else {
7743 			trace_array_put(tr);
7744 		}
7745 	}
7746 	return ret;
7747 }
7748 
7749 static ssize_t tracing_err_log_write(struct file *file,
7750 				     const char __user *buffer,
7751 				     size_t count, loff_t *ppos)
7752 {
7753 	return count;
7754 }
7755 
7756 static int tracing_err_log_release(struct inode *inode, struct file *file)
7757 {
7758 	struct trace_array *tr = inode->i_private;
7759 
7760 	trace_array_put(tr);
7761 
7762 	if (file->f_mode & FMODE_READ)
7763 		seq_release(inode, file);
7764 
7765 	return 0;
7766 }
7767 
7768 static const struct file_operations tracing_err_log_fops = {
7769 	.open           = tracing_err_log_open,
7770 	.write		= tracing_err_log_write,
7771 	.read           = seq_read,
7772 	.llseek         = seq_lseek,
7773 	.release        = tracing_err_log_release,
7774 };
7775 
7776 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7777 {
7778 	struct trace_array *tr = inode->i_private;
7779 	struct ftrace_buffer_info *info;
7780 	int ret;
7781 
7782 	ret = tracing_check_open_get_tr(tr);
7783 	if (ret)
7784 		return ret;
7785 
7786 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7787 	if (!info) {
7788 		trace_array_put(tr);
7789 		return -ENOMEM;
7790 	}
7791 
7792 	mutex_lock(&trace_types_lock);
7793 
7794 	info->iter.tr		= tr;
7795 	info->iter.cpu_file	= tracing_get_cpu(inode);
7796 	info->iter.trace	= tr->current_trace;
7797 	info->iter.array_buffer = &tr->array_buffer;
7798 	info->spare		= NULL;
7799 	/* Force reading ring buffer for first read */
7800 	info->read		= (unsigned int)-1;
7801 
7802 	filp->private_data = info;
7803 
7804 	tr->trace_ref++;
7805 
7806 	mutex_unlock(&trace_types_lock);
7807 
7808 	ret = nonseekable_open(inode, filp);
7809 	if (ret < 0)
7810 		trace_array_put(tr);
7811 
7812 	return ret;
7813 }
7814 
7815 static __poll_t
7816 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7817 {
7818 	struct ftrace_buffer_info *info = filp->private_data;
7819 	struct trace_iterator *iter = &info->iter;
7820 
7821 	return trace_poll(iter, filp, poll_table);
7822 }
7823 
7824 static ssize_t
7825 tracing_buffers_read(struct file *filp, char __user *ubuf,
7826 		     size_t count, loff_t *ppos)
7827 {
7828 	struct ftrace_buffer_info *info = filp->private_data;
7829 	struct trace_iterator *iter = &info->iter;
7830 	ssize_t ret = 0;
7831 	ssize_t size;
7832 
7833 	if (!count)
7834 		return 0;
7835 
7836 #ifdef CONFIG_TRACER_MAX_TRACE
7837 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7838 		return -EBUSY;
7839 #endif
7840 
7841 	if (!info->spare) {
7842 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7843 							  iter->cpu_file);
7844 		if (IS_ERR(info->spare)) {
7845 			ret = PTR_ERR(info->spare);
7846 			info->spare = NULL;
7847 		} else {
7848 			info->spare_cpu = iter->cpu_file;
7849 		}
7850 	}
7851 	if (!info->spare)
7852 		return ret;
7853 
7854 	/* Do we have previous read data to read? */
7855 	if (info->read < PAGE_SIZE)
7856 		goto read;
7857 
7858  again:
7859 	trace_access_lock(iter->cpu_file);
7860 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7861 				    &info->spare,
7862 				    count,
7863 				    iter->cpu_file, 0);
7864 	trace_access_unlock(iter->cpu_file);
7865 
7866 	if (ret < 0) {
7867 		if (trace_empty(iter)) {
7868 			if ((filp->f_flags & O_NONBLOCK))
7869 				return -EAGAIN;
7870 
7871 			ret = wait_on_pipe(iter, 0);
7872 			if (ret)
7873 				return ret;
7874 
7875 			goto again;
7876 		}
7877 		return 0;
7878 	}
7879 
7880 	info->read = 0;
7881  read:
7882 	size = PAGE_SIZE - info->read;
7883 	if (size > count)
7884 		size = count;
7885 
7886 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7887 	if (ret == size)
7888 		return -EFAULT;
7889 
7890 	size -= ret;
7891 
7892 	*ppos += size;
7893 	info->read += size;
7894 
7895 	return size;
7896 }
7897 
7898 static int tracing_buffers_release(struct inode *inode, struct file *file)
7899 {
7900 	struct ftrace_buffer_info *info = file->private_data;
7901 	struct trace_iterator *iter = &info->iter;
7902 
7903 	mutex_lock(&trace_types_lock);
7904 
7905 	iter->tr->trace_ref--;
7906 
7907 	__trace_array_put(iter->tr);
7908 
7909 	if (info->spare)
7910 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7911 					   info->spare_cpu, info->spare);
7912 	kvfree(info);
7913 
7914 	mutex_unlock(&trace_types_lock);
7915 
7916 	return 0;
7917 }
7918 
7919 struct buffer_ref {
7920 	struct trace_buffer	*buffer;
7921 	void			*page;
7922 	int			cpu;
7923 	refcount_t		refcount;
7924 };
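
/*
 * A buffer_ref ties one ring-buffer read page to the pipe buffers created
 * by tracing_buffers_splice_read() below: each spliced page starts with a
 * single reference, buffer_pipe_buf_get() takes another when a pipe buffer
 * is duplicated (e.g. by tee(2)), and the page is handed back to the ring
 * buffer only when the last reference is dropped in buffer_ref_release().
 */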
7925 
7926 static void buffer_ref_release(struct buffer_ref *ref)
7927 {
7928 	if (!refcount_dec_and_test(&ref->refcount))
7929 		return;
7930 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7931 	kfree(ref);
7932 }
7933 
7934 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7935 				    struct pipe_buffer *buf)
7936 {
7937 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7938 
7939 	buffer_ref_release(ref);
7940 	buf->private = 0;
7941 }
7942 
7943 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7944 				struct pipe_buffer *buf)
7945 {
7946 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7947 
7948 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7949 		return false;
7950 
7951 	refcount_inc(&ref->refcount);
7952 	return true;
7953 }
7954 
7955 /* Pipe buffer operations for a buffer. */
7956 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7957 	.release		= buffer_pipe_buf_release,
7958 	.get			= buffer_pipe_buf_get,
7959 };
7960 
7961 /*
7962  * Callback from splice_to_pipe(), if we need to release some pages
7963  * at the end of the spd in case we error'ed out in filling the pipe.
7964  */
7965 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7966 {
7967 	struct buffer_ref *ref =
7968 		(struct buffer_ref *)spd->partial[i].private;
7969 
7970 	buffer_ref_release(ref);
7971 	spd->partial[i].private = 0;
7972 }
7973 
7974 static ssize_t
7975 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7976 			    struct pipe_inode_info *pipe, size_t len,
7977 			    unsigned int flags)
7978 {
7979 	struct ftrace_buffer_info *info = file->private_data;
7980 	struct trace_iterator *iter = &info->iter;
7981 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7982 	struct page *pages_def[PIPE_DEF_BUFFERS];
7983 	struct splice_pipe_desc spd = {
7984 		.pages		= pages_def,
7985 		.partial	= partial_def,
7986 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7987 		.ops		= &buffer_pipe_buf_ops,
7988 		.spd_release	= buffer_spd_release,
7989 	};
7990 	struct buffer_ref *ref;
7991 	int entries, i;
7992 	ssize_t ret = 0;
7993 
7994 #ifdef CONFIG_TRACER_MAX_TRACE
7995 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7996 		return -EBUSY;
7997 #endif
7998 
7999 	if (*ppos & (PAGE_SIZE - 1))
8000 		return -EINVAL;
8001 
8002 	if (len & (PAGE_SIZE - 1)) {
8003 		if (len < PAGE_SIZE)
8004 			return -EINVAL;
8005 		len &= PAGE_MASK;
8006 	}
8007 
8008 	if (splice_grow_spd(pipe, &spd))
8009 		return -ENOMEM;
8010 
8011  again:
8012 	trace_access_lock(iter->cpu_file);
8013 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8014 
8015 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8016 		struct page *page;
8017 		int r;
8018 
8019 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8020 		if (!ref) {
8021 			ret = -ENOMEM;
8022 			break;
8023 		}
8024 
8025 		refcount_set(&ref->refcount, 1);
8026 		ref->buffer = iter->array_buffer->buffer;
8027 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8028 		if (IS_ERR(ref->page)) {
8029 			ret = PTR_ERR(ref->page);
8030 			ref->page = NULL;
8031 			kfree(ref);
8032 			break;
8033 		}
8034 		ref->cpu = iter->cpu_file;
8035 
8036 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8037 					  len, iter->cpu_file, 1);
8038 		if (r < 0) {
8039 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8040 						   ref->page);
8041 			kfree(ref);
8042 			break;
8043 		}
8044 
8045 		page = virt_to_page(ref->page);
8046 
8047 		spd.pages[i] = page;
8048 		spd.partial[i].len = PAGE_SIZE;
8049 		spd.partial[i].offset = 0;
8050 		spd.partial[i].private = (unsigned long)ref;
8051 		spd.nr_pages++;
8052 		*ppos += PAGE_SIZE;
8053 
8054 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8055 	}
8056 
8057 	trace_access_unlock(iter->cpu_file);
8058 	spd.nr_pages = i;
8059 
8060 	/* did we read anything? */
8061 	if (!spd.nr_pages) {
8062 		if (ret)
8063 			goto out;
8064 
8065 		ret = -EAGAIN;
8066 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8067 			goto out;
8068 
8069 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8070 		if (ret)
8071 			goto out;
8072 
8073 		goto again;
8074 	}
8075 
8076 	ret = splice_to_pipe(pipe, &spd);
8077 out:
8078 	splice_shrink_spd(&spd);
8079 
8080 	return ret;
8081 }
8082 
8083 static const struct file_operations tracing_buffers_fops = {
8084 	.open		= tracing_buffers_open,
8085 	.read		= tracing_buffers_read,
8086 	.poll		= tracing_buffers_poll,
8087 	.release	= tracing_buffers_release,
8088 	.splice_read	= tracing_buffers_splice_read,
8089 	.llseek		= no_llseek,
8090 };
8091 
8092 static ssize_t
8093 tracing_stats_read(struct file *filp, char __user *ubuf,
8094 		   size_t count, loff_t *ppos)
8095 {
8096 	struct inode *inode = file_inode(filp);
8097 	struct trace_array *tr = inode->i_private;
8098 	struct array_buffer *trace_buf = &tr->array_buffer;
8099 	int cpu = tracing_get_cpu(inode);
8100 	struct trace_seq *s;
8101 	unsigned long cnt;
8102 	unsigned long long t;
8103 	unsigned long usec_rem;
8104 
8105 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8106 	if (!s)
8107 		return -ENOMEM;
8108 
8109 	trace_seq_init(s);
8110 
8111 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8112 	trace_seq_printf(s, "entries: %ld\n", cnt);
8113 
8114 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8115 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8116 
8117 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8118 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8119 
8120 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8121 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8122 
8123 	if (trace_clocks[tr->clock_id].in_ns) {
8124 		/* local or global for trace_clock */
8125 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8126 		usec_rem = do_div(t, USEC_PER_SEC);
8127 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8128 								t, usec_rem);
8129 
8130 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8131 		usec_rem = do_div(t, USEC_PER_SEC);
8132 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8133 	} else {
8134 		/* counter or tsc mode for trace_clock */
8135 		trace_seq_printf(s, "oldest event ts: %llu\n",
8136 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8137 
8138 		trace_seq_printf(s, "now ts: %llu\n",
8139 				ring_buffer_time_stamp(trace_buf->buffer));
8140 	}
8141 
8142 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8143 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8144 
8145 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8146 	trace_seq_printf(s, "read events: %ld\n", cnt);
8147 
8148 	count = simple_read_from_buffer(ubuf, count, ppos,
8149 					s->buffer, trace_seq_used(s));
8150 
8151 	kfree(s);
8152 
8153 	return count;
8154 }
8155 
8156 static const struct file_operations tracing_stats_fops = {
8157 	.open		= tracing_open_generic_tr,
8158 	.read		= tracing_stats_read,
8159 	.llseek		= generic_file_llseek,
8160 	.release	= tracing_release_generic_tr,
8161 };
8162 
8163 #ifdef CONFIG_DYNAMIC_FTRACE
8164 
8165 static ssize_t
8166 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8167 		  size_t cnt, loff_t *ppos)
8168 {
8169 	ssize_t ret;
8170 	char *buf;
8171 	int r;
8172 
8173 	/* 256 should be plenty to hold the amount needed */
8174 	buf = kmalloc(256, GFP_KERNEL);
8175 	if (!buf)
8176 		return -ENOMEM;
8177 
8178 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8179 		      ftrace_update_tot_cnt,
8180 		      ftrace_number_of_pages,
8181 		      ftrace_number_of_groups);
8182 
8183 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8184 	kfree(buf);
8185 	return ret;
8186 }
8187 
8188 static const struct file_operations tracing_dyn_info_fops = {
8189 	.open		= tracing_open_generic,
8190 	.read		= tracing_read_dyn_info,
8191 	.llseek		= generic_file_llseek,
8192 };
8193 #endif /* CONFIG_DYNAMIC_FTRACE */
8194 
8195 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8196 static void
8197 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8198 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8199 		void *data)
8200 {
8201 	tracing_snapshot_instance(tr);
8202 }
8203 
8204 static void
8205 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8206 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8207 		      void *data)
8208 {
8209 	struct ftrace_func_mapper *mapper = data;
8210 	long *count = NULL;
8211 
8212 	if (mapper)
8213 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8214 
8215 	if (count) {
8216 
8217 		if (*count <= 0)
8218 			return;
8219 
8220 		(*count)--;
8221 	}
8222 
8223 	tracing_snapshot_instance(tr);
8224 }
8225 
8226 static int
8227 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8228 		      struct ftrace_probe_ops *ops, void *data)
8229 {
8230 	struct ftrace_func_mapper *mapper = data;
8231 	long *count = NULL;
8232 
8233 	seq_printf(m, "%ps:", (void *)ip);
8234 
8235 	seq_puts(m, "snapshot");
8236 
8237 	if (mapper)
8238 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8239 
8240 	if (count)
8241 		seq_printf(m, ":count=%ld\n", *count);
8242 	else
8243 		seq_puts(m, ":unlimited\n");
8244 
8245 	return 0;
8246 }
8247 
8248 static int
8249 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8250 		     unsigned long ip, void *init_data, void **data)
8251 {
8252 	struct ftrace_func_mapper *mapper = *data;
8253 
8254 	if (!mapper) {
8255 		mapper = allocate_ftrace_func_mapper();
8256 		if (!mapper)
8257 			return -ENOMEM;
8258 		*data = mapper;
8259 	}
8260 
8261 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8262 }
8263 
8264 static void
8265 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8266 		     unsigned long ip, void *data)
8267 {
8268 	struct ftrace_func_mapper *mapper = data;
8269 
8270 	if (!ip) {
8271 		if (!mapper)
8272 			return;
8273 		free_ftrace_func_mapper(mapper, NULL);
8274 		return;
8275 	}
8276 
8277 	ftrace_func_mapper_remove_ip(mapper, ip);
8278 }
8279 
8280 static struct ftrace_probe_ops snapshot_probe_ops = {
8281 	.func			= ftrace_snapshot,
8282 	.print			= ftrace_snapshot_print,
8283 };
8284 
8285 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8286 	.func			= ftrace_count_snapshot,
8287 	.print			= ftrace_snapshot_print,
8288 	.init			= ftrace_snapshot_init,
8289 	.free			= ftrace_snapshot_free,
8290 };
8291 
8292 static int
8293 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8294 			       char *glob, char *cmd, char *param, int enable)
8295 {
8296 	struct ftrace_probe_ops *ops;
8297 	void *count = (void *)-1;
8298 	char *number;
8299 	int ret;
8300 
8301 	if (!tr)
8302 		return -ENODEV;
8303 
8304 	/* hash funcs only work with set_ftrace_filter */
8305 	if (!enable)
8306 		return -EINVAL;
8307 
8308 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8309 
8310 	if (glob[0] == '!')
8311 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8312 
8313 	if (!param)
8314 		goto out_reg;
8315 
8316 	number = strsep(&param, ":");
8317 
8318 	if (!strlen(number))
8319 		goto out_reg;
8320 
8321 	/*
8322 	 * We use the callback data field (which is a pointer)
8323 	 * as our counter.
8324 	 */
8325 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8326 	if (ret)
8327 		return ret;
8328 
8329  out_reg:
8330 	ret = tracing_alloc_snapshot_instance(tr);
8331 	if (ret < 0)
8332 		goto out;
8333 
8334 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8335 
8336  out:
8337 	return ret < 0 ? ret : 0;
8338 }
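
/*
 * Usage sketch for the resulting "snapshot" function command (see
 * Documentation/trace/ftrace.rst for the authoritative syntax):
 *
 *   echo 'schedule:snapshot'   > set_ftrace_filter  # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *   echo '!schedule:snapshot'  > set_ftrace_filter  # remove the probe
 */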
8339 
8340 static struct ftrace_func_command ftrace_snapshot_cmd = {
8341 	.name			= "snapshot",
8342 	.func			= ftrace_trace_snapshot_callback,
8343 };
8344 
8345 static __init int register_snapshot_cmd(void)
8346 {
8347 	return register_ftrace_command(&ftrace_snapshot_cmd);
8348 }
8349 #else
8350 static inline __init int register_snapshot_cmd(void) { return 0; }
8351 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8352 
8353 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8354 {
8355 	if (WARN_ON(!tr->dir))
8356 		return ERR_PTR(-ENODEV);
8357 
8358 	/* Top directory uses NULL as the parent */
8359 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8360 		return NULL;
8361 
8362 	/* All sub buffers have a descriptor */
8363 	return tr->dir;
8364 }
8365 
8366 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8367 {
8368 	struct dentry *d_tracer;
8369 
8370 	if (tr->percpu_dir)
8371 		return tr->percpu_dir;
8372 
8373 	d_tracer = tracing_get_dentry(tr);
8374 	if (IS_ERR(d_tracer))
8375 		return NULL;
8376 
8377 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8378 
8379 	MEM_FAIL(!tr->percpu_dir,
8380 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8381 
8382 	return tr->percpu_dir;
8383 }
8384 
8385 static struct dentry *
8386 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8387 		      void *data, long cpu, const struct file_operations *fops)
8388 {
8389 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8390 
8391 	if (ret) /* See tracing_get_cpu() */
8392 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8393 	return ret;
8394 }
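
/*
 * Note: cpu + 1 (rather than cpu) is stored in i_cdev above so that a
 * NULL i_cdev can still mean "no specific CPU"; tracing_get_cpu() undoes
 * the +1 and falls back to RING_BUFFER_ALL_CPUS when i_cdev is unset.
 */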
8395 
8396 static void
8397 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8398 {
8399 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8400 	struct dentry *d_cpu;
8401 	char cpu_dir[30]; /* 30 characters should be more than enough */
8402 
8403 	if (!d_percpu)
8404 		return;
8405 
8406 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8407 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8408 	if (!d_cpu) {
8409 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8410 		return;
8411 	}
8412 
8413 	/* per cpu trace_pipe */
8414 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8415 				tr, cpu, &tracing_pipe_fops);
8416 
8417 	/* per cpu trace */
8418 	trace_create_cpu_file("trace", 0644, d_cpu,
8419 				tr, cpu, &tracing_fops);
8420 
8421 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8422 				tr, cpu, &tracing_buffers_fops);
8423 
8424 	trace_create_cpu_file("stats", 0444, d_cpu,
8425 				tr, cpu, &tracing_stats_fops);
8426 
8427 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8428 				tr, cpu, &tracing_entries_fops);
8429 
8430 #ifdef CONFIG_TRACER_SNAPSHOT
8431 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8432 				tr, cpu, &snapshot_fops);
8433 
8434 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8435 				tr, cpu, &snapshot_raw_fops);
8436 #endif
8437 }
8438 
8439 #ifdef CONFIG_FTRACE_SELFTEST
8440 /* Let selftest have access to static functions in this file */
8441 #include "trace_selftest.c"
8442 #endif
8443 
8444 static ssize_t
8445 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8446 			loff_t *ppos)
8447 {
8448 	struct trace_option_dentry *topt = filp->private_data;
8449 	char *buf;
8450 
8451 	if (topt->flags->val & topt->opt->bit)
8452 		buf = "1\n";
8453 	else
8454 		buf = "0\n";
8455 
8456 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8457 }
8458 
8459 static ssize_t
8460 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8461 			 loff_t *ppos)
8462 {
8463 	struct trace_option_dentry *topt = filp->private_data;
8464 	unsigned long val;
8465 	int ret;
8466 
8467 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8468 	if (ret)
8469 		return ret;
8470 
8471 	if (val != 0 && val != 1)
8472 		return -EINVAL;
8473 
8474 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8475 		mutex_lock(&trace_types_lock);
8476 		ret = __set_tracer_option(topt->tr, topt->flags,
8477 					  topt->opt, !val);
8478 		mutex_unlock(&trace_types_lock);
8479 		if (ret)
8480 			return ret;
8481 	}
8482 
8483 	*ppos += cnt;
8484 
8485 	return cnt;
8486 }
8487 
8488 
8489 static const struct file_operations trace_options_fops = {
8490 	.open = tracing_open_generic,
8491 	.read = trace_options_read,
8492 	.write = trace_options_write,
8493 	.llseek	= generic_file_llseek,
8494 };
8495 
8496 /*
8497  * In order to pass in both the trace_array descriptor as well as the index
8498  * to the flag that the trace option file represents, the trace_array
8499  * has a character array of trace_flags_index[], which holds the index
8500  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8501  * The address of this character array is passed to the flag option file
8502  * read/write callbacks.
8503  *
8504  * In order to extract both the index and the trace_array descriptor,
8505  * get_tr_index() uses the following algorithm.
8506  *
8507  *   idx = *ptr;
8508  *
8509  * As the pointer itself contains the address of the index (remember
8510  * index[1] == 1).
8511  *
8512  * Then to get the trace_array descriptor, by subtracting that index
8513  * from the ptr, we get to the start of the index itself.
8514  *
8515  *   ptr - idx == &index[0]
8516  *
8517  * Then a simple container_of() from that pointer gets us to the
8518  * trace_array descriptor.
8519  */
8520 static void get_tr_index(void *data, struct trace_array **ptr,
8521 			 unsigned int *pindex)
8522 {
8523 	*pindex = *(unsigned char *)data;
8524 
8525 	*ptr = container_of(data - *pindex, struct trace_array,
8526 			    trace_flags_index);
8527 }
8528 
8529 static ssize_t
8530 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8531 			loff_t *ppos)
8532 {
8533 	void *tr_index = filp->private_data;
8534 	struct trace_array *tr;
8535 	unsigned int index;
8536 	char *buf;
8537 
8538 	get_tr_index(tr_index, &tr, &index);
8539 
8540 	if (tr->trace_flags & (1 << index))
8541 		buf = "1\n";
8542 	else
8543 		buf = "0\n";
8544 
8545 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8546 }
8547 
8548 static ssize_t
8549 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8550 			 loff_t *ppos)
8551 {
8552 	void *tr_index = filp->private_data;
8553 	struct trace_array *tr;
8554 	unsigned int index;
8555 	unsigned long val;
8556 	int ret;
8557 
8558 	get_tr_index(tr_index, &tr, &index);
8559 
8560 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8561 	if (ret)
8562 		return ret;
8563 
8564 	if (val != 0 && val != 1)
8565 		return -EINVAL;
8566 
8567 	mutex_lock(&event_mutex);
8568 	mutex_lock(&trace_types_lock);
8569 	ret = set_tracer_flag(tr, 1 << index, val);
8570 	mutex_unlock(&trace_types_lock);
8571 	mutex_unlock(&event_mutex);
8572 
8573 	if (ret < 0)
8574 		return ret;
8575 
8576 	*ppos += cnt;
8577 
8578 	return cnt;
8579 }
8580 
8581 static const struct file_operations trace_options_core_fops = {
8582 	.open = tracing_open_generic,
8583 	.read = trace_options_core_read,
8584 	.write = trace_options_core_write,
8585 	.llseek = generic_file_llseek,
8586 };
8587 
8588 struct dentry *trace_create_file(const char *name,
8589 				 umode_t mode,
8590 				 struct dentry *parent,
8591 				 void *data,
8592 				 const struct file_operations *fops)
8593 {
8594 	struct dentry *ret;
8595 
8596 	ret = tracefs_create_file(name, mode, parent, data, fops);
8597 	if (!ret)
8598 		pr_warn("Could not create tracefs '%s' entry\n", name);
8599 
8600 	return ret;
8601 }
8602 
8603 
8604 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8605 {
8606 	struct dentry *d_tracer;
8607 
8608 	if (tr->options)
8609 		return tr->options;
8610 
8611 	d_tracer = tracing_get_dentry(tr);
8612 	if (IS_ERR(d_tracer))
8613 		return NULL;
8614 
8615 	tr->options = tracefs_create_dir("options", d_tracer);
8616 	if (!tr->options) {
8617 		pr_warn("Could not create tracefs directory 'options'\n");
8618 		return NULL;
8619 	}
8620 
8621 	return tr->options;
8622 }
8623 
8624 static void
8625 create_trace_option_file(struct trace_array *tr,
8626 			 struct trace_option_dentry *topt,
8627 			 struct tracer_flags *flags,
8628 			 struct tracer_opt *opt)
8629 {
8630 	struct dentry *t_options;
8631 
8632 	t_options = trace_options_init_dentry(tr);
8633 	if (!t_options)
8634 		return;
8635 
8636 	topt->flags = flags;
8637 	topt->opt = opt;
8638 	topt->tr = tr;
8639 
8640 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8641 				    &trace_options_fops);
8642 
8643 }
8644 
8645 static void
8646 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8647 {
8648 	struct trace_option_dentry *topts;
8649 	struct trace_options *tr_topts;
8650 	struct tracer_flags *flags;
8651 	struct tracer_opt *opts;
8652 	int cnt;
8653 	int i;
8654 
8655 	if (!tracer)
8656 		return;
8657 
8658 	flags = tracer->flags;
8659 
8660 	if (!flags || !flags->opts)
8661 		return;
8662 
8663 	/*
8664 	 * If this is an instance, only create flags for tracers
8665 	 * the instance may have.
8666 	 */
8667 	if (!trace_ok_for_array(tracer, tr))
8668 		return;
8669 
8670 	for (i = 0; i < tr->nr_topts; i++) {
8671 		/* Make sure there are no duplicate flags. */
8672 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8673 			return;
8674 	}
8675 
8676 	opts = flags->opts;
8677 
8678 	for (cnt = 0; opts[cnt].name; cnt++)
8679 		;
8680 
8681 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8682 	if (!topts)
8683 		return;
8684 
8685 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8686 			    GFP_KERNEL);
8687 	if (!tr_topts) {
8688 		kfree(topts);
8689 		return;
8690 	}
8691 
8692 	tr->topts = tr_topts;
8693 	tr->topts[tr->nr_topts].tracer = tracer;
8694 	tr->topts[tr->nr_topts].topts = topts;
8695 	tr->nr_topts++;
8696 
8697 	for (cnt = 0; opts[cnt].name; cnt++) {
8698 		create_trace_option_file(tr, &topts[cnt], flags,
8699 					 &opts[cnt]);
8700 		MEM_FAIL(topts[cnt].entry == NULL,
8701 			  "Failed to create trace option: %s",
8702 			  opts[cnt].name);
8703 	}
8704 }
8705 
8706 static struct dentry *
8707 create_trace_option_core_file(struct trace_array *tr,
8708 			      const char *option, long index)
8709 {
8710 	struct dentry *t_options;
8711 
8712 	t_options = trace_options_init_dentry(tr);
8713 	if (!t_options)
8714 		return NULL;
8715 
8716 	return trace_create_file(option, 0644, t_options,
8717 				 (void *)&tr->trace_flags_index[index],
8718 				 &trace_options_core_fops);
8719 }
8720 
8721 static void create_trace_options_dir(struct trace_array *tr)
8722 {
8723 	struct dentry *t_options;
8724 	bool top_level = tr == &global_trace;
8725 	int i;
8726 
8727 	t_options = trace_options_init_dentry(tr);
8728 	if (!t_options)
8729 		return;
8730 
8731 	for (i = 0; trace_options[i]; i++) {
8732 		if (top_level ||
8733 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8734 			create_trace_option_core_file(tr, trace_options[i], i);
8735 	}
8736 }
8737 
8738 static ssize_t
8739 rb_simple_read(struct file *filp, char __user *ubuf,
8740 	       size_t cnt, loff_t *ppos)
8741 {
8742 	struct trace_array *tr = filp->private_data;
8743 	char buf[64];
8744 	int r;
8745 
8746 	r = tracer_tracing_is_on(tr);
8747 	r = sprintf(buf, "%d\n", r);
8748 
8749 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8750 }
8751 
8752 static ssize_t
8753 rb_simple_write(struct file *filp, const char __user *ubuf,
8754 		size_t cnt, loff_t *ppos)
8755 {
8756 	struct trace_array *tr = filp->private_data;
8757 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8758 	unsigned long val;
8759 	int ret;
8760 
8761 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8762 	if (ret)
8763 		return ret;
8764 
8765 	if (buffer) {
8766 		mutex_lock(&trace_types_lock);
8767 		if (!!val == tracer_tracing_is_on(tr)) {
8768 			val = 0; /* do nothing */
8769 		} else if (val) {
8770 			tracer_tracing_on(tr);
8771 			if (tr->current_trace->start)
8772 				tr->current_trace->start(tr);
8773 		} else {
8774 			tracer_tracing_off(tr);
8775 			if (tr->current_trace->stop)
8776 				tr->current_trace->stop(tr);
8777 		}
8778 		mutex_unlock(&trace_types_lock);
8779 	}
8780 
8781 	(*ppos)++;
8782 
8783 	return cnt;
8784 }
8785 
8786 static const struct file_operations rb_simple_fops = {
8787 	.open		= tracing_open_generic_tr,
8788 	.read		= rb_simple_read,
8789 	.write		= rb_simple_write,
8790 	.release	= tracing_release_generic_tr,
8791 	.llseek		= default_llseek,
8792 };
8793 
8794 static ssize_t
8795 buffer_percent_read(struct file *filp, char __user *ubuf,
8796 		    size_t cnt, loff_t *ppos)
8797 {
8798 	struct trace_array *tr = filp->private_data;
8799 	char buf[64];
8800 	int r;
8801 
8802 	r = tr->buffer_percent;
8803 	r = sprintf(buf, "%d\n", r);
8804 
8805 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8806 }
8807 
8808 static ssize_t
8809 buffer_percent_write(struct file *filp, const char __user *ubuf,
8810 		     size_t cnt, loff_t *ppos)
8811 {
8812 	struct trace_array *tr = filp->private_data;
8813 	unsigned long val;
8814 	int ret;
8815 
8816 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8817 	if (ret)
8818 		return ret;
8819 
8820 	if (val > 100)
8821 		return -EINVAL;
8822 
8823 	if (!val)
8824 		val = 1;
8825 
8826 	tr->buffer_percent = val;
8827 
8828 	(*ppos)++;
8829 
8830 	return cnt;
8831 }
8832 
8833 static const struct file_operations buffer_percent_fops = {
8834 	.open		= tracing_open_generic_tr,
8835 	.read		= buffer_percent_read,
8836 	.write		= buffer_percent_write,
8837 	.release	= tracing_release_generic_tr,
8838 	.llseek		= default_llseek,
8839 };
8840 
8841 static struct dentry *trace_instance_dir;
8842 
8843 static void
8844 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8845 
8846 static int
8847 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8848 {
8849 	enum ring_buffer_flags rb_flags;
8850 
8851 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8852 
8853 	buf->tr = tr;
8854 
8855 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8856 	if (!buf->buffer)
8857 		return -ENOMEM;
8858 
8859 	buf->data = alloc_percpu(struct trace_array_cpu);
8860 	if (!buf->data) {
8861 		ring_buffer_free(buf->buffer);
8862 		buf->buffer = NULL;
8863 		return -ENOMEM;
8864 	}
8865 
8866 	/* Allocate the first page for all buffers */
8867 	set_buffer_entries(&tr->array_buffer,
8868 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8869 
8870 	return 0;
8871 }
8872 
8873 static int allocate_trace_buffers(struct trace_array *tr, int size)
8874 {
8875 	int ret;
8876 
8877 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8878 	if (ret)
8879 		return ret;
8880 
8881 #ifdef CONFIG_TRACER_MAX_TRACE
8882 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8883 				    allocate_snapshot ? size : 1);
8884 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8885 		ring_buffer_free(tr->array_buffer.buffer);
8886 		tr->array_buffer.buffer = NULL;
8887 		free_percpu(tr->array_buffer.data);
8888 		tr->array_buffer.data = NULL;
8889 		return -ENOMEM;
8890 	}
8891 	tr->allocated_snapshot = allocate_snapshot;
8892 
8893 	/*
8894 	 * Only the top level trace array gets its snapshot allocated
8895 	 * from the kernel command line.
8896 	 */
8897 	allocate_snapshot = false;
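	/*
	 * (allocate_snapshot is normally set during early boot, e.g. via
	 *  the "alloc_snapshot" kernel command-line option.)
	 */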
8898 #endif
8899 
8900 	return 0;
8901 }
8902 
8903 static void free_trace_buffer(struct array_buffer *buf)
8904 {
8905 	if (buf->buffer) {
8906 		ring_buffer_free(buf->buffer);
8907 		buf->buffer = NULL;
8908 		free_percpu(buf->data);
8909 		buf->data = NULL;
8910 	}
8911 }
8912 
8913 static void free_trace_buffers(struct trace_array *tr)
8914 {
8915 	if (!tr)
8916 		return;
8917 
8918 	free_trace_buffer(&tr->array_buffer);
8919 
8920 #ifdef CONFIG_TRACER_MAX_TRACE
8921 	free_trace_buffer(&tr->max_buffer);
8922 #endif
8923 }
8924 
8925 static void init_trace_flags_index(struct trace_array *tr)
8926 {
8927 	int i;
8928 
8929 	/* Used by the trace options files */
8930 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8931 		tr->trace_flags_index[i] = i;
8932 }
8933 
8934 static void __update_tracer_options(struct trace_array *tr)
8935 {
8936 	struct tracer *t;
8937 
8938 	for (t = trace_types; t; t = t->next)
8939 		add_tracer_options(tr, t);
8940 }
8941 
8942 static void update_tracer_options(struct trace_array *tr)
8943 {
8944 	mutex_lock(&trace_types_lock);
8945 	__update_tracer_options(tr);
8946 	mutex_unlock(&trace_types_lock);
8947 }
8948 
8949 /* Must have trace_types_lock held */
8950 struct trace_array *trace_array_find(const char *instance)
8951 {
8952 	struct trace_array *tr, *found = NULL;
8953 
8954 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8955 		if (tr->name && strcmp(tr->name, instance) == 0) {
8956 			found = tr;
8957 			break;
8958 		}
8959 	}
8960 
8961 	return found;
8962 }
8963 
8964 struct trace_array *trace_array_find_get(const char *instance)
8965 {
8966 	struct trace_array *tr;
8967 
8968 	mutex_lock(&trace_types_lock);
8969 	tr = trace_array_find(instance);
8970 	if (tr)
8971 		tr->ref++;
8972 	mutex_unlock(&trace_types_lock);
8973 
8974 	return tr;
8975 }
8976 
8977 static int trace_array_create_dir(struct trace_array *tr)
8978 {
8979 	int ret;
8980 
8981 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8982 	if (!tr->dir)
8983 		return -EINVAL;
8984 
8985 	ret = event_trace_add_tracer(tr->dir, tr);
8986 	if (ret)
8987 		tracefs_remove(tr->dir);
8988 
8989 	init_tracer_tracefs(tr, tr->dir);
8990 	__update_tracer_options(tr);
8991 
8992 	return ret;
8993 }
8994 
8995 static struct trace_array *trace_array_create(const char *name)
8996 {
8997 	struct trace_array *tr;
8998 	int ret;
8999 
9000 	ret = -ENOMEM;
9001 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9002 	if (!tr)
9003 		return ERR_PTR(ret);
9004 
9005 	tr->name = kstrdup(name, GFP_KERNEL);
9006 	if (!tr->name)
9007 		goto out_free_tr;
9008 
9009 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9010 		goto out_free_tr;
9011 
9012 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9013 
9014 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9015 
9016 	raw_spin_lock_init(&tr->start_lock);
9017 
9018 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9019 
9020 	tr->current_trace = &nop_trace;
9021 
9022 	INIT_LIST_HEAD(&tr->systems);
9023 	INIT_LIST_HEAD(&tr->events);
9024 	INIT_LIST_HEAD(&tr->hist_vars);
9025 	INIT_LIST_HEAD(&tr->err_log);
9026 
9027 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9028 		goto out_free_tr;
9029 
9030 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9031 		goto out_free_tr;
9032 
9033 	ftrace_init_trace_array(tr);
9034 
9035 	init_trace_flags_index(tr);
9036 
9037 	if (trace_instance_dir) {
9038 		ret = trace_array_create_dir(tr);
9039 		if (ret)
9040 			goto out_free_tr;
9041 	} else
9042 		__trace_early_add_events(tr);
9043 
9044 	list_add(&tr->list, &ftrace_trace_arrays);
9045 
9046 	tr->ref++;
9047 
9048 	return tr;
9049 
9050  out_free_tr:
9051 	ftrace_free_ftrace_ops(tr);
9052 	free_trace_buffers(tr);
9053 	free_cpumask_var(tr->tracing_cpumask);
9054 	kfree(tr->name);
9055 	kfree(tr);
9056 
9057 	return ERR_PTR(ret);
9058 }
9059 
9060 static int instance_mkdir(const char *name)
9061 {
9062 	struct trace_array *tr;
9063 	int ret;
9064 
9065 	mutex_lock(&event_mutex);
9066 	mutex_lock(&trace_types_lock);
9067 
9068 	ret = -EEXIST;
9069 	if (trace_array_find(name))
9070 		goto out_unlock;
9071 
9072 	tr = trace_array_create(name);
9073 
9074 	ret = PTR_ERR_OR_ZERO(tr);
9075 
9076 out_unlock:
9077 	mutex_unlock(&trace_types_lock);
9078 	mutex_unlock(&event_mutex);
9079 	return ret;
9080 }
9081 
9082 /**
9083  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9084  * @name: The name of the trace array to be looked up/created.
9085  *
9086  * Returns a pointer to the trace array with the given name, or
9087  * NULL if it cannot be created.
9088  *
9089  * NOTE: This function increments the reference counter associated with the
9090  * trace array returned. This makes sure it cannot be freed while in use.
9091  * Use trace_array_put() once the trace array is no longer needed.
9092  * If the trace_array is to be freed, trace_array_destroy() needs to
9093  * be called after the trace_array_put(), or simply let user space delete
9094  * it from the tracefs instances directory. But until the
9095  * trace_array_put() is called, user space can not delete it.
9096  *
9097  */
9098 struct trace_array *trace_array_get_by_name(const char *name)
9099 {
9100 	struct trace_array *tr;
9101 
9102 	mutex_lock(&event_mutex);
9103 	mutex_lock(&trace_types_lock);
9104 
9105 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9106 		if (tr->name && strcmp(tr->name, name) == 0)
9107 			goto out_unlock;
9108 	}
9109 
9110 	tr = trace_array_create(name);
9111 
9112 	if (IS_ERR(tr))
9113 		tr = NULL;
9114 out_unlock:
9115 	if (tr)
9116 		tr->ref++;
9117 
9118 	mutex_unlock(&trace_types_lock);
9119 	mutex_unlock(&event_mutex);
9120 	return tr;
9121 }
9122 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
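
/*
 * Minimal in-kernel usage sketch (the instance name is hypothetical,
 * error handling elided):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *   ...use tr, e.g. with trace_array_printk()...
 *
 *   trace_array_put(tr);       // drop the reference taken above
 *   trace_array_destroy(tr);   // only if the instance should be removed
 */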
9123 
9124 static int __remove_instance(struct trace_array *tr)
9125 {
9126 	int i;
9127 
9128 	/* Reference counter for a newly created trace array = 1. */
9129 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9130 		return -EBUSY;
9131 
9132 	list_del(&tr->list);
9133 
9134 	/* Disable all the flags that were enabled coming in */
9135 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9136 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9137 			set_tracer_flag(tr, 1 << i, 0);
9138 	}
9139 
9140 	tracing_set_nop(tr);
9141 	clear_ftrace_function_probes(tr);
9142 	event_trace_del_tracer(tr);
9143 	ftrace_clear_pids(tr);
9144 	ftrace_destroy_function_files(tr);
9145 	tracefs_remove(tr->dir);
9146 	free_percpu(tr->last_func_repeats);
9147 	free_trace_buffers(tr);
9148 
9149 	for (i = 0; i < tr->nr_topts; i++) {
9150 		kfree(tr->topts[i].topts);
9151 	}
9152 	kfree(tr->topts);
9153 
9154 	free_cpumask_var(tr->tracing_cpumask);
9155 	kfree(tr->name);
9156 	kfree(tr);
9157 
9158 	return 0;
9159 }
9160 
9161 int trace_array_destroy(struct trace_array *this_tr)
9162 {
9163 	struct trace_array *tr;
9164 	int ret;
9165 
9166 	if (!this_tr)
9167 		return -EINVAL;
9168 
9169 	mutex_lock(&event_mutex);
9170 	mutex_lock(&trace_types_lock);
9171 
9172 	ret = -ENODEV;
9173 
9174 	/* Make sure the trace array exists before destroying it. */
9175 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9176 		if (tr == this_tr) {
9177 			ret = __remove_instance(tr);
9178 			break;
9179 		}
9180 	}
9181 
9182 	mutex_unlock(&trace_types_lock);
9183 	mutex_unlock(&event_mutex);
9184 
9185 	return ret;
9186 }
9187 EXPORT_SYMBOL_GPL(trace_array_destroy);
9188 
9189 static int instance_rmdir(const char *name)
9190 {
9191 	struct trace_array *tr;
9192 	int ret;
9193 
9194 	mutex_lock(&event_mutex);
9195 	mutex_lock(&trace_types_lock);
9196 
9197 	ret = -ENODEV;
9198 	tr = trace_array_find(name);
9199 	if (tr)
9200 		ret = __remove_instance(tr);
9201 
9202 	mutex_unlock(&trace_types_lock);
9203 	mutex_unlock(&event_mutex);
9204 
9205 	return ret;
9206 }
9207 
9208 static __init void create_trace_instances(struct dentry *d_tracer)
9209 {
9210 	struct trace_array *tr;
9211 
9212 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9213 							 instance_mkdir,
9214 							 instance_rmdir);
9215 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9216 		return;
9217 
9218 	mutex_lock(&event_mutex);
9219 	mutex_lock(&trace_types_lock);
9220 
9221 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9222 		if (!tr->name)
9223 			continue;
9224 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9225 			     "Failed to create instance directory\n"))
9226 			break;
9227 	}
9228 
9229 	mutex_unlock(&trace_types_lock);
9230 	mutex_unlock(&event_mutex);
9231 }
9232 
9233 static void
9234 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9235 {
9236 	struct trace_event_file *file;
9237 	int cpu;
9238 
9239 	trace_create_file("available_tracers", 0444, d_tracer,
9240 			tr, &show_traces_fops);
9241 
9242 	trace_create_file("current_tracer", 0644, d_tracer,
9243 			tr, &set_tracer_fops);
9244 
9245 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9246 			  tr, &tracing_cpumask_fops);
9247 
9248 	trace_create_file("trace_options", 0644, d_tracer,
9249 			  tr, &tracing_iter_fops);
9250 
9251 	trace_create_file("trace", 0644, d_tracer,
9252 			  tr, &tracing_fops);
9253 
9254 	trace_create_file("trace_pipe", 0444, d_tracer,
9255 			  tr, &tracing_pipe_fops);
9256 
9257 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9258 			  tr, &tracing_entries_fops);
9259 
9260 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9261 			  tr, &tracing_total_entries_fops);
9262 
9263 	trace_create_file("free_buffer", 0200, d_tracer,
9264 			  tr, &tracing_free_buffer_fops);
9265 
9266 	trace_create_file("trace_marker", 0220, d_tracer,
9267 			  tr, &tracing_mark_fops);
9268 
9269 	file = __find_event_file(tr, "ftrace", "print");
9270 	if (file && file->dir)
9271 		trace_create_file("trigger", 0644, file->dir, file,
9272 				  &event_trigger_fops);
9273 	tr->trace_marker_file = file;
9274 
9275 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9276 			  tr, &tracing_mark_raw_fops);
9277 
9278 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9279 			  &trace_clock_fops);
9280 
9281 	trace_create_file("tracing_on", 0644, d_tracer,
9282 			  tr, &rb_simple_fops);
9283 
9284 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9285 			  &trace_time_stamp_mode_fops);
9286 
9287 	tr->buffer_percent = 50;
9288 
9289 	trace_create_file("buffer_percent", 0444, d_tracer,
9290 			tr, &buffer_percent_fops);
9291 
9292 	create_trace_options_dir(tr);
9293 
9294 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9295 	trace_create_maxlat_file(tr, d_tracer);
9296 #endif
9297 
9298 	if (ftrace_create_function_files(tr, d_tracer))
9299 		MEM_FAIL(1, "Could not allocate function filter files");
9300 
9301 #ifdef CONFIG_TRACER_SNAPSHOT
9302 	trace_create_file("snapshot", 0644, d_tracer,
9303 			  tr, &snapshot_fops);
9304 #endif
9305 
9306 	trace_create_file("error_log", 0644, d_tracer,
9307 			  tr, &tracing_err_log_fops);
9308 
9309 	for_each_tracing_cpu(cpu)
9310 		tracing_init_tracefs_percpu(tr, cpu);
9311 
9312 	ftrace_init_tracefs(tr, d_tracer);
9313 }
9314 
9315 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9316 {
9317 	struct vfsmount *mnt;
9318 	struct file_system_type *type;
9319 
9320 	/*
9321 	 * To maintain backward compatibility for tools that mount
9322 	 * debugfs to get to the tracing facility, tracefs is automatically
9323 	 * mounted to the debugfs/tracing directory.
9324 	 */
9325 	type = get_fs_type("tracefs");
9326 	if (!type)
9327 		return NULL;
9328 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9329 	put_filesystem(type);
9330 	if (IS_ERR(mnt))
9331 		return NULL;
9332 	mntget(mnt);
9333 
9334 	return mnt;
9335 }
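
/*
 * In practice this means that mounting debugfs (typically at
 * /sys/kernel/debug) makes the tracing files appear under
 * /sys/kernel/debug/tracing, even though they really live in tracefs
 * (normally mounted at /sys/kernel/tracing).
 */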
9336 
9337 /**
9338  * tracing_init_dentry - initialize top level trace array
9339  *
9340  * This is called when creating files or directories in the tracing
9341  * directory. It is called via fs_initcall() by any of the boot up code
9342  * and expects to return the dentry of the top level tracing directory.
9343  */
9344 int tracing_init_dentry(void)
9345 {
9346 	struct trace_array *tr = &global_trace;
9347 
9348 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9349 		pr_warn("Tracing disabled due to lockdown\n");
9350 		return -EPERM;
9351 	}
9352 
9353 	/* The top level trace array uses NULL as parent */
9354 	if (tr->dir)
9355 		return 0;
9356 
9357 	if (WARN_ON(!tracefs_initialized()))
9358 		return -ENODEV;
9359 
9360 	/*
9361 	 * As there may still be users that expect the tracing
9362 	 * files to exist in debugfs/tracing, we must automount
9363 	 * the tracefs file system there, so older tools still
9364 	 * work with the newer kernel.
9365 	 */
9366 	tr->dir = debugfs_create_automount("tracing", NULL,
9367 					   trace_automount, NULL);
9368 
9369 	return 0;
9370 }
9371 
9372 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9373 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9374 
9375 static struct workqueue_struct *eval_map_wq __initdata;
9376 static struct work_struct eval_map_work __initdata;
9377 
9378 static void __init eval_map_work_func(struct work_struct *work)
9379 {
9380 	int len;
9381 
9382 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9383 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9384 }
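
/*
 * The eval maps translate enum/sizeof symbols used in event print formats
 * (see TRACE_DEFINE_ENUM() and TRACE_DEFINE_SIZEOF()) into their numeric
 * values so user-space parsers see plain numbers; queueing the core
 * kernel's maps on a workqueue keeps this work off the boot critical path.
 */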
9385 
9386 static int __init trace_eval_init(void)
9387 {
9388 	INIT_WORK(&eval_map_work, eval_map_work_func);
9389 
9390 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9391 	if (!eval_map_wq) {
9392 		pr_err("Unable to allocate eval_map_wq\n");
9393 		/* Fall back to doing the work synchronously */
9394 		eval_map_work_func(&eval_map_work);
9395 		return -ENOMEM;
9396 	}
9397 
9398 	queue_work(eval_map_wq, &eval_map_work);
9399 	return 0;
9400 }
9401 
9402 static int __init trace_eval_sync(void)
9403 {
9404 	/* Make sure the eval map updates are finished */
9405 	if (eval_map_wq)
9406 		destroy_workqueue(eval_map_wq);
9407 	return 0;
9408 }
9409 
9410 late_initcall_sync(trace_eval_sync);
9411 
9412 
9413 #ifdef CONFIG_MODULES
9414 static void trace_module_add_evals(struct module *mod)
9415 {
9416 	if (!mod->num_trace_evals)
9417 		return;
9418 
9419 	/*
9420 	 * Modules with bad taint do not have events created; do
9421 	 * not bother with enums either.
9422 	 */
9423 	if (trace_module_has_bad_taint(mod))
9424 		return;
9425 
9426 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9427 }
9428 
9429 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9430 static void trace_module_remove_evals(struct module *mod)
9431 {
9432 	union trace_eval_map_item *map;
9433 	union trace_eval_map_item **last = &trace_eval_maps;
9434 
9435 	if (!mod->num_trace_evals)
9436 		return;
9437 
9438 	mutex_lock(&trace_eval_mutex);
9439 
9440 	map = trace_eval_maps;
9441 
9442 	while (map) {
9443 		if (map->head.mod == mod)
9444 			break;
9445 		map = trace_eval_jmp_to_tail(map);
9446 		last = &map->tail.next;
9447 		map = map->tail.next;
9448 	}
9449 	if (!map)
9450 		goto out;
9451 
9452 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9453 	kfree(map);
9454  out:
9455 	mutex_unlock(&trace_eval_mutex);
9456 }
9457 #else
9458 static inline void trace_module_remove_evals(struct module *mod) { }
9459 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9460 
9461 static int trace_module_notify(struct notifier_block *self,
9462 			       unsigned long val, void *data)
9463 {
9464 	struct module *mod = data;
9465 
9466 	switch (val) {
9467 	case MODULE_STATE_COMING:
9468 		trace_module_add_evals(mod);
9469 		break;
9470 	case MODULE_STATE_GOING:
9471 		trace_module_remove_evals(mod);
9472 		break;
9473 	}
9474 
9475 	return NOTIFY_OK;
9476 }
9477 
9478 static struct notifier_block trace_module_nb = {
9479 	.notifier_call = trace_module_notify,
9480 	.priority = 0,
9481 };
9482 #endif /* CONFIG_MODULES */
9483 
9484 static __init int tracer_init_tracefs(void)
9485 {
9486 	int ret;
9487 
9488 	trace_access_lock_init();
9489 
9490 	ret = tracing_init_dentry();
9491 	if (ret)
9492 		return 0;
9493 
9494 	event_trace_init();
9495 
9496 	init_tracer_tracefs(&global_trace, NULL);
9497 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9498 
9499 	trace_create_file("tracing_thresh", 0644, NULL,
9500 			&global_trace, &tracing_thresh_fops);
9501 
9502 	trace_create_file("README", 0444, NULL,
9503 			NULL, &tracing_readme_fops);
9504 
9505 	trace_create_file("saved_cmdlines", 0444, NULL,
9506 			NULL, &tracing_saved_cmdlines_fops);
9507 
9508 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9509 			  NULL, &tracing_saved_cmdlines_size_fops);
9510 
9511 	trace_create_file("saved_tgids", 0444, NULL,
9512 			NULL, &tracing_saved_tgids_fops);
9513 
9514 	trace_eval_init();
9515 
9516 	trace_create_eval_file(NULL);
9517 
9518 #ifdef CONFIG_MODULES
9519 	register_module_notifier(&trace_module_nb);
9520 #endif
9521 
9522 #ifdef CONFIG_DYNAMIC_FTRACE
9523 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9524 			NULL, &tracing_dyn_info_fops);
9525 #endif
9526 
9527 	create_trace_instances(NULL);
9528 
9529 	update_tracer_options(&global_trace);
9530 
9531 	return 0;
9532 }
9533 
9534 static int trace_panic_handler(struct notifier_block *this,
9535 			       unsigned long event, void *unused)
9536 {
9537 	if (ftrace_dump_on_oops)
9538 		ftrace_dump(ftrace_dump_on_oops);
9539 	return NOTIFY_OK;
9540 }
9541 
9542 static struct notifier_block trace_panic_notifier = {
9543 	.notifier_call  = trace_panic_handler,
9544 	.next           = NULL,
9545 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9546 };
9547 
9548 static int trace_die_handler(struct notifier_block *self,
9549 			     unsigned long val,
9550 			     void *data)
9551 {
9552 	switch (val) {
9553 	case DIE_OOPS:
9554 		if (ftrace_dump_on_oops)
9555 			ftrace_dump(ftrace_dump_on_oops);
9556 		break;
9557 	default:
9558 		break;
9559 	}
9560 	return NOTIFY_OK;
9561 }
9562 
9563 static struct notifier_block trace_die_notifier = {
9564 	.notifier_call = trace_die_handler,
9565 	.priority = 200
9566 };
9567 
9568 /*
9569  * printk is set to max of 1024, we really don't need it that big.
9570  * Nothing should be printing 1000 characters anyway.
9571  */
9572 #define TRACE_MAX_PRINT		1000
9573 
9574 /*
9575  * Define here KERN_TRACE so that we have one place to modify
9576  * it if we decide to change what log level the ftrace dump
9577  * should be at.
9578  */
9579 #define KERN_TRACE		KERN_EMERG
9580 
9581 void
9582 trace_printk_seq(struct trace_seq *s)
9583 {
9584 	/* Probably should print a warning here. */
9585 	if (s->seq.len >= TRACE_MAX_PRINT)
9586 		s->seq.len = TRACE_MAX_PRINT;
9587 
9588 	/*
9589 	 * More paranoid code. Although the buffer size is set to
9590 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9591 	 * an extra layer of protection.
9592 	 */
9593 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9594 		s->seq.len = s->seq.size - 1;
9595 
9596 	/* Should be NUL-terminated, but we are paranoid. */
9597 	s->buffer[s->seq.len] = 0;
9598 
9599 	printk(KERN_TRACE "%s", s->buffer);
9600 
9601 	trace_seq_init(s);
9602 }
9603 
9604 void trace_init_global_iter(struct trace_iterator *iter)
9605 {
9606 	iter->tr = &global_trace;
9607 	iter->trace = iter->tr->current_trace;
9608 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9609 	iter->array_buffer = &global_trace.array_buffer;
9610 
9611 	if (iter->trace && iter->trace->open)
9612 		iter->trace->open(iter);
9613 
9614 	/* Annotate start of buffers if we had overruns */
9615 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9616 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9617 
9618 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9619 	if (trace_clocks[iter->tr->clock_id].in_ns)
9620 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9621 }
9622 
9623 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9624 {
9625 	/* use static because iter can be a bit big for the stack */
9626 	static struct trace_iterator iter;
9627 	static atomic_t dump_running;
9628 	struct trace_array *tr = &global_trace;
9629 	unsigned int old_userobj;
9630 	unsigned long flags;
9631 	int cnt = 0, cpu;
9632 
9633 	/* Only allow one dump user at a time. */
9634 	if (atomic_inc_return(&dump_running) != 1) {
9635 		atomic_dec(&dump_running);
9636 		return;
9637 	}
9638 
9639 	/*
9640 	 * Always turn off tracing when we dump.
9641 	 * We don't need to show trace output of what happens
9642 	 * between multiple crashes.
9643 	 *
9644 	 * If the user does a sysrq-z, then they can re-enable
9645 	 * tracing with echo 1 > tracing_on.
9646 	 */
9647 	tracing_off();
9648 
9649 	local_irq_save(flags);
9650 	printk_nmi_direct_enter();
9651 
9652 	/* Simulate the iterator */
9653 	trace_init_global_iter(&iter);
9654 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9655 	iter.temp = static_temp_buf;
9656 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9657 	iter.fmt = static_fmt_buf;
9658 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9659 
9660 	for_each_tracing_cpu(cpu) {
9661 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9662 	}
9663 
9664 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9665 
9666 	/* don't look at user memory in panic mode */
9667 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9668 
9669 	switch (oops_dump_mode) {
9670 	case DUMP_ALL:
9671 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9672 		break;
9673 	case DUMP_ORIG:
9674 		iter.cpu_file = raw_smp_processor_id();
9675 		break;
9676 	case DUMP_NONE:
9677 		goto out_enable;
9678 	default:
9679 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9680 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9681 	}
9682 
9683 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9684 
9685 	/* Did function tracer already get disabled? */
9686 	if (ftrace_is_dead()) {
9687 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9688 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9689 	}
9690 
9691 	/*
9692 	 * We need to stop all tracing on all CPUs to read
9693 	 * the next buffer. This is a bit expensive, but is
9694 	 * not done often. We read everything we can,
9695 	 * and then release the locks again.
9696 	 */
9697 
9698 	while (!trace_empty(&iter)) {
9699 
9700 		if (!cnt)
9701 			printk(KERN_TRACE "---------------------------------\n");
9702 
9703 		cnt++;
9704 
9705 		trace_iterator_reset(&iter);
9706 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9707 
9708 		if (trace_find_next_entry_inc(&iter) != NULL) {
9709 			int ret;
9710 
9711 			ret = print_trace_line(&iter);
9712 			if (ret != TRACE_TYPE_NO_CONSUME)
9713 				trace_consume(&iter);
9714 		}
9715 		touch_nmi_watchdog();
9716 
9717 		trace_printk_seq(&iter.seq);
9718 	}
9719 
9720 	if (!cnt)
9721 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9722 	else
9723 		printk(KERN_TRACE "---------------------------------\n");
9724 
9725  out_enable:
9726 	tr->trace_flags |= old_userobj;
9727 
9728 	for_each_tracing_cpu(cpu) {
9729 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9730 	}
9731 	atomic_dec(&dump_running);
9732 	printk_nmi_direct_exit();
9733 	local_irq_restore(flags);
9734 }
9735 EXPORT_SYMBOL_GPL(ftrace_dump);
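/*
 * Illustrative sketch (not part of this file): since ftrace_dump() is
 * exported, a module can dump the ring buffer when it detects a fatal
 * condition. The function "example_fatal_error" is hypothetical.
 *
 *	static void example_fatal_error(void)
 *	{
 *		ftrace_dump(DUMP_ORIG);	// dump only this CPU; DUMP_ALL for all
 *	}
 */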
9736 
9737 #define WRITE_BUFSIZE  4096
9738 
9739 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9740 				size_t count, loff_t *ppos,
9741 				int (*createfn)(const char *))
9742 {
9743 	char *kbuf, *buf, *tmp;
9744 	int ret = 0;
9745 	size_t done = 0;
9746 	size_t size;
9747 
9748 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9749 	if (!kbuf)
9750 		return -ENOMEM;
9751 
9752 	while (done < count) {
9753 		size = count - done;
9754 
9755 		if (size >= WRITE_BUFSIZE)
9756 			size = WRITE_BUFSIZE - 1;
9757 
9758 		if (copy_from_user(kbuf, buffer + done, size)) {
9759 			ret = -EFAULT;
9760 			goto out;
9761 		}
9762 		kbuf[size] = '\0';
9763 		buf = kbuf;
9764 		do {
9765 			tmp = strchr(buf, '\n');
9766 			if (tmp) {
9767 				*tmp = '\0';
9768 				size = tmp - buf + 1;
9769 			} else {
9770 				size = strlen(buf);
9771 				if (done + size < count) {
9772 					if (buf != kbuf)
9773 						break;
9774 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9775 					pr_warn("Line length is too long: Should be less than %d\n",
9776 						WRITE_BUFSIZE - 2);
9777 					ret = -EINVAL;
9778 					goto out;
9779 				}
9780 			}
9781 			done += size;
9782 
9783 			/* Remove comments */
9784 			tmp = strchr(buf, '#');
9785 
9786 			if (tmp)
9787 				*tmp = '\0';
9788 
9789 			ret = createfn(buf);
9790 			if (ret)
9791 				goto out;
9792 			buf += size;
9793 
9794 		} while (done < count);
9795 	}
9796 	ret = done;
9797 
9798 out:
9799 	kfree(kbuf);
9800 
9801 	return ret;
9802 }
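/*
 * Illustrative sketch (not part of this file): a tracefs write() handler
 * that feeds user input, one '\n'-separated command at a time (with '#'
 * comments stripped), into a parser. "example_create_cmd" and
 * "example_write" are hypothetical; the kprobe and uprobe event files use
 * this helper in the same way.
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		pr_info("got command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buffer,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create_cmd);
 *	}
 */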
9803 
9804 __init static int tracer_alloc_buffers(void)
9805 {
9806 	int ring_buf_size;
9807 	int ret = -ENOMEM;
9808 
9809 
9810 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9811 		pr_warn("Tracing disabled due to lockdown\n");
9812 		return -EPERM;
9813 	}
9814 
9815 	/*
9816 	 * Make sure we don't accidentally add more trace options
9817 	 * than we have bits for.
9818 	 */
9819 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9820 
9821 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9822 		goto out;
9823 
9824 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9825 		goto out_free_buffer_mask;
9826 
9827 	/* Only allocate trace_printk buffers if a trace_printk exists */
9828 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9829 		/* Must be called before global_trace.buffer is allocated */
9830 		trace_printk_init_buffers();
9831 
9832 	/* To save memory, keep the ring buffer size to its minimum */
9833 	if (ring_buffer_expanded)
9834 		ring_buf_size = trace_buf_size;
9835 	else
9836 		ring_buf_size = 1;
9837 
9838 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9839 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9840 
9841 	raw_spin_lock_init(&global_trace.start_lock);
9842 
9843 	/*
9844 	 * The prepare callback allocates some memory for the ring buffer. We
9845 	 * don't free the buffer if the CPU goes down. If we were to free
9846 	 * the buffer, then the user would lose any trace that was in the
9847 	 * buffer. The memory will be removed once the "instance" is removed.
9848 	 */
9849 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9850 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9851 				      NULL);
9852 	if (ret < 0)
9853 		goto out_free_cpumask;
9854 	/* Used for event triggers */
9855 	ret = -ENOMEM;
9856 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9857 	if (!temp_buffer)
9858 		goto out_rm_hp_state;
9859 
9860 	if (trace_create_savedcmd() < 0)
9861 		goto out_free_temp_buffer;
9862 
9863 	/* TODO: make the number of buffers hot pluggable with CPUs */
9864 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9865 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9866 		goto out_free_savedcmd;
9867 	}
9868 
9869 	if (global_trace.buffer_disabled)
9870 		tracing_off();
9871 
9872 	if (trace_boot_clock) {
9873 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9874 		if (ret < 0)
9875 			pr_warn("Trace clock %s not defined, going back to default\n",
9876 				trace_boot_clock);
9877 	}
9878 
9879 	/*
9880 	 * register_tracer() might reference current_trace, so it
9881 	 * needs to be set before we register anything. This is
9882 	 * just a bootstrap of current_trace anyway.
9883 	 */
9884 	global_trace.current_trace = &nop_trace;
9885 
9886 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9887 
9888 	ftrace_init_global_array_ops(&global_trace);
9889 
9890 	init_trace_flags_index(&global_trace);
9891 
9892 	register_tracer(&nop_trace);
9893 
9894 	/* Function tracing may start here (via kernel command line) */
9895 	init_function_trace();
9896 
9897 	/* All seems OK, enable tracing */
9898 	tracing_disabled = 0;
9899 
9900 	atomic_notifier_chain_register(&panic_notifier_list,
9901 				       &trace_panic_notifier);
9902 
9903 	register_die_notifier(&trace_die_notifier);
9904 
9905 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9906 
9907 	INIT_LIST_HEAD(&global_trace.systems);
9908 	INIT_LIST_HEAD(&global_trace.events);
9909 	INIT_LIST_HEAD(&global_trace.hist_vars);
9910 	INIT_LIST_HEAD(&global_trace.err_log);
9911 	list_add(&global_trace.list, &ftrace_trace_arrays);
9912 
9913 	apply_trace_boot_options();
9914 
9915 	register_snapshot_cmd();
9916 
9917 	test_can_verify();
9918 
9919 	return 0;
9920 
9921 out_free_savedcmd:
9922 	free_saved_cmdlines_buffer(savedcmd);
9923 out_free_temp_buffer:
9924 	ring_buffer_free(temp_buffer);
9925 out_rm_hp_state:
9926 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9927 out_free_cpumask:
9928 	free_cpumask_var(global_trace.tracing_cpumask);
9929 out_free_buffer_mask:
9930 	free_cpumask_var(tracing_buffer_mask);
9931 out:
9932 	return ret;
9933 }
9934 
9935 void __init early_trace_init(void)
9936 {
9937 	if (tracepoint_printk) {
9938 		tracepoint_print_iter =
9939 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9940 		if (MEM_FAIL(!tracepoint_print_iter,
9941 			     "Failed to allocate trace iterator\n"))
9942 			tracepoint_printk = 0;
9943 		else
9944 			static_key_enable(&tracepoint_printk_key.key);
9945 	}
9946 	tracer_alloc_buffers();
9947 }
9948 
9949 void __init trace_init(void)
9950 {
9951 	trace_event_init();
9952 }
9953 
9954 __init static int clear_boot_tracer(void)
9955 {
9956 	/*
9957 	 * The default bootup tracer points into an init section buffer.
9958 	 * This function is called at late_initcall time. If we did not
9959 	 * find the boot tracer, then clear it out, to prevent
9960 	 * later registration from accessing the buffer that is
9961 	 * about to be freed.
9962 	 */
9963 	if (!default_bootup_tracer)
9964 		return 0;
9965 
9966 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9967 	       default_bootup_tracer);
9968 	default_bootup_tracer = NULL;
9969 
9970 	return 0;
9971 }
9972 
9973 fs_initcall(tracer_init_tracefs);
9974 late_initcall_sync(clear_boot_tracer);
9975 
9976 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9977 __init static int tracing_set_default_clock(void)
9978 {
9979 	/* sched_clock_stable() is determined in late_initcall */
9980 	if (!trace_boot_clock && !sched_clock_stable()) {
9981 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9982 			pr_warn("Can not set tracing clock due to lockdown\n");
9983 			return -EPERM;
9984 		}
9985 
9986 		printk(KERN_WARNING
9987 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9988 		       "If you want to keep using the local clock, then add:\n"
9989 		       "  \"trace_clock=local\"\n"
9990 		       "on the kernel command line\n");
9991 		tracing_set_clock(&global_trace, "global");
9992 	}
9993 
9994 	return 0;
9995 }
9996 late_initcall_sync(tracing_set_default_clock);
9997 #endif
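/*
 * Illustrative note (not part of this file): to keep the local clock on a
 * system with an unstable sched_clock, boot with
 *
 *	trace_clock=local
 *
 * or switch clocks at run time through tracefs, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */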
9998