xref: /openbmc/linux/kernel/trace/trace.c (revision b5f184fbdb03b4fcc1141de34dd5ec964ca5d99e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
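/*
 * Illustrative usage (editorial sketch, not kernel code): the values
 * described above map to runtime and boot-time knobs, for example:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(dump all CPUs)
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops	(dump only the CPU that oopsed)
 *
 * or, on the kernel command line, "ftrace_dump_on_oops" and
 * "ftrace_dump_on_oops=orig_cpu" (handled by set_ftrace_dump_on_oops()
 * below).
 */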
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * than "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
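/*
 * Illustrative layout (editorial sketch) of one saved array holding N
 * eval maps, following the description above:
 *
 *	item[0]		head:  .mod = owning module (or NULL), .length = N
 *	item[1..N]	map:   the saved trace_eval_map entries
 *	item[N + 1]	tail:  .next = next saved array (or NULL), .end = NULL
 */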
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
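/*
 * Example (editorial note): booting with "ftrace=function" on the kernel
 * command line copies "function" into bootup_tracer_buf here, and that
 * tracer is then selected once the tracing infrastructure comes up later
 * in boot.
 */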
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206 		return 1;
207 	}
208 
209 	return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 		tracepoint_printk = 1;
255 	return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258 
259 unsigned long long ns2usecs(u64 nsec)
260 {
261 	nsec += 500;
262 	do_div(nsec, 1000);
263 	return nsec;
264 }
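/*
 * Worked example (editorial note): the +500 makes the division round to
 * the nearest microsecond rather than truncate, e.g.
 *
 *	ns2usecs(1499) == (1499 + 500) / 1000 == 1
 *	ns2usecs(1500) == (1500 + 500) / 1000 == 2
 */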
265 
266 static void
267 trace_process_export(struct trace_export *export,
268 	       struct ring_buffer_event *event, int flag)
269 {
270 	struct trace_entry *entry;
271 	unsigned int size = 0;
272 
273 	if (export->flags & flag) {
274 		entry = ring_buffer_event_data(event);
275 		size = ring_buffer_event_length(event);
276 		export->write(export, entry, size);
277 	}
278 }
279 
280 static DEFINE_MUTEX(ftrace_export_lock);
281 
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283 
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287 
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290 	if (export->flags & TRACE_EXPORT_FUNCTION)
291 		static_branch_inc(&trace_function_exports_enabled);
292 
293 	if (export->flags & TRACE_EXPORT_EVENT)
294 		static_branch_inc(&trace_event_exports_enabled);
295 
296 	if (export->flags & TRACE_EXPORT_MARKER)
297 		static_branch_inc(&trace_marker_exports_enabled);
298 }
299 
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302 	if (export->flags & TRACE_EXPORT_FUNCTION)
303 		static_branch_dec(&trace_function_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_EVENT)
306 		static_branch_dec(&trace_event_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_MARKER)
309 		static_branch_dec(&trace_marker_exports_enabled);
310 }
311 
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314 	struct trace_export *export;
315 
316 	preempt_disable_notrace();
317 
318 	export = rcu_dereference_raw_check(ftrace_exports_list);
319 	while (export) {
320 		trace_process_export(export, event, flag);
321 		export = rcu_dereference_raw_check(export->next);
322 	}
323 
324 	preempt_enable_notrace();
325 }
326 
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330 	rcu_assign_pointer(export->next, *list);
331 	/*
332 	 * We are entering export into the list but another
333 	 * CPU might be walking that list. We need to make sure
334 	 * the export->next pointer is valid before another CPU sees
335 	 * the export pointer included into the list.
336 	 */
337 	rcu_assign_pointer(*list, export);
338 }
339 
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	struct trace_export **p;
344 
345 	for (p = list; *p != NULL; p = &(*p)->next)
346 		if (*p == export)
347 			break;
348 
349 	if (*p != export)
350 		return -1;
351 
352 	rcu_assign_pointer(*p, (*p)->next);
353 
354 	return 0;
355 }
356 
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360 	ftrace_exports_enable(export);
361 
362 	add_trace_export(list, export);
363 }
364 
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	int ret;
369 
370 	ret = rm_trace_export(list, export);
371 	ftrace_exports_disable(export);
372 
373 	return ret;
374 }
375 
376 int register_ftrace_export(struct trace_export *export)
377 {
378 	if (WARN_ON_ONCE(!export->write))
379 		return -1;
380 
381 	mutex_lock(&ftrace_export_lock);
382 
383 	add_ftrace_export(&ftrace_exports_list, export);
384 
385 	mutex_unlock(&ftrace_export_lock);
386 
387 	return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390 
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393 	int ret;
394 
395 	mutex_lock(&ftrace_export_lock);
396 
397 	ret = rm_ftrace_export(&ftrace_exports_list, export);
398 
399 	mutex_unlock(&ftrace_export_lock);
400 
401 	return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
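/*
 * Illustrative sketch (editorial, not part of this file): a module could
 * hook the export path roughly like this. The callback signature follows
 * struct trace_export in <linux/trace.h>; my_write() and my_export are
 * hypothetical names.
 *
 *	static void my_write(struct trace_export *export, const void *entry,
 *			     unsigned int size)
 *	{
 *		// forward the raw trace entry somewhere (device, network, ...)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */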
404 
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS						\
407 	(FUNCTION_DEFAULT_FLAGS |					\
408 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
409 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
410 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
411 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
412 	 TRACE_ITER_HASH_PTR)
413 
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
416 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417 
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421 
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427 	.trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429 
430 LIST_HEAD(ftrace_trace_arrays);
431 
432 int trace_array_get(struct trace_array *this_tr)
433 {
434 	struct trace_array *tr;
435 	int ret = -ENODEV;
436 
437 	mutex_lock(&trace_types_lock);
438 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439 		if (tr == this_tr) {
440 			tr->ref++;
441 			ret = 0;
442 			break;
443 		}
444 	}
445 	mutex_unlock(&trace_types_lock);
446 
447 	return ret;
448 }
449 
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452 	WARN_ON(!this_tr->ref);
453 	this_tr->ref--;
454 }
455 
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467 	if (!this_tr)
468 		return;
469 
470 	mutex_lock(&trace_types_lock);
471 	__trace_array_put(this_tr);
472 	mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475 
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478 	int ret;
479 
480 	ret = security_locked_down(LOCKDOWN_TRACEFS);
481 	if (ret)
482 		return ret;
483 
484 	if (tracing_disabled)
485 		return -ENODEV;
486 
487 	if (tr && trace_array_get(tr) < 0)
488 		return -ENODEV;
489 
490 	return 0;
491 }
492 
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 			      struct trace_buffer *buffer,
495 			      struct ring_buffer_event *event)
496 {
497 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 	    !filter_match_preds(call->filter, rec)) {
499 		__trace_event_discard_commit(buffer, event);
500 		return 1;
501 	}
502 
503 	return 0;
504 }
505 
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508 	vfree(pid_list->pids);
509 	kfree(pid_list);
510 }
511 
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522 	/*
523 	 * If pid_max changed after filtered_pids was created, we
524 	 * by default ignore all pids greater than the previous pid_max.
525 	 */
526 	if (search_pid >= filtered_pids->pid_max)
527 		return false;
528 
529 	return test_bit(search_pid, filtered_pids->pids);
530 }
531 
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 		       struct trace_pid_list *filtered_no_pids,
545 		       struct task_struct *task)
546 {
547 	/*
548 	 * If filtered_no_pids is not empty, and the task's pid is listed
549 	 * in filtered_no_pids, then return true.
550 	 * Otherwise, if filtered_pids is empty, that means we can
551 	 * trace all tasks. If it has content, then only trace pids
552 	 * within filtered_pids.
553 	 */
554 
555 	return (filtered_pids &&
556 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 		(filtered_no_pids &&
558 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560 
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 				  struct task_struct *self,
575 				  struct task_struct *task)
576 {
577 	if (!pid_list)
578 		return;
579 
580 	/* For forks, we only add if the forking task is listed */
581 	if (self) {
582 		if (!trace_find_filtered_pid(pid_list, self->pid))
583 			return;
584 	}
585 
586 	/* Sorry, but we don't support pid_max changing after setting */
587 	if (task->pid >= pid_list->pid_max)
588 		return;
589 
590 	/* "self" is set for forks, and NULL for exits */
591 	if (self)
592 		set_bit(task->pid, pid_list->pids);
593 	else
594 		clear_bit(task->pid, pid_list->pids);
595 }
596 
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611 	unsigned long pid = (unsigned long)v;
612 
613 	(*pos)++;
614 
615 	/* pid is already +1 of the actual previous bit */
616 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617 
618 	/* Return pid + 1 to allow zero to be represented */
619 	if (pid < pid_list->pid_max)
620 		return (void *)(pid + 1);
621 
622 	return NULL;
623 }
624 
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638 	unsigned long pid;
639 	loff_t l = 0;
640 
641 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 	if (pid >= pid_list->pid_max)
643 		return NULL;
644 
645 	/* Return pid + 1 so that zero can be the exit value */
646 	for (pid++; pid && l < *pos;
647 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648 		;
649 	return (void *)pid;
650 }
651 
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662 	unsigned long pid = (unsigned long)v - 1;
663 
664 	seq_printf(m, "%lu\n", pid);
665 	return 0;
666 }
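/*
 * Illustrative wiring (editorial sketch): the helpers above are meant to
 * back a seq_file interface, roughly like the pid filter files in this
 * directory do. The my_pid_* wrappers are hypothetical names.
 *
 *	static const struct seq_operations sketch_pid_seq_ops = {
 *		.start	= my_pid_start,		// calls trace_pid_start()
 *		.next	= my_pid_next,		// calls trace_pid_next()
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * Because the iterator cookie is pid + 1, pid 0 is handed to seq_file as
 * (void *)1, which is non-NULL and so does not end the iteration.
 */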
667 
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE		127
670 
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 		    struct trace_pid_list **new_pid_list,
673 		    const char __user *ubuf, size_t cnt)
674 {
675 	struct trace_pid_list *pid_list;
676 	struct trace_parser parser;
677 	unsigned long val;
678 	int nr_pids = 0;
679 	ssize_t read = 0;
680 	ssize_t ret = 0;
681 	loff_t pos;
682 	pid_t pid;
683 
684 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685 		return -ENOMEM;
686 
687 	/*
688 	 * Always recreate a new array. The write is an all or nothing
689 	 * operation. Always create a new array when adding new pids by
690 	 * the user. If the operation fails, then the current list is
691 	 * not modified.
692 	 */
693 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694 	if (!pid_list) {
695 		trace_parser_put(&parser);
696 		return -ENOMEM;
697 	}
698 
699 	pid_list->pid_max = READ_ONCE(pid_max);
700 
701 	/* Only truncating will shrink pid_max */
702 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 		pid_list->pid_max = filtered_pids->pid_max;
704 
705 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 	if (!pid_list->pids) {
707 		trace_parser_put(&parser);
708 		kfree(pid_list);
709 		return -ENOMEM;
710 	}
711 
712 	if (filtered_pids) {
713 		/* copy the current bits to the new max */
714 		for_each_set_bit(pid, filtered_pids->pids,
715 				 filtered_pids->pid_max) {
716 			set_bit(pid, pid_list->pids);
717 			nr_pids++;
718 		}
719 	}
720 
721 	while (cnt > 0) {
722 
723 		pos = 0;
724 
725 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 		if (ret < 0 || !trace_parser_loaded(&parser))
727 			break;
728 
729 		read += ret;
730 		ubuf += ret;
731 		cnt -= ret;
732 
733 		ret = -EINVAL;
734 		if (kstrtoul(parser.buffer, 0, &val))
735 			break;
736 		if (val >= pid_list->pid_max)
737 			break;
738 
739 		pid = (pid_t)val;
740 
741 		set_bit(pid, pid_list->pids);
742 		nr_pids++;
743 
744 		trace_parser_clear(&parser);
745 		ret = 0;
746 	}
747 	trace_parser_put(&parser);
748 
749 	if (ret < 0) {
750 		trace_free_pid_list(pid_list);
751 		return ret;
752 	}
753 
754 	if (!nr_pids) {
755 		/* Cleared the list of pids */
756 		trace_free_pid_list(pid_list);
757 		read = ret;
758 		pid_list = NULL;
759 	}
760 
761 	*new_pid_list = pid_list;
762 
763 	return read;
764 }
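/*
 * Illustrative usage (editorial note): this helper backs tracefs pid
 * filter files such as set_event_pid. The write is all-or-nothing, and
 * an empty write clears the list, e.g.:
 *
 *	echo 123 456 >> /sys/kernel/tracing/set_event_pid	(add pids)
 *	echo > /sys/kernel/tracing/set_event_pid		(clear the filter)
 */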
765 
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768 	u64 ts;
769 
770 	/* Early boot up does not have a buffer yet */
771 	if (!buf->buffer)
772 		return trace_clock_local();
773 
774 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
775 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776 
777 	return ts;
778 }
779 
780 u64 ftrace_now(int cpu)
781 {
782 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784 
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" to be used in fast paths such as for
790  * the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796 	/*
797 	 * For quick access (irqsoff uses this in fast path), just
798 	 * return the mirror variable of the state of the ring buffer.
799 	 * It's a little racy, but we don't really care.
800 	 */
801 	smp_rmb();
802 	return !global_trace.buffer_disabled;
803 }
804 
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If the dump on oops happens, it will be much appreciated
812  * to not have to wait for all that output. Anyway, this is
813  * configurable at both boot time and run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
816 
817 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818 
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer		*trace_types __read_mostly;
821 
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826 
827 /*
828  * serialize the access of the ring buffer
829  *
830  * ring buffer serializes readers, but it is low level protection.
831  * The validity of the events (which returns by ring_buffer_peek() ..etc)
832  * are not protected by ring buffer.
833  *
834  * The content of events may become garbage if we allow other process consumes
835  * these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not reader page) in ring buffer, and this page will be rewrited
838  *      by events producer.
839  *   B) The page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to system.
841  *
842  * These primitives allow multi process access to different cpu ring buffer
843  * concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multi read-only access are also serialized.
847  */
848 
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852 
853 static inline void trace_access_lock(int cpu)
854 {
855 	if (cpu == RING_BUFFER_ALL_CPUS) {
856 		/* gain it for accessing the whole ring buffer. */
857 		down_write(&all_cpu_access_lock);
858 	} else {
859 		/* gain it for accessing a cpu ring buffer. */
860 
861 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 		down_read(&all_cpu_access_lock);
863 
864 		/* Secondly block other access to this @cpu ring buffer. */
865 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
866 	}
867 }
868 
869 static inline void trace_access_unlock(int cpu)
870 {
871 	if (cpu == RING_BUFFER_ALL_CPUS) {
872 		up_write(&all_cpu_access_lock);
873 	} else {
874 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 		up_read(&all_cpu_access_lock);
876 	}
877 }
878 
879 static inline void trace_access_lock_init(void)
880 {
881 	int cpu;
882 
883 	for_each_possible_cpu(cpu)
884 		mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886 
887 #else
888 
889 static DEFINE_MUTEX(access_lock);
890 
891 static inline void trace_access_lock(int cpu)
892 {
893 	(void)cpu;
894 	mutex_lock(&access_lock);
895 }
896 
897 static inline void trace_access_unlock(int cpu)
898 {
899 	(void)cpu;
900 	mutex_unlock(&access_lock);
901 }
902 
903 static inline void trace_access_lock_init(void)
904 {
905 }
906 
907 #endif
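/*
 * Typical reader pattern (editorial sketch): a consumer of a single
 * cpu's buffer takes the per-cpu lock, which also excludes a concurrent
 * trace_access_lock(RING_BUFFER_ALL_CPUS) user:
 *
 *	trace_access_lock(cpu_file);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu_file);
 */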
908 
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 				 unsigned int trace_ctx,
912 				 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 				      struct trace_buffer *buffer,
915 				      unsigned int trace_ctx,
916 				      int skip, struct pt_regs *regs);
917 
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 					unsigned int trace_ctx,
921 					int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 				      struct trace_buffer *buffer,
926 				      unsigned long trace_ctx,
927 				      int skip, struct pt_regs *regs)
928 {
929 }
930 
931 #endif
932 
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 		  int type, unsigned int trace_ctx)
936 {
937 	struct trace_entry *ent = ring_buffer_event_data(event);
938 
939 	tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941 
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944 			  int type,
945 			  unsigned long len,
946 			  unsigned int trace_ctx)
947 {
948 	struct ring_buffer_event *event;
949 
950 	event = ring_buffer_lock_reserve(buffer, len);
951 	if (event != NULL)
952 		trace_event_setup(event, type, trace_ctx);
953 
954 	return event;
955 }
956 
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959 	if (tr->array_buffer.buffer)
960 		ring_buffer_record_on(tr->array_buffer.buffer);
961 	/*
962 	 * This flag is looked at when buffers haven't been allocated
963 	 * yet, or by some tracers (like irqsoff), that just want to
964 	 * know if the ring buffer has been disabled, but it can handle
965 	 * races of where it gets disabled but we still do a record.
966 	 * As the check is in the fast path of the tracers, it is more
967 	 * important to be fast than accurate.
968 	 */
969 	tr->buffer_disabled = 0;
970 	/* Make the flag seen by readers */
971 	smp_wmb();
972 }
973 
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982 	tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985 
986 
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990 	__this_cpu_write(trace_taskinfo_save, true);
991 
992 	/* If this is the temp buffer, we need to commit fully */
993 	if (this_cpu_read(trace_buffered_event) == event) {
994 		/* Length is in event->array[0] */
995 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 		/* Release the temp buffer */
997 		this_cpu_dec(trace_buffered_event_cnt);
998 	} else
999 		ring_buffer_unlock_commit(buffer, event);
1000 }
1001 
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:	   The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010 	struct ring_buffer_event *event;
1011 	struct trace_buffer *buffer;
1012 	struct print_entry *entry;
1013 	unsigned int trace_ctx;
1014 	int alloc;
1015 
1016 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017 		return 0;
1018 
1019 	if (unlikely(tracing_selftest_running || tracing_disabled))
1020 		return 0;
1021 
1022 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 
1024 	trace_ctx = tracing_gen_ctx();
1025 	buffer = global_trace.array_buffer.buffer;
1026 	ring_buffer_nest_start(buffer);
1027 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028 					    trace_ctx);
1029 	if (!event) {
1030 		size = 0;
1031 		goto out;
1032 	}
1033 
1034 	entry = ring_buffer_event_data(event);
1035 	entry->ip = ip;
1036 
1037 	memcpy(&entry->buf, str, size);
1038 
1039 	/* Add a newline if necessary */
1040 	if (entry->buf[size - 1] != '\n') {
1041 		entry->buf[size] = '\n';
1042 		entry->buf[size + 1] = '\0';
1043 	} else
1044 		entry->buf[size] = '\0';
1045 
1046 	__buffer_unlock_commit(buffer, event);
1047 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049 	ring_buffer_nest_end(buffer);
1050 	return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
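/*
 * Editorial note: callers normally reach __trace_puts()/__trace_bputs()
 * through the trace_puts() macro, e.g.:
 *
 *	trace_puts("reached the retry path\n");
 *
 * which, roughly speaking, expands to __trace_bputs() for a string the
 * compiler can prove constant and to __trace_puts() otherwise.
 */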
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 					   void *cond_data)
1100 {
1101 	struct tracer *tracer = tr->current_trace;
1102 	unsigned long flags;
1103 
1104 	if (in_nmi()) {
1105 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1107 		return;
1108 	}
1109 
1110 	if (!tr->allocated_snapshot) {
1111 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 		internal_trace_puts("*** stopping trace here!   ***\n");
1113 		tracing_off();
1114 		return;
1115 	}
1116 
1117 	/* Note, snapshot can not be used when the tracer uses it */
1118 	if (tracer->use_max_tr) {
1119 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121 		return;
1122 	}
1123 
1124 	local_irq_save(flags);
1125 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 	local_irq_restore(flags);
1127 }
1128 
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 	tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:		The tracing instance to snapshot
1159  * @cond_data:	The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 	tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:		The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 	void *cond_data = NULL;
1192 
1193 	arch_spin_lock(&tr->max_lock);
1194 
1195 	if (tr->cond_snapshot)
1196 		cond_data = tr->cond_snapshot->cond_data;
1197 
1198 	arch_spin_unlock(&tr->max_lock);
1199 
1200 	return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 					struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207 
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210 	int ret;
1211 
1212 	if (!tr->allocated_snapshot) {
1213 
1214 		/* allocate spare buffer */
1215 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217 		if (ret < 0)
1218 			return ret;
1219 
1220 		tr->allocated_snapshot = true;
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228 	/*
1229 	 * We don't free the ring buffer; instead, we resize it because
1230 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1231 	 * we want to preserve it.
1232 	 */
1233 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 	set_buffer_entries(&tr->max_buffer, 1);
1235 	tracing_reset_online_cpus(&tr->max_buffer);
1236 	tr->allocated_snapshot = false;
1237 }
1238 
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251 	struct trace_array *tr = &global_trace;
1252 	int ret;
1253 
1254 	ret = tracing_alloc_snapshot_instance(tr);
1255 	WARN_ON(ret < 0);
1256 
1257 	return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260 
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274 	int ret;
1275 
1276 	ret = tracing_alloc_snapshot();
1277 	if (ret < 0)
1278 		return;
1279 
1280 	tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
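/*
 * Illustrative pattern (editorial sketch): a debugging hook could
 * allocate the spare buffer once from a sleepable context and then
 * snapshot from a fast path when some condition fires;
 * bug_condition_detected() is a hypothetical predicate.
 *
 *	// setup, may sleep
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, in the fast path
 *	if (bug_condition_detected())
 *		tracing_snapshot();
 */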
1283 
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:		The tracing instance
1287  * @cond_data:	User data to associate with the snapshot
1288  * @update:	Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 				 cond_update_fn_t update)
1299 {
1300 	struct cond_snapshot *cond_snapshot;
1301 	int ret = 0;
1302 
1303 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304 	if (!cond_snapshot)
1305 		return -ENOMEM;
1306 
1307 	cond_snapshot->cond_data = cond_data;
1308 	cond_snapshot->update = update;
1309 
1310 	mutex_lock(&trace_types_lock);
1311 
1312 	ret = tracing_alloc_snapshot_instance(tr);
1313 	if (ret)
1314 		goto fail_unlock;
1315 
1316 	if (tr->current_trace->use_max_tr) {
1317 		ret = -EBUSY;
1318 		goto fail_unlock;
1319 	}
1320 
1321 	/*
1322 	 * The cond_snapshot can only change to NULL without the
1323 	 * trace_types_lock. We don't care if we race with it going
1324 	 * to NULL, but we want to make sure that it's not set to
1325 	 * something other than NULL when we get here, which we can
1326 	 * do safely with only holding the trace_types_lock and not
1327 	 * having to take the max_lock.
1328 	 */
1329 	if (tr->cond_snapshot) {
1330 		ret = -EBUSY;
1331 		goto fail_unlock;
1332 	}
1333 
1334 	arch_spin_lock(&tr->max_lock);
1335 	tr->cond_snapshot = cond_snapshot;
1336 	arch_spin_unlock(&tr->max_lock);
1337 
1338 	mutex_unlock(&trace_types_lock);
1339 
1340 	return ret;
1341 
1342  fail_unlock:
1343 	mutex_unlock(&trace_types_lock);
1344 	kfree(cond_snapshot);
1345 	return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348 
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:		The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361 	int ret = 0;
1362 
1363 	arch_spin_lock(&tr->max_lock);
1364 
1365 	if (!tr->cond_snapshot)
1366 		ret = -EINVAL;
1367 	else {
1368 		kfree(tr->cond_snapshot);
1369 		tr->cond_snapshot = NULL;
1370 	}
1371 
1372 	arch_spin_unlock(&tr->max_lock);
1373 
1374 	return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391 	return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396 	/* Give warning */
1397 	tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402 	return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407 	return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412 	return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416 
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419 	if (tr->array_buffer.buffer)
1420 		ring_buffer_record_off(tr->array_buffer.buffer);
1421 	/*
1422 	 * This flag is looked at when buffers haven't been allocated
1423 	 * yet, or by some tracers (like irqsoff), that just want to
1424 	 * know if the ring buffer has been disabled, but it can handle
1425 	 * races of where it gets disabled but we still do a record.
1426 	 * As the check is in the fast path of the tracers, it is more
1427 	 * important to be fast than accurate.
1428 	 */
1429 	tr->buffer_disabled = 1;
1430 	/* Make the flag seen by readers */
1431 	smp_wmb();
1432 }
1433 
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444 	tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
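/*
 * Illustrative use (editorial sketch): tracing_on()/tracing_off() can
 * bracket a suspect region so that only events from that window remain
 * in the ring buffer; suspect_function() is a hypothetical placeholder.
 *
 *	tracing_on();
 *	suspect_function();
 *	tracing_off();
 */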
1447 
1448 void disable_trace_on_warning(void)
1449 {
1450 	if (__disable_trace_on_warning) {
1451 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 			"Disabling tracing due to warning\n");
1453 		tracing_off();
1454 	}
1455 }
1456 
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465 	if (tr->array_buffer.buffer)
1466 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 	return !tr->buffer_disabled;
1468 }
1469 
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475 	return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478 
1479 static int __init set_buf_size(char *str)
1480 {
1481 	unsigned long buf_size;
1482 
1483 	if (!str)
1484 		return 0;
1485 	buf_size = memparse(str, &str);
1486 	/* nr_entries can not be zero */
1487 	if (buf_size == 0)
1488 		return 0;
1489 	trace_buf_size = buf_size;
1490 	return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493 
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496 	unsigned long threshold;
1497 	int ret;
1498 
1499 	if (!str)
1500 		return 0;
1501 	ret = kstrtoul(str, 0, &threshold);
1502 	if (ret < 0)
1503 		return 0;
1504 	tracing_thresh = threshold * 1000;
1505 	return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508 
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511 	return nsecs / 1000;
1512 }
1513 
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522 
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525 	TRACE_FLAGS
1526 	NULL
1527 };
1528 
1529 static struct {
1530 	u64 (*func)(void);
1531 	const char *name;
1532 	int in_ns;		/* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 	{ trace_clock_local,		"local",	1 },
1535 	{ trace_clock_global,		"global",	1 },
1536 	{ trace_clock_counter,		"counter",	0 },
1537 	{ trace_clock_jiffies,		"uptime",	0 },
1538 	{ trace_clock,			"perf",		1 },
1539 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1540 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1541 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1542 	ARCH_TRACE_CLOCKS
1543 };
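/*
 * Editorial note: the names above are what userspace writes to the
 * trace_clock file to select a clock, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */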
1544 
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547 	if (trace_clocks[tr->clock_id].in_ns)
1548 		return true;
1549 
1550 	return false;
1551 }
1552 
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558 	memset(parser, 0, sizeof(*parser));
1559 
1560 	parser->buffer = kmalloc(size, GFP_KERNEL);
1561 	if (!parser->buffer)
1562 		return 1;
1563 
1564 	parser->size = size;
1565 	return 0;
1566 }
1567 
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573 	kfree(parser->buffer);
1574 	parser->buffer = NULL;
1575 }
1576 
1577 /*
1578  * trace_get_user - reads the user input string separated by space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 	size_t cnt, loff_t *ppos)
1590 {
1591 	char ch;
1592 	size_t read = 0;
1593 	ssize_t ret;
1594 
1595 	if (!*ppos)
1596 		trace_parser_clear(parser);
1597 
1598 	ret = get_user(ch, ubuf++);
1599 	if (ret)
1600 		goto out;
1601 
1602 	read++;
1603 	cnt--;
1604 
1605 	/*
1606 	 * The parser is not finished with the last write,
1607 	 * continue reading the user input without skipping spaces.
1608 	 */
1609 	if (!parser->cont) {
1610 		/* skip white space */
1611 		while (cnt && isspace(ch)) {
1612 			ret = get_user(ch, ubuf++);
1613 			if (ret)
1614 				goto out;
1615 			read++;
1616 			cnt--;
1617 		}
1618 
1619 		parser->idx = 0;
1620 
1621 		/* only spaces were written */
1622 		if (isspace(ch) || !ch) {
1623 			*ppos += read;
1624 			ret = read;
1625 			goto out;
1626 		}
1627 	}
1628 
1629 	/* read the non-space input */
1630 	while (cnt && !isspace(ch) && ch) {
1631 		if (parser->idx < parser->size - 1)
1632 			parser->buffer[parser->idx++] = ch;
1633 		else {
1634 			ret = -EINVAL;
1635 			goto out;
1636 		}
1637 		ret = get_user(ch, ubuf++);
1638 		if (ret)
1639 			goto out;
1640 		read++;
1641 		cnt--;
1642 	}
1643 
1644 	/* We either got finished input or we have to wait for another call. */
1645 	if (isspace(ch) || !ch) {
1646 		parser->buffer[parser->idx] = 0;
1647 		parser->cont = false;
1648 	} else if (parser->idx < parser->size - 1) {
1649 		parser->cont = true;
1650 		parser->buffer[parser->idx++] = ch;
1651 		/* Make sure the parsed string always terminates with '\0'. */
1652 		parser->buffer[parser->idx] = 0;
1653 	} else {
1654 		ret = -EINVAL;
1655 		goto out;
1656 	}
1657 
1658 	*ppos += read;
1659 	ret = read;
1660 
1661 out:
1662 	return ret;
1663 }
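/*
 * Worked example (editorial note): for a user write of "foo  bar\n",
 * the first call copies "foo" into parser->buffer and returns once it
 * reads the following space; a later call (with the advanced *ppos)
 * skips the extra space and returns "bar". A token cut off by the end
 * of the write sets parser->cont so the next call continues it.
 */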
1664 
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668 	int len;
1669 
1670 	if (trace_seq_used(s) <= s->seq.readpos)
1671 		return -EBUSY;
1672 
1673 	len = trace_seq_used(s) - s->seq.readpos;
1674 	if (cnt > len)
1675 		cnt = len;
1676 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677 
1678 	s->seq.readpos += cnt;
1679 	return cnt;
1680 }
1681 
1682 unsigned long __read_mostly	tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684 
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 	defined(CONFIG_FSNOTIFY)
1687 
1688 static struct workqueue_struct *fsnotify_wq;
1689 
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692 	struct trace_array *tr = container_of(work, struct trace_array,
1693 					      fsnotify_work);
1694 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696 
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699 	struct trace_array *tr = container_of(iwork, struct trace_array,
1700 					      fsnotify_irqwork);
1701 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703 
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 				     struct dentry *d_tracer)
1706 {
1707 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 					      d_tracer, &tr->max_latency,
1711 					      &tracing_max_lat_fops);
1712 }
1713 
1714 __init static int latency_fsnotify_init(void)
1715 {
1716 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1718 	if (!fsnotify_wq) {
1719 		pr_err("Unable to allocate tr_max_lat_wq\n");
1720 		return -ENOMEM;
1721 	}
1722 	return 0;
1723 }
1724 
1725 late_initcall_sync(latency_fsnotify_init);
1726 
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729 	if (!fsnotify_wq)
1730 		return;
1731 	/*
1732 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 	 * possible that we are called from __schedule() or do_idle(), which
1734 	 * could cause a deadlock.
1735 	 */
1736 	irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738 
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744 
1745 #define trace_create_maxlat_file(tr, d_tracer)				\
1746 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1747 			  &tr->max_latency, &tracing_max_lat_fops)
1748 
1749 #endif
1750 
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760 	struct array_buffer *trace_buf = &tr->array_buffer;
1761 	struct array_buffer *max_buf = &tr->max_buffer;
1762 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764 
1765 	max_buf->cpu = cpu;
1766 	max_buf->time_start = data->preempt_timestamp;
1767 
1768 	max_data->saved_latency = tr->max_latency;
1769 	max_data->critical_start = data->critical_start;
1770 	max_data->critical_end = data->critical_end;
1771 
1772 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 	max_data->pid = tsk->pid;
1774 	/*
1775 	 * If tsk == current, then use current_uid(), as that does not use
1776 	 * RCU. The irq tracer can be called out of RCU scope.
1777 	 */
1778 	if (tsk == current)
1779 		max_data->uid = current_uid();
1780 	else
1781 		max_data->uid = task_uid(tsk);
1782 
1783 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 	max_data->policy = tsk->policy;
1785 	max_data->rt_priority = tsk->rt_priority;
1786 
1787 	/* record this task's comm */
1788 	tracing_record_cmdline(tsk);
1789 	latency_fsnotify(tr);
1790 }
1791 
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804 	      void *cond_data)
1805 {
1806 	if (tr->stop_count)
1807 		return;
1808 
1809 	WARN_ON_ONCE(!irqs_disabled());
1810 
1811 	if (!tr->allocated_snapshot) {
1812 		/* Only the nop tracer should hit this when disabling */
1813 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814 		return;
1815 	}
1816 
1817 	arch_spin_lock(&tr->max_lock);
1818 
1819 	/* Inherit the recordable setting from array_buffer */
1820 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 		ring_buffer_record_on(tr->max_buffer.buffer);
1822 	else
1823 		ring_buffer_record_off(tr->max_buffer.buffer);
1824 
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827 		goto out_unlock;
1828 #endif
1829 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830 
1831 	__update_max_tr(tr, tsk, cpu);
1832 
1833  out_unlock:
1834 	arch_spin_unlock(&tr->max_lock);
1835 }
1836 
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848 	int ret;
1849 
1850 	if (tr->stop_count)
1851 		return;
1852 
1853 	WARN_ON_ONCE(!irqs_disabled());
1854 	if (!tr->allocated_snapshot) {
1855 		/* Only the nop tracer should hit this when disabling */
1856 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857 		return;
1858 	}
1859 
1860 	arch_spin_lock(&tr->max_lock);
1861 
1862 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863 
1864 	if (ret == -EBUSY) {
1865 		/*
1866 		 * We failed to swap the buffer due to a commit taking
1867 		 * place on this CPU. We fail to record, but we reset
1868 		 * the max trace buffer (no one writes directly to it)
1869 		 * and flag that it failed.
1870 		 */
1871 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 			"Failed to swap buffers due to commit in progress\n");
1873 	}
1874 
1875 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876 
1877 	__update_max_tr(tr, tsk, cpu);
1878 	arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881 
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884 	/* Iterators are static, they should be filled or empty */
1885 	if (trace_buffer_iter(iter, iter->cpu_file))
1886 		return 0;
1887 
1888 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889 				full);
1890 }
1891 
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894 
1895 struct trace_selftests {
1896 	struct list_head		list;
1897 	struct tracer			*type;
1898 };
1899 
1900 static LIST_HEAD(postponed_selftests);
1901 
1902 static int save_selftest(struct tracer *type)
1903 {
1904 	struct trace_selftests *selftest;
1905 
1906 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907 	if (!selftest)
1908 		return -ENOMEM;
1909 
1910 	selftest->type = type;
1911 	list_add(&selftest->list, &postponed_selftests);
1912 	return 0;
1913 }
1914 
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917 	struct trace_array *tr = &global_trace;
1918 	struct tracer *saved_tracer = tr->current_trace;
1919 	int ret;
1920 
1921 	if (!type->selftest || tracing_selftest_disabled)
1922 		return 0;
1923 
1924 	/*
1925 	 * If a tracer registers early in boot up (before scheduling is
1926 	 * initialized and such), then do not run its selftests yet.
1927 	 * Instead, run it a little later in the boot process.
1928 	 */
1929 	if (!selftests_can_run)
1930 		return save_selftest(type);
1931 
1932 	/*
1933 	 * Run a selftest on this tracer.
1934 	 * Here we reset the trace buffer, and set the current
1935 	 * tracer to be this tracer. The tracer can then run some
1936 	 * internal tracing to verify that everything is in order.
1937 	 * If we fail, we do not register this tracer.
1938 	 */
1939 	tracing_reset_online_cpus(&tr->array_buffer);
1940 
1941 	tr->current_trace = type;
1942 
1943 #ifdef CONFIG_TRACER_MAX_TRACE
1944 	if (type->use_max_tr) {
1945 		/* If we expanded the buffers, make sure the max is expanded too */
1946 		if (ring_buffer_expanded)
1947 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1948 					   RING_BUFFER_ALL_CPUS);
1949 		tr->allocated_snapshot = true;
1950 	}
1951 #endif
1952 
1953 	/* the test is responsible for initializing and enabling */
1954 	pr_info("Testing tracer %s: ", type->name);
1955 	ret = type->selftest(type, tr);
1956 	/* the test is responsible for resetting too */
1957 	tr->current_trace = saved_tracer;
1958 	if (ret) {
1959 		printk(KERN_CONT "FAILED!\n");
1960 		/* Add the warning after printing 'FAILED' */
1961 		WARN_ON(1);
1962 		return -1;
1963 	}
1964 	/* Only reset on passing, to avoid touching corrupted buffers */
1965 	tracing_reset_online_cpus(&tr->array_buffer);
1966 
1967 #ifdef CONFIG_TRACER_MAX_TRACE
1968 	if (type->use_max_tr) {
1969 		tr->allocated_snapshot = false;
1970 
1971 		/* Shrink the max buffer again */
1972 		if (ring_buffer_expanded)
1973 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1974 					   RING_BUFFER_ALL_CPUS);
1975 	}
1976 #endif
1977 
1978 	printk(KERN_CONT "PASSED\n");
1979 	return 0;
1980 }
1981 
1982 static __init int init_trace_selftests(void)
1983 {
1984 	struct trace_selftests *p, *n;
1985 	struct tracer *t, **last;
1986 	int ret;
1987 
1988 	selftests_can_run = true;
1989 
1990 	mutex_lock(&trace_types_lock);
1991 
1992 	if (list_empty(&postponed_selftests))
1993 		goto out;
1994 
1995 	pr_info("Running postponed tracer tests:\n");
1996 
1997 	tracing_selftest_running = true;
1998 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1999 		/* This loop can take minutes when sanitizers are enabled, so
2000 		 * let's make sure we allow RCU processing.
2001 		 */
2002 		cond_resched();
2003 		ret = run_tracer_selftest(p->type);
2004 		/* If the test fails, then warn and remove from available_tracers */
2005 		if (ret < 0) {
2006 			WARN(1, "tracer: %s failed selftest, disabling\n",
2007 			     p->type->name);
2008 			last = &trace_types;
2009 			for (t = trace_types; t; t = t->next) {
2010 				if (t == p->type) {
2011 					*last = t->next;
2012 					break;
2013 				}
2014 				last = &t->next;
2015 			}
2016 		}
2017 		list_del(&p->list);
2018 		kfree(p);
2019 	}
2020 	tracing_selftest_running = false;
2021 
2022  out:
2023 	mutex_unlock(&trace_types_lock);
2024 
2025 	return 0;
2026 }
2027 core_initcall(init_trace_selftests);
2028 #else
2029 static inline int run_tracer_selftest(struct tracer *type)
2030 {
2031 	return 0;
2032 }
2033 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2034 
2035 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2036 
2037 static void __init apply_trace_boot_options(void);
2038 
2039 /**
2040  * register_tracer - register a tracer with the ftrace system.
2041  * @type: the plugin for the tracer
2042  *
2043  * Register a new plugin tracer.
2044  */
2045 int __init register_tracer(struct tracer *type)
2046 {
2047 	struct tracer *t;
2048 	int ret = 0;
2049 
2050 	if (!type->name) {
2051 		pr_info("Tracer must have a name\n");
2052 		return -1;
2053 	}
2054 
2055 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2056 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2057 		return -1;
2058 	}
2059 
2060 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2061 		pr_warn("Can not register tracer %s due to lockdown\n",
2062 			   type->name);
2063 		return -EPERM;
2064 	}
2065 
2066 	mutex_lock(&trace_types_lock);
2067 
2068 	tracing_selftest_running = true;
2069 
2070 	for (t = trace_types; t; t = t->next) {
2071 		if (strcmp(type->name, t->name) == 0) {
2072 			/* already found */
2073 			pr_info("Tracer %s already registered\n",
2074 				type->name);
2075 			ret = -1;
2076 			goto out;
2077 		}
2078 	}
2079 
2080 	if (!type->set_flag)
2081 		type->set_flag = &dummy_set_flag;
2082 	if (!type->flags) {
2083 		/* allocate a dummy tracer_flags */
2084 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2085 		if (!type->flags) {
2086 			ret = -ENOMEM;
2087 			goto out;
2088 		}
2089 		type->flags->val = 0;
2090 		type->flags->opts = dummy_tracer_opt;
2091 	} else
2092 		if (!type->flags->opts)
2093 			type->flags->opts = dummy_tracer_opt;
2094 
2095 	/* store the tracer for __set_tracer_option */
2096 	type->flags->trace = type;
2097 
2098 	ret = run_tracer_selftest(type);
2099 	if (ret < 0)
2100 		goto out;
2101 
2102 	type->next = trace_types;
2103 	trace_types = type;
2104 	add_tracer_options(&global_trace, type);
2105 
2106  out:
2107 	tracing_selftest_running = false;
2108 	mutex_unlock(&trace_types_lock);
2109 
2110 	if (ret || !default_bootup_tracer)
2111 		goto out_unlock;
2112 
2113 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2114 		goto out_unlock;
2115 
2116 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2117 	/* Do we want this tracer to start on bootup? */
2118 	tracing_set_tracer(&global_trace, type->name);
2119 	default_bootup_tracer = NULL;
2120 
2121 	apply_trace_boot_options();
2122 
2123 	/* disable other selftests, since this will break them. */
2124 	disable_tracing_selftest("running a tracer");
2125 
2126  out_unlock:
2127 	return ret;
2128 }
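/*
 * A minimal sketch of how a built-in tracer plugin might use
 * register_tracer(); "example_tracer" and its init/reset callbacks are
 * hypothetical names, not defined anywhere in this file:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static int __init example_trace_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_trace_register);
 */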
2129 
2130 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2131 {
2132 	struct trace_buffer *buffer = buf->buffer;
2133 
2134 	if (!buffer)
2135 		return;
2136 
2137 	ring_buffer_record_disable(buffer);
2138 
2139 	/* Make sure all commits have finished */
2140 	synchronize_rcu();
2141 	ring_buffer_reset_cpu(buffer, cpu);
2142 
2143 	ring_buffer_record_enable(buffer);
2144 }
2145 
2146 void tracing_reset_online_cpus(struct array_buffer *buf)
2147 {
2148 	struct trace_buffer *buffer = buf->buffer;
2149 
2150 	if (!buffer)
2151 		return;
2152 
2153 	ring_buffer_record_disable(buffer);
2154 
2155 	/* Make sure all commits have finished */
2156 	synchronize_rcu();
2157 
2158 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2159 
2160 	ring_buffer_reset_online_cpus(buffer);
2161 
2162 	ring_buffer_record_enable(buffer);
2163 }
2164 
2165 /* Must have trace_types_lock held */
2166 void tracing_reset_all_online_cpus(void)
2167 {
2168 	struct trace_array *tr;
2169 
2170 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2171 		if (!tr->clear_trace)
2172 			continue;
2173 		tr->clear_trace = false;
2174 		tracing_reset_online_cpus(&tr->array_buffer);
2175 #ifdef CONFIG_TRACER_MAX_TRACE
2176 		tracing_reset_online_cpus(&tr->max_buffer);
2177 #endif
2178 	}
2179 }
2180 
2181 static int *tgid_map;
2182 
2183 #define SAVED_CMDLINES_DEFAULT 128
2184 #define NO_CMDLINE_MAP UINT_MAX
2185 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2186 struct saved_cmdlines_buffer {
2187 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2188 	unsigned *map_cmdline_to_pid;
2189 	unsigned cmdline_num;
2190 	int cmdline_idx;
2191 	char *saved_cmdlines;
2192 };
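/*
 * map_pid_to_cmdline[] and map_cmdline_to_pid[] form a two-way mapping
 * between PIDs and slots in saved_cmdlines[], so that recycling a slot
 * can also invalidate the stale PID entry that pointed at it.
 */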
2193 static struct saved_cmdlines_buffer *savedcmd;
2194 
2195 /* temporarily disable recording */
2196 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2197 
2198 static inline char *get_saved_cmdlines(int idx)
2199 {
2200 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2201 }
2202 
2203 static inline void set_cmdline(int idx, const char *cmdline)
2204 {
2205 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2206 }
2207 
2208 static int allocate_cmdlines_buffer(unsigned int val,
2209 				    struct saved_cmdlines_buffer *s)
2210 {
2211 	s->map_cmdline_to_pid = kmalloc_array(val,
2212 					      sizeof(*s->map_cmdline_to_pid),
2213 					      GFP_KERNEL);
2214 	if (!s->map_cmdline_to_pid)
2215 		return -ENOMEM;
2216 
2217 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2218 	if (!s->saved_cmdlines) {
2219 		kfree(s->map_cmdline_to_pid);
2220 		return -ENOMEM;
2221 	}
2222 
2223 	s->cmdline_idx = 0;
2224 	s->cmdline_num = val;
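	/*
	 * NO_CMDLINE_MAP is UINT_MAX, i.e. every byte is 0xff, so a
	 * byte-wise memset() marks each entry of both maps as unused.
	 */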
2225 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2226 	       sizeof(s->map_pid_to_cmdline));
2227 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2228 	       val * sizeof(*s->map_cmdline_to_pid));
2229 
2230 	return 0;
2231 }
2232 
2233 static int trace_create_savedcmd(void)
2234 {
2235 	int ret;
2236 
2237 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2238 	if (!savedcmd)
2239 		return -ENOMEM;
2240 
2241 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2242 	if (ret < 0) {
2243 		kfree(savedcmd);
2244 		savedcmd = NULL;
2245 		return -ENOMEM;
2246 	}
2247 
2248 	return 0;
2249 }
2250 
2251 int is_tracing_stopped(void)
2252 {
2253 	return global_trace.stop_count;
2254 }
2255 
2256 /**
2257  * tracing_start - quick start of the tracer
2258  *
2259  * If tracing is enabled but was stopped by tracing_stop,
2260  * this will start the tracer back up.
2261  */
2262 void tracing_start(void)
2263 {
2264 	struct trace_buffer *buffer;
2265 	unsigned long flags;
2266 
2267 	if (tracing_disabled)
2268 		return;
2269 
2270 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2271 	if (--global_trace.stop_count) {
2272 		if (global_trace.stop_count < 0) {
2273 			/* Someone screwed up their debugging */
2274 			WARN_ON_ONCE(1);
2275 			global_trace.stop_count = 0;
2276 		}
2277 		goto out;
2278 	}
2279 
2280 	/* Prevent the buffers from switching */
2281 	arch_spin_lock(&global_trace.max_lock);
2282 
2283 	buffer = global_trace.array_buffer.buffer;
2284 	if (buffer)
2285 		ring_buffer_record_enable(buffer);
2286 
2287 #ifdef CONFIG_TRACER_MAX_TRACE
2288 	buffer = global_trace.max_buffer.buffer;
2289 	if (buffer)
2290 		ring_buffer_record_enable(buffer);
2291 #endif
2292 
2293 	arch_spin_unlock(&global_trace.max_lock);
2294 
2295  out:
2296 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2297 }
2298 
2299 static void tracing_start_tr(struct trace_array *tr)
2300 {
2301 	struct trace_buffer *buffer;
2302 	unsigned long flags;
2303 
2304 	if (tracing_disabled)
2305 		return;
2306 
2307 	/* If global, we need to also start the max tracer */
2308 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2309 		return tracing_start();
2310 
2311 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2312 
2313 	if (--tr->stop_count) {
2314 		if (tr->stop_count < 0) {
2315 			/* Someone screwed up their debugging */
2316 			WARN_ON_ONCE(1);
2317 			tr->stop_count = 0;
2318 		}
2319 		goto out;
2320 	}
2321 
2322 	buffer = tr->array_buffer.buffer;
2323 	if (buffer)
2324 		ring_buffer_record_enable(buffer);
2325 
2326  out:
2327 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2328 }
2329 
2330 /**
2331  * tracing_stop - quick stop of the tracer
2332  *
2333  * Light weight way to stop tracing. Use in conjunction with
2334  * tracing_start.
2335  */
2336 void tracing_stop(void)
2337 {
2338 	struct trace_buffer *buffer;
2339 	unsigned long flags;
2340 
2341 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2342 	if (global_trace.stop_count++)
2343 		goto out;
2344 
2345 	/* Prevent the buffers from switching */
2346 	arch_spin_lock(&global_trace.max_lock);
2347 
2348 	buffer = global_trace.array_buffer.buffer;
2349 	if (buffer)
2350 		ring_buffer_record_disable(buffer);
2351 
2352 #ifdef CONFIG_TRACER_MAX_TRACE
2353 	buffer = global_trace.max_buffer.buffer;
2354 	if (buffer)
2355 		ring_buffer_record_disable(buffer);
2356 #endif
2357 
2358 	arch_spin_unlock(&global_trace.max_lock);
2359 
2360  out:
2361 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2362 }
2363 
2364 static void tracing_stop_tr(struct trace_array *tr)
2365 {
2366 	struct trace_buffer *buffer;
2367 	unsigned long flags;
2368 
2369 	/* If global, we need to also stop the max tracer */
2370 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2371 		return tracing_stop();
2372 
2373 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2374 	if (tr->stop_count++)
2375 		goto out;
2376 
2377 	buffer = tr->array_buffer.buffer;
2378 	if (buffer)
2379 		ring_buffer_record_disable(buffer);
2380 
2381  out:
2382 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2383 }
2384 
2385 static int trace_save_cmdline(struct task_struct *tsk)
2386 {
2387 	unsigned pid, idx;
2388 
2389 	/* treat recording of idle task as a success */
2390 	if (!tsk->pid)
2391 		return 1;
2392 
2393 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2394 		return 0;
2395 
2396 	/*
2397 	 * It's not the end of the world if we don't get
2398 	 * the lock, but we also don't want to spin
2399 	 * nor do we want to disable interrupts,
2400 	 * so if we miss here, then better luck next time.
2401 	 */
2402 	if (!arch_spin_trylock(&trace_cmdline_lock))
2403 		return 0;
2404 
2405 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2406 	if (idx == NO_CMDLINE_MAP) {
2407 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2408 
2409 		/*
2410 		 * Check whether the cmdline buffer at idx has a pid
2411 		 * mapped. We are going to overwrite that entry so we
2412 		 * need to clear the map_pid_to_cmdline. Otherwise we
2413 		 * would read the new comm for the old pid.
2414 		 */
2415 		pid = savedcmd->map_cmdline_to_pid[idx];
2416 		if (pid != NO_CMDLINE_MAP)
2417 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2418 
2419 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2420 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2421 
2422 		savedcmd->cmdline_idx = idx;
2423 	}
2424 
2425 	set_cmdline(idx, tsk->comm);
2426 
2427 	arch_spin_unlock(&trace_cmdline_lock);
2428 
2429 	return 1;
2430 }
2431 
2432 static void __trace_find_cmdline(int pid, char comm[])
2433 {
2434 	unsigned map;
2435 
2436 	if (!pid) {
2437 		strcpy(comm, "<idle>");
2438 		return;
2439 	}
2440 
2441 	if (WARN_ON_ONCE(pid < 0)) {
2442 		strcpy(comm, "<XXX>");
2443 		return;
2444 	}
2445 
2446 	if (pid > PID_MAX_DEFAULT) {
2447 		strcpy(comm, "<...>");
2448 		return;
2449 	}
2450 
2451 	map = savedcmd->map_pid_to_cmdline[pid];
2452 	if (map != NO_CMDLINE_MAP)
2453 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2454 	else
2455 		strcpy(comm, "<...>");
2456 }
2457 
2458 void trace_find_cmdline(int pid, char comm[])
2459 {
2460 	preempt_disable();
2461 	arch_spin_lock(&trace_cmdline_lock);
2462 
2463 	__trace_find_cmdline(pid, comm);
2464 
2465 	arch_spin_unlock(&trace_cmdline_lock);
2466 	preempt_enable();
2467 }
2468 
2469 int trace_find_tgid(int pid)
2470 {
2471 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2472 		return 0;
2473 
2474 	return tgid_map[pid];
2475 }
2476 
2477 static int trace_save_tgid(struct task_struct *tsk)
2478 {
2479 	/* treat recording of idle task as a success */
2480 	if (!tsk->pid)
2481 		return 1;
2482 
2483 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2484 		return 0;
2485 
2486 	tgid_map[tsk->pid] = tsk->tgid;
2487 	return 1;
2488 }
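/*
 * tgid_map is a flat array indexed by PID (up to PID_MAX_DEFAULT),
 * allocated elsewhere when TGID recording is enabled, which keeps both
 * trace_find_tgid() and trace_save_tgid() lock-free O(1) operations.
 */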
2489 
2490 static bool tracing_record_taskinfo_skip(int flags)
2491 {
2492 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493 		return true;
2494 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2495 		return true;
2496 	if (!__this_cpu_read(trace_taskinfo_save))
2497 		return true;
2498 	return false;
2499 }
2500 
2501 /**
2502  * tracing_record_taskinfo - record the task info of a task
2503  *
2504  * @task:  task to record
2505  * @flags: TRACE_RECORD_CMDLINE for recording comm
2506  *         TRACE_RECORD_TGID for recording tgid
2507  */
2508 void tracing_record_taskinfo(struct task_struct *task, int flags)
2509 {
2510 	bool done;
2511 
2512 	if (tracing_record_taskinfo_skip(flags))
2513 		return;
2514 
2515 	/*
2516 	 * Record as much task information as possible. If some fail, continue
2517 	 * to try to record the others.
2518 	 */
2519 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2520 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2521 
2522 	/* If recording any information failed, retry again soon. */
2523 	if (!done)
2524 		return;
2525 
2526 	__this_cpu_write(trace_taskinfo_save, false);
2527 }
2528 
2529 /**
2530  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2531  *
2532  * @prev: previous task during sched_switch
2533  * @next: next task during sched_switch
2534  * @flags: TRACE_RECORD_CMDLINE for recording comm
2535  *         TRACE_RECORD_TGID for recording tgid
2536  */
2537 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2538 					  struct task_struct *next, int flags)
2539 {
2540 	bool done;
2541 
2542 	if (tracing_record_taskinfo_skip(flags))
2543 		return;
2544 
2545 	/*
2546 	 * Record as much task information as possible. If some fail, continue
2547 	 * to try to record the others.
2548 	 */
2549 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2550 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2551 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2552 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2553 
2554 	/* If recording any information failed, retry again soon. */
2555 	if (!done)
2556 		return;
2557 
2558 	__this_cpu_write(trace_taskinfo_save, false);
2559 }
2560 
2561 /* Helpers to record a specific task information */
2562 void tracing_record_cmdline(struct task_struct *task)
2563 {
2564 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2565 }
2566 
2567 void tracing_record_tgid(struct task_struct *task)
2568 {
2569 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2570 }
2571 
2572 /*
2573  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2574  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2575  * simplifies those functions and keeps them in sync.
2576  */
2577 enum print_line_t trace_handle_return(struct trace_seq *s)
2578 {
2579 	return trace_seq_has_overflowed(s) ?
2580 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2581 }
2582 EXPORT_SYMBOL_GPL(trace_handle_return);
2583 
2584 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2585 {
2586 	unsigned int trace_flags = irqs_status;
2587 	unsigned int pc;
2588 
2589 	pc = preempt_count();
2590 
2591 	if (pc & NMI_MASK)
2592 		trace_flags |= TRACE_FLAG_NMI;
2593 	if (pc & HARDIRQ_MASK)
2594 		trace_flags |= TRACE_FLAG_HARDIRQ;
2595 	if (in_serving_softirq())
2596 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2597 
2598 	if (tif_need_resched())
2599 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2600 	if (test_preempt_need_resched())
2601 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2602 	return (trace_flags << 16) | (pc & 0xff);
2603 }
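/*
 * The returned trace_ctx therefore packs the TRACE_FLAG_* bits into the
 * upper 16 bits and the low byte of the preempt count into the lowest
 * byte, so a single unsigned int describes both the interrupt context
 * and the preemption depth of the event.
 */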
2604 
2605 struct ring_buffer_event *
2606 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2607 			  int type,
2608 			  unsigned long len,
2609 			  unsigned int trace_ctx)
2610 {
2611 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2612 }
2613 
2614 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2615 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2616 static int trace_buffered_event_ref;
2617 
2618 /**
2619  * trace_buffered_event_enable - enable buffering events
2620  *
2621  * When events are being filtered, it is quicker to use a temporary
2622  * buffer to write the event data into if there's a likely chance
2623  * that it will not be committed. The discard of the ring buffer
2624  * is not as fast as committing, and is much slower than copying
2625  * a commit.
2626  *
2627  * When an event is to be filtered, allocate per cpu buffers to
2628  * write the event data into, and if the event is filtered and discarded
2629  * it is simply dropped, otherwise, the entire data is to be committed
2630  * in one shot.
2631  */
2632 void trace_buffered_event_enable(void)
2633 {
2634 	struct ring_buffer_event *event;
2635 	struct page *page;
2636 	int cpu;
2637 
2638 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2639 
2640 	if (trace_buffered_event_ref++)
2641 		return;
2642 
2643 	for_each_tracing_cpu(cpu) {
2644 		page = alloc_pages_node(cpu_to_node(cpu),
2645 					GFP_KERNEL | __GFP_NORETRY, 0);
2646 		if (!page)
2647 			goto failed;
2648 
2649 		event = page_address(page);
2650 		memset(event, 0, sizeof(*event));
2651 
2652 		per_cpu(trace_buffered_event, cpu) = event;
2653 
2654 		preempt_disable();
2655 		if (cpu == smp_processor_id() &&
2656 		    __this_cpu_read(trace_buffered_event) !=
2657 		    per_cpu(trace_buffered_event, cpu))
2658 			WARN_ON_ONCE(1);
2659 		preempt_enable();
2660 	}
2661 
2662 	return;
2663  failed:
2664 	trace_buffered_event_disable();
2665 }
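/*
 * Note: callers must hold event_mutex, and enabling is reference
 * counted, so every trace_buffered_event_enable() must be paired with a
 * matching trace_buffered_event_disable().
 */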
2666 
2667 static void enable_trace_buffered_event(void *data)
2668 {
2669 	/* Probably not needed, but do it anyway */
2670 	smp_rmb();
2671 	this_cpu_dec(trace_buffered_event_cnt);
2672 }
2673 
2674 static void disable_trace_buffered_event(void *data)
2675 {
2676 	this_cpu_inc(trace_buffered_event_cnt);
2677 }
2678 
2679 /**
2680  * trace_buffered_event_disable - disable buffering events
2681  *
2682  * When a filter is removed, it is faster to not use the buffered
2683  * events, and to commit directly into the ring buffer. Free up
2684  * the temp buffers when there are no more users. This requires
2685  * special synchronization with current events.
2686  */
2687 void trace_buffered_event_disable(void)
2688 {
2689 	int cpu;
2690 
2691 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2692 
2693 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2694 		return;
2695 
2696 	if (--trace_buffered_event_ref)
2697 		return;
2698 
2699 	preempt_disable();
2700 	/* For each CPU, set the buffer as used. */
2701 	smp_call_function_many(tracing_buffer_mask,
2702 			       disable_trace_buffered_event, NULL, 1);
2703 	preempt_enable();
2704 
2705 	/* Wait for all current users to finish */
2706 	synchronize_rcu();
2707 
2708 	for_each_tracing_cpu(cpu) {
2709 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2710 		per_cpu(trace_buffered_event, cpu) = NULL;
2711 	}
2712 	/*
2713 	 * Make sure trace_buffered_event is NULL before clearing
2714 	 * trace_buffered_event_cnt.
2715 	 */
2716 	smp_wmb();
2717 
2718 	preempt_disable();
2719 	/* Do the work on each cpu */
2720 	smp_call_function_many(tracing_buffer_mask,
2721 			       enable_trace_buffered_event, NULL, 1);
2722 	preempt_enable();
2723 }
2724 
2725 static struct trace_buffer *temp_buffer;
2726 
2727 struct ring_buffer_event *
2728 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2729 			  struct trace_event_file *trace_file,
2730 			  int type, unsigned long len,
2731 			  unsigned int trace_ctx)
2732 {
2733 	struct ring_buffer_event *entry;
2734 	int val;
2735 
2736 	*current_rb = trace_file->tr->array_buffer.buffer;
2737 
2738 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2739 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2740 	    (entry = this_cpu_read(trace_buffered_event))) {
2741 		/* Try to use the per cpu buffer first */
2742 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2743 		if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2744 			trace_event_setup(entry, type, trace_ctx);
2745 			entry->array[0] = len;
2746 			return entry;
2747 		}
2748 		this_cpu_dec(trace_buffered_event_cnt);
2749 	}
2750 
2751 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2752 					    trace_ctx);
2753 	/*
2754 	 * If tracing is off, but we have triggers enabled
2755 	 * we still need to look at the event data. Use the temp_buffer
2756 	 * to store the trace event for the trigger to use. It's recursion
2757 	 * safe and will not be recorded anywhere.
2758 	 */
2759 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2760 		*current_rb = temp_buffer;
2761 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2762 						    trace_ctx);
2763 	}
2764 	return entry;
2765 }
2766 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
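/*
 * Reservation order above: the per-cpu buffered event (when filtering
 * or soft-disable makes a discard likely), then the instance ring
 * buffer, and finally temp_buffer so conditional triggers can still see
 * the event data while the ring buffer is off.
 */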
2767 
2768 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2769 static DEFINE_MUTEX(tracepoint_printk_mutex);
2770 
2771 static void output_printk(struct trace_event_buffer *fbuffer)
2772 {
2773 	struct trace_event_call *event_call;
2774 	struct trace_event_file *file;
2775 	struct trace_event *event;
2776 	unsigned long flags;
2777 	struct trace_iterator *iter = tracepoint_print_iter;
2778 
2779 	/* We should never get here if iter is NULL */
2780 	if (WARN_ON_ONCE(!iter))
2781 		return;
2782 
2783 	event_call = fbuffer->trace_file->event_call;
2784 	if (!event_call || !event_call->event.funcs ||
2785 	    !event_call->event.funcs->trace)
2786 		return;
2787 
2788 	file = fbuffer->trace_file;
2789 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2790 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2791 	     !filter_match_preds(file->filter, fbuffer->entry)))
2792 		return;
2793 
2794 	event = &fbuffer->trace_file->event_call->event;
2795 
2796 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2797 	trace_seq_init(&iter->seq);
2798 	iter->ent = fbuffer->entry;
2799 	event_call->event.funcs->trace(iter, 0, event);
2800 	trace_seq_putc(&iter->seq, 0);
2801 	printk("%s", iter->seq.buffer);
2802 
2803 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2804 }
2805 
2806 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2807 			     void *buffer, size_t *lenp,
2808 			     loff_t *ppos)
2809 {
2810 	int save_tracepoint_printk;
2811 	int ret;
2812 
2813 	mutex_lock(&tracepoint_printk_mutex);
2814 	save_tracepoint_printk = tracepoint_printk;
2815 
2816 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2817 
2818 	/*
2819 	 * This will force exiting early, as tracepoint_printk
2820 	 * is always zero when tracepoint_print_iter is not allocated.
2821 	 */
2822 	if (!tracepoint_print_iter)
2823 		tracepoint_printk = 0;
2824 
2825 	if (save_tracepoint_printk == tracepoint_printk)
2826 		goto out;
2827 
2828 	if (tracepoint_printk)
2829 		static_key_enable(&tracepoint_printk_key.key);
2830 	else
2831 		static_key_disable(&tracepoint_printk_key.key);
2832 
2833  out:
2834 	mutex_unlock(&tracepoint_printk_mutex);
2835 
2836 	return ret;
2837 }
2838 
2839 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2840 {
2841 	if (static_key_false(&tracepoint_printk_key.key))
2842 		output_printk(fbuffer);
2843 
2844 	if (static_branch_unlikely(&trace_event_exports_enabled))
2845 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2846 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2847 				    fbuffer->event, fbuffer->entry,
2848 				    fbuffer->trace_ctx, fbuffer->regs);
2849 }
2850 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2851 
2852 /*
2853  * Skip 3:
2854  *
2855  *   trace_buffer_unlock_commit_regs()
2856  *   trace_event_buffer_commit()
2857  *   trace_event_raw_event_xxx()
2858  */
2859 # define STACK_SKIP 3
2860 
2861 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2862 				     struct trace_buffer *buffer,
2863 				     struct ring_buffer_event *event,
2864 				     unsigned int trace_ctx,
2865 				     struct pt_regs *regs)
2866 {
2867 	__buffer_unlock_commit(buffer, event);
2868 
2869 	/*
2870 	 * If regs is not set, then skip the necessary functions.
2871 	 * Note, we can still get here via blktrace, wakeup tracer
2872 	 * and mmiotrace, but that's ok if they lose a function or
2873 	 * two. They are not that meaningful.
2874 	 */
2875 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2876 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2877 }
2878 
2879 /*
2880  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2881  */
2882 void
2883 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2884 				   struct ring_buffer_event *event)
2885 {
2886 	__buffer_unlock_commit(buffer, event);
2887 }
2888 
2889 void
2890 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2891 	       parent_ip, unsigned int trace_ctx)
2892 {
2893 	struct trace_event_call *call = &event_function;
2894 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2895 	struct ring_buffer_event *event;
2896 	struct ftrace_entry *entry;
2897 
2898 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2899 					    trace_ctx);
2900 	if (!event)
2901 		return;
2902 	entry	= ring_buffer_event_data(event);
2903 	entry->ip			= ip;
2904 	entry->parent_ip		= parent_ip;
2905 
2906 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2907 		if (static_branch_unlikely(&trace_function_exports_enabled))
2908 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2909 		__buffer_unlock_commit(buffer, event);
2910 	}
2911 }
2912 
2913 #ifdef CONFIG_STACKTRACE
2914 
2915 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2916 #define FTRACE_KSTACK_NESTING	4
2917 
2918 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2919 
2920 struct ftrace_stack {
2921 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2922 };
2923 
2924 
2925 struct ftrace_stacks {
2926 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2927 };
2928 
2929 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2930 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2931 
2932 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2933 				 unsigned int trace_ctx,
2934 				 int skip, struct pt_regs *regs)
2935 {
2936 	struct trace_event_call *call = &event_kernel_stack;
2937 	struct ring_buffer_event *event;
2938 	unsigned int size, nr_entries;
2939 	struct ftrace_stack *fstack;
2940 	struct stack_entry *entry;
2941 	int stackidx;
2942 
2943 	/*
2944 	 * Add one, for this function and the call to stack_trace_save().
2945 	 * If regs is set, then these functions will not be in the way.
2946 	 */
2947 #ifndef CONFIG_UNWINDER_ORC
2948 	if (!regs)
2949 		skip++;
2950 #endif
2951 
2952 	preempt_disable_notrace();
2953 
2954 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2955 
2956 	/* This should never happen. If it does, yell once and skip */
2957 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2958 		goto out;
2959 
2960 	/*
2961 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2962 	 * interrupt will either see the value pre increment or post
2963 	 * increment. If the interrupt happens pre increment it will have
2964 	 * restored the counter when it returns.  We just need a barrier to
2965 	 * keep gcc from moving things around.
2966 	 */
2967 	barrier();
2968 
2969 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2970 	size = ARRAY_SIZE(fstack->calls);
2971 
2972 	if (regs) {
2973 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2974 						   size, skip);
2975 	} else {
2976 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2977 	}
2978 
2979 	size = nr_entries * sizeof(unsigned long);
2980 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2981 					    sizeof(*entry) + size, trace_ctx);
2982 	if (!event)
2983 		goto out;
2984 	entry = ring_buffer_event_data(event);
2985 
2986 	memcpy(&entry->caller, fstack->calls, size);
2987 	entry->size = nr_entries;
2988 
2989 	if (!call_filter_check_discard(call, entry, buffer, event))
2990 		__buffer_unlock_commit(buffer, event);
2991 
2992  out:
2993 	/* Again, don't let gcc optimize things here */
2994 	barrier();
2995 	__this_cpu_dec(ftrace_stack_reserve);
2996 	preempt_enable_notrace();
2997 
2998 }
2999 
3000 static inline void ftrace_trace_stack(struct trace_array *tr,
3001 				      struct trace_buffer *buffer,
3002 				      unsigned int trace_ctx,
3003 				      int skip, struct pt_regs *regs)
3004 {
3005 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3006 		return;
3007 
3008 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3009 }
3010 
3011 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3012 		   int skip)
3013 {
3014 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3015 
3016 	if (rcu_is_watching()) {
3017 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3018 		return;
3019 	}
3020 
3021 	/*
3022 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3023 	 * but if the above rcu_is_watching() failed, then the NMI
3024 	 * triggered someplace critical, and rcu_irq_enter() should
3025 	 * not be called from NMI.
3026 	 */
3027 	if (unlikely(in_nmi()))
3028 		return;
3029 
3030 	rcu_irq_enter_irqson();
3031 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3032 	rcu_irq_exit_irqson();
3033 }
3034 
3035 /**
3036  * trace_dump_stack - record a stack back trace in the trace buffer
3037  * @skip: Number of functions to skip (helper handlers)
3038  */
3039 void trace_dump_stack(int skip)
3040 {
3041 	if (tracing_disabled || tracing_selftest_running)
3042 		return;
3043 
3044 #ifndef CONFIG_UNWINDER_ORC
3045 	/* Skip 1 to skip this function. */
3046 	skip++;
3047 #endif
3048 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3049 			     tracing_gen_ctx(), skip, NULL);
3050 }
3051 EXPORT_SYMBOL_GPL(trace_dump_stack);
3052 
3053 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3054 static DEFINE_PER_CPU(int, user_stack_count);
3055 
3056 static void
3057 ftrace_trace_userstack(struct trace_array *tr,
3058 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3059 {
3060 	struct trace_event_call *call = &event_user_stack;
3061 	struct ring_buffer_event *event;
3062 	struct userstack_entry *entry;
3063 
3064 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3065 		return;
3066 
3067 	/*
3068 	 * NMIs cannot handle page faults, even with fixups.
3069 	 * Saving the user stack can (and often does) fault.
3070 	 */
3071 	if (unlikely(in_nmi()))
3072 		return;
3073 
3074 	/*
3075 	 * prevent recursion, since the user stack tracing may
3076 	 * trigger other kernel events.
3077 	 */
3078 	preempt_disable();
3079 	if (__this_cpu_read(user_stack_count))
3080 		goto out;
3081 
3082 	__this_cpu_inc(user_stack_count);
3083 
3084 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3085 					    sizeof(*entry), trace_ctx);
3086 	if (!event)
3087 		goto out_drop_count;
3088 	entry	= ring_buffer_event_data(event);
3089 
3090 	entry->tgid		= current->tgid;
3091 	memset(&entry->caller, 0, sizeof(entry->caller));
3092 
3093 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3094 	if (!call_filter_check_discard(call, entry, buffer, event))
3095 		__buffer_unlock_commit(buffer, event);
3096 
3097  out_drop_count:
3098 	__this_cpu_dec(user_stack_count);
3099  out:
3100 	preempt_enable();
3101 }
3102 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3103 static void ftrace_trace_userstack(struct trace_array *tr,
3104 				   struct trace_buffer *buffer,
3105 				   unsigned int trace_ctx)
3106 {
3107 }
3108 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3109 
3110 #endif /* CONFIG_STACKTRACE */
3111 
3112 /* created for use with alloc_percpu */
3113 struct trace_buffer_struct {
3114 	int nesting;
3115 	char buffer[4][TRACE_BUF_SIZE];
3116 };
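/*
 * Four per-cpu buffers allow one level of trace_printk() nesting for
 * each context that can interrupt the previous one: normal, softirq,
 * irq and NMI.
 */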
3117 
3118 static struct trace_buffer_struct *trace_percpu_buffer;
3119 
3120 /*
3121  * This allows for lockless recording.  If we're nested too deeply, then
3122  * this returns NULL.
3123  */
3124 static char *get_trace_buf(void)
3125 {
3126 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3127 
3128 	if (!buffer || buffer->nesting >= 4)
3129 		return NULL;
3130 
3131 	buffer->nesting++;
3132 
3133 	/* Interrupts must see nesting incremented before we use the buffer */
3134 	barrier();
3135 	return &buffer->buffer[buffer->nesting - 1][0];
3136 }
3137 
3138 static void put_trace_buf(void)
3139 {
3140 	/* Don't let the decrement of nesting leak before this */
3141 	barrier();
3142 	this_cpu_dec(trace_percpu_buffer->nesting);
3143 }
3144 
3145 static int alloc_percpu_trace_buffer(void)
3146 {
3147 	struct trace_buffer_struct *buffers;
3148 
3149 	if (trace_percpu_buffer)
3150 		return 0;
3151 
3152 	buffers = alloc_percpu(struct trace_buffer_struct);
3153 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3154 		return -ENOMEM;
3155 
3156 	trace_percpu_buffer = buffers;
3157 	return 0;
3158 }
3159 
3160 static int buffers_allocated;
3161 
3162 void trace_printk_init_buffers(void)
3163 {
3164 	if (buffers_allocated)
3165 		return;
3166 
3167 	if (alloc_percpu_trace_buffer())
3168 		return;
3169 
3170 	/* trace_printk() is for debug use only. Don't use it in production. */
3171 
3172 	pr_warn("\n");
3173 	pr_warn("**********************************************************\n");
3174 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3175 	pr_warn("**                                                      **\n");
3176 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3177 	pr_warn("**                                                      **\n");
3178 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3179 	pr_warn("** unsafe for production use.                           **\n");
3180 	pr_warn("**                                                      **\n");
3181 	pr_warn("** If you see this message and you are not debugging    **\n");
3182 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3183 	pr_warn("**                                                      **\n");
3184 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185 	pr_warn("**********************************************************\n");
3186 
3187 	/* Expand the buffers to set size */
3188 	tracing_update_buffers();
3189 
3190 	buffers_allocated = 1;
3191 
3192 	/*
3193 	 * trace_printk_init_buffers() can be called by modules.
3194 	 * If that happens, then we need to start cmdline recording
3195 	 * directly here. If the global_trace.buffer is already
3196 	 * allocated here, then this was called by module code.
3197 	 */
3198 	if (global_trace.array_buffer.buffer)
3199 		tracing_start_cmdline_record();
3200 }
3201 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3202 
3203 void trace_printk_start_comm(void)
3204 {
3205 	/* Start tracing comms if trace printk is set */
3206 	if (!buffers_allocated)
3207 		return;
3208 	tracing_start_cmdline_record();
3209 }
3210 
3211 static void trace_printk_start_stop_comm(int enabled)
3212 {
3213 	if (!buffers_allocated)
3214 		return;
3215 
3216 	if (enabled)
3217 		tracing_start_cmdline_record();
3218 	else
3219 		tracing_stop_cmdline_record();
3220 }
3221 
3222 /**
3223  * trace_vbprintk - write binary msg to tracing buffer
3224  * @ip:    The address of the caller
3225  * @fmt:   The string format to write to the buffer
3226  * @args:  Arguments for @fmt
3227  */
3228 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3229 {
3230 	struct trace_event_call *call = &event_bprint;
3231 	struct ring_buffer_event *event;
3232 	struct trace_buffer *buffer;
3233 	struct trace_array *tr = &global_trace;
3234 	struct bprint_entry *entry;
3235 	unsigned int trace_ctx;
3236 	char *tbuffer;
3237 	int len = 0, size;
3238 
3239 	if (unlikely(tracing_selftest_running || tracing_disabled))
3240 		return 0;
3241 
3242 	/* Don't pollute graph traces with trace_vprintk internals */
3243 	pause_graph_tracing();
3244 
3245 	trace_ctx = tracing_gen_ctx();
3246 	preempt_disable_notrace();
3247 
3248 	tbuffer = get_trace_buf();
3249 	if (!tbuffer) {
3250 		len = 0;
3251 		goto out_nobuffer;
3252 	}
3253 
3254 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3255 
3256 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3257 		goto out_put;
3258 
3259 	size = sizeof(*entry) + sizeof(u32) * len;
3260 	buffer = tr->array_buffer.buffer;
3261 	ring_buffer_nest_start(buffer);
3262 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3263 					    trace_ctx);
3264 	if (!event)
3265 		goto out;
3266 	entry = ring_buffer_event_data(event);
3267 	entry->ip			= ip;
3268 	entry->fmt			= fmt;
3269 
3270 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3271 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3272 		__buffer_unlock_commit(buffer, event);
3273 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3274 	}
3275 
3276 out:
3277 	ring_buffer_nest_end(buffer);
3278 out_put:
3279 	put_trace_buf();
3280 
3281 out_nobuffer:
3282 	preempt_enable_notrace();
3283 	unpause_graph_tracing();
3284 
3285 	return len;
3286 }
3287 EXPORT_SYMBOL_GPL(trace_vbprintk);
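/*
 * The bprint entry above stores only the format pointer plus the binary
 * arguments packed by vbin_printf(); the string itself is rendered at
 * read time, keeping the trace_printk() fast path cheap.
 */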
3288 
3289 __printf(3, 0)
3290 static int
3291 __trace_array_vprintk(struct trace_buffer *buffer,
3292 		      unsigned long ip, const char *fmt, va_list args)
3293 {
3294 	struct trace_event_call *call = &event_print;
3295 	struct ring_buffer_event *event;
3296 	int len = 0, size;
3297 	struct print_entry *entry;
3298 	unsigned int trace_ctx;
3299 	char *tbuffer;
3300 
3301 	if (tracing_disabled || tracing_selftest_running)
3302 		return 0;
3303 
3304 	/* Don't pollute graph traces with trace_vprintk internals */
3305 	pause_graph_tracing();
3306 
3307 	trace_ctx = tracing_gen_ctx();
3308 	preempt_disable_notrace();
3309 
3310 
3311 	tbuffer = get_trace_buf();
3312 	if (!tbuffer) {
3313 		len = 0;
3314 		goto out_nobuffer;
3315 	}
3316 
3317 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3318 
3319 	size = sizeof(*entry) + len + 1;
3320 	ring_buffer_nest_start(buffer);
3321 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3322 					    trace_ctx);
3323 	if (!event)
3324 		goto out;
3325 	entry = ring_buffer_event_data(event);
3326 	entry->ip = ip;
3327 
3328 	memcpy(&entry->buf, tbuffer, len + 1);
3329 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3330 		__buffer_unlock_commit(buffer, event);
3331 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3332 	}
3333 
3334 out:
3335 	ring_buffer_nest_end(buffer);
3336 	put_trace_buf();
3337 
3338 out_nobuffer:
3339 	preempt_enable_notrace();
3340 	unpause_graph_tracing();
3341 
3342 	return len;
3343 }
3344 
3345 __printf(3, 0)
3346 int trace_array_vprintk(struct trace_array *tr,
3347 			unsigned long ip, const char *fmt, va_list args)
3348 {
3349 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3350 }
3351 
3352 /**
3353  * trace_array_printk - Print a message to a specific instance
3354  * @tr: The instance trace_array descriptor
3355  * @ip: The instruction pointer that this is called from.
3356  * @fmt: The format to print (printf format)
3357  *
3358  * If a subsystem sets up its own instance, they have the right to
3359  * printk strings into their tracing instance buffer using this
3360  * function. Note, this function will not write into the top level
3361  * buffer (use trace_printk() for that), as writing into the top level
3362  * buffer should only have events that can be individually disabled.
3363  * trace_printk() is only used for debugging a kernel, and should not
3364  * ever be incorporated into normal use.
3365  *
3366  * trace_array_printk() can be used, as it will not add noise to the
3367  * top level tracing buffer.
3368  *
3369  * Note, trace_array_init_printk() must be called on @tr before this
3370  * can be used.
3371  */
3372 __printf(3, 0)
3373 int trace_array_printk(struct trace_array *tr,
3374 		       unsigned long ip, const char *fmt, ...)
3375 {
3376 	int ret;
3377 	va_list ap;
3378 
3379 	if (!tr)
3380 		return -ENOENT;
3381 
3382 	/* This is only allowed for created instances */
3383 	if (tr == &global_trace)
3384 		return 0;
3385 
3386 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3387 		return 0;
3388 
3389 	va_start(ap, fmt);
3390 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3391 	va_end(ap);
3392 	return ret;
3393 }
3394 EXPORT_SYMBOL_GPL(trace_array_printk);
3395 
3396 /**
3397  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3398  * @tr: The trace array to initialize the buffers for
3399  *
3400  * As trace_array_printk() only writes into instances, they are OK to
3401  * have in the kernel (unlike trace_printk()). This needs to be called
3402  * before trace_array_printk() can be used on a trace_array.
3403  */
3404 int trace_array_init_printk(struct trace_array *tr)
3405 {
3406 	if (!tr)
3407 		return -ENOENT;
3408 
3409 	/* This is only allowed for created instances */
3410 	if (tr == &global_trace)
3411 		return -EINVAL;
3412 
3413 	return alloc_percpu_trace_buffer();
3414 }
3415 EXPORT_SYMBOL_GPL(trace_array_init_printk);
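/*
 * An illustrative sketch of using the two helpers above; "my_tr" and
 * "depth" are hypothetical, with the instance obtained elsewhere
 * (e.g. via trace_array_get_by_name()):
 *
 *	if (!trace_array_init_printk(my_tr))
 *		trace_array_printk(my_tr, _THIS_IP_,
 *				   "queue depth now %d\n", depth);
 */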
3416 
3417 __printf(3, 4)
3418 int trace_array_printk_buf(struct trace_buffer *buffer,
3419 			   unsigned long ip, const char *fmt, ...)
3420 {
3421 	int ret;
3422 	va_list ap;
3423 
3424 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3425 		return 0;
3426 
3427 	va_start(ap, fmt);
3428 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3429 	va_end(ap);
3430 	return ret;
3431 }
3432 
3433 __printf(2, 0)
3434 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3435 {
3436 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3437 }
3438 EXPORT_SYMBOL_GPL(trace_vprintk);
3439 
3440 static void trace_iterator_increment(struct trace_iterator *iter)
3441 {
3442 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3443 
3444 	iter->idx++;
3445 	if (buf_iter)
3446 		ring_buffer_iter_advance(buf_iter);
3447 }
3448 
3449 static struct trace_entry *
3450 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3451 		unsigned long *lost_events)
3452 {
3453 	struct ring_buffer_event *event;
3454 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3455 
3456 	if (buf_iter) {
3457 		event = ring_buffer_iter_peek(buf_iter, ts);
3458 		if (lost_events)
3459 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3460 				(unsigned long)-1 : 0;
3461 	} else {
3462 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3463 					 lost_events);
3464 	}
3465 
3466 	if (event) {
3467 		iter->ent_size = ring_buffer_event_length(event);
3468 		return ring_buffer_event_data(event);
3469 	}
3470 	iter->ent_size = 0;
3471 	return NULL;
3472 }
3473 
3474 static struct trace_entry *
3475 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3476 		  unsigned long *missing_events, u64 *ent_ts)
3477 {
3478 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3479 	struct trace_entry *ent, *next = NULL;
3480 	unsigned long lost_events = 0, next_lost = 0;
3481 	int cpu_file = iter->cpu_file;
3482 	u64 next_ts = 0, ts;
3483 	int next_cpu = -1;
3484 	int next_size = 0;
3485 	int cpu;
3486 
3487 	/*
3488 	 * If we are in a per_cpu trace file, don't bother iterating over
3489 	 * all CPUs; just peek at that CPU directly.
3490 	 */
3491 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3492 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3493 			return NULL;
3494 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3495 		if (ent_cpu)
3496 			*ent_cpu = cpu_file;
3497 
3498 		return ent;
3499 	}
3500 
3501 	for_each_tracing_cpu(cpu) {
3502 
3503 		if (ring_buffer_empty_cpu(buffer, cpu))
3504 			continue;
3505 
3506 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3507 
3508 		/*
3509 		 * Pick the entry with the smallest timestamp:
3510 		 */
3511 		if (ent && (!next || ts < next_ts)) {
3512 			next = ent;
3513 			next_cpu = cpu;
3514 			next_ts = ts;
3515 			next_lost = lost_events;
3516 			next_size = iter->ent_size;
3517 		}
3518 	}
3519 
3520 	iter->ent_size = next_size;
3521 
3522 	if (ent_cpu)
3523 		*ent_cpu = next_cpu;
3524 
3525 	if (ent_ts)
3526 		*ent_ts = next_ts;
3527 
3528 	if (missing_events)
3529 		*missing_events = next_lost;
3530 
3531 	return next;
3532 }
3533 
3534 #define STATIC_FMT_BUF_SIZE	128
3535 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3536 
3537 static char *trace_iter_expand_format(struct trace_iterator *iter)
3538 {
3539 	char *tmp;
3540 
3541 	if (iter->fmt == static_fmt_buf)
3542 		return NULL;
3543 
3544 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3545 		       GFP_KERNEL);
3546 	if (tmp) {
3547 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3548 		iter->fmt = tmp;
3549 	}
3550 
3551 	return tmp;
3552 }
3553 
3554 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3555 {
3556 	const char *p, *new_fmt;
3557 	char *q;
3558 
3559 	if (WARN_ON_ONCE(!fmt))
3560 		return fmt;
3561 
3562 	if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3563 		return fmt;
3564 
3565 	p = fmt;
3566 	new_fmt = q = iter->fmt;
3567 	while (*p) {
3568 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3569 			if (!trace_iter_expand_format(iter))
3570 				return fmt;
3571 
3572 			q += iter->fmt - new_fmt;
3573 			new_fmt = iter->fmt;
3574 		}
3575 
3576 		*q++ = *p++;
3577 
3578 		/* Replace %p with %px */
3579 		if (p[-1] == '%') {
3580 			if (p[0] == '%') {
3581 				*q++ = *p++;
3582 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3583 				*q++ = *p++;
3584 				*q++ = 'x';
3585 			}
3586 		}
3587 	}
3588 	*q = '\0';
3589 
3590 	return new_fmt;
3591 }
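/*
 * Example: with the hash-ptr trace option cleared, a format string such
 * as "addr=%p flags=%s" comes back as "addr=%px flags=%s" in iter->fmt,
 * while a literal "%%p" is left untouched.
 */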
3592 
3593 #define STATIC_TEMP_BUF_SIZE	128
3594 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3595 
3596 /* Find the next real entry, without updating the iterator itself */
3597 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3598 					  int *ent_cpu, u64 *ent_ts)
3599 {
3600 	/* __find_next_entry will reset ent_size */
3601 	int ent_size = iter->ent_size;
3602 	struct trace_entry *entry;
3603 
3604 	/*
3605 	 * If called from ftrace_dump(), then the iter->temp buffer
3606 	 * will be the static_temp_buf and not created from kmalloc.
3607 	 * If the entry size is greater than the buffer, we can
3608 	 * not save it. Just return NULL in that case. This is only
3609 	 * used to add markers when two consecutive events' time
3610 	 * stamps have a large delta. See trace_print_lat_context()
3611 	 */
3612 	if (iter->temp == static_temp_buf &&
3613 	    STATIC_TEMP_BUF_SIZE < ent_size)
3614 		return NULL;
3615 
3616 	/*
3617 	 * The __find_next_entry() may call peek_next_entry(), which may
3618 	 * call ring_buffer_peek() that may make the contents of iter->ent
3619 	 * undefined. Need to copy iter->ent now.
3620 	 */
3621 	if (iter->ent && iter->ent != iter->temp) {
3622 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3623 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3624 			void *temp;
3625 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3626 			if (!temp)
3627 				return NULL;
3628 			kfree(iter->temp);
3629 			iter->temp = temp;
3630 			iter->temp_size = iter->ent_size;
3631 		}
3632 		memcpy(iter->temp, iter->ent, iter->ent_size);
3633 		iter->ent = iter->temp;
3634 	}
3635 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3636 	/* Put back the original ent_size */
3637 	iter->ent_size = ent_size;
3638 
3639 	return entry;
3640 }
3641 
3642 /* Find the next real entry, and increment the iterator to the next entry */
3643 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3644 {
3645 	iter->ent = __find_next_entry(iter, &iter->cpu,
3646 				      &iter->lost_events, &iter->ts);
3647 
3648 	if (iter->ent)
3649 		trace_iterator_increment(iter);
3650 
3651 	return iter->ent ? iter : NULL;
3652 }
3653 
3654 static void trace_consume(struct trace_iterator *iter)
3655 {
3656 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3657 			    &iter->lost_events);
3658 }
3659 
3660 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3661 {
3662 	struct trace_iterator *iter = m->private;
3663 	int i = (int)*pos;
3664 	void *ent;
3665 
3666 	WARN_ON_ONCE(iter->leftover);
3667 
3668 	(*pos)++;
3669 
3670 	/* can't go backwards */
3671 	if (iter->idx > i)
3672 		return NULL;
3673 
3674 	if (iter->idx < 0)
3675 		ent = trace_find_next_entry_inc(iter);
3676 	else
3677 		ent = iter;
3678 
3679 	while (ent && iter->idx < i)
3680 		ent = trace_find_next_entry_inc(iter);
3681 
3682 	iter->pos = *pos;
3683 
3684 	return ent;
3685 }
3686 
3687 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3688 {
3689 	struct ring_buffer_iter *buf_iter;
3690 	unsigned long entries = 0;
3691 	u64 ts;
3692 
3693 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3694 
3695 	buf_iter = trace_buffer_iter(iter, cpu);
3696 	if (!buf_iter)
3697 		return;
3698 
3699 	ring_buffer_iter_reset(buf_iter);
3700 
3701 	/*
3702 	 * We could have the case with the max latency tracers
3703 	 * that a reset never took place on a cpu. This is evident
3704 	 * that a reset never took place on a cpu. This is evidenced
3705 	 * by the timestamp being before the start of the buffer.
3706 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3707 		if (ts >= iter->array_buffer->time_start)
3708 			break;
3709 		entries++;
3710 		ring_buffer_iter_advance(buf_iter);
3711 	}
3712 
3713 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3714 }
3715 
3716 /*
3717  * The current tracer is copied to avoid global locking
3718  * all around.
3719  */
3720 static void *s_start(struct seq_file *m, loff_t *pos)
3721 {
3722 	struct trace_iterator *iter = m->private;
3723 	struct trace_array *tr = iter->tr;
3724 	int cpu_file = iter->cpu_file;
3725 	void *p = NULL;
3726 	loff_t l = 0;
3727 	int cpu;
3728 
3729 	/*
3730 	 * copy the tracer to avoid using a global lock all around.
3731 	 * iter->trace is a copy of current_trace, the pointer to the
3732 	 * name may be used instead of a strcmp(), as iter->trace->name
3733 	 * will point to the same string as current_trace->name.
3734 	 */
3735 	mutex_lock(&trace_types_lock);
3736 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3737 		*iter->trace = *tr->current_trace;
3738 	mutex_unlock(&trace_types_lock);
3739 
3740 #ifdef CONFIG_TRACER_MAX_TRACE
3741 	if (iter->snapshot && iter->trace->use_max_tr)
3742 		return ERR_PTR(-EBUSY);
3743 #endif
3744 
3745 	if (!iter->snapshot)
3746 		atomic_inc(&trace_record_taskinfo_disabled);
3747 
3748 	if (*pos != iter->pos) {
3749 		iter->ent = NULL;
3750 		iter->cpu = 0;
3751 		iter->idx = -1;
3752 
3753 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3754 			for_each_tracing_cpu(cpu)
3755 				tracing_iter_reset(iter, cpu);
3756 		} else
3757 			tracing_iter_reset(iter, cpu_file);
3758 
3759 		iter->leftover = 0;
3760 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3761 			;
3762 
3763 	} else {
3764 		/*
3765 		 * If we overflowed the seq_file before, then we want
3766 		 * to just reuse the trace_seq buffer again.
3767 		 */
3768 		if (iter->leftover)
3769 			p = iter;
3770 		else {
3771 			l = *pos - 1;
3772 			p = s_next(m, p, &l);
3773 		}
3774 	}
3775 
3776 	trace_event_read_lock();
3777 	trace_access_lock(cpu_file);
3778 	return p;
3779 }
3780 
3781 static void s_stop(struct seq_file *m, void *p)
3782 {
3783 	struct trace_iterator *iter = m->private;
3784 
3785 #ifdef CONFIG_TRACER_MAX_TRACE
3786 	if (iter->snapshot && iter->trace->use_max_tr)
3787 		return;
3788 #endif
3789 
3790 	if (!iter->snapshot)
3791 		atomic_dec(&trace_record_taskinfo_disabled);
3792 
3793 	trace_access_unlock(iter->cpu_file);
3794 	trace_event_read_unlock();
3795 }
3796 
3797 static void
3798 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3799 		      unsigned long *entries, int cpu)
3800 {
3801 	unsigned long count;
3802 
3803 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3804 	/*
3805 	 * If this buffer has skipped entries, then we hold all
3806 	 * entries for the trace and we need to ignore the
3807 	 * ones before the time stamp.
3808 	 */
3809 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3810 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3811 		/* total is the same as the entries */
3812 		*total = count;
3813 	} else
3814 		*total = count +
3815 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3816 	*entries = count;
3817 }
3818 
3819 static void
3820 get_total_entries(struct array_buffer *buf,
3821 		  unsigned long *total, unsigned long *entries)
3822 {
3823 	unsigned long t, e;
3824 	int cpu;
3825 
3826 	*total = 0;
3827 	*entries = 0;
3828 
3829 	for_each_tracing_cpu(cpu) {
3830 		get_total_entries_cpu(buf, &t, &e, cpu);
3831 		*total += t;
3832 		*entries += e;
3833 	}
3834 }
3835 
3836 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3837 {
3838 	unsigned long total, entries;
3839 
3840 	if (!tr)
3841 		tr = &global_trace;
3842 
3843 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3844 
3845 	return entries;
3846 }
3847 
3848 unsigned long trace_total_entries(struct trace_array *tr)
3849 {
3850 	unsigned long total, entries;
3851 
3852 	if (!tr)
3853 		tr = &global_trace;
3854 
3855 	get_total_entries(&tr->array_buffer, &total, &entries);
3856 
3857 	return entries;
3858 }
3859 
3860 static void print_lat_help_header(struct seq_file *m)
3861 {
3862 	seq_puts(m, "#                    _------=> CPU#            \n"
3863 		    "#                   / _-----=> irqs-off        \n"
3864 		    "#                  | / _----=> need-resched    \n"
3865 		    "#                  || / _---=> hardirq/softirq \n"
3866 		    "#                  ||| / _--=> preempt-depth   \n"
3867 		    "#                  |||| /     delay            \n"
3868 		    "#  cmd     pid     ||||| time  |   caller      \n"
3869 		    "#     \\   /        |||||  \\    |   /         \n");
3870 }
3871 
3872 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3873 {
3874 	unsigned long total;
3875 	unsigned long entries;
3876 
3877 	get_total_entries(buf, &total, &entries);
3878 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3879 		   entries, total, num_online_cpus());
3880 	seq_puts(m, "#\n");
3881 }
3882 
3883 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3884 				   unsigned int flags)
3885 {
3886 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3887 
3888 	print_event_info(buf, m);
3889 
3890 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3891 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3892 }
3893 
3894 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3895 				       unsigned int flags)
3896 {
3897 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3898 	const char *space = "            ";
3899 	int prec = tgid ? 12 : 2;
3900 
3901 	print_event_info(buf, m);
3902 
3903 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3904 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3905 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3906 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3907 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3908 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3909 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3910 }
3911 
3912 void
3913 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3914 {
3915 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3916 	struct array_buffer *buf = iter->array_buffer;
3917 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3918 	struct tracer *type = iter->trace;
3919 	unsigned long entries;
3920 	unsigned long total;
	const char *name = type->name;
3924 
3925 	get_total_entries(buf, &total, &entries);
3926 
3927 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3928 		   name, UTS_RELEASE);
3929 	seq_puts(m, "# -----------------------------------"
3930 		 "---------------------------------\n");
3931 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3932 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3933 		   nsecs_to_usecs(data->saved_latency),
3934 		   entries,
3935 		   total,
3936 		   buf->cpu,
3937 #if defined(CONFIG_PREEMPT_NONE)
3938 		   "server",
3939 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3940 		   "desktop",
3941 #elif defined(CONFIG_PREEMPT)
3942 		   "preempt",
3943 #elif defined(CONFIG_PREEMPT_RT)
3944 		   "preempt_rt",
3945 #else
3946 		   "unknown",
3947 #endif
3948 		   /* These are reserved for later use */
3949 		   0, 0, 0, 0);
3950 #ifdef CONFIG_SMP
3951 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3952 #else
3953 	seq_puts(m, ")\n");
3954 #endif
3955 	seq_puts(m, "#    -----------------\n");
3956 	seq_printf(m, "#    | task: %.16s-%d "
3957 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3958 		   data->comm, data->pid,
3959 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3960 		   data->policy, data->rt_priority);
3961 	seq_puts(m, "#    -----------------\n");
3962 
3963 	if (data->critical_start) {
3964 		seq_puts(m, "#  => started at: ");
3965 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3966 		trace_print_seq(m, &iter->seq);
3967 		seq_puts(m, "\n#  => ended at:   ");
3968 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3969 		trace_print_seq(m, &iter->seq);
3970 		seq_puts(m, "\n#\n");
3971 	}
3972 
3973 	seq_puts(m, "#\n");
3974 }
3975 
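/*
 * When the "annotate" option is set and the buffers have overrun,
 * emit a marker the first time entries from a given CPU show up in
 * the output, since earlier events from that CPU may have been lost.
 */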
3976 static void test_cpu_buff_start(struct trace_iterator *iter)
3977 {
3978 	struct trace_seq *s = &iter->seq;
3979 	struct trace_array *tr = iter->tr;
3980 
3981 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3982 		return;
3983 
3984 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3985 		return;
3986 
3987 	if (cpumask_available(iter->started) &&
3988 	    cpumask_test_cpu(iter->cpu, iter->started))
3989 		return;
3990 
3991 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3992 		return;
3993 
3994 	if (cpumask_available(iter->started))
3995 		cpumask_set_cpu(iter->cpu, iter->started);
3996 
3997 	/* Don't print started cpu buffer for the first entry of the trace */
3998 	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started #####\n",
4000 				iter->cpu);
4001 }
4002 
4003 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4004 {
4005 	struct trace_array *tr = iter->tr;
4006 	struct trace_seq *s = &iter->seq;
4007 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4008 	struct trace_entry *entry;
4009 	struct trace_event *event;
4010 
4011 	entry = iter->ent;
4012 
4013 	test_cpu_buff_start(iter);
4014 
4015 	event = ftrace_find_event(entry->type);
4016 
4017 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4018 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4019 			trace_print_lat_context(iter);
4020 		else
4021 			trace_print_context(iter);
4022 	}
4023 
4024 	if (trace_seq_has_overflowed(s))
4025 		return TRACE_TYPE_PARTIAL_LINE;
4026 
4027 	if (event)
4028 		return event->funcs->trace(iter, sym_flags, event);
4029 
4030 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4031 
4032 	return trace_handle_return(s);
4033 }
4034 
4035 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4036 {
4037 	struct trace_array *tr = iter->tr;
4038 	struct trace_seq *s = &iter->seq;
4039 	struct trace_entry *entry;
4040 	struct trace_event *event;
4041 
4042 	entry = iter->ent;
4043 
4044 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4045 		trace_seq_printf(s, "%d %d %llu ",
4046 				 entry->pid, iter->cpu, iter->ts);
4047 
4048 	if (trace_seq_has_overflowed(s))
4049 		return TRACE_TYPE_PARTIAL_LINE;
4050 
4051 	event = ftrace_find_event(entry->type);
4052 	if (event)
4053 		return event->funcs->raw(iter, 0, event);
4054 
4055 	trace_seq_printf(s, "%d ?\n", entry->type);
4056 
4057 	return trace_handle_return(s);
4058 }
4059 
4060 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4061 {
4062 	struct trace_array *tr = iter->tr;
4063 	struct trace_seq *s = &iter->seq;
4064 	unsigned char newline = '\n';
4065 	struct trace_entry *entry;
4066 	struct trace_event *event;
4067 
4068 	entry = iter->ent;
4069 
4070 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4071 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4072 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4073 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4074 		if (trace_seq_has_overflowed(s))
4075 			return TRACE_TYPE_PARTIAL_LINE;
4076 	}
4077 
4078 	event = ftrace_find_event(entry->type);
4079 	if (event) {
4080 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4081 		if (ret != TRACE_TYPE_HANDLED)
4082 			return ret;
4083 	}
4084 
4085 	SEQ_PUT_FIELD(s, newline);
4086 
4087 	return trace_handle_return(s);
4088 }
4089 
4090 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4091 {
4092 	struct trace_array *tr = iter->tr;
4093 	struct trace_seq *s = &iter->seq;
4094 	struct trace_entry *entry;
4095 	struct trace_event *event;
4096 
4097 	entry = iter->ent;
4098 
4099 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4100 		SEQ_PUT_FIELD(s, entry->pid);
4101 		SEQ_PUT_FIELD(s, iter->cpu);
4102 		SEQ_PUT_FIELD(s, iter->ts);
4103 		if (trace_seq_has_overflowed(s))
4104 			return TRACE_TYPE_PARTIAL_LINE;
4105 	}
4106 
4107 	event = ftrace_find_event(entry->type);
4108 	return event ? event->funcs->binary(iter, 0, event) :
4109 		TRACE_TYPE_HANDLED;
4110 }
4111 
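/* Return 1 if there is nothing left to read in the buffer(s) being iterated. */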
4112 int trace_empty(struct trace_iterator *iter)
4113 {
4114 	struct ring_buffer_iter *buf_iter;
4115 	int cpu;
4116 
4117 	/* If we are looking at one CPU buffer, only check that one */
4118 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4119 		cpu = iter->cpu_file;
4120 		buf_iter = trace_buffer_iter(iter, cpu);
4121 		if (buf_iter) {
4122 			if (!ring_buffer_iter_empty(buf_iter))
4123 				return 0;
4124 		} else {
4125 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4126 				return 0;
4127 		}
4128 		return 1;
4129 	}
4130 
4131 	for_each_tracing_cpu(cpu) {
4132 		buf_iter = trace_buffer_iter(iter, cpu);
4133 		if (buf_iter) {
4134 			if (!ring_buffer_iter_empty(buf_iter))
4135 				return 0;
4136 		} else {
4137 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4138 				return 0;
4139 		}
4140 	}
4141 
4142 	return 1;
4143 }
4144 
4145 /*  Called with trace_event_read_lock() held. */
4146 enum print_line_t print_trace_line(struct trace_iterator *iter)
4147 {
4148 	struct trace_array *tr = iter->tr;
4149 	unsigned long trace_flags = tr->trace_flags;
4150 	enum print_line_t ret;
4151 
4152 	if (iter->lost_events) {
4153 		if (iter->lost_events == (unsigned long)-1)
4154 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4155 					 iter->cpu);
4156 		else
4157 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4158 					 iter->cpu, iter->lost_events);
4159 		if (trace_seq_has_overflowed(&iter->seq))
4160 			return TRACE_TYPE_PARTIAL_LINE;
4161 	}
4162 
4163 	if (iter->trace && iter->trace->print_line) {
4164 		ret = iter->trace->print_line(iter);
4165 		if (ret != TRACE_TYPE_UNHANDLED)
4166 			return ret;
4167 	}
4168 
4169 	if (iter->ent->type == TRACE_BPUTS &&
4170 			trace_flags & TRACE_ITER_PRINTK &&
4171 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4172 		return trace_print_bputs_msg_only(iter);
4173 
4174 	if (iter->ent->type == TRACE_BPRINT &&
4175 			trace_flags & TRACE_ITER_PRINTK &&
4176 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4177 		return trace_print_bprintk_msg_only(iter);
4178 
4179 	if (iter->ent->type == TRACE_PRINT &&
4180 			trace_flags & TRACE_ITER_PRINTK &&
4181 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4182 		return trace_print_printk_msg_only(iter);
4183 
4184 	if (trace_flags & TRACE_ITER_BIN)
4185 		return print_bin_fmt(iter);
4186 
4187 	if (trace_flags & TRACE_ITER_HEX)
4188 		return print_hex_fmt(iter);
4189 
4190 	if (trace_flags & TRACE_ITER_RAW)
4191 		return print_raw_fmt(iter);
4192 
4193 	return print_trace_fmt(iter);
4194 }
4195 
4196 void trace_latency_header(struct seq_file *m)
4197 {
4198 	struct trace_iterator *iter = m->private;
4199 	struct trace_array *tr = iter->tr;
4200 
4201 	/* print nothing if the buffers are empty */
4202 	if (trace_empty(iter))
4203 		return;
4204 
4205 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4206 		print_trace_header(m, iter);
4207 
4208 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4209 		print_lat_help_header(m);
4210 }
4211 
4212 void trace_default_header(struct seq_file *m)
4213 {
4214 	struct trace_iterator *iter = m->private;
4215 	struct trace_array *tr = iter->tr;
4216 	unsigned long trace_flags = tr->trace_flags;
4217 
4218 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4219 		return;
4220 
4221 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4222 		/* print nothing if the buffers are empty */
4223 		if (trace_empty(iter))
4224 			return;
4225 		print_trace_header(m, iter);
4226 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4227 			print_lat_help_header(m);
4228 	} else {
4229 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4230 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4231 				print_func_help_header_irq(iter->array_buffer,
4232 							   m, trace_flags);
4233 			else
4234 				print_func_help_header(iter->array_buffer, m,
4235 						       trace_flags);
4236 		}
4237 	}
4238 }
4239 
4240 static void test_ftrace_alive(struct seq_file *m)
4241 {
4242 	if (!ftrace_is_dead())
4243 		return;
4244 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4245 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4246 }
4247 
4248 #ifdef CONFIG_TRACER_MAX_TRACE
4249 static void show_snapshot_main_help(struct seq_file *m)
4250 {
4251 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4252 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4253 		    "#                      Takes a snapshot of the main buffer.\n"
4254 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2', works with any number that\n"
4256 		    "#                       is not a '0' or '1')\n");
4257 }
4258 
4259 static void show_snapshot_percpu_help(struct seq_file *m)
4260 {
4261 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4262 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4263 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4264 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4265 #else
4266 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4267 		    "#                     Must use main snapshot file to allocate.\n");
4268 #endif
4269 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2', works with any number that\n"
4271 		    "#                       is not a '0' or '1')\n");
4272 }
4273 
4274 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4275 {
4276 	if (iter->tr->allocated_snapshot)
4277 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4278 	else
4279 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4280 
4281 	seq_puts(m, "# Snapshot commands:\n");
4282 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4283 		show_snapshot_main_help(m);
4284 	else
4285 		show_snapshot_percpu_help(m);
4286 }
4287 #else
4288 /* Should never be called */
4289 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4290 #endif
4291 
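/*
 * seq_file ->show(): print the header when at the start of the buffer,
 * re-emit a line that overflowed the seq_file on the previous pass, or
 * format the current trace entry.
 */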
4292 static int s_show(struct seq_file *m, void *v)
4293 {
4294 	struct trace_iterator *iter = v;
4295 	int ret;
4296 
4297 	if (iter->ent == NULL) {
4298 		if (iter->tr) {
4299 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4300 			seq_puts(m, "#\n");
4301 			test_ftrace_alive(m);
4302 		}
4303 		if (iter->snapshot && trace_empty(iter))
4304 			print_snapshot_help(m, iter);
4305 		else if (iter->trace && iter->trace->print_header)
4306 			iter->trace->print_header(m);
4307 		else
4308 			trace_default_header(m);
4309 
4310 	} else if (iter->leftover) {
4311 		/*
4312 		 * If we filled the seq_file buffer earlier, we
4313 		 * want to just show it now.
4314 		 */
4315 		ret = trace_print_seq(m, &iter->seq);
4316 
4317 		/* ret should this time be zero, but you never know */
4318 		iter->leftover = ret;
4319 
4320 	} else {
4321 		print_trace_line(iter);
4322 		ret = trace_print_seq(m, &iter->seq);
4323 		/*
4324 		 * If we overflow the seq_file buffer, then it will
4325 		 * ask us for this data again at start up.
4326 		 * Use that instead.
4327 		 *  ret is 0 if seq_file write succeeded.
4328 		 *        -1 otherwise.
4329 		 */
4330 		iter->leftover = ret;
4331 	}
4332 
4333 	return 0;
4334 }
4335 
4336 /*
4337  * Should be used after trace_array_get(), trace_types_lock
4338  * ensures that i_cdev was already initialized.
4339  */
4340 static inline int tracing_get_cpu(struct inode *inode)
4341 {
4342 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4343 		return (long)inode->i_cdev - 1;
4344 	return RING_BUFFER_ALL_CPUS;
4345 }
4346 
4347 static const struct seq_operations tracer_seq_ops = {
4348 	.start		= s_start,
4349 	.next		= s_next,
4350 	.stop		= s_stop,
4351 	.show		= s_show,
4352 };
4353 
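/*
 * Set up the iterator used to read the trace: allocate per-CPU ring
 * buffer iterators, take a private copy of the current tracer, pick the
 * main or max buffer, optionally pause tracing, and position the
 * iterators at the start of the buffer.
 */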
4354 static struct trace_iterator *
4355 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4356 {
4357 	struct trace_array *tr = inode->i_private;
4358 	struct trace_iterator *iter;
4359 	int cpu;
4360 
4361 	if (tracing_disabled)
4362 		return ERR_PTR(-ENODEV);
4363 
4364 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4365 	if (!iter)
4366 		return ERR_PTR(-ENOMEM);
4367 
4368 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4369 				    GFP_KERNEL);
4370 	if (!iter->buffer_iter)
4371 		goto release;
4372 
4373 	/*
4374 	 * trace_find_next_entry() may need to save off iter->ent.
4375 	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128 bytes, allocate a buffer of that size.
4377 	 * If one is greater, then trace_find_next_entry() will
4378 	 * allocate a new buffer to adjust for the bigger iter->ent.
4379 	 * It's not critical if it fails to get allocated here.
4380 	 */
4381 	iter->temp = kmalloc(128, GFP_KERNEL);
4382 	if (iter->temp)
4383 		iter->temp_size = 128;
4384 
4385 	/*
	 * trace_event_printf() may need to modify the given format
	 * string to replace %p with %px, so that it shows the real
	 * address instead of a hashed value. However, that is only
	 * needed for event tracing; other tracers may not need it.
	 * Defer the allocation until it is needed.
4391 	 */
4392 	iter->fmt = NULL;
4393 	iter->fmt_size = 0;
4394 
4395 	/*
4396 	 * We make a copy of the current tracer to avoid concurrent
4397 	 * changes on it while we are reading.
4398 	 */
4399 	mutex_lock(&trace_types_lock);
4400 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4401 	if (!iter->trace)
4402 		goto fail;
4403 
4404 	*iter->trace = *tr->current_trace;
4405 
4406 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4407 		goto fail;
4408 
4409 	iter->tr = tr;
4410 
4411 #ifdef CONFIG_TRACER_MAX_TRACE
4412 	/* Currently only the top directory has a snapshot */
4413 	if (tr->current_trace->print_max || snapshot)
4414 		iter->array_buffer = &tr->max_buffer;
4415 	else
4416 #endif
4417 		iter->array_buffer = &tr->array_buffer;
4418 	iter->snapshot = snapshot;
4419 	iter->pos = -1;
4420 	iter->cpu_file = tracing_get_cpu(inode);
4421 	mutex_init(&iter->mutex);
4422 
4423 	/* Notify the tracer early; before we stop tracing. */
4424 	if (iter->trace->open)
4425 		iter->trace->open(iter);
4426 
4427 	/* Annotate start of buffers if we had overruns */
4428 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4429 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4430 
4431 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4432 	if (trace_clocks[tr->clock_id].in_ns)
4433 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4434 
4435 	/*
4436 	 * If pause-on-trace is enabled, then stop the trace while
4437 	 * dumping, unless this is the "snapshot" file
4438 	 */
4439 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4440 		tracing_stop_tr(tr);
4441 
4442 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4443 		for_each_tracing_cpu(cpu) {
4444 			iter->buffer_iter[cpu] =
4445 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4446 							 cpu, GFP_KERNEL);
4447 		}
4448 		ring_buffer_read_prepare_sync();
4449 		for_each_tracing_cpu(cpu) {
4450 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4451 			tracing_iter_reset(iter, cpu);
4452 		}
4453 	} else {
4454 		cpu = iter->cpu_file;
4455 		iter->buffer_iter[cpu] =
4456 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4457 						 cpu, GFP_KERNEL);
4458 		ring_buffer_read_prepare_sync();
4459 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4460 		tracing_iter_reset(iter, cpu);
4461 	}
4462 
4463 	mutex_unlock(&trace_types_lock);
4464 
4465 	return iter;
4466 
4467  fail:
4468 	mutex_unlock(&trace_types_lock);
4469 	kfree(iter->trace);
4470 	kfree(iter->temp);
4471 	kfree(iter->buffer_iter);
4472 release:
4473 	seq_release_private(inode, file);
4474 	return ERR_PTR(-ENOMEM);
4475 }
4476 
4477 int tracing_open_generic(struct inode *inode, struct file *filp)
4478 {
4479 	int ret;
4480 
4481 	ret = tracing_check_open_get_tr(NULL);
4482 	if (ret)
4483 		return ret;
4484 
4485 	filp->private_data = inode->i_private;
4486 	return 0;
4487 }
4488 
4489 bool tracing_is_disabled(void)
4490 {
	return (tracing_disabled) ? true : false;
4492 }
4493 
4494 /*
4495  * Open and update trace_array ref count.
4496  * Must have the current trace_array passed to it.
4497  */
4498 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4499 {
4500 	struct trace_array *tr = inode->i_private;
4501 	int ret;
4502 
4503 	ret = tracing_check_open_get_tr(tr);
4504 	if (ret)
4505 		return ret;
4506 
4507 	filp->private_data = inode->i_private;
4508 
4509 	return 0;
4510 }
4511 
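/*
 * Tear down everything __tracing_open() set up and restart tracing if
 * it was stopped when the file was opened.
 */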
4512 static int tracing_release(struct inode *inode, struct file *file)
4513 {
4514 	struct trace_array *tr = inode->i_private;
4515 	struct seq_file *m = file->private_data;
4516 	struct trace_iterator *iter;
4517 	int cpu;
4518 
4519 	if (!(file->f_mode & FMODE_READ)) {
4520 		trace_array_put(tr);
4521 		return 0;
4522 	}
4523 
4524 	/* Writes do not use seq_file */
4525 	iter = m->private;
4526 	mutex_lock(&trace_types_lock);
4527 
4528 	for_each_tracing_cpu(cpu) {
4529 		if (iter->buffer_iter[cpu])
4530 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4531 	}
4532 
4533 	if (iter->trace && iter->trace->close)
4534 		iter->trace->close(iter);
4535 
4536 	if (!iter->snapshot && tr->stop_count)
4537 		/* reenable tracing if it was previously enabled */
4538 		tracing_start_tr(tr);
4539 
4540 	__trace_array_put(tr);
4541 
4542 	mutex_unlock(&trace_types_lock);
4543 
4544 	mutex_destroy(&iter->mutex);
4545 	free_cpumask_var(iter->started);
4546 	kfree(iter->fmt);
4547 	kfree(iter->temp);
4548 	kfree(iter->trace);
4549 	kfree(iter->buffer_iter);
4550 	seq_release_private(inode, file);
4551 
4552 	return 0;
4553 }
4554 
4555 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4556 {
4557 	struct trace_array *tr = inode->i_private;
4558 
4559 	trace_array_put(tr);
4560 	return 0;
4561 }
4562 
4563 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4564 {
4565 	struct trace_array *tr = inode->i_private;
4566 
4567 	trace_array_put(tr);
4568 
4569 	return single_release(inode, file);
4570 }
4571 
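/*
 * Open the "trace" file: an open for write with O_TRUNC clears the
 * buffer(s), and an open for read sets up the iterator via
 * __tracing_open().
 */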
4572 static int tracing_open(struct inode *inode, struct file *file)
4573 {
4574 	struct trace_array *tr = inode->i_private;
4575 	struct trace_iterator *iter;
4576 	int ret;
4577 
4578 	ret = tracing_check_open_get_tr(tr);
4579 	if (ret)
4580 		return ret;
4581 
4582 	/* If this file was open for write, then erase contents */
4583 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4584 		int cpu = tracing_get_cpu(inode);
4585 		struct array_buffer *trace_buf = &tr->array_buffer;
4586 
4587 #ifdef CONFIG_TRACER_MAX_TRACE
4588 		if (tr->current_trace->print_max)
4589 			trace_buf = &tr->max_buffer;
4590 #endif
4591 
4592 		if (cpu == RING_BUFFER_ALL_CPUS)
4593 			tracing_reset_online_cpus(trace_buf);
4594 		else
4595 			tracing_reset_cpu(trace_buf, cpu);
4596 	}
4597 
4598 	if (file->f_mode & FMODE_READ) {
4599 		iter = __tracing_open(inode, file, false);
4600 		if (IS_ERR(iter))
4601 			ret = PTR_ERR(iter);
4602 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4603 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4604 	}
4605 
4606 	if (ret < 0)
4607 		trace_array_put(tr);
4608 
4609 	return ret;
4610 }
4611 
4612 /*
4613  * Some tracers are not suitable for instance buffers.
4614  * A tracer is always available for the global array (toplevel)
4615  * or if it explicitly states that it is.
4616  */
4617 static bool
4618 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4619 {
4620 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4621 }
4622 
4623 /* Find the next tracer that this trace array may use */
4624 static struct tracer *
4625 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4626 {
4627 	while (t && !trace_ok_for_array(t, tr))
4628 		t = t->next;
4629 
4630 	return t;
4631 }
4632 
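/*
 * seq_file iterator used by show_traces_fops: walk the global
 * trace_types list, showing only the tracers this instance may use.
 */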
4633 static void *
4634 t_next(struct seq_file *m, void *v, loff_t *pos)
4635 {
4636 	struct trace_array *tr = m->private;
4637 	struct tracer *t = v;
4638 
4639 	(*pos)++;
4640 
4641 	if (t)
4642 		t = get_tracer_for_array(tr, t->next);
4643 
4644 	return t;
4645 }
4646 
4647 static void *t_start(struct seq_file *m, loff_t *pos)
4648 {
4649 	struct trace_array *tr = m->private;
4650 	struct tracer *t;
4651 	loff_t l = 0;
4652 
4653 	mutex_lock(&trace_types_lock);
4654 
4655 	t = get_tracer_for_array(tr, trace_types);
4656 	for (; t && l < *pos; t = t_next(m, t, &l))
		;
4658 
4659 	return t;
4660 }
4661 
4662 static void t_stop(struct seq_file *m, void *p)
4663 {
4664 	mutex_unlock(&trace_types_lock);
4665 }
4666 
4667 static int t_show(struct seq_file *m, void *v)
4668 {
4669 	struct tracer *t = v;
4670 
4671 	if (!t)
4672 		return 0;
4673 
4674 	seq_puts(m, t->name);
4675 	if (t->next)
4676 		seq_putc(m, ' ');
4677 	else
4678 		seq_putc(m, '\n');
4679 
4680 	return 0;
4681 }
4682 
4683 static const struct seq_operations show_traces_seq_ops = {
4684 	.start		= t_start,
4685 	.next		= t_next,
4686 	.stop		= t_stop,
4687 	.show		= t_show,
4688 };
4689 
4690 static int show_traces_open(struct inode *inode, struct file *file)
4691 {
4692 	struct trace_array *tr = inode->i_private;
4693 	struct seq_file *m;
4694 	int ret;
4695 
4696 	ret = tracing_check_open_get_tr(tr);
4697 	if (ret)
4698 		return ret;
4699 
4700 	ret = seq_open(file, &show_traces_seq_ops);
4701 	if (ret) {
4702 		trace_array_put(tr);
4703 		return ret;
4704 	}
4705 
4706 	m = file->private_data;
4707 	m->private = tr;
4708 
4709 	return 0;
4710 }
4711 
4712 static int show_traces_release(struct inode *inode, struct file *file)
4713 {
4714 	struct trace_array *tr = inode->i_private;
4715 
4716 	trace_array_put(tr);
4717 	return seq_release(inode, file);
4718 }
4719 
4720 static ssize_t
4721 tracing_write_stub(struct file *filp, const char __user *ubuf,
4722 		   size_t count, loff_t *ppos)
4723 {
4724 	return count;
4725 }
4726 
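/*
 * Seek on tracing files: readable files go through seq_lseek(), while
 * write-only opens simply reset the position since writes do not use
 * seq_file.
 */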
4727 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4728 {
4729 	int ret;
4730 
4731 	if (file->f_mode & FMODE_READ)
4732 		ret = seq_lseek(file, offset, whence);
4733 	else
4734 		file->f_pos = ret = 0;
4735 
4736 	return ret;
4737 }
4738 
4739 static const struct file_operations tracing_fops = {
4740 	.open		= tracing_open,
4741 	.read		= seq_read,
4742 	.write		= tracing_write_stub,
4743 	.llseek		= tracing_lseek,
4744 	.release	= tracing_release,
4745 };
4746 
4747 static const struct file_operations show_traces_fops = {
4748 	.open		= show_traces_open,
4749 	.read		= seq_read,
4750 	.llseek		= seq_lseek,
4751 	.release	= show_traces_release,
4752 };
4753 
4754 static ssize_t
4755 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4756 		     size_t count, loff_t *ppos)
4757 {
4758 	struct trace_array *tr = file_inode(filp)->i_private;
4759 	char *mask_str;
4760 	int len;
4761 
4762 	len = snprintf(NULL, 0, "%*pb\n",
4763 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4764 	mask_str = kmalloc(len, GFP_KERNEL);
4765 	if (!mask_str)
4766 		return -ENOMEM;
4767 
4768 	len = snprintf(mask_str, len, "%*pb\n",
4769 		       cpumask_pr_args(tr->tracing_cpumask));
4770 	if (len >= count) {
4771 		count = -EINVAL;
4772 		goto out_err;
4773 	}
4774 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4775 
4776 out_err:
4777 	kfree(mask_str);
4778 
4779 	return count;
4780 }
4781 
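/*
 * Apply a new tracing cpumask to @tr: stop recording on CPUs being
 * removed from the mask, re-enable recording on CPUs being added, and
 * then store the new mask.
 */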
4782 int tracing_set_cpumask(struct trace_array *tr,
4783 			cpumask_var_t tracing_cpumask_new)
4784 {
4785 	int cpu;
4786 
4787 	if (!tr)
4788 		return -EINVAL;
4789 
4790 	local_irq_disable();
4791 	arch_spin_lock(&tr->max_lock);
4792 	for_each_tracing_cpu(cpu) {
4793 		/*
4794 		 * Increase/decrease the disabled counter if we are
4795 		 * about to flip a bit in the cpumask:
4796 		 */
4797 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4798 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4799 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4800 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4801 		}
4802 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4803 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4804 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4805 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4806 		}
4807 	}
4808 	arch_spin_unlock(&tr->max_lock);
4809 	local_irq_enable();
4810 
4811 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4812 
4813 	return 0;
4814 }
4815 
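/*
 * Write handler for the tracing_cpumask file. The mask is given in the
 * usual cpumask hex format, e.g. "echo 3 > tracing_cpumask" limits
 * tracing to CPUs 0 and 1.
 */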
4816 static ssize_t
4817 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4818 		      size_t count, loff_t *ppos)
4819 {
4820 	struct trace_array *tr = file_inode(filp)->i_private;
4821 	cpumask_var_t tracing_cpumask_new;
4822 	int err;
4823 
4824 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4825 		return -ENOMEM;
4826 
4827 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4828 	if (err)
4829 		goto err_free;
4830 
4831 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4832 	if (err)
4833 		goto err_free;
4834 
4835 	free_cpumask_var(tracing_cpumask_new);
4836 
4837 	return count;
4838 
4839 err_free:
4840 	free_cpumask_var(tracing_cpumask_new);
4841 
4842 	return err;
4843 }
4844 
4845 static const struct file_operations tracing_cpumask_fops = {
4846 	.open		= tracing_open_generic_tr,
4847 	.read		= tracing_cpumask_read,
4848 	.write		= tracing_cpumask_write,
4849 	.release	= tracing_release_generic_tr,
4850 	.llseek		= generic_file_llseek,
4851 };
4852 
4853 static int tracing_trace_options_show(struct seq_file *m, void *v)
4854 {
4855 	struct tracer_opt *trace_opts;
4856 	struct trace_array *tr = m->private;
4857 	u32 tracer_flags;
4858 	int i;
4859 
4860 	mutex_lock(&trace_types_lock);
4861 	tracer_flags = tr->current_trace->flags->val;
4862 	trace_opts = tr->current_trace->flags->opts;
4863 
4864 	for (i = 0; trace_options[i]; i++) {
4865 		if (tr->trace_flags & (1 << i))
4866 			seq_printf(m, "%s\n", trace_options[i]);
4867 		else
4868 			seq_printf(m, "no%s\n", trace_options[i]);
4869 	}
4870 
4871 	for (i = 0; trace_opts[i].name; i++) {
4872 		if (tracer_flags & trace_opts[i].bit)
4873 			seq_printf(m, "%s\n", trace_opts[i].name);
4874 		else
4875 			seq_printf(m, "no%s\n", trace_opts[i].name);
4876 	}
4877 	mutex_unlock(&trace_types_lock);
4878 
4879 	return 0;
4880 }
4881 
4882 static int __set_tracer_option(struct trace_array *tr,
4883 			       struct tracer_flags *tracer_flags,
4884 			       struct tracer_opt *opts, int neg)
4885 {
4886 	struct tracer *trace = tracer_flags->trace;
4887 	int ret;
4888 
4889 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4890 	if (ret)
4891 		return ret;
4892 
4893 	if (neg)
4894 		tracer_flags->val &= ~opts->bit;
4895 	else
4896 		tracer_flags->val |= opts->bit;
4897 	return 0;
4898 }
4899 
4900 /* Try to assign a tracer specific option */
4901 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4902 {
4903 	struct tracer *trace = tr->current_trace;
4904 	struct tracer_flags *tracer_flags = trace->flags;
4905 	struct tracer_opt *opts = NULL;
4906 	int i;
4907 
4908 	for (i = 0; tracer_flags->opts[i].name; i++) {
4909 		opts = &tracer_flags->opts[i];
4910 
4911 		if (strcmp(cmp, opts->name) == 0)
4912 			return __set_tracer_option(tr, trace->flags, opts, neg);
4913 	}
4914 
4915 	return -EINVAL;
4916 }
4917 
4918 /* Some tracers require overwrite to stay enabled */
4919 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4920 {
4921 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4922 		return -1;
4923 
4924 	return 0;
4925 }
4926 
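/*
 * Set or clear one of the generic trace option flags on @tr and apply
 * its side effects (cmdline/tgid recording, fork following, buffer
 * overwrite mode, trace_printk).
 */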
4927 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4928 {
4929 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4930 	    (mask == TRACE_ITER_RECORD_CMD))
4931 		lockdep_assert_held(&event_mutex);
4932 
4933 	/* do nothing if flag is already set */
4934 	if (!!(tr->trace_flags & mask) == !!enabled)
4935 		return 0;
4936 
4937 	/* Give the tracer a chance to approve the change */
4938 	if (tr->current_trace->flag_changed)
4939 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4940 			return -EINVAL;
4941 
4942 	if (enabled)
4943 		tr->trace_flags |= mask;
4944 	else
4945 		tr->trace_flags &= ~mask;
4946 
4947 	if (mask == TRACE_ITER_RECORD_CMD)
4948 		trace_event_enable_cmd_record(enabled);
4949 
4950 	if (mask == TRACE_ITER_RECORD_TGID) {
4951 		if (!tgid_map)
4952 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4953 					   sizeof(*tgid_map),
4954 					   GFP_KERNEL);
4955 		if (!tgid_map) {
4956 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4957 			return -ENOMEM;
4958 		}
4959 
4960 		trace_event_enable_tgid_record(enabled);
4961 	}
4962 
4963 	if (mask == TRACE_ITER_EVENT_FORK)
4964 		trace_event_follow_fork(tr, enabled);
4965 
4966 	if (mask == TRACE_ITER_FUNC_FORK)
4967 		ftrace_pid_follow_fork(tr, enabled);
4968 
4969 	if (mask == TRACE_ITER_OVERWRITE) {
4970 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4971 #ifdef CONFIG_TRACER_MAX_TRACE
4972 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4973 #endif
4974 	}
4975 
4976 	if (mask == TRACE_ITER_PRINTK) {
4977 		trace_printk_start_stop_comm(enabled);
4978 		trace_printk_control(enabled);
4979 	}
4980 
4981 	return 0;
4982 }
4983 
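/*
 * Parse a single option string, optionally prefixed with "no", and
 * apply it as either a generic trace flag or a tracer-specific option.
 */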
4984 int trace_set_options(struct trace_array *tr, char *option)
4985 {
4986 	char *cmp;
4987 	int neg = 0;
4988 	int ret;
4989 	size_t orig_len = strlen(option);
4990 	int len;
4991 
4992 	cmp = strstrip(option);
4993 
4994 	len = str_has_prefix(cmp, "no");
4995 	if (len)
4996 		neg = 1;
4997 
4998 	cmp += len;
4999 
5000 	mutex_lock(&event_mutex);
5001 	mutex_lock(&trace_types_lock);
5002 
5003 	ret = match_string(trace_options, -1, cmp);
5004 	/* If no option could be set, test the specific tracer options */
5005 	if (ret < 0)
5006 		ret = set_tracer_option(tr, cmp, neg);
5007 	else
5008 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5009 
5010 	mutex_unlock(&trace_types_lock);
5011 	mutex_unlock(&event_mutex);
5012 
5013 	/*
5014 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5015 	 * turn it back into a space.
5016 	 */
5017 	if (orig_len > strlen(option))
5018 		option[strlen(option)] = ' ';
5019 
5020 	return ret;
5021 }
5022 
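/*
 * Apply the comma-separated option list saved from the "trace_options="
 * kernel boot parameter.
 */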
5023 static void __init apply_trace_boot_options(void)
5024 {
5025 	char *buf = trace_boot_options_buf;
5026 	char *option;
5027 
5028 	while (true) {
5029 		option = strsep(&buf, ",");
5030 
5031 		if (!option)
5032 			break;
5033 
5034 		if (*option)
5035 			trace_set_options(&global_trace, option);
5036 
5037 		/* Put back the comma to allow this to be called again */
5038 		if (buf)
5039 			*(buf - 1) = ',';
5040 	}
5041 }
5042 
5043 static ssize_t
5044 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5045 			size_t cnt, loff_t *ppos)
5046 {
5047 	struct seq_file *m = filp->private_data;
5048 	struct trace_array *tr = m->private;
5049 	char buf[64];
5050 	int ret;
5051 
5052 	if (cnt >= sizeof(buf))
5053 		return -EINVAL;
5054 
5055 	if (copy_from_user(buf, ubuf, cnt))
5056 		return -EFAULT;
5057 
5058 	buf[cnt] = 0;
5059 
5060 	ret = trace_set_options(tr, buf);
5061 	if (ret < 0)
5062 		return ret;
5063 
5064 	*ppos += cnt;
5065 
5066 	return cnt;
5067 }
5068 
5069 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5070 {
5071 	struct trace_array *tr = inode->i_private;
5072 	int ret;
5073 
5074 	ret = tracing_check_open_get_tr(tr);
5075 	if (ret)
5076 		return ret;
5077 
5078 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5079 	if (ret < 0)
5080 		trace_array_put(tr);
5081 
5082 	return ret;
5083 }
5084 
5085 static const struct file_operations tracing_iter_fops = {
5086 	.open		= tracing_trace_options_open,
5087 	.read		= seq_read,
5088 	.llseek		= seq_lseek,
5089 	.release	= tracing_single_release_tr,
5090 	.write		= tracing_trace_options_write,
5091 };
5092 
5093 static const char readme_msg[] =
5094 	"tracing mini-HOWTO:\n\n"
5095 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5096 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5097 	" Important files:\n"
5098 	"  trace\t\t\t- The static contents of the buffer\n"
5099 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5100 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5101 	"  current_tracer\t- function and latency tracers\n"
5102 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5103 	"  error_log\t- error log for failed commands (that support it)\n"
5104 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5105 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
5107 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5108 	"      global:   Synced across CPUs but slows tracing down.\n"
5109 	"     counter:   Not a clock, but just an increment\n"
5110 	"      uptime:   Jiffy counter from time of boot\n"
5111 	"        perf:   Same clock that perf events use\n"
5112 #ifdef CONFIG_X86_64
5113 	"     x86-tsc:   TSC cycle counter\n"
5114 #endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5116 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5117 	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5120 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5121 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5122 	"\t\t\t  Remove sub-buffer with rmdir\n"
5123 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5124 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5125 	"\t\t\t  option name\n"
5126 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5127 #ifdef CONFIG_DYNAMIC_FTRACE
5128 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5129 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5130 	"\t\t\t  functions\n"
5131 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5132 	"\t     modules: Can select a group via module\n"
5133 	"\t      Format: :mod:<module-name>\n"
5134 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5135 	"\t    triggers: a command to perform when function is hit\n"
5136 	"\t      Format: <function>:<trigger>[:count]\n"
5137 	"\t     trigger: traceon, traceoff\n"
5138 	"\t\t      enable_event:<system>:<event>\n"
5139 	"\t\t      disable_event:<system>:<event>\n"
5140 #ifdef CONFIG_STACKTRACE
5141 	"\t\t      stacktrace\n"
5142 #endif
5143 #ifdef CONFIG_TRACER_SNAPSHOT
5144 	"\t\t      snapshot\n"
5145 #endif
5146 	"\t\t      dump\n"
5147 	"\t\t      cpudump\n"
5148 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5149 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5150 	"\t     The first one will disable tracing every time do_fault is hit\n"
5151 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
5153 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5154 	"\t       the counter will not decrement. It only decrements when the\n"
5155 	"\t       trigger did work\n"
	"\t     To remove a trigger without a count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove a trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5160 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5161 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5162 	"\t    modules: Can select a group via module command :mod:\n"
5163 	"\t    Does not accept triggers\n"
5164 #endif /* CONFIG_DYNAMIC_FTRACE */
5165 #ifdef CONFIG_FUNCTION_TRACER
5166 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5167 	"\t\t    (function)\n"
5168 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5169 	"\t\t    (function)\n"
5170 #endif
5171 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5172 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5173 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5174 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5175 #endif
5176 #ifdef CONFIG_TRACER_SNAPSHOT
5177 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5178 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5179 	"\t\t\t  information\n"
5180 #endif
5181 #ifdef CONFIG_STACK_TRACER
5182 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5183 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5184 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5185 	"\t\t\t  new trace)\n"
5186 #ifdef CONFIG_DYNAMIC_FTRACE
5187 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5188 	"\t\t\t  traces\n"
5189 #endif
5190 #endif /* CONFIG_STACK_TRACER */
5191 #ifdef CONFIG_DYNAMIC_EVENTS
5192 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5193 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5194 #endif
5195 #ifdef CONFIG_KPROBE_EVENTS
5196 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5197 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5198 #endif
5199 #ifdef CONFIG_UPROBE_EVENTS
5200 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5201 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5202 #endif
5203 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5204 	"\t  accepts: event-definitions (one definition per line)\n"
5205 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5206 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5207 #ifdef CONFIG_HIST_TRIGGERS
5208 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5209 #endif
5210 	"\t           -:[<group>/]<event>\n"
5211 #ifdef CONFIG_KPROBE_EVENTS
5212 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5214 #endif
5215 #ifdef CONFIG_UPROBE_EVENTS
	"   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5217 #endif
5218 	"\t     args: <name>=fetcharg[:type]\n"
5219 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5220 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5221 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5222 #else
5223 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5224 #endif
5225 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5226 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5227 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5228 	"\t           <type>\\[<array-size>\\]\n"
5229 #ifdef CONFIG_HIST_TRIGGERS
5230 	"\t    field: <stype> <name>;\n"
5231 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5232 	"\t           [unsigned] char/int/long\n"
5233 #endif
5234 #endif
5235 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5236 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5237 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5238 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5239 	"\t\t\t  events\n"
5240 	"      filter\t\t- If set, only events passing filter are traced\n"
5241 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5242 	"\t\t\t  <event>:\n"
5243 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5244 	"      filter\t\t- If set, only events passing filter are traced\n"
5245 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5246 	"\t    Format: <trigger>[:count][if <filter>]\n"
5247 	"\t   trigger: traceon, traceoff\n"
5248 	"\t            enable_event:<system>:<event>\n"
5249 	"\t            disable_event:<system>:<event>\n"
5250 #ifdef CONFIG_HIST_TRIGGERS
5251 	"\t            enable_hist:<system>:<event>\n"
5252 	"\t            disable_hist:<system>:<event>\n"
5253 #endif
5254 #ifdef CONFIG_STACKTRACE
5255 	"\t\t    stacktrace\n"
5256 #endif
5257 #ifdef CONFIG_TRACER_SNAPSHOT
5258 	"\t\t    snapshot\n"
5259 #endif
5260 #ifdef CONFIG_HIST_TRIGGERS
5261 	"\t\t    hist (see below)\n"
5262 #endif
5263 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5264 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5265 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5266 	"\t                  events/block/block_unplug/trigger\n"
5267 	"\t   The first disables tracing every time block_unplug is hit.\n"
5268 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5269 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5270 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5271 	"\t   Like function triggers, the counter is only decremented if it\n"
5272 	"\t    enabled or disabled tracing.\n"
5273 	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5277 	"\t   Filters can be ignored when removing a trigger.\n"
5278 #ifdef CONFIG_HIST_TRIGGERS
5279 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5280 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5281 	"\t            [:values=<field1[,field2,...]>]\n"
5282 	"\t            [:sort=<field1[,field2,...]>]\n"
5283 	"\t            [:size=#entries]\n"
5284 	"\t            [:pause][:continue][:clear]\n"
5285 	"\t            [:name=histname1]\n"
5286 	"\t            [:<handler>.<action>]\n"
5287 	"\t            [if <filter>]\n\n"
5288 	"\t    When a matching event is hit, an entry is added to a hash\n"
5289 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5290 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5291 	"\t    correspond to fields in the event's format description.  Keys\n"
5292 	"\t    can be any field, or the special string 'stacktrace'.\n"
5293 	"\t    Compound keys consisting of up to two fields can be specified\n"
5294 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5295 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5296 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5297 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5298 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5299 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5300 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5301 	"\t    its histogram data will be shared with other triggers of the\n"
5302 	"\t    same name, and trigger hits will update this common data.\n\n"
5303 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5304 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5305 	"\t    triggers attached to an event, there will be a table for each\n"
5306 	"\t    trigger in the output.  The table displayed for a named\n"
5307 	"\t    trigger will be the same as any other instance having the\n"
5308 	"\t    same name.  The default format used to display a given field\n"
5309 	"\t    can be modified by appending any of the following modifiers\n"
5310 	"\t    to the field name, as applicable:\n\n"
5311 	"\t            .hex        display a number as a hex value\n"
5312 	"\t            .sym        display an address as a symbol\n"
5313 	"\t            .sym-offset display an address as a symbol and offset\n"
5314 	"\t            .execname   display a common_pid as a program name\n"
5315 	"\t            .syscall    display a syscall id as a syscall name\n"
5316 	"\t            .log2       display log2 value rather than raw number\n"
5317 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5318 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5319 	"\t    trigger or to start a hist trigger but not log any events\n"
5320 	"\t    until told to do so.  'continue' can be used to start or\n"
5321 	"\t    restart a paused hist trigger.\n\n"
5322 	"\t    The 'clear' parameter will clear the contents of a running\n"
5323 	"\t    hist trigger and leave its current paused/active state\n"
5324 	"\t    unchanged.\n\n"
5325 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5326 	"\t    have one event conditionally start and stop another event's\n"
5327 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5328 	"\t    the enable_event and disable_event triggers.\n\n"
5329 	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated.  They take the form:\n\n"
5331 	"\t        <handler>.<action>\n\n"
5332 	"\t    The available handlers are:\n\n"
5333 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5334 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5335 	"\t        onchange(var)            - invoke action if var changes\n\n"
5336 	"\t    The available actions are:\n\n"
5337 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5338 	"\t        save(field,...)                      - save current event fields\n"
5339 #ifdef CONFIG_TRACER_SNAPSHOT
5340 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5341 #endif
5342 #ifdef CONFIG_SYNTH_EVENTS
5343 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5344 	"\t  Write into this file to define/undefine new synthetic events.\n"
5345 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5346 #endif
5347 #endif
5348 ;
5349 
5350 static ssize_t
5351 tracing_readme_read(struct file *filp, char __user *ubuf,
5352 		       size_t cnt, loff_t *ppos)
5353 {
5354 	return simple_read_from_buffer(ubuf, cnt, ppos,
5355 					readme_msg, strlen(readme_msg));
5356 }
5357 
5358 static const struct file_operations tracing_readme_fops = {
5359 	.open		= tracing_open_generic,
5360 	.read		= tracing_readme_read,
5361 	.llseek		= generic_file_llseek,
5362 };
5363 
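/*
 * seq_file iterator for the "saved_tgids" file: walk tgid_map and print
 * each recorded pid along with its tgid.
 */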
5364 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5365 {
5366 	int *ptr = v;
5367 
5368 	if (*pos || m->count)
5369 		ptr++;
5370 
5371 	(*pos)++;
5372 
5373 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5374 		if (trace_find_tgid(*ptr))
5375 			return ptr;
5376 	}
5377 
5378 	return NULL;
5379 }
5380 
5381 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5382 {
5383 	void *v;
5384 	loff_t l = 0;
5385 
5386 	if (!tgid_map)
5387 		return NULL;
5388 
5389 	v = &tgid_map[0];
5390 	while (l <= *pos) {
5391 		v = saved_tgids_next(m, v, &l);
5392 		if (!v)
5393 			return NULL;
5394 	}
5395 
5396 	return v;
5397 }
5398 
5399 static void saved_tgids_stop(struct seq_file *m, void *v)
5400 {
5401 }
5402 
5403 static int saved_tgids_show(struct seq_file *m, void *v)
5404 {
5405 	int pid = (int *)v - tgid_map;
5406 
5407 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5408 	return 0;
5409 }
5410 
5411 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5412 	.start		= saved_tgids_start,
5413 	.stop		= saved_tgids_stop,
5414 	.next		= saved_tgids_next,
5415 	.show		= saved_tgids_show,
5416 };
5417 
5418 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5419 {
5420 	int ret;
5421 
5422 	ret = tracing_check_open_get_tr(NULL);
5423 	if (ret)
5424 		return ret;
5425 
5426 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5427 }
5428 
5429 
5430 static const struct file_operations tracing_saved_tgids_fops = {
5431 	.open		= tracing_saved_tgids_open,
5432 	.read		= seq_read,
5433 	.llseek		= seq_lseek,
5434 	.release	= seq_release,
5435 };
5436 
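/*
 * seq_file iterator for the "saved_cmdlines" file: walk the saved
 * pid->comm map under trace_cmdline_lock and print each pid with its
 * command name.
 */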
5437 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5438 {
5439 	unsigned int *ptr = v;
5440 
5441 	if (*pos || m->count)
5442 		ptr++;
5443 
5444 	(*pos)++;
5445 
5446 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5447 	     ptr++) {
5448 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5449 			continue;
5450 
5451 		return ptr;
5452 	}
5453 
5454 	return NULL;
5455 }
5456 
5457 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5458 {
5459 	void *v;
5460 	loff_t l = 0;
5461 
5462 	preempt_disable();
5463 	arch_spin_lock(&trace_cmdline_lock);
5464 
5465 	v = &savedcmd->map_cmdline_to_pid[0];
5466 	while (l <= *pos) {
5467 		v = saved_cmdlines_next(m, v, &l);
5468 		if (!v)
5469 			return NULL;
5470 	}
5471 
5472 	return v;
5473 }
5474 
5475 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5476 {
5477 	arch_spin_unlock(&trace_cmdline_lock);
5478 	preempt_enable();
5479 }
5480 
5481 static int saved_cmdlines_show(struct seq_file *m, void *v)
5482 {
5483 	char buf[TASK_COMM_LEN];
5484 	unsigned int *pid = v;
5485 
5486 	__trace_find_cmdline(*pid, buf);
5487 	seq_printf(m, "%d %s\n", *pid, buf);
5488 	return 0;
5489 }
5490 
5491 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5492 	.start		= saved_cmdlines_start,
5493 	.next		= saved_cmdlines_next,
5494 	.stop		= saved_cmdlines_stop,
5495 	.show		= saved_cmdlines_show,
5496 };
5497 
5498 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5499 {
5500 	int ret;
5501 
5502 	ret = tracing_check_open_get_tr(NULL);
5503 	if (ret)
5504 		return ret;
5505 
5506 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5507 }
5508 
5509 static const struct file_operations tracing_saved_cmdlines_fops = {
5510 	.open		= tracing_saved_cmdlines_open,
5511 	.read		= seq_read,
5512 	.llseek		= seq_lseek,
5513 	.release	= seq_release,
5514 };
5515 
5516 static ssize_t
5517 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5518 				 size_t cnt, loff_t *ppos)
5519 {
5520 	char buf[64];
5521 	int r;
5522 
5523 	arch_spin_lock(&trace_cmdline_lock);
5524 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5525 	arch_spin_unlock(&trace_cmdline_lock);
5526 
5527 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5528 }
5529 
5530 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5531 {
5532 	kfree(s->saved_cmdlines);
5533 	kfree(s->map_cmdline_to_pid);
5534 	kfree(s);
5535 }
5536 
5537 static int tracing_resize_saved_cmdlines(unsigned int val)
5538 {
5539 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5540 
5541 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5542 	if (!s)
5543 		return -ENOMEM;
5544 
5545 	if (allocate_cmdlines_buffer(val, s) < 0) {
5546 		kfree(s);
5547 		return -ENOMEM;
5548 	}
5549 
5550 	arch_spin_lock(&trace_cmdline_lock);
5551 	savedcmd_temp = savedcmd;
5552 	savedcmd = s;
5553 	arch_spin_unlock(&trace_cmdline_lock);
5554 	free_saved_cmdlines_buffer(savedcmd_temp);
5555 
5556 	return 0;
5557 }
5558 
5559 static ssize_t
5560 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5561 				  size_t cnt, loff_t *ppos)
5562 {
5563 	unsigned long val;
5564 	int ret;
5565 
5566 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5567 	if (ret)
5568 		return ret;
5569 
5570 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5571 	if (!val || val > PID_MAX_DEFAULT)
5572 		return -EINVAL;
5573 
5574 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5575 	if (ret < 0)
5576 		return ret;
5577 
5578 	*ppos += cnt;
5579 
5580 	return cnt;
5581 }
5582 
5583 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5584 	.open		= tracing_open_generic,
5585 	.read		= tracing_saved_cmdlines_size_read,
5586 	.write		= tracing_saved_cmdlines_size_write,
5587 };
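
/*
 * A minimal userspace sketch of driving the file above, assuming tracefs is
 * mounted at /sys/kernel/tracing and <fcntl.h>/<unistd.h> are available; the
 * size of 1024 is only an example value:
 *
 *	int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1024", 4);	// grow the pid->comm map to 1024 entries
 *		close(fd);
 *	}
 *
 * Reading the same file back returns the current number of entries, as
 * printed by tracing_saved_cmdlines_size_read() above.
 */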
5588 
5589 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5590 static union trace_eval_map_item *
5591 update_eval_map(union trace_eval_map_item *ptr)
5592 {
5593 	if (!ptr->map.eval_string) {
5594 		if (ptr->tail.next) {
5595 			ptr = ptr->tail.next;
5596 			/* Set ptr to the next real item (skip head) */
5597 			ptr++;
5598 		} else
5599 			return NULL;
5600 	}
5601 	return ptr;
5602 }
5603 
5604 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5605 {
5606 	union trace_eval_map_item *ptr = v;
5607 
5608 	/*
5609 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5610 	 * This really should never happen.
5611 	 */
5612 	(*pos)++;
5613 	ptr = update_eval_map(ptr);
5614 	if (WARN_ON_ONCE(!ptr))
5615 		return NULL;
5616 
5617 	ptr++;
5618 	ptr = update_eval_map(ptr);
5619 
5620 	return ptr;
5621 }
5622 
5623 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5624 {
5625 	union trace_eval_map_item *v;
5626 	loff_t l = 0;
5627 
5628 	mutex_lock(&trace_eval_mutex);
5629 
5630 	v = trace_eval_maps;
5631 	if (v)
5632 		v++;
5633 
5634 	while (v && l < *pos) {
5635 		v = eval_map_next(m, v, &l);
5636 	}
5637 
5638 	return v;
5639 }
5640 
5641 static void eval_map_stop(struct seq_file *m, void *v)
5642 {
5643 	mutex_unlock(&trace_eval_mutex);
5644 }
5645 
5646 static int eval_map_show(struct seq_file *m, void *v)
5647 {
5648 	union trace_eval_map_item *ptr = v;
5649 
5650 	seq_printf(m, "%s %ld (%s)\n",
5651 		   ptr->map.eval_string, ptr->map.eval_value,
5652 		   ptr->map.system);
5653 
5654 	return 0;
5655 }
5656 
5657 static const struct seq_operations tracing_eval_map_seq_ops = {
5658 	.start		= eval_map_start,
5659 	.next		= eval_map_next,
5660 	.stop		= eval_map_stop,
5661 	.show		= eval_map_show,
5662 };
5663 
5664 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5665 {
5666 	int ret;
5667 
5668 	ret = tracing_check_open_get_tr(NULL);
5669 	if (ret)
5670 		return ret;
5671 
5672 	return seq_open(filp, &tracing_eval_map_seq_ops);
5673 }
5674 
5675 static const struct file_operations tracing_eval_map_fops = {
5676 	.open		= tracing_eval_map_open,
5677 	.read		= seq_read,
5678 	.llseek		= seq_lseek,
5679 	.release	= seq_release,
5680 };
5681 
5682 static inline union trace_eval_map_item *
5683 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5684 {
5685 	/* Return tail of array given the head */
5686 	return ptr + ptr->head.length + 1;
5687 }
5688 
5689 static void
5690 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5691 			   int len)
5692 {
5693 	struct trace_eval_map **stop;
5694 	struct trace_eval_map **map;
5695 	union trace_eval_map_item *map_array;
5696 	union trace_eval_map_item *ptr;
5697 
5698 	stop = start + len;
5699 
5700 	/*
5701 	 * The trace_eval_maps contains the map plus a head and tail item,
5702 	 * where the head holds the module and length of array, and the
5703 	 * tail holds a pointer to the next list.
5704 	 */
5705 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5706 	if (!map_array) {
5707 		pr_warn("Unable to allocate trace eval mapping\n");
5708 		return;
5709 	}
5710 
5711 	mutex_lock(&trace_eval_mutex);
5712 
5713 	if (!trace_eval_maps)
5714 		trace_eval_maps = map_array;
5715 	else {
5716 		ptr = trace_eval_maps;
5717 		for (;;) {
5718 			ptr = trace_eval_jmp_to_tail(ptr);
5719 			if (!ptr->tail.next)
5720 				break;
5721 			ptr = ptr->tail.next;
5722 
5723 		}
5724 		ptr->tail.next = map_array;
5725 	}
5726 	map_array->head.mod = mod;
5727 	map_array->head.length = len;
5728 	map_array++;
5729 
5730 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5731 		map_array->map = **map;
5732 		map_array++;
5733 	}
5734 	memset(map_array, 0, sizeof(*map_array));
5735 
5736 	mutex_unlock(&trace_eval_mutex);
5737 }
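
/*
 * A minimal sketch of how the head/tail layout built above can be walked,
 * with trace_eval_mutex held; visit_map() is a hypothetical callback used
 * only for illustration:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		int i;
 *
 *		for (i = 0; i < ptr->head.length; i++)
 *			visit_map(&ptr[i + 1].map);	// ptr[0] is the head item
 *		ptr = trace_eval_jmp_to_tail(ptr)->tail.next;
 *	}
 */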
5738 
5739 static void trace_create_eval_file(struct dentry *d_tracer)
5740 {
5741 	trace_create_file("eval_map", 0444, d_tracer,
5742 			  NULL, &tracing_eval_map_fops);
5743 }
5744 
5745 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5746 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5747 static inline void trace_insert_eval_map_file(struct module *mod,
5748 			      struct trace_eval_map **start, int len) { }
5749 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5750 
5751 static void trace_insert_eval_map(struct module *mod,
5752 				  struct trace_eval_map **start, int len)
5753 {
5754 	struct trace_eval_map **map;
5755 
5756 	if (len <= 0)
5757 		return;
5758 
5759 	map = start;
5760 
5761 	trace_event_eval_update(map, len);
5762 
5763 	trace_insert_eval_map_file(mod, start, len);
5764 }
5765 
5766 static ssize_t
5767 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5768 		       size_t cnt, loff_t *ppos)
5769 {
5770 	struct trace_array *tr = filp->private_data;
5771 	char buf[MAX_TRACER_SIZE+2];
5772 	int r;
5773 
5774 	mutex_lock(&trace_types_lock);
5775 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5776 	mutex_unlock(&trace_types_lock);
5777 
5778 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5779 }
5780 
5781 int tracer_init(struct tracer *t, struct trace_array *tr)
5782 {
5783 	tracing_reset_online_cpus(&tr->array_buffer);
5784 	return t->init(tr);
5785 }
5786 
5787 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5788 {
5789 	int cpu;
5790 
5791 	for_each_tracing_cpu(cpu)
5792 		per_cpu_ptr(buf->data, cpu)->entries = val;
5793 }
5794 
5795 #ifdef CONFIG_TRACER_MAX_TRACE
5796 /* resize @trace_buf's entries to the size of @size_buf's entries */
5797 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5798 					struct array_buffer *size_buf, int cpu_id)
5799 {
5800 	int cpu, ret = 0;
5801 
5802 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5803 		for_each_tracing_cpu(cpu) {
5804 			ret = ring_buffer_resize(trace_buf->buffer,
5805 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5806 			if (ret < 0)
5807 				break;
5808 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5809 				per_cpu_ptr(size_buf->data, cpu)->entries;
5810 		}
5811 	} else {
5812 		ret = ring_buffer_resize(trace_buf->buffer,
5813 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5814 		if (ret == 0)
5815 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5816 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5817 	}
5818 
5819 	return ret;
5820 }
5821 #endif /* CONFIG_TRACER_MAX_TRACE */
5822 
5823 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5824 					unsigned long size, int cpu)
5825 {
5826 	int ret;
5827 
5828 	/*
5829 	 * If kernel or user changes the size of the ring buffer
5830 	 * we use the size that was given, and we can forget about
5831 	 * expanding it later.
5832 	 */
5833 	ring_buffer_expanded = true;
5834 
5835 	/* May be called before buffers are initialized */
5836 	if (!tr->array_buffer.buffer)
5837 		return 0;
5838 
5839 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5840 	if (ret < 0)
5841 		return ret;
5842 
5843 #ifdef CONFIG_TRACER_MAX_TRACE
5844 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5845 	    !tr->current_trace->use_max_tr)
5846 		goto out;
5847 
5848 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5849 	if (ret < 0) {
5850 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5851 						     &tr->array_buffer, cpu);
5852 		if (r < 0) {
5853 			/*
5854 			 * AARGH! We are left with different
5855 			 * size max buffer!!!!
5856 			 * The max buffer is our "snapshot" buffer.
5857 			 * When a tracer needs a snapshot (one of the
5858 			 * latency tracers), it swaps the max buffer
5859 			 * with the saved snapshot. We succeeded in
5860 			 * updating the size of the main buffer, but failed to
5861 			 * update the size of the max buffer. But when we tried
5862 			 * to reset the main buffer to the original size, we
5863 			 * failed there too. This is very unlikely to
5864 			 * happen, but if it does, warn and kill all
5865 			 * tracing.
5866 			 */
5867 			WARN_ON(1);
5868 			tracing_disabled = 1;
5869 		}
5870 		return ret;
5871 	}
5872 
5873 	if (cpu == RING_BUFFER_ALL_CPUS)
5874 		set_buffer_entries(&tr->max_buffer, size);
5875 	else
5876 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5877 
5878  out:
5879 #endif /* CONFIG_TRACER_MAX_TRACE */
5880 
5881 	if (cpu == RING_BUFFER_ALL_CPUS)
5882 		set_buffer_entries(&tr->array_buffer, size);
5883 	else
5884 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5885 
5886 	return ret;
5887 }
5888 
5889 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5890 				  unsigned long size, int cpu_id)
5891 {
5892 	int ret = size;
5893 
5894 	mutex_lock(&trace_types_lock);
5895 
5896 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5897 		/* make sure this cpu is enabled in the mask */
5898 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5899 			ret = -EINVAL;
5900 			goto out;
5901 		}
5902 	}
5903 
5904 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5905 	if (ret < 0)
5906 		ret = -ENOMEM;
5907 
5908 out:
5909 	mutex_unlock(&trace_types_lock);
5910 
5911 	return ret;
5912 }
5913 
5914 
5915 /**
5916  * tracing_update_buffers - used by tracing facility to expand ring buffers
5917  *
5918  * To save memory when tracing is never used on a system that has it
5919  * configured in, the ring buffers are set to a minimum size. Once
5920  * a user starts to use the tracing facility, they need to grow
5921  * to their default size.
5922  *
5923  * This function is to be called when a tracer is about to be used.
5924  */
5925 int tracing_update_buffers(void)
5926 {
5927 	int ret = 0;
5928 
5929 	mutex_lock(&trace_types_lock);
5930 	if (!ring_buffer_expanded)
5931 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5932 						RING_BUFFER_ALL_CPUS);
5933 	mutex_unlock(&trace_types_lock);
5934 
5935 	return ret;
5936 }
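
/*
 * A minimal sketch of the expected calling pattern, loosely modeled on the
 * trace event enable path; the surrounding function is hypothetical:
 *
 *	static int my_start_tracing(void)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();	// expand to default size if needed
 *		if (ret < 0)
 *			return ret;
 *		// ... the ring buffers are now full-sized; start recording ...
 *		return 0;
 *	}
 */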
5937 
5938 struct trace_option_dentry;
5939 
5940 static void
5941 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5942 
5943 /*
5944  * Used to clear out the tracer before deletion of an instance.
5945  * Must have trace_types_lock held.
5946  */
5947 static void tracing_set_nop(struct trace_array *tr)
5948 {
5949 	if (tr->current_trace == &nop_trace)
5950 		return;
5951 
5952 	tr->current_trace->enabled--;
5953 
5954 	if (tr->current_trace->reset)
5955 		tr->current_trace->reset(tr);
5956 
5957 	tr->current_trace = &nop_trace;
5958 }
5959 
5960 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5961 {
5962 	/* Only enable if the directory has been created already. */
5963 	if (!tr->dir)
5964 		return;
5965 
5966 	create_trace_option_files(tr, t);
5967 }
5968 
5969 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5970 {
5971 	struct tracer *t;
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5973 	bool had_max_tr;
5974 #endif
5975 	int ret = 0;
5976 
5977 	mutex_lock(&trace_types_lock);
5978 
5979 	if (!ring_buffer_expanded) {
5980 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5981 						RING_BUFFER_ALL_CPUS);
5982 		if (ret < 0)
5983 			goto out;
5984 		ret = 0;
5985 	}
5986 
5987 	for (t = trace_types; t; t = t->next) {
5988 		if (strcmp(t->name, buf) == 0)
5989 			break;
5990 	}
5991 	if (!t) {
5992 		ret = -EINVAL;
5993 		goto out;
5994 	}
5995 	if (t == tr->current_trace)
5996 		goto out;
5997 
5998 #ifdef CONFIG_TRACER_SNAPSHOT
5999 	if (t->use_max_tr) {
6000 		arch_spin_lock(&tr->max_lock);
6001 		if (tr->cond_snapshot)
6002 			ret = -EBUSY;
6003 		arch_spin_unlock(&tr->max_lock);
6004 		if (ret)
6005 			goto out;
6006 	}
6007 #endif
6008 	/* Some tracers won't work on kernel command line */
6009 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6010 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6011 			t->name);
6012 		goto out;
6013 	}
6014 
6015 	/* Some tracers are only allowed for the top level buffer */
6016 	if (!trace_ok_for_array(t, tr)) {
6017 		ret = -EINVAL;
6018 		goto out;
6019 	}
6020 
6021 	/* If trace pipe files are being read, we can't change the tracer */
6022 	if (tr->trace_ref) {
6023 		ret = -EBUSY;
6024 		goto out;
6025 	}
6026 
6027 	trace_branch_disable();
6028 
6029 	tr->current_trace->enabled--;
6030 
6031 	if (tr->current_trace->reset)
6032 		tr->current_trace->reset(tr);
6033 
6034 	/* Current trace needs to be nop_trace before synchronize_rcu */
6035 	tr->current_trace = &nop_trace;
6036 
6037 #ifdef CONFIG_TRACER_MAX_TRACE
6038 	had_max_tr = tr->allocated_snapshot;
6039 
6040 	if (had_max_tr && !t->use_max_tr) {
6041 		/*
6042 		 * We need to make sure that the update_max_tr sees that
6043 		 * current_trace changed to nop_trace to keep it from
6044 		 * swapping the buffers after we resize it.
6045 		 * update_max_tr() is called with interrupts disabled,
6046 		 * so a synchronize_rcu() is sufficient.
6047 		 */
6048 		synchronize_rcu();
6049 		free_snapshot(tr);
6050 	}
6051 #endif
6052 
6053 #ifdef CONFIG_TRACER_MAX_TRACE
6054 	if (t->use_max_tr && !had_max_tr) {
6055 		ret = tracing_alloc_snapshot_instance(tr);
6056 		if (ret < 0)
6057 			goto out;
6058 	}
6059 #endif
6060 
6061 	if (t->init) {
6062 		ret = tracer_init(t, tr);
6063 		if (ret)
6064 			goto out;
6065 	}
6066 
6067 	tr->current_trace = t;
6068 	tr->current_trace->enabled++;
6069 	trace_branch_enable(tr);
6070  out:
6071 	mutex_unlock(&trace_types_lock);
6072 
6073 	return ret;
6074 }
6075 
6076 static ssize_t
6077 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6078 			size_t cnt, loff_t *ppos)
6079 {
6080 	struct trace_array *tr = filp->private_data;
6081 	char buf[MAX_TRACER_SIZE+1];
6082 	int i;
6083 	size_t ret;
6084 	int err;
6085 
6086 	ret = cnt;
6087 
6088 	if (cnt > MAX_TRACER_SIZE)
6089 		cnt = MAX_TRACER_SIZE;
6090 
6091 	if (copy_from_user(buf, ubuf, cnt))
6092 		return -EFAULT;
6093 
6094 	buf[cnt] = 0;
6095 
6096 	/* strip trailing whitespace. */
6097 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6098 		buf[i] = 0;
6099 
6100 	err = tracing_set_tracer(tr, buf);
6101 	if (err)
6102 		return err;
6103 
6104 	*ppos += ret;
6105 
6106 	return ret;
6107 }
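
/*
 * A minimal userspace sketch of selecting a tracer through this file,
 * assuming tracefs is mounted at /sys/kernel/tracing and that the "function"
 * tracer is compiled in (both are assumptions of the example):
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		if (write(fd, "function\n", 9) < 0)
 *			perror("current_tracer");	// e.g. EBUSY, EINVAL
 *		close(fd);
 *	}
 *
 * The trailing newline that "echo" would append is harmless, since
 * tracing_set_trace_write() strips trailing whitespace before the lookup.
 */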
6108 
6109 static ssize_t
6110 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6111 		   size_t cnt, loff_t *ppos)
6112 {
6113 	char buf[64];
6114 	int r;
6115 
6116 	r = snprintf(buf, sizeof(buf), "%ld\n",
6117 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6118 	if (r > sizeof(buf))
6119 		r = sizeof(buf);
6120 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6121 }
6122 
6123 static ssize_t
6124 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6125 		    size_t cnt, loff_t *ppos)
6126 {
6127 	unsigned long val;
6128 	int ret;
6129 
6130 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6131 	if (ret)
6132 		return ret;
6133 
6134 	*ptr = val * 1000;
6135 
6136 	return cnt;
6137 }
6138 
6139 static ssize_t
6140 tracing_thresh_read(struct file *filp, char __user *ubuf,
6141 		    size_t cnt, loff_t *ppos)
6142 {
6143 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6144 }
6145 
6146 static ssize_t
6147 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6148 		     size_t cnt, loff_t *ppos)
6149 {
6150 	struct trace_array *tr = filp->private_data;
6151 	int ret;
6152 
6153 	mutex_lock(&trace_types_lock);
6154 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6155 	if (ret < 0)
6156 		goto out;
6157 
6158 	if (tr->current_trace->update_thresh) {
6159 		ret = tr->current_trace->update_thresh(tr);
6160 		if (ret < 0)
6161 			goto out;
6162 	}
6163 
6164 	ret = cnt;
6165 out:
6166 	mutex_unlock(&trace_types_lock);
6167 
6168 	return ret;
6169 }
6170 
6171 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6172 
6173 static ssize_t
6174 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6175 		     size_t cnt, loff_t *ppos)
6176 {
6177 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6178 }
6179 
6180 static ssize_t
6181 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6182 		      size_t cnt, loff_t *ppos)
6183 {
6184 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6185 }
6186 
6187 #endif
6188 
6189 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6190 {
6191 	struct trace_array *tr = inode->i_private;
6192 	struct trace_iterator *iter;
6193 	int ret;
6194 
6195 	ret = tracing_check_open_get_tr(tr);
6196 	if (ret)
6197 		return ret;
6198 
6199 	mutex_lock(&trace_types_lock);
6200 
6201 	/* create a buffer to store the information to pass to userspace */
6202 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6203 	if (!iter) {
6204 		ret = -ENOMEM;
6205 		__trace_array_put(tr);
6206 		goto out;
6207 	}
6208 
6209 	trace_seq_init(&iter->seq);
6210 	iter->trace = tr->current_trace;
6211 
6212 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6213 		ret = -ENOMEM;
6214 		goto fail;
6215 	}
6216 
6217 	/* trace pipe does not show start of buffer */
6218 	cpumask_setall(iter->started);
6219 
6220 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6221 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6222 
6223 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6224 	if (trace_clocks[tr->clock_id].in_ns)
6225 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6226 
6227 	iter->tr = tr;
6228 	iter->array_buffer = &tr->array_buffer;
6229 	iter->cpu_file = tracing_get_cpu(inode);
6230 	mutex_init(&iter->mutex);
6231 	filp->private_data = iter;
6232 
6233 	if (iter->trace->pipe_open)
6234 		iter->trace->pipe_open(iter);
6235 
6236 	nonseekable_open(inode, filp);
6237 
6238 	tr->trace_ref++;
6239 out:
6240 	mutex_unlock(&trace_types_lock);
6241 	return ret;
6242 
6243 fail:
6244 	kfree(iter);
6245 	__trace_array_put(tr);
6246 	mutex_unlock(&trace_types_lock);
6247 	return ret;
6248 }
6249 
6250 static int tracing_release_pipe(struct inode *inode, struct file *file)
6251 {
6252 	struct trace_iterator *iter = file->private_data;
6253 	struct trace_array *tr = inode->i_private;
6254 
6255 	mutex_lock(&trace_types_lock);
6256 
6257 	tr->trace_ref--;
6258 
6259 	if (iter->trace->pipe_close)
6260 		iter->trace->pipe_close(iter);
6261 
6262 	mutex_unlock(&trace_types_lock);
6263 
6264 	free_cpumask_var(iter->started);
6265 	mutex_destroy(&iter->mutex);
6266 	kfree(iter);
6267 
6268 	trace_array_put(tr);
6269 
6270 	return 0;
6271 }
6272 
6273 static __poll_t
6274 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6275 {
6276 	struct trace_array *tr = iter->tr;
6277 
6278 	/* Iterators are static; they should be filled or empty */
6279 	if (trace_buffer_iter(iter, iter->cpu_file))
6280 		return EPOLLIN | EPOLLRDNORM;
6281 
6282 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6283 		/*
6284 		 * Always select as readable when in blocking mode
6285 		 */
6286 		return EPOLLIN | EPOLLRDNORM;
6287 	else
6288 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6289 					     filp, poll_table);
6290 }
6291 
6292 static __poll_t
6293 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6294 {
6295 	struct trace_iterator *iter = filp->private_data;
6296 
6297 	return trace_poll(iter, filp, poll_table);
6298 }
6299 
6300 /* Must be called with iter->mutex held. */
6301 static int tracing_wait_pipe(struct file *filp)
6302 {
6303 	struct trace_iterator *iter = filp->private_data;
6304 	int ret;
6305 
6306 	while (trace_empty(iter)) {
6307 
6308 		if ((filp->f_flags & O_NONBLOCK)) {
6309 			return -EAGAIN;
6310 		}
6311 
6312 		/*
6313 		 * We block until we read something and tracing is disabled.
6314 		 * We still block if tracing is disabled, but we have never
6315 		 * read anything. This allows a user to cat this file, and
6316 		 * then enable tracing. But after we have read something,
6317 		 * we give an EOF when tracing is again disabled.
6318 		 *
6319 		 * iter->pos will be 0 if we haven't read anything.
6320 		 */
6321 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6322 			break;
6323 
6324 		mutex_unlock(&iter->mutex);
6325 
6326 		ret = wait_on_pipe(iter, 0);
6327 
6328 		mutex_lock(&iter->mutex);
6329 
6330 		if (ret)
6331 			return ret;
6332 	}
6333 
6334 	return 1;
6335 }
6336 
6337 /*
6338  * Consumer reader.
6339  */
6340 static ssize_t
6341 tracing_read_pipe(struct file *filp, char __user *ubuf,
6342 		  size_t cnt, loff_t *ppos)
6343 {
6344 	struct trace_iterator *iter = filp->private_data;
6345 	ssize_t sret;
6346 
6347 	/*
6348 	 * Avoid more than one consumer on a single file descriptor.
6349 	 * This is just a matter of trace coherency; the ring buffer itself
6350 	 * is protected.
6351 	 */
6352 	mutex_lock(&iter->mutex);
6353 
6354 	/* return any leftover data */
6355 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6356 	if (sret != -EBUSY)
6357 		goto out;
6358 
6359 	trace_seq_init(&iter->seq);
6360 
6361 	if (iter->trace->read) {
6362 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6363 		if (sret)
6364 			goto out;
6365 	}
6366 
6367 waitagain:
6368 	sret = tracing_wait_pipe(filp);
6369 	if (sret <= 0)
6370 		goto out;
6371 
6372 	/* stop when tracing is finished */
6373 	if (trace_empty(iter)) {
6374 		sret = 0;
6375 		goto out;
6376 	}
6377 
6378 	if (cnt >= PAGE_SIZE)
6379 		cnt = PAGE_SIZE - 1;
6380 
6381 	/* reset all but tr, trace, and overruns */
6382 	memset(&iter->seq, 0,
6383 	       sizeof(struct trace_iterator) -
6384 	       offsetof(struct trace_iterator, seq));
6385 	cpumask_clear(iter->started);
6386 	trace_seq_init(&iter->seq);
6387 	iter->pos = -1;
6388 
6389 	trace_event_read_lock();
6390 	trace_access_lock(iter->cpu_file);
6391 	while (trace_find_next_entry_inc(iter) != NULL) {
6392 		enum print_line_t ret;
6393 		int save_len = iter->seq.seq.len;
6394 
6395 		ret = print_trace_line(iter);
6396 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6397 			/* don't print partial lines */
6398 			iter->seq.seq.len = save_len;
6399 			break;
6400 		}
6401 		if (ret != TRACE_TYPE_NO_CONSUME)
6402 			trace_consume(iter);
6403 
6404 		if (trace_seq_used(&iter->seq) >= cnt)
6405 			break;
6406 
6407 		/*
6408 		 * Setting the full flag means we reached the trace_seq buffer
6409 		 * size and we should have left via the partial-line condition
6410 		 * above. One of the trace_seq_* functions is not used properly.
6411 		 */
6412 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6413 			  iter->ent->type);
6414 	}
6415 	trace_access_unlock(iter->cpu_file);
6416 	trace_event_read_unlock();
6417 
6418 	/* Now copy what we have to the user */
6419 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6420 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6421 		trace_seq_init(&iter->seq);
6422 
6423 	/*
6424 	 * If there was nothing to send to user, in spite of consuming trace
6425 	 * entries, go back to wait for more entries.
6426 	 */
6427 	if (sret == -EBUSY)
6428 		goto waitagain;
6429 
6430 out:
6431 	mutex_unlock(&iter->mutex);
6432 
6433 	return sret;
6434 }
6435 
6436 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6437 				     unsigned int idx)
6438 {
6439 	__free_page(spd->pages[idx]);
6440 }
6441 
6442 static size_t
6443 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6444 {
6445 	size_t count;
6446 	int save_len;
6447 	int ret;
6448 
6449 	/* Seq buffer is page-sized, exactly what we need. */
6450 	for (;;) {
6451 		save_len = iter->seq.seq.len;
6452 		ret = print_trace_line(iter);
6453 
6454 		if (trace_seq_has_overflowed(&iter->seq)) {
6455 			iter->seq.seq.len = save_len;
6456 			break;
6457 		}
6458 
6459 		/*
6460 		 * This should not be hit, because it should only
6461 		 * be set if the iter->seq overflowed. But check it
6462 		 * anyway to be safe.
6463 		 */
6464 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6465 			iter->seq.seq.len = save_len;
6466 			break;
6467 		}
6468 
6469 		count = trace_seq_used(&iter->seq) - save_len;
6470 		if (rem < count) {
6471 			rem = 0;
6472 			iter->seq.seq.len = save_len;
6473 			break;
6474 		}
6475 
6476 		if (ret != TRACE_TYPE_NO_CONSUME)
6477 			trace_consume(iter);
6478 		rem -= count;
6479 		if (!trace_find_next_entry_inc(iter))	{
6480 			rem = 0;
6481 			iter->ent = NULL;
6482 			break;
6483 		}
6484 	}
6485 
6486 	return rem;
6487 }
6488 
6489 static ssize_t tracing_splice_read_pipe(struct file *filp,
6490 					loff_t *ppos,
6491 					struct pipe_inode_info *pipe,
6492 					size_t len,
6493 					unsigned int flags)
6494 {
6495 	struct page *pages_def[PIPE_DEF_BUFFERS];
6496 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6497 	struct trace_iterator *iter = filp->private_data;
6498 	struct splice_pipe_desc spd = {
6499 		.pages		= pages_def,
6500 		.partial	= partial_def,
6501 		.nr_pages	= 0, /* This gets updated below. */
6502 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6503 		.ops		= &default_pipe_buf_ops,
6504 		.spd_release	= tracing_spd_release_pipe,
6505 	};
6506 	ssize_t ret;
6507 	size_t rem;
6508 	unsigned int i;
6509 
6510 	if (splice_grow_spd(pipe, &spd))
6511 		return -ENOMEM;
6512 
6513 	mutex_lock(&iter->mutex);
6514 
6515 	if (iter->trace->splice_read) {
6516 		ret = iter->trace->splice_read(iter, filp,
6517 					       ppos, pipe, len, flags);
6518 		if (ret)
6519 			goto out_err;
6520 	}
6521 
6522 	ret = tracing_wait_pipe(filp);
6523 	if (ret <= 0)
6524 		goto out_err;
6525 
6526 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6527 		ret = -EFAULT;
6528 		goto out_err;
6529 	}
6530 
6531 	trace_event_read_lock();
6532 	trace_access_lock(iter->cpu_file);
6533 
6534 	/* Fill as many pages as possible. */
6535 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6536 		spd.pages[i] = alloc_page(GFP_KERNEL);
6537 		if (!spd.pages[i])
6538 			break;
6539 
6540 		rem = tracing_fill_pipe_page(rem, iter);
6541 
6542 		/* Copy the data into the page, so we can start over. */
6543 		ret = trace_seq_to_buffer(&iter->seq,
6544 					  page_address(spd.pages[i]),
6545 					  trace_seq_used(&iter->seq));
6546 		if (ret < 0) {
6547 			__free_page(spd.pages[i]);
6548 			break;
6549 		}
6550 		spd.partial[i].offset = 0;
6551 		spd.partial[i].len = trace_seq_used(&iter->seq);
6552 
6553 		trace_seq_init(&iter->seq);
6554 	}
6555 
6556 	trace_access_unlock(iter->cpu_file);
6557 	trace_event_read_unlock();
6558 	mutex_unlock(&iter->mutex);
6559 
6560 	spd.nr_pages = i;
6561 
6562 	if (i)
6563 		ret = splice_to_pipe(pipe, &spd);
6564 	else
6565 		ret = 0;
6566 out:
6567 	splice_shrink_spd(&spd);
6568 	return ret;
6569 
6570 out_err:
6571 	mutex_unlock(&iter->mutex);
6572 	goto out;
6573 }
6574 
6575 static ssize_t
6576 tracing_entries_read(struct file *filp, char __user *ubuf,
6577 		     size_t cnt, loff_t *ppos)
6578 {
6579 	struct inode *inode = file_inode(filp);
6580 	struct trace_array *tr = inode->i_private;
6581 	int cpu = tracing_get_cpu(inode);
6582 	char buf[64];
6583 	int r = 0;
6584 	ssize_t ret;
6585 
6586 	mutex_lock(&trace_types_lock);
6587 
6588 	if (cpu == RING_BUFFER_ALL_CPUS) {
6589 		int cpu, buf_size_same;
6590 		unsigned long size;
6591 
6592 		size = 0;
6593 		buf_size_same = 1;
6594 		/* check if all cpu sizes are same */
6595 		for_each_tracing_cpu(cpu) {
6596 			/* fill in the size from first enabled cpu */
6597 			if (size == 0)
6598 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6599 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6600 				buf_size_same = 0;
6601 				break;
6602 			}
6603 		}
6604 
6605 		if (buf_size_same) {
6606 			if (!ring_buffer_expanded)
6607 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6608 					    size >> 10,
6609 					    trace_buf_size >> 10);
6610 			else
6611 				r = sprintf(buf, "%lu\n", size >> 10);
6612 		} else
6613 			r = sprintf(buf, "X\n");
6614 	} else
6615 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6616 
6617 	mutex_unlock(&trace_types_lock);
6618 
6619 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6620 	return ret;
6621 }
6622 
6623 static ssize_t
6624 tracing_entries_write(struct file *filp, const char __user *ubuf,
6625 		      size_t cnt, loff_t *ppos)
6626 {
6627 	struct inode *inode = file_inode(filp);
6628 	struct trace_array *tr = inode->i_private;
6629 	unsigned long val;
6630 	int ret;
6631 
6632 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6633 	if (ret)
6634 		return ret;
6635 
6636 	/* must have at least 1 entry */
6637 	if (!val)
6638 		return -EINVAL;
6639 
6640 	/* value is in KB */
6641 	val <<= 10;
6642 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6643 	if (ret < 0)
6644 		return ret;
6645 
6646 	*ppos += cnt;
6647 
6648 	return cnt;
6649 }
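
/*
 * The value written to buffer_size_kb is interpreted in KiB and applied to a
 * single CPU (per_cpu/cpuN/buffer_size_kb) or to all CPUs (the top-level
 * file).  A minimal userspace sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing; the size is only an example value:
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096", 4);	// about 4 MiB of ring buffer per CPU
 *		close(fd);
 *	}
 */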
6650 
6651 static ssize_t
6652 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6653 				size_t cnt, loff_t *ppos)
6654 {
6655 	struct trace_array *tr = filp->private_data;
6656 	char buf[64];
6657 	int r, cpu;
6658 	unsigned long size = 0, expanded_size = 0;
6659 
6660 	mutex_lock(&trace_types_lock);
6661 	for_each_tracing_cpu(cpu) {
6662 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6663 		if (!ring_buffer_expanded)
6664 			expanded_size += trace_buf_size >> 10;
6665 	}
6666 	if (ring_buffer_expanded)
6667 		r = sprintf(buf, "%lu\n", size);
6668 	else
6669 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6670 	mutex_unlock(&trace_types_lock);
6671 
6672 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6673 }
6674 
6675 static ssize_t
6676 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6677 			  size_t cnt, loff_t *ppos)
6678 {
6679 	/*
6680 	 * There is no need to read what the user has written; this function
6681 	 * is just to make sure that there is no error when "echo" is used.
6682 	 */
6683 
6684 	*ppos += cnt;
6685 
6686 	return cnt;
6687 }
6688 
6689 static int
6690 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6691 {
6692 	struct trace_array *tr = inode->i_private;
6693 
6694 	/* disable tracing? */
6695 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6696 		tracer_tracing_off(tr);
6697 	/* resize the ring buffer to 0 */
6698 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6699 
6700 	trace_array_put(tr);
6701 
6702 	return 0;
6703 }
6704 
6705 static ssize_t
6706 tracing_mark_write(struct file *filp, const char __user *ubuf,
6707 					size_t cnt, loff_t *fpos)
6708 {
6709 	struct trace_array *tr = filp->private_data;
6710 	struct ring_buffer_event *event;
6711 	enum event_trigger_type tt = ETT_NONE;
6712 	struct trace_buffer *buffer;
6713 	struct print_entry *entry;
6714 	ssize_t written;
6715 	int size;
6716 	int len;
6717 
6718 /* Used in tracing_mark_raw_write() as well */
6719 #define FAULTED_STR "<faulted>"
6720 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6721 
6722 	if (tracing_disabled)
6723 		return -EINVAL;
6724 
6725 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6726 		return -EINVAL;
6727 
6728 	if (cnt > TRACE_BUF_SIZE)
6729 		cnt = TRACE_BUF_SIZE;
6730 
6731 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6732 
6733 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6734 
6735 	/* If less than "<faulted>", then make sure we can still add that */
6736 	if (cnt < FAULTED_SIZE)
6737 		size += FAULTED_SIZE - cnt;
6738 
6739 	buffer = tr->array_buffer.buffer;
6740 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6741 					    tracing_gen_ctx());
6742 	if (unlikely(!event))
6743 		/* Ring buffer disabled, return as if not open for write */
6744 		return -EBADF;
6745 
6746 	entry = ring_buffer_event_data(event);
6747 	entry->ip = _THIS_IP_;
6748 
6749 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6750 	if (len) {
6751 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6752 		cnt = FAULTED_SIZE;
6753 		written = -EFAULT;
6754 	} else
6755 		written = cnt;
6756 
6757 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6758 		/* do not add \n before testing triggers, but add \0 */
6759 		entry->buf[cnt] = '\0';
6760 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6761 	}
6762 
6763 	if (entry->buf[cnt - 1] != '\n') {
6764 		entry->buf[cnt] = '\n';
6765 		entry->buf[cnt + 1] = '\0';
6766 	} else
6767 		entry->buf[cnt] = '\0';
6768 
6769 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6770 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6771 	__buffer_unlock_commit(buffer, event);
6772 
6773 	if (tt)
6774 		event_triggers_post_call(tr->trace_marker_file, tt);
6775 
6776 	if (written > 0)
6777 		*fpos += written;
6778 
6779 	return written;
6780 }
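
/*
 * A minimal userspace sketch of injecting a marker through the file above,
 * assuming tracefs is mounted at /sys/kernel/tracing.  Writes longer than
 * TRACE_BUF_SIZE are truncated, and a newline is appended by the code above
 * if the string does not already end with one:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "hit the interesting path", 24);
 *		close(fd);
 *	}
 */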
6781 
6782 /* Limit it for now to 3K (including tag) */
6783 #define RAW_DATA_MAX_SIZE (1024*3)
6784 
6785 static ssize_t
6786 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6787 					size_t cnt, loff_t *fpos)
6788 {
6789 	struct trace_array *tr = filp->private_data;
6790 	struct ring_buffer_event *event;
6791 	struct trace_buffer *buffer;
6792 	struct raw_data_entry *entry;
6793 	ssize_t written;
6794 	int size;
6795 	int len;
6796 
6797 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6798 
6799 	if (tracing_disabled)
6800 		return -EINVAL;
6801 
6802 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6803 		return -EINVAL;
6804 
6805 	/* The marker must at least have a tag id */
6806 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6807 		return -EINVAL;
6808 
6809 	if (cnt > TRACE_BUF_SIZE)
6810 		cnt = TRACE_BUF_SIZE;
6811 
6812 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6813 
6814 	size = sizeof(*entry) + cnt;
6815 	if (cnt < FAULT_SIZE_ID)
6816 		size += FAULT_SIZE_ID - cnt;
6817 
6818 	buffer = tr->array_buffer.buffer;
6819 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6820 					    tracing_gen_ctx());
6821 	if (!event)
6822 		/* Ring buffer disabled, return as if not open for write */
6823 		return -EBADF;
6824 
6825 	entry = ring_buffer_event_data(event);
6826 
6827 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6828 	if (len) {
6829 		entry->id = -1;
6830 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6831 		written = -EFAULT;
6832 	} else
6833 		written = cnt;
6834 
6835 	__buffer_unlock_commit(buffer, event);
6836 
6837 	if (written > 0)
6838 		*fpos += written;
6839 
6840 	return written;
6841 }
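
/*
 * A minimal userspace sketch of the binary format expected above: the first
 * sizeof(unsigned int) bytes of the write are the tag id, the rest is opaque
 * payload, up to RAW_DATA_MAX_SIZE in total.  Assumes tracefs is mounted at
 * /sys/kernel/tracing; the tag value and payload are made up:
 *
 *	struct {
 *		unsigned int id;
 *		char data[8];
 *	} rec = { .id = 0x1234, .data = "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */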
6842 
6843 static int tracing_clock_show(struct seq_file *m, void *v)
6844 {
6845 	struct trace_array *tr = m->private;
6846 	int i;
6847 
6848 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6849 		seq_printf(m,
6850 			"%s%s%s%s", i ? " " : "",
6851 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6852 			i == tr->clock_id ? "]" : "");
6853 	seq_putc(m, '\n');
6854 
6855 	return 0;
6856 }
6857 
6858 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6859 {
6860 	int i;
6861 
6862 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6863 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6864 			break;
6865 	}
6866 	if (i == ARRAY_SIZE(trace_clocks))
6867 		return -EINVAL;
6868 
6869 	mutex_lock(&trace_types_lock);
6870 
6871 	tr->clock_id = i;
6872 
6873 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6874 
6875 	/*
6876 	 * New clock may not be consistent with the previous clock.
6877 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6878 	 */
6879 	tracing_reset_online_cpus(&tr->array_buffer);
6880 
6881 #ifdef CONFIG_TRACER_MAX_TRACE
6882 	if (tr->max_buffer.buffer)
6883 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6884 	tracing_reset_online_cpus(&tr->max_buffer);
6885 #endif
6886 
6887 	mutex_unlock(&trace_types_lock);
6888 
6889 	return 0;
6890 }
6891 
6892 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6893 				   size_t cnt, loff_t *fpos)
6894 {
6895 	struct seq_file *m = filp->private_data;
6896 	struct trace_array *tr = m->private;
6897 	char buf[64];
6898 	const char *clockstr;
6899 	int ret;
6900 
6901 	if (cnt >= sizeof(buf))
6902 		return -EINVAL;
6903 
6904 	if (copy_from_user(buf, ubuf, cnt))
6905 		return -EFAULT;
6906 
6907 	buf[cnt] = 0;
6908 
6909 	clockstr = strstrip(buf);
6910 
6911 	ret = tracing_set_clock(tr, clockstr);
6912 	if (ret)
6913 		return ret;
6914 
6915 	*fpos += cnt;
6916 
6917 	return cnt;
6918 }
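
/*
 * A minimal userspace sketch of switching the trace clock, assuming tracefs
 * is mounted at /sys/kernel/tracing and that "mono" is one of the entries in
 * trace_clocks[] (treat both as assumptions of the example):
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono", 4);	// strstrip() above trims any whitespace
 *		close(fd);
 *	}
 *
 * Note that tracing_set_clock() resets the per-CPU buffers, so events
 * recorded under the previous clock are discarded.
 */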
6919 
6920 static int tracing_clock_open(struct inode *inode, struct file *file)
6921 {
6922 	struct trace_array *tr = inode->i_private;
6923 	int ret;
6924 
6925 	ret = tracing_check_open_get_tr(tr);
6926 	if (ret)
6927 		return ret;
6928 
6929 	ret = single_open(file, tracing_clock_show, inode->i_private);
6930 	if (ret < 0)
6931 		trace_array_put(tr);
6932 
6933 	return ret;
6934 }
6935 
6936 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6937 {
6938 	struct trace_array *tr = m->private;
6939 
6940 	mutex_lock(&trace_types_lock);
6941 
6942 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6943 		seq_puts(m, "delta [absolute]\n");
6944 	else
6945 		seq_puts(m, "[delta] absolute\n");
6946 
6947 	mutex_unlock(&trace_types_lock);
6948 
6949 	return 0;
6950 }
6951 
6952 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6953 {
6954 	struct trace_array *tr = inode->i_private;
6955 	int ret;
6956 
6957 	ret = tracing_check_open_get_tr(tr);
6958 	if (ret)
6959 		return ret;
6960 
6961 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6962 	if (ret < 0)
6963 		trace_array_put(tr);
6964 
6965 	return ret;
6966 }
6967 
6968 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6969 {
6970 	int ret = 0;
6971 
6972 	mutex_lock(&trace_types_lock);
6973 
6974 	if (abs && tr->time_stamp_abs_ref++)
6975 		goto out;
6976 
6977 	if (!abs) {
6978 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6979 			ret = -EINVAL;
6980 			goto out;
6981 		}
6982 
6983 		if (--tr->time_stamp_abs_ref)
6984 			goto out;
6985 	}
6986 
6987 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6988 
6989 #ifdef CONFIG_TRACER_MAX_TRACE
6990 	if (tr->max_buffer.buffer)
6991 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6992 #endif
6993  out:
6994 	mutex_unlock(&trace_types_lock);
6995 
6996 	return ret;
6997 }
6998 
6999 struct ftrace_buffer_info {
7000 	struct trace_iterator	iter;
7001 	void			*spare;
7002 	unsigned int		spare_cpu;
7003 	unsigned int		read;
7004 };
7005 
7006 #ifdef CONFIG_TRACER_SNAPSHOT
7007 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7008 {
7009 	struct trace_array *tr = inode->i_private;
7010 	struct trace_iterator *iter;
7011 	struct seq_file *m;
7012 	int ret;
7013 
7014 	ret = tracing_check_open_get_tr(tr);
7015 	if (ret)
7016 		return ret;
7017 
7018 	if (file->f_mode & FMODE_READ) {
7019 		iter = __tracing_open(inode, file, true);
7020 		if (IS_ERR(iter))
7021 			ret = PTR_ERR(iter);
7022 	} else {
7023 		/* Writes still need the seq_file to hold the private data */
7024 		ret = -ENOMEM;
7025 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7026 		if (!m)
7027 			goto out;
7028 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7029 		if (!iter) {
7030 			kfree(m);
7031 			goto out;
7032 		}
7033 		ret = 0;
7034 
7035 		iter->tr = tr;
7036 		iter->array_buffer = &tr->max_buffer;
7037 		iter->cpu_file = tracing_get_cpu(inode);
7038 		m->private = iter;
7039 		file->private_data = m;
7040 	}
7041 out:
7042 	if (ret < 0)
7043 		trace_array_put(tr);
7044 
7045 	return ret;
7046 }
7047 
7048 static ssize_t
7049 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7050 		       loff_t *ppos)
7051 {
7052 	struct seq_file *m = filp->private_data;
7053 	struct trace_iterator *iter = m->private;
7054 	struct trace_array *tr = iter->tr;
7055 	unsigned long val;
7056 	int ret;
7057 
7058 	ret = tracing_update_buffers();
7059 	if (ret < 0)
7060 		return ret;
7061 
7062 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7063 	if (ret)
7064 		return ret;
7065 
7066 	mutex_lock(&trace_types_lock);
7067 
7068 	if (tr->current_trace->use_max_tr) {
7069 		ret = -EBUSY;
7070 		goto out;
7071 	}
7072 
7073 	arch_spin_lock(&tr->max_lock);
7074 	if (tr->cond_snapshot)
7075 		ret = -EBUSY;
7076 	arch_spin_unlock(&tr->max_lock);
7077 	if (ret)
7078 		goto out;
7079 
7080 	switch (val) {
7081 	case 0:
7082 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7083 			ret = -EINVAL;
7084 			break;
7085 		}
7086 		if (tr->allocated_snapshot)
7087 			free_snapshot(tr);
7088 		break;
7089 	case 1:
7090 /* Only allow per-cpu swap if the ring buffer supports it */
7091 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7092 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7093 			ret = -EINVAL;
7094 			break;
7095 		}
7096 #endif
7097 		if (tr->allocated_snapshot)
7098 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7099 					&tr->array_buffer, iter->cpu_file);
7100 		else
7101 			ret = tracing_alloc_snapshot_instance(tr);
7102 		if (ret < 0)
7103 			break;
7104 		local_irq_disable();
7105 		/* Now, we're going to swap */
7106 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7107 			update_max_tr(tr, current, smp_processor_id(), NULL);
7108 		else
7109 			update_max_tr_single(tr, current, iter->cpu_file);
7110 		local_irq_enable();
7111 		break;
7112 	default:
7113 		if (tr->allocated_snapshot) {
7114 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7115 				tracing_reset_online_cpus(&tr->max_buffer);
7116 			else
7117 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7118 		}
7119 		break;
7120 	}
7121 
7122 	if (ret >= 0) {
7123 		*ppos += cnt;
7124 		ret = cnt;
7125 	}
7126 out:
7127 	mutex_unlock(&trace_types_lock);
7128 	return ret;
7129 }
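
/*
 * A minimal userspace sketch of the value protocol handled above, assuming
 * tracefs is mounted at /sys/kernel/tracing and CONFIG_TRACER_SNAPSHOT is
 * enabled: "1" allocates the snapshot buffer if needed and swaps it with the
 * live buffer, any other non-zero value just clears the snapshot, and "0"
 * frees it again:
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot of the current trace
 *		write(fd, "2", 1);	// erase the snapshot, keep it allocated
 *		write(fd, "0", 1);	// free the snapshot buffer
 *		close(fd);
 *	}
 */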
7130 
7131 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7132 {
7133 	struct seq_file *m = file->private_data;
7134 	int ret;
7135 
7136 	ret = tracing_release(inode, file);
7137 
7138 	if (file->f_mode & FMODE_READ)
7139 		return ret;
7140 
7141 	/* If write only, the seq_file is just a stub */
7142 	if (m)
7143 		kfree(m->private);
7144 	kfree(m);
7145 
7146 	return 0;
7147 }
7148 
7149 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7150 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7151 				    size_t count, loff_t *ppos);
7152 static int tracing_buffers_release(struct inode *inode, struct file *file);
7153 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7154 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7155 
7156 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7157 {
7158 	struct ftrace_buffer_info *info;
7159 	int ret;
7160 
7161 	/* The following checks for tracefs lockdown */
7162 	ret = tracing_buffers_open(inode, filp);
7163 	if (ret < 0)
7164 		return ret;
7165 
7166 	info = filp->private_data;
7167 
7168 	if (info->iter.trace->use_max_tr) {
7169 		tracing_buffers_release(inode, filp);
7170 		return -EBUSY;
7171 	}
7172 
7173 	info->iter.snapshot = true;
7174 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7175 
7176 	return ret;
7177 }
7178 
7179 #endif /* CONFIG_TRACER_SNAPSHOT */
7180 
7181 
7182 static const struct file_operations tracing_thresh_fops = {
7183 	.open		= tracing_open_generic,
7184 	.read		= tracing_thresh_read,
7185 	.write		= tracing_thresh_write,
7186 	.llseek		= generic_file_llseek,
7187 };
7188 
7189 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7190 static const struct file_operations tracing_max_lat_fops = {
7191 	.open		= tracing_open_generic,
7192 	.read		= tracing_max_lat_read,
7193 	.write		= tracing_max_lat_write,
7194 	.llseek		= generic_file_llseek,
7195 };
7196 #endif
7197 
7198 static const struct file_operations set_tracer_fops = {
7199 	.open		= tracing_open_generic,
7200 	.read		= tracing_set_trace_read,
7201 	.write		= tracing_set_trace_write,
7202 	.llseek		= generic_file_llseek,
7203 };
7204 
7205 static const struct file_operations tracing_pipe_fops = {
7206 	.open		= tracing_open_pipe,
7207 	.poll		= tracing_poll_pipe,
7208 	.read		= tracing_read_pipe,
7209 	.splice_read	= tracing_splice_read_pipe,
7210 	.release	= tracing_release_pipe,
7211 	.llseek		= no_llseek,
7212 };
7213 
7214 static const struct file_operations tracing_entries_fops = {
7215 	.open		= tracing_open_generic_tr,
7216 	.read		= tracing_entries_read,
7217 	.write		= tracing_entries_write,
7218 	.llseek		= generic_file_llseek,
7219 	.release	= tracing_release_generic_tr,
7220 };
7221 
7222 static const struct file_operations tracing_total_entries_fops = {
7223 	.open		= tracing_open_generic_tr,
7224 	.read		= tracing_total_entries_read,
7225 	.llseek		= generic_file_llseek,
7226 	.release	= tracing_release_generic_tr,
7227 };
7228 
7229 static const struct file_operations tracing_free_buffer_fops = {
7230 	.open		= tracing_open_generic_tr,
7231 	.write		= tracing_free_buffer_write,
7232 	.release	= tracing_free_buffer_release,
7233 };
7234 
7235 static const struct file_operations tracing_mark_fops = {
7236 	.open		= tracing_open_generic_tr,
7237 	.write		= tracing_mark_write,
7238 	.llseek		= generic_file_llseek,
7239 	.release	= tracing_release_generic_tr,
7240 };
7241 
7242 static const struct file_operations tracing_mark_raw_fops = {
7243 	.open		= tracing_open_generic_tr,
7244 	.write		= tracing_mark_raw_write,
7245 	.llseek		= generic_file_llseek,
7246 	.release	= tracing_release_generic_tr,
7247 };
7248 
7249 static const struct file_operations trace_clock_fops = {
7250 	.open		= tracing_clock_open,
7251 	.read		= seq_read,
7252 	.llseek		= seq_lseek,
7253 	.release	= tracing_single_release_tr,
7254 	.write		= tracing_clock_write,
7255 };
7256 
7257 static const struct file_operations trace_time_stamp_mode_fops = {
7258 	.open		= tracing_time_stamp_mode_open,
7259 	.read		= seq_read,
7260 	.llseek		= seq_lseek,
7261 	.release	= tracing_single_release_tr,
7262 };
7263 
7264 #ifdef CONFIG_TRACER_SNAPSHOT
7265 static const struct file_operations snapshot_fops = {
7266 	.open		= tracing_snapshot_open,
7267 	.read		= seq_read,
7268 	.write		= tracing_snapshot_write,
7269 	.llseek		= tracing_lseek,
7270 	.release	= tracing_snapshot_release,
7271 };
7272 
7273 static const struct file_operations snapshot_raw_fops = {
7274 	.open		= snapshot_raw_open,
7275 	.read		= tracing_buffers_read,
7276 	.release	= tracing_buffers_release,
7277 	.splice_read	= tracing_buffers_splice_read,
7278 	.llseek		= no_llseek,
7279 };
7280 
7281 #endif /* CONFIG_TRACER_SNAPSHOT */
7282 
7283 #define TRACING_LOG_ERRS_MAX	8
7284 #define TRACING_LOG_LOC_MAX	128
7285 
7286 #define CMD_PREFIX "  Command: "
7287 
7288 struct err_info {
7289 	const char	**errs;	/* ptr to loc-specific array of err strings */
7290 	u8		type;	/* index into errs -> specific err string */
7291 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7292 	u64		ts;
7293 };
7294 
7295 struct tracing_log_err {
7296 	struct list_head	list;
7297 	struct err_info		info;
7298 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7299 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7300 };
7301 
7302 static DEFINE_MUTEX(tracing_err_log_lock);
7303 
7304 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7305 {
7306 	struct tracing_log_err *err;
7307 
7308 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7309 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7310 		if (!err)
7311 			return ERR_PTR(-ENOMEM);
7312 		tr->n_err_log_entries++;
7313 
7314 		return err;
7315 	}
7316 
7317 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7318 	list_del(&err->list);
7319 
7320 	return err;
7321 }
7322 
7323 /**
7324  * err_pos - find the position of a string within a command for error careting
7325  * @cmd: The tracing command that caused the error
7326  * @str: The string to position the caret at within @cmd
7327  *
7328  * Finds the position of the first occurrence of @str within @cmd.  The
7329  * return value can be passed to tracing_log_err() for caret placement
7330  * within @cmd.
7331  *
7332  * Returns the index within @cmd of the first occurrence of @str or 0
7333  * if @str was not found.
7334  */
7335 unsigned int err_pos(char *cmd, const char *str)
7336 {
7337 	char *found;
7338 
7339 	if (WARN_ON(!strlen(cmd)))
7340 		return 0;
7341 
7342 	found = strstr(cmd, str);
7343 	if (found)
7344 		return found - cmd;
7345 
7346 	return 0;
7347 }
7348 
7349 /**
7350  * tracing_log_err - write an error to the tracing error log
7351  * @tr: The associated trace array for the error (NULL for top level array)
7352  * @loc: A string describing where the error occurred
7353  * @cmd: The tracing command that caused the error
7354  * @errs: The array of loc-specific static error strings
7355  * @type: The index into errs[], which produces the specific static err string
7356  * @pos: The position the caret should be placed in the cmd
7357  *
7358  * Writes an error into tracing/error_log of the form:
7359  *
7360  * <loc>: error: <text>
7361  *   Command: <cmd>
7362  *              ^
7363  *
7364  * tracing/error_log is a small log file containing the last
7365  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7366  * unless there has been a tracing error, and the error log can be
7367  * cleared and have its memory freed by writing the empty string in
7368  * truncation mode to it i.e. echo > tracing/error_log.
7369  *
7370  * NOTE: the @errs array along with the @type param are used to
7371  * produce a static error string - this string is not copied and saved
7372  * when the error is logged - only a pointer to it is saved.  See
7373  * existing callers for examples of how static strings are typically
7374  * defined for use with tracing_log_err().
7375  */
7376 void tracing_log_err(struct trace_array *tr,
7377 		     const char *loc, const char *cmd,
7378 		     const char **errs, u8 type, u8 pos)
7379 {
7380 	struct tracing_log_err *err;
7381 
7382 	if (!tr)
7383 		tr = &global_trace;
7384 
7385 	mutex_lock(&tracing_err_log_lock);
7386 	err = get_tracing_log_err(tr);
7387 	if (PTR_ERR(err) == -ENOMEM) {
7388 		mutex_unlock(&tracing_err_log_lock);
7389 		return;
7390 	}
7391 
7392 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7393 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7394 
7395 	err->info.errs = errs;
7396 	err->info.type = type;
7397 	err->info.pos = pos;
7398 	err->info.ts = local_clock();
7399 
7400 	list_add_tail(&err->list, &tr->err_log);
7401 	mutex_unlock(&tracing_err_log_lock);
7402 }
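
/*
 * A minimal sketch of the calling convention documented above, loosely
 * modeled on existing callers such as the hist trigger code; the error
 * table, enum, location string and command are made up for the example:
 *
 *	static const char *my_errs[] = { "Invalid field", "Duplicate name" };
 *	enum { MY_ERR_FIELD, MY_ERR_DUP };
 *
 *	char *cmd = "keys=bogus_field";
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_errs, MY_ERR_FIELD,
 *			err_pos(cmd, "bogus_field"));
 *
 * Here @tr is the trace_array the command applies to, or NULL for the
 * top-level instance, as described in the kernel-doc above.
 */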
7403 
7404 static void clear_tracing_err_log(struct trace_array *tr)
7405 {
7406 	struct tracing_log_err *err, *next;
7407 
7408 	mutex_lock(&tracing_err_log_lock);
7409 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7410 		list_del(&err->list);
7411 		kfree(err);
7412 	}
7413 
7414 	tr->n_err_log_entries = 0;
7415 	mutex_unlock(&tracing_err_log_lock);
7416 }
7417 
7418 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7419 {
7420 	struct trace_array *tr = m->private;
7421 
7422 	mutex_lock(&tracing_err_log_lock);
7423 
7424 	return seq_list_start(&tr->err_log, *pos);
7425 }
7426 
7427 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7428 {
7429 	struct trace_array *tr = m->private;
7430 
7431 	return seq_list_next(v, &tr->err_log, pos);
7432 }
7433 
7434 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7435 {
7436 	mutex_unlock(&tracing_err_log_lock);
7437 }
7438 
7439 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7440 {
7441 	u8 i;
7442 
7443 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7444 		seq_putc(m, ' ');
7445 	for (i = 0; i < pos; i++)
7446 		seq_putc(m, ' ');
7447 	seq_puts(m, "^\n");
7448 }
7449 
7450 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7451 {
7452 	struct tracing_log_err *err = v;
7453 
7454 	if (err) {
7455 		const char *err_text = err->info.errs[err->info.type];
7456 		u64 sec = err->info.ts;
7457 		u32 nsec;
7458 
7459 		nsec = do_div(sec, NSEC_PER_SEC);
7460 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7461 			   err->loc, err_text);
7462 		seq_printf(m, "%s", err->cmd);
7463 		tracing_err_log_show_pos(m, err->info.pos);
7464 	}
7465 
7466 	return 0;
7467 }
7468 
7469 static const struct seq_operations tracing_err_log_seq_ops = {
7470 	.start  = tracing_err_log_seq_start,
7471 	.next   = tracing_err_log_seq_next,
7472 	.stop   = tracing_err_log_seq_stop,
7473 	.show   = tracing_err_log_seq_show
7474 };
7475 
7476 static int tracing_err_log_open(struct inode *inode, struct file *file)
7477 {
7478 	struct trace_array *tr = inode->i_private;
7479 	int ret = 0;
7480 
7481 	ret = tracing_check_open_get_tr(tr);
7482 	if (ret)
7483 		return ret;
7484 
7485 	/* If this file was opened for write, then erase contents */
7486 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7487 		clear_tracing_err_log(tr);
7488 
7489 	if (file->f_mode & FMODE_READ) {
7490 		ret = seq_open(file, &tracing_err_log_seq_ops);
7491 		if (!ret) {
7492 			struct seq_file *m = file->private_data;
7493 			m->private = tr;
7494 		} else {
7495 			trace_array_put(tr);
7496 		}
7497 	}
7498 	return ret;
7499 }
7500 
7501 static ssize_t tracing_err_log_write(struct file *file,
7502 				     const char __user *buffer,
7503 				     size_t count, loff_t *ppos)
7504 {
7505 	return count;
7506 }
7507 
7508 static int tracing_err_log_release(struct inode *inode, struct file *file)
7509 {
7510 	struct trace_array *tr = inode->i_private;
7511 
7512 	trace_array_put(tr);
7513 
7514 	if (file->f_mode & FMODE_READ)
7515 		seq_release(inode, file);
7516 
7517 	return 0;
7518 }
7519 
7520 static const struct file_operations tracing_err_log_fops = {
7521 	.open           = tracing_err_log_open,
7522 	.write		= tracing_err_log_write,
7523 	.read           = seq_read,
7524 	.llseek         = seq_lseek,
7525 	.release        = tracing_err_log_release,
7526 };
7527 
7528 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7529 {
7530 	struct trace_array *tr = inode->i_private;
7531 	struct ftrace_buffer_info *info;
7532 	int ret;
7533 
7534 	ret = tracing_check_open_get_tr(tr);
7535 	if (ret)
7536 		return ret;
7537 
7538 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7539 	if (!info) {
7540 		trace_array_put(tr);
7541 		return -ENOMEM;
7542 	}
7543 
7544 	mutex_lock(&trace_types_lock);
7545 
7546 	info->iter.tr		= tr;
7547 	info->iter.cpu_file	= tracing_get_cpu(inode);
7548 	info->iter.trace	= tr->current_trace;
7549 	info->iter.array_buffer = &tr->array_buffer;
7550 	info->spare		= NULL;
7551 	/* Force reading ring buffer for first read */
7552 	info->read		= (unsigned int)-1;
7553 
7554 	filp->private_data = info;
7555 
7556 	tr->trace_ref++;
7557 
7558 	mutex_unlock(&trace_types_lock);
7559 
7560 	ret = nonseekable_open(inode, filp);
7561 	if (ret < 0)
7562 		trace_array_put(tr);
7563 
7564 	return ret;
7565 }
7566 
7567 static __poll_t
7568 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7569 {
7570 	struct ftrace_buffer_info *info = filp->private_data;
7571 	struct trace_iterator *iter = &info->iter;
7572 
7573 	return trace_poll(iter, filp, poll_table);
7574 }
7575 
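/*
 * Read raw ring-buffer pages (the per-cpu trace_pipe_raw file).  Data is
 * pulled into a "spare" page obtained from the ring buffer and then copied
 * to user space; info->read tracks how much of that page has already been
 * consumed so a short read can continue where the last one left off.
 */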
7576 static ssize_t
7577 tracing_buffers_read(struct file *filp, char __user *ubuf,
7578 		     size_t count, loff_t *ppos)
7579 {
7580 	struct ftrace_buffer_info *info = filp->private_data;
7581 	struct trace_iterator *iter = &info->iter;
7582 	ssize_t ret = 0;
7583 	ssize_t size;
7584 
7585 	if (!count)
7586 		return 0;
7587 
7588 #ifdef CONFIG_TRACER_MAX_TRACE
7589 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7590 		return -EBUSY;
7591 #endif
7592 
7593 	if (!info->spare) {
7594 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7595 							  iter->cpu_file);
7596 		if (IS_ERR(info->spare)) {
7597 			ret = PTR_ERR(info->spare);
7598 			info->spare = NULL;
7599 		} else {
7600 			info->spare_cpu = iter->cpu_file;
7601 		}
7602 	}
7603 	if (!info->spare)
7604 		return ret;
7605 
7606 	/* Do we have previous read data to read? */
7607 	if (info->read < PAGE_SIZE)
7608 		goto read;
7609 
7610  again:
7611 	trace_access_lock(iter->cpu_file);
7612 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7613 				    &info->spare,
7614 				    count,
7615 				    iter->cpu_file, 0);
7616 	trace_access_unlock(iter->cpu_file);
7617 
7618 	if (ret < 0) {
7619 		if (trace_empty(iter)) {
7620 			if ((filp->f_flags & O_NONBLOCK))
7621 				return -EAGAIN;
7622 
7623 			ret = wait_on_pipe(iter, 0);
7624 			if (ret)
7625 				return ret;
7626 
7627 			goto again;
7628 		}
7629 		return 0;
7630 	}
7631 
7632 	info->read = 0;
7633  read:
7634 	size = PAGE_SIZE - info->read;
7635 	if (size > count)
7636 		size = count;
7637 
7638 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7639 	if (ret == size)
7640 		return -EFAULT;
7641 
7642 	size -= ret;
7643 
7644 	*ppos += size;
7645 	info->read += size;
7646 
7647 	return size;
7648 }
7649 
7650 static int tracing_buffers_release(struct inode *inode, struct file *file)
7651 {
7652 	struct ftrace_buffer_info *info = file->private_data;
7653 	struct trace_iterator *iter = &info->iter;
7654 
7655 	mutex_lock(&trace_types_lock);
7656 
7657 	iter->tr->trace_ref--;
7658 
7659 	__trace_array_put(iter->tr);
7660 
7661 	if (info->spare)
7662 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7663 					   info->spare_cpu, info->spare);
7664 	kvfree(info);
7665 
7666 	mutex_unlock(&trace_types_lock);
7667 
7668 	return 0;
7669 }
7670 
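/*
 * A refcounted reference to a ring-buffer read page that has been handed
 * to a pipe by splice.  The final put returns the page to the ring buffer
 * via ring_buffer_free_read_page().
 */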
7671 struct buffer_ref {
7672 	struct trace_buffer	*buffer;
7673 	void			*page;
7674 	int			cpu;
7675 	refcount_t		refcount;
7676 };
7677 
7678 static void buffer_ref_release(struct buffer_ref *ref)
7679 {
7680 	if (!refcount_dec_and_test(&ref->refcount))
7681 		return;
7682 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7683 	kfree(ref);
7684 }
7685 
7686 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7687 				    struct pipe_buffer *buf)
7688 {
7689 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7690 
7691 	buffer_ref_release(ref);
7692 	buf->private = 0;
7693 }
7694 
7695 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7696 				struct pipe_buffer *buf)
7697 {
7698 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7699 
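	/* Refuse further gets once the refcount gets near overflow territory. */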
7700 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7701 		return false;
7702 
7703 	refcount_inc(&ref->refcount);
7704 	return true;
7705 }
7706 
7707 /* Pipe buffer operations for a buffer. */
7708 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7709 	.release		= buffer_pipe_buf_release,
7710 	.get			= buffer_pipe_buf_get,
7711 };
7712 
7713 /*
7714  * Callback from splice_to_pipe(), if we need to release some pages
7715  * Callback from splice_to_pipe(); releases any pages still attached
7716  * to the spd if we errored out while filling the pipe.
7717 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7718 {
7719 	struct buffer_ref *ref =
7720 		(struct buffer_ref *)spd->partial[i].private;
7721 
7722 	buffer_ref_release(ref);
7723 	spd->partial[i].private = 0;
7724 }
7725 
7726 static ssize_t
7727 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7728 			    struct pipe_inode_info *pipe, size_t len,
7729 			    unsigned int flags)
7730 {
7731 	struct ftrace_buffer_info *info = file->private_data;
7732 	struct trace_iterator *iter = &info->iter;
7733 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7734 	struct page *pages_def[PIPE_DEF_BUFFERS];
7735 	struct splice_pipe_desc spd = {
7736 		.pages		= pages_def,
7737 		.partial	= partial_def,
7738 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7739 		.ops		= &buffer_pipe_buf_ops,
7740 		.spd_release	= buffer_spd_release,
7741 	};
7742 	struct buffer_ref *ref;
7743 	int entries, i;
7744 	ssize_t ret = 0;
7745 
7746 #ifdef CONFIG_TRACER_MAX_TRACE
7747 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7748 		return -EBUSY;
7749 #endif
7750 
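	/*
	 * Splice works in whole ring-buffer pages: the file offset must be
	 * page aligned, and the length is rounded down to a page multiple
	 * (a request smaller than one page is rejected).
	 */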
7751 	if (*ppos & (PAGE_SIZE - 1))
7752 		return -EINVAL;
7753 
7754 	if (len & (PAGE_SIZE - 1)) {
7755 		if (len < PAGE_SIZE)
7756 			return -EINVAL;
7757 		len &= PAGE_MASK;
7758 	}
7759 
7760 	if (splice_grow_spd(pipe, &spd))
7761 		return -ENOMEM;
7762 
7763  again:
7764 	trace_access_lock(iter->cpu_file);
7765 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7766 
7767 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7768 		struct page *page;
7769 		int r;
7770 
7771 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7772 		if (!ref) {
7773 			ret = -ENOMEM;
7774 			break;
7775 		}
7776 
7777 		refcount_set(&ref->refcount, 1);
7778 		ref->buffer = iter->array_buffer->buffer;
7779 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7780 		if (IS_ERR(ref->page)) {
7781 			ret = PTR_ERR(ref->page);
7782 			ref->page = NULL;
7783 			kfree(ref);
7784 			break;
7785 		}
7786 		ref->cpu = iter->cpu_file;
7787 
7788 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7789 					  len, iter->cpu_file, 1);
7790 		if (r < 0) {
7791 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7792 						   ref->page);
7793 			kfree(ref);
7794 			break;
7795 		}
7796 
7797 		page = virt_to_page(ref->page);
7798 
7799 		spd.pages[i] = page;
7800 		spd.partial[i].len = PAGE_SIZE;
7801 		spd.partial[i].offset = 0;
7802 		spd.partial[i].private = (unsigned long)ref;
7803 		spd.nr_pages++;
7804 		*ppos += PAGE_SIZE;
7805 
7806 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7807 	}
7808 
7809 	trace_access_unlock(iter->cpu_file);
7810 	spd.nr_pages = i;
7811 
7812 	/* did we read anything? */
7813 	if (!spd.nr_pages) {
7814 		if (ret)
7815 			goto out;
7816 
7817 		ret = -EAGAIN;
7818 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7819 			goto out;
7820 
7821 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7822 		if (ret)
7823 			goto out;
7824 
7825 		goto again;
7826 	}
7827 
7828 	ret = splice_to_pipe(pipe, &spd);
7829 out:
7830 	splice_shrink_spd(&spd);
7831 
7832 	return ret;
7833 }
7834 
7835 static const struct file_operations tracing_buffers_fops = {
7836 	.open		= tracing_buffers_open,
7837 	.read		= tracing_buffers_read,
7838 	.poll		= tracing_buffers_poll,
7839 	.release	= tracing_buffers_release,
7840 	.splice_read	= tracing_buffers_splice_read,
7841 	.llseek		= no_llseek,
7842 };
7843 
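/*
 * Report per-cpu ring-buffer statistics (the per_cpu/cpuN/stats file):
 * entries, overrun, commit overrun, bytes, the oldest event and current
 * timestamps, dropped events, and read events.
 */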
7844 static ssize_t
7845 tracing_stats_read(struct file *filp, char __user *ubuf,
7846 		   size_t count, loff_t *ppos)
7847 {
7848 	struct inode *inode = file_inode(filp);
7849 	struct trace_array *tr = inode->i_private;
7850 	struct array_buffer *trace_buf = &tr->array_buffer;
7851 	int cpu = tracing_get_cpu(inode);
7852 	struct trace_seq *s;
7853 	unsigned long cnt;
7854 	unsigned long long t;
7855 	unsigned long usec_rem;
7856 
7857 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7858 	if (!s)
7859 		return -ENOMEM;
7860 
7861 	trace_seq_init(s);
7862 
7863 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7864 	trace_seq_printf(s, "entries: %ld\n", cnt);
7865 
7866 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7867 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7868 
7869 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7870 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7871 
7872 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7873 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7874 
7875 	if (trace_clocks[tr->clock_id].in_ns) {
7876 		/* local or global for trace_clock */
7877 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7878 		usec_rem = do_div(t, USEC_PER_SEC);
7879 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7880 								t, usec_rem);
7881 
7882 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7883 		usec_rem = do_div(t, USEC_PER_SEC);
7884 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7885 	} else {
7886 		/* counter or tsc mode for trace_clock */
7887 		trace_seq_printf(s, "oldest event ts: %llu\n",
7888 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7889 
7890 		trace_seq_printf(s, "now ts: %llu\n",
7891 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7892 	}
7893 
7894 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7895 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7896 
7897 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7898 	trace_seq_printf(s, "read events: %ld\n", cnt);
7899 
7900 	count = simple_read_from_buffer(ubuf, count, ppos,
7901 					s->buffer, trace_seq_used(s));
7902 
7903 	kfree(s);
7904 
7905 	return count;
7906 }
7907 
7908 static const struct file_operations tracing_stats_fops = {
7909 	.open		= tracing_open_generic_tr,
7910 	.read		= tracing_stats_read,
7911 	.llseek		= generic_file_llseek,
7912 	.release	= tracing_release_generic_tr,
7913 };
7914 
7915 #ifdef CONFIG_DYNAMIC_FTRACE
7916 
7917 static ssize_t
7918 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7919 		  size_t cnt, loff_t *ppos)
7920 {
7921 	ssize_t ret;
7922 	char *buf;
7923 	int r;
7924 
7925 	/* 256 should be plenty to hold the amount needed */
7926 	buf = kmalloc(256, GFP_KERNEL);
7927 	if (!buf)
7928 		return -ENOMEM;
7929 
7930 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7931 		      ftrace_update_tot_cnt,
7932 		      ftrace_number_of_pages,
7933 		      ftrace_number_of_groups);
7934 
7935 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7936 	kfree(buf);
7937 	return ret;
7938 }
7939 
7940 static const struct file_operations tracing_dyn_info_fops = {
7941 	.open		= tracing_open_generic,
7942 	.read		= tracing_read_dyn_info,
7943 	.llseek		= generic_file_llseek,
7944 };
7945 #endif /* CONFIG_DYNAMIC_FTRACE */
7946 
7947 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7948 static void
7949 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7950 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7951 		void *data)
7952 {
7953 	tracing_snapshot_instance(tr);
7954 }
7955 
7956 static void
7957 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7958 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7959 		      void *data)
7960 {
7961 	struct ftrace_func_mapper *mapper = data;
7962 	long *count = NULL;
7963 
7964 	if (mapper)
7965 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7966 
7967 	if (count) {
7968 
7969 		if (*count <= 0)
7970 			return;
7971 
7972 		(*count)--;
7973 	}
7974 
7975 	tracing_snapshot_instance(tr);
7976 }
7977 
7978 static int
7979 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7980 		      struct ftrace_probe_ops *ops, void *data)
7981 {
7982 	struct ftrace_func_mapper *mapper = data;
7983 	long *count = NULL;
7984 
7985 	seq_printf(m, "%ps:", (void *)ip);
7986 
7987 	seq_puts(m, "snapshot");
7988 
7989 	if (mapper)
7990 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7991 
7992 	if (count)
7993 		seq_printf(m, ":count=%ld\n", *count);
7994 	else
7995 		seq_puts(m, ":unlimited\n");
7996 
7997 	return 0;
7998 }
7999 
8000 static int
8001 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8002 		     unsigned long ip, void *init_data, void **data)
8003 {
8004 	struct ftrace_func_mapper *mapper = *data;
8005 
8006 	if (!mapper) {
8007 		mapper = allocate_ftrace_func_mapper();
8008 		if (!mapper)
8009 			return -ENOMEM;
8010 		*data = mapper;
8011 	}
8012 
8013 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8014 }
8015 
8016 static void
8017 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8018 		     unsigned long ip, void *data)
8019 {
8020 	struct ftrace_func_mapper *mapper = data;
8021 
8022 	if (!ip) {
8023 		if (!mapper)
8024 			return;
8025 		free_ftrace_func_mapper(mapper, NULL);
8026 		return;
8027 	}
8028 
8029 	ftrace_func_mapper_remove_ip(mapper, ip);
8030 }
8031 
8032 static struct ftrace_probe_ops snapshot_probe_ops = {
8033 	.func			= ftrace_snapshot,
8034 	.print			= ftrace_snapshot_print,
8035 };
8036 
8037 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8038 	.func			= ftrace_count_snapshot,
8039 	.print			= ftrace_snapshot_print,
8040 	.init			= ftrace_snapshot_init,
8041 	.free			= ftrace_snapshot_free,
8042 };
8043 
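/*
 * Handler for the "snapshot" command written to set_ftrace_filter,
 * which takes the usual <function>:snapshot[:count] form, e.g.
 * (function name purely illustrative):
 *
 *   echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * With no count a snapshot is taken on every hit of the function;
 * with a count only that many snapshots are taken.  A leading '!'
 * removes the probe again.
 */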
8044 static int
8045 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8046 			       char *glob, char *cmd, char *param, int enable)
8047 {
8048 	struct ftrace_probe_ops *ops;
8049 	void *count = (void *)-1;
8050 	char *number;
8051 	int ret;
8052 
8053 	if (!tr)
8054 		return -ENODEV;
8055 
8056 	/* hash funcs only work with set_ftrace_filter */
8057 	if (!enable)
8058 		return -EINVAL;
8059 
8060 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8061 
8062 	if (glob[0] == '!')
8063 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8064 
8065 	if (!param)
8066 		goto out_reg;
8067 
8068 	number = strsep(&param, ":");
8069 
8070 	if (!strlen(number))
8071 		goto out_reg;
8072 
8073 	/*
8074 	 * We use the callback data field (which is a pointer)
8075 	 * as our counter.
8076 	 */
8077 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8078 	if (ret)
8079 		return ret;
8080 
8081  out_reg:
8082 	ret = tracing_alloc_snapshot_instance(tr);
8083 	if (ret < 0)
8084 		goto out;
8085 
8086 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8087 
8088  out:
8089 	return ret < 0 ? ret : 0;
8090 }
8091 
8092 static struct ftrace_func_command ftrace_snapshot_cmd = {
8093 	.name			= "snapshot",
8094 	.func			= ftrace_trace_snapshot_callback,
8095 };
8096 
8097 static __init int register_snapshot_cmd(void)
8098 {
8099 	return register_ftrace_command(&ftrace_snapshot_cmd);
8100 }
8101 #else
8102 static inline __init int register_snapshot_cmd(void) { return 0; }
8103 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8104 
8105 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8106 {
8107 	if (WARN_ON(!tr->dir))
8108 		return ERR_PTR(-ENODEV);
8109 
8110 	/* Top directory uses NULL as the parent */
8111 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8112 		return NULL;
8113 
8114 	/* All sub buffers have a descriptor */
8115 	return tr->dir;
8116 }
8117 
8118 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8119 {
8120 	struct dentry *d_tracer;
8121 
8122 	if (tr->percpu_dir)
8123 		return tr->percpu_dir;
8124 
8125 	d_tracer = tracing_get_dentry(tr);
8126 	if (IS_ERR(d_tracer))
8127 		return NULL;
8128 
8129 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8130 
8131 	MEM_FAIL(!tr->percpu_dir,
8132 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8133 
8134 	return tr->percpu_dir;
8135 }
8136 
8137 static struct dentry *
8138 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8139 		      void *data, long cpu, const struct file_operations *fops)
8140 {
8141 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8142 
8143 	if (ret) /* See tracing_get_cpu() */
8144 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8145 	return ret;
8146 }
8147 
8148 static void
8149 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8150 {
8151 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8152 	struct dentry *d_cpu;
8153 	char cpu_dir[30]; /* 30 characters should be more than enough */
8154 
8155 	if (!d_percpu)
8156 		return;
8157 
8158 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8159 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8160 	if (!d_cpu) {
8161 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8162 		return;
8163 	}
8164 
8165 	/* per cpu trace_pipe */
8166 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8167 				tr, cpu, &tracing_pipe_fops);
8168 
8169 	/* per cpu trace */
8170 	trace_create_cpu_file("trace", 0644, d_cpu,
8171 				tr, cpu, &tracing_fops);
8172 
8173 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8174 				tr, cpu, &tracing_buffers_fops);
8175 
8176 	trace_create_cpu_file("stats", 0444, d_cpu,
8177 				tr, cpu, &tracing_stats_fops);
8178 
8179 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8180 				tr, cpu, &tracing_entries_fops);
8181 
8182 #ifdef CONFIG_TRACER_SNAPSHOT
8183 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8184 				tr, cpu, &snapshot_fops);
8185 
8186 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8187 				tr, cpu, &snapshot_raw_fops);
8188 #endif
8189 }
8190 
8191 #ifdef CONFIG_FTRACE_SELFTEST
8192 /* Let selftest have access to static functions in this file */
8193 #include "trace_selftest.c"
8194 #endif
8195 
8196 static ssize_t
8197 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8198 			loff_t *ppos)
8199 {
8200 	struct trace_option_dentry *topt = filp->private_data;
8201 	char *buf;
8202 
8203 	if (topt->flags->val & topt->opt->bit)
8204 		buf = "1\n";
8205 	else
8206 		buf = "0\n";
8207 
8208 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8209 }
8210 
8211 static ssize_t
8212 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8213 			 loff_t *ppos)
8214 {
8215 	struct trace_option_dentry *topt = filp->private_data;
8216 	unsigned long val;
8217 	int ret;
8218 
8219 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8220 	if (ret)
8221 		return ret;
8222 
8223 	if (val != 0 && val != 1)
8224 		return -EINVAL;
8225 
8226 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8227 		mutex_lock(&trace_types_lock);
8228 		ret = __set_tracer_option(topt->tr, topt->flags,
8229 					  topt->opt, !val);
8230 		mutex_unlock(&trace_types_lock);
8231 		if (ret)
8232 			return ret;
8233 	}
8234 
8235 	*ppos += cnt;
8236 
8237 	return cnt;
8238 }
8239 
8240 
8241 static const struct file_operations trace_options_fops = {
8242 	.open = tracing_open_generic,
8243 	.read = trace_options_read,
8244 	.write = trace_options_write,
8245 	.llseek	= generic_file_llseek,
8246 };
8247 
8248 /*
8249  * In order to pass in both the trace_array descriptor as well as the index
8250  * to the flag that the trace option file represents, the trace_array
8251  * has a character array of trace_flags_index[], which holds the index
8252  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8253  * The address of this character array is passed to the flag option file
8254  * read/write callbacks.
8255  *
8256  * In order to extract both the index and the trace_array descriptor,
8257  * get_tr_index() uses the following algorithm.
8258  *
8259  *   idx = *ptr;
8260  *
8261  * The dereference works because each array element stores its own
8262  * position (remember index[1] == 1).
8263  *
8264  * Then, to get the trace_array descriptor, we subtract that index
8265  * from the ptr, which takes us back to the start of the array:
8266  *
8267  *   ptr - idx == &index[0]
8268  *
8269  * Then a simple container_of() from that pointer gets us to the
8270  * trace_array descriptor.
8271  */
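 *
 * For example, if data points at tr->trace_flags_index[3], then
 * *data == 3 and data - 3 == tr->trace_flags_index, so container_of()
 * hands back the enclosing trace_array.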
8272 static void get_tr_index(void *data, struct trace_array **ptr,
8273 			 unsigned int *pindex)
8274 {
8275 	*pindex = *(unsigned char *)data;
8276 
8277 	*ptr = container_of(data - *pindex, struct trace_array,
8278 			    trace_flags_index);
8279 }
8280 
8281 static ssize_t
8282 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8283 			loff_t *ppos)
8284 {
8285 	void *tr_index = filp->private_data;
8286 	struct trace_array *tr;
8287 	unsigned int index;
8288 	char *buf;
8289 
8290 	get_tr_index(tr_index, &tr, &index);
8291 
8292 	if (tr->trace_flags & (1 << index))
8293 		buf = "1\n";
8294 	else
8295 		buf = "0\n";
8296 
8297 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8298 }
8299 
8300 static ssize_t
8301 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8302 			 loff_t *ppos)
8303 {
8304 	void *tr_index = filp->private_data;
8305 	struct trace_array *tr;
8306 	unsigned int index;
8307 	unsigned long val;
8308 	int ret;
8309 
8310 	get_tr_index(tr_index, &tr, &index);
8311 
8312 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8313 	if (ret)
8314 		return ret;
8315 
8316 	if (val != 0 && val != 1)
8317 		return -EINVAL;
8318 
8319 	mutex_lock(&event_mutex);
8320 	mutex_lock(&trace_types_lock);
8321 	ret = set_tracer_flag(tr, 1 << index, val);
8322 	mutex_unlock(&trace_types_lock);
8323 	mutex_unlock(&event_mutex);
8324 
8325 	if (ret < 0)
8326 		return ret;
8327 
8328 	*ppos += cnt;
8329 
8330 	return cnt;
8331 }
8332 
8333 static const struct file_operations trace_options_core_fops = {
8334 	.open = tracing_open_generic,
8335 	.read = trace_options_core_read,
8336 	.write = trace_options_core_write,
8337 	.llseek = generic_file_llseek,
8338 };
8339 
8340 struct dentry *trace_create_file(const char *name,
8341 				 umode_t mode,
8342 				 struct dentry *parent,
8343 				 void *data,
8344 				 const struct file_operations *fops)
8345 {
8346 	struct dentry *ret;
8347 
8348 	ret = tracefs_create_file(name, mode, parent, data, fops);
8349 	if (!ret)
8350 		pr_warn("Could not create tracefs '%s' entry\n", name);
8351 
8352 	return ret;
8353 }
8354 
8355 
8356 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8357 {
8358 	struct dentry *d_tracer;
8359 
8360 	if (tr->options)
8361 		return tr->options;
8362 
8363 	d_tracer = tracing_get_dentry(tr);
8364 	if (IS_ERR(d_tracer))
8365 		return NULL;
8366 
8367 	tr->options = tracefs_create_dir("options", d_tracer);
8368 	if (!tr->options) {
8369 		pr_warn("Could not create tracefs directory 'options'\n");
8370 		return NULL;
8371 	}
8372 
8373 	return tr->options;
8374 }
8375 
8376 static void
8377 create_trace_option_file(struct trace_array *tr,
8378 			 struct trace_option_dentry *topt,
8379 			 struct tracer_flags *flags,
8380 			 struct tracer_opt *opt)
8381 {
8382 	struct dentry *t_options;
8383 
8384 	t_options = trace_options_init_dentry(tr);
8385 	if (!t_options)
8386 		return;
8387 
8388 	topt->flags = flags;
8389 	topt->opt = opt;
8390 	topt->tr = tr;
8391 
8392 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8393 				    &trace_options_fops);
8394 
8395 }
8396 
8397 static void
8398 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8399 {
8400 	struct trace_option_dentry *topts;
8401 	struct trace_options *tr_topts;
8402 	struct tracer_flags *flags;
8403 	struct tracer_opt *opts;
8404 	int cnt;
8405 	int i;
8406 
8407 	if (!tracer)
8408 		return;
8409 
8410 	flags = tracer->flags;
8411 
8412 	if (!flags || !flags->opts)
8413 		return;
8414 
8415 	/*
8416 	 * If this is an instance, only create flags for tracers
8417 	 * the instance may have.
8418 	 */
8419 	if (!trace_ok_for_array(tracer, tr))
8420 		return;
8421 
8422 	for (i = 0; i < tr->nr_topts; i++) {
8423 		/* Make sure there are no duplicate flags. */
8424 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8425 			return;
8426 	}
8427 
8428 	opts = flags->opts;
8429 
8430 	for (cnt = 0; opts[cnt].name; cnt++)
8431 		;
8432 
8433 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8434 	if (!topts)
8435 		return;
8436 
8437 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8438 			    GFP_KERNEL);
8439 	if (!tr_topts) {
8440 		kfree(topts);
8441 		return;
8442 	}
8443 
8444 	tr->topts = tr_topts;
8445 	tr->topts[tr->nr_topts].tracer = tracer;
8446 	tr->topts[tr->nr_topts].topts = topts;
8447 	tr->nr_topts++;
8448 
8449 	for (cnt = 0; opts[cnt].name; cnt++) {
8450 		create_trace_option_file(tr, &topts[cnt], flags,
8451 					 &opts[cnt]);
8452 		MEM_FAIL(topts[cnt].entry == NULL,
8453 			  "Failed to create trace option: %s",
8454 			  opts[cnt].name);
8455 	}
8456 }
8457 
8458 static struct dentry *
8459 create_trace_option_core_file(struct trace_array *tr,
8460 			      const char *option, long index)
8461 {
8462 	struct dentry *t_options;
8463 
8464 	t_options = trace_options_init_dentry(tr);
8465 	if (!t_options)
8466 		return NULL;
8467 
8468 	return trace_create_file(option, 0644, t_options,
8469 				 (void *)&tr->trace_flags_index[index],
8470 				 &trace_options_core_fops);
8471 }
8472 
8473 static void create_trace_options_dir(struct trace_array *tr)
8474 {
8475 	struct dentry *t_options;
8476 	bool top_level = tr == &global_trace;
8477 	int i;
8478 
8479 	t_options = trace_options_init_dentry(tr);
8480 	if (!t_options)
8481 		return;
8482 
8483 	for (i = 0; trace_options[i]; i++) {
8484 		if (top_level ||
8485 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8486 			create_trace_option_core_file(tr, trace_options[i], i);
8487 	}
8488 }
8489 
8490 static ssize_t
8491 rb_simple_read(struct file *filp, char __user *ubuf,
8492 	       size_t cnt, loff_t *ppos)
8493 {
8494 	struct trace_array *tr = filp->private_data;
8495 	char buf[64];
8496 	int r;
8497 
8498 	r = tracer_tracing_is_on(tr);
8499 	r = sprintf(buf, "%d\n", r);
8500 
8501 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8502 }
8503 
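/*
 * Handle writes to the "tracing_on" file: a non-zero value turns the ring
 * buffer on and calls the current tracer's ->start() hook, zero turns it
 * off and calls ->stop().  Writing the value it already has is a no-op.
 */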
8504 static ssize_t
8505 rb_simple_write(struct file *filp, const char __user *ubuf,
8506 		size_t cnt, loff_t *ppos)
8507 {
8508 	struct trace_array *tr = filp->private_data;
8509 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8510 	unsigned long val;
8511 	int ret;
8512 
8513 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8514 	if (ret)
8515 		return ret;
8516 
8517 	if (buffer) {
8518 		mutex_lock(&trace_types_lock);
8519 		if (!!val == tracer_tracing_is_on(tr)) {
8520 			val = 0; /* do nothing */
8521 		} else if (val) {
8522 			tracer_tracing_on(tr);
8523 			if (tr->current_trace->start)
8524 				tr->current_trace->start(tr);
8525 		} else {
8526 			tracer_tracing_off(tr);
8527 			if (tr->current_trace->stop)
8528 				tr->current_trace->stop(tr);
8529 		}
8530 		mutex_unlock(&trace_types_lock);
8531 	}
8532 
8533 	(*ppos)++;
8534 
8535 	return cnt;
8536 }
8537 
8538 static const struct file_operations rb_simple_fops = {
8539 	.open		= tracing_open_generic_tr,
8540 	.read		= rb_simple_read,
8541 	.write		= rb_simple_write,
8542 	.release	= tracing_release_generic_tr,
8543 	.llseek		= default_llseek,
8544 };
8545 
8546 static ssize_t
8547 buffer_percent_read(struct file *filp, char __user *ubuf,
8548 		    size_t cnt, loff_t *ppos)
8549 {
8550 	struct trace_array *tr = filp->private_data;
8551 	char buf[64];
8552 	int r;
8553 
8554 	r = tr->buffer_percent;
8555 	r = sprintf(buf, "%d\n", r);
8556 
8557 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8558 }
8559 
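/*
 * Set the "buffer_percent" watermark (0-100).  The value is passed to
 * wait_on_pipe() by the splice reader, so a reader blocked there is not
 * woken until the buffer has filled to roughly this percentage.  A value
 * of zero is bumped up to one.
 */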
8560 static ssize_t
8561 buffer_percent_write(struct file *filp, const char __user *ubuf,
8562 		     size_t cnt, loff_t *ppos)
8563 {
8564 	struct trace_array *tr = filp->private_data;
8565 	unsigned long val;
8566 	int ret;
8567 
8568 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8569 	if (ret)
8570 		return ret;
8571 
8572 	if (val > 100)
8573 		return -EINVAL;
8574 
8575 	if (!val)
8576 		val = 1;
8577 
8578 	tr->buffer_percent = val;
8579 
8580 	(*ppos)++;
8581 
8582 	return cnt;
8583 }
8584 
8585 static const struct file_operations buffer_percent_fops = {
8586 	.open		= tracing_open_generic_tr,
8587 	.read		= buffer_percent_read,
8588 	.write		= buffer_percent_write,
8589 	.release	= tracing_release_generic_tr,
8590 	.llseek		= default_llseek,
8591 };
8592 
8593 static struct dentry *trace_instance_dir;
8594 
8595 static void
8596 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8597 
8598 static int
8599 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8600 {
8601 	enum ring_buffer_flags rb_flags;
8602 
8603 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8604 
8605 	buf->tr = tr;
8606 
8607 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8608 	if (!buf->buffer)
8609 		return -ENOMEM;
8610 
8611 	buf->data = alloc_percpu(struct trace_array_cpu);
8612 	if (!buf->data) {
8613 		ring_buffer_free(buf->buffer);
8614 		buf->buffer = NULL;
8615 		return -ENOMEM;
8616 	}
8617 
8618 	/* Allocate the first page for all buffers */
8619 	set_buffer_entries(&tr->array_buffer,
8620 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8621 
8622 	return 0;
8623 }
8624 
8625 static int allocate_trace_buffers(struct trace_array *tr, int size)
8626 {
8627 	int ret;
8628 
8629 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8630 	if (ret)
8631 		return ret;
8632 
8633 #ifdef CONFIG_TRACER_MAX_TRACE
8634 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8635 				    allocate_snapshot ? size : 1);
8636 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8637 		ring_buffer_free(tr->array_buffer.buffer);
8638 		tr->array_buffer.buffer = NULL;
8639 		free_percpu(tr->array_buffer.data);
8640 		tr->array_buffer.data = NULL;
8641 		return -ENOMEM;
8642 	}
8643 	tr->allocated_snapshot = allocate_snapshot;
8644 
8645 	/*
8646 	 * Only the top level trace array gets its snapshot allocated
8647 	 * from the kernel command line.
8648 	 */
8649 	allocate_snapshot = false;
8650 #endif
8651 
8652 	return 0;
8653 }
8654 
8655 static void free_trace_buffer(struct array_buffer *buf)
8656 {
8657 	if (buf->buffer) {
8658 		ring_buffer_free(buf->buffer);
8659 		buf->buffer = NULL;
8660 		free_percpu(buf->data);
8661 		buf->data = NULL;
8662 	}
8663 }
8664 
8665 static void free_trace_buffers(struct trace_array *tr)
8666 {
8667 	if (!tr)
8668 		return;
8669 
8670 	free_trace_buffer(&tr->array_buffer);
8671 
8672 #ifdef CONFIG_TRACER_MAX_TRACE
8673 	free_trace_buffer(&tr->max_buffer);
8674 #endif
8675 }
8676 
8677 static void init_trace_flags_index(struct trace_array *tr)
8678 {
8679 	int i;
8680 
8681 	/* Used by the trace options files */
8682 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8683 		tr->trace_flags_index[i] = i;
8684 }
8685 
8686 static void __update_tracer_options(struct trace_array *tr)
8687 {
8688 	struct tracer *t;
8689 
8690 	for (t = trace_types; t; t = t->next)
8691 		add_tracer_options(tr, t);
8692 }
8693 
8694 static void update_tracer_options(struct trace_array *tr)
8695 {
8696 	mutex_lock(&trace_types_lock);
8697 	__update_tracer_options(tr);
8698 	mutex_unlock(&trace_types_lock);
8699 }
8700 
8701 /* Must have trace_types_lock held */
8702 struct trace_array *trace_array_find(const char *instance)
8703 {
8704 	struct trace_array *tr, *found = NULL;
8705 
8706 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8707 		if (tr->name && strcmp(tr->name, instance) == 0) {
8708 			found = tr;
8709 			break;
8710 		}
8711 	}
8712 
8713 	return found;
8714 }
8715 
8716 struct trace_array *trace_array_find_get(const char *instance)
8717 {
8718 	struct trace_array *tr;
8719 
8720 	mutex_lock(&trace_types_lock);
8721 	tr = trace_array_find(instance);
8722 	if (tr)
8723 		tr->ref++;
8724 	mutex_unlock(&trace_types_lock);
8725 
8726 	return tr;
8727 }
8728 
8729 static int trace_array_create_dir(struct trace_array *tr)
8730 {
8731 	int ret;
8732 
8733 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8734 	if (!tr->dir)
8735 		return -EINVAL;
8736 
8737 	ret = event_trace_add_tracer(tr->dir, tr);
8738 	if (ret)
8739 		tracefs_remove(tr->dir);
8740 
8741 	init_tracer_tracefs(tr, tr->dir);
8742 	__update_tracer_options(tr);
8743 
8744 	return ret;
8745 }
8746 
8747 static struct trace_array *trace_array_create(const char *name)
8748 {
8749 	struct trace_array *tr;
8750 	int ret;
8751 
8752 	ret = -ENOMEM;
8753 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8754 	if (!tr)
8755 		return ERR_PTR(ret);
8756 
8757 	tr->name = kstrdup(name, GFP_KERNEL);
8758 	if (!tr->name)
8759 		goto out_free_tr;
8760 
8761 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8762 		goto out_free_tr;
8763 
8764 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8765 
8766 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8767 
8768 	raw_spin_lock_init(&tr->start_lock);
8769 
8770 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8771 
8772 	tr->current_trace = &nop_trace;
8773 
8774 	INIT_LIST_HEAD(&tr->systems);
8775 	INIT_LIST_HEAD(&tr->events);
8776 	INIT_LIST_HEAD(&tr->hist_vars);
8777 	INIT_LIST_HEAD(&tr->err_log);
8778 
8779 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8780 		goto out_free_tr;
8781 
8782 	if (ftrace_allocate_ftrace_ops(tr) < 0)
8783 		goto out_free_tr;
8784 
8785 	ftrace_init_trace_array(tr);
8786 
8787 	init_trace_flags_index(tr);
8788 
8789 	if (trace_instance_dir) {
8790 		ret = trace_array_create_dir(tr);
8791 		if (ret)
8792 			goto out_free_tr;
8793 	} else
8794 		__trace_early_add_events(tr);
8795 
8796 	list_add(&tr->list, &ftrace_trace_arrays);
8797 
8798 	tr->ref++;
8799 
8800 	return tr;
8801 
8802  out_free_tr:
8803 	ftrace_free_ftrace_ops(tr);
8804 	free_trace_buffers(tr);
8805 	free_cpumask_var(tr->tracing_cpumask);
8806 	kfree(tr->name);
8807 	kfree(tr);
8808 
8809 	return ERR_PTR(ret);
8810 }
8811 
8812 static int instance_mkdir(const char *name)
8813 {
8814 	struct trace_array *tr;
8815 	int ret;
8816 
8817 	mutex_lock(&event_mutex);
8818 	mutex_lock(&trace_types_lock);
8819 
8820 	ret = -EEXIST;
8821 	if (trace_array_find(name))
8822 		goto out_unlock;
8823 
8824 	tr = trace_array_create(name);
8825 
8826 	ret = PTR_ERR_OR_ZERO(tr);
8827 
8828 out_unlock:
8829 	mutex_unlock(&trace_types_lock);
8830 	mutex_unlock(&event_mutex);
8831 	return ret;
8832 }
8833 
8834 /**
8835  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8836  * @name: The name of the trace array to be looked up/created.
8837  *
8838  * Returns a pointer to the trace array with the given name, or
8839  * NULL if it cannot be created.
8840  *
8841  * NOTE: This function increments the reference counter associated with the
8842  * trace array returned. This makes sure it cannot be freed while in use.
8843  * Use trace_array_put() once the trace array is no longer needed.
8844  * If the trace_array is to be freed, trace_array_destroy() needs to
8845  * be called after the trace_array_put(), or simply let user space delete
8846  * it from the tracefs instances directory. But until the
8847  * trace_array_put() is called, user space can not delete it.
8848  *
8849  */
8850 struct trace_array *trace_array_get_by_name(const char *name)
8851 {
8852 	struct trace_array *tr;
8853 
8854 	mutex_lock(&event_mutex);
8855 	mutex_lock(&trace_types_lock);
8856 
8857 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8858 		if (tr->name && strcmp(tr->name, name) == 0)
8859 			goto out_unlock;
8860 	}
8861 
8862 	tr = trace_array_create(name);
8863 
8864 	if (IS_ERR(tr))
8865 		tr = NULL;
8866 out_unlock:
8867 	if (tr)
8868 		tr->ref++;
8869 
8870 	mutex_unlock(&trace_types_lock);
8871 	mutex_unlock(&event_mutex);
8872 	return tr;
8873 }
8874 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8875 
8876 static int __remove_instance(struct trace_array *tr)
8877 {
8878 	int i;
8879 
8880 	/* Reference counter for a newly created trace array = 1. */
8881 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8882 		return -EBUSY;
8883 
8884 	list_del(&tr->list);
8885 
8886 	/* Disable all the flags that were enabled coming in */
8887 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8888 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8889 			set_tracer_flag(tr, 1 << i, 0);
8890 	}
8891 
8892 	tracing_set_nop(tr);
8893 	clear_ftrace_function_probes(tr);
8894 	event_trace_del_tracer(tr);
8895 	ftrace_clear_pids(tr);
8896 	ftrace_destroy_function_files(tr);
8897 	tracefs_remove(tr->dir);
8898 	free_trace_buffers(tr);
8899 
8900 	for (i = 0; i < tr->nr_topts; i++) {
8901 		kfree(tr->topts[i].topts);
8902 	}
8903 	kfree(tr->topts);
8904 
8905 	free_cpumask_var(tr->tracing_cpumask);
8906 	kfree(tr->name);
8907 	kfree(tr);
8908 
8909 	return 0;
8910 }
8911 
8912 int trace_array_destroy(struct trace_array *this_tr)
8913 {
8914 	struct trace_array *tr;
8915 	int ret;
8916 
8917 	if (!this_tr)
8918 		return -EINVAL;
8919 
8920 	mutex_lock(&event_mutex);
8921 	mutex_lock(&trace_types_lock);
8922 
8923 	ret = -ENODEV;
8924 
8925 	/* Making sure trace array exists before destroying it. */
8926 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8927 		if (tr == this_tr) {
8928 			ret = __remove_instance(tr);
8929 			break;
8930 		}
8931 	}
8932 
8933 	mutex_unlock(&trace_types_lock);
8934 	mutex_unlock(&event_mutex);
8935 
8936 	return ret;
8937 }
8938 EXPORT_SYMBOL_GPL(trace_array_destroy);
8939 
8940 static int instance_rmdir(const char *name)
8941 {
8942 	struct trace_array *tr;
8943 	int ret;
8944 
8945 	mutex_lock(&event_mutex);
8946 	mutex_lock(&trace_types_lock);
8947 
8948 	ret = -ENODEV;
8949 	tr = trace_array_find(name);
8950 	if (tr)
8951 		ret = __remove_instance(tr);
8952 
8953 	mutex_unlock(&trace_types_lock);
8954 	mutex_unlock(&event_mutex);
8955 
8956 	return ret;
8957 }
8958 
8959 static __init void create_trace_instances(struct dentry *d_tracer)
8960 {
8961 	struct trace_array *tr;
8962 
8963 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8964 							 instance_mkdir,
8965 							 instance_rmdir);
8966 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8967 		return;
8968 
8969 	mutex_lock(&event_mutex);
8970 	mutex_lock(&trace_types_lock);
8971 
8972 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8973 		if (!tr->name)
8974 			continue;
8975 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8976 			     "Failed to create instance directory\n"))
8977 			break;
8978 	}
8979 
8980 	mutex_unlock(&trace_types_lock);
8981 	mutex_unlock(&event_mutex);
8982 }
8983 
8984 static void
8985 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8986 {
8987 	struct trace_event_file *file;
8988 	int cpu;
8989 
8990 	trace_create_file("available_tracers", 0444, d_tracer,
8991 			tr, &show_traces_fops);
8992 
8993 	trace_create_file("current_tracer", 0644, d_tracer,
8994 			tr, &set_tracer_fops);
8995 
8996 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8997 			  tr, &tracing_cpumask_fops);
8998 
8999 	trace_create_file("trace_options", 0644, d_tracer,
9000 			  tr, &tracing_iter_fops);
9001 
9002 	trace_create_file("trace", 0644, d_tracer,
9003 			  tr, &tracing_fops);
9004 
9005 	trace_create_file("trace_pipe", 0444, d_tracer,
9006 			  tr, &tracing_pipe_fops);
9007 
9008 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9009 			  tr, &tracing_entries_fops);
9010 
9011 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9012 			  tr, &tracing_total_entries_fops);
9013 
9014 	trace_create_file("free_buffer", 0200, d_tracer,
9015 			  tr, &tracing_free_buffer_fops);
9016 
9017 	trace_create_file("trace_marker", 0220, d_tracer,
9018 			  tr, &tracing_mark_fops);
9019 
9020 	file = __find_event_file(tr, "ftrace", "print");
9021 	if (file && file->dir)
9022 		trace_create_file("trigger", 0644, file->dir, file,
9023 				  &event_trigger_fops);
9024 	tr->trace_marker_file = file;
9025 
9026 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9027 			  tr, &tracing_mark_raw_fops);
9028 
9029 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9030 			  &trace_clock_fops);
9031 
9032 	trace_create_file("tracing_on", 0644, d_tracer,
9033 			  tr, &rb_simple_fops);
9034 
9035 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9036 			  &trace_time_stamp_mode_fops);
9037 
9038 	tr->buffer_percent = 50;
9039 
9040 	trace_create_file("buffer_percent", 0444, d_tracer,
9041 			tr, &buffer_percent_fops);
9042 
9043 	create_trace_options_dir(tr);
9044 
9045 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9046 	trace_create_maxlat_file(tr, d_tracer);
9047 #endif
9048 
9049 	if (ftrace_create_function_files(tr, d_tracer))
9050 		MEM_FAIL(1, "Could not allocate function filter files");
9051 
9052 #ifdef CONFIG_TRACER_SNAPSHOT
9053 	trace_create_file("snapshot", 0644, d_tracer,
9054 			  tr, &snapshot_fops);
9055 #endif
9056 
9057 	trace_create_file("error_log", 0644, d_tracer,
9058 			  tr, &tracing_err_log_fops);
9059 
9060 	for_each_tracing_cpu(cpu)
9061 		tracing_init_tracefs_percpu(tr, cpu);
9062 
9063 	ftrace_init_tracefs(tr, d_tracer);
9064 }
9065 
9066 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9067 {
9068 	struct vfsmount *mnt;
9069 	struct file_system_type *type;
9070 
9071 	/*
9072 	 * To maintain backward compatibility for tools that mount
9073 	 * debugfs to get to the tracing facility, tracefs is automatically
9074 	 * mounted to the debugfs/tracing directory.
9075 	 */
9076 	type = get_fs_type("tracefs");
9077 	if (!type)
9078 		return NULL;
9079 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9080 	put_filesystem(type);
9081 	if (IS_ERR(mnt))
9082 		return NULL;
9083 	mntget(mnt);
9084 
9085 	return mnt;
9086 }
9087 
9088 /**
9089  * tracing_init_dentry - initialize top level trace array
9090  *
9091  * This is called when creating files or directories in the tracing
9092  * directory. It is called via fs_initcall() by any of the boot up code
9093  * and expects to return the dentry of the top level tracing directory.
9094  * and returns zero on success or a negative error code on failure.
9095 int tracing_init_dentry(void)
9096 {
9097 	struct trace_array *tr = &global_trace;
9098 
9099 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9100 		pr_warn("Tracing disabled due to lockdown\n");
9101 		return -EPERM;
9102 	}
9103 
9104 	/* The top level trace array uses NULL as parent */
9105 	if (tr->dir)
9106 		return 0;
9107 
9108 	if (WARN_ON(!tracefs_initialized()))
9109 		return -ENODEV;
9110 
9111 	/*
9112 	 * As there may still be users that expect the tracing
9113 	 * files to exist in debugfs/tracing, we must automount
9114 	 * the tracefs file system there, so older tools still
9115 	 * work with the newer kernel.
9116 	 */
9117 	tr->dir = debugfs_create_automount("tracing", NULL,
9118 					   trace_automount, NULL);
9119 
9120 	return 0;
9121 }
9122 
9123 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9124 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9125 
9126 static struct workqueue_struct *eval_map_wq __initdata;
9127 static struct work_struct eval_map_work __initdata;
9128 
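/*
 * The boot-time eval maps are inserted from a workqueue: trace_eval_init()
 * queues eval_map_work and falls back to running it synchronously if the
 * workqueue cannot be allocated, and trace_eval_sync() flushes it (by
 * destroying the workqueue) at late_initcall time.
 */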
9129 static void __init eval_map_work_func(struct work_struct *work)
9130 {
9131 	int len;
9132 
9133 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9134 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9135 }
9136 
9137 static int __init trace_eval_init(void)
9138 {
9139 	INIT_WORK(&eval_map_work, eval_map_work_func);
9140 
9141 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9142 	if (!eval_map_wq) {
9143 		pr_err("Unable to allocate eval_map_wq\n");
9144 		/* Do work here */
9145 		eval_map_work_func(&eval_map_work);
9146 		return -ENOMEM;
9147 	}
9148 
9149 	queue_work(eval_map_wq, &eval_map_work);
9150 	return 0;
9151 }
9152 
9153 static int __init trace_eval_sync(void)
9154 {
9155 	/* Make sure the eval map updates are finished */
9156 	if (eval_map_wq)
9157 		destroy_workqueue(eval_map_wq);
9158 	return 0;
9159 }
9160 
9161 late_initcall_sync(trace_eval_sync);
9162 
9163 
9164 #ifdef CONFIG_MODULES
9165 static void trace_module_add_evals(struct module *mod)
9166 {
9167 	if (!mod->num_trace_evals)
9168 		return;
9169 
9170 	/*
9171 	 * Modules with bad taint do not have events created; do
9172 	 * not bother with their eval maps (enums) either.
9173 	 */
9174 	if (trace_module_has_bad_taint(mod))
9175 		return;
9176 
9177 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9178 }
9179 
9180 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9181 static void trace_module_remove_evals(struct module *mod)
9182 {
9183 	union trace_eval_map_item *map;
9184 	union trace_eval_map_item **last = &trace_eval_maps;
9185 
9186 	if (!mod->num_trace_evals)
9187 		return;
9188 
9189 	mutex_lock(&trace_eval_mutex);
9190 
9191 	map = trace_eval_maps;
9192 
9193 	while (map) {
9194 		if (map->head.mod == mod)
9195 			break;
9196 		map = trace_eval_jmp_to_tail(map);
9197 		last = &map->tail.next;
9198 		map = map->tail.next;
9199 	}
9200 	if (!map)
9201 		goto out;
9202 
9203 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9204 	kfree(map);
9205  out:
9206 	mutex_unlock(&trace_eval_mutex);
9207 }
9208 #else
9209 static inline void trace_module_remove_evals(struct module *mod) { }
9210 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9211 
9212 static int trace_module_notify(struct notifier_block *self,
9213 			       unsigned long val, void *data)
9214 {
9215 	struct module *mod = data;
9216 
9217 	switch (val) {
9218 	case MODULE_STATE_COMING:
9219 		trace_module_add_evals(mod);
9220 		break;
9221 	case MODULE_STATE_GOING:
9222 		trace_module_remove_evals(mod);
9223 		break;
9224 	}
9225 
9226 	return NOTIFY_OK;
9227 }
9228 
9229 static struct notifier_block trace_module_nb = {
9230 	.notifier_call = trace_module_notify,
9231 	.priority = 0,
9232 };
9233 #endif /* CONFIG_MODULES */
9234 
9235 static __init int tracer_init_tracefs(void)
9236 {
9237 	int ret;
9238 
9239 	trace_access_lock_init();
9240 
9241 	ret = tracing_init_dentry();
9242 	if (ret)
9243 		return 0;
9244 
9245 	event_trace_init();
9246 
9247 	init_tracer_tracefs(&global_trace, NULL);
9248 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9249 
9250 	trace_create_file("tracing_thresh", 0644, NULL,
9251 			&global_trace, &tracing_thresh_fops);
9252 
9253 	trace_create_file("README", 0444, NULL,
9254 			NULL, &tracing_readme_fops);
9255 
9256 	trace_create_file("saved_cmdlines", 0444, NULL,
9257 			NULL, &tracing_saved_cmdlines_fops);
9258 
9259 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9260 			  NULL, &tracing_saved_cmdlines_size_fops);
9261 
9262 	trace_create_file("saved_tgids", 0444, NULL,
9263 			NULL, &tracing_saved_tgids_fops);
9264 
9265 	trace_eval_init();
9266 
9267 	trace_create_eval_file(NULL);
9268 
9269 #ifdef CONFIG_MODULES
9270 	register_module_notifier(&trace_module_nb);
9271 #endif
9272 
9273 #ifdef CONFIG_DYNAMIC_FTRACE
9274 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9275 			NULL, &tracing_dyn_info_fops);
9276 #endif
9277 
9278 	create_trace_instances(NULL);
9279 
9280 	update_tracer_options(&global_trace);
9281 
9282 	return 0;
9283 }
9284 
9285 static int trace_panic_handler(struct notifier_block *this,
9286 			       unsigned long event, void *unused)
9287 {
9288 	if (ftrace_dump_on_oops)
9289 		ftrace_dump(ftrace_dump_on_oops);
9290 	return NOTIFY_OK;
9291 }
9292 
9293 static struct notifier_block trace_panic_notifier = {
9294 	.notifier_call  = trace_panic_handler,
9295 	.next           = NULL,
9296 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9297 };
9298 
9299 static int trace_die_handler(struct notifier_block *self,
9300 			     unsigned long val,
9301 			     void *data)
9302 {
9303 	switch (val) {
9304 	case DIE_OOPS:
9305 		if (ftrace_dump_on_oops)
9306 			ftrace_dump(ftrace_dump_on_oops);
9307 		break;
9308 	default:
9309 		break;
9310 	}
9311 	return NOTIFY_OK;
9312 }
9313 
9314 static struct notifier_block trace_die_notifier = {
9315 	.notifier_call = trace_die_handler,
9316 	.priority = 200
9317 };
9318 
9319 /*
9320  * printk is limited to a max of 1024; we really don't need it that big.
9321  * Nothing should be printing 1000 characters anyway.
9322  */
9323 #define TRACE_MAX_PRINT		1000
9324 
9325 /*
9326  * Define here KERN_TRACE so that we have one place to modify
9327  * it if we decide to change what log level the ftrace dump
9328  * should be at.
9329  */
9330 #define KERN_TRACE		KERN_EMERG
9331 
9332 void
9333 trace_printk_seq(struct trace_seq *s)
9334 {
9335 	/* Probably should print a warning here. */
9336 	if (s->seq.len >= TRACE_MAX_PRINT)
9337 		s->seq.len = TRACE_MAX_PRINT;
9338 
9339 	/*
9340 	 * More paranoid code. Although the buffer size is set to
9341 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9342 	 * an extra layer of protection.
9343 	 */
9344 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9345 		s->seq.len = s->seq.size - 1;
9346 
9347 	/* should be zero ended, but we are paranoid. */
9348 	/* Should be zero terminated, but we are paranoid. */
9349 
9350 	printk(KERN_TRACE "%s", s->buffer);
9351 
9352 	trace_seq_init(s);
9353 }
9354 
9355 void trace_init_global_iter(struct trace_iterator *iter)
9356 {
9357 	iter->tr = &global_trace;
9358 	iter->trace = iter->tr->current_trace;
9359 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9360 	iter->array_buffer = &global_trace.array_buffer;
9361 
9362 	if (iter->trace && iter->trace->open)
9363 		iter->trace->open(iter);
9364 
9365 	/* Annotate start of buffers if we had overruns */
9366 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9367 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9368 
9369 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9370 	if (trace_clocks[iter->tr->clock_id].in_ns)
9371 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9372 }
9373 
9374 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9375 {
9376 	/* use static because iter can be a bit big for the stack */
9377 	static struct trace_iterator iter;
9378 	static atomic_t dump_running;
9379 	struct trace_array *tr = &global_trace;
9380 	unsigned int old_userobj;
9381 	unsigned long flags;
9382 	int cnt = 0, cpu;
9383 
9384 	/* Only allow one dump user at a time. */
9385 	if (atomic_inc_return(&dump_running) != 1) {
9386 		atomic_dec(&dump_running);
9387 		return;
9388 	}
9389 
9390 	/*
9391 	 * Always turn off tracing when we dump.
9392 	 * We don't need to show trace output of what happens
9393 	 * between multiple crashes.
9394 	 *
9395 	 * If the user does a sysrq-z, then they can re-enable
9396 	 * tracing with echo 1 > tracing_on.
9397 	 */
9398 	tracing_off();
9399 
9400 	local_irq_save(flags);
9401 	printk_nmi_direct_enter();
9402 
9403 	/* Simulate the iterator */
9404 	trace_init_global_iter(&iter);
9405 	/* Can not use kmalloc for iter.temp and iter.fmt */
9406 	iter.temp = static_temp_buf;
9407 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9408 	iter.fmt = static_fmt_buf;
9409 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9410 
9411 	for_each_tracing_cpu(cpu) {
9412 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9413 	}
9414 
9415 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9416 
9417 	/* don't look at user memory in panic mode */
9418 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9419 
9420 	switch (oops_dump_mode) {
9421 	case DUMP_ALL:
9422 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9423 		break;
9424 	case DUMP_ORIG:
9425 		iter.cpu_file = raw_smp_processor_id();
9426 		break;
9427 	case DUMP_NONE:
9428 		goto out_enable;
9429 	default:
9430 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9431 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9432 	}
9433 
9434 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9435 
9436 	/* Did function tracer already get disabled? */
9437 	if (ftrace_is_dead()) {
9438 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9439 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9440 	}
9441 
9442 	/*
9443 	 * We need to stop all tracing on all CPUs to read
9444 	 * the next buffer. This is a bit expensive, but is
9445 	 * not done often. We fill in everything we can read,
9446 	 * and then release the locks again.
9447 	 */
9448 
9449 	while (!trace_empty(&iter)) {
9450 
9451 		if (!cnt)
9452 			printk(KERN_TRACE "---------------------------------\n");
9453 
9454 		cnt++;
9455 
9456 		trace_iterator_reset(&iter);
9457 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9458 
9459 		if (trace_find_next_entry_inc(&iter) != NULL) {
9460 			int ret;
9461 
9462 			ret = print_trace_line(&iter);
9463 			if (ret != TRACE_TYPE_NO_CONSUME)
9464 				trace_consume(&iter);
9465 		}
9466 		touch_nmi_watchdog();
9467 
9468 		trace_printk_seq(&iter.seq);
9469 	}
9470 
9471 	if (!cnt)
9472 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9473 	else
9474 		printk(KERN_TRACE "---------------------------------\n");
9475 
9476  out_enable:
9477 	tr->trace_flags |= old_userobj;
9478 
9479 	for_each_tracing_cpu(cpu) {
9480 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9481 	}
9482 	atomic_dec(&dump_running);
9483 	printk_nmi_direct_exit();
9484 	local_irq_restore(flags);
9485 }
9486 EXPORT_SYMBOL_GPL(ftrace_dump);
9487 
9488 #define WRITE_BUFSIZE  4096
9489 
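/*
 * Generic helper for command-style trace files: copy the user buffer in
 * WRITE_BUFSIZE chunks, split it on newlines, strip '#' comments, and
 * feed each resulting line to @createfn.  Returns the number of bytes
 * consumed or a negative error.
 */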
9490 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9491 				size_t count, loff_t *ppos,
9492 				int (*createfn)(const char *))
9493 {
9494 	char *kbuf, *buf, *tmp;
9495 	int ret = 0;
9496 	size_t done = 0;
9497 	size_t size;
9498 
9499 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9500 	if (!kbuf)
9501 		return -ENOMEM;
9502 
9503 	while (done < count) {
9504 		size = count - done;
9505 
9506 		if (size >= WRITE_BUFSIZE)
9507 			size = WRITE_BUFSIZE - 1;
9508 
9509 		if (copy_from_user(kbuf, buffer + done, size)) {
9510 			ret = -EFAULT;
9511 			goto out;
9512 		}
9513 		kbuf[size] = '\0';
9514 		buf = kbuf;
9515 		do {
9516 			tmp = strchr(buf, '\n');
9517 			if (tmp) {
9518 				*tmp = '\0';
9519 				size = tmp - buf + 1;
9520 			} else {
9521 				size = strlen(buf);
9522 				if (done + size < count) {
9523 					if (buf != kbuf)
9524 						break;
9525 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9526 					pr_warn("Line length is too long: Should be less than %d\n",
9527 						WRITE_BUFSIZE - 2);
9528 					ret = -EINVAL;
9529 					goto out;
9530 				}
9531 			}
9532 			done += size;
9533 
9534 			/* Remove comments */
9535 			tmp = strchr(buf, '#');
9536 
9537 			if (tmp)
9538 				*tmp = '\0';
9539 
9540 			ret = createfn(buf);
9541 			if (ret)
9542 				goto out;
9543 			buf += size;
9544 
9545 		} while (done < count);
9546 	}
9547 	ret = done;
9548 
9549 out:
9550 	kfree(kbuf);
9551 
9552 	return ret;
9553 }
9554 
9555 __init static int tracer_alloc_buffers(void)
9556 {
9557 	int ring_buf_size;
9558 	int ret = -ENOMEM;
9559 
9560 
9561 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9562 		pr_warn("Tracing disabled due to lockdown\n");
9563 		return -EPERM;
9564 	}
9565 
9566 	/*
9567 	 * Make sure we don't accidentally add more trace options
9568 	 * than we have bits for.
9569 	 */
9570 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9571 
9572 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9573 		goto out;
9574 
9575 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9576 		goto out_free_buffer_mask;
9577 
9578 	/* Only allocate trace_printk buffers if a trace_printk exists */
9579 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9580 		/* Must be called before global_trace.buffer is allocated */
9581 		trace_printk_init_buffers();
9582 
9583 	/* To save memory, keep the ring buffer size to its minimum */
9584 	if (ring_buffer_expanded)
9585 		ring_buf_size = trace_buf_size;
9586 	else
9587 		ring_buf_size = 1;
9588 
9589 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9590 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9591 
9592 	raw_spin_lock_init(&global_trace.start_lock);
9593 
9594 	/*
9595 	 * The prepare callback allocates some memory for the ring buffer. We
9596 	 * don't free the buffer if the CPU goes down. If we were to free
9597 	 * the buffer, then the user would lose any trace that was in the
9598 	 * buffer. The memory will be removed once the "instance" is removed.
9599 	 */
9600 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9601 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9602 				      NULL);
9603 	if (ret < 0)
9604 		goto out_free_cpumask;
9605 	/* Used for event triggers */
9606 	ret = -ENOMEM;
9607 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9608 	if (!temp_buffer)
9609 		goto out_rm_hp_state;
9610 
9611 	if (trace_create_savedcmd() < 0)
9612 		goto out_free_temp_buffer;
9613 
9614 	/* TODO: make the number of buffers hot pluggable with CPUS */
9615 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9616 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9617 		goto out_free_savedcmd;
9618 	}
9619 
9620 	if (global_trace.buffer_disabled)
9621 		tracing_off();
9622 
9623 	if (trace_boot_clock) {
9624 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9625 		if (ret < 0)
9626 			pr_warn("Trace clock %s not defined, falling back to the default\n",
9627 				trace_boot_clock);
9628 	}
9629 
9630 	/*
9631 	 * register_tracer() might reference current_trace, so it
9632 	 * needs to be set before we register anything. This is
9633 	 * just a bootstrap of current_trace anyway.
9634 	 */
9635 	global_trace.current_trace = &nop_trace;
9636 
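	/* max_lock protects swapping buffers when taking a max-latency snapshot */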
9637 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9638 
9639 	ftrace_init_global_array_ops(&global_trace);
9640 
9641 	init_trace_flags_index(&global_trace);
9642 
9643 	register_tracer(&nop_trace);
9644 
9645 	/* Function tracing may start here (via kernel command line) */
9646 	init_function_trace();
9647 
9648 	/* All seems OK, enable tracing */
9649 	tracing_disabled = 0;
9650 
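	/*
	 * Register panic and die notifiers so the trace buffers can be
	 * dumped on a crash (see the ftrace_dump_on_oops option).
	 */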
9651 	atomic_notifier_chain_register(&panic_notifier_list,
9652 				       &trace_panic_notifier);
9653 
9654 	register_die_notifier(&trace_die_notifier);
9655 
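	/* Set up the top-level trace array and add it to the list of trace instances */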
9656 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9657 
9658 	INIT_LIST_HEAD(&global_trace.systems);
9659 	INIT_LIST_HEAD(&global_trace.events);
9660 	INIT_LIST_HEAD(&global_trace.hist_vars);
9661 	INIT_LIST_HEAD(&global_trace.err_log);
9662 	list_add(&global_trace.list, &ftrace_trace_arrays);
9663 
9664 	apply_trace_boot_options();
9665 
9666 	register_snapshot_cmd();
9667 
9668 	return 0;
9669 
9670 out_free_savedcmd:
9671 	free_saved_cmdlines_buffer(savedcmd);
9672 out_free_temp_buffer:
9673 	ring_buffer_free(temp_buffer);
9674 out_rm_hp_state:
9675 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9676 out_free_cpumask:
9677 	free_cpumask_var(global_trace.tracing_cpumask);
9678 out_free_buffer_mask:
9679 	free_cpumask_var(tracing_buffer_mask);
9680 out:
9681 	return ret;
9682 }
9683 
9684 void __init early_trace_init(void)
9685 {
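	/*
	 * tracepoint_printk is set by the "tp_printk" boot parameter;
	 * allocate the iterator used to relay tracepoint output to printk.
	 */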
9686 	if (tracepoint_printk) {
9687 		tracepoint_print_iter =
9688 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9689 		if (MEM_FAIL(!tracepoint_print_iter,
9690 			     "Failed to allocate trace iterator\n"))
9691 			tracepoint_printk = 0;
9692 		else
9693 			static_key_enable(&tracepoint_printk_key.key);
9694 	}
9695 	tracer_alloc_buffers();
9696 }
9697 
9698 void __init trace_init(void)
9699 {
9700 	trace_event_init();
9701 }
9702 
9703 __init static int clear_boot_tracer(void)
9704 {
9705 	/*
9706 	 * The name of the default boot-up tracer lives in an init
9707 	 * section that is freed after boot. This function runs at
9708 	 * late_initcall time: if the boot tracer was never registered,
9709 	 * clear the pointer so that a later registration does not
9710 	 * access memory that is about to be freed.
9711 	 */
9712 	if (!default_bootup_tracer)
9713 		return 0;
9714 
9715 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9716 	       default_bootup_tracer);
9717 	default_bootup_tracer = NULL;
9718 
9719 	return 0;
9720 }
9721 
9722 fs_initcall(tracer_init_tracefs);
9723 late_initcall_sync(clear_boot_tracer);
9724 
9725 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9726 __init static int tracing_set_default_clock(void)
9727 {
9728 	/* sched_clock_stable() is determined in late_initcall */
9729 	if (!trace_boot_clock && !sched_clock_stable()) {
9730 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9731 			pr_warn("Cannot set tracing clock due to lockdown\n");
9732 			return -EPERM;
9733 		}
9734 
9735 		printk(KERN_WARNING
9736 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9737 		       "If you want to keep using the local clock, then add:\n"
9738 		       "  \"trace_clock=local\"\n"
9739 		       "on the kernel command line\n");
9740 		tracing_set_clock(&global_trace, "global");
9741 	}
9742 
9743 	return 0;
9744 }
9745 late_initcall_sync(tracing_set_default_clock);
9746 #endif
9747