xref: /openbmc/linux/kernel/trace/trace.c (revision ae6f2db4d59e9f8c90cb3c2d2a954832898d0f2b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could
67  * occur at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * from "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
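
/*
 * Illustrative sketch (not part of the original file): how a reader could
 * walk the saved eval maps given the layout described above. The function
 * name below is hypothetical; the real consumers also detect the tail
 * element by its NULL eval_string.
 */
static __maybe_unused void example_walk_eval_maps(void)
{
	union trace_eval_map_item *ptr = trace_eval_maps;

	while (ptr) {
		unsigned long len = ptr->head.length;
		unsigned long i;

		/* The saved maps sit between the head and tail elements */
		for (i = 1; i <= len; i++)
			pr_info("%s: %s = %lu\n", ptr[i].map.system,
				ptr[i].map.eval_string, ptr[i].map.eval_value);

		/* The tail element links to the next saved array, if any */
		ptr = ptr[len + 1].tail.next;
	}
}
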
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static int __init set_cmdline_ftrace(char *str)
191 {
192 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193 	default_bootup_tracer = bootup_tracer_buf;
194 	/* We are using ftrace early, expand it */
195 	ring_buffer_expanded = true;
196 	return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199 
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
203 		ftrace_dump_on_oops = DUMP_ALL;
204 		return 1;
205 	}
206 
207 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208 		ftrace_dump_on_oops = DUMP_ORIG;
209 		return 1;
210 	}
211 
212 	return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215 
216 static int __init stop_trace_on_warning(char *str)
217 {
218 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219 		__disable_trace_on_warning = 1;
220 	return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223 
224 static int __init boot_alloc_snapshot(char *str)
225 {
226 	allocate_snapshot = true;
227 	/* We also need the main ring buffer expanded */
228 	ring_buffer_expanded = true;
229 	return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232 
233 
234 static int __init boot_snapshot(char *str)
235 {
236 	snapshot_at_boot = true;
237 	boot_alloc_snapshot(str);
238 	return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241 
242 
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244 
245 static int __init set_trace_boot_options(char *str)
246 {
247 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248 	return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251 
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254 
255 static int __init set_trace_boot_clock(char *str)
256 {
257 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258 	trace_boot_clock = trace_boot_clock_buf;
259 	return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262 
263 static int __init set_tracepoint_printk(char *str)
264 {
265 	/* Ignore the "tp_printk_stop_on_boot" param */
266 	if (*str == '_')
267 		return 0;
268 
269 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270 		tracepoint_printk = 1;
271 	return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274 
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277 	tracepoint_printk_stop_on_boot = true;
278 	return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281 
282 unsigned long long ns2usecs(u64 nsec)
283 {
284 	nsec += 500;
285 	do_div(nsec, 1000);
286 	return nsec;
287 }
288 
289 static void
290 trace_process_export(struct trace_export *export,
291 	       struct ring_buffer_event *event, int flag)
292 {
293 	struct trace_entry *entry;
294 	unsigned int size = 0;
295 
296 	if (export->flags & flag) {
297 		entry = ring_buffer_event_data(event);
298 		size = ring_buffer_event_length(event);
299 		export->write(export, entry, size);
300 	}
301 }
302 
303 static DEFINE_MUTEX(ftrace_export_lock);
304 
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306 
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310 
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313 	if (export->flags & TRACE_EXPORT_FUNCTION)
314 		static_branch_inc(&trace_function_exports_enabled);
315 
316 	if (export->flags & TRACE_EXPORT_EVENT)
317 		static_branch_inc(&trace_event_exports_enabled);
318 
319 	if (export->flags & TRACE_EXPORT_MARKER)
320 		static_branch_inc(&trace_marker_exports_enabled);
321 }
322 
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325 	if (export->flags & TRACE_EXPORT_FUNCTION)
326 		static_branch_dec(&trace_function_exports_enabled);
327 
328 	if (export->flags & TRACE_EXPORT_EVENT)
329 		static_branch_dec(&trace_event_exports_enabled);
330 
331 	if (export->flags & TRACE_EXPORT_MARKER)
332 		static_branch_dec(&trace_marker_exports_enabled);
333 }
334 
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337 	struct trace_export *export;
338 
339 	preempt_disable_notrace();
340 
341 	export = rcu_dereference_raw_check(ftrace_exports_list);
342 	while (export) {
343 		trace_process_export(export, event, flag);
344 		export = rcu_dereference_raw_check(export->next);
345 	}
346 
347 	preempt_enable_notrace();
348 }
349 
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353 	rcu_assign_pointer(export->next, *list);
354 	/*
355 	 * We are adding export to the list, but another
356 	 * CPU might be walking that list. We need to make sure
357 	 * the export->next pointer is valid before another CPU sees
358 	 * the export pointer included in the list.
359 	 */
360 	rcu_assign_pointer(*list, export);
361 }
362 
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366 	struct trace_export **p;
367 
368 	for (p = list; *p != NULL; p = &(*p)->next)
369 		if (*p == export)
370 			break;
371 
372 	if (*p != export)
373 		return -1;
374 
375 	rcu_assign_pointer(*p, (*p)->next);
376 
377 	return 0;
378 }
379 
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383 	ftrace_exports_enable(export);
384 
385 	add_trace_export(list, export);
386 }
387 
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391 	int ret;
392 
393 	ret = rm_trace_export(list, export);
394 	ftrace_exports_disable(export);
395 
396 	return ret;
397 }
398 
399 int register_ftrace_export(struct trace_export *export)
400 {
401 	if (WARN_ON_ONCE(!export->write))
402 		return -1;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	add_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413 
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416 	int ret;
417 
418 	mutex_lock(&ftrace_export_lock);
419 
420 	ret = rm_ftrace_export(&ftrace_exports_list, export);
421 
422 	mutex_unlock(&ftrace_export_lock);
423 
424 	return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
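
/*
 * Illustrative sketch (not part of the original file): an external consumer,
 * such as a module, can mirror trace data to its own sink by registering a
 * trace_export. The callback and variable names below are hypothetical.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* "entry" is the raw binary event payload, "size" its length */
	pr_debug("exporting %u bytes of trace data\n", size);
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

/*
 * A caller would then pair:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */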
427 
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS						\
430 	(FUNCTION_DEFAULT_FLAGS |					\
431 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
432 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
433 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
434 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
435 	 TRACE_ITER_HASH_PTR)
436 
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
439 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440 
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444 
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450 	.trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452 
453 LIST_HEAD(ftrace_trace_arrays);
454 
455 int trace_array_get(struct trace_array *this_tr)
456 {
457 	struct trace_array *tr;
458 	int ret = -ENODEV;
459 
460 	mutex_lock(&trace_types_lock);
461 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462 		if (tr == this_tr) {
463 			tr->ref++;
464 			ret = 0;
465 			break;
466 		}
467 	}
468 	mutex_unlock(&trace_types_lock);
469 
470 	return ret;
471 }
472 
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475 	WARN_ON(!this_tr->ref);
476 	this_tr->ref--;
477 }
478 
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490 	if (!this_tr)
491 		return;
492 
493 	mutex_lock(&trace_types_lock);
494 	__trace_array_put(this_tr);
495 	mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498 
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501 	int ret;
502 
503 	ret = security_locked_down(LOCKDOWN_TRACEFS);
504 	if (ret)
505 		return ret;
506 
507 	if (tracing_disabled)
508 		return -ENODEV;
509 
510 	if (tr && trace_array_get(tr) < 0)
511 		return -ENODEV;
512 
513 	return 0;
514 }
515 
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517 			      struct trace_buffer *buffer,
518 			      struct ring_buffer_event *event)
519 {
520 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521 	    !filter_match_preds(call->filter, rec)) {
522 		__trace_event_discard_commit(buffer, event);
523 		return 1;
524 	}
525 
526 	return 0;
527 }
528 
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539 	return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541 
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554 		       struct trace_pid_list *filtered_no_pids,
555 		       struct task_struct *task)
556 {
557 	/*
558 	 * If filtered_no_pids is not empty, and the task's pid is listed
559 	 * in filtered_no_pids, then return true.
560 	 * Otherwise, if filtered_pids is empty, that means we can
561 	 * trace all tasks. If it has content, then only trace pids
562 	 * within filtered_pids.
563 	 */
564 
565 	return (filtered_pids &&
566 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
567 		(filtered_no_pids &&
568 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570 
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584 				  struct task_struct *self,
585 				  struct task_struct *task)
586 {
587 	if (!pid_list)
588 		return;
589 
590 	/* For forks, we only add if the forking task is listed */
591 	if (self) {
592 		if (!trace_find_filtered_pid(pid_list, self->pid))
593 			return;
594 	}
595 
596 	/* "self" is set for forks, and NULL for exits */
597 	if (self)
598 		trace_pid_list_set(pid_list, task->pid);
599 	else
600 		trace_pid_list_clear(pid_list, task->pid);
601 }
602 
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617 	long pid = (unsigned long)v;
618 	unsigned int next;
619 
620 	(*pos)++;
621 
622 	/* pid already is +1 of the actual previous bit */
623 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
624 		return NULL;
625 
626 	pid = next;
627 
628 	/* Return pid + 1 to allow zero to be represented */
629 	return (void *)(pid + 1);
630 }
631 
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645 	unsigned long pid;
646 	unsigned int first;
647 	loff_t l = 0;
648 
649 	if (trace_pid_list_first(pid_list, &first) < 0)
650 		return NULL;
651 
652 	pid = first;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
676 
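/*
 * Illustrative sketch (not part of the original file): the helpers above are
 * designed to slot straight into a seq_file. The wrapper names and
 * "example_pid_list" below are hypothetical; the ftrace and event pid
 * filtering files follow this same pattern.
 */
static struct trace_pid_list *example_pid_list __maybe_unused;

static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops __maybe_unused = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};
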
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always create a new list. The write is an all-or-nothing
698 	 * operation: a new list is built for the pids the user writes,
699 	 * and if the operation fails, the current list is left
700 	 * unmodified.
701 	 */
702 	pid_list = trace_pid_list_alloc();
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	if (filtered_pids) {
709 		/* copy the current bits to the new max */
710 		ret = trace_pid_list_first(filtered_pids, &pid);
711 		while (!ret) {
712 			trace_pid_list_set(pid_list, pid);
713 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714 			nr_pids++;
715 		}
716 	}
717 
718 	ret = 0;
719 	while (cnt > 0) {
720 
721 		pos = 0;
722 
723 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
724 		if (ret < 0)
725 			break;
726 
727 		read += ret;
728 		ubuf += ret;
729 		cnt -= ret;
730 
731 		if (!trace_parser_loaded(&parser))
732 			break;
733 
734 		ret = -EINVAL;
735 		if (kstrtoul(parser.buffer, 0, &val))
736 			break;
737 
738 		pid = (pid_t)val;
739 
740 		if (trace_pid_list_set(pid_list, pid) < 0) {
741 			ret = -1;
742 			break;
743 		}
744 		nr_pids++;
745 
746 		trace_parser_clear(&parser);
747 		ret = 0;
748 	}
749 	trace_parser_put(&parser);
750 
751 	if (ret < 0) {
752 		trace_pid_list_free(pid_list);
753 		return ret;
754 	}
755 
756 	if (!nr_pids) {
757 		/* Cleared the list of pids */
758 		trace_pid_list_free(pid_list);
759 		pid_list = NULL;
760 	}
761 
762 	*new_pid_list = pid_list;
763 
764 	return read;
765 }
766 
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 	u64 ts;
770 
771 	/* Early boot up does not have a buffer yet */
772 	if (!buf->buffer)
773 		return trace_clock_local();
774 
775 	ts = ring_buffer_time_stamp(buf->buffer);
776 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777 
778 	return ts;
779 }
780 
781 u64 ftrace_now(int cpu)
782 {
783 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785 
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797 	/*
798 	 * For quick access (irqsoff uses this in fast path), just
799 	 * return the mirror variable of the state of the ring buffer.
800 	 * It's a little racy, but we don't really care.
801 	 */
802 	smp_rmb();
803 	return !global_trace.buffer_disabled;
804 }
805 
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low value of 16384.
812  * If a dump on oops happens, it is much appreciated not to
813  * have to wait for all that output. In any case, this is
814  * configurable at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
817 
818 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer		*trace_types __read_mostly;
822 
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827 
828 /*
829  * serialize the access of the ring buffer
830  *
831  * ring buffer serializes readers, but it is low level protection.
832  * The validity of the events (which returns by ring_buffer_peek() ..etc)
833  * are not protected by ring buffer.
834  *
835  * The content of events may become garbage if we allow other process consumes
836  * these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not reader page) in ring buffer, and this page will be rewritten
839  *      by events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to system.
842  *
843  * These primitives allow multi process access to different cpu ring buffer
844  * concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multi read-only access are also serialized.
848  */
849 
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 
854 static inline void trace_access_lock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		/* gain it for accessing the whole ring buffer. */
858 		down_write(&all_cpu_access_lock);
859 	} else {
860 		/* gain it for accessing a cpu ring buffer. */
861 
862 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 		down_read(&all_cpu_access_lock);
864 
865 		/* Secondly block other access to this @cpu ring buffer. */
866 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 	}
868 }
869 
870 static inline void trace_access_unlock(int cpu)
871 {
872 	if (cpu == RING_BUFFER_ALL_CPUS) {
873 		up_write(&all_cpu_access_lock);
874 	} else {
875 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 		up_read(&all_cpu_access_lock);
877 	}
878 }
879 
880 static inline void trace_access_lock_init(void)
881 {
882 	int cpu;
883 
884 	for_each_possible_cpu(cpu)
885 		mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887 
888 #else
889 
890 static DEFINE_MUTEX(access_lock);
891 
892 static inline void trace_access_lock(int cpu)
893 {
894 	(void)cpu;
895 	mutex_lock(&access_lock);
896 }
897 
898 static inline void trace_access_unlock(int cpu)
899 {
900 	(void)cpu;
901 	mutex_unlock(&access_lock);
902 }
903 
904 static inline void trace_access_lock_init(void)
905 {
906 }
907 
908 #endif
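
/*
 * Usage sketch (illustrative, not part of the original file): a consuming
 * reader brackets its work on one CPU's buffer with the per-CPU lock, while
 * RING_BUFFER_ALL_CPUS takes the exclusive side. The function below is
 * hypothetical.
 */
static __maybe_unused void example_consume_cpu_buffer(int cpu)
{
	trace_access_lock(cpu);
	/* ... consume or splice events from this CPU's ring buffer ... */
	trace_access_unlock(cpu);
}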
909 
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 				 unsigned int trace_ctx,
913 				 int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 				      struct trace_buffer *buffer,
916 				      unsigned int trace_ctx,
917 				      int skip, struct pt_regs *regs);
918 
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 					unsigned int trace_ctx,
922 					int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 				      struct trace_buffer *buffer,
927 				      unsigned long trace_ctx,
928 				      int skip, struct pt_regs *regs)
929 {
930 }
931 
932 #endif
933 
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 		  int type, unsigned int trace_ctx)
937 {
938 	struct trace_entry *ent = ring_buffer_event_data(event);
939 
940 	tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942 
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 			  int type,
946 			  unsigned long len,
947 			  unsigned int trace_ctx)
948 {
949 	struct ring_buffer_event *event;
950 
951 	event = ring_buffer_lock_reserve(buffer, len);
952 	if (event != NULL)
953 		trace_event_setup(event, type, trace_ctx);
954 
955 	return event;
956 }
957 
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 	if (tr->array_buffer.buffer)
961 		ring_buffer_record_on(tr->array_buffer.buffer);
962 	/*
963 	 * This flag is looked at when buffers haven't been allocated
964 	 * yet, or by some tracers (like irqsoff) that just want to
965 	 * know if the ring buffer has been disabled, but it can handle
966 	 * races where it gets disabled while we still do a record.
967 	 * As the check is in the fast path of the tracers, it is more
968 	 * important to be fast than accurate.
969 	 */
970 	tr->buffer_disabled = 0;
971 	/* Make the flag seen by readers */
972 	smp_wmb();
973 }
974 
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983 	tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986 
987 
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 	__this_cpu_write(trace_taskinfo_save, true);
992 
993 	/* If this is the temp buffer, we need to commit fully */
994 	if (this_cpu_read(trace_buffered_event) == event) {
995 		/* Length is in event->array[0] */
996 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 		/* Release the temp buffer */
998 		this_cpu_dec(trace_buffered_event_cnt);
999 		/* ring_buffer_unlock_commit() enables preemption */
1000 		preempt_enable_notrace();
1001 	} else
1002 		ring_buffer_unlock_commit(buffer, event);
1003 }
1004 
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:	   The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013 	struct ring_buffer_event *event;
1014 	struct trace_buffer *buffer;
1015 	struct print_entry *entry;
1016 	unsigned int trace_ctx;
1017 	int alloc;
1018 
1019 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020 		return 0;
1021 
1022 	if (unlikely(tracing_selftest_running || tracing_disabled))
1023 		return 0;
1024 
1025 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026 
1027 	trace_ctx = tracing_gen_ctx();
1028 	buffer = global_trace.array_buffer.buffer;
1029 	ring_buffer_nest_start(buffer);
1030 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031 					    trace_ctx);
1032 	if (!event) {
1033 		size = 0;
1034 		goto out;
1035 	}
1036 
1037 	entry = ring_buffer_event_data(event);
1038 	entry->ip = ip;
1039 
1040 	memcpy(&entry->buf, str, size);
1041 
1042 	/* Add a newline if necessary */
1043 	if (entry->buf[size - 1] != '\n') {
1044 		entry->buf[size] = '\n';
1045 		entry->buf[size + 1] = '\0';
1046 	} else
1047 		entry->buf[size] = '\0';
1048 
1049 	__buffer_unlock_commit(buffer, event);
1050 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052 	ring_buffer_nest_end(buffer);
1053 	return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056 
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:	   The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064 	struct ring_buffer_event *event;
1065 	struct trace_buffer *buffer;
1066 	struct bputs_entry *entry;
1067 	unsigned int trace_ctx;
1068 	int size = sizeof(struct bputs_entry);
1069 	int ret = 0;
1070 
1071 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072 		return 0;
1073 
1074 	if (unlikely(tracing_selftest_running || tracing_disabled))
1075 		return 0;
1076 
1077 	trace_ctx = tracing_gen_ctx();
1078 	buffer = global_trace.array_buffer.buffer;
1079 
1080 	ring_buffer_nest_start(buffer);
1081 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082 					    trace_ctx);
1083 	if (!event)
1084 		goto out;
1085 
1086 	entry = ring_buffer_event_data(event);
1087 	entry->ip			= ip;
1088 	entry->str			= str;
1089 
1090 	__buffer_unlock_commit(buffer, event);
1091 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 
1093 	ret = 1;
1094  out:
1095 	ring_buffer_nest_end(buffer);
1096 	return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099 
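/*
 * Illustrative call site (not part of the original file): callers normally
 * use the trace_puts() wrapper from linux/kernel.h, which dispatches to
 * __trace_bputs() for built-in constant strings and to __trace_puts()
 * otherwise. The function below is hypothetical.
 */
static __maybe_unused void example_mark_slow_path(void)
{
	trace_puts("example: hit the slow path\n");
}
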
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102 					   void *cond_data)
1103 {
1104 	struct tracer *tracer = tr->current_trace;
1105 	unsigned long flags;
1106 
1107 	if (in_nmi()) {
1108 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1110 		return;
1111 	}
1112 
1113 	if (!tr->allocated_snapshot) {
1114 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115 		internal_trace_puts("*** stopping trace here!   ***\n");
1116 		tracing_off();
1117 		return;
1118 	}
1119 
1120 	/* Note, snapshot can not be used when the tracer uses it */
1121 	if (tracer->use_max_tr) {
1122 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 		return;
1125 	}
1126 
1127 	local_irq_save(flags);
1128 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1129 	local_irq_restore(flags);
1130 }
1131 
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134 	tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136 
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153 	struct trace_array *tr = &global_trace;
1154 
1155 	tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158 
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:		The tracing instance to snapshot
1162  * @cond_data:	The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174 	tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177 
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:		The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already taken.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194 	void *cond_data = NULL;
1195 
1196 	local_irq_disable();
1197 	arch_spin_lock(&tr->max_lock);
1198 
1199 	if (tr->cond_snapshot)
1200 		cond_data = tr->cond_snapshot->cond_data;
1201 
1202 	arch_spin_unlock(&tr->max_lock);
1203 	local_irq_enable();
1204 
1205 	return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208 
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210 					struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212 
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215 	int ret;
1216 
1217 	if (!tr->allocated_snapshot) {
1218 
1219 		/* allocate spare buffer */
1220 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222 		if (ret < 0)
1223 			return ret;
1224 
1225 		tr->allocated_snapshot = true;
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233 	/*
1234 	 * We don't free the ring buffer; instead, we resize it because
1235 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1236 	 * we want to preserve it.
1237 	 */
1238 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239 	set_buffer_entries(&tr->max_buffer, 1);
1240 	tracing_reset_online_cpus(&tr->max_buffer);
1241 	tr->allocated_snapshot = false;
1242 }
1243 
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256 	struct trace_array *tr = &global_trace;
1257 	int ret;
1258 
1259 	ret = tracing_alloc_snapshot_instance(tr);
1260 	WARN_ON(ret < 0);
1261 
1262 	return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265 
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279 	int ret;
1280 
1281 	ret = tracing_alloc_snapshot();
1282 	if (ret < 0)
1283 		return;
1284 
1285 	tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1288 
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:		The tracing instance
1292  * @cond_data:	User data to associate with the snapshot
1293  * @update:	Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303 				 cond_update_fn_t update)
1304 {
1305 	struct cond_snapshot *cond_snapshot;
1306 	int ret = 0;
1307 
1308 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309 	if (!cond_snapshot)
1310 		return -ENOMEM;
1311 
1312 	cond_snapshot->cond_data = cond_data;
1313 	cond_snapshot->update = update;
1314 
1315 	mutex_lock(&trace_types_lock);
1316 
1317 	ret = tracing_alloc_snapshot_instance(tr);
1318 	if (ret)
1319 		goto fail_unlock;
1320 
1321 	if (tr->current_trace->use_max_tr) {
1322 		ret = -EBUSY;
1323 		goto fail_unlock;
1324 	}
1325 
1326 	/*
1327 	 * The cond_snapshot can only change to NULL without the
1328 	 * trace_types_lock. We don't care if we race with it going
1329 	 * to NULL, but we want to make sure that it's not set to
1330 	 * something other than NULL when we get here, which we can
1331 	 * do safely with only holding the trace_types_lock and not
1332 	 * having to take the max_lock.
1333 	 */
1334 	if (tr->cond_snapshot) {
1335 		ret = -EBUSY;
1336 		goto fail_unlock;
1337 	}
1338 
1339 	local_irq_disable();
1340 	arch_spin_lock(&tr->max_lock);
1341 	tr->cond_snapshot = cond_snapshot;
1342 	arch_spin_unlock(&tr->max_lock);
1343 	local_irq_enable();
1344 
1345 	mutex_unlock(&trace_types_lock);
1346 
1347 	return ret;
1348 
1349  fail_unlock:
1350 	mutex_unlock(&trace_types_lock);
1351 	kfree(cond_snapshot);
1352 	return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
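
/*
 * Illustrative usage (not part of the original file): the caller supplies an
 * update() hook that decides, for each tracing_snapshot_cond() call, whether
 * the snapshot should really be taken. The structure, callback and field
 * names below are hypothetical.
 */
struct example_snap_ctx {
	unsigned long value;
	unsigned long threshold;
};

static __maybe_unused bool example_snap_update(struct trace_array *tr,
					       void *cond_data)
{
	struct example_snap_ctx *ctx = cond_data;

	/* Only take the snapshot when the tracked value crosses the threshold */
	return ctx->value > ctx->threshold;
}

/*
 * A caller would then do roughly:
 *	tracing_snapshot_cond_enable(tr, &ctx, example_snap_update);
 *	...
 *	tracing_snapshot_cond(tr, &ctx);  (snapshots only if update() returns true)
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */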
1355 
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:		The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368 	int ret = 0;
1369 
1370 	local_irq_disable();
1371 	arch_spin_lock(&tr->max_lock);
1372 
1373 	if (!tr->cond_snapshot)
1374 		ret = -EINVAL;
1375 	else {
1376 		kfree(tr->cond_snapshot);
1377 		tr->cond_snapshot = NULL;
1378 	}
1379 
1380 	arch_spin_unlock(&tr->max_lock);
1381 	local_irq_enable();
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff) that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races where it gets disabled while we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/*
1496 	 * nr_entries can not be zero and the startup
1497 	 * tests require some buffer space. Therefore
1498 	 * ensure we have at least 4096 bytes of buffer.
1499 	 */
1500 	trace_buf_size = max(4096UL, buf_size);
1501 	return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504 
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507 	unsigned long threshold;
1508 	int ret;
1509 
1510 	if (!str)
1511 		return 0;
1512 	ret = kstrtoul(str, 0, &threshold);
1513 	if (ret < 0)
1514 		return 0;
1515 	tracing_thresh = threshold * 1000;
1516 	return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519 
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522 	return nsecs / 1000;
1523 }
1524 
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533 
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536 	TRACE_FLAGS
1537 	NULL
1538 };
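
/*
 * Illustrative expansion (not part of the original file) of the C(a, b)
 * trick, using a made-up two-entry flag list. trace.h defines the real
 * TRACE_FLAGS once and re-expands it with different C() definitions to
 * build the enum bits, the bit masks and the string table above in lockstep.
 */
#if 0	/* example only */
#define EXAMPLE_FLAGS		\
	C(FOO, "foo"),		\
	C(BAR, "bar"),

#undef C
#define C(a, b) EXAMPLE_ITER_##a##_BIT
enum { EXAMPLE_FLAGS };		/* EXAMPLE_ITER_FOO_BIT, EXAMPLE_ITER_BAR_BIT */

#undef C
#define C(a, b) b
static const char *example_options[] = { EXAMPLE_FLAGS NULL };	/* "foo", "bar", NULL */
#endif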
1539 
1540 static struct {
1541 	u64 (*func)(void);
1542 	const char *name;
1543 	int in_ns;		/* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545 	{ trace_clock_local,		"local",	1 },
1546 	{ trace_clock_global,		"global",	1 },
1547 	{ trace_clock_counter,		"counter",	0 },
1548 	{ trace_clock_jiffies,		"uptime",	0 },
1549 	{ trace_clock,			"perf",		1 },
1550 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1551 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1552 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1553 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1554 	ARCH_TRACE_CLOCKS
1555 };
1556 
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559 	if (trace_clocks[tr->clock_id].in_ns)
1560 		return true;
1561 
1562 	return false;
1563 }
1564 
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570 	memset(parser, 0, sizeof(*parser));
1571 
1572 	parser->buffer = kmalloc(size, GFP_KERNEL);
1573 	if (!parser->buffer)
1574 		return 1;
1575 
1576 	parser->size = size;
1577 	return 0;
1578 }
1579 
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585 	kfree(parser->buffer);
1586 	parser->buffer = NULL;
1587 }
1588 
1589 /*
1590  * trace_get_user - reads the user input string separated by space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601 	size_t cnt, loff_t *ppos)
1602 {
1603 	char ch;
1604 	size_t read = 0;
1605 	ssize_t ret;
1606 
1607 	if (!*ppos)
1608 		trace_parser_clear(parser);
1609 
1610 	ret = get_user(ch, ubuf++);
1611 	if (ret)
1612 		goto out;
1613 
1614 	read++;
1615 	cnt--;
1616 
1617 	/*
1618 	 * The parser is not finished with the last write,
1619 	 * continue reading the user input without skipping spaces.
1620 	 */
1621 	if (!parser->cont) {
1622 		/* skip white space */
1623 		while (cnt && isspace(ch)) {
1624 			ret = get_user(ch, ubuf++);
1625 			if (ret)
1626 				goto out;
1627 			read++;
1628 			cnt--;
1629 		}
1630 
1631 		parser->idx = 0;
1632 
1633 		/* only spaces were written */
1634 		if (isspace(ch) || !ch) {
1635 			*ppos += read;
1636 			ret = read;
1637 			goto out;
1638 		}
1639 	}
1640 
1641 	/* read the non-space input */
1642 	while (cnt && !isspace(ch) && ch) {
1643 		if (parser->idx < parser->size - 1)
1644 			parser->buffer[parser->idx++] = ch;
1645 		else {
1646 			ret = -EINVAL;
1647 			goto out;
1648 		}
1649 		ret = get_user(ch, ubuf++);
1650 		if (ret)
1651 			goto out;
1652 		read++;
1653 		cnt--;
1654 	}
1655 
1656 	/* We either got finished input or we have to wait for another call. */
1657 	if (isspace(ch) || !ch) {
1658 		parser->buffer[parser->idx] = 0;
1659 		parser->cont = false;
1660 	} else if (parser->idx < parser->size - 1) {
1661 		parser->cont = true;
1662 		parser->buffer[parser->idx++] = ch;
1663 		/* Make sure the parsed string always terminates with '\0'. */
1664 		parser->buffer[parser->idx] = 0;
1665 	} else {
1666 		ret = -EINVAL;
1667 		goto out;
1668 	}
1669 
1670 	*ppos += read;
1671 	ret = read;
1672 
1673 out:
1674 	return ret;
1675 }
1676 
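/*
 * Illustrative flow (not part of the original file): callers typically loop
 * trace_get_user() over a user buffer and handle one whitespace-separated
 * token per completed parse. The function name and buffer size below are
 * hypothetical.
 */
static __maybe_unused ssize_t
example_parse_one_token(const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t ret;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret >= 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
		pr_debug("parsed token: %s\n", parser.buffer);
	}

	trace_parser_put(&parser);
	return ret;
}
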
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680 	int len;
1681 
1682 	if (trace_seq_used(s) <= s->seq.readpos)
1683 		return -EBUSY;
1684 
1685 	len = trace_seq_used(s) - s->seq.readpos;
1686 	if (cnt > len)
1687 		cnt = len;
1688 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689 
1690 	s->seq.readpos += cnt;
1691 	return cnt;
1692 }
1693 
1694 unsigned long __read_mostly	tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696 
1697 #ifdef LATENCY_FS_NOTIFY
1698 
1699 static struct workqueue_struct *fsnotify_wq;
1700 
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703 	struct trace_array *tr = container_of(work, struct trace_array,
1704 					      fsnotify_work);
1705 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707 
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710 	struct trace_array *tr = container_of(iwork, struct trace_array,
1711 					      fsnotify_irqwork);
1712 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714 
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716 				     struct dentry *d_tracer)
1717 {
1718 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1721 					      TRACE_MODE_WRITE,
1722 					      d_tracer, &tr->max_latency,
1723 					      &tracing_max_lat_fops);
1724 }
1725 
1726 __init static int latency_fsnotify_init(void)
1727 {
1728 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1730 	if (!fsnotify_wq) {
1731 		pr_err("Unable to allocate tr_max_lat_wq\n");
1732 		return -ENOMEM;
1733 	}
1734 	return 0;
1735 }
1736 
1737 late_initcall_sync(latency_fsnotify_init);
1738 
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741 	if (!fsnotify_wq)
1742 		return;
1743 	/*
1744 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745 	 * possible that we are called from __schedule() or do_idle(), which
1746 	 * could cause a deadlock.
1747 	 */
1748 	irq_work_queue(&tr->fsnotify_irqwork);
1749 }
1750 
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1752 	|| defined(CONFIG_OSNOISE_TRACER)
1753 
1754 #define trace_create_maxlat_file(tr, d_tracer)				\
1755 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1756 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757 
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1760 #endif
1761 
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (This way the maximum trace is permanently saved
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771 	struct array_buffer *trace_buf = &tr->array_buffer;
1772 	struct array_buffer *max_buf = &tr->max_buffer;
1773 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775 
1776 	max_buf->cpu = cpu;
1777 	max_buf->time_start = data->preempt_timestamp;
1778 
1779 	max_data->saved_latency = tr->max_latency;
1780 	max_data->critical_start = data->critical_start;
1781 	max_data->critical_end = data->critical_end;
1782 
1783 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784 	max_data->pid = tsk->pid;
1785 	/*
1786 	 * If tsk == current, then use current_uid(), as that does not use
1787 	 * RCU. The irq tracer can be called out of RCU scope.
1788 	 */
1789 	if (tsk == current)
1790 		max_data->uid = current_uid();
1791 	else
1792 		max_data->uid = task_uid(tsk);
1793 
1794 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795 	max_data->policy = tsk->policy;
1796 	max_data->rt_priority = tsk->rt_priority;
1797 
1798 	/* record this task's comm */
1799 	tracing_record_cmdline(tsk);
1800 	latency_fsnotify(tr);
1801 }
1802 
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815 	      void *cond_data)
1816 {
1817 	if (tr->stop_count)
1818 		return;
1819 
1820 	WARN_ON_ONCE(!irqs_disabled());
1821 
1822 	if (!tr->allocated_snapshot) {
1823 		/* Only the nop tracer should hit this when disabling */
1824 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825 		return;
1826 	}
1827 
1828 	arch_spin_lock(&tr->max_lock);
1829 
1830 	/* Inherit the recordable setting from array_buffer */
1831 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832 		ring_buffer_record_on(tr->max_buffer.buffer);
1833 	else
1834 		ring_buffer_record_off(tr->max_buffer.buffer);
1835 
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838 		goto out_unlock;
1839 #endif
1840 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841 
1842 	__update_max_tr(tr, tsk, cpu);
1843 
1844  out_unlock:
1845 	arch_spin_unlock(&tr->max_lock);
1846 }
1847 
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859 	int ret;
1860 
1861 	if (tr->stop_count)
1862 		return;
1863 
1864 	WARN_ON_ONCE(!irqs_disabled());
1865 	if (!tr->allocated_snapshot) {
1866 		/* Only the nop tracer should hit this when disabling */
1867 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868 		return;
1869 	}
1870 
1871 	arch_spin_lock(&tr->max_lock);
1872 
1873 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874 
1875 	if (ret == -EBUSY) {
1876 		/*
1877 		 * We failed to swap the buffer due to a commit taking
1878 		 * place on this CPU. We fail to record, but we reset
1879 		 * the max trace buffer (no one writes directly to it)
1880 		 * and flag that it failed.
1881 		 */
1882 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883 			"Failed to swap buffers due to commit in progress\n");
1884 	}
1885 
1886 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887 
1888 	__update_max_tr(tr, tsk, cpu);
1889 	arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892 
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895 	/* Iterators are static, they should be filled or empty */
1896 	if (trace_buffer_iter(iter, iter->cpu_file))
1897 		return 0;
1898 
1899 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900 				full);
1901 }
1902 
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905 
1906 struct trace_selftests {
1907 	struct list_head		list;
1908 	struct tracer			*type;
1909 };
1910 
1911 static LIST_HEAD(postponed_selftests);
1912 
1913 static int save_selftest(struct tracer *type)
1914 {
1915 	struct trace_selftests *selftest;
1916 
1917 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918 	if (!selftest)
1919 		return -ENOMEM;
1920 
1921 	selftest->type = type;
1922 	list_add(&selftest->list, &postponed_selftests);
1923 	return 0;
1924 }
1925 
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928 	struct trace_array *tr = &global_trace;
1929 	struct tracer *saved_tracer = tr->current_trace;
1930 	int ret;
1931 
1932 	if (!type->selftest || tracing_selftest_disabled)
1933 		return 0;
1934 
1935 	/*
1936 	 * If a tracer registers early in boot up (before scheduling is
1937 	 * initialized and such), then do not run its selftests yet.
1938 	 * Instead, run it a little later in the boot process.
1939 	 */
1940 	if (!selftests_can_run)
1941 		return save_selftest(type);
1942 
1943 	if (!tracing_is_on()) {
1944 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945 			type->name);
1946 		return 0;
1947 	}
1948 
1949 	/*
1950 	 * Run a selftest on this tracer.
1951 	 * Here we reset the trace buffer, and set the current
1952 	 * tracer to be this tracer. The tracer can then run some
1953 	 * internal tracing to verify that everything is in order.
1954 	 * If we fail, we do not register this tracer.
1955 	 */
1956 	tracing_reset_online_cpus(&tr->array_buffer);
1957 
1958 	tr->current_trace = type;
1959 
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961 	if (type->use_max_tr) {
1962 		/* If we expanded the buffers, make sure the max is expanded too */
1963 		if (ring_buffer_expanded)
1964 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965 					   RING_BUFFER_ALL_CPUS);
1966 		tr->allocated_snapshot = true;
1967 	}
1968 #endif
1969 
1970 	/* the test is responsible for initializing and enabling */
1971 	pr_info("Testing tracer %s: ", type->name);
1972 	ret = type->selftest(type, tr);
1973 	/* the test is responsible for resetting too */
1974 	tr->current_trace = saved_tracer;
1975 	if (ret) {
1976 		printk(KERN_CONT "FAILED!\n");
1977 		/* Add the warning after printing 'FAILED' */
1978 		WARN_ON(1);
1979 		return -1;
1980 	}
1981 	/* Only reset on passing, to avoid touching corrupted buffers */
1982 	tracing_reset_online_cpus(&tr->array_buffer);
1983 
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985 	if (type->use_max_tr) {
1986 		tr->allocated_snapshot = false;
1987 
1988 		/* Shrink the max buffer again */
1989 		if (ring_buffer_expanded)
1990 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1991 					   RING_BUFFER_ALL_CPUS);
1992 	}
1993 #endif
1994 
1995 	printk(KERN_CONT "PASSED\n");
1996 	return 0;
1997 }
1998 
1999 static __init int init_trace_selftests(void)
2000 {
2001 	struct trace_selftests *p, *n;
2002 	struct tracer *t, **last;
2003 	int ret;
2004 
2005 	selftests_can_run = true;
2006 
2007 	mutex_lock(&trace_types_lock);
2008 
2009 	if (list_empty(&postponed_selftests))
2010 		goto out;
2011 
2012 	pr_info("Running postponed tracer tests:\n");
2013 
2014 	tracing_selftest_running = true;
2015 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016 		/* This loop can take minutes when sanitizers are enabled, so
2017 		 * let's make sure we allow RCU processing.
2018 		 */
2019 		cond_resched();
2020 		ret = run_tracer_selftest(p->type);
2021 		/* If the test fails, then warn and remove from available_tracers */
2022 		if (ret < 0) {
2023 			WARN(1, "tracer: %s failed selftest, disabling\n",
2024 			     p->type->name);
2025 			last = &trace_types;
2026 			for (t = trace_types; t; t = t->next) {
2027 				if (t == p->type) {
2028 					*last = t->next;
2029 					break;
2030 				}
2031 				last = &t->next;
2032 			}
2033 		}
2034 		list_del(&p->list);
2035 		kfree(p);
2036 	}
2037 	tracing_selftest_running = false;
2038 
2039  out:
2040 	mutex_unlock(&trace_types_lock);
2041 
2042 	return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048 	return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051 
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053 
2054 static void __init apply_trace_boot_options(void);
2055 
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064 	struct tracer *t;
2065 	int ret = 0;
2066 
2067 	if (!type->name) {
2068 		pr_info("Tracer must have a name\n");
2069 		return -1;
2070 	}
2071 
2072 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074 		return -1;
2075 	}
2076 
2077 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078 		pr_warn("Can not register tracer %s due to lockdown\n",
2079 			   type->name);
2080 		return -EPERM;
2081 	}
2082 
2083 	mutex_lock(&trace_types_lock);
2084 
2085 	tracing_selftest_running = true;
2086 
2087 	for (t = trace_types; t; t = t->next) {
2088 		if (strcmp(type->name, t->name) == 0) {
2089 			/* already found */
2090 			pr_info("Tracer %s already registered\n",
2091 				type->name);
2092 			ret = -1;
2093 			goto out;
2094 		}
2095 	}
2096 
2097 	if (!type->set_flag)
2098 		type->set_flag = &dummy_set_flag;
2099 	if (!type->flags) {
2100 		/*allocate a dummy tracer_flags*/
2101 		/* allocate a dummy tracer_flags */
2102 		if (!type->flags) {
2103 			ret = -ENOMEM;
2104 			goto out;
2105 		}
2106 		type->flags->val = 0;
2107 		type->flags->opts = dummy_tracer_opt;
2108 	} else
2109 		if (!type->flags->opts)
2110 			type->flags->opts = dummy_tracer_opt;
2111 
2112 	/* store the tracer for __set_tracer_option */
2113 	type->flags->trace = type;
2114 
2115 	ret = run_tracer_selftest(type);
2116 	if (ret < 0)
2117 		goto out;
2118 
2119 	type->next = trace_types;
2120 	trace_types = type;
2121 	add_tracer_options(&global_trace, type);
2122 
2123  out:
2124 	tracing_selftest_running = false;
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	if (ret || !default_bootup_tracer)
2128 		goto out_unlock;
2129 
2130 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131 		goto out_unlock;
2132 
2133 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134 	/* Do we want this tracer to start on bootup? */
2135 	tracing_set_tracer(&global_trace, type->name);
2136 	default_bootup_tracer = NULL;
2137 
2138 	apply_trace_boot_options();
2139 
2140 	/* disable other selftests, since this will break it. */
2141 	/* disable other selftests, since this will break them. */
2142 
2143  out_unlock:
2144 	return ret;
2145 }
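
/*
 * Illustrative sketch only (not part of this file): a minimal tracer
 * registration, using a hypothetical "mytracer" with its own init and
 * reset callbacks. register_tracer() is __init, so it must be called
 * during boot, typically from the tracer's own __init code:
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= mytracer_init,
 *		.reset	= mytracer_reset,
 *	};
 *
 *	static __init int init_mytracer(void)
 *	{
 *		return register_tracer(&mytracer);
 *	}
 *	core_initcall(init_mytracer);
 */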
2146 
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149 	struct trace_buffer *buffer = buf->buffer;
2150 
2151 	if (!buffer)
2152 		return;
2153 
2154 	ring_buffer_record_disable(buffer);
2155 
2156 	/* Make sure all commits have finished */
2157 	synchronize_rcu();
2158 	ring_buffer_reset_cpu(buffer, cpu);
2159 
2160 	ring_buffer_record_enable(buffer);
2161 }
2162 
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165 	struct trace_buffer *buffer = buf->buffer;
2166 
2167 	if (!buffer)
2168 		return;
2169 
2170 	ring_buffer_record_disable(buffer);
2171 
2172 	/* Make sure all commits have finished */
2173 	synchronize_rcu();
2174 
2175 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176 
2177 	ring_buffer_reset_online_cpus(buffer);
2178 
2179 	ring_buffer_record_enable(buffer);
2180 }
2181 
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus(void)
2184 {
2185 	struct trace_array *tr;
2186 
2187 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2188 		if (!tr->clear_trace)
2189 			continue;
2190 		tr->clear_trace = false;
2191 		tracing_reset_online_cpus(&tr->array_buffer);
2192 #ifdef CONFIG_TRACER_MAX_TRACE
2193 		tracing_reset_online_cpus(&tr->max_buffer);
2194 #endif
2195 	}
2196 }
2197 
2198 /*
2199  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2200  * is the tgid last observed corresponding to pid=i.
2201  */
2202 static int *tgid_map;
2203 
2204 /* The maximum valid index into tgid_map. */
2205 static size_t tgid_map_max;
2206 
2207 #define SAVED_CMDLINES_DEFAULT 128
2208 #define NO_CMDLINE_MAP UINT_MAX
2209 /*
2210  * Preemption must be disabled before acquiring trace_cmdline_lock.
2211  * The various trace_arrays' max_lock must be acquired in a context
2212  * where interrupts are disabled.
2213  */
2214 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2215 struct saved_cmdlines_buffer {
2216 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2217 	unsigned *map_cmdline_to_pid;
2218 	unsigned cmdline_num;
2219 	int cmdline_idx;
2220 	char *saved_cmdlines;
2221 };
2222 static struct saved_cmdlines_buffer *savedcmd;
2223 
2224 static inline char *get_saved_cmdlines(int idx)
2225 {
2226 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2227 }
2228 
2229 static inline void set_cmdline(int idx, const char *cmdline)
2230 {
2231 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2232 }
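
/*
 * For illustration (derived from the code around here, no new machinery):
 * the cmdline cache is a pair of maps plus a flat comm array,
 *
 *	map_pid_to_cmdline[pid & (PID_MAX_DEFAULT - 1)]	-> slot idx
 *	map_cmdline_to_pid[idx]				-> pid owning the slot
 *	saved_cmdlines[idx * TASK_COMM_LEN]		-> cached comm
 *
 * A lookup only trusts the cached comm if map_cmdline_to_pid[idx] still
 * matches the pid being asked about (see __trace_find_cmdline() below);
 * otherwise "<...>" is reported.
 */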
2233 
2234 static int allocate_cmdlines_buffer(unsigned int val,
2235 				    struct saved_cmdlines_buffer *s)
2236 {
2237 	s->map_cmdline_to_pid = kmalloc_array(val,
2238 					      sizeof(*s->map_cmdline_to_pid),
2239 					      GFP_KERNEL);
2240 	if (!s->map_cmdline_to_pid)
2241 		return -ENOMEM;
2242 
2243 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2244 	if (!s->saved_cmdlines) {
2245 		kfree(s->map_cmdline_to_pid);
2246 		return -ENOMEM;
2247 	}
2248 
2249 	s->cmdline_idx = 0;
2250 	s->cmdline_num = val;
2251 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2252 	       sizeof(s->map_pid_to_cmdline));
2253 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2254 	       val * sizeof(*s->map_cmdline_to_pid));
2255 
2256 	return 0;
2257 }
2258 
2259 static int trace_create_savedcmd(void)
2260 {
2261 	int ret;
2262 
2263 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2264 	if (!savedcmd)
2265 		return -ENOMEM;
2266 
2267 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2268 	if (ret < 0) {
2269 		kfree(savedcmd);
2270 		savedcmd = NULL;
2271 		return -ENOMEM;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 int is_tracing_stopped(void)
2278 {
2279 	return global_trace.stop_count;
2280 }
2281 
2282 /**
2283  * tracing_start - quick start of the tracer
2284  *
2285  * If tracing is enabled but was stopped by tracing_stop,
2286  * this will start the tracer back up.
2287  */
2288 void tracing_start(void)
2289 {
2290 	struct trace_buffer *buffer;
2291 	unsigned long flags;
2292 
2293 	if (tracing_disabled)
2294 		return;
2295 
2296 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2297 	if (--global_trace.stop_count) {
2298 		if (global_trace.stop_count < 0) {
2299 			/* Someone screwed up their debugging */
2300 			WARN_ON_ONCE(1);
2301 			global_trace.stop_count = 0;
2302 		}
2303 		goto out;
2304 	}
2305 
2306 	/* Prevent the buffers from switching */
2307 	arch_spin_lock(&global_trace.max_lock);
2308 
2309 	buffer = global_trace.array_buffer.buffer;
2310 	if (buffer)
2311 		ring_buffer_record_enable(buffer);
2312 
2313 #ifdef CONFIG_TRACER_MAX_TRACE
2314 	buffer = global_trace.max_buffer.buffer;
2315 	if (buffer)
2316 		ring_buffer_record_enable(buffer);
2317 #endif
2318 
2319 	arch_spin_unlock(&global_trace.max_lock);
2320 
2321  out:
2322 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2323 }
2324 
2325 static void tracing_start_tr(struct trace_array *tr)
2326 {
2327 	struct trace_buffer *buffer;
2328 	unsigned long flags;
2329 
2330 	if (tracing_disabled)
2331 		return;
2332 
2333 	/* If global, we need to also start the max tracer */
2334 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2335 		return tracing_start();
2336 
2337 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2338 
2339 	if (--tr->stop_count) {
2340 		if (tr->stop_count < 0) {
2341 			/* Someone screwed up their debugging */
2342 			WARN_ON_ONCE(1);
2343 			tr->stop_count = 0;
2344 		}
2345 		goto out;
2346 	}
2347 
2348 	buffer = tr->array_buffer.buffer;
2349 	if (buffer)
2350 		ring_buffer_record_enable(buffer);
2351 
2352  out:
2353 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2354 }
2355 
2356 /**
2357  * tracing_stop - quick stop of the tracer
2358  *
2359  * Light weight way to stop tracing. Use in conjunction with
2360  * tracing_start.
2361  */
2362 void tracing_stop(void)
2363 {
2364 	struct trace_buffer *buffer;
2365 	unsigned long flags;
2366 
2367 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2368 	if (global_trace.stop_count++)
2369 		goto out;
2370 
2371 	/* Prevent the buffers from switching */
2372 	arch_spin_lock(&global_trace.max_lock);
2373 
2374 	buffer = global_trace.array_buffer.buffer;
2375 	if (buffer)
2376 		ring_buffer_record_disable(buffer);
2377 
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379 	buffer = global_trace.max_buffer.buffer;
2380 	if (buffer)
2381 		ring_buffer_record_disable(buffer);
2382 #endif
2383 
2384 	arch_spin_unlock(&global_trace.max_lock);
2385 
2386  out:
2387 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2388 }
2389 
2390 static void tracing_stop_tr(struct trace_array *tr)
2391 {
2392 	struct trace_buffer *buffer;
2393 	unsigned long flags;
2394 
2395 	/* If global, we need to also stop the max tracer */
2396 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2397 		return tracing_stop();
2398 
2399 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2400 	if (tr->stop_count++)
2401 		goto out;
2402 
2403 	buffer = tr->array_buffer.buffer;
2404 	if (buffer)
2405 		ring_buffer_record_disable(buffer);
2406 
2407  out:
2408 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2409 }
2410 
2411 static int trace_save_cmdline(struct task_struct *tsk)
2412 {
2413 	unsigned tpid, idx;
2414 
2415 	/* treat recording of idle task as a success */
2416 	if (!tsk->pid)
2417 		return 1;
2418 
2419 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2420 
2421 	/*
2422 	 * It's not the end of the world if we don't get
2423 	 * the lock, but we also don't want to spin
2424 	 * nor do we want to disable interrupts,
2425 	 * so if we miss here, then better luck next time.
2426 	 *
2427 	 * This is called within the scheduler and wake up, so interrupts
2428 	 * This is called from within the scheduler and the wakeup path, so
2429 	 * interrupts should already be disabled and the run queue lock held.
2430 	lockdep_assert_preemption_disabled();
2431 	if (!arch_spin_trylock(&trace_cmdline_lock))
2432 		return 0;
2433 
2434 	idx = savedcmd->map_pid_to_cmdline[tpid];
2435 	if (idx == NO_CMDLINE_MAP) {
2436 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2437 
2438 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2439 		savedcmd->cmdline_idx = idx;
2440 	}
2441 
2442 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443 	set_cmdline(idx, tsk->comm);
2444 
2445 	arch_spin_unlock(&trace_cmdline_lock);
2446 
2447 	return 1;
2448 }
2449 
2450 static void __trace_find_cmdline(int pid, char comm[])
2451 {
2452 	unsigned map;
2453 	int tpid;
2454 
2455 	if (!pid) {
2456 		strcpy(comm, "<idle>");
2457 		return;
2458 	}
2459 
2460 	if (WARN_ON_ONCE(pid < 0)) {
2461 		strcpy(comm, "<XXX>");
2462 		return;
2463 	}
2464 
2465 	tpid = pid & (PID_MAX_DEFAULT - 1);
2466 	map = savedcmd->map_pid_to_cmdline[tpid];
2467 	if (map != NO_CMDLINE_MAP) {
2468 		tpid = savedcmd->map_cmdline_to_pid[map];
2469 		if (tpid == pid) {
2470 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2471 			return;
2472 		}
2473 	}
2474 	strcpy(comm, "<...>");
2475 }
2476 
2477 void trace_find_cmdline(int pid, char comm[])
2478 {
2479 	preempt_disable();
2480 	arch_spin_lock(&trace_cmdline_lock);
2481 
2482 	__trace_find_cmdline(pid, comm);
2483 
2484 	arch_spin_unlock(&trace_cmdline_lock);
2485 	preempt_enable();
2486 }
2487 
2488 static int *trace_find_tgid_ptr(int pid)
2489 {
2490 	/*
2491 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492 	 * if we observe a non-NULL tgid_map then we also observe the correct
2493 	 * tgid_map_max.
2494 	 */
2495 	int *map = smp_load_acquire(&tgid_map);
2496 
2497 	if (unlikely(!map || pid > tgid_map_max))
2498 		return NULL;
2499 
2500 	return &map[pid];
2501 }
2502 
2503 int trace_find_tgid(int pid)
2504 {
2505 	int *ptr = trace_find_tgid_ptr(pid);
2506 
2507 	return ptr ? *ptr : 0;
2508 }
2509 
2510 static int trace_save_tgid(struct task_struct *tsk)
2511 {
2512 	int *ptr;
2513 
2514 	/* treat recording of idle task as a success */
2515 	if (!tsk->pid)
2516 		return 1;
2517 
2518 	ptr = trace_find_tgid_ptr(tsk->pid);
2519 	if (!ptr)
2520 		return 0;
2521 
2522 	*ptr = tsk->tgid;
2523 	return 1;
2524 }
2525 
2526 static bool tracing_record_taskinfo_skip(int flags)
2527 {
2528 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2529 		return true;
2530 	if (!__this_cpu_read(trace_taskinfo_save))
2531 		return true;
2532 	return false;
2533 }
2534 
2535 /**
2536  * tracing_record_taskinfo - record the task info of a task
2537  *
2538  * @task:  task to record
2539  * @flags: TRACE_RECORD_CMDLINE for recording comm
2540  *         TRACE_RECORD_TGID for recording tgid
2541  */
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2543 {
2544 	bool done;
2545 
2546 	if (tracing_record_taskinfo_skip(flags))
2547 		return;
2548 
2549 	/*
2550 	 * Record as much task information as possible. If some fail, continue
2551 	 * to try to record the others.
2552 	 */
2553 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2555 
2556 	/* If recording any information failed, retry soon. */
2557 	if (!done)
2558 		return;
2559 
2560 	__this_cpu_write(trace_taskinfo_save, false);
2561 }
2562 
2563 /**
2564  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2565  *
2566  * @prev: previous task during sched_switch
2567  * @next: next task during sched_switch
2568  * @flags: TRACE_RECORD_CMDLINE for recording comm
2569  *         TRACE_RECORD_TGID for recording tgid
2570  */
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572 					  struct task_struct *next, int flags)
2573 {
2574 	bool done;
2575 
2576 	if (tracing_record_taskinfo_skip(flags))
2577 		return;
2578 
2579 	/*
2580 	 * Record as much task information as possible. If some fail, continue
2581 	 * to try to record the others.
2582 	 */
2583 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2587 
2588 	/* If recording any information failed, retry soon. */
2589 	if (!done)
2590 		return;
2591 
2592 	__this_cpu_write(trace_taskinfo_save, false);
2593 }
2594 
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2597 {
2598 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2599 }
2600 
2601 void tracing_record_tgid(struct task_struct *task)
2602 {
2603 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2604 }
2605 
2606 /*
2607  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609  * simplifies those functions and keeps them in sync.
2610  */
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2612 {
2613 	return trace_seq_has_overflowed(s) ?
2614 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2615 }
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
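
/*
 * A sketch of the intended use (output callbacks in trace_output.c follow
 * this pattern; "trace_foo_print" is a made-up name):
 *
 *	static enum print_line_t trace_foo_print(struct trace_iterator *iter,
 *						 int flags,
 *						 struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "...");
 *		return trace_handle_return(s);
 *	}
 */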
2617 
2618 static unsigned short migration_disable_value(void)
2619 {
2620 #if defined(CONFIG_SMP)
2621 	return current->migration_disabled;
2622 #else
2623 	return 0;
2624 #endif
2625 }
2626 
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2628 {
2629 	unsigned int trace_flags = irqs_status;
2630 	unsigned int pc;
2631 
2632 	pc = preempt_count();
2633 
2634 	if (pc & NMI_MASK)
2635 		trace_flags |= TRACE_FLAG_NMI;
2636 	if (pc & HARDIRQ_MASK)
2637 		trace_flags |= TRACE_FLAG_HARDIRQ;
2638 	if (in_serving_softirq())
2639 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2640 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641 		trace_flags |= TRACE_FLAG_BH_OFF;
2642 
2643 	if (tif_need_resched())
2644 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645 	if (test_preempt_need_resched())
2646 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2649 }
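
/*
 * Layout of the returned trace_ctx value, as packed above (shown here
 * only for reference; the masks are implied by the code, not defined
 * elsewhere in this file):
 *
 *	bits  0..3	preemption count, clamped to 0xf
 *	bits  4..7	migration disable count, clamped to 0xf
 *	bits  8..15	unused by this helper
 *	bits 16 and up	TRACE_FLAG_* bits (irq state, NMI, hardirq,
 *			softirq, BH off, need-resched)
 */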
2650 
2651 struct ring_buffer_event *
2652 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2653 			  int type,
2654 			  unsigned long len,
2655 			  unsigned int trace_ctx)
2656 {
2657 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2658 }
2659 
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2662 static int trace_buffered_event_ref;
2663 
2664 /**
2665  * trace_buffered_event_enable - enable buffering events
2666  *
2667  * When events are being filtered, it is quicker to use a temporary
2668  * buffer to write the event data into if there's a likely chance
2669  * that it will not be committed. Discarding an event from the ring
2670  * buffer is not as fast as committing one, and is much slower than
2671  * copying the data and committing that copy.
2672  *
2673  * When an event is to be filtered, allocate per-CPU buffers to write
2674  * the event data into. If the event is then filtered and discarded,
2675  * the data is simply dropped; otherwise, the entire data is committed
2676  * in one shot.
2677  */
2678 void trace_buffered_event_enable(void)
2679 {
2680 	struct ring_buffer_event *event;
2681 	struct page *page;
2682 	int cpu;
2683 
2684 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2685 
2686 	if (trace_buffered_event_ref++)
2687 		return;
2688 
2689 	for_each_tracing_cpu(cpu) {
2690 		page = alloc_pages_node(cpu_to_node(cpu),
2691 					GFP_KERNEL | __GFP_NORETRY, 0);
2692 		if (!page)
2693 			goto failed;
2694 
2695 		event = page_address(page);
2696 		memset(event, 0, sizeof(*event));
2697 
2698 		per_cpu(trace_buffered_event, cpu) = event;
2699 
2700 		preempt_disable();
2701 		if (cpu == smp_processor_id() &&
2702 		    __this_cpu_read(trace_buffered_event) !=
2703 		    per_cpu(trace_buffered_event, cpu))
2704 			WARN_ON_ONCE(1);
2705 		preempt_enable();
2706 	}
2707 
2708 	return;
2709  failed:
2710 	trace_buffered_event_disable();
2711 }
2712 
2713 static void enable_trace_buffered_event(void *data)
2714 {
2715 	/* Probably not needed, but do it anyway */
2716 	smp_rmb();
2717 	this_cpu_dec(trace_buffered_event_cnt);
2718 }
2719 
2720 static void disable_trace_buffered_event(void *data)
2721 {
2722 	this_cpu_inc(trace_buffered_event_cnt);
2723 }
2724 
2725 /**
2726  * trace_buffered_event_disable - disable buffering events
2727  *
2728  * When a filter is removed, it is faster to not use the buffered
2729  * events, and to commit directly into the ring buffer. Free up
2730  * the temp buffers when there are no more users. This requires
2731  * special synchronization with current events.
2732  */
2733 void trace_buffered_event_disable(void)
2734 {
2735 	int cpu;
2736 
2737 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738 
2739 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2740 		return;
2741 
2742 	if (--trace_buffered_event_ref)
2743 		return;
2744 
2745 	preempt_disable();
2746 	/* For each CPU, set the buffer as used. */
2747 	smp_call_function_many(tracing_buffer_mask,
2748 			       disable_trace_buffered_event, NULL, 1);
2749 	preempt_enable();
2750 
2751 	/* Wait for all current users to finish */
2752 	synchronize_rcu();
2753 
2754 	for_each_tracing_cpu(cpu) {
2755 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756 		per_cpu(trace_buffered_event, cpu) = NULL;
2757 	}
2758 	/*
2759 	 * Make sure trace_buffered_event is NULL before clearing
2760 	 * trace_buffered_event_cnt.
2761 	 */
2762 	smp_wmb();
2763 
2764 	preempt_disable();
2765 	/* Do the work on each cpu */
2766 	smp_call_function_many(tracing_buffer_mask,
2767 			       enable_trace_buffered_event, NULL, 1);
2768 	preempt_enable();
2769 }
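
/*
 * Rough usage sketch (the event filter code is the expected caller; the
 * exact call sites live outside this file):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(a filter was attached)
 *	...
 *	trace_buffered_event_disable();		(the filter was removed)
 *	mutex_unlock(&event_mutex);
 *
 * Both functions WARN if event_mutex is not held, and they are reference
 * counted, so nested enable/disable pairs are allowed.
 */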
2770 
2771 static struct trace_buffer *temp_buffer;
2772 
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775 			  struct trace_event_file *trace_file,
2776 			  int type, unsigned long len,
2777 			  unsigned int trace_ctx)
2778 {
2779 	struct ring_buffer_event *entry;
2780 	struct trace_array *tr = trace_file->tr;
2781 	int val;
2782 
2783 	*current_rb = tr->array_buffer.buffer;
2784 
2785 	if (!tr->no_filter_buffering_ref &&
2786 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787 		preempt_disable_notrace();
2788 		/*
2789 		 * Filtering is on, so try to use the per cpu buffer first.
2790 		 * This buffer will simulate a ring_buffer_event,
2791 		 * where the type_len is zero and the array[0] will
2792 		 * hold the full length.
2793 		 * (see include/linux/ring_buffer.h for details on
2794 		 *  how the ring_buffer_event is structured).
2795 		 *
2796 		 * Using a temp buffer during filtering and copying it
2797 		 * on a matched filter is quicker than writing directly
2798 		 * into the ring buffer and then discarding it when
2799 		 * it doesn't match. That is because the discard
2800 		 * requires several atomic operations to get right.
2801 		 * Copying on a match and doing nothing on a failed match
2802 		 * is still quicker than skipping the copy on a match but
2803 		 * having to discard from the ring buffer on a failed match.
2804 		 */
2805 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2806 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2807 
2808 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2809 
2810 			/*
2811 			 * Preemption is disabled, but interrupts and NMIs
2812 			 * can still come in now. If that happens after
2813 			 * the above increment, then it will have to go
2814 			 * back to the old method of allocating the event
2815 			 * on the ring buffer, and if the filter fails, it
2816 			 * will have to call ring_buffer_discard_commit()
2817 			 * to remove it.
2818 			 *
2819 			 * Need to also check the unlikely case that the
2820 			 * length is bigger than the temp buffer size.
2821 			 * If that happens, then the reserve is pretty much
2822 			 * guaranteed to fail, as the ring buffer currently
2823 			 * only allows events less than a page. But that may
2824 			 * change in the future, so let the ring buffer reserve
2825 			 * handle the failure in that case.
2826 			 */
2827 			if (val == 1 && likely(len <= max_len)) {
2828 				trace_event_setup(entry, type, trace_ctx);
2829 				entry->array[0] = len;
2830 				/* Return with preemption disabled */
2831 				return entry;
2832 			}
2833 			this_cpu_dec(trace_buffered_event_cnt);
2834 		}
2835 		/* __trace_buffer_lock_reserve() disables preemption */
2836 		preempt_enable_notrace();
2837 	}
2838 
2839 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2840 					    trace_ctx);
2841 	/*
2842 	 * If tracing is off, but we have triggers enabled,
2843 	 * we still need to look at the event data. Use the temp_buffer
2844 	 * to store the trace event for the trigger to use. It's recursion
2845 	 * safe and will not be recorded anywhere.
2846 	 */
2847 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848 		*current_rb = temp_buffer;
2849 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2850 						    trace_ctx);
2851 	}
2852 	return entry;
2853 }
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2855 
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2858 
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2860 {
2861 	struct trace_event_call *event_call;
2862 	struct trace_event_file *file;
2863 	struct trace_event *event;
2864 	unsigned long flags;
2865 	struct trace_iterator *iter = tracepoint_print_iter;
2866 
2867 	/* We should never get here if iter is NULL */
2868 	if (WARN_ON_ONCE(!iter))
2869 		return;
2870 
2871 	event_call = fbuffer->trace_file->event_call;
2872 	if (!event_call || !event_call->event.funcs ||
2873 	    !event_call->event.funcs->trace)
2874 		return;
2875 
2876 	file = fbuffer->trace_file;
2877 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879 	     !filter_match_preds(file->filter, fbuffer->entry)))
2880 		return;
2881 
2882 	event = &fbuffer->trace_file->event_call->event;
2883 
2884 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885 	trace_seq_init(&iter->seq);
2886 	iter->ent = fbuffer->entry;
2887 	event_call->event.funcs->trace(iter, 0, event);
2888 	trace_seq_putc(&iter->seq, 0);
2889 	printk("%s", iter->seq.buffer);
2890 
2891 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2892 }
2893 
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895 			     void *buffer, size_t *lenp,
2896 			     loff_t *ppos)
2897 {
2898 	int save_tracepoint_printk;
2899 	int ret;
2900 
2901 	mutex_lock(&tracepoint_printk_mutex);
2902 	save_tracepoint_printk = tracepoint_printk;
2903 
2904 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2905 
2906 	/*
2907 	 * This will force exiting early, as tracepoint_printk
2908 	 * is always zero when tracepoint_print_iter is not allocated
2909 	 */
2910 	if (!tracepoint_print_iter)
2911 		tracepoint_printk = 0;
2912 
2913 	if (save_tracepoint_printk == tracepoint_printk)
2914 		goto out;
2915 
2916 	if (tracepoint_printk)
2917 		static_key_enable(&tracepoint_printk_key.key);
2918 	else
2919 		static_key_disable(&tracepoint_printk_key.key);
2920 
2921  out:
2922 	mutex_unlock(&tracepoint_printk_mutex);
2923 
2924 	return ret;
2925 }
2926 
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2928 {
2929 	enum event_trigger_type tt = ETT_NONE;
2930 	struct trace_event_file *file = fbuffer->trace_file;
2931 
2932 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933 			fbuffer->entry, &tt))
2934 		goto discard;
2935 
2936 	if (static_key_false(&tracepoint_printk_key.key))
2937 		output_printk(fbuffer);
2938 
2939 	if (static_branch_unlikely(&trace_event_exports_enabled))
2940 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2941 
2942 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2944 
2945 discard:
2946 	if (tt)
2947 		event_triggers_post_call(file, tt);
2948 
2949 }
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2951 
2952 /*
2953  * Skip 3:
2954  *
2955  *   trace_buffer_unlock_commit_regs()
2956  *   trace_event_buffer_commit()
2957  *   trace_event_raw_event_xxx()
2958  */
2959 # define STACK_SKIP 3
2960 
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962 				     struct trace_buffer *buffer,
2963 				     struct ring_buffer_event *event,
2964 				     unsigned int trace_ctx,
2965 				     struct pt_regs *regs)
2966 {
2967 	__buffer_unlock_commit(buffer, event);
2968 
2969 	/*
2970 	 * If regs is not set, then skip the necessary functions.
2971 	 * Note, we can still get here via blktrace, wakeup tracer
2972 	 * and mmiotrace, but that's ok if they lose a function or
2973 	 * two. They are not that meaningful.
2974 	 */
2975 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2977 }
2978 
2979 /*
2980  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2981  */
2982 void
2983 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2984 				   struct ring_buffer_event *event)
2985 {
2986 	__buffer_unlock_commit(buffer, event);
2987 }
2988 
2989 void
2990 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2991 	       parent_ip, unsigned int trace_ctx)
2992 {
2993 	struct trace_event_call *call = &event_function;
2994 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2995 	struct ring_buffer_event *event;
2996 	struct ftrace_entry *entry;
2997 
2998 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2999 					    trace_ctx);
3000 	if (!event)
3001 		return;
3002 	entry	= ring_buffer_event_data(event);
3003 	entry->ip			= ip;
3004 	entry->parent_ip		= parent_ip;
3005 
3006 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3007 		if (static_branch_unlikely(&trace_function_exports_enabled))
3008 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009 		__buffer_unlock_commit(buffer, event);
3010 	}
3011 }
3012 
3013 #ifdef CONFIG_STACKTRACE
3014 
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING	4
3017 
3018 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3019 
3020 struct ftrace_stack {
3021 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3022 };
3023 
3024 
3025 struct ftrace_stacks {
3026 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3027 };
3028 
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3031 
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033 				 unsigned int trace_ctx,
3034 				 int skip, struct pt_regs *regs)
3035 {
3036 	struct trace_event_call *call = &event_kernel_stack;
3037 	struct ring_buffer_event *event;
3038 	unsigned int size, nr_entries;
3039 	struct ftrace_stack *fstack;
3040 	struct stack_entry *entry;
3041 	int stackidx;
3042 
3043 	/*
3044 	 * Add one, for this function and the call to stack_trace_save().
3045 	 * If regs is set, then these functions will not be in the way.
3046 	 */
3047 #ifndef CONFIG_UNWINDER_ORC
3048 	if (!regs)
3049 		skip++;
3050 #endif
3051 
3052 	preempt_disable_notrace();
3053 
3054 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3055 
3056 	/* This should never happen. If it does, yell once and skip */
3057 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3058 		goto out;
3059 
3060 	/*
3061 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062 	 * interrupt will either see the value pre increment or post
3063 	 * increment. If the interrupt happens pre increment it will have
3064 	 * restored the counter when it returns.  We just need a barrier to
3065 	 * keep gcc from moving things around.
3066 	 */
3067 	barrier();
3068 
3069 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070 	size = ARRAY_SIZE(fstack->calls);
3071 
3072 	if (regs) {
3073 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3074 						   size, skip);
3075 	} else {
3076 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3077 	}
3078 
3079 	size = nr_entries * sizeof(unsigned long);
3080 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3082 				    trace_ctx);
3083 	if (!event)
3084 		goto out;
3085 	entry = ring_buffer_event_data(event);
3086 
3087 	memcpy(&entry->caller, fstack->calls, size);
3088 	entry->size = nr_entries;
3089 
3090 	if (!call_filter_check_discard(call, entry, buffer, event))
3091 		__buffer_unlock_commit(buffer, event);
3092 
3093  out:
3094 	/* Again, don't let gcc optimize things here */
3095 	barrier();
3096 	__this_cpu_dec(ftrace_stack_reserve);
3097 	preempt_enable_notrace();
3098 
3099 }
3100 
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102 				      struct trace_buffer *buffer,
3103 				      unsigned int trace_ctx,
3104 				      int skip, struct pt_regs *regs)
3105 {
3106 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3107 		return;
3108 
3109 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3110 }
3111 
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3113 		   int skip)
3114 {
3115 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3116 
3117 	if (rcu_is_watching()) {
3118 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3119 		return;
3120 	}
3121 
3122 	/*
3123 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124 	 * but if the above rcu_is_watching() failed, then the NMI
3125 	 * triggered someplace critical, and ct_irq_enter() should
3126 	 * not be called from NMI.
3127 	 */
3128 	if (unlikely(in_nmi()))
3129 		return;
3130 
3131 	ct_irq_enter_irqson();
3132 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133 	ct_irq_exit_irqson();
3134 }
3135 
3136 /**
3137  * trace_dump_stack - record a stack back trace in the trace buffer
3138  * @skip: Number of functions to skip (helper handlers)
3139  */
3140 void trace_dump_stack(int skip)
3141 {
3142 	if (tracing_disabled || tracing_selftest_running)
3143 		return;
3144 
3145 #ifndef CONFIG_UNWINDER_ORC
3146 	/* Skip 1 to skip this function. */
3147 	skip++;
3148 #endif
3149 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3150 			     tracing_gen_ctx(), skip, NULL);
3151 }
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
3153 
3154 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3155 static DEFINE_PER_CPU(int, user_stack_count);
3156 
3157 static void
3158 ftrace_trace_userstack(struct trace_array *tr,
3159 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3160 {
3161 	struct trace_event_call *call = &event_user_stack;
3162 	struct ring_buffer_event *event;
3163 	struct userstack_entry *entry;
3164 
3165 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3166 		return;
3167 
3168 	/*
3169 	 * NMIs can not handle page faults, even with fixups.
3170 	 * Saving the user stack can (and often does) fault.
3171 	 */
3172 	if (unlikely(in_nmi()))
3173 		return;
3174 
3175 	/*
3176 	 * prevent recursion, since the user stack tracing may
3177 	 * trigger other kernel events.
3178 	 */
3179 	preempt_disable();
3180 	if (__this_cpu_read(user_stack_count))
3181 		goto out;
3182 
3183 	__this_cpu_inc(user_stack_count);
3184 
3185 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3186 					    sizeof(*entry), trace_ctx);
3187 	if (!event)
3188 		goto out_drop_count;
3189 	entry	= ring_buffer_event_data(event);
3190 
3191 	entry->tgid		= current->tgid;
3192 	memset(&entry->caller, 0, sizeof(entry->caller));
3193 
3194 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3195 	if (!call_filter_check_discard(call, entry, buffer, event))
3196 		__buffer_unlock_commit(buffer, event);
3197 
3198  out_drop_count:
3199 	__this_cpu_dec(user_stack_count);
3200  out:
3201 	preempt_enable();
3202 }
3203 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3204 static void ftrace_trace_userstack(struct trace_array *tr,
3205 				   struct trace_buffer *buffer,
3206 				   unsigned int trace_ctx)
3207 {
3208 }
3209 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3210 
3211 #endif /* CONFIG_STACKTRACE */
3212 
3213 static inline void
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215 			  unsigned long long delta)
3216 {
3217 	entry->bottom_delta_ts = delta & U32_MAX;
3218 	entry->top_delta_ts = (delta >> 32);
3219 }
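
/*
 * For reference, a reader would reassemble the timestamp delta split
 * above as (sketch, mirroring the shift/mask used in the setter):
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */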
3220 
3221 void trace_last_func_repeats(struct trace_array *tr,
3222 			     struct trace_func_repeats *last_info,
3223 			     unsigned int trace_ctx)
3224 {
3225 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3226 	struct func_repeats_entry *entry;
3227 	struct ring_buffer_event *event;
3228 	u64 delta;
3229 
3230 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231 					    sizeof(*entry), trace_ctx);
3232 	if (!event)
3233 		return;
3234 
3235 	delta = ring_buffer_event_time_stamp(buffer, event) -
3236 		last_info->ts_last_call;
3237 
3238 	entry = ring_buffer_event_data(event);
3239 	entry->ip = last_info->ip;
3240 	entry->parent_ip = last_info->parent_ip;
3241 	entry->count = last_info->count;
3242 	func_repeats_set_delta_ts(entry, delta);
3243 
3244 	__buffer_unlock_commit(buffer, event);
3245 }
3246 
3247 /* created for use with alloc_percpu */
3248 struct trace_buffer_struct {
3249 	int nesting;
3250 	char buffer[4][TRACE_BUF_SIZE];
3251 };
3252 
3253 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3254 
3255 /*
3256  * This allows for lockless recording.  If we're nested too deeply, then
3257  * this returns NULL.
3258  */
3259 static char *get_trace_buf(void)
3260 {
3261 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3262 
3263 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3264 		return NULL;
3265 
3266 	buffer->nesting++;
3267 
3268 	/* Interrupts must see nesting incremented before we use the buffer */
3269 	barrier();
3270 	return &buffer->buffer[buffer->nesting - 1][0];
3271 }
3272 
3273 static void put_trace_buf(void)
3274 {
3275 	/* Don't let the decrement of nesting leak before this */
3276 	barrier();
3277 	this_cpu_dec(trace_percpu_buffer->nesting);
3278 }
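
/*
 * Sketch of how these helpers are used by the printk paths below
 * (see trace_vbprintk() and __trace_array_vprintk()):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format the message into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * Preemption must stay disabled between get and put, since the buffer
 * is per CPU and indexed by a per-CPU nesting counter.
 */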
3279 
3280 static int alloc_percpu_trace_buffer(void)
3281 {
3282 	struct trace_buffer_struct __percpu *buffers;
3283 
3284 	if (trace_percpu_buffer)
3285 		return 0;
3286 
3287 	buffers = alloc_percpu(struct trace_buffer_struct);
3288 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3289 		return -ENOMEM;
3290 
3291 	trace_percpu_buffer = buffers;
3292 	return 0;
3293 }
3294 
3295 static int buffers_allocated;
3296 
3297 void trace_printk_init_buffers(void)
3298 {
3299 	if (buffers_allocated)
3300 		return;
3301 
3302 	if (alloc_percpu_trace_buffer())
3303 		return;
3304 
3305 	/* trace_printk() is for debug use only. Don't use it in production. */
3306 
3307 	pr_warn("\n");
3308 	pr_warn("**********************************************************\n");
3309 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3310 	pr_warn("**                                                      **\n");
3311 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3312 	pr_warn("**                                                      **\n");
3313 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3314 	pr_warn("** unsafe for production use.                           **\n");
3315 	pr_warn("**                                                      **\n");
3316 	pr_warn("** If you see this message and you are not debugging    **\n");
3317 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3318 	pr_warn("**                                                      **\n");
3319 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3320 	pr_warn("**********************************************************\n");
3321 
3322 	/* Expand the buffers to set size */
3323 	tracing_update_buffers();
3324 
3325 	buffers_allocated = 1;
3326 
3327 	/*
3328 	 * trace_printk_init_buffers() can be called by modules.
3329 	 * If that happens, then we need to start cmdline recording
3330 	 * directly here. If the global_trace.buffer is already
3331 	 * allocated here, then this was called by module code.
3332 	 */
3333 	if (global_trace.array_buffer.buffer)
3334 		tracing_start_cmdline_record();
3335 }
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3337 
3338 void trace_printk_start_comm(void)
3339 {
3340 	/* Start tracing comms if trace printk is set */
3341 	if (!buffers_allocated)
3342 		return;
3343 	tracing_start_cmdline_record();
3344 }
3345 
3346 static void trace_printk_start_stop_comm(int enabled)
3347 {
3348 	if (!buffers_allocated)
3349 		return;
3350 
3351 	if (enabled)
3352 		tracing_start_cmdline_record();
3353 	else
3354 		tracing_stop_cmdline_record();
3355 }
3356 
3357 /**
3358  * trace_vbprintk - write binary msg to tracing buffer
3359  * @ip:    The address of the caller
3360  * @fmt:   The string format to write to the buffer
3361  * @args:  Arguments for @fmt
3362  */
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3364 {
3365 	struct trace_event_call *call = &event_bprint;
3366 	struct ring_buffer_event *event;
3367 	struct trace_buffer *buffer;
3368 	struct trace_array *tr = &global_trace;
3369 	struct bprint_entry *entry;
3370 	unsigned int trace_ctx;
3371 	char *tbuffer;
3372 	int len = 0, size;
3373 
3374 	if (unlikely(tracing_selftest_running || tracing_disabled))
3375 		return 0;
3376 
3377 	/* Don't pollute graph traces with trace_vprintk internals */
3378 	pause_graph_tracing();
3379 
3380 	trace_ctx = tracing_gen_ctx();
3381 	preempt_disable_notrace();
3382 
3383 	tbuffer = get_trace_buf();
3384 	if (!tbuffer) {
3385 		len = 0;
3386 		goto out_nobuffer;
3387 	}
3388 
3389 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3390 
3391 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3392 		goto out_put;
3393 
3394 	size = sizeof(*entry) + sizeof(u32) * len;
3395 	buffer = tr->array_buffer.buffer;
3396 	ring_buffer_nest_start(buffer);
3397 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3398 					    trace_ctx);
3399 	if (!event)
3400 		goto out;
3401 	entry = ring_buffer_event_data(event);
3402 	entry->ip			= ip;
3403 	entry->fmt			= fmt;
3404 
3405 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3407 		__buffer_unlock_commit(buffer, event);
3408 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3409 	}
3410 
3411 out:
3412 	ring_buffer_nest_end(buffer);
3413 out_put:
3414 	put_trace_buf();
3415 
3416 out_nobuffer:
3417 	preempt_enable_notrace();
3418 	unpause_graph_tracing();
3419 
3420 	return len;
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
3423 
3424 __printf(3, 0)
3425 static int
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427 		      unsigned long ip, const char *fmt, va_list args)
3428 {
3429 	struct trace_event_call *call = &event_print;
3430 	struct ring_buffer_event *event;
3431 	int len = 0, size;
3432 	struct print_entry *entry;
3433 	unsigned int trace_ctx;
3434 	char *tbuffer;
3435 
3436 	if (tracing_disabled || tracing_selftest_running)
3437 		return 0;
3438 
3439 	/* Don't pollute graph traces with trace_vprintk internals */
3440 	pause_graph_tracing();
3441 
3442 	trace_ctx = tracing_gen_ctx();
3443 	preempt_disable_notrace();
3444 
3445 
3446 	tbuffer = get_trace_buf();
3447 	if (!tbuffer) {
3448 		len = 0;
3449 		goto out_nobuffer;
3450 	}
3451 
3452 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3453 
3454 	size = sizeof(*entry) + len + 1;
3455 	ring_buffer_nest_start(buffer);
3456 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3457 					    trace_ctx);
3458 	if (!event)
3459 		goto out;
3460 	entry = ring_buffer_event_data(event);
3461 	entry->ip = ip;
3462 
3463 	memcpy(&entry->buf, tbuffer, len + 1);
3464 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3465 		__buffer_unlock_commit(buffer, event);
3466 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3467 	}
3468 
3469 out:
3470 	ring_buffer_nest_end(buffer);
3471 	put_trace_buf();
3472 
3473 out_nobuffer:
3474 	preempt_enable_notrace();
3475 	unpause_graph_tracing();
3476 
3477 	return len;
3478 }
3479 
3480 __printf(3, 0)
3481 int trace_array_vprintk(struct trace_array *tr,
3482 			unsigned long ip, const char *fmt, va_list args)
3483 {
3484 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3485 }
3486 
3487 /**
3488  * trace_array_printk - Print a message to a specific instance
3489  * @tr: The instance trace_array descriptor
3490  * @ip: The instruction pointer that this is called from.
3491  * @fmt: The format to print (printf format)
3492  *
3493  * If a subsystem sets up its own instance, they have the right to
3494  * printk strings into their tracing instance buffer using this
3495  * function. Note, this function will not write into the top level
3496  * buffer (use trace_printk() for that), as writing into the top level
3497  * buffer should only have events that can be individually disabled.
3498  * trace_printk() is only used for debugging a kernel, and should not
3499  * be ever incorporated in normal use.
3500  *
3501  * trace_array_printk() can be used, as it will not add noise to the
3502  * top level tracing buffer.
3503  *
3504  * Note, trace_array_init_printk() must be called on @tr before this
3505  * can be used.
3506  */
3507 __printf(3, 0)
3508 int trace_array_printk(struct trace_array *tr,
3509 		       unsigned long ip, const char *fmt, ...)
3510 {
3511 	int ret;
3512 	va_list ap;
3513 
3514 	if (!tr)
3515 		return -ENOENT;
3516 
3517 	/* This is only allowed for created instances */
3518 	if (tr == &global_trace)
3519 		return 0;
3520 
3521 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3522 		return 0;
3523 
3524 	va_start(ap, fmt);
3525 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3526 	va_end(ap);
3527 	return ret;
3528 }
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3530 
3531 /**
3532  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533  * @tr: The trace array to initialize the buffers for
3534  *
3535  * As trace_array_printk() only writes into instances, they are OK to
3536  * have in the kernel (unlike trace_printk()). This needs to be called
3537  * before trace_array_printk() can be used on a trace_array.
3538  */
3539 int trace_array_init_printk(struct trace_array *tr)
3540 {
3541 	if (!tr)
3542 		return -ENOENT;
3543 
3544 	/* This is only allowed for created instances */
3545 	if (tr == &global_trace)
3546 		return -EINVAL;
3547 
3548 	return alloc_percpu_trace_buffer();
3549 }
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
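
/*
 * Illustrative only: a subsystem that created its own instance might do
 * something like the following ("my_instance" is a made-up name, and
 * trace_array_get_by_name() is assumed to be the usual way to obtain
 * the instance):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "driver");
 */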
3551 
3552 __printf(3, 4)
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554 			   unsigned long ip, const char *fmt, ...)
3555 {
3556 	int ret;
3557 	va_list ap;
3558 
3559 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3560 		return 0;
3561 
3562 	va_start(ap, fmt);
3563 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3564 	va_end(ap);
3565 	return ret;
3566 }
3567 
3568 __printf(2, 0)
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3570 {
3571 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3572 }
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3574 
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3576 {
3577 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3578 
3579 	iter->idx++;
3580 	if (buf_iter)
3581 		ring_buffer_iter_advance(buf_iter);
3582 }
3583 
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586 		unsigned long *lost_events)
3587 {
3588 	struct ring_buffer_event *event;
3589 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3590 
3591 	if (buf_iter) {
3592 		event = ring_buffer_iter_peek(buf_iter, ts);
3593 		if (lost_events)
3594 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595 				(unsigned long)-1 : 0;
3596 	} else {
3597 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3598 					 lost_events);
3599 	}
3600 
3601 	if (event) {
3602 		iter->ent_size = ring_buffer_event_length(event);
3603 		return ring_buffer_event_data(event);
3604 	}
3605 	iter->ent_size = 0;
3606 	return NULL;
3607 }
3608 
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611 		  unsigned long *missing_events, u64 *ent_ts)
3612 {
3613 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3614 	struct trace_entry *ent, *next = NULL;
3615 	unsigned long lost_events = 0, next_lost = 0;
3616 	int cpu_file = iter->cpu_file;
3617 	u64 next_ts = 0, ts;
3618 	int next_cpu = -1;
3619 	int next_size = 0;
3620 	int cpu;
3621 
3622 	/*
3623 	 * If we are in a per_cpu trace file, don't bother iterating over
3624 	 * all CPUs; just peek at that CPU directly.
3625 	 */
3626 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3628 			return NULL;
3629 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3630 		if (ent_cpu)
3631 			*ent_cpu = cpu_file;
3632 
3633 		return ent;
3634 	}
3635 
3636 	for_each_tracing_cpu(cpu) {
3637 
3638 		if (ring_buffer_empty_cpu(buffer, cpu))
3639 			continue;
3640 
3641 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3642 
3643 		/*
3644 		 * Pick the entry with the smallest timestamp:
3645 		 */
3646 		if (ent && (!next || ts < next_ts)) {
3647 			next = ent;
3648 			next_cpu = cpu;
3649 			next_ts = ts;
3650 			next_lost = lost_events;
3651 			next_size = iter->ent_size;
3652 		}
3653 	}
3654 
3655 	iter->ent_size = next_size;
3656 
3657 	if (ent_cpu)
3658 		*ent_cpu = next_cpu;
3659 
3660 	if (ent_ts)
3661 		*ent_ts = next_ts;
3662 
3663 	if (missing_events)
3664 		*missing_events = next_lost;
3665 
3666 	return next;
3667 }
3668 
3669 #define STATIC_FMT_BUF_SIZE	128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3671 
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3673 {
3674 	char *tmp;
3675 
3676 	/*
3677 	 * iter->tr is NULL when used with tp_printk, which means
3678 	 * this can get called where it is not safe to call krealloc().
3679 	 */
3680 	if (!iter->tr || iter->fmt == static_fmt_buf)
3681 		return NULL;
3682 
3683 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3684 		       GFP_KERNEL);
3685 	if (tmp) {
3686 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3687 		iter->fmt = tmp;
3688 	}
3689 
3690 	return tmp;
3691 }
3692 
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3695 			   bool star, int len)
3696 {
3697 	unsigned long addr = (unsigned long)str;
3698 	struct trace_event *trace_event;
3699 	struct trace_event_call *event;
3700 
3701 	/* Ignore strings with no length */
3702 	if (star && !len)
3703 		return true;
3704 
3705 	/* OK if part of the event data */
3706 	if ((addr >= (unsigned long)iter->ent) &&
3707 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3708 		return true;
3709 
3710 	/* OK if part of the temp seq buffer */
3711 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3713 		return true;
3714 
3715 	/* Core rodata can not be freed */
3716 	if (is_kernel_rodata(addr))
3717 		return true;
3718 
3719 	if (trace_is_tracepoint_string(str))
3720 		return true;
3721 
3722 	/*
3723 	 * Now this could be a module event, referencing core module
3724 	 * data, which is OK.
3725 	 */
3726 	if (!iter->ent)
3727 		return false;
3728 
3729 	trace_event = ftrace_find_event(iter->ent->type);
3730 	if (!trace_event)
3731 		return false;
3732 
3733 	event = container_of(trace_event, struct trace_event_call, event);
3734 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3735 		return false;
3736 
3737 	/* Would rather have rodata, but this will suffice */
3738 	if (within_module_core(addr, event->module))
3739 		return true;
3740 
3741 	return false;
3742 }
3743 
3744 static const char *show_buffer(struct trace_seq *s)
3745 {
3746 	struct seq_buf *seq = &s->seq;
3747 
3748 	seq_buf_terminate(seq);
3749 
3750 	return seq->buffer;
3751 }
3752 
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3754 
3755 static int test_can_verify_check(const char *fmt, ...)
3756 {
3757 	char buf[16];
3758 	va_list ap;
3759 	int ret;
3760 
3761 	/*
3762 	 * The verifier depends on vsnprintf() modifying the va_list that is
3763 	 * passed to it, i.e. on the va_list being passed by reference. Some
3764 	 * architectures (like x86_32) pass it by value, which means that
3765 	 * vsnprintf() does not modify the caller's va_list, and the verifier
3766 	 * would then need to understand all the values that vsnprintf can
3767 	 * consume. If the va_list is passed by value, the verifier is
3768 	 * therefore disabled.
3769 	 */
3770 	va_start(ap, fmt);
3771 	vsnprintf(buf, 16, "%d", ap);
3772 	ret = va_arg(ap, int);
3773 	va_end(ap);
3774 
3775 	return ret;
3776 }
3777 
3778 static void test_can_verify(void)
3779 {
3780 	if (!test_can_verify_check("%d %d", 0, 1)) {
3781 		pr_info("trace event string verifier disabled\n");
3782 		static_branch_inc(&trace_no_verify);
3783 	}
3784 }
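
/*
 * With the "%d %d", 0, 1 arguments above: if vsnprintf() advanced the
 * va_list (it was passed by reference), the following va_arg() returns
 * 1 and the verifier stays enabled.  If the architecture passed the
 * va_list by value, va_arg() returns the untouched first argument (0)
 * and the verifier is switched off.
 */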
3785 
3786 /**
3787  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3788  * @iter: The iterator that holds the seq buffer and the event being printed
3789  * @fmt: The format used to print the event
3790  * @ap: The va_list holding the data to print from @fmt.
3791  *
3792  * This writes the data into the @iter->seq buffer using the data from
3793  * @fmt and @ap. If the format has a %s, then the source of the string
3794  * is examined to make sure it is safe to print, otherwise it will
3795  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3796  * pointer.
3797  */
3798 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3799 			 va_list ap)
3800 {
3801 	const char *p = fmt;
3802 	const char *str;
3803 	int i, j;
3804 
3805 	if (WARN_ON_ONCE(!fmt))
3806 		return;
3807 
3808 	if (static_branch_unlikely(&trace_no_verify))
3809 		goto print;
3810 
3811 	/* Don't bother checking when doing a ftrace_dump() */
3812 	if (iter->fmt == static_fmt_buf)
3813 		goto print;
3814 
3815 	while (*p) {
3816 		bool star = false;
3817 		int len = 0;
3818 
3819 		j = 0;
3820 
3821 		/* We only care about %s and variants */
3822 		for (i = 0; p[i]; i++) {
3823 			if (i + 1 >= iter->fmt_size) {
3824 				/*
3825 				 * If we can't expand the copy buffer,
3826 				 * just print it.
3827 				 */
3828 				if (!trace_iter_expand_format(iter))
3829 					goto print;
3830 			}
3831 
3832 			if (p[i] == '\\' && p[i+1]) {
3833 				i++;
3834 				continue;
3835 			}
3836 			if (p[i] == '%') {
3837 				/* Need to test cases like %08.*s */
3838 				for (j = 1; p[i+j]; j++) {
3839 					if (isdigit(p[i+j]) ||
3840 					    p[i+j] == '.')
3841 						continue;
3842 					if (p[i+j] == '*') {
3843 						star = true;
3844 						continue;
3845 					}
3846 					break;
3847 				}
3848 				if (p[i+j] == 's')
3849 					break;
3850 				star = false;
3851 			}
3852 			j = 0;
3853 		}
3854 		/* If no %s found then just print normally */
3855 		if (!p[i])
3856 			break;
3857 
3858 		/* Copy up to the %s, and print that */
3859 		strncpy(iter->fmt, p, i);
3860 		iter->fmt[i] = '\0';
3861 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3862 
3863 		/*
3864 		 * If iter->seq is full, the above call no longer guarantees
3865 		 * that ap is in sync with fmt processing, and further calls
3866 		 * to va_arg() can return wrong positional arguments.
3867 		 *
3868 		 * Ensure that ap is no longer used in this case.
3869 		 */
3870 		if (iter->seq.full) {
3871 			p = "";
3872 			break;
3873 		}
3874 
3875 		if (star)
3876 			len = va_arg(ap, int);
3877 
3878 		/* The ap now points to the string data of the %s */
3879 		str = va_arg(ap, const char *);
3880 
3881 		/*
3882 		 * If you hit this warning, it is likely that the
3883 		 * trace event in question used %s on a string that
3884 		 * was saved at the time of the event, but may not be
3885 		 * around when the trace is read. Use __string(),
3886 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3887 		 * instead. See samples/trace_events/trace-events-sample.h
3888 		 * for reference.
3889 		 */
3890 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3891 			      "fmt: '%s' current_buffer: '%s'",
3892 			      fmt, show_buffer(&iter->seq))) {
3893 			int ret;
3894 
3895 			/* Try to safely read the string */
3896 			if (star) {
3897 				if (len + 1 > iter->fmt_size)
3898 					len = iter->fmt_size - 1;
3899 				if (len < 0)
3900 					len = 0;
3901 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3902 				iter->fmt[len] = 0;
3903 				star = false;
3904 			} else {
3905 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3906 								  iter->fmt_size);
3907 			}
3908 			if (ret < 0)
3909 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3910 			else
3911 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3912 						 str, iter->fmt);
3913 			str = "[UNSAFE-MEMORY]";
3914 			strcpy(iter->fmt, "%s");
3915 		} else {
3916 			strncpy(iter->fmt, p + i, j + 1);
3917 			iter->fmt[j+1] = '\0';
3918 		}
3919 		if (star)
3920 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3921 		else
3922 			trace_seq_printf(&iter->seq, iter->fmt, str);
3923 
3924 		p += i + j + 1;
3925 	}
3926  print:
3927 	if (*p)
3928 		trace_seq_vprintf(&iter->seq, p, ap);
3929 }
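
/*
 * For illustration: when an event used %s on memory that fails the
 * trace_safe_str() check, the output contains something like
 * "(0xffff888012345678:<copied bytes>)[UNSAFE-MEMORY]" (or just the
 * bare pointer when the string cannot be read at all) instead of
 * dereferencing a possibly freed pointer directly.
 */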
3930 
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3932 {
3933 	const char *p, *new_fmt;
3934 	char *q;
3935 
3936 	if (WARN_ON_ONCE(!fmt))
3937 		return fmt;
3938 
3939 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3940 		return fmt;
3941 
3942 	p = fmt;
3943 	new_fmt = q = iter->fmt;
3944 	while (*p) {
3945 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946 			if (!trace_iter_expand_format(iter))
3947 				return fmt;
3948 
3949 			q += iter->fmt - new_fmt;
3950 			new_fmt = iter->fmt;
3951 		}
3952 
3953 		*q++ = *p++;
3954 
3955 		/* Replace %p with %px */
3956 		if (p[-1] == '%') {
3957 			if (p[0] == '%') {
3958 				*q++ = *p++;
3959 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3960 				*q++ = *p++;
3961 				*q++ = 'x';
3962 			}
3963 		}
3964 	}
3965 	*q = '\0';
3966 
3967 	return new_fmt;
3968 }
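
/*
 * Example: when the hash-ptr option is cleared, a format such as
 * "ptr=%p flags=%%p" is rewritten to "ptr=%px flags=%%p".  A bare %p
 * is widened to %px so the real address is shown, while a literal
 * "%%p" is copied through untouched.
 */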
3969 
3970 #define STATIC_TEMP_BUF_SIZE	128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3972 
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975 					  int *ent_cpu, u64 *ent_ts)
3976 {
3977 	/* __find_next_entry will reset ent_size */
3978 	int ent_size = iter->ent_size;
3979 	struct trace_entry *entry;
3980 
3981 	/*
3982 	 * If called from ftrace_dump(), then the iter->temp buffer
3983 	 * will be the static_temp_buf and not created from kmalloc.
3984 	 * If the entry size is greater than the buffer, we cannot
3985 	 * save it. Just return NULL in that case. This is only
3986 	 * used to add markers when two consecutive events' time
3987 	 * stamps have a large delta. See trace_print_lat_context().
3988 	 */
3989 	if (iter->temp == static_temp_buf &&
3990 	    STATIC_TEMP_BUF_SIZE < ent_size)
3991 		return NULL;
3992 
3993 	/*
3994 	 * The __find_next_entry() may call peek_next_entry(), which may
3995 	 * call ring_buffer_peek() that may make the contents of iter->ent
3996 	 * undefined. Need to copy iter->ent now.
3997 	 */
3998 	if (iter->ent && iter->ent != iter->temp) {
3999 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4001 			void *temp;
4002 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4003 			if (!temp)
4004 				return NULL;
4005 			kfree(iter->temp);
4006 			iter->temp = temp;
4007 			iter->temp_size = iter->ent_size;
4008 		}
4009 		memcpy(iter->temp, iter->ent, iter->ent_size);
4010 		iter->ent = iter->temp;
4011 	}
4012 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013 	/* Put back the original ent_size */
4014 	iter->ent_size = ent_size;
4015 
4016 	return entry;
4017 }
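
/*
 * This is what lets e.g. trace_print_lat_context() peek at the
 * following entry (to decide whether to print a large-delta marker)
 * while the caller can still safely print the current iter->ent.
 */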
4018 
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4021 {
4022 	iter->ent = __find_next_entry(iter, &iter->cpu,
4023 				      &iter->lost_events, &iter->ts);
4024 
4025 	if (iter->ent)
4026 		trace_iterator_increment(iter);
4027 
4028 	return iter->ent ? iter : NULL;
4029 }
4030 
4031 static void trace_consume(struct trace_iterator *iter)
4032 {
4033 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034 			    &iter->lost_events);
4035 }
4036 
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4038 {
4039 	struct trace_iterator *iter = m->private;
4040 	int i = (int)*pos;
4041 	void *ent;
4042 
4043 	WARN_ON_ONCE(iter->leftover);
4044 
4045 	(*pos)++;
4046 
4047 	/* can't go backwards */
4048 	if (iter->idx > i)
4049 		return NULL;
4050 
4051 	if (iter->idx < 0)
4052 		ent = trace_find_next_entry_inc(iter);
4053 	else
4054 		ent = iter;
4055 
4056 	while (ent && iter->idx < i)
4057 		ent = trace_find_next_entry_inc(iter);
4058 
4059 	iter->pos = *pos;
4060 
4061 	return ent;
4062 }
4063 
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4065 {
4066 	struct ring_buffer_iter *buf_iter;
4067 	unsigned long entries = 0;
4068 	u64 ts;
4069 
4070 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4071 
4072 	buf_iter = trace_buffer_iter(iter, cpu);
4073 	if (!buf_iter)
4074 		return;
4075 
4076 	ring_buffer_iter_reset(buf_iter);
4077 
4078 	/*
4079 	 * We could have the case with the max latency tracers
4080 	 * that a reset never took place on a cpu. This is evident
4081 	 * by the timestamp being before the start of the buffer.
4082 	 */
4083 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084 		if (ts >= iter->array_buffer->time_start)
4085 			break;
4086 		entries++;
4087 		ring_buffer_iter_advance(buf_iter);
4088 	}
4089 
4090 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4091 }
4092 
4093 /*
4094  * The current tracer is copied to avoid taking a global lock
4095  * all around.
4096  */
4097 static void *s_start(struct seq_file *m, loff_t *pos)
4098 {
4099 	struct trace_iterator *iter = m->private;
4100 	struct trace_array *tr = iter->tr;
4101 	int cpu_file = iter->cpu_file;
4102 	void *p = NULL;
4103 	loff_t l = 0;
4104 	int cpu;
4105 
4106 	/*
4107 	 * Copy the tracer to avoid using a global lock all around.
4108 	 * iter->trace is a copy of current_trace, so the name pointer
4109 	 * can be compared instead of calling strcmp(), as iter->trace->name
4110 	 * will point to the same string as current_trace->name.
4111 	 */
4112 	mutex_lock(&trace_types_lock);
4113 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4114 		*iter->trace = *tr->current_trace;
4115 	mutex_unlock(&trace_types_lock);
4116 
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118 	if (iter->snapshot && iter->trace->use_max_tr)
4119 		return ERR_PTR(-EBUSY);
4120 #endif
4121 
4122 	if (*pos != iter->pos) {
4123 		iter->ent = NULL;
4124 		iter->cpu = 0;
4125 		iter->idx = -1;
4126 
4127 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4128 			for_each_tracing_cpu(cpu)
4129 				tracing_iter_reset(iter, cpu);
4130 		} else
4131 			tracing_iter_reset(iter, cpu_file);
4132 
4133 		iter->leftover = 0;
4134 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4135 			;
4136 
4137 	} else {
4138 		/*
4139 		 * If we overflowed the seq_file before, then we want
4140 		 * to just reuse the trace_seq buffer again.
4141 		 */
4142 		if (iter->leftover)
4143 			p = iter;
4144 		else {
4145 			l = *pos - 1;
4146 			p = s_next(m, p, &l);
4147 		}
4148 	}
4149 
4150 	trace_event_read_lock();
4151 	trace_access_lock(cpu_file);
4152 	return p;
4153 }
4154 
4155 static void s_stop(struct seq_file *m, void *p)
4156 {
4157 	struct trace_iterator *iter = m->private;
4158 
4159 #ifdef CONFIG_TRACER_MAX_TRACE
4160 	if (iter->snapshot && iter->trace->use_max_tr)
4161 		return;
4162 #endif
4163 
4164 	trace_access_unlock(iter->cpu_file);
4165 	trace_event_read_unlock();
4166 }
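
/*
 * s_start(), s_next(), s_show() and s_stop() implement the seq_file
 * iteration protocol used by tracer_seq_ops below: start() positions
 * the iterator at *pos (reusing the leftover buffer on overflow),
 * next() advances one entry, show() formats one line of output and
 * stop() drops the locks taken in start().
 */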
4167 
4168 static void
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170 		      unsigned long *entries, int cpu)
4171 {
4172 	unsigned long count;
4173 
4174 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4175 	/*
4176 	 * If this buffer has skipped entries, then we hold all
4177 	 * entries for the trace and we need to ignore the
4178 	 * ones before the time stamp.
4179 	 */
4180 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182 		/* total is the same as the entries */
4183 		*total = count;
4184 	} else
4185 		*total = count +
4186 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4187 	*entries = count;
4188 }
4189 
4190 static void
4191 get_total_entries(struct array_buffer *buf,
4192 		  unsigned long *total, unsigned long *entries)
4193 {
4194 	unsigned long t, e;
4195 	int cpu;
4196 
4197 	*total = 0;
4198 	*entries = 0;
4199 
4200 	for_each_tracing_cpu(cpu) {
4201 		get_total_entries_cpu(buf, &t, &e, cpu);
4202 		*total += t;
4203 		*entries += e;
4204 	}
4205 }
4206 
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4208 {
4209 	unsigned long total, entries;
4210 
4211 	if (!tr)
4212 		tr = &global_trace;
4213 
4214 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4215 
4216 	return entries;
4217 }
4218 
4219 unsigned long trace_total_entries(struct trace_array *tr)
4220 {
4221 	unsigned long total, entries;
4222 
4223 	if (!tr)
4224 		tr = &global_trace;
4225 
4226 	get_total_entries(&tr->array_buffer, &total, &entries);
4227 
4228 	return entries;
4229 }
4230 
4231 static void print_lat_help_header(struct seq_file *m)
4232 {
4233 	seq_puts(m, "#                    _------=> CPU#            \n"
4234 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4235 		    "#                  | / _----=> need-resched    \n"
4236 		    "#                  || / _---=> hardirq/softirq \n"
4237 		    "#                  ||| / _--=> preempt-depth   \n"
4238 		    "#                  |||| / _-=> migrate-disable \n"
4239 		    "#                  ||||| /     delay           \n"
4240 		    "#  cmd     pid     |||||| time  |   caller     \n"
4241 		    "#     \\   /        ||||||  \\    |    /       \n");
4242 }
4243 
4244 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4245 {
4246 	unsigned long total;
4247 	unsigned long entries;
4248 
4249 	get_total_entries(buf, &total, &entries);
4250 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4251 		   entries, total, num_online_cpus());
4252 	seq_puts(m, "#\n");
4253 }
4254 
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4256 				   unsigned int flags)
4257 {
4258 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4259 
4260 	print_event_info(buf, m);
4261 
4262 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4263 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4264 }
4265 
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4267 				       unsigned int flags)
4268 {
4269 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270 	static const char space[] = "            ";
4271 	int prec = tgid ? 12 : 2;
4272 
4273 	print_event_info(buf, m);
4274 
4275 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4276 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4277 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4278 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4279 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4280 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4281 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4282 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4283 }
4284 
4285 void
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4287 {
4288 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289 	struct array_buffer *buf = iter->array_buffer;
4290 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291 	struct tracer *type = iter->trace;
4292 	unsigned long entries;
4293 	unsigned long total;
4294 	const char *name = type->name;
4295 
4296 	get_total_entries(buf, &total, &entries);
4297 
4298 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4299 		   name, UTS_RELEASE);
4300 	seq_puts(m, "# -----------------------------------"
4301 		 "---------------------------------\n");
4302 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304 		   nsecs_to_usecs(data->saved_latency),
4305 		   entries,
4306 		   total,
4307 		   buf->cpu,
4308 		   preempt_model_none()      ? "server" :
4309 		   preempt_model_voluntary() ? "desktop" :
4310 		   preempt_model_full()      ? "preempt" :
4311 		   preempt_model_rt()        ? "preempt_rt" :
4312 		   "unknown",
4313 		   /* These are reserved for later use */
4314 		   0, 0, 0, 0);
4315 #ifdef CONFIG_SMP
4316 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4317 #else
4318 	seq_puts(m, ")\n");
4319 #endif
4320 	seq_puts(m, "#    -----------------\n");
4321 	seq_printf(m, "#    | task: %.16s-%d "
4322 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323 		   data->comm, data->pid,
4324 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325 		   data->policy, data->rt_priority);
4326 	seq_puts(m, "#    -----------------\n");
4327 
4328 	if (data->critical_start) {
4329 		seq_puts(m, "#  => started at: ");
4330 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331 		trace_print_seq(m, &iter->seq);
4332 		seq_puts(m, "\n#  => ended at:   ");
4333 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334 		trace_print_seq(m, &iter->seq);
4335 		seq_puts(m, "\n#\n");
4336 	}
4337 
4338 	seq_puts(m, "#\n");
4339 }
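
/*
 * The header produced above looks roughly like (values illustrative):
 *
 * # irqsoff latency trace v1.1.5 on <UTS_RELEASE>
 * # --------------------------------------------------------------------
 * # latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
 * #    -----------------
 * #    | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
 * #    -----------------
 */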
4340 
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4342 {
4343 	struct trace_seq *s = &iter->seq;
4344 	struct trace_array *tr = iter->tr;
4345 
4346 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4347 		return;
4348 
4349 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4350 		return;
4351 
4352 	if (cpumask_available(iter->started) &&
4353 	    cpumask_test_cpu(iter->cpu, iter->started))
4354 		return;
4355 
4356 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4357 		return;
4358 
4359 	if (cpumask_available(iter->started))
4360 		cpumask_set_cpu(iter->cpu, iter->started);
4361 
4362 	/* Don't print started cpu buffer for the first entry of the trace */
4363 	if (iter->idx > 1)
4364 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4365 				iter->cpu);
4366 }
4367 
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4369 {
4370 	struct trace_array *tr = iter->tr;
4371 	struct trace_seq *s = &iter->seq;
4372 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373 	struct trace_entry *entry;
4374 	struct trace_event *event;
4375 
4376 	entry = iter->ent;
4377 
4378 	test_cpu_buff_start(iter);
4379 
4380 	event = ftrace_find_event(entry->type);
4381 
4382 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384 			trace_print_lat_context(iter);
4385 		else
4386 			trace_print_context(iter);
4387 	}
4388 
4389 	if (trace_seq_has_overflowed(s))
4390 		return TRACE_TYPE_PARTIAL_LINE;
4391 
4392 	if (event)
4393 		return event->funcs->trace(iter, sym_flags, event);
4394 
4395 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4396 
4397 	return trace_handle_return(s);
4398 }
4399 
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4401 {
4402 	struct trace_array *tr = iter->tr;
4403 	struct trace_seq *s = &iter->seq;
4404 	struct trace_entry *entry;
4405 	struct trace_event *event;
4406 
4407 	entry = iter->ent;
4408 
4409 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410 		trace_seq_printf(s, "%d %d %llu ",
4411 				 entry->pid, iter->cpu, iter->ts);
4412 
4413 	if (trace_seq_has_overflowed(s))
4414 		return TRACE_TYPE_PARTIAL_LINE;
4415 
4416 	event = ftrace_find_event(entry->type);
4417 	if (event)
4418 		return event->funcs->raw(iter, 0, event);
4419 
4420 	trace_seq_printf(s, "%d ?\n", entry->type);
4421 
4422 	return trace_handle_return(s);
4423 }
4424 
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4426 {
4427 	struct trace_array *tr = iter->tr;
4428 	struct trace_seq *s = &iter->seq;
4429 	unsigned char newline = '\n';
4430 	struct trace_entry *entry;
4431 	struct trace_event *event;
4432 
4433 	entry = iter->ent;
4434 
4435 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4437 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4439 		if (trace_seq_has_overflowed(s))
4440 			return TRACE_TYPE_PARTIAL_LINE;
4441 	}
4442 
4443 	event = ftrace_find_event(entry->type);
4444 	if (event) {
4445 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446 		if (ret != TRACE_TYPE_HANDLED)
4447 			return ret;
4448 	}
4449 
4450 	SEQ_PUT_FIELD(s, newline);
4451 
4452 	return trace_handle_return(s);
4453 }
4454 
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4456 {
4457 	struct trace_array *tr = iter->tr;
4458 	struct trace_seq *s = &iter->seq;
4459 	struct trace_entry *entry;
4460 	struct trace_event *event;
4461 
4462 	entry = iter->ent;
4463 
4464 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465 		SEQ_PUT_FIELD(s, entry->pid);
4466 		SEQ_PUT_FIELD(s, iter->cpu);
4467 		SEQ_PUT_FIELD(s, iter->ts);
4468 		if (trace_seq_has_overflowed(s))
4469 			return TRACE_TYPE_PARTIAL_LINE;
4470 	}
4471 
4472 	event = ftrace_find_event(entry->type);
4473 	return event ? event->funcs->binary(iter, 0, event) :
4474 		TRACE_TYPE_HANDLED;
4475 }
4476 
4477 int trace_empty(struct trace_iterator *iter)
4478 {
4479 	struct ring_buffer_iter *buf_iter;
4480 	int cpu;
4481 
4482 	/* If we are looking at one CPU buffer, only check that one */
4483 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484 		cpu = iter->cpu_file;
4485 		buf_iter = trace_buffer_iter(iter, cpu);
4486 		if (buf_iter) {
4487 			if (!ring_buffer_iter_empty(buf_iter))
4488 				return 0;
4489 		} else {
4490 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491 				return 0;
4492 		}
4493 		return 1;
4494 	}
4495 
4496 	for_each_tracing_cpu(cpu) {
4497 		buf_iter = trace_buffer_iter(iter, cpu);
4498 		if (buf_iter) {
4499 			if (!ring_buffer_iter_empty(buf_iter))
4500 				return 0;
4501 		} else {
4502 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4503 				return 0;
4504 		}
4505 	}
4506 
4507 	return 1;
4508 }
4509 
4510 /*  Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4512 {
4513 	struct trace_array *tr = iter->tr;
4514 	unsigned long trace_flags = tr->trace_flags;
4515 	enum print_line_t ret;
4516 
4517 	if (iter->lost_events) {
4518 		if (iter->lost_events == (unsigned long)-1)
4519 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4520 					 iter->cpu);
4521 		else
4522 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523 					 iter->cpu, iter->lost_events);
4524 		if (trace_seq_has_overflowed(&iter->seq))
4525 			return TRACE_TYPE_PARTIAL_LINE;
4526 	}
4527 
4528 	if (iter->trace && iter->trace->print_line) {
4529 		ret = iter->trace->print_line(iter);
4530 		if (ret != TRACE_TYPE_UNHANDLED)
4531 			return ret;
4532 	}
4533 
4534 	if (iter->ent->type == TRACE_BPUTS &&
4535 			trace_flags & TRACE_ITER_PRINTK &&
4536 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537 		return trace_print_bputs_msg_only(iter);
4538 
4539 	if (iter->ent->type == TRACE_BPRINT &&
4540 			trace_flags & TRACE_ITER_PRINTK &&
4541 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542 		return trace_print_bprintk_msg_only(iter);
4543 
4544 	if (iter->ent->type == TRACE_PRINT &&
4545 			trace_flags & TRACE_ITER_PRINTK &&
4546 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547 		return trace_print_printk_msg_only(iter);
4548 
4549 	if (trace_flags & TRACE_ITER_BIN)
4550 		return print_bin_fmt(iter);
4551 
4552 	if (trace_flags & TRACE_ITER_HEX)
4553 		return print_hex_fmt(iter);
4554 
4555 	if (trace_flags & TRACE_ITER_RAW)
4556 		return print_raw_fmt(iter);
4557 
4558 	return print_trace_fmt(iter);
4559 }
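
/*
 * Dispatch order above: report lost events first, then give the
 * tracer's own print_line() callback a chance, then the printk
 * msg-only shortcuts, and finally fall back to the bin/hex/raw/default
 * formatters selected by the trace_options flags.
 */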
4560 
4561 void trace_latency_header(struct seq_file *m)
4562 {
4563 	struct trace_iterator *iter = m->private;
4564 	struct trace_array *tr = iter->tr;
4565 
4566 	/* print nothing if the buffers are empty */
4567 	if (trace_empty(iter))
4568 		return;
4569 
4570 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571 		print_trace_header(m, iter);
4572 
4573 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574 		print_lat_help_header(m);
4575 }
4576 
4577 void trace_default_header(struct seq_file *m)
4578 {
4579 	struct trace_iterator *iter = m->private;
4580 	struct trace_array *tr = iter->tr;
4581 	unsigned long trace_flags = tr->trace_flags;
4582 
4583 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4584 		return;
4585 
4586 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587 		/* print nothing if the buffers are empty */
4588 		if (trace_empty(iter))
4589 			return;
4590 		print_trace_header(m, iter);
4591 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4592 			print_lat_help_header(m);
4593 	} else {
4594 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4596 				print_func_help_header_irq(iter->array_buffer,
4597 							   m, trace_flags);
4598 			else
4599 				print_func_help_header(iter->array_buffer, m,
4600 						       trace_flags);
4601 		}
4602 	}
4603 }
4604 
4605 static void test_ftrace_alive(struct seq_file *m)
4606 {
4607 	if (!ftrace_is_dead())
4608 		return;
4609 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4610 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4611 }
4612 
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 static void show_snapshot_main_help(struct seq_file *m)
4615 {
4616 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4617 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4618 		    "#                      Takes a snapshot of the main buffer.\n"
4619 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4620 		    "#                      (Doesn't have to be '2' works with any number that\n"
4621 		    "#                       is not a '0' or '1')\n");
4622 }
4623 
4624 static void show_snapshot_percpu_help(struct seq_file *m)
4625 {
4626 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4627 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4628 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4629 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4630 #else
4631 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4632 		    "#                     Must use main snapshot file to allocate.\n");
4633 #endif
4634 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4635 		    "#                      (Doesn't have to be '2' works with any number that\n"
4636 		    "#                       is not a '0' or '1')\n");
4637 }
4638 
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4640 {
4641 	if (iter->tr->allocated_snapshot)
4642 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4643 	else
4644 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4645 
4646 	seq_puts(m, "# Snapshot commands:\n");
4647 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648 		show_snapshot_main_help(m);
4649 	else
4650 		show_snapshot_percpu_help(m);
4651 }
4652 #else
4653 /* Should never be called */
4654 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4655 #endif
4656 
4657 static int s_show(struct seq_file *m, void *v)
4658 {
4659 	struct trace_iterator *iter = v;
4660 	int ret;
4661 
4662 	if (iter->ent == NULL) {
4663 		if (iter->tr) {
4664 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4665 			seq_puts(m, "#\n");
4666 			test_ftrace_alive(m);
4667 		}
4668 		if (iter->snapshot && trace_empty(iter))
4669 			print_snapshot_help(m, iter);
4670 		else if (iter->trace && iter->trace->print_header)
4671 			iter->trace->print_header(m);
4672 		else
4673 			trace_default_header(m);
4674 
4675 	} else if (iter->leftover) {
4676 		/*
4677 		 * If we filled the seq_file buffer earlier, we
4678 		 * want to just show it now.
4679 		 */
4680 		ret = trace_print_seq(m, &iter->seq);
4681 
4682 		/* ret should this time be zero, but you never know */
4683 		iter->leftover = ret;
4684 
4685 	} else {
4686 		print_trace_line(iter);
4687 		ret = trace_print_seq(m, &iter->seq);
4688 		/*
4689 		 * If we overflow the seq_file buffer, then it will
4690 		 * ask us for this data again at start up.
4691 		 * Use that instead.
4692 		 *  ret is 0 if seq_file write succeeded.
4693 		 *        -1 otherwise.
4694 		 */
4695 		iter->leftover = ret;
4696 	}
4697 
4698 	return 0;
4699 }
4700 
4701 /*
4702  * Should be used after trace_array_get(); trace_types_lock
4703  * ensures that i_cdev was already initialized.
4704  */
4705 static inline int tracing_get_cpu(struct inode *inode)
4706 {
4707 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4708 		return (long)inode->i_cdev - 1;
4709 	return RING_BUFFER_ALL_CPUS;
4710 }
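
/*
 * trace_create_cpu_file() stores "cpu + 1" in i_cdev, so a NULL i_cdev
 * (e.g. the top level trace files) means "all CPUs" and anything else
 * decodes back to the per-cpu number above.
 */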
4711 
4712 static const struct seq_operations tracer_seq_ops = {
4713 	.start		= s_start,
4714 	.next		= s_next,
4715 	.stop		= s_stop,
4716 	.show		= s_show,
4717 };
4718 
4719 static struct trace_iterator *
4720 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4721 {
4722 	struct trace_array *tr = inode->i_private;
4723 	struct trace_iterator *iter;
4724 	int cpu;
4725 
4726 	if (tracing_disabled)
4727 		return ERR_PTR(-ENODEV);
4728 
4729 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4730 	if (!iter)
4731 		return ERR_PTR(-ENOMEM);
4732 
4733 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4734 				    GFP_KERNEL);
4735 	if (!iter->buffer_iter)
4736 		goto release;
4737 
4738 	/*
4739 	 * trace_find_next_entry() may need to save off iter->ent.
4740 	 * It will place it into the iter->temp buffer. As most
4741 	 * events are less than 128 bytes, allocate a buffer of that size.
4742 	 * If one is greater, then trace_find_next_entry() will
4743 	 * allocate a new buffer to adjust for the bigger iter->ent.
4744 	 * It's not critical if it fails to get allocated here.
4745 	 */
4746 	iter->temp = kmalloc(128, GFP_KERNEL);
4747 	if (iter->temp)
4748 		iter->temp_size = 128;
4749 
4750 	/*
4751 	 * trace_event_printf() may need to modify the given format
4752 	 * string to replace %p with %px so that it shows the real address
4753 	 * instead of a hash value. However, that is only needed for event
4754 	 * tracing; other tracers may not need it. Defer the allocation
4755 	 * until it is needed.
4756 	 */
4757 	iter->fmt = NULL;
4758 	iter->fmt_size = 0;
4759 
4760 	/*
4761 	 * We make a copy of the current tracer to avoid concurrent
4762 	 * changes on it while we are reading.
4763 	 */
4764 	mutex_lock(&trace_types_lock);
4765 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4766 	if (!iter->trace)
4767 		goto fail;
4768 
4769 	*iter->trace = *tr->current_trace;
4770 
4771 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4772 		goto fail;
4773 
4774 	iter->tr = tr;
4775 
4776 #ifdef CONFIG_TRACER_MAX_TRACE
4777 	/* Currently only the top directory has a snapshot */
4778 	if (tr->current_trace->print_max || snapshot)
4779 		iter->array_buffer = &tr->max_buffer;
4780 	else
4781 #endif
4782 		iter->array_buffer = &tr->array_buffer;
4783 	iter->snapshot = snapshot;
4784 	iter->pos = -1;
4785 	iter->cpu_file = tracing_get_cpu(inode);
4786 	mutex_init(&iter->mutex);
4787 
4788 	/* Notify the tracer early; before we stop tracing. */
4789 	if (iter->trace->open)
4790 		iter->trace->open(iter);
4791 
4792 	/* Annotate start of buffers if we had overruns */
4793 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4794 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4795 
4796 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4797 	if (trace_clocks[tr->clock_id].in_ns)
4798 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4799 
4800 	/*
4801 	 * If pause-on-trace is enabled, then stop the trace while
4802 	 * dumping, unless this is the "snapshot" file
4803 	 */
4804 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4805 		tracing_stop_tr(tr);
4806 
4807 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4808 		for_each_tracing_cpu(cpu) {
4809 			iter->buffer_iter[cpu] =
4810 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4811 							 cpu, GFP_KERNEL);
4812 		}
4813 		ring_buffer_read_prepare_sync();
4814 		for_each_tracing_cpu(cpu) {
4815 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4816 			tracing_iter_reset(iter, cpu);
4817 		}
4818 	} else {
4819 		cpu = iter->cpu_file;
4820 		iter->buffer_iter[cpu] =
4821 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4822 						 cpu, GFP_KERNEL);
4823 		ring_buffer_read_prepare_sync();
4824 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4825 		tracing_iter_reset(iter, cpu);
4826 	}
4827 
4828 	mutex_unlock(&trace_types_lock);
4829 
4830 	return iter;
4831 
4832  fail:
4833 	mutex_unlock(&trace_types_lock);
4834 	kfree(iter->trace);
4835 	kfree(iter->temp);
4836 	kfree(iter->buffer_iter);
4837 release:
4838 	seq_release_private(inode, file);
4839 	return ERR_PTR(-ENOMEM);
4840 }
4841 
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4843 {
4844 	int ret;
4845 
4846 	ret = tracing_check_open_get_tr(NULL);
4847 	if (ret)
4848 		return ret;
4849 
4850 	filp->private_data = inode->i_private;
4851 	return 0;
4852 }
4853 
4854 bool tracing_is_disabled(void)
4855 {
4856 	return (tracing_disabled) ? true : false;
4857 }
4858 
4859 /*
4860  * Open and update trace_array ref count.
4861  * Must have the current trace_array passed to it.
4862  */
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4864 {
4865 	struct trace_array *tr = inode->i_private;
4866 	int ret;
4867 
4868 	ret = tracing_check_open_get_tr(tr);
4869 	if (ret)
4870 		return ret;
4871 
4872 	filp->private_data = inode->i_private;
4873 
4874 	return 0;
4875 }
4876 
4877 static int tracing_mark_open(struct inode *inode, struct file *filp)
4878 {
4879 	stream_open(inode, filp);
4880 	return tracing_open_generic_tr(inode, filp);
4881 }
4882 
4883 static int tracing_release(struct inode *inode, struct file *file)
4884 {
4885 	struct trace_array *tr = inode->i_private;
4886 	struct seq_file *m = file->private_data;
4887 	struct trace_iterator *iter;
4888 	int cpu;
4889 
4890 	if (!(file->f_mode & FMODE_READ)) {
4891 		trace_array_put(tr);
4892 		return 0;
4893 	}
4894 
4895 	/* Writes do not use seq_file */
4896 	iter = m->private;
4897 	mutex_lock(&trace_types_lock);
4898 
4899 	for_each_tracing_cpu(cpu) {
4900 		if (iter->buffer_iter[cpu])
4901 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4902 	}
4903 
4904 	if (iter->trace && iter->trace->close)
4905 		iter->trace->close(iter);
4906 
4907 	if (!iter->snapshot && tr->stop_count)
4908 		/* reenable tracing if it was previously enabled */
4909 		tracing_start_tr(tr);
4910 
4911 	__trace_array_put(tr);
4912 
4913 	mutex_unlock(&trace_types_lock);
4914 
4915 	mutex_destroy(&iter->mutex);
4916 	free_cpumask_var(iter->started);
4917 	kfree(iter->fmt);
4918 	kfree(iter->temp);
4919 	kfree(iter->trace);
4920 	kfree(iter->buffer_iter);
4921 	seq_release_private(inode, file);
4922 
4923 	return 0;
4924 }
4925 
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4927 {
4928 	struct trace_array *tr = inode->i_private;
4929 
4930 	trace_array_put(tr);
4931 	return 0;
4932 }
4933 
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4935 {
4936 	struct trace_array *tr = inode->i_private;
4937 
4938 	trace_array_put(tr);
4939 
4940 	return single_release(inode, file);
4941 }
4942 
4943 static int tracing_open(struct inode *inode, struct file *file)
4944 {
4945 	struct trace_array *tr = inode->i_private;
4946 	struct trace_iterator *iter;
4947 	int ret;
4948 
4949 	ret = tracing_check_open_get_tr(tr);
4950 	if (ret)
4951 		return ret;
4952 
4953 	/* If this file was open for write, then erase contents */
4954 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955 		int cpu = tracing_get_cpu(inode);
4956 		struct array_buffer *trace_buf = &tr->array_buffer;
4957 
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959 		if (tr->current_trace->print_max)
4960 			trace_buf = &tr->max_buffer;
4961 #endif
4962 
4963 		if (cpu == RING_BUFFER_ALL_CPUS)
4964 			tracing_reset_online_cpus(trace_buf);
4965 		else
4966 			tracing_reset_cpu(trace_buf, cpu);
4967 	}
4968 
4969 	if (file->f_mode & FMODE_READ) {
4970 		iter = __tracing_open(inode, file, false);
4971 		if (IS_ERR(iter))
4972 			ret = PTR_ERR(iter);
4973 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4975 	}
4976 
4977 	if (ret < 0)
4978 		trace_array_put(tr);
4979 
4980 	return ret;
4981 }
4982 
4983 /*
4984  * Some tracers are not suitable for instance buffers.
4985  * A tracer is always available for the global array (toplevel)
4986  * or if it explicitly states that it is.
4987  */
4988 static bool
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4990 {
4991 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4992 }
4993 
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4997 {
4998 	while (t && !trace_ok_for_array(t, tr))
4999 		t = t->next;
5000 
5001 	return t;
5002 }
5003 
5004 static void *
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5006 {
5007 	struct trace_array *tr = m->private;
5008 	struct tracer *t = v;
5009 
5010 	(*pos)++;
5011 
5012 	if (t)
5013 		t = get_tracer_for_array(tr, t->next);
5014 
5015 	return t;
5016 }
5017 
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5019 {
5020 	struct trace_array *tr = m->private;
5021 	struct tracer *t;
5022 	loff_t l = 0;
5023 
5024 	mutex_lock(&trace_types_lock);
5025 
5026 	t = get_tracer_for_array(tr, trace_types);
5027 	for (; t && l < *pos; t = t_next(m, t, &l))
5028 			;
5029 
5030 	return t;
5031 }
5032 
5033 static void t_stop(struct seq_file *m, void *p)
5034 {
5035 	mutex_unlock(&trace_types_lock);
5036 }
5037 
5038 static int t_show(struct seq_file *m, void *v)
5039 {
5040 	struct tracer *t = v;
5041 
5042 	if (!t)
5043 		return 0;
5044 
5045 	seq_puts(m, t->name);
5046 	if (t->next)
5047 		seq_putc(m, ' ');
5048 	else
5049 		seq_putc(m, '\n');
5050 
5051 	return 0;
5052 }
5053 
5054 static const struct seq_operations show_traces_seq_ops = {
5055 	.start		= t_start,
5056 	.next		= t_next,
5057 	.stop		= t_stop,
5058 	.show		= t_show,
5059 };
5060 
5061 static int show_traces_open(struct inode *inode, struct file *file)
5062 {
5063 	struct trace_array *tr = inode->i_private;
5064 	struct seq_file *m;
5065 	int ret;
5066 
5067 	ret = tracing_check_open_get_tr(tr);
5068 	if (ret)
5069 		return ret;
5070 
5071 	ret = seq_open(file, &show_traces_seq_ops);
5072 	if (ret) {
5073 		trace_array_put(tr);
5074 		return ret;
5075 	}
5076 
5077 	m = file->private_data;
5078 	m->private = tr;
5079 
5080 	return 0;
5081 }
5082 
5083 static int show_traces_release(struct inode *inode, struct file *file)
5084 {
5085 	struct trace_array *tr = inode->i_private;
5086 
5087 	trace_array_put(tr);
5088 	return seq_release(inode, file);
5089 }
5090 
5091 static ssize_t
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093 		   size_t count, loff_t *ppos)
5094 {
5095 	return count;
5096 }
5097 
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5099 {
5100 	int ret;
5101 
5102 	if (file->f_mode & FMODE_READ)
5103 		ret = seq_lseek(file, offset, whence);
5104 	else
5105 		file->f_pos = ret = 0;
5106 
5107 	return ret;
5108 }
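
/*
 * A descriptor opened write-only has no seq_file attached (writes go
 * through tracing_write_stub() above), so seeking it simply pins the
 * file position at 0.
 */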
5109 
5110 static const struct file_operations tracing_fops = {
5111 	.open		= tracing_open,
5112 	.read		= seq_read,
5113 	.write		= tracing_write_stub,
5114 	.llseek		= tracing_lseek,
5115 	.release	= tracing_release,
5116 };
5117 
5118 static const struct file_operations show_traces_fops = {
5119 	.open		= show_traces_open,
5120 	.read		= seq_read,
5121 	.llseek		= seq_lseek,
5122 	.release	= show_traces_release,
5123 };
5124 
5125 static ssize_t
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127 		     size_t count, loff_t *ppos)
5128 {
5129 	struct trace_array *tr = file_inode(filp)->i_private;
5130 	char *mask_str;
5131 	int len;
5132 
5133 	len = snprintf(NULL, 0, "%*pb\n",
5134 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135 	mask_str = kmalloc(len, GFP_KERNEL);
5136 	if (!mask_str)
5137 		return -ENOMEM;
5138 
5139 	len = snprintf(mask_str, len, "%*pb\n",
5140 		       cpumask_pr_args(tr->tracing_cpumask));
5141 	if (len >= count) {
5142 		count = -EINVAL;
5143 		goto out_err;
5144 	}
5145 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5146 
5147 out_err:
5148 	kfree(mask_str);
5149 
5150 	return count;
5151 }
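
/*
 * Example: on a machine with 8 possible CPUs, all of them traced,
 * reading tracing_cpumask returns "ff\n"; "%*pb" prints the cpumask as
 * a hex bitmap sized to nr_cpu_ids bits.
 */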
5152 
5153 int tracing_set_cpumask(struct trace_array *tr,
5154 			cpumask_var_t tracing_cpumask_new)
5155 {
5156 	int cpu;
5157 
5158 	if (!tr)
5159 		return -EINVAL;
5160 
5161 	local_irq_disable();
5162 	arch_spin_lock(&tr->max_lock);
5163 	for_each_tracing_cpu(cpu) {
5164 		/*
5165 		 * Increase/decrease the disabled counter if we are
5166 		 * about to flip a bit in the cpumask:
5167 		 */
5168 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5172 		}
5173 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5177 		}
5178 	}
5179 	arch_spin_unlock(&tr->max_lock);
5180 	local_irq_enable();
5181 
5182 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5183 
5184 	return 0;
5185 }
5186 
5187 static ssize_t
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189 		      size_t count, loff_t *ppos)
5190 {
5191 	struct trace_array *tr = file_inode(filp)->i_private;
5192 	cpumask_var_t tracing_cpumask_new;
5193 	int err;
5194 
5195 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5196 		return -ENOMEM;
5197 
5198 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5199 	if (err)
5200 		goto err_free;
5201 
5202 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5203 	if (err)
5204 		goto err_free;
5205 
5206 	free_cpumask_var(tracing_cpumask_new);
5207 
5208 	return count;
5209 
5210 err_free:
5211 	free_cpumask_var(tracing_cpumask_new);
5212 
5213 	return err;
5214 }
5215 
5216 static const struct file_operations tracing_cpumask_fops = {
5217 	.open		= tracing_open_generic_tr,
5218 	.read		= tracing_cpumask_read,
5219 	.write		= tracing_cpumask_write,
5220 	.release	= tracing_release_generic_tr,
5221 	.llseek		= generic_file_llseek,
5222 };
5223 
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5225 {
5226 	struct tracer_opt *trace_opts;
5227 	struct trace_array *tr = m->private;
5228 	u32 tracer_flags;
5229 	int i;
5230 
5231 	mutex_lock(&trace_types_lock);
5232 	tracer_flags = tr->current_trace->flags->val;
5233 	trace_opts = tr->current_trace->flags->opts;
5234 
5235 	for (i = 0; trace_options[i]; i++) {
5236 		if (tr->trace_flags & (1 << i))
5237 			seq_printf(m, "%s\n", trace_options[i]);
5238 		else
5239 			seq_printf(m, "no%s\n", trace_options[i]);
5240 	}
5241 
5242 	for (i = 0; trace_opts[i].name; i++) {
5243 		if (tracer_flags & trace_opts[i].bit)
5244 			seq_printf(m, "%s\n", trace_opts[i].name);
5245 		else
5246 			seq_printf(m, "no%s\n", trace_opts[i].name);
5247 	}
5248 	mutex_unlock(&trace_types_lock);
5249 
5250 	return 0;
5251 }
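
/*
 * The listing produced above has one line per option, e.g.
 * "print-parent" when the flag is set and "noprint-parent" when it is
 * cleared, followed by the current tracer's private options in the
 * same form.
 */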
5252 
5253 static int __set_tracer_option(struct trace_array *tr,
5254 			       struct tracer_flags *tracer_flags,
5255 			       struct tracer_opt *opts, int neg)
5256 {
5257 	struct tracer *trace = tracer_flags->trace;
5258 	int ret;
5259 
5260 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5261 	if (ret)
5262 		return ret;
5263 
5264 	if (neg)
5265 		tracer_flags->val &= ~opts->bit;
5266 	else
5267 		tracer_flags->val |= opts->bit;
5268 	return 0;
5269 }
5270 
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5273 {
5274 	struct tracer *trace = tr->current_trace;
5275 	struct tracer_flags *tracer_flags = trace->flags;
5276 	struct tracer_opt *opts = NULL;
5277 	int i;
5278 
5279 	for (i = 0; tracer_flags->opts[i].name; i++) {
5280 		opts = &tracer_flags->opts[i];
5281 
5282 		if (strcmp(cmp, opts->name) == 0)
5283 			return __set_tracer_option(tr, trace->flags, opts, neg);
5284 	}
5285 
5286 	return -EINVAL;
5287 }
5288 
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5291 {
5292 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5293 		return -1;
5294 
5295 	return 0;
5296 }
5297 
5298 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5299 {
5300 	int *map;
5301 
5302 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5303 	    (mask == TRACE_ITER_RECORD_CMD))
5304 		lockdep_assert_held(&event_mutex);
5305 
5306 	/* do nothing if flag is already set */
5307 	if (!!(tr->trace_flags & mask) == !!enabled)
5308 		return 0;
5309 
5310 	/* Give the tracer a chance to approve the change */
5311 	if (tr->current_trace->flag_changed)
5312 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5313 			return -EINVAL;
5314 
5315 	if (enabled)
5316 		tr->trace_flags |= mask;
5317 	else
5318 		tr->trace_flags &= ~mask;
5319 
5320 	if (mask == TRACE_ITER_RECORD_CMD)
5321 		trace_event_enable_cmd_record(enabled);
5322 
5323 	if (mask == TRACE_ITER_RECORD_TGID) {
5324 		if (!tgid_map) {
5325 			tgid_map_max = pid_max;
5326 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5327 				       GFP_KERNEL);
5328 
5329 			/*
5330 			 * Pairs with smp_load_acquire() in
5331 			 * trace_find_tgid_ptr() to ensure that if it observes
5332 			 * the tgid_map we just allocated then it also observes
5333 			 * the corresponding tgid_map_max value.
5334 			 */
5335 			smp_store_release(&tgid_map, map);
5336 		}
5337 		if (!tgid_map) {
5338 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5339 			return -ENOMEM;
5340 		}
5341 
5342 		trace_event_enable_tgid_record(enabled);
5343 	}
5344 
5345 	if (mask == TRACE_ITER_EVENT_FORK)
5346 		trace_event_follow_fork(tr, enabled);
5347 
5348 	if (mask == TRACE_ITER_FUNC_FORK)
5349 		ftrace_pid_follow_fork(tr, enabled);
5350 
5351 	if (mask == TRACE_ITER_OVERWRITE) {
5352 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5353 #ifdef CONFIG_TRACER_MAX_TRACE
5354 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5355 #endif
5356 	}
5357 
5358 	if (mask == TRACE_ITER_PRINTK) {
5359 		trace_printk_start_stop_comm(enabled);
5360 		trace_printk_control(enabled);
5361 	}
5362 
5363 	return 0;
5364 }
5365 
5366 int trace_set_options(struct trace_array *tr, char *option)
5367 {
5368 	char *cmp;
5369 	int neg = 0;
5370 	int ret;
5371 	size_t orig_len = strlen(option);
5372 	int len;
5373 
5374 	cmp = strstrip(option);
5375 
5376 	len = str_has_prefix(cmp, "no");
5377 	if (len)
5378 		neg = 1;
5379 
5380 	cmp += len;
5381 
5382 	mutex_lock(&event_mutex);
5383 	mutex_lock(&trace_types_lock);
5384 
5385 	ret = match_string(trace_options, -1, cmp);
5386 	/* If no option could be set, test the specific tracer options */
5387 	if (ret < 0)
5388 		ret = set_tracer_option(tr, cmp, neg);
5389 	else
5390 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5391 
5392 	mutex_unlock(&trace_types_lock);
5393 	mutex_unlock(&event_mutex);
5394 
5395 	/*
5396 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5397 	 * turn it back into a space.
5398 	 */
5399 	if (orig_len > strlen(option))
5400 		option[strlen(option)] = ' ';
5401 
5402 	return ret;
5403 }
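
/*
 * Example: trace_set_options(tr, "nooverwrite") strips the "no"
 * prefix, matches the core "overwrite" option and ends up calling
 * set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0); a name that is not a
 * core option is handed to set_tracer_option() in case it is a
 * tracer-specific flag.
 */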
5404 
5405 static void __init apply_trace_boot_options(void)
5406 {
5407 	char *buf = trace_boot_options_buf;
5408 	char *option;
5409 
5410 	while (true) {
5411 		option = strsep(&buf, ",");
5412 
5413 		if (!option)
5414 			break;
5415 
5416 		if (*option)
5417 			trace_set_options(&global_trace, option);
5418 
5419 		/* Put back the comma to allow this to be called again */
5420 		if (buf)
5421 			*(buf - 1) = ',';
5422 	}
5423 }
5424 
5425 static ssize_t
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427 			size_t cnt, loff_t *ppos)
5428 {
5429 	struct seq_file *m = filp->private_data;
5430 	struct trace_array *tr = m->private;
5431 	char buf[64];
5432 	int ret;
5433 
5434 	if (cnt >= sizeof(buf))
5435 		return -EINVAL;
5436 
5437 	if (copy_from_user(buf, ubuf, cnt))
5438 		return -EFAULT;
5439 
5440 	buf[cnt] = 0;
5441 
5442 	ret = trace_set_options(tr, buf);
5443 	if (ret < 0)
5444 		return ret;
5445 
5446 	*ppos += cnt;
5447 
5448 	return cnt;
5449 }
5450 
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5452 {
5453 	struct trace_array *tr = inode->i_private;
5454 	int ret;
5455 
5456 	ret = tracing_check_open_get_tr(tr);
5457 	if (ret)
5458 		return ret;
5459 
5460 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5461 	if (ret < 0)
5462 		trace_array_put(tr);
5463 
5464 	return ret;
5465 }
5466 
5467 static const struct file_operations tracing_iter_fops = {
5468 	.open		= tracing_trace_options_open,
5469 	.read		= seq_read,
5470 	.llseek		= seq_lseek,
5471 	.release	= tracing_single_release_tr,
5472 	.write		= tracing_trace_options_write,
5473 };
5474 
5475 static const char readme_msg[] =
5476 	"tracing mini-HOWTO:\n\n"
5477 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5478 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5479 	" Important files:\n"
5480 	"  trace\t\t\t- The static contents of the buffer\n"
5481 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5482 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5483 	"  current_tracer\t- function and latency tracers\n"
5484 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5485 	"  error_log\t- error log for failed commands (that support it)\n"
5486 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5487 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5488 	"  trace_clock\t\t- change the clock used to order events\n"
5489 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5490 	"      global:   Synced across CPUs but slows tracing down.\n"
5491 	"     counter:   Not a clock, but just an increment\n"
5492 	"      uptime:   Jiffy counter from time of boot\n"
5493 	"        perf:   Same clock that perf events use\n"
5494 #ifdef CONFIG_X86_64
5495 	"     x86-tsc:   TSC cycle counter\n"
5496 #endif
5497 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5498 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5499 	"    absolute:   Absolute (standalone) timestamp\n"
5500 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5501 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5502 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5503 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5504 	"\t\t\t  Remove sub-buffer with rmdir\n"
5505 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5506 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5507 	"\t\t\t  option name\n"
5508 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5511 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5512 	"\t\t\t  functions\n"
5513 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5514 	"\t     modules: Can select a group via module\n"
5515 	"\t      Format: :mod:<module-name>\n"
5516 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5517 	"\t    triggers: a command to perform when function is hit\n"
5518 	"\t      Format: <function>:<trigger>[:count]\n"
5519 	"\t     trigger: traceon, traceoff\n"
5520 	"\t\t      enable_event:<system>:<event>\n"
5521 	"\t\t      disable_event:<system>:<event>\n"
5522 #ifdef CONFIG_STACKTRACE
5523 	"\t\t      stacktrace\n"
5524 #endif
5525 #ifdef CONFIG_TRACER_SNAPSHOT
5526 	"\t\t      snapshot\n"
5527 #endif
5528 	"\t\t      dump\n"
5529 	"\t\t      cpudump\n"
5530 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5531 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5532 	"\t     The first one will disable tracing every time do_fault is hit\n"
5533 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5534 	"\t       The first time do trap is hit and it disables tracing, the\n"
5535 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5536 	"\t       the counter will not decrement. It only decrements when the\n"
5537 	"\t       trigger did work\n"
5538 	"\t     To remove trigger without count:\n"
5539 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5540 	"\t     To remove trigger with a count:\n"
5541 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5542 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5543 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5544 	"\t    modules: Can select a group via module command :mod:\n"
5545 	"\t    Does not accept triggers\n"
5546 #endif /* CONFIG_DYNAMIC_FTRACE */
5547 #ifdef CONFIG_FUNCTION_TRACER
5548 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5549 	"\t\t    (function)\n"
5550 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5551 	"\t\t    (function)\n"
5552 #endif
5553 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5554 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5555 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5556 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5560 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5561 	"\t\t\t  information\n"
5562 #endif
5563 #ifdef CONFIG_STACK_TRACER
5564 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5565 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5566 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5567 	"\t\t\t  new trace)\n"
5568 #ifdef CONFIG_DYNAMIC_FTRACE
5569 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5570 	"\t\t\t  traces\n"
5571 #endif
5572 #endif /* CONFIG_STACK_TRACER */
5573 #ifdef CONFIG_DYNAMIC_EVENTS
5574 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5575 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5576 #endif
5577 #ifdef CONFIG_KPROBE_EVENTS
5578 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5579 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5580 #endif
5581 #ifdef CONFIG_UPROBE_EVENTS
5582 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5583 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5584 #endif
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586 	"\t  accepts: event-definitions (one definition per line)\n"
5587 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5588 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5591 #endif
5592 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5593 	"\t           -:[<group>/][<event>]\n"
5594 #ifdef CONFIG_KPROBE_EVENTS
5595 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5596   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5597 #endif
5598 #ifdef CONFIG_UPROBE_EVENTS
5599   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5600 #endif
5601 	"\t     args: <name>=fetcharg[:type]\n"
5602 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5603 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5604 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5605 #else
5606 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5607 #endif
5608 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5609 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5610 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5611 	"\t           <type>\\[<array-size>\\]\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613 	"\t    field: <stype> <name>;\n"
5614 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5615 	"\t           [unsigned] char/int/long\n"
5616 #endif
5617 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5618 	"\t            of the <attached-group>/<attached-event>.\n"
5619 #endif
5620 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5621 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5623 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5624 	"\t\t\t  events\n"
5625 	"      filter\t\t- If set, only events passing filter are traced\n"
5626 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5627 	"\t\t\t  <event>:\n"
5628 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629 	"      filter\t\t- If set, only events passing filter are traced\n"
5630 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5631 	"\t    Format: <trigger>[:count][if <filter>]\n"
5632 	"\t   trigger: traceon, traceoff\n"
5633 	"\t            enable_event:<system>:<event>\n"
5634 	"\t            disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636 	"\t            enable_hist:<system>:<event>\n"
5637 	"\t            disable_hist:<system>:<event>\n"
5638 #endif
5639 #ifdef CONFIG_STACKTRACE
5640 	"\t\t    stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643 	"\t\t    snapshot\n"
5644 #endif
5645 #ifdef CONFIG_HIST_TRIGGERS
5646 	"\t\t    hist (see below)\n"
5647 #endif
5648 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5649 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5650 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651 	"\t                  events/block/block_unplug/trigger\n"
5652 	"\t   The first disables tracing every time block_unplug is hit.\n"
5653 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5654 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5655 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5656 	"\t   Like function triggers, the counter is only decremented if it\n"
5657 	"\t    enabled or disabled tracing.\n"
5658 	"\t   To remove a trigger without a count:\n"
5659 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5660 	"\t   To remove a trigger with a count:\n"
5661 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5662 	"\t   Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5666 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667 	"\t            [:values=<field1[,field2,...]>]\n"
5668 	"\t            [:sort=<field1[,field2,...]>]\n"
5669 	"\t            [:size=#entries]\n"
5670 	"\t            [:pause][:continue][:clear]\n"
5671 	"\t            [:name=histname1]\n"
5672 	"\t            [:<handler>.<action>]\n"
5673 	"\t            [if <filter>]\n\n"
5674 	"\t    Note, special fields can be used as well:\n"
5675 	"\t            common_timestamp - to record current timestamp\n"
5676 	"\t            common_cpu - to record the CPU the event happened on\n"
5677 	"\n"
5678 	"\t    A hist trigger variable can be:\n"
5679 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5680 	"\t        - a reference to another variable e.g. y=$x,\n"
5681 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5682 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5683 	"\n"
5684 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5685 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5686 	"\t    variable reference, field or numeric literal.\n"
5687 	"\n"
5688 	"\t    When a matching event is hit, an entry is added to a hash\n"
5689 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5690 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5691 	"\t    correspond to fields in the event's format description.  Keys\n"
5692 	"\t    can be any field, or the special string 'stacktrace'.\n"
5693 	"\t    Compound keys consisting of up to two fields can be specified\n"
5694 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5695 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5696 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5697 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5698 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5699 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5700 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5701 	"\t    its histogram data will be shared with other triggers of the\n"
5702 	"\t    same name, and trigger hits will update this common data.\n\n"
5703 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5704 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5705 	"\t    triggers attached to an event, there will be a table for each\n"
5706 	"\t    trigger in the output.  The table displayed for a named\n"
5707 	"\t    trigger will be the same as any other instance having the\n"
5708 	"\t    same name.  The default format used to display a given field\n"
5709 	"\t    can be modified by appending any of the following modifiers\n"
5710 	"\t    to the field name, as applicable:\n\n"
5711 	"\t            .hex        display a number as a hex value\n"
5712 	"\t            .sym        display an address as a symbol\n"
5713 	"\t            .sym-offset display an address as a symbol and offset\n"
5714 	"\t            .execname   display a common_pid as a program name\n"
5715 	"\t            .syscall    display a syscall id as a syscall name\n"
5716 	"\t            .log2       display log2 value rather than raw number\n"
5717 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5718 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5719 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5720 	"\t    trigger or to start a hist trigger but not log any events\n"
5721 	"\t    until told to do so.  'continue' can be used to start or\n"
5722 	"\t    restart a paused hist trigger.\n\n"
5723 	"\t    The 'clear' parameter will clear the contents of a running\n"
5724 	"\t    hist trigger and leave its current paused/active state\n"
5725 	"\t    unchanged.\n\n"
5726 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5727 	"\t    have one event conditionally start and stop another event's\n"
5728 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5729 	"\t    the enable_event and disable_event triggers.\n\n"
5730 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5731 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5732 	"\t        <handler>.<action>\n\n"
5733 	"\t    The available handlers are:\n\n"
5734 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5735 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5736 	"\t        onchange(var)            - invoke action if var changes\n\n"
5737 	"\t    The available actions are:\n\n"
5738 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5739 	"\t        save(field,...)                      - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745 	"\t  Write into this file to define/undefine new synthetic events.\n"
5746 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
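
/*
 * For illustration (a sketch, not part of the help text above): a complete
 * hist trigger following the format documented in readme_msg could be
 *
 *	echo 'hist:keys=common_pid.execname:values=bytes_req:sort=hitcount' \
 *		> events/kmem/kmalloc/trigger
 *
 * where bytes_req is a field of the kmem:kmalloc event.
 */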
5750 
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753 		       size_t cnt, loff_t *ppos)
5754 {
5755 	return simple_read_from_buffer(ubuf, cnt, ppos,
5756 					readme_msg, strlen(readme_msg));
5757 }
5758 
5759 static const struct file_operations tracing_readme_fops = {
5760 	.open		= tracing_open_generic,
5761 	.read		= tracing_readme_read,
5762 	.llseek		= generic_file_llseek,
5763 };
5764 
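/*
 * The saved_tgids seq_file iterators below walk the global tgid_map,
 * treating the seq_file position as a PID index; entries whose tgid is
 * still zero are skipped via SEQ_SKIP in saved_tgids_show().
 */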
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767 	int pid = ++(*pos);
5768 
5769 	return trace_find_tgid_ptr(pid);
5770 }
5771 
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5773 {
5774 	int pid = *pos;
5775 
5776 	return trace_find_tgid_ptr(pid);
5777 }
5778 
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5780 {
5781 }
5782 
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5784 {
5785 	int *entry = (int *)v;
5786 	int pid = entry - tgid_map;
5787 	int tgid = *entry;
5788 
5789 	if (tgid == 0)
5790 		return SEQ_SKIP;
5791 
5792 	seq_printf(m, "%d %d\n", pid, tgid);
5793 	return 0;
5794 }
5795 
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797 	.start		= saved_tgids_start,
5798 	.stop		= saved_tgids_stop,
5799 	.next		= saved_tgids_next,
5800 	.show		= saved_tgids_show,
5801 };
5802 
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5804 {
5805 	int ret;
5806 
5807 	ret = tracing_check_open_get_tr(NULL);
5808 	if (ret)
5809 		return ret;
5810 
5811 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5812 }
5813 
5814 
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816 	.open		= tracing_saved_tgids_open,
5817 	.read		= seq_read,
5818 	.llseek		= seq_lseek,
5819 	.release	= seq_release,
5820 };
5821 
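/*
 * The saved_cmdlines iterators below walk savedcmd->map_cmdline_to_pid
 * under trace_cmdline_lock (an arch_spinlock_t, hence the explicit
 * preempt_disable()), printing "<pid> <comm>" pairs and skipping unused
 * slots (NO_CMDLINE_MAP).
 */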
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5823 {
5824 	unsigned int *ptr = v;
5825 
5826 	if (*pos || m->count)
5827 		ptr++;
5828 
5829 	(*pos)++;
5830 
5831 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5832 	     ptr++) {
5833 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5834 			continue;
5835 
5836 		return ptr;
5837 	}
5838 
5839 	return NULL;
5840 }
5841 
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5843 {
5844 	void *v;
5845 	loff_t l = 0;
5846 
5847 	preempt_disable();
5848 	arch_spin_lock(&trace_cmdline_lock);
5849 
5850 	v = &savedcmd->map_cmdline_to_pid[0];
5851 	while (l <= *pos) {
5852 		v = saved_cmdlines_next(m, v, &l);
5853 		if (!v)
5854 			return NULL;
5855 	}
5856 
5857 	return v;
5858 }
5859 
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5861 {
5862 	arch_spin_unlock(&trace_cmdline_lock);
5863 	preempt_enable();
5864 }
5865 
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5867 {
5868 	char buf[TASK_COMM_LEN];
5869 	unsigned int *pid = v;
5870 
5871 	__trace_find_cmdline(*pid, buf);
5872 	seq_printf(m, "%d %s\n", *pid, buf);
5873 	return 0;
5874 }
5875 
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877 	.start		= saved_cmdlines_start,
5878 	.next		= saved_cmdlines_next,
5879 	.stop		= saved_cmdlines_stop,
5880 	.show		= saved_cmdlines_show,
5881 };
5882 
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5884 {
5885 	int ret;
5886 
5887 	ret = tracing_check_open_get_tr(NULL);
5888 	if (ret)
5889 		return ret;
5890 
5891 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5892 }
5893 
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895 	.open		= tracing_saved_cmdlines_open,
5896 	.read		= seq_read,
5897 	.llseek		= seq_lseek,
5898 	.release	= seq_release,
5899 };
5900 
5901 static ssize_t
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903 				 size_t cnt, loff_t *ppos)
5904 {
5905 	char buf[64];
5906 	int r;
5907 
5908 	preempt_disable();
5909 	arch_spin_lock(&trace_cmdline_lock);
5910 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911 	arch_spin_unlock(&trace_cmdline_lock);
5912 	preempt_enable();
5913 
5914 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916 
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5918 {
5919 	kfree(s->saved_cmdlines);
5920 	kfree(s->map_cmdline_to_pid);
5921 	kfree(s);
5922 }
5923 
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5925 {
5926 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5927 
5928 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5929 	if (!s)
5930 		return -ENOMEM;
5931 
5932 	if (allocate_cmdlines_buffer(val, s) < 0) {
5933 		kfree(s);
5934 		return -ENOMEM;
5935 	}
5936 
5937 	preempt_disable();
5938 	arch_spin_lock(&trace_cmdline_lock);
5939 	savedcmd_temp = savedcmd;
5940 	savedcmd = s;
5941 	arch_spin_unlock(&trace_cmdline_lock);
5942 	preempt_enable();
5943 	free_saved_cmdlines_buffer(savedcmd_temp);
5944 
5945 	return 0;
5946 }
5947 
5948 static ssize_t
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950 				  size_t cnt, loff_t *ppos)
5951 {
5952 	unsigned long val;
5953 	int ret;
5954 
5955 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5956 	if (ret)
5957 		return ret;
5958 
5959 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5960 	if (!val || val > PID_MAX_DEFAULT)
5961 		return -EINVAL;
5962 
5963 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5964 	if (ret < 0)
5965 		return ret;
5966 
5967 	*ppos += cnt;
5968 
5969 	return cnt;
5970 }
5971 
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973 	.open		= tracing_open_generic,
5974 	.read		= tracing_saved_cmdlines_size_read,
5975 	.write		= tracing_saved_cmdlines_size_write,
5976 };
5977 
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5981 {
5982 	if (!ptr->map.eval_string) {
5983 		if (ptr->tail.next) {
5984 			ptr = ptr->tail.next;
5985 			/* Set ptr to the next real item (skip head) */
5986 			ptr++;
5987 		} else
5988 			return NULL;
5989 	}
5990 	return ptr;
5991 }
5992 
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5994 {
5995 	union trace_eval_map_item *ptr = v;
5996 
5997 	/*
5998 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5999 	 * This really should never happen.
6000 	 */
6001 	(*pos)++;
6002 	ptr = update_eval_map(ptr);
6003 	if (WARN_ON_ONCE(!ptr))
6004 		return NULL;
6005 
6006 	ptr++;
6007 	ptr = update_eval_map(ptr);
6008 
6009 	return ptr;
6010 }
6011 
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6013 {
6014 	union trace_eval_map_item *v;
6015 	loff_t l = 0;
6016 
6017 	mutex_lock(&trace_eval_mutex);
6018 
6019 	v = trace_eval_maps;
6020 	if (v)
6021 		v++;
6022 
6023 	while (v && l < *pos) {
6024 		v = eval_map_next(m, v, &l);
6025 	}
6026 
6027 	return v;
6028 }
6029 
6030 static void eval_map_stop(struct seq_file *m, void *v)
6031 {
6032 	mutex_unlock(&trace_eval_mutex);
6033 }
6034 
6035 static int eval_map_show(struct seq_file *m, void *v)
6036 {
6037 	union trace_eval_map_item *ptr = v;
6038 
6039 	seq_printf(m, "%s %ld (%s)\n",
6040 		   ptr->map.eval_string, ptr->map.eval_value,
6041 		   ptr->map.system);
6042 
6043 	return 0;
6044 }
6045 
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047 	.start		= eval_map_start,
6048 	.next		= eval_map_next,
6049 	.stop		= eval_map_stop,
6050 	.show		= eval_map_show,
6051 };
6052 
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6054 {
6055 	int ret;
6056 
6057 	ret = tracing_check_open_get_tr(NULL);
6058 	if (ret)
6059 		return ret;
6060 
6061 	return seq_open(filp, &tracing_eval_map_seq_ops);
6062 }
6063 
6064 static const struct file_operations tracing_eval_map_fops = {
6065 	.open		= tracing_eval_map_open,
6066 	.read		= seq_read,
6067 	.llseek		= seq_lseek,
6068 	.release	= seq_release,
6069 };
6070 
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6073 {
6074 	/* Return tail of array given the head */
6075 	return ptr + ptr->head.length + 1;
6076 }
6077 
6078 static void
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6080 			   int len)
6081 {
6082 	struct trace_eval_map **stop;
6083 	struct trace_eval_map **map;
6084 	union trace_eval_map_item *map_array;
6085 	union trace_eval_map_item *ptr;
6086 
6087 	stop = start + len;
6088 
6089 	/*
6090 	 * The trace_eval_maps contains the map plus a head and tail item,
6091 	 * where the head holds the module and length of array, and the
6092 	 * tail holds a pointer to the next list.
6093 	 */
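	/*
	 * For illustration, with len == 3 the array built below looks like:
	 *
	 *	map_array[0]  head  (mod, length = 3)
	 *	map_array[1]  map   = *start[0]
	 *	map_array[2]  map   = *start[1]
	 *	map_array[3]  map   = *start[2]
	 *	map_array[4]  tail  (zeroed, so tail.next == NULL)
	 *
	 * which is why trace_eval_jmp_to_tail() skips length + 1 items.
	 */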
6094 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6095 	if (!map_array) {
6096 		pr_warn("Unable to allocate trace eval mapping\n");
6097 		return;
6098 	}
6099 
6100 	mutex_lock(&trace_eval_mutex);
6101 
6102 	if (!trace_eval_maps)
6103 		trace_eval_maps = map_array;
6104 	else {
6105 		ptr = trace_eval_maps;
6106 		for (;;) {
6107 			ptr = trace_eval_jmp_to_tail(ptr);
6108 			if (!ptr->tail.next)
6109 				break;
6110 			ptr = ptr->tail.next;
6111 
6112 		}
6113 		ptr->tail.next = map_array;
6114 	}
6115 	map_array->head.mod = mod;
6116 	map_array->head.length = len;
6117 	map_array++;
6118 
6119 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120 		map_array->map = **map;
6121 		map_array++;
6122 	}
6123 	memset(map_array, 0, sizeof(*map_array));
6124 
6125 	mutex_unlock(&trace_eval_mutex);
6126 }
6127 
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6129 {
6130 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131 			  NULL, &tracing_eval_map_fops);
6132 }
6133 
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6135 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6136 static inline void trace_insert_eval_map_file(struct module *mod,
6137 			      struct trace_eval_map **start, int len) { }
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6139 
6140 static void trace_insert_eval_map(struct module *mod,
6141 				  struct trace_eval_map **start, int len)
6142 {
6143 	struct trace_eval_map **map;
6144 
6145 	if (len <= 0)
6146 		return;
6147 
6148 	map = start;
6149 
6150 	trace_event_eval_update(map, len);
6151 
6152 	trace_insert_eval_map_file(mod, start, len);
6153 }
6154 
6155 static ssize_t
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157 		       size_t cnt, loff_t *ppos)
6158 {
6159 	struct trace_array *tr = filp->private_data;
6160 	char buf[MAX_TRACER_SIZE+2];
6161 	int r;
6162 
6163 	mutex_lock(&trace_types_lock);
6164 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6165 	mutex_unlock(&trace_types_lock);
6166 
6167 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6168 }
6169 
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6171 {
6172 	tracing_reset_online_cpus(&tr->array_buffer);
6173 	return t->init(tr);
6174 }
6175 
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6177 {
6178 	int cpu;
6179 
6180 	for_each_tracing_cpu(cpu)
6181 		per_cpu_ptr(buf->data, cpu)->entries = val;
6182 }
6183 
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187 					struct array_buffer *size_buf, int cpu_id)
6188 {
6189 	int cpu, ret = 0;
6190 
6191 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192 		for_each_tracing_cpu(cpu) {
6193 			ret = ring_buffer_resize(trace_buf->buffer,
6194 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6195 			if (ret < 0)
6196 				break;
6197 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6198 				per_cpu_ptr(size_buf->data, cpu)->entries;
6199 		}
6200 	} else {
6201 		ret = ring_buffer_resize(trace_buf->buffer,
6202 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6203 		if (ret == 0)
6204 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6206 	}
6207 
6208 	return ret;
6209 }
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6211 
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213 					unsigned long size, int cpu)
6214 {
6215 	int ret;
6216 
6217 	/*
6218 	 * If kernel or user changes the size of the ring buffer
6219 	 * we use the size that was given, and we can forget about
6220 	 * expanding it later.
6221 	 */
6222 	ring_buffer_expanded = true;
6223 
6224 	/* May be called before buffers are initialized */
6225 	if (!tr->array_buffer.buffer)
6226 		return 0;
6227 
6228 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6229 	if (ret < 0)
6230 		return ret;
6231 
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234 	    !tr->current_trace->use_max_tr)
6235 		goto out;
6236 
6237 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6238 	if (ret < 0) {
6239 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240 						     &tr->array_buffer, cpu);
6241 		if (r < 0) {
6242 			/*
6243 			 * AARGH! We are left with different
6244 			 * size max buffer!!!!
6245 			 * The max buffer is our "snapshot" buffer.
6246 			 * When a tracer needs a snapshot (one of the
6247 			 * latency tracers), it swaps the max buffer
6248 			 * with the saved snapshot. We succeeded in updating
6249 			 * the size of the main buffer, but failed to update
6250 			 * the size of the max buffer. Then, when we tried to
6251 			 * reset the main buffer to the original size, we
6252 			 * failed there too. This is very unlikely to
6253 			 * happen, but if it does, warn and kill all
6254 			 * tracing.
6255 			 */
6256 			WARN_ON(1);
6257 			tracing_disabled = 1;
6258 		}
6259 		return ret;
6260 	}
6261 
6262 	if (cpu == RING_BUFFER_ALL_CPUS)
6263 		set_buffer_entries(&tr->max_buffer, size);
6264 	else
6265 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6266 
6267  out:
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6269 
6270 	if (cpu == RING_BUFFER_ALL_CPUS)
6271 		set_buffer_entries(&tr->array_buffer, size);
6272 	else
6273 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6274 
6275 	return ret;
6276 }
6277 
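/*
 * Resize @tr's ring buffer (and, for tracers that use it, the max/snapshot
 * buffer) to @size bytes. @cpu_id selects a single CPU or
 * RING_BUFFER_ALL_CPUS.
 */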
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279 				  unsigned long size, int cpu_id)
6280 {
6281 	int ret;
6282 
6283 	mutex_lock(&trace_types_lock);
6284 
6285 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286 		/* make sure this CPU is enabled in the mask */
6287 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6288 			ret = -EINVAL;
6289 			goto out;
6290 		}
6291 	}
6292 
6293 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6294 	if (ret < 0)
6295 		ret = -ENOMEM;
6296 
6297 out:
6298 	mutex_unlock(&trace_types_lock);
6299 
6300 	return ret;
6301 }
6302 
6303 
6304 /**
6305  * tracing_update_buffers - used by tracing facility to expand ring buffers
6306  *
6307  * To save memory on systems where tracing is configured in but never
6308  * used, the ring buffers start out at a minimum size. Once a user
6309  * starts to use the tracing facility, the buffers need to grow to
6310  * their default size.
6311  *
6312  * This function is to be called when a tracer is about to be used.
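 *
 * A typical caller (see tracing_snapshot_write() further below) does:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;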
6313  */
6314 int tracing_update_buffers(void)
6315 {
6316 	int ret = 0;
6317 
6318 	mutex_lock(&trace_types_lock);
6319 	if (!ring_buffer_expanded)
6320 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321 						RING_BUFFER_ALL_CPUS);
6322 	mutex_unlock(&trace_types_lock);
6323 
6324 	return ret;
6325 }
6326 
6327 struct trace_option_dentry;
6328 
6329 static void
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6331 
6332 /*
6333  * Used to clear out the tracer before deletion of an instance.
6334  * Must have trace_types_lock held.
6335  */
6336 static void tracing_set_nop(struct trace_array *tr)
6337 {
6338 	if (tr->current_trace == &nop_trace)
6339 		return;
6340 
6341 	tr->current_trace->enabled--;
6342 
6343 	if (tr->current_trace->reset)
6344 		tr->current_trace->reset(tr);
6345 
6346 	tr->current_trace = &nop_trace;
6347 }
6348 
6349 static bool tracer_options_updated;
6350 
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6352 {
6353 	/* Only enable if the directory has been created already. */
6354 	if (!tr->dir)
6355 		return;
6356 
6357 	/* Only create trace option files after update_tracer_options() finishes */
6358 	if (!tracer_options_updated)
6359 		return;
6360 
6361 	create_trace_option_files(tr, t);
6362 }
6363 
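/*
 * tracing_set_tracer - switch @tr to the tracer named @buf
 *
 * Expands the ring buffer if needed, tears down the current tracer,
 * handles the max/snapshot buffer transitions, and initializes the new
 * tracer. Takes trace_types_lock itself, so callers must not hold it.
 */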
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6365 {
6366 	struct tracer *t;
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368 	bool had_max_tr;
6369 #endif
6370 	int ret = 0;
6371 
6372 	mutex_lock(&trace_types_lock);
6373 
6374 	if (!ring_buffer_expanded) {
6375 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376 						RING_BUFFER_ALL_CPUS);
6377 		if (ret < 0)
6378 			goto out;
6379 		ret = 0;
6380 	}
6381 
6382 	for (t = trace_types; t; t = t->next) {
6383 		if (strcmp(t->name, buf) == 0)
6384 			break;
6385 	}
6386 	if (!t) {
6387 		ret = -EINVAL;
6388 		goto out;
6389 	}
6390 	if (t == tr->current_trace)
6391 		goto out;
6392 
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394 	if (t->use_max_tr) {
6395 		local_irq_disable();
6396 		arch_spin_lock(&tr->max_lock);
6397 		if (tr->cond_snapshot)
6398 			ret = -EBUSY;
6399 		arch_spin_unlock(&tr->max_lock);
6400 		local_irq_enable();
6401 		if (ret)
6402 			goto out;
6403 	}
6404 #endif
6405 	/* Some tracers won't work on kernel command line */
6406 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6407 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6408 			t->name);
6409 		goto out;
6410 	}
6411 
6412 	/* Some tracers are only allowed for the top level buffer */
6413 	if (!trace_ok_for_array(t, tr)) {
6414 		ret = -EINVAL;
6415 		goto out;
6416 	}
6417 
6418 	/* If trace pipe files are being read, we can't change the tracer */
6419 	if (tr->trace_ref) {
6420 		ret = -EBUSY;
6421 		goto out;
6422 	}
6423 
6424 	trace_branch_disable();
6425 
6426 	tr->current_trace->enabled--;
6427 
6428 	if (tr->current_trace->reset)
6429 		tr->current_trace->reset(tr);
6430 
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432 	had_max_tr = tr->current_trace->use_max_tr;
6433 
6434 	/* Current trace needs to be nop_trace before synchronize_rcu */
6435 	tr->current_trace = &nop_trace;
6436 
6437 	if (had_max_tr && !t->use_max_tr) {
6438 		/*
6439 		 * We need to make sure that the update_max_tr sees that
6440 		 * current_trace changed to nop_trace to keep it from
6441 		 * swapping the buffers after we resize it.
6442 		 * update_max_tr() is called with interrupts disabled,
6443 		 * so a synchronize_rcu() is sufficient.
6444 		 */
6445 		synchronize_rcu();
6446 		free_snapshot(tr);
6447 	}
6448 
6449 	if (t->use_max_tr && !tr->allocated_snapshot) {
6450 		ret = tracing_alloc_snapshot_instance(tr);
6451 		if (ret < 0)
6452 			goto out;
6453 	}
6454 #else
6455 	tr->current_trace = &nop_trace;
6456 #endif
6457 
6458 	if (t->init) {
6459 		ret = tracer_init(t, tr);
6460 		if (ret)
6461 			goto out;
6462 	}
6463 
6464 	tr->current_trace = t;
6465 	tr->current_trace->enabled++;
6466 	trace_branch_enable(tr);
6467  out:
6468 	mutex_unlock(&trace_types_lock);
6469 
6470 	return ret;
6471 }
6472 
6473 static ssize_t
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475 			size_t cnt, loff_t *ppos)
6476 {
6477 	struct trace_array *tr = filp->private_data;
6478 	char buf[MAX_TRACER_SIZE+1];
6479 	char *name;
6480 	size_t ret;
6481 	int err;
6482 
6483 	ret = cnt;
6484 
6485 	if (cnt > MAX_TRACER_SIZE)
6486 		cnt = MAX_TRACER_SIZE;
6487 
6488 	if (copy_from_user(buf, ubuf, cnt))
6489 		return -EFAULT;
6490 
6491 	buf[cnt] = 0;
6492 
6493 	name = strim(buf);
6494 
6495 	err = tracing_set_tracer(tr, name);
6496 	if (err)
6497 		return err;
6498 
6499 	*ppos += ret;
6500 
6501 	return ret;
6502 }
6503 
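/*
 * The tracing_nsecs_{read,write} helpers below expose values that are
 * stored in nanoseconds but presented in microseconds; a stored value of
 * (unsigned long)-1 ("unset") is shown as -1 on read.
 */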
6504 static ssize_t
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506 		   size_t cnt, loff_t *ppos)
6507 {
6508 	char buf[64];
6509 	int r;
6510 
6511 	r = snprintf(buf, sizeof(buf), "%ld\n",
6512 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513 	if (r > sizeof(buf))
6514 		r = sizeof(buf);
6515 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6516 }
6517 
6518 static ssize_t
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520 		    size_t cnt, loff_t *ppos)
6521 {
6522 	unsigned long val;
6523 	int ret;
6524 
6525 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6526 	if (ret)
6527 		return ret;
6528 
6529 	*ptr = val * 1000;
6530 
6531 	return cnt;
6532 }
6533 
6534 static ssize_t
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536 		    size_t cnt, loff_t *ppos)
6537 {
6538 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6539 }
6540 
6541 static ssize_t
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543 		     size_t cnt, loff_t *ppos)
6544 {
6545 	struct trace_array *tr = filp->private_data;
6546 	int ret;
6547 
6548 	mutex_lock(&trace_types_lock);
6549 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6550 	if (ret < 0)
6551 		goto out;
6552 
6553 	if (tr->current_trace->update_thresh) {
6554 		ret = tr->current_trace->update_thresh(tr);
6555 		if (ret < 0)
6556 			goto out;
6557 	}
6558 
6559 	ret = cnt;
6560 out:
6561 	mutex_unlock(&trace_types_lock);
6562 
6563 	return ret;
6564 }
6565 
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6567 
6568 static ssize_t
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570 		     size_t cnt, loff_t *ppos)
6571 {
6572 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6573 }
6574 
6575 static ssize_t
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577 		      size_t cnt, loff_t *ppos)
6578 {
6579 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6580 }
6581 
6582 #endif
6583 
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6585 {
6586 	struct trace_array *tr = inode->i_private;
6587 	struct trace_iterator *iter;
6588 	int ret;
6589 
6590 	ret = tracing_check_open_get_tr(tr);
6591 	if (ret)
6592 		return ret;
6593 
6594 	mutex_lock(&trace_types_lock);
6595 
6596 	/* create a buffer to store the information to pass to userspace */
6597 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6598 	if (!iter) {
6599 		ret = -ENOMEM;
6600 		__trace_array_put(tr);
6601 		goto out;
6602 	}
6603 
6604 	trace_seq_init(&iter->seq);
6605 	iter->trace = tr->current_trace;
6606 
6607 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6608 		ret = -ENOMEM;
6609 		goto fail;
6610 	}
6611 
6612 	/* trace pipe does not show start of buffer */
6613 	cpumask_setall(iter->started);
6614 
6615 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6617 
6618 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619 	if (trace_clocks[tr->clock_id].in_ns)
6620 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6621 
6622 	iter->tr = tr;
6623 	iter->array_buffer = &tr->array_buffer;
6624 	iter->cpu_file = tracing_get_cpu(inode);
6625 	mutex_init(&iter->mutex);
6626 	filp->private_data = iter;
6627 
6628 	if (iter->trace->pipe_open)
6629 		iter->trace->pipe_open(iter);
6630 
6631 	nonseekable_open(inode, filp);
6632 
6633 	tr->trace_ref++;
6634 out:
6635 	mutex_unlock(&trace_types_lock);
6636 	return ret;
6637 
6638 fail:
6639 	kfree(iter);
6640 	__trace_array_put(tr);
6641 	mutex_unlock(&trace_types_lock);
6642 	return ret;
6643 }
6644 
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6646 {
6647 	struct trace_iterator *iter = file->private_data;
6648 	struct trace_array *tr = inode->i_private;
6649 
6650 	mutex_lock(&trace_types_lock);
6651 
6652 	tr->trace_ref--;
6653 
6654 	if (iter->trace->pipe_close)
6655 		iter->trace->pipe_close(iter);
6656 
6657 	mutex_unlock(&trace_types_lock);
6658 
6659 	free_cpumask_var(iter->started);
6660 	kfree(iter->fmt);
6661 	mutex_destroy(&iter->mutex);
6662 	kfree(iter);
6663 
6664 	trace_array_put(tr);
6665 
6666 	return 0;
6667 }
6668 
6669 static __poll_t
6670 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6671 {
6672 	struct trace_array *tr = iter->tr;
6673 
6674 	/* Iterators are static, they should be filled or empty */
6675 	if (trace_buffer_iter(iter, iter->cpu_file))
6676 		return EPOLLIN | EPOLLRDNORM;
6677 
6678 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6679 		/*
6680 		 * Always select as readable when in blocking mode
6681 		 */
6682 		return EPOLLIN | EPOLLRDNORM;
6683 	else
6684 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6685 					     filp, poll_table, iter->tr->buffer_percent);
6686 }
6687 
6688 static __poll_t
6689 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6690 {
6691 	struct trace_iterator *iter = filp->private_data;
6692 
6693 	return trace_poll(iter, filp, poll_table);
6694 }
6695 
6696 /* Must be called with iter->mutex held. */
6697 static int tracing_wait_pipe(struct file *filp)
6698 {
6699 	struct trace_iterator *iter = filp->private_data;
6700 	int ret;
6701 
6702 	while (trace_empty(iter)) {
6703 
6704 		if ((filp->f_flags & O_NONBLOCK)) {
6705 			return -EAGAIN;
6706 		}
6707 
6708 		/*
6709 		 * We block until we have read something and tracing is
6710 		 * disabled. We still block if tracing is disabled but we
6711 		 * have never read anything. This allows a user to cat this
6712 		 * file, and then enable tracing. But after we have read
6713 		 * something, we give an EOF when tracing is disabled again.
6714 		 *
6715 		 * iter->pos will be 0 if we haven't read anything.
6716 		 */
6717 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6718 			break;
6719 
6720 		mutex_unlock(&iter->mutex);
6721 
6722 		ret = wait_on_pipe(iter, 0);
6723 
6724 		mutex_lock(&iter->mutex);
6725 
6726 		if (ret)
6727 			return ret;
6728 	}
6729 
6730 	return 1;
6731 }
6732 
6733 /*
6734  * Consumer reader.
6735  */
6736 static ssize_t
6737 tracing_read_pipe(struct file *filp, char __user *ubuf,
6738 		  size_t cnt, loff_t *ppos)
6739 {
6740 	struct trace_iterator *iter = filp->private_data;
6741 	ssize_t sret;
6742 
6743 	/*
6744 	 * Avoid more than one consumer on a single file descriptor.
6745 	 * This is just a matter of trace coherency; the ring buffer itself
6746 	 * is protected.
6747 	 */
6748 	mutex_lock(&iter->mutex);
6749 
6750 	/* return any leftover data */
6751 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6752 	if (sret != -EBUSY)
6753 		goto out;
6754 
6755 	trace_seq_init(&iter->seq);
6756 
6757 	if (iter->trace->read) {
6758 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6759 		if (sret)
6760 			goto out;
6761 	}
6762 
6763 waitagain:
6764 	sret = tracing_wait_pipe(filp);
6765 	if (sret <= 0)
6766 		goto out;
6767 
6768 	/* stop when tracing is finished */
6769 	if (trace_empty(iter)) {
6770 		sret = 0;
6771 		goto out;
6772 	}
6773 
6774 	if (cnt >= PAGE_SIZE)
6775 		cnt = PAGE_SIZE - 1;
6776 
6777 	/* reset all but tr, trace, and overruns */
6778 	trace_iterator_reset(iter);
6779 	cpumask_clear(iter->started);
6780 	trace_seq_init(&iter->seq);
6781 
6782 	trace_event_read_lock();
6783 	trace_access_lock(iter->cpu_file);
6784 	while (trace_find_next_entry_inc(iter) != NULL) {
6785 		enum print_line_t ret;
6786 		int save_len = iter->seq.seq.len;
6787 
6788 		ret = print_trace_line(iter);
6789 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6790 			/* don't print partial lines */
6791 			iter->seq.seq.len = save_len;
6792 			break;
6793 		}
6794 		if (ret != TRACE_TYPE_NO_CONSUME)
6795 			trace_consume(iter);
6796 
6797 		if (trace_seq_used(&iter->seq) >= cnt)
6798 			break;
6799 
6800 		/*
6801 		 * Setting the full flag means we reached the trace_seq buffer
6802 		 * size and we should have left via the partial output condition above.
6803 		 * One of the trace_seq_* functions is not used properly.
6804 		 */
6805 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6806 			  iter->ent->type);
6807 	}
6808 	trace_access_unlock(iter->cpu_file);
6809 	trace_event_read_unlock();
6810 
6811 	/* Now copy what we have to the user */
6812 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6813 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6814 		trace_seq_init(&iter->seq);
6815 
6816 	/*
6817 	 * If there was nothing to send to user, in spite of consuming trace
6818 	 * entries, go back to wait for more entries.
6819 	 */
6820 	if (sret == -EBUSY)
6821 		goto waitagain;
6822 
6823 out:
6824 	mutex_unlock(&iter->mutex);
6825 
6826 	return sret;
6827 }
6828 
6829 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6830 				     unsigned int idx)
6831 {
6832 	__free_page(spd->pages[idx]);
6833 }
6834 
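/*
 * Fill iter->seq with as many formatted trace lines as fit in @rem bytes
 * (the seq buffer itself is one page), consuming entries as they are
 * rendered. Returns how many bytes of the splice request remain unfilled.
 */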
6835 static size_t
6836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6837 {
6838 	size_t count;
6839 	int save_len;
6840 	int ret;
6841 
6842 	/* Seq buffer is page-sized, exactly what we need. */
6843 	for (;;) {
6844 		save_len = iter->seq.seq.len;
6845 		ret = print_trace_line(iter);
6846 
6847 		if (trace_seq_has_overflowed(&iter->seq)) {
6848 			iter->seq.seq.len = save_len;
6849 			break;
6850 		}
6851 
6852 		/*
6853 		 * This should not be hit, because it should only
6854 		 * be set if the iter->seq overflowed. But check it
6855 		 * anyway to be safe.
6856 		 */
6857 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6858 			iter->seq.seq.len = save_len;
6859 			break;
6860 		}
6861 
6862 		count = trace_seq_used(&iter->seq) - save_len;
6863 		if (rem < count) {
6864 			rem = 0;
6865 			iter->seq.seq.len = save_len;
6866 			break;
6867 		}
6868 
6869 		if (ret != TRACE_TYPE_NO_CONSUME)
6870 			trace_consume(iter);
6871 		rem -= count;
6872 		if (!trace_find_next_entry_inc(iter))	{
6873 			rem = 0;
6874 			iter->ent = NULL;
6875 			break;
6876 		}
6877 	}
6878 
6879 	return rem;
6880 }
6881 
6882 static ssize_t tracing_splice_read_pipe(struct file *filp,
6883 					loff_t *ppos,
6884 					struct pipe_inode_info *pipe,
6885 					size_t len,
6886 					unsigned int flags)
6887 {
6888 	struct page *pages_def[PIPE_DEF_BUFFERS];
6889 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6890 	struct trace_iterator *iter = filp->private_data;
6891 	struct splice_pipe_desc spd = {
6892 		.pages		= pages_def,
6893 		.partial	= partial_def,
6894 		.nr_pages	= 0, /* This gets updated below. */
6895 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6896 		.ops		= &default_pipe_buf_ops,
6897 		.spd_release	= tracing_spd_release_pipe,
6898 	};
6899 	ssize_t ret;
6900 	size_t rem;
6901 	unsigned int i;
6902 
6903 	if (splice_grow_spd(pipe, &spd))
6904 		return -ENOMEM;
6905 
6906 	mutex_lock(&iter->mutex);
6907 
6908 	if (iter->trace->splice_read) {
6909 		ret = iter->trace->splice_read(iter, filp,
6910 					       ppos, pipe, len, flags);
6911 		if (ret)
6912 			goto out_err;
6913 	}
6914 
6915 	ret = tracing_wait_pipe(filp);
6916 	if (ret <= 0)
6917 		goto out_err;
6918 
6919 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6920 		ret = -EFAULT;
6921 		goto out_err;
6922 	}
6923 
6924 	trace_event_read_lock();
6925 	trace_access_lock(iter->cpu_file);
6926 
6927 	/* Fill as many pages as possible. */
6928 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6929 		spd.pages[i] = alloc_page(GFP_KERNEL);
6930 		if (!spd.pages[i])
6931 			break;
6932 
6933 		rem = tracing_fill_pipe_page(rem, iter);
6934 
6935 		/* Copy the data into the page, so we can start over. */
6936 		ret = trace_seq_to_buffer(&iter->seq,
6937 					  page_address(spd.pages[i]),
6938 					  trace_seq_used(&iter->seq));
6939 		if (ret < 0) {
6940 			__free_page(spd.pages[i]);
6941 			break;
6942 		}
6943 		spd.partial[i].offset = 0;
6944 		spd.partial[i].len = trace_seq_used(&iter->seq);
6945 
6946 		trace_seq_init(&iter->seq);
6947 	}
6948 
6949 	trace_access_unlock(iter->cpu_file);
6950 	trace_event_read_unlock();
6951 	mutex_unlock(&iter->mutex);
6952 
6953 	spd.nr_pages = i;
6954 
6955 	if (i)
6956 		ret = splice_to_pipe(pipe, &spd);
6957 	else
6958 		ret = 0;
6959 out:
6960 	splice_shrink_spd(&spd);
6961 	return ret;
6962 
6963 out_err:
6964 	mutex_unlock(&iter->mutex);
6965 	goto out;
6966 }
6967 
6968 static ssize_t
6969 tracing_entries_read(struct file *filp, char __user *ubuf,
6970 		     size_t cnt, loff_t *ppos)
6971 {
6972 	struct inode *inode = file_inode(filp);
6973 	struct trace_array *tr = inode->i_private;
6974 	int cpu = tracing_get_cpu(inode);
6975 	char buf[64];
6976 	int r = 0;
6977 	ssize_t ret;
6978 
6979 	mutex_lock(&trace_types_lock);
6980 
6981 	if (cpu == RING_BUFFER_ALL_CPUS) {
6982 		int cpu, buf_size_same;
6983 		unsigned long size;
6984 
6985 		size = 0;
6986 		buf_size_same = 1;
6987 		/* check if all cpu sizes are same */
6988 		for_each_tracing_cpu(cpu) {
6989 			/* fill in the size from first enabled cpu */
6990 			if (size == 0)
6991 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6992 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6993 				buf_size_same = 0;
6994 				break;
6995 			}
6996 		}
6997 
6998 		if (buf_size_same) {
6999 			if (!ring_buffer_expanded)
7000 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7001 					    size >> 10,
7002 					    trace_buf_size >> 10);
7003 			else
7004 				r = sprintf(buf, "%lu\n", size >> 10);
7005 		} else
7006 			r = sprintf(buf, "X\n");
7007 	} else
7008 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7009 
7010 	mutex_unlock(&trace_types_lock);
7011 
7012 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7013 	return ret;
7014 }
7015 
7016 static ssize_t
7017 tracing_entries_write(struct file *filp, const char __user *ubuf,
7018 		      size_t cnt, loff_t *ppos)
7019 {
7020 	struct inode *inode = file_inode(filp);
7021 	struct trace_array *tr = inode->i_private;
7022 	unsigned long val;
7023 	int ret;
7024 
7025 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7026 	if (ret)
7027 		return ret;
7028 
7029 	/* must have at least 1 entry */
7030 	if (!val)
7031 		return -EINVAL;
7032 
7033 	/* value is in KB */
7034 	val <<= 10;
7035 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7036 	if (ret < 0)
7037 		return ret;
7038 
7039 	*ppos += cnt;
7040 
7041 	return cnt;
7042 }
7043 
7044 static ssize_t
7045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7046 				size_t cnt, loff_t *ppos)
7047 {
7048 	struct trace_array *tr = filp->private_data;
7049 	char buf[64];
7050 	int r, cpu;
7051 	unsigned long size = 0, expanded_size = 0;
7052 
7053 	mutex_lock(&trace_types_lock);
7054 	for_each_tracing_cpu(cpu) {
7055 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7056 		if (!ring_buffer_expanded)
7057 			expanded_size += trace_buf_size >> 10;
7058 	}
7059 	if (ring_buffer_expanded)
7060 		r = sprintf(buf, "%lu\n", size);
7061 	else
7062 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7063 	mutex_unlock(&trace_types_lock);
7064 
7065 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7066 }
7067 
7068 static ssize_t
7069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7070 			  size_t cnt, loff_t *ppos)
7071 {
7072 	/*
7073 	 * There is no need to read what the user has written; this function
7074 	 * only exists so that "echo" does not return an error when used.
7075 	 */
7076 
7077 	*ppos += cnt;
7078 
7079 	return cnt;
7080 }
7081 
7082 static int
7083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7084 {
7085 	struct trace_array *tr = inode->i_private;
7086 
7087 	/* disable tracing ? */
7088 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7089 		tracer_tracing_off(tr);
7090 	/* resize the ring buffer to 0 */
7091 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7092 
7093 	trace_array_put(tr);
7094 
7095 	return 0;
7096 }
7097 
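/*
 * Handle writes to the trace_marker file. For illustration, from user
 * space something like
 *
 *	echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * lands here and is recorded as a TRACE_PRINT event, with a '\n' appended
 * if the write did not already end with one.
 */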
7098 static ssize_t
7099 tracing_mark_write(struct file *filp, const char __user *ubuf,
7100 					size_t cnt, loff_t *fpos)
7101 {
7102 	struct trace_array *tr = filp->private_data;
7103 	struct ring_buffer_event *event;
7104 	enum event_trigger_type tt = ETT_NONE;
7105 	struct trace_buffer *buffer;
7106 	struct print_entry *entry;
7107 	ssize_t written;
7108 	int size;
7109 	int len;
7110 
7111 /* Used in tracing_mark_raw_write() as well */
7112 #define FAULTED_STR "<faulted>"
7113 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7114 
7115 	if (tracing_disabled)
7116 		return -EINVAL;
7117 
7118 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7119 		return -EINVAL;
7120 
7121 	if (cnt > TRACE_BUF_SIZE)
7122 		cnt = TRACE_BUF_SIZE;
7123 
7124 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7125 
7126 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7127 
7128 	/* If less than "<faulted>", then make sure we can still add that */
7129 	if (cnt < FAULTED_SIZE)
7130 		size += FAULTED_SIZE - cnt;
7131 
7132 	buffer = tr->array_buffer.buffer;
7133 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7134 					    tracing_gen_ctx());
7135 	if (unlikely(!event))
7136 		/* Ring buffer disabled, return as if not open for write */
7137 		return -EBADF;
7138 
7139 	entry = ring_buffer_event_data(event);
7140 	entry->ip = _THIS_IP_;
7141 
7142 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7143 	if (len) {
7144 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7145 		cnt = FAULTED_SIZE;
7146 		written = -EFAULT;
7147 	} else
7148 		written = cnt;
7149 
7150 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7151 		/* do not add \n before testing triggers, but add \0 */
7152 		entry->buf[cnt] = '\0';
7153 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7154 	}
7155 
7156 	if (entry->buf[cnt - 1] != '\n') {
7157 		entry->buf[cnt] = '\n';
7158 		entry->buf[cnt + 1] = '\0';
7159 	} else
7160 		entry->buf[cnt] = '\0';
7161 
7162 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7163 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7164 	__buffer_unlock_commit(buffer, event);
7165 
7166 	if (tt)
7167 		event_triggers_post_call(tr->trace_marker_file, tt);
7168 
7169 	return written;
7170 }
7171 
7172 /* Limit it for now to 3K (including tag) */
7173 #define RAW_DATA_MAX_SIZE (1024*3)
7174 
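/*
 * Like tracing_mark_write(), but for the trace_marker_raw file: the
 * payload is opaque binary data whose first sizeof(int) bytes are a
 * user-supplied tag id, recorded as a TRACE_RAW_DATA event.
 */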
7175 static ssize_t
7176 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7177 					size_t cnt, loff_t *fpos)
7178 {
7179 	struct trace_array *tr = filp->private_data;
7180 	struct ring_buffer_event *event;
7181 	struct trace_buffer *buffer;
7182 	struct raw_data_entry *entry;
7183 	ssize_t written;
7184 	int size;
7185 	int len;
7186 
7187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7188 
7189 	if (tracing_disabled)
7190 		return -EINVAL;
7191 
7192 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7193 		return -EINVAL;
7194 
7195 	/* The marker must at least have a tag id */
7196 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7197 		return -EINVAL;
7198 
7199 	if (cnt > TRACE_BUF_SIZE)
7200 		cnt = TRACE_BUF_SIZE;
7201 
7202 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7203 
7204 	size = sizeof(*entry) + cnt;
7205 	if (cnt < FAULT_SIZE_ID)
7206 		size += FAULT_SIZE_ID - cnt;
7207 
7208 	buffer = tr->array_buffer.buffer;
7209 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7210 					    tracing_gen_ctx());
7211 	if (!event)
7212 		/* Ring buffer disabled, return as if not open for write */
7213 		return -EBADF;
7214 
7215 	entry = ring_buffer_event_data(event);
7216 
7217 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7218 	if (len) {
7219 		entry->id = -1;
7220 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7221 		written = -EFAULT;
7222 	} else
7223 		written = cnt;
7224 
7225 	__buffer_unlock_commit(buffer, event);
7226 
7227 	return written;
7228 }
7229 
7230 static int tracing_clock_show(struct seq_file *m, void *v)
7231 {
7232 	struct trace_array *tr = m->private;
7233 	int i;
7234 
7235 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7236 		seq_printf(m,
7237 			"%s%s%s%s", i ? " " : "",
7238 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7239 			i == tr->clock_id ? "]" : "");
7240 	seq_putc(m, '\n');
7241 
7242 	return 0;
7243 }
7244 
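/*
 * Switch @tr to the trace clock named @clockstr and reset the buffers,
 * since timestamps from different clocks are not comparable. A sketch of
 * the user-space path that ends up here (via tracing_clock_write() below):
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */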
7245 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7246 {
7247 	int i;
7248 
7249 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7250 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7251 			break;
7252 	}
7253 	if (i == ARRAY_SIZE(trace_clocks))
7254 		return -EINVAL;
7255 
7256 	mutex_lock(&trace_types_lock);
7257 
7258 	tr->clock_id = i;
7259 
7260 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7261 
7262 	/*
7263 	 * New clock may not be consistent with the previous clock.
7264 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7265 	 */
7266 	tracing_reset_online_cpus(&tr->array_buffer);
7267 
7268 #ifdef CONFIG_TRACER_MAX_TRACE
7269 	if (tr->max_buffer.buffer)
7270 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7271 	tracing_reset_online_cpus(&tr->max_buffer);
7272 #endif
7273 
7274 	mutex_unlock(&trace_types_lock);
7275 
7276 	return 0;
7277 }
7278 
7279 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7280 				   size_t cnt, loff_t *fpos)
7281 {
7282 	struct seq_file *m = filp->private_data;
7283 	struct trace_array *tr = m->private;
7284 	char buf[64];
7285 	const char *clockstr;
7286 	int ret;
7287 
7288 	if (cnt >= sizeof(buf))
7289 		return -EINVAL;
7290 
7291 	if (copy_from_user(buf, ubuf, cnt))
7292 		return -EFAULT;
7293 
7294 	buf[cnt] = 0;
7295 
7296 	clockstr = strstrip(buf);
7297 
7298 	ret = tracing_set_clock(tr, clockstr);
7299 	if (ret)
7300 		return ret;
7301 
7302 	*fpos += cnt;
7303 
7304 	return cnt;
7305 }
7306 
7307 static int tracing_clock_open(struct inode *inode, struct file *file)
7308 {
7309 	struct trace_array *tr = inode->i_private;
7310 	int ret;
7311 
7312 	ret = tracing_check_open_get_tr(tr);
7313 	if (ret)
7314 		return ret;
7315 
7316 	ret = single_open(file, tracing_clock_show, inode->i_private);
7317 	if (ret < 0)
7318 		trace_array_put(tr);
7319 
7320 	return ret;
7321 }
7322 
7323 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7324 {
7325 	struct trace_array *tr = m->private;
7326 
7327 	mutex_lock(&trace_types_lock);
7328 
7329 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7330 		seq_puts(m, "delta [absolute]\n");
7331 	else
7332 		seq_puts(m, "[delta] absolute\n");
7333 
7334 	mutex_unlock(&trace_types_lock);
7335 
7336 	return 0;
7337 }
7338 
7339 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7340 {
7341 	struct trace_array *tr = inode->i_private;
7342 	int ret;
7343 
7344 	ret = tracing_check_open_get_tr(tr);
7345 	if (ret)
7346 		return ret;
7347 
7348 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7349 	if (ret < 0)
7350 		trace_array_put(tr);
7351 
7352 	return ret;
7353 }
7354 
7355 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7356 {
7357 	if (rbe == this_cpu_read(trace_buffered_event))
7358 		return ring_buffer_time_stamp(buffer);
7359 
7360 	return ring_buffer_event_time_stamp(buffer, rbe);
7361 }
7362 
7363 /*
7364  * Set or disable using the per-CPU trace_buffered_event when possible.
7365  */
7366 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7367 {
7368 	int ret = 0;
7369 
7370 	mutex_lock(&trace_types_lock);
7371 
7372 	if (set && tr->no_filter_buffering_ref++)
7373 		goto out;
7374 
7375 	if (!set) {
7376 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7377 			ret = -EINVAL;
7378 			goto out;
7379 		}
7380 
7381 		--tr->no_filter_buffering_ref;
7382 	}
7383  out:
7384 	mutex_unlock(&trace_types_lock);
7385 
7386 	return ret;
7387 }
7388 
7389 struct ftrace_buffer_info {
7390 	struct trace_iterator	iter;
7391 	void			*spare;
7392 	unsigned int		spare_cpu;
7393 	unsigned int		read;
7394 };
7395 
7396 #ifdef CONFIG_TRACER_SNAPSHOT
7397 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7398 {
7399 	struct trace_array *tr = inode->i_private;
7400 	struct trace_iterator *iter;
7401 	struct seq_file *m;
7402 	int ret;
7403 
7404 	ret = tracing_check_open_get_tr(tr);
7405 	if (ret)
7406 		return ret;
7407 
7408 	if (file->f_mode & FMODE_READ) {
7409 		iter = __tracing_open(inode, file, true);
7410 		if (IS_ERR(iter))
7411 			ret = PTR_ERR(iter);
7412 	} else {
7413 		/* Writes still need the seq_file to hold the private data */
7414 		ret = -ENOMEM;
7415 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7416 		if (!m)
7417 			goto out;
7418 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7419 		if (!iter) {
7420 			kfree(m);
7421 			goto out;
7422 		}
7423 		ret = 0;
7424 
7425 		iter->tr = tr;
7426 		iter->array_buffer = &tr->max_buffer;
7427 		iter->cpu_file = tracing_get_cpu(inode);
7428 		m->private = iter;
7429 		file->private_data = m;
7430 	}
7431 out:
7432 	if (ret < 0)
7433 		trace_array_put(tr);
7434 
7435 	return ret;
7436 }
7437 
7438 static ssize_t
7439 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7440 		       loff_t *ppos)
7441 {
7442 	struct seq_file *m = filp->private_data;
7443 	struct trace_iterator *iter = m->private;
7444 	struct trace_array *tr = iter->tr;
7445 	unsigned long val;
7446 	int ret;
7447 
7448 	ret = tracing_update_buffers();
7449 	if (ret < 0)
7450 		return ret;
7451 
7452 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7453 	if (ret)
7454 		return ret;
7455 
7456 	mutex_lock(&trace_types_lock);
7457 
7458 	if (tr->current_trace->use_max_tr) {
7459 		ret = -EBUSY;
7460 		goto out;
7461 	}
7462 
7463 	local_irq_disable();
7464 	arch_spin_lock(&tr->max_lock);
7465 	if (tr->cond_snapshot)
7466 		ret = -EBUSY;
7467 	arch_spin_unlock(&tr->max_lock);
7468 	local_irq_enable();
7469 	if (ret)
7470 		goto out;
7471 
7472 	switch (val) {
7473 	case 0:
7474 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7475 			ret = -EINVAL;
7476 			break;
7477 		}
7478 		if (tr->allocated_snapshot)
7479 			free_snapshot(tr);
7480 		break;
7481 	case 1:
7482 /* Only allow per-cpu swap if the ring buffer supports it */
7483 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7484 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7485 			ret = -EINVAL;
7486 			break;
7487 		}
7488 #endif
7489 		if (tr->allocated_snapshot)
7490 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7491 					&tr->array_buffer, iter->cpu_file);
7492 		else
7493 			ret = tracing_alloc_snapshot_instance(tr);
7494 		if (ret < 0)
7495 			break;
7496 		local_irq_disable();
7497 		/* Now, we're going to swap */
7498 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7499 			update_max_tr(tr, current, smp_processor_id(), NULL);
7500 		else
7501 			update_max_tr_single(tr, current, iter->cpu_file);
7502 		local_irq_enable();
7503 		break;
7504 	default:
7505 		if (tr->allocated_snapshot) {
7506 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7507 				tracing_reset_online_cpus(&tr->max_buffer);
7508 			else
7509 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7510 		}
7511 		break;
7512 	}
7513 
7514 	if (ret >= 0) {
7515 		*ppos += cnt;
7516 		ret = cnt;
7517 	}
7518 out:
7519 	mutex_unlock(&trace_types_lock);
7520 	return ret;
7521 }
7522 
7523 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7524 {
7525 	struct seq_file *m = file->private_data;
7526 	int ret;
7527 
7528 	ret = tracing_release(inode, file);
7529 
7530 	if (file->f_mode & FMODE_READ)
7531 		return ret;
7532 
7533 	/* If write only, the seq_file is just a stub */
7534 	if (m)
7535 		kfree(m->private);
7536 	kfree(m);
7537 
7538 	return 0;
7539 }
7540 
7541 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7542 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7543 				    size_t count, loff_t *ppos);
7544 static int tracing_buffers_release(struct inode *inode, struct file *file);
7545 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7546 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7547 
7548 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7549 {
7550 	struct ftrace_buffer_info *info;
7551 	int ret;
7552 
7553 	/* The following checks for tracefs lockdown */
7554 	ret = tracing_buffers_open(inode, filp);
7555 	if (ret < 0)
7556 		return ret;
7557 
7558 	info = filp->private_data;
7559 
7560 	if (info->iter.trace->use_max_tr) {
7561 		tracing_buffers_release(inode, filp);
7562 		return -EBUSY;
7563 	}
7564 
7565 	info->iter.snapshot = true;
7566 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7567 
7568 	return ret;
7569 }
7570 
7571 #endif /* CONFIG_TRACER_SNAPSHOT */
7572 
7573 
7574 static const struct file_operations tracing_thresh_fops = {
7575 	.open		= tracing_open_generic,
7576 	.read		= tracing_thresh_read,
7577 	.write		= tracing_thresh_write,
7578 	.llseek		= generic_file_llseek,
7579 };
7580 
7581 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7582 static const struct file_operations tracing_max_lat_fops = {
7583 	.open		= tracing_open_generic,
7584 	.read		= tracing_max_lat_read,
7585 	.write		= tracing_max_lat_write,
7586 	.llseek		= generic_file_llseek,
7587 };
7588 #endif
7589 
7590 static const struct file_operations set_tracer_fops = {
7591 	.open		= tracing_open_generic,
7592 	.read		= tracing_set_trace_read,
7593 	.write		= tracing_set_trace_write,
7594 	.llseek		= generic_file_llseek,
7595 };
7596 
7597 static const struct file_operations tracing_pipe_fops = {
7598 	.open		= tracing_open_pipe,
7599 	.poll		= tracing_poll_pipe,
7600 	.read		= tracing_read_pipe,
7601 	.splice_read	= tracing_splice_read_pipe,
7602 	.release	= tracing_release_pipe,
7603 	.llseek		= no_llseek,
7604 };
7605 
7606 static const struct file_operations tracing_entries_fops = {
7607 	.open		= tracing_open_generic_tr,
7608 	.read		= tracing_entries_read,
7609 	.write		= tracing_entries_write,
7610 	.llseek		= generic_file_llseek,
7611 	.release	= tracing_release_generic_tr,
7612 };
7613 
7614 static const struct file_operations tracing_total_entries_fops = {
7615 	.open		= tracing_open_generic_tr,
7616 	.read		= tracing_total_entries_read,
7617 	.llseek		= generic_file_llseek,
7618 	.release	= tracing_release_generic_tr,
7619 };
7620 
7621 static const struct file_operations tracing_free_buffer_fops = {
7622 	.open		= tracing_open_generic_tr,
7623 	.write		= tracing_free_buffer_write,
7624 	.release	= tracing_free_buffer_release,
7625 };
7626 
7627 static const struct file_operations tracing_mark_fops = {
7628 	.open		= tracing_mark_open,
7629 	.write		= tracing_mark_write,
7630 	.release	= tracing_release_generic_tr,
7631 };
7632 
7633 static const struct file_operations tracing_mark_raw_fops = {
7634 	.open		= tracing_mark_open,
7635 	.write		= tracing_mark_raw_write,
7636 	.release	= tracing_release_generic_tr,
7637 };
7638 
7639 static const struct file_operations trace_clock_fops = {
7640 	.open		= tracing_clock_open,
7641 	.read		= seq_read,
7642 	.llseek		= seq_lseek,
7643 	.release	= tracing_single_release_tr,
7644 	.write		= tracing_clock_write,
7645 };
7646 
7647 static const struct file_operations trace_time_stamp_mode_fops = {
7648 	.open		= tracing_time_stamp_mode_open,
7649 	.read		= seq_read,
7650 	.llseek		= seq_lseek,
7651 	.release	= tracing_single_release_tr,
7652 };
7653 
7654 #ifdef CONFIG_TRACER_SNAPSHOT
7655 static const struct file_operations snapshot_fops = {
7656 	.open		= tracing_snapshot_open,
7657 	.read		= seq_read,
7658 	.write		= tracing_snapshot_write,
7659 	.llseek		= tracing_lseek,
7660 	.release	= tracing_snapshot_release,
7661 };
7662 
7663 static const struct file_operations snapshot_raw_fops = {
7664 	.open		= snapshot_raw_open,
7665 	.read		= tracing_buffers_read,
7666 	.release	= tracing_buffers_release,
7667 	.splice_read	= tracing_buffers_splice_read,
7668 	.llseek		= no_llseek,
7669 };
7670 
7671 #endif /* CONFIG_TRACER_SNAPSHOT */
7672 
7673 /*
7674  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7675  * @filp: The active open file structure
7676  * @ubuf: The user space provided buffer holding the value to write
7677  * @cnt: The maximum number of bytes to read
7678  * @ppos: The current "file" position
7679  *
7680  * This function implements the write interface for a struct trace_min_max_param.
7681  * The filp->private_data must point to a trace_min_max_param structure that
7682  * defines where to write the value, the min and the max acceptable values,
7683  * and a lock to protect the write.
7684  */
7685 static ssize_t
7686 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7687 {
7688 	struct trace_min_max_param *param = filp->private_data;
7689 	u64 val;
7690 	int err;
7691 
7692 	if (!param)
7693 		return -EFAULT;
7694 
7695 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7696 	if (err)
7697 		return err;
7698 
7699 	if (param->lock)
7700 		mutex_lock(param->lock);
7701 
7702 	if (param->min && val < *param->min)
7703 		err = -EINVAL;
7704 
7705 	if (param->max && val > *param->max)
7706 		err = -EINVAL;
7707 
7708 	if (!err)
7709 		*param->val = val;
7710 
7711 	if (param->lock)
7712 		mutex_unlock(param->lock);
7713 
7714 	if (err)
7715 		return err;
7716 
7717 	return cnt;
7718 }
7719 
7720 /*
7721  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7722  * @filp: The active open file structure
7723  * @ubuf: The userspace provided buffer to read value into
7724  * @cnt: The maximum number of bytes to read
7725  * @ppos: The current "file" position
7726  *
7727  * This function implements the read interface for a struct trace_min_max_param.
7728  * The filp->private_data must point to a trace_min_max_param struct with valid
7729  * data.
7730  */
7731 static ssize_t
7732 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7733 {
7734 	struct trace_min_max_param *param = filp->private_data;
7735 	char buf[U64_STR_SIZE];
7736 	int len;
7737 	u64 val;
7738 
7739 	if (!param)
7740 		return -EFAULT;
7741 
7742 	val = *param->val;
7743 
7744 	if (cnt > sizeof(buf))
7745 		cnt = sizeof(buf);
7746 
7747 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7748 
7749 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7750 }
7751 
7752 const struct file_operations trace_min_max_fops = {
7753 	.open		= tracing_open_generic,
7754 	.read		= trace_min_max_read,
7755 	.write		= trace_min_max_write,
7756 };
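
/*
 * Illustrative sketch (editorial example, not part of the original file):
 * a tracer that wants a range-checked u64 knob can wire one up through
 * trace_min_max_fops.  All names below ("example_*", "example_threshold",
 * and the parent dentry) are hypothetical.
 *
 *	static u64 example_val = 10, example_min = 1, example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_threshold", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 *
 * Writes outside [1, 100] then fail with -EINVAL; reads return the current
 * value followed by a newline.
 */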
7757 
7758 #define TRACING_LOG_ERRS_MAX	8
7759 #define TRACING_LOG_LOC_MAX	128
7760 
7761 #define CMD_PREFIX "  Command: "
7762 
7763 struct err_info {
7764 	const char	**errs;	/* ptr to loc-specific array of err strings */
7765 	u8		type;	/* index into errs -> specific err string */
7766 	u16		pos;	/* caret position */
7767 	u64		ts;
7768 };
7769 
7770 struct tracing_log_err {
7771 	struct list_head	list;
7772 	struct err_info		info;
7773 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7774 	char			*cmd;                     /* what caused err */
7775 };
7776 
7777 static DEFINE_MUTEX(tracing_err_log_lock);
7778 
7779 static struct tracing_log_err *alloc_tracing_log_err(int len)
7780 {
7781 	struct tracing_log_err *err;
7782 
7783 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7784 	if (!err)
7785 		return ERR_PTR(-ENOMEM);
7786 
7787 	err->cmd = kzalloc(len, GFP_KERNEL);
7788 	if (!err->cmd) {
7789 		kfree(err);
7790 		return ERR_PTR(-ENOMEM);
7791 	}
7792 
7793 	return err;
7794 }
7795 
7796 static void free_tracing_log_err(struct tracing_log_err *err)
7797 {
7798 	kfree(err->cmd);
7799 	kfree(err);
7800 }
7801 
7802 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7803 						   int len)
7804 {
7805 	struct tracing_log_err *err;
7806 	char *cmd;
7807 
7808 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7809 		err = alloc_tracing_log_err(len);
7810 		if (PTR_ERR(err) != -ENOMEM)
7811 			tr->n_err_log_entries++;
7812 
7813 		return err;
7814 	}
7815 	cmd = kzalloc(len, GFP_KERNEL);
7816 	if (!cmd)
7817 		return ERR_PTR(-ENOMEM);
7818 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7819 	kfree(err->cmd);
7820 	err->cmd = cmd;
7821 	list_del(&err->list);
7822 
7823 	return err;
7824 }
7825 
7826 /**
7827  * err_pos - find the position of a string within a command for error careting
7828  * @cmd: The tracing command that caused the error
7829  * @str: The string to position the caret at within @cmd
7830  *
7831  * Finds the position of the first occurrence of @str within @cmd.  The
7832  * return value can be passed to tracing_log_err() for caret placement
7833  * within @cmd.
7834  *
7835  * Returns the index within @cmd of the first occurrence of @str or 0
7836  * if @str was not found.
7837  */
7838 unsigned int err_pos(char *cmd, const char *str)
7839 {
7840 	char *found;
7841 
7842 	if (WARN_ON(!strlen(cmd)))
7843 		return 0;
7844 
7845 	found = strstr(cmd, str);
7846 	if (found)
7847 		return found - cmd;
7848 
7849 	return 0;
7850 }
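
/*
 * For example (illustrative only): with cmd = "hist:keys=bogus",
 * err_pos(cmd, "bogus") returns 10, which places the error_log caret
 * directly under "bogus" when the command is echoed back.
 */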
7851 
7852 /**
7853  * tracing_log_err - write an error to the tracing error log
7854  * @tr: The associated trace array for the error (NULL for top level array)
7855  * @loc: A string describing where the error occurred
7856  * @cmd: The tracing command that caused the error
7857  * @errs: The array of loc-specific static error strings
7858  * @type: The index into errs[], which produces the specific static err string
7859  * @pos: The position the caret should be placed in the cmd
7860  *
7861  * Writes an error into tracing/error_log of the form:
7862  *
7863  * <loc>: error: <text>
7864  *   Command: <cmd>
7865  *              ^
7866  *
7867  * tracing/error_log is a small log file containing the last
7868  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7869  * unless there has been a tracing error, and the error log can be
7870  * cleared and have its memory freed by writing the empty string in
7871  * truncation mode to it i.e. echo > tracing/error_log.
7872  *
7873  * NOTE: the @errs array along with the @type param are used to
7874  * produce a static error string - this string is not copied and saved
7875  * when the error is logged - only a pointer to it is saved.  See
7876  * existing callers for examples of how static strings are typically
7877  * defined for use with tracing_log_err().
7878  */
7879 void tracing_log_err(struct trace_array *tr,
7880 		     const char *loc, const char *cmd,
7881 		     const char **errs, u8 type, u16 pos)
7882 {
7883 	struct tracing_log_err *err;
7884 	int len = 0;
7885 
7886 	if (!tr)
7887 		tr = &global_trace;
7888 
7889 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7890 
7891 	mutex_lock(&tracing_err_log_lock);
7892 	err = get_tracing_log_err(tr, len);
7893 	if (PTR_ERR(err) == -ENOMEM) {
7894 		mutex_unlock(&tracing_err_log_lock);
7895 		return;
7896 	}
7897 
7898 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7899 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7900 
7901 	err->info.errs = errs;
7902 	err->info.type = type;
7903 	err->info.pos = pos;
7904 	err->info.ts = local_clock();
7905 
7906 	list_add_tail(&err->list, &tr->err_log);
7907 	mutex_unlock(&tracing_err_log_lock);
7908 }
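
/*
 * Illustrative caller pattern (editorial sketch; the error strings and
 * location label are hypothetical):
 *
 *	static const char *example_errs[] = {
 *		"Unknown keyword",
 *		"Missing argument",
 *	};
 *
 *	tracing_log_err(tr, "example: parse", cmd, example_errs,
 *			0, err_pos(cmd, bad_token));
 *
 * which records an entry that tracing/error_log renders roughly as:
 *
 *	[  123.456789] example: parse: error: Unknown keyword
 *	  Command: <the offending cmd string>
 *	               ^
 */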
7909 
7910 static void clear_tracing_err_log(struct trace_array *tr)
7911 {
7912 	struct tracing_log_err *err, *next;
7913 
7914 	mutex_lock(&tracing_err_log_lock);
7915 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7916 		list_del(&err->list);
7917 		free_tracing_log_err(err);
7918 	}
7919 
7920 	tr->n_err_log_entries = 0;
7921 	mutex_unlock(&tracing_err_log_lock);
7922 }
7923 
7924 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7925 {
7926 	struct trace_array *tr = m->private;
7927 
7928 	mutex_lock(&tracing_err_log_lock);
7929 
7930 	return seq_list_start(&tr->err_log, *pos);
7931 }
7932 
7933 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7934 {
7935 	struct trace_array *tr = m->private;
7936 
7937 	return seq_list_next(v, &tr->err_log, pos);
7938 }
7939 
7940 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7941 {
7942 	mutex_unlock(&tracing_err_log_lock);
7943 }
7944 
7945 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7946 {
7947 	u16 i;
7948 
7949 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7950 		seq_putc(m, ' ');
7951 	for (i = 0; i < pos; i++)
7952 		seq_putc(m, ' ');
7953 	seq_puts(m, "^\n");
7954 }
7955 
7956 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7957 {
7958 	struct tracing_log_err *err = v;
7959 
7960 	if (err) {
7961 		const char *err_text = err->info.errs[err->info.type];
7962 		u64 sec = err->info.ts;
7963 		u32 nsec;
7964 
7965 		nsec = do_div(sec, NSEC_PER_SEC);
7966 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7967 			   err->loc, err_text);
7968 		seq_printf(m, "%s", err->cmd);
7969 		tracing_err_log_show_pos(m, err->info.pos);
7970 	}
7971 
7972 	return 0;
7973 }
7974 
7975 static const struct seq_operations tracing_err_log_seq_ops = {
7976 	.start  = tracing_err_log_seq_start,
7977 	.next   = tracing_err_log_seq_next,
7978 	.stop   = tracing_err_log_seq_stop,
7979 	.show   = tracing_err_log_seq_show
7980 };
7981 
7982 static int tracing_err_log_open(struct inode *inode, struct file *file)
7983 {
7984 	struct trace_array *tr = inode->i_private;
7985 	int ret = 0;
7986 
7987 	ret = tracing_check_open_get_tr(tr);
7988 	if (ret)
7989 		return ret;
7990 
7991 	/* If this file was opened for write, then erase contents */
7992 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7993 		clear_tracing_err_log(tr);
7994 
7995 	if (file->f_mode & FMODE_READ) {
7996 		ret = seq_open(file, &tracing_err_log_seq_ops);
7997 		if (!ret) {
7998 			struct seq_file *m = file->private_data;
7999 			m->private = tr;
8000 		} else {
8001 			trace_array_put(tr);
8002 		}
8003 	}
8004 	return ret;
8005 }
8006 
8007 static ssize_t tracing_err_log_write(struct file *file,
8008 				     const char __user *buffer,
8009 				     size_t count, loff_t *ppos)
8010 {
8011 	return count;
8012 }
8013 
8014 static int tracing_err_log_release(struct inode *inode, struct file *file)
8015 {
8016 	struct trace_array *tr = inode->i_private;
8017 
8018 	trace_array_put(tr);
8019 
8020 	if (file->f_mode & FMODE_READ)
8021 		seq_release(inode, file);
8022 
8023 	return 0;
8024 }
8025 
8026 static const struct file_operations tracing_err_log_fops = {
8027 	.open           = tracing_err_log_open,
8028 	.write		= tracing_err_log_write,
8029 	.read           = seq_read,
8030 	.llseek         = seq_lseek,
8031 	.release        = tracing_err_log_release,
8032 };
8033 
8034 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8035 {
8036 	struct trace_array *tr = inode->i_private;
8037 	struct ftrace_buffer_info *info;
8038 	int ret;
8039 
8040 	ret = tracing_check_open_get_tr(tr);
8041 	if (ret)
8042 		return ret;
8043 
8044 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8045 	if (!info) {
8046 		trace_array_put(tr);
8047 		return -ENOMEM;
8048 	}
8049 
8050 	mutex_lock(&trace_types_lock);
8051 
8052 	info->iter.tr		= tr;
8053 	info->iter.cpu_file	= tracing_get_cpu(inode);
8054 	info->iter.trace	= tr->current_trace;
8055 	info->iter.array_buffer = &tr->array_buffer;
8056 	info->spare		= NULL;
8057 	/* Force reading ring buffer for first read */
8058 	info->read		= (unsigned int)-1;
8059 
8060 	filp->private_data = info;
8061 
8062 	tr->trace_ref++;
8063 
8064 	mutex_unlock(&trace_types_lock);
8065 
8066 	ret = nonseekable_open(inode, filp);
8067 	if (ret < 0)
8068 		trace_array_put(tr);
8069 
8070 	return ret;
8071 }
8072 
8073 static __poll_t
8074 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8075 {
8076 	struct ftrace_buffer_info *info = filp->private_data;
8077 	struct trace_iterator *iter = &info->iter;
8078 
8079 	return trace_poll(iter, filp, poll_table);
8080 }
8081 
8082 static ssize_t
8083 tracing_buffers_read(struct file *filp, char __user *ubuf,
8084 		     size_t count, loff_t *ppos)
8085 {
8086 	struct ftrace_buffer_info *info = filp->private_data;
8087 	struct trace_iterator *iter = &info->iter;
8088 	ssize_t ret = 0;
8089 	ssize_t size;
8090 
8091 	if (!count)
8092 		return 0;
8093 
8094 #ifdef CONFIG_TRACER_MAX_TRACE
8095 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8096 		return -EBUSY;
8097 #endif
8098 
8099 	if (!info->spare) {
8100 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8101 							  iter->cpu_file);
8102 		if (IS_ERR(info->spare)) {
8103 			ret = PTR_ERR(info->spare);
8104 			info->spare = NULL;
8105 		} else {
8106 			info->spare_cpu = iter->cpu_file;
8107 		}
8108 	}
8109 	if (!info->spare)
8110 		return ret;
8111 
8112 	/* Do we have previous read data to read? */
8113 	if (info->read < PAGE_SIZE)
8114 		goto read;
8115 
8116  again:
8117 	trace_access_lock(iter->cpu_file);
8118 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8119 				    &info->spare,
8120 				    count,
8121 				    iter->cpu_file, 0);
8122 	trace_access_unlock(iter->cpu_file);
8123 
8124 	if (ret < 0) {
8125 		if (trace_empty(iter)) {
8126 			if ((filp->f_flags & O_NONBLOCK))
8127 				return -EAGAIN;
8128 
8129 			ret = wait_on_pipe(iter, 0);
8130 			if (ret)
8131 				return ret;
8132 
8133 			goto again;
8134 		}
8135 		return 0;
8136 	}
8137 
8138 	info->read = 0;
8139  read:
8140 	size = PAGE_SIZE - info->read;
8141 	if (size > count)
8142 		size = count;
8143 
8144 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8145 	if (ret == size)
8146 		return -EFAULT;
8147 
8148 	size -= ret;
8149 
8150 	*ppos += size;
8151 	info->read += size;
8152 
8153 	return size;
8154 }
8155 
8156 static int tracing_buffers_release(struct inode *inode, struct file *file)
8157 {
8158 	struct ftrace_buffer_info *info = file->private_data;
8159 	struct trace_iterator *iter = &info->iter;
8160 
8161 	mutex_lock(&trace_types_lock);
8162 
8163 	iter->tr->trace_ref--;
8164 
8165 	__trace_array_put(iter->tr);
8166 
8167 	iter->wait_index++;
8168 	/* Make sure the waiters see the new wait_index */
8169 	smp_wmb();
8170 
8171 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8172 
8173 	if (info->spare)
8174 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8175 					   info->spare_cpu, info->spare);
8176 	kvfree(info);
8177 
8178 	mutex_unlock(&trace_types_lock);
8179 
8180 	return 0;
8181 }
8182 
8183 struct buffer_ref {
8184 	struct trace_buffer	*buffer;
8185 	void			*page;
8186 	int			cpu;
8187 	refcount_t		refcount;
8188 };
8189 
8190 static void buffer_ref_release(struct buffer_ref *ref)
8191 {
8192 	if (!refcount_dec_and_test(&ref->refcount))
8193 		return;
8194 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8195 	kfree(ref);
8196 }
8197 
8198 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8199 				    struct pipe_buffer *buf)
8200 {
8201 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8202 
8203 	buffer_ref_release(ref);
8204 	buf->private = 0;
8205 }
8206 
8207 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8208 				struct pipe_buffer *buf)
8209 {
8210 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8211 
8212 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8213 		return false;
8214 
8215 	refcount_inc(&ref->refcount);
8216 	return true;
8217 }
8218 
8219 /* Pipe buffer operations for a buffer. */
8220 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8221 	.release		= buffer_pipe_buf_release,
8222 	.get			= buffer_pipe_buf_get,
8223 };
8224 
8225 /*
8226  * Callback from splice_to_pipe(), if we need to release some pages
8227  * at the end of the spd in case we errored out while filling the pipe.
8228  */
8229 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8230 {
8231 	struct buffer_ref *ref =
8232 		(struct buffer_ref *)spd->partial[i].private;
8233 
8234 	buffer_ref_release(ref);
8235 	spd->partial[i].private = 0;
8236 }
8237 
8238 static ssize_t
8239 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8240 			    struct pipe_inode_info *pipe, size_t len,
8241 			    unsigned int flags)
8242 {
8243 	struct ftrace_buffer_info *info = file->private_data;
8244 	struct trace_iterator *iter = &info->iter;
8245 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8246 	struct page *pages_def[PIPE_DEF_BUFFERS];
8247 	struct splice_pipe_desc spd = {
8248 		.pages		= pages_def,
8249 		.partial	= partial_def,
8250 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8251 		.ops		= &buffer_pipe_buf_ops,
8252 		.spd_release	= buffer_spd_release,
8253 	};
8254 	struct buffer_ref *ref;
8255 	int entries, i;
8256 	ssize_t ret = 0;
8257 
8258 #ifdef CONFIG_TRACER_MAX_TRACE
8259 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8260 		return -EBUSY;
8261 #endif
8262 
8263 	if (*ppos & (PAGE_SIZE - 1))
8264 		return -EINVAL;
8265 
8266 	if (len & (PAGE_SIZE - 1)) {
8267 		if (len < PAGE_SIZE)
8268 			return -EINVAL;
8269 		len &= PAGE_MASK;
8270 	}
8271 
8272 	if (splice_grow_spd(pipe, &spd))
8273 		return -ENOMEM;
8274 
8275  again:
8276 	trace_access_lock(iter->cpu_file);
8277 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8278 
8279 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8280 		struct page *page;
8281 		int r;
8282 
8283 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8284 		if (!ref) {
8285 			ret = -ENOMEM;
8286 			break;
8287 		}
8288 
8289 		refcount_set(&ref->refcount, 1);
8290 		ref->buffer = iter->array_buffer->buffer;
8291 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8292 		if (IS_ERR(ref->page)) {
8293 			ret = PTR_ERR(ref->page);
8294 			ref->page = NULL;
8295 			kfree(ref);
8296 			break;
8297 		}
8298 		ref->cpu = iter->cpu_file;
8299 
8300 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8301 					  len, iter->cpu_file, 1);
8302 		if (r < 0) {
8303 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8304 						   ref->page);
8305 			kfree(ref);
8306 			break;
8307 		}
8308 
8309 		page = virt_to_page(ref->page);
8310 
8311 		spd.pages[i] = page;
8312 		spd.partial[i].len = PAGE_SIZE;
8313 		spd.partial[i].offset = 0;
8314 		spd.partial[i].private = (unsigned long)ref;
8315 		spd.nr_pages++;
8316 		*ppos += PAGE_SIZE;
8317 
8318 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8319 	}
8320 
8321 	trace_access_unlock(iter->cpu_file);
8322 	spd.nr_pages = i;
8323 
8324 	/* did we read anything? */
8325 	if (!spd.nr_pages) {
8326 		long wait_index;
8327 
8328 		if (ret)
8329 			goto out;
8330 
8331 		ret = -EAGAIN;
8332 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8333 			goto out;
8334 
8335 		wait_index = READ_ONCE(iter->wait_index);
8336 
8337 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8338 		if (ret)
8339 			goto out;
8340 
8341 		/* No need to wait after waking up when tracing is off */
8342 		if (!tracer_tracing_is_on(iter->tr))
8343 			goto out;
8344 
8345 		/* Make sure we see the new wait_index */
8346 		smp_rmb();
8347 		if (wait_index != iter->wait_index)
8348 			goto out;
8349 
8350 		goto again;
8351 	}
8352 
8353 	ret = splice_to_pipe(pipe, &spd);
8354 out:
8355 	splice_shrink_spd(&spd);
8356 
8357 	return ret;
8358 }
8359 
8360 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8361 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8362 {
8363 	struct ftrace_buffer_info *info = file->private_data;
8364 	struct trace_iterator *iter = &info->iter;
8365 
8366 	if (cmd)
8367 		return -ENOIOCTLCMD;
8368 
8369 	mutex_lock(&trace_types_lock);
8370 
8371 	iter->wait_index++;
8372 	/* Make sure the waiters see the new wait_index */
8373 	smp_wmb();
8374 
8375 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8376 
8377 	mutex_unlock(&trace_types_lock);
8378 	return 0;
8379 }
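
/*
 * Illustrative user-space counterpart: a reader blocked in read() or
 * splice() on per_cpu/cpuN/trace_pipe_raw can be released by another
 * thread issuing ioctl(fd, 0, 0) on that file, which bumps wait_index
 * and wakes the ring buffer waiters as above.
 */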
8380 
8381 static const struct file_operations tracing_buffers_fops = {
8382 	.open		= tracing_buffers_open,
8383 	.read		= tracing_buffers_read,
8384 	.poll		= tracing_buffers_poll,
8385 	.release	= tracing_buffers_release,
8386 	.splice_read	= tracing_buffers_splice_read,
8387 	.unlocked_ioctl = tracing_buffers_ioctl,
8388 	.llseek		= no_llseek,
8389 };
8390 
8391 static ssize_t
8392 tracing_stats_read(struct file *filp, char __user *ubuf,
8393 		   size_t count, loff_t *ppos)
8394 {
8395 	struct inode *inode = file_inode(filp);
8396 	struct trace_array *tr = inode->i_private;
8397 	struct array_buffer *trace_buf = &tr->array_buffer;
8398 	int cpu = tracing_get_cpu(inode);
8399 	struct trace_seq *s;
8400 	unsigned long cnt;
8401 	unsigned long long t;
8402 	unsigned long usec_rem;
8403 
8404 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8405 	if (!s)
8406 		return -ENOMEM;
8407 
8408 	trace_seq_init(s);
8409 
8410 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8411 	trace_seq_printf(s, "entries: %ld\n", cnt);
8412 
8413 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8414 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8415 
8416 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8417 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8418 
8419 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8420 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8421 
8422 	if (trace_clocks[tr->clock_id].in_ns) {
8423 		/* local or global for trace_clock */
8424 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8425 		usec_rem = do_div(t, USEC_PER_SEC);
8426 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8427 								t, usec_rem);
8428 
8429 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8430 		usec_rem = do_div(t, USEC_PER_SEC);
8431 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8432 	} else {
8433 		/* counter or tsc mode for trace_clock */
8434 		trace_seq_printf(s, "oldest event ts: %llu\n",
8435 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8436 
8437 		trace_seq_printf(s, "now ts: %llu\n",
8438 				ring_buffer_time_stamp(trace_buf->buffer));
8439 	}
8440 
8441 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8442 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8443 
8444 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8445 	trace_seq_printf(s, "read events: %ld\n", cnt);
8446 
8447 	count = simple_read_from_buffer(ubuf, count, ppos,
8448 					s->buffer, trace_seq_used(s));
8449 
8450 	kfree(s);
8451 
8452 	return count;
8453 }
8454 
8455 static const struct file_operations tracing_stats_fops = {
8456 	.open		= tracing_open_generic_tr,
8457 	.read		= tracing_stats_read,
8458 	.llseek		= generic_file_llseek,
8459 	.release	= tracing_release_generic_tr,
8460 };
8461 
8462 #ifdef CONFIG_DYNAMIC_FTRACE
8463 
8464 static ssize_t
8465 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8466 		  size_t cnt, loff_t *ppos)
8467 {
8468 	ssize_t ret;
8469 	char *buf;
8470 	int r;
8471 
8472 	/* 256 should be plenty to hold the amount needed */
8473 	buf = kmalloc(256, GFP_KERNEL);
8474 	if (!buf)
8475 		return -ENOMEM;
8476 
8477 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8478 		      ftrace_update_tot_cnt,
8479 		      ftrace_number_of_pages,
8480 		      ftrace_number_of_groups);
8481 
8482 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8483 	kfree(buf);
8484 	return ret;
8485 }
8486 
8487 static const struct file_operations tracing_dyn_info_fops = {
8488 	.open		= tracing_open_generic,
8489 	.read		= tracing_read_dyn_info,
8490 	.llseek		= generic_file_llseek,
8491 };
8492 #endif /* CONFIG_DYNAMIC_FTRACE */
8493 
8494 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8495 static void
8496 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8497 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8498 		void *data)
8499 {
8500 	tracing_snapshot_instance(tr);
8501 }
8502 
8503 static void
8504 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8505 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8506 		      void *data)
8507 {
8508 	struct ftrace_func_mapper *mapper = data;
8509 	long *count = NULL;
8510 
8511 	if (mapper)
8512 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8513 
8514 	if (count) {
8515 
8516 		if (*count <= 0)
8517 			return;
8518 
8519 		(*count)--;
8520 	}
8521 
8522 	tracing_snapshot_instance(tr);
8523 }
8524 
8525 static int
8526 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8527 		      struct ftrace_probe_ops *ops, void *data)
8528 {
8529 	struct ftrace_func_mapper *mapper = data;
8530 	long *count = NULL;
8531 
8532 	seq_printf(m, "%ps:", (void *)ip);
8533 
8534 	seq_puts(m, "snapshot");
8535 
8536 	if (mapper)
8537 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8538 
8539 	if (count)
8540 		seq_printf(m, ":count=%ld\n", *count);
8541 	else
8542 		seq_puts(m, ":unlimited\n");
8543 
8544 	return 0;
8545 }
8546 
8547 static int
8548 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8549 		     unsigned long ip, void *init_data, void **data)
8550 {
8551 	struct ftrace_func_mapper *mapper = *data;
8552 
8553 	if (!mapper) {
8554 		mapper = allocate_ftrace_func_mapper();
8555 		if (!mapper)
8556 			return -ENOMEM;
8557 		*data = mapper;
8558 	}
8559 
8560 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8561 }
8562 
8563 static void
8564 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8565 		     unsigned long ip, void *data)
8566 {
8567 	struct ftrace_func_mapper *mapper = data;
8568 
8569 	if (!ip) {
8570 		if (!mapper)
8571 			return;
8572 		free_ftrace_func_mapper(mapper, NULL);
8573 		return;
8574 	}
8575 
8576 	ftrace_func_mapper_remove_ip(mapper, ip);
8577 }
8578 
8579 static struct ftrace_probe_ops snapshot_probe_ops = {
8580 	.func			= ftrace_snapshot,
8581 	.print			= ftrace_snapshot_print,
8582 };
8583 
8584 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8585 	.func			= ftrace_count_snapshot,
8586 	.print			= ftrace_snapshot_print,
8587 	.init			= ftrace_snapshot_init,
8588 	.free			= ftrace_snapshot_free,
8589 };
8590 
8591 static int
8592 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8593 			       char *glob, char *cmd, char *param, int enable)
8594 {
8595 	struct ftrace_probe_ops *ops;
8596 	void *count = (void *)-1;
8597 	char *number;
8598 	int ret;
8599 
8600 	if (!tr)
8601 		return -ENODEV;
8602 
8603 	/* hash funcs only work with set_ftrace_filter */
8604 	if (!enable)
8605 		return -EINVAL;
8606 
8607 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8608 
8609 	if (glob[0] == '!')
8610 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8611 
8612 	if (!param)
8613 		goto out_reg;
8614 
8615 	number = strsep(&param, ":");
8616 
8617 	if (!strlen(number))
8618 		goto out_reg;
8619 
8620 	/*
8621 	 * We use the callback data field (which is a pointer)
8622 	 * as our counter.
8623 	 */
8624 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8625 	if (ret)
8626 		return ret;
8627 
8628  out_reg:
8629 	ret = tracing_alloc_snapshot_instance(tr);
8630 	if (ret < 0)
8631 		goto out;
8632 
8633 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8634 
8635  out:
8636 	return ret < 0 ? ret : 0;
8637 }
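
/*
 * Usage sketch for the "snapshot" command registered below (the function
 * name is a placeholder):
 *
 *	echo '<function>:snapshot' > set_ftrace_filter
 *	echo '<function>:snapshot:3' > set_ftrace_filter
 *	echo '!<function>:snapshot' > set_ftrace_filter
 *
 * The first form takes a snapshot every time <function> is hit, the second
 * limits it to three snapshots (the count parsed from @param above), and
 * the '!' form removes the probe again (the glob[0] == '!' branch).
 */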
8638 
8639 static struct ftrace_func_command ftrace_snapshot_cmd = {
8640 	.name			= "snapshot",
8641 	.func			= ftrace_trace_snapshot_callback,
8642 };
8643 
8644 static __init int register_snapshot_cmd(void)
8645 {
8646 	return register_ftrace_command(&ftrace_snapshot_cmd);
8647 }
8648 #else
8649 static inline __init int register_snapshot_cmd(void) { return 0; }
8650 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8651 
8652 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8653 {
8654 	if (WARN_ON(!tr->dir))
8655 		return ERR_PTR(-ENODEV);
8656 
8657 	/* Top directory uses NULL as the parent */
8658 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8659 		return NULL;
8660 
8661 	/* All sub buffers have a descriptor */
8662 	return tr->dir;
8663 }
8664 
8665 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8666 {
8667 	struct dentry *d_tracer;
8668 
8669 	if (tr->percpu_dir)
8670 		return tr->percpu_dir;
8671 
8672 	d_tracer = tracing_get_dentry(tr);
8673 	if (IS_ERR(d_tracer))
8674 		return NULL;
8675 
8676 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8677 
8678 	MEM_FAIL(!tr->percpu_dir,
8679 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8680 
8681 	return tr->percpu_dir;
8682 }
8683 
8684 static struct dentry *
8685 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8686 		      void *data, long cpu, const struct file_operations *fops)
8687 {
8688 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8689 
8690 	if (ret) /* See tracing_get_cpu() */
8691 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8692 	return ret;
8693 }
8694 
8695 static void
8696 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8697 {
8698 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8699 	struct dentry *d_cpu;
8700 	char cpu_dir[30]; /* 30 characters should be more than enough */
8701 
8702 	if (!d_percpu)
8703 		return;
8704 
8705 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8706 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8707 	if (!d_cpu) {
8708 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8709 		return;
8710 	}
8711 
8712 	/* per cpu trace_pipe */
8713 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8714 				tr, cpu, &tracing_pipe_fops);
8715 
8716 	/* per cpu trace */
8717 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8718 				tr, cpu, &tracing_fops);
8719 
8720 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8721 				tr, cpu, &tracing_buffers_fops);
8722 
8723 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8724 				tr, cpu, &tracing_stats_fops);
8725 
8726 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8727 				tr, cpu, &tracing_entries_fops);
8728 
8729 #ifdef CONFIG_TRACER_SNAPSHOT
8730 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8731 				tr, cpu, &snapshot_fops);
8732 
8733 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8734 				tr, cpu, &snapshot_raw_fops);
8735 #endif
8736 }
8737 
8738 #ifdef CONFIG_FTRACE_SELFTEST
8739 /* Let selftest have access to static functions in this file */
8740 #include "trace_selftest.c"
8741 #endif
8742 
8743 static ssize_t
8744 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8745 			loff_t *ppos)
8746 {
8747 	struct trace_option_dentry *topt = filp->private_data;
8748 	char *buf;
8749 
8750 	if (topt->flags->val & topt->opt->bit)
8751 		buf = "1\n";
8752 	else
8753 		buf = "0\n";
8754 
8755 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8756 }
8757 
8758 static ssize_t
8759 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8760 			 loff_t *ppos)
8761 {
8762 	struct trace_option_dentry *topt = filp->private_data;
8763 	unsigned long val;
8764 	int ret;
8765 
8766 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8767 	if (ret)
8768 		return ret;
8769 
8770 	if (val != 0 && val != 1)
8771 		return -EINVAL;
8772 
8773 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8774 		mutex_lock(&trace_types_lock);
8775 		ret = __set_tracer_option(topt->tr, topt->flags,
8776 					  topt->opt, !val);
8777 		mutex_unlock(&trace_types_lock);
8778 		if (ret)
8779 			return ret;
8780 	}
8781 
8782 	*ppos += cnt;
8783 
8784 	return cnt;
8785 }
8786 
8787 
8788 static const struct file_operations trace_options_fops = {
8789 	.open = tracing_open_generic,
8790 	.read = trace_options_read,
8791 	.write = trace_options_write,
8792 	.llseek	= generic_file_llseek,
8793 };
8794 
8795 /*
8796  * In order to pass in both the trace_array descriptor as well as the index
8797  * to the flag that the trace option file represents, the trace_array
8798  * has a character array of trace_flags_index[], which holds the index
8799  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8800  * The address of this character array is passed to the flag option file
8801  * read/write callbacks.
8802  *
8803  * In order to extract both the index and the trace_array descriptor,
8804  * get_tr_index() uses the following algorithm.
8805  *
8806  *   idx = *ptr;
8807  *
8808  * As the pointer itself contains the address of the index (remember
8809  * index[1] == 1).
8810  *
8811  * Then to get the trace_array descriptor, by subtracting that index
8812  * from the ptr, we get to the start of the index itself.
8813  *
8814  *   ptr - idx == &index[0]
8815  *
8816  * Then a simple container_of() from that pointer gets us to the
8817  * trace_array descriptor.
8818  */
8819 static void get_tr_index(void *data, struct trace_array **ptr,
8820 			 unsigned int *pindex)
8821 {
8822 	*pindex = *(unsigned char *)data;
8823 
8824 	*ptr = container_of(data - *pindex, struct trace_array,
8825 			    trace_flags_index);
8826 }
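
/*
 * Worked example (editorial note): for the option file of flag bit 3, the
 * file's private data is &tr->trace_flags_index[3], which stores the value
 * 3.  get_tr_index() reads that value as the index, steps the pointer back
 * by 3 to &tr->trace_flags_index[0], and container_of() on that address
 * recovers the owning trace_array.
 */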
8827 
8828 static ssize_t
8829 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8830 			loff_t *ppos)
8831 {
8832 	void *tr_index = filp->private_data;
8833 	struct trace_array *tr;
8834 	unsigned int index;
8835 	char *buf;
8836 
8837 	get_tr_index(tr_index, &tr, &index);
8838 
8839 	if (tr->trace_flags & (1 << index))
8840 		buf = "1\n";
8841 	else
8842 		buf = "0\n";
8843 
8844 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8845 }
8846 
8847 static ssize_t
8848 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8849 			 loff_t *ppos)
8850 {
8851 	void *tr_index = filp->private_data;
8852 	struct trace_array *tr;
8853 	unsigned int index;
8854 	unsigned long val;
8855 	int ret;
8856 
8857 	get_tr_index(tr_index, &tr, &index);
8858 
8859 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8860 	if (ret)
8861 		return ret;
8862 
8863 	if (val != 0 && val != 1)
8864 		return -EINVAL;
8865 
8866 	mutex_lock(&event_mutex);
8867 	mutex_lock(&trace_types_lock);
8868 	ret = set_tracer_flag(tr, 1 << index, val);
8869 	mutex_unlock(&trace_types_lock);
8870 	mutex_unlock(&event_mutex);
8871 
8872 	if (ret < 0)
8873 		return ret;
8874 
8875 	*ppos += cnt;
8876 
8877 	return cnt;
8878 }
8879 
8880 static const struct file_operations trace_options_core_fops = {
8881 	.open = tracing_open_generic,
8882 	.read = trace_options_core_read,
8883 	.write = trace_options_core_write,
8884 	.llseek = generic_file_llseek,
8885 };
8886 
8887 struct dentry *trace_create_file(const char *name,
8888 				 umode_t mode,
8889 				 struct dentry *parent,
8890 				 void *data,
8891 				 const struct file_operations *fops)
8892 {
8893 	struct dentry *ret;
8894 
8895 	ret = tracefs_create_file(name, mode, parent, data, fops);
8896 	if (!ret)
8897 		pr_warn("Could not create tracefs '%s' entry\n", name);
8898 
8899 	return ret;
8900 }
8901 
8902 
8903 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8904 {
8905 	struct dentry *d_tracer;
8906 
8907 	if (tr->options)
8908 		return tr->options;
8909 
8910 	d_tracer = tracing_get_dentry(tr);
8911 	if (IS_ERR(d_tracer))
8912 		return NULL;
8913 
8914 	tr->options = tracefs_create_dir("options", d_tracer);
8915 	if (!tr->options) {
8916 		pr_warn("Could not create tracefs directory 'options'\n");
8917 		return NULL;
8918 	}
8919 
8920 	return tr->options;
8921 }
8922 
8923 static void
8924 create_trace_option_file(struct trace_array *tr,
8925 			 struct trace_option_dentry *topt,
8926 			 struct tracer_flags *flags,
8927 			 struct tracer_opt *opt)
8928 {
8929 	struct dentry *t_options;
8930 
8931 	t_options = trace_options_init_dentry(tr);
8932 	if (!t_options)
8933 		return;
8934 
8935 	topt->flags = flags;
8936 	topt->opt = opt;
8937 	topt->tr = tr;
8938 
8939 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8940 					t_options, topt, &trace_options_fops);
8941 
8942 }
8943 
8944 static void
8945 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8946 {
8947 	struct trace_option_dentry *topts;
8948 	struct trace_options *tr_topts;
8949 	struct tracer_flags *flags;
8950 	struct tracer_opt *opts;
8951 	int cnt;
8952 	int i;
8953 
8954 	if (!tracer)
8955 		return;
8956 
8957 	flags = tracer->flags;
8958 
8959 	if (!flags || !flags->opts)
8960 		return;
8961 
8962 	/*
8963 	 * If this is an instance, only create flags for tracers
8964 	 * the instance may have.
8965 	 */
8966 	if (!trace_ok_for_array(tracer, tr))
8967 		return;
8968 
8969 	for (i = 0; i < tr->nr_topts; i++) {
8970 		/* Make sure there are no duplicate flags. */
8971 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8972 			return;
8973 	}
8974 
8975 	opts = flags->opts;
8976 
8977 	for (cnt = 0; opts[cnt].name; cnt++)
8978 		;
8979 
8980 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8981 	if (!topts)
8982 		return;
8983 
8984 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8985 			    GFP_KERNEL);
8986 	if (!tr_topts) {
8987 		kfree(topts);
8988 		return;
8989 	}
8990 
8991 	tr->topts = tr_topts;
8992 	tr->topts[tr->nr_topts].tracer = tracer;
8993 	tr->topts[tr->nr_topts].topts = topts;
8994 	tr->nr_topts++;
8995 
8996 	for (cnt = 0; opts[cnt].name; cnt++) {
8997 		create_trace_option_file(tr, &topts[cnt], flags,
8998 					 &opts[cnt]);
8999 		MEM_FAIL(topts[cnt].entry == NULL,
9000 			  "Failed to create trace option: %s",
9001 			  opts[cnt].name);
9002 	}
9003 }
9004 
9005 static struct dentry *
9006 create_trace_option_core_file(struct trace_array *tr,
9007 			      const char *option, long index)
9008 {
9009 	struct dentry *t_options;
9010 
9011 	t_options = trace_options_init_dentry(tr);
9012 	if (!t_options)
9013 		return NULL;
9014 
9015 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9016 				 (void *)&tr->trace_flags_index[index],
9017 				 &trace_options_core_fops);
9018 }
9019 
9020 static void create_trace_options_dir(struct trace_array *tr)
9021 {
9022 	struct dentry *t_options;
9023 	bool top_level = tr == &global_trace;
9024 	int i;
9025 
9026 	t_options = trace_options_init_dentry(tr);
9027 	if (!t_options)
9028 		return;
9029 
9030 	for (i = 0; trace_options[i]; i++) {
9031 		if (top_level ||
9032 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9033 			create_trace_option_core_file(tr, trace_options[i], i);
9034 	}
9035 }
9036 
9037 static ssize_t
9038 rb_simple_read(struct file *filp, char __user *ubuf,
9039 	       size_t cnt, loff_t *ppos)
9040 {
9041 	struct trace_array *tr = filp->private_data;
9042 	char buf[64];
9043 	int r;
9044 
9045 	r = tracer_tracing_is_on(tr);
9046 	r = sprintf(buf, "%d\n", r);
9047 
9048 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9049 }
9050 
9051 static ssize_t
9052 rb_simple_write(struct file *filp, const char __user *ubuf,
9053 		size_t cnt, loff_t *ppos)
9054 {
9055 	struct trace_array *tr = filp->private_data;
9056 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9057 	unsigned long val;
9058 	int ret;
9059 
9060 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9061 	if (ret)
9062 		return ret;
9063 
9064 	if (buffer) {
9065 		mutex_lock(&trace_types_lock);
9066 		if (!!val == tracer_tracing_is_on(tr)) {
9067 			val = 0; /* do nothing */
9068 		} else if (val) {
9069 			tracer_tracing_on(tr);
9070 			if (tr->current_trace->start)
9071 				tr->current_trace->start(tr);
9072 		} else {
9073 			tracer_tracing_off(tr);
9074 			if (tr->current_trace->stop)
9075 				tr->current_trace->stop(tr);
9076 			/* Wake up any waiters */
9077 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9078 		}
9079 		mutex_unlock(&trace_types_lock);
9080 	}
9081 
9082 	(*ppos)++;
9083 
9084 	return cnt;
9085 }
9086 
9087 static const struct file_operations rb_simple_fops = {
9088 	.open		= tracing_open_generic_tr,
9089 	.read		= rb_simple_read,
9090 	.write		= rb_simple_write,
9091 	.release	= tracing_release_generic_tr,
9092 	.llseek		= default_llseek,
9093 };
9094 
9095 static ssize_t
9096 buffer_percent_read(struct file *filp, char __user *ubuf,
9097 		    size_t cnt, loff_t *ppos)
9098 {
9099 	struct trace_array *tr = filp->private_data;
9100 	char buf[64];
9101 	int r;
9102 
9103 	r = tr->buffer_percent;
9104 	r = sprintf(buf, "%d\n", r);
9105 
9106 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9107 }
9108 
9109 static ssize_t
9110 buffer_percent_write(struct file *filp, const char __user *ubuf,
9111 		     size_t cnt, loff_t *ppos)
9112 {
9113 	struct trace_array *tr = filp->private_data;
9114 	unsigned long val;
9115 	int ret;
9116 
9117 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9118 	if (ret)
9119 		return ret;
9120 
9121 	if (val > 100)
9122 		return -EINVAL;
9123 
9124 	if (!val)
9125 		val = 1;
9126 
9127 	tr->buffer_percent = val;
9128 
9129 	(*ppos)++;
9130 
9131 	return cnt;
9132 }
9133 
9134 static const struct file_operations buffer_percent_fops = {
9135 	.open		= tracing_open_generic_tr,
9136 	.read		= buffer_percent_read,
9137 	.write		= buffer_percent_write,
9138 	.release	= tracing_release_generic_tr,
9139 	.llseek		= default_llseek,
9140 };
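
/*
 * Editorial note: tr->buffer_percent set here is consumed by wait_on_pipe()
 * callers such as tracing_buffers_splice_read() above, where it determines
 * how full the ring buffer must be before a blocked reader is woken.
 */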
9141 
9142 static struct dentry *trace_instance_dir;
9143 
9144 static void
9145 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9146 
9147 static int
9148 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9149 {
9150 	enum ring_buffer_flags rb_flags;
9151 
9152 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9153 
9154 	buf->tr = tr;
9155 
9156 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9157 	if (!buf->buffer)
9158 		return -ENOMEM;
9159 
9160 	buf->data = alloc_percpu(struct trace_array_cpu);
9161 	if (!buf->data) {
9162 		ring_buffer_free(buf->buffer);
9163 		buf->buffer = NULL;
9164 		return -ENOMEM;
9165 	}
9166 
9167 	/* Record the number of entries each per-cpu buffer was set up with */
9168 	set_buffer_entries(&tr->array_buffer,
9169 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9170 
9171 	return 0;
9172 }
9173 
9174 static void free_trace_buffer(struct array_buffer *buf)
9175 {
9176 	if (buf->buffer) {
9177 		ring_buffer_free(buf->buffer);
9178 		buf->buffer = NULL;
9179 		free_percpu(buf->data);
9180 		buf->data = NULL;
9181 	}
9182 }
9183 
9184 static int allocate_trace_buffers(struct trace_array *tr, int size)
9185 {
9186 	int ret;
9187 
9188 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9189 	if (ret)
9190 		return ret;
9191 
9192 #ifdef CONFIG_TRACER_MAX_TRACE
9193 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9194 				    allocate_snapshot ? size : 1);
9195 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9196 		free_trace_buffer(&tr->array_buffer);
9197 		return -ENOMEM;
9198 	}
9199 	tr->allocated_snapshot = allocate_snapshot;
9200 
9201 	/*
9202 	 * Only the top level trace array gets its snapshot allocated
9203 	 * from the kernel command line.
9204 	 */
9205 	allocate_snapshot = false;
9206 #endif
9207 
9208 	return 0;
9209 }
9210 
9211 static void free_trace_buffers(struct trace_array *tr)
9212 {
9213 	if (!tr)
9214 		return;
9215 
9216 	free_trace_buffer(&tr->array_buffer);
9217 
9218 #ifdef CONFIG_TRACER_MAX_TRACE
9219 	free_trace_buffer(&tr->max_buffer);
9220 #endif
9221 }
9222 
9223 static void init_trace_flags_index(struct trace_array *tr)
9224 {
9225 	int i;
9226 
9227 	/* Used by the trace options files */
9228 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9229 		tr->trace_flags_index[i] = i;
9230 }
9231 
9232 static void __update_tracer_options(struct trace_array *tr)
9233 {
9234 	struct tracer *t;
9235 
9236 	for (t = trace_types; t; t = t->next)
9237 		add_tracer_options(tr, t);
9238 }
9239 
9240 static void update_tracer_options(struct trace_array *tr)
9241 {
9242 	mutex_lock(&trace_types_lock);
9243 	tracer_options_updated = true;
9244 	__update_tracer_options(tr);
9245 	mutex_unlock(&trace_types_lock);
9246 }
9247 
9248 /* Must have trace_types_lock held */
9249 struct trace_array *trace_array_find(const char *instance)
9250 {
9251 	struct trace_array *tr, *found = NULL;
9252 
9253 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9254 		if (tr->name && strcmp(tr->name, instance) == 0) {
9255 			found = tr;
9256 			break;
9257 		}
9258 	}
9259 
9260 	return found;
9261 }
9262 
9263 struct trace_array *trace_array_find_get(const char *instance)
9264 {
9265 	struct trace_array *tr;
9266 
9267 	mutex_lock(&trace_types_lock);
9268 	tr = trace_array_find(instance);
9269 	if (tr)
9270 		tr->ref++;
9271 	mutex_unlock(&trace_types_lock);
9272 
9273 	return tr;
9274 }
9275 
9276 static int trace_array_create_dir(struct trace_array *tr)
9277 {
9278 	int ret;
9279 
9280 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9281 	if (!tr->dir)
9282 		return -EINVAL;
9283 
9284 	ret = event_trace_add_tracer(tr->dir, tr);
9285 	if (ret) {
9286 		tracefs_remove(tr->dir);
9287 		return ret;
9288 	}
9289 
9290 	init_tracer_tracefs(tr, tr->dir);
9291 	__update_tracer_options(tr);
9292 
9293 	return ret;
9294 }
9295 
9296 static struct trace_array *trace_array_create(const char *name)
9297 {
9298 	struct trace_array *tr;
9299 	int ret;
9300 
9301 	ret = -ENOMEM;
9302 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9303 	if (!tr)
9304 		return ERR_PTR(ret);
9305 
9306 	tr->name = kstrdup(name, GFP_KERNEL);
9307 	if (!tr->name)
9308 		goto out_free_tr;
9309 
9310 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9311 		goto out_free_tr;
9312 
9313 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9314 
9315 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9316 
9317 	raw_spin_lock_init(&tr->start_lock);
9318 
9319 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9320 
9321 	tr->current_trace = &nop_trace;
9322 
9323 	INIT_LIST_HEAD(&tr->systems);
9324 	INIT_LIST_HEAD(&tr->events);
9325 	INIT_LIST_HEAD(&tr->hist_vars);
9326 	INIT_LIST_HEAD(&tr->err_log);
9327 
9328 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9329 		goto out_free_tr;
9330 
9331 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9332 		goto out_free_tr;
9333 
9334 	ftrace_init_trace_array(tr);
9335 
9336 	init_trace_flags_index(tr);
9337 
9338 	if (trace_instance_dir) {
9339 		ret = trace_array_create_dir(tr);
9340 		if (ret)
9341 			goto out_free_tr;
9342 	} else
9343 		__trace_early_add_events(tr);
9344 
9345 	list_add(&tr->list, &ftrace_trace_arrays);
9346 
9347 	tr->ref++;
9348 
9349 	return tr;
9350 
9351  out_free_tr:
9352 	ftrace_free_ftrace_ops(tr);
9353 	free_trace_buffers(tr);
9354 	free_cpumask_var(tr->tracing_cpumask);
9355 	kfree(tr->name);
9356 	kfree(tr);
9357 
9358 	return ERR_PTR(ret);
9359 }
9360 
9361 static int instance_mkdir(const char *name)
9362 {
9363 	struct trace_array *tr;
9364 	int ret;
9365 
9366 	mutex_lock(&event_mutex);
9367 	mutex_lock(&trace_types_lock);
9368 
9369 	ret = -EEXIST;
9370 	if (trace_array_find(name))
9371 		goto out_unlock;
9372 
9373 	tr = trace_array_create(name);
9374 
9375 	ret = PTR_ERR_OR_ZERO(tr);
9376 
9377 out_unlock:
9378 	mutex_unlock(&trace_types_lock);
9379 	mutex_unlock(&event_mutex);
9380 	return ret;
9381 }
9382 
9383 /**
9384  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9385  * @name: The name of the trace array to be looked up/created.
9386  *
9387  * Returns a pointer to the trace array with the given name, or
9388  * NULL if it cannot be created.
9389  *
9390  * NOTE: This function increments the reference counter associated with the
9391  * trace array returned. This makes sure it cannot be freed while in use.
9392  * Use trace_array_put() once the trace array is no longer needed.
9393  * If the trace_array is to be freed, trace_array_destroy() needs to
9394  * be called after the trace_array_put(), or simply let user space delete
9395  * it from the tracefs instances directory. But until the
9396  * trace_array_put() is called, user space cannot delete it.
9397  *
9398  */
9399 struct trace_array *trace_array_get_by_name(const char *name)
9400 {
9401 	struct trace_array *tr;
9402 
9403 	mutex_lock(&event_mutex);
9404 	mutex_lock(&trace_types_lock);
9405 
9406 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9407 		if (tr->name && strcmp(tr->name, name) == 0)
9408 			goto out_unlock;
9409 	}
9410 
9411 	tr = trace_array_create(name);
9412 
9413 	if (IS_ERR(tr))
9414 		tr = NULL;
9415 out_unlock:
9416 	if (tr)
9417 		tr->ref++;
9418 
9419 	mutex_unlock(&trace_types_lock);
9420 	mutex_unlock(&event_mutex);
9421 	return tr;
9422 }
9423 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
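
/*
 * Illustrative in-kernel usage (editorial sketch; "example" is an arbitrary
 * instance name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example");
 *	if (!tr)
 *		return -ENODEV;
 *	... use the instance ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should be removed)
 *
 * As noted above, trace_array_destroy() must come after trace_array_put().
 */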
9424 
9425 static int __remove_instance(struct trace_array *tr)
9426 {
9427 	int i;
9428 
9429 	/* Reference counter for a newly created trace array = 1. */
9430 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9431 		return -EBUSY;
9432 
9433 	list_del(&tr->list);
9434 
9435 	/* Disable all the flags that were enabled coming in */
9436 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9437 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9438 			set_tracer_flag(tr, 1 << i, 0);
9439 	}
9440 
9441 	tracing_set_nop(tr);
9442 	clear_ftrace_function_probes(tr);
9443 	event_trace_del_tracer(tr);
9444 	ftrace_clear_pids(tr);
9445 	ftrace_destroy_function_files(tr);
9446 	tracefs_remove(tr->dir);
9447 	free_percpu(tr->last_func_repeats);
9448 	free_trace_buffers(tr);
9449 
9450 	for (i = 0; i < tr->nr_topts; i++) {
9451 		kfree(tr->topts[i].topts);
9452 	}
9453 	kfree(tr->topts);
9454 
9455 	free_cpumask_var(tr->tracing_cpumask);
9456 	kfree(tr->name);
9457 	kfree(tr);
9458 
9459 	return 0;
9460 }
9461 
9462 int trace_array_destroy(struct trace_array *this_tr)
9463 {
9464 	struct trace_array *tr;
9465 	int ret;
9466 
9467 	if (!this_tr)
9468 		return -EINVAL;
9469 
9470 	mutex_lock(&event_mutex);
9471 	mutex_lock(&trace_types_lock);
9472 
9473 	ret = -ENODEV;
9474 
9475 	/* Making sure trace array exists before destroying it. */
9476 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9477 		if (tr == this_tr) {
9478 			ret = __remove_instance(tr);
9479 			break;
9480 		}
9481 	}
9482 
9483 	mutex_unlock(&trace_types_lock);
9484 	mutex_unlock(&event_mutex);
9485 
9486 	return ret;
9487 }
9488 EXPORT_SYMBOL_GPL(trace_array_destroy);
9489 
9490 static int instance_rmdir(const char *name)
9491 {
9492 	struct trace_array *tr;
9493 	int ret;
9494 
9495 	mutex_lock(&event_mutex);
9496 	mutex_lock(&trace_types_lock);
9497 
9498 	ret = -ENODEV;
9499 	tr = trace_array_find(name);
9500 	if (tr)
9501 		ret = __remove_instance(tr);
9502 
9503 	mutex_unlock(&trace_types_lock);
9504 	mutex_unlock(&event_mutex);
9505 
9506 	return ret;
9507 }
9508 
9509 static __init void create_trace_instances(struct dentry *d_tracer)
9510 {
9511 	struct trace_array *tr;
9512 
9513 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9514 							 instance_mkdir,
9515 							 instance_rmdir);
9516 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9517 		return;
9518 
9519 	mutex_lock(&event_mutex);
9520 	mutex_lock(&trace_types_lock);
9521 
9522 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9523 		if (!tr->name)
9524 			continue;
9525 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9526 			     "Failed to create instance directory\n"))
9527 			break;
9528 	}
9529 
9530 	mutex_unlock(&trace_types_lock);
9531 	mutex_unlock(&event_mutex);
9532 }
9533 
9534 static void
9535 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9536 {
9537 	struct trace_event_file *file;
9538 	int cpu;
9539 
9540 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9541 			tr, &show_traces_fops);
9542 
9543 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9544 			tr, &set_tracer_fops);
9545 
9546 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9547 			  tr, &tracing_cpumask_fops);
9548 
9549 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9550 			  tr, &tracing_iter_fops);
9551 
9552 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9553 			  tr, &tracing_fops);
9554 
9555 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9556 			  tr, &tracing_pipe_fops);
9557 
9558 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9559 			  tr, &tracing_entries_fops);
9560 
9561 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9562 			  tr, &tracing_total_entries_fops);
9563 
9564 	trace_create_file("free_buffer", 0200, d_tracer,
9565 			  tr, &tracing_free_buffer_fops);
9566 
9567 	trace_create_file("trace_marker", 0220, d_tracer,
9568 			  tr, &tracing_mark_fops);
9569 
9570 	file = __find_event_file(tr, "ftrace", "print");
9571 	if (file && file->dir)
9572 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9573 				  file, &event_trigger_fops);
9574 	tr->trace_marker_file = file;
9575 
9576 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9577 			  tr, &tracing_mark_raw_fops);
9578 
9579 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9580 			  &trace_clock_fops);
9581 
9582 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9583 			  tr, &rb_simple_fops);
9584 
9585 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9586 			  &trace_time_stamp_mode_fops);
9587 
9588 	tr->buffer_percent = 50;
9589 
9590 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9591 			tr, &buffer_percent_fops);
9592 
9593 	create_trace_options_dir(tr);
9594 
9595 	trace_create_maxlat_file(tr, d_tracer);
9596 
9597 	if (ftrace_create_function_files(tr, d_tracer))
9598 		MEM_FAIL(1, "Could not allocate function filter files");
9599 
9600 #ifdef CONFIG_TRACER_SNAPSHOT
9601 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9602 			  tr, &snapshot_fops);
9603 #endif
9604 
9605 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9606 			  tr, &tracing_err_log_fops);
9607 
9608 	for_each_tracing_cpu(cpu)
9609 		tracing_init_tracefs_percpu(tr, cpu);
9610 
9611 	ftrace_init_tracefs(tr, d_tracer);
9612 }
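/*
 * For illustration, the per-instance files created above map onto the usual
 * tracefs workflow from user space (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   cd /sys/kernel/tracing
 *   mkdir instances/foo                 # instance_mkdir("foo")
 *   echo 1 > instances/foo/tracing_on
 *   echo hello > instances/foo/trace_marker
 *   cat instances/foo/trace
 *   rmdir instances/foo                 # instance_rmdir("foo")
 */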
9613 
9614 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9615 {
9616 	struct vfsmount *mnt;
9617 	struct file_system_type *type;
9618 
9619 	/*
9620 	 * To maintain backward compatibility for tools that mount
9621 	 * debugfs to get to the tracing facility, tracefs is automatically
9622 	 * mounted to the debugfs/tracing directory.
9623 	 */
9624 	type = get_fs_type("tracefs");
9625 	if (!type)
9626 		return NULL;
9627 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9628 	put_filesystem(type);
9629 	if (IS_ERR(mnt))
9630 		return NULL;
9631 	mntget(mnt);
9632 
9633 	return mnt;
9634 }
9635 
9636 /**
9637  * tracing_init_dentry - initialize top level trace array
9638  *
9639  * This is called when creating files or directories in the tracing
9640  * directory. It is called via fs_initcall() by any of the boot up code
9641  * and returns 0 on success or a negative error if tracing is unavailable.
9642  */
9643 int tracing_init_dentry(void)
9644 {
9645 	struct trace_array *tr = &global_trace;
9646 
9647 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9648 		pr_warn("Tracing disabled due to lockdown\n");
9649 		return -EPERM;
9650 	}
9651 
9652 	/* The top level trace array uses NULL as parent */
9653 	if (tr->dir)
9654 		return 0;
9655 
9656 	if (WARN_ON(!tracefs_initialized()))
9657 		return -ENODEV;
9658 
9659 	/*
9660 	 * As there may still be users that expect the tracing
9661 	 * files to exist in debugfs/tracing, we must automount
9662 	 * the tracefs file system there, so older tools still
9663 	 * work with the newer kernel.
9664 	 */
9665 	tr->dir = debugfs_create_automount("tracing", NULL,
9666 					   trace_automount, NULL);
9667 
9668 	return 0;
9669 }
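/*
 * For illustration, the automount above keeps older tooling that only knows
 * about debugfs working, e.g.:
 *
 *   mount -t debugfs nodev /sys/kernel/debug
 *   ls /sys/kernel/debug/tracing        # triggers trace_automount()
 */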
9670 
9671 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9672 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9673 
9674 static struct workqueue_struct *eval_map_wq __initdata;
9675 static struct work_struct eval_map_work __initdata;
9676 static struct work_struct tracerfs_init_work __initdata;
9677 
9678 static void __init eval_map_work_func(struct work_struct *work)
9679 {
9680 	int len;
9681 
9682 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9683 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9684 }
9685 
9686 static int __init trace_eval_init(void)
9687 {
9688 	INIT_WORK(&eval_map_work, eval_map_work_func);
9689 
9690 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9691 	if (!eval_map_wq) {
9692 		pr_err("Unable to allocate eval_map_wq\n");
9693 		/* No workqueue; fall back to doing the work synchronously */
9694 		eval_map_work_func(&eval_map_work);
9695 		return -ENOMEM;
9696 	}
9697 
9698 	queue_work(eval_map_wq, &eval_map_work);
9699 	return 0;
9700 }
9701 
9702 subsys_initcall(trace_eval_init);
9703 
9704 static int __init trace_eval_sync(void)
9705 {
9706 	/* Make sure the eval map updates are finished */
9707 	if (eval_map_wq)
9708 		destroy_workqueue(eval_map_wq);
9709 	return 0;
9710 }
9711 
9712 late_initcall_sync(trace_eval_sync);
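/*
 * For reference: the built-in eval maps inserted above are emitted by
 * TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF() in trace event headers, e.g.
 * (hypothetical header):
 *
 *   TRACE_DEFINE_ENUM(EXAMPLE_STATE_RUNNING);
 *
 * so that enum names used in a TP_printk()/__print_symbolic() format show
 * up as their numeric values in the tracefs "format" files that user space
 * parsers read.
 */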
9713 
9714 
9715 #ifdef CONFIG_MODULES
9716 static void trace_module_add_evals(struct module *mod)
9717 {
9718 	if (!mod->num_trace_evals)
9719 		return;
9720 
9721 	/*
9722 	 * Modules with bad taint do not have events created, so do
9723 	 * not bother with their eval maps (enums) either.
9724 	 */
9725 	if (trace_module_has_bad_taint(mod))
9726 		return;
9727 
9728 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9729 }
9730 
9731 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9732 static void trace_module_remove_evals(struct module *mod)
9733 {
9734 	union trace_eval_map_item *map;
9735 	union trace_eval_map_item **last = &trace_eval_maps;
9736 
9737 	if (!mod->num_trace_evals)
9738 		return;
9739 
9740 	mutex_lock(&trace_eval_mutex);
9741 
9742 	map = trace_eval_maps;
9743 
9744 	while (map) {
9745 		if (map->head.mod == mod)
9746 			break;
9747 		map = trace_eval_jmp_to_tail(map);
9748 		last = &map->tail.next;
9749 		map = map->tail.next;
9750 	}
9751 	if (!map)
9752 		goto out;
9753 
9754 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9755 	kfree(map);
9756  out:
9757 	mutex_unlock(&trace_eval_mutex);
9758 }
9759 #else
9760 static inline void trace_module_remove_evals(struct module *mod) { }
9761 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9762 
9763 static int trace_module_notify(struct notifier_block *self,
9764 			       unsigned long val, void *data)
9765 {
9766 	struct module *mod = data;
9767 
9768 	switch (val) {
9769 	case MODULE_STATE_COMING:
9770 		trace_module_add_evals(mod);
9771 		break;
9772 	case MODULE_STATE_GOING:
9773 		trace_module_remove_evals(mod);
9774 		break;
9775 	}
9776 
9777 	return NOTIFY_OK;
9778 }
9779 
9780 static struct notifier_block trace_module_nb = {
9781 	.notifier_call = trace_module_notify,
9782 	.priority = 0,
9783 };
9784 #endif /* CONFIG_MODULES */
9785 
9786 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9787 {
9789 	event_trace_init();
9790 
9791 	init_tracer_tracefs(&global_trace, NULL);
9792 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9793 
9794 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9795 			&global_trace, &tracing_thresh_fops);
9796 
9797 	trace_create_file("README", TRACE_MODE_READ, NULL,
9798 			NULL, &tracing_readme_fops);
9799 
9800 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9801 			NULL, &tracing_saved_cmdlines_fops);
9802 
9803 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9804 			  NULL, &tracing_saved_cmdlines_size_fops);
9805 
9806 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9807 			NULL, &tracing_saved_tgids_fops);
9808 
9809 	trace_create_eval_file(NULL);
9810 
9811 #ifdef CONFIG_MODULES
9812 	register_module_notifier(&trace_module_nb);
9813 #endif
9814 
9815 #ifdef CONFIG_DYNAMIC_FTRACE
9816 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9817 			NULL, &tracing_dyn_info_fops);
9818 #endif
9819 
9820 	create_trace_instances(NULL);
9821 
9822 	update_tracer_options(&global_trace);
9823 }
9824 
9825 static __init int tracer_init_tracefs(void)
9826 {
9827 	int ret;
9828 
9829 	trace_access_lock_init();
9830 
9831 	ret = tracing_init_dentry();
9832 	if (ret)
9833 		return 0;
9834 
9835 	if (eval_map_wq) {
9836 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9837 		queue_work(eval_map_wq, &tracerfs_init_work);
9838 	} else {
9839 		tracer_init_tracefs_work_func(NULL);
9840 	}
9841 
9842 	rv_init_interface();
9843 
9844 	return 0;
9845 }
9846 
9847 fs_initcall(tracer_init_tracefs);
9848 
9849 static int trace_panic_handler(struct notifier_block *this,
9850 			       unsigned long event, void *unused)
9851 {
9852 	if (ftrace_dump_on_oops)
9853 		ftrace_dump(ftrace_dump_on_oops);
9854 	return NOTIFY_OK;
9855 }
9856 
9857 static struct notifier_block trace_panic_notifier = {
9858 	.notifier_call  = trace_panic_handler,
9859 	.next           = NULL,
9860 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9861 };
9862 
9863 static int trace_die_handler(struct notifier_block *self,
9864 			     unsigned long val,
9865 			     void *data)
9866 {
9867 	switch (val) {
9868 	case DIE_OOPS:
9869 		if (ftrace_dump_on_oops)
9870 			ftrace_dump(ftrace_dump_on_oops);
9871 		break;
9872 	default:
9873 		break;
9874 	}
9875 	return NOTIFY_OK;
9876 }
9877 
9878 static struct notifier_block trace_die_notifier = {
9879 	.notifier_call = trace_die_handler,
9880 	.priority = 200
9881 };
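/*
 * Both notifiers above only dump the ring buffer when ftrace_dump_on_oops
 * is set, typically via the "ftrace_dump_on_oops" (or
 * "ftrace_dump_on_oops=orig_cpu") kernel command line option or the
 * kernel.ftrace_dump_on_oops sysctl.
 */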
9882 
9883 /*
9884  * printk is limited to a max of 1024 characters; we really don't need
9885  * it that big.  Nothing should be printing 1000 characters anyway.
9886  */
9887 #define TRACE_MAX_PRINT		1000
9888 
9889 /*
9890  * Define here KERN_TRACE so that we have one place to modify
9891  * it if we decide to change what log level the ftrace dump
9892  * should be at.
9893  */
9894 #define KERN_TRACE		KERN_EMERG
9895 
9896 void
9897 trace_printk_seq(struct trace_seq *s)
9898 {
9899 	/* Probably should print a warning here. */
9900 	if (s->seq.len >= TRACE_MAX_PRINT)
9901 		s->seq.len = TRACE_MAX_PRINT;
9902 
9903 	/*
9904 	 * More paranoid code. Although the buffer size is set to
9905 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9906 	 * an extra layer of protection.
9907 	 */
9908 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9909 		s->seq.len = s->seq.size - 1;
9910 
9911 	/* Should be zero terminated, but we are paranoid. */
9912 	s->buffer[s->seq.len] = 0;
9913 
9914 	printk(KERN_TRACE "%s", s->buffer);
9915 
9916 	trace_seq_init(s);
9917 }
9918 
9919 void trace_init_global_iter(struct trace_iterator *iter)
9920 {
9921 	iter->tr = &global_trace;
9922 	iter->trace = iter->tr->current_trace;
9923 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9924 	iter->array_buffer = &global_trace.array_buffer;
9925 
9926 	if (iter->trace && iter->trace->open)
9927 		iter->trace->open(iter);
9928 
9929 	/* Annotate start of buffers if we had overruns */
9930 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9931 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9932 
9933 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9934 	if (trace_clocks[iter->tr->clock_id].in_ns)
9935 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9936 
9937 	/* Can not use kmalloc for iter.temp and iter.fmt */
9938 	iter->temp = static_temp_buf;
9939 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
9940 	iter->fmt = static_fmt_buf;
9941 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
9942 }
9943 
9944 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9945 {
9946 	/* use static because iter can be a bit big for the stack */
9947 	static struct trace_iterator iter;
9948 	static atomic_t dump_running;
9949 	struct trace_array *tr = &global_trace;
9950 	unsigned int old_userobj;
9951 	unsigned long flags;
9952 	int cnt = 0, cpu;
9953 
9954 	/* Only allow one dump user at a time. */
9955 	if (atomic_inc_return(&dump_running) != 1) {
9956 		atomic_dec(&dump_running);
9957 		return;
9958 	}
9959 
9960 	/*
9961 	 * Always turn off tracing when we dump.
9962 	 * We don't need to show trace output of what happens
9963 	 * between multiple crashes.
9964 	 *
9965 	 * If the user does a sysrq-z, then they can re-enable
9966 	 * tracing with echo 1 > tracing_on.
9967 	 */
9968 	tracing_off();
9969 
9970 	local_irq_save(flags);
9971 
9972 	/* Simulate the iterator */
9973 	trace_init_global_iter(&iter);
9974 
9975 	for_each_tracing_cpu(cpu) {
9976 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9977 	}
9978 
9979 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9980 
9981 	/* don't look at user memory in panic mode */
9982 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9983 
9984 	switch (oops_dump_mode) {
9985 	case DUMP_ALL:
9986 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9987 		break;
9988 	case DUMP_ORIG:
9989 		iter.cpu_file = raw_smp_processor_id();
9990 		break;
9991 	case DUMP_NONE:
9992 		goto out_enable;
9993 	default:
9994 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9995 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9996 	}
9997 
9998 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9999 
10000 	/* Did function tracer already get disabled? */
10001 	if (ftrace_is_dead()) {
10002 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10003 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10004 	}
10005 
10006 	/*
10007 	 * We need to stop all tracing on all CPUs to read
10008 	 * the next buffer. This is a bit expensive, but it is
10009 	 * not done often. We print all that we can read,
10010 	 * and then release the locks again.
10011 	 */
10012 
10013 	while (!trace_empty(&iter)) {
10014 
10015 		if (!cnt)
10016 			printk(KERN_TRACE "---------------------------------\n");
10017 
10018 		cnt++;
10019 
10020 		trace_iterator_reset(&iter);
10021 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10022 
10023 		if (trace_find_next_entry_inc(&iter) != NULL) {
10024 			int ret;
10025 
10026 			ret = print_trace_line(&iter);
10027 			if (ret != TRACE_TYPE_NO_CONSUME)
10028 				trace_consume(&iter);
10029 		}
10030 		touch_nmi_watchdog();
10031 
10032 		trace_printk_seq(&iter.seq);
10033 	}
10034 
10035 	if (!cnt)
10036 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10037 	else
10038 		printk(KERN_TRACE "---------------------------------\n");
10039 
10040  out_enable:
10041 	tr->trace_flags |= old_userobj;
10042 
10043 	for_each_tracing_cpu(cpu) {
10044 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10045 	}
10046 	atomic_dec(&dump_running);
10047 	local_irq_restore(flags);
10048 }
10049 EXPORT_SYMBOL_GPL(ftrace_dump);
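/*
 * Example (illustrative sketch, compiled out): dumping the ftrace ring
 * buffer to the console from a fatal-error path.  The function name and
 * message are placeholders.
 */
#if 0
#include <linux/kernel.h>
#include <linux/printk.h>

static void example_handle_fatal_error(void)
{
	pr_emerg("example: fatal error, dumping ftrace buffer\n");
	/* ftrace_dump() turns tracing off itself before dumping */
	ftrace_dump(DUMP_ALL);
}
#endif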
10050 
10051 #define WRITE_BUFSIZE  4096
10052 
10053 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10054 				size_t count, loff_t *ppos,
10055 				int (*createfn)(const char *))
10056 {
10057 	char *kbuf, *buf, *tmp;
10058 	int ret = 0;
10059 	size_t done = 0;
10060 	size_t size;
10061 
10062 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10063 	if (!kbuf)
10064 		return -ENOMEM;
10065 
10066 	while (done < count) {
10067 		size = count - done;
10068 
10069 		if (size >= WRITE_BUFSIZE)
10070 			size = WRITE_BUFSIZE - 1;
10071 
10072 		if (copy_from_user(kbuf, buffer + done, size)) {
10073 			ret = -EFAULT;
10074 			goto out;
10075 		}
10076 		kbuf[size] = '\0';
10077 		buf = kbuf;
10078 		do {
10079 			tmp = strchr(buf, '\n');
10080 			if (tmp) {
10081 				*tmp = '\0';
10082 				size = tmp - buf + 1;
10083 			} else {
10084 				size = strlen(buf);
10085 				if (done + size < count) {
10086 					if (buf != kbuf)
10087 						break;
10088 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10089 					pr_warn("Line length is too long: Should be less than %d\n",
10090 						WRITE_BUFSIZE - 2);
10091 					ret = -EINVAL;
10092 					goto out;
10093 				}
10094 			}
10095 			done += size;
10096 
10097 			/* Remove comments */
10098 			tmp = strchr(buf, '#');
10099 
10100 			if (tmp)
10101 				*tmp = '\0';
10102 
10103 			ret = createfn(buf);
10104 			if (ret)
10105 				goto out;
10106 			buf += size;
10107 
10108 		} while (done < count);
10109 	}
10110 	ret = done;
10111 
10112 out:
10113 	kfree(kbuf);
10114 
10115 	return ret;
10116 }
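/*
 * Example (illustrative sketch, compiled out): a tracefs control file that
 * feeds writes through trace_parse_run_command(), one '\n'-terminated,
 * '#'-comment-stripped command at a time, the way the dynamic event files
 * do.  The names below are placeholders.
 */
#if 0
static int example_create_cmd(const char *raw_command)
{
	pr_info("example command: %s\n", raw_command);
	return 0;
}

static ssize_t example_cmd_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_create_cmd);
}

static const struct file_operations example_cmd_fops = {
	.open		= tracing_open_generic,
	.write		= example_cmd_write,
	.llseek		= generic_file_llseek,
};
#endif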
10117 
10118 __init static int tracer_alloc_buffers(void)
10119 {
10120 	int ring_buf_size;
10121 	int ret = -ENOMEM;
10122 
10124 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10125 		pr_warn("Tracing disabled due to lockdown\n");
10126 		return -EPERM;
10127 	}
10128 
10129 	/*
10130 	 * Make sure we don't accidentally add more trace options
10131 	 * than we have bits for.
10132 	 */
10133 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10134 
10135 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10136 		goto out;
10137 
10138 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10139 		goto out_free_buffer_mask;
10140 
10141 	/* Only allocate trace_printk buffers if a trace_printk exists */
10142 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10143 		/* Must be called before global_trace.buffer is allocated */
10144 		trace_printk_init_buffers();
10145 
10146 	/* To save memory, keep the ring buffer size to its minimum */
10147 	if (ring_buffer_expanded)
10148 		ring_buf_size = trace_buf_size;
10149 	else
10150 		ring_buf_size = 1;
10151 
10152 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10153 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10154 
10155 	raw_spin_lock_init(&global_trace.start_lock);
10156 
10157 	/*
10158 	 * The prepare callback allocates some memory for the ring buffer. We
10159 	 * don't free the buffer if the CPU goes down. If we were to free
10160 	 * the buffer, then the user would lose any trace that was in the
10161 	 * buffer. The memory will be removed once the "instance" is removed.
10162 	 */
10163 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10164 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10165 				      NULL);
10166 	if (ret < 0)
10167 		goto out_free_cpumask;
10168 	/* Used for event triggers */
10169 	ret = -ENOMEM;
10170 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10171 	if (!temp_buffer)
10172 		goto out_rm_hp_state;
10173 
10174 	if (trace_create_savedcmd() < 0)
10175 		goto out_free_temp_buffer;
10176 
10177 	/* TODO: make the number of buffers hot pluggable with CPUs */
10178 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10179 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10180 		goto out_free_savedcmd;
10181 	}
10182 
10183 	if (global_trace.buffer_disabled)
10184 		tracing_off();
10185 
10186 	if (trace_boot_clock) {
10187 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10188 		if (ret < 0)
10189 			pr_warn("Trace clock %s not defined, going back to default\n",
10190 				trace_boot_clock);
10191 	}
10192 
10193 	/*
10194 	 * register_tracer() might reference current_trace, so it
10195 	 * needs to be set before we register anything. This is
10196 	 * just a bootstrap of current_trace anyway.
10197 	 */
10198 	global_trace.current_trace = &nop_trace;
10199 
10200 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10201 
10202 	ftrace_init_global_array_ops(&global_trace);
10203 
10204 	init_trace_flags_index(&global_trace);
10205 
10206 	register_tracer(&nop_trace);
10207 
10208 	/* Function tracing may start here (via kernel command line) */
10209 	init_function_trace();
10210 
10211 	/* All seems OK, enable tracing */
10212 	tracing_disabled = 0;
10213 
10214 	atomic_notifier_chain_register(&panic_notifier_list,
10215 				       &trace_panic_notifier);
10216 
10217 	register_die_notifier(&trace_die_notifier);
10218 
10219 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10220 
10221 	INIT_LIST_HEAD(&global_trace.systems);
10222 	INIT_LIST_HEAD(&global_trace.events);
10223 	INIT_LIST_HEAD(&global_trace.hist_vars);
10224 	INIT_LIST_HEAD(&global_trace.err_log);
10225 	list_add(&global_trace.list, &ftrace_trace_arrays);
10226 
10227 	apply_trace_boot_options();
10228 
10229 	register_snapshot_cmd();
10230 
10231 	test_can_verify();
10232 
10233 	return 0;
10234 
10235 out_free_savedcmd:
10236 	free_saved_cmdlines_buffer(savedcmd);
10237 out_free_temp_buffer:
10238 	ring_buffer_free(temp_buffer);
10239 out_rm_hp_state:
10240 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10241 out_free_cpumask:
10242 	free_cpumask_var(global_trace.tracing_cpumask);
10243 out_free_buffer_mask:
10244 	free_cpumask_var(tracing_buffer_mask);
10245 out:
10246 	return ret;
10247 }
10248 
10249 void __init ftrace_boot_snapshot(void)
10250 {
10251 	if (snapshot_at_boot) {
10252 		tracing_snapshot();
10253 		internal_trace_puts("** Boot snapshot taken **\n");
10254 	}
10255 }
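/*
 * snapshot_at_boot is set from the "ftrace_boot_snapshot" kernel command
 * line option, so whatever was traced during boot is preserved in the
 * snapshot buffer before normal tracing continues.
 */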
10256 
10257 void __init early_trace_init(void)
10258 {
10259 	if (tracepoint_printk) {
10260 		tracepoint_print_iter =
10261 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10262 		if (MEM_FAIL(!tracepoint_print_iter,
10263 			     "Failed to allocate trace iterator\n"))
10264 			tracepoint_printk = 0;
10265 		else
10266 			static_key_enable(&tracepoint_printk_key.key);
10267 	}
10268 	tracer_alloc_buffers();
10269 }
10270 
10271 void __init trace_init(void)
10272 {
10273 	trace_event_init();
10274 }
10275 
10276 __init static void clear_boot_tracer(void)
10277 {
10278 	/*
10279 	 * The default boot-up tracer name is stored in an init section.
10280 	 * This function is called at late_initcall time. If the boot
10281 	 * tracer was never found and registered, clear it out to prevent
10282 	 * a later registration from accessing the init-section buffer
10283 	 * that is about to be freed.
10284 	 */
10285 	if (!default_bootup_tracer)
10286 		return;
10287 
10288 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10289 	       default_bootup_tracer);
10290 	default_bootup_tracer = NULL;
10291 }
10292 
10293 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10294 __init static void tracing_set_default_clock(void)
10295 {
10296 	/* sched_clock_stable() is determined in late_initcall */
10297 	if (!trace_boot_clock && !sched_clock_stable()) {
10298 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10299 			pr_warn("Can not set tracing clock due to lockdown\n");
10300 			return;
10301 		}
10302 
10303 		printk(KERN_WARNING
10304 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10305 		       "If you want to keep using the local clock, then add:\n"
10306 		       "  \"trace_clock=local\"\n"
10307 		       "on the kernel command line\n");
10308 		tracing_set_clock(&global_trace, "global");
10309 	}
10310 }
10311 #else
10312 static inline void tracing_set_default_clock(void) { }
10313 #endif
10314 
10315 __init static int late_trace_init(void)
10316 {
10317 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10318 		static_key_disable(&tracepoint_printk_key.key);
10319 		tracepoint_printk = 0;
10320 	}
10321 
10322 	tracing_set_default_clock();
10323 	clear_boot_tracer();
10324 	return 0;
10325 }
10326 
10327 late_initcall_sync(late_trace_init);
10328