xref: /openbmc/linux/kernel/trace/trace.c (revision f5c27da4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, but concurrent
 * insertions into the ring buffer (such as trace_printk) could
 * occur at the same time, giving false positive or negative results.
 */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static int __init set_cmdline_ftrace(char *str)
191 {
192 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193 	default_bootup_tracer = bootup_tracer_buf;
194 	/* We are using ftrace early, expand it */
195 	ring_buffer_expanded = true;
196 	return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199 
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
203 		ftrace_dump_on_oops = DUMP_ALL;
204 		return 1;
205 	}
206 
207 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208 		ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211 
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215 
216 static int __init stop_trace_on_warning(char *str)
217 {
218 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219 		__disable_trace_on_warning = 1;
220 	return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223 
224 static int __init boot_alloc_snapshot(char *str)
225 {
226 	allocate_snapshot = true;
227 	/* We also need the main ring buffer expanded */
228 	ring_buffer_expanded = true;
229 	return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232 
233 
234 static int __init boot_snapshot(char *str)
235 {
236 	snapshot_at_boot = true;
237 	boot_alloc_snapshot(str);
238 	return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241 
242 
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244 
245 static int __init set_trace_boot_options(char *str)
246 {
247 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248 	return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251 
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254 
255 static int __init set_trace_boot_clock(char *str)
256 {
257 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258 	trace_boot_clock = trace_boot_clock_buf;
259 	return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262 
263 static int __init set_tracepoint_printk(char *str)
264 {
265 	/* Ignore the "tp_printk_stop_on_boot" param */
266 	if (*str == '_')
267 		return 0;
268 
269 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270 		tracepoint_printk = 1;
271 	return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274 
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277 	tracepoint_printk_stop_on_boot = true;
278 	return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281 
282 unsigned long long ns2usecs(u64 nsec)
283 {
284 	nsec += 500;
285 	do_div(nsec, 1000);
286 	return nsec;
287 }
288 
289 static void
290 trace_process_export(struct trace_export *export,
291 	       struct ring_buffer_event *event, int flag)
292 {
293 	struct trace_entry *entry;
294 	unsigned int size = 0;
295 
296 	if (export->flags & flag) {
297 		entry = ring_buffer_event_data(event);
298 		size = ring_buffer_event_length(event);
299 		export->write(export, entry, size);
300 	}
301 }
302 
303 static DEFINE_MUTEX(ftrace_export_lock);
304 
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306 
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310 
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313 	if (export->flags & TRACE_EXPORT_FUNCTION)
314 		static_branch_inc(&trace_function_exports_enabled);
315 
316 	if (export->flags & TRACE_EXPORT_EVENT)
317 		static_branch_inc(&trace_event_exports_enabled);
318 
319 	if (export->flags & TRACE_EXPORT_MARKER)
320 		static_branch_inc(&trace_marker_exports_enabled);
321 }
322 
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325 	if (export->flags & TRACE_EXPORT_FUNCTION)
326 		static_branch_dec(&trace_function_exports_enabled);
327 
328 	if (export->flags & TRACE_EXPORT_EVENT)
329 		static_branch_dec(&trace_event_exports_enabled);
330 
331 	if (export->flags & TRACE_EXPORT_MARKER)
332 		static_branch_dec(&trace_marker_exports_enabled);
333 }
334 
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337 	struct trace_export *export;
338 
339 	preempt_disable_notrace();
340 
341 	export = rcu_dereference_raw_check(ftrace_exports_list);
342 	while (export) {
343 		trace_process_export(export, event, flag);
344 		export = rcu_dereference_raw_check(export->next);
345 	}
346 
347 	preempt_enable_notrace();
348 }
349 
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353 	rcu_assign_pointer(export->next, *list);
354 	/*
355 	 * We are entering export into the list but another
356 	 * CPU might be walking that list. We need to make sure
357 	 * the export->next pointer is valid before another CPU sees
358 	 * the export pointer included into the list.
359 	 */
360 	rcu_assign_pointer(*list, export);
361 }
362 
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366 	struct trace_export **p;
367 
368 	for (p = list; *p != NULL; p = &(*p)->next)
369 		if (*p == export)
370 			break;
371 
372 	if (*p != export)
373 		return -1;
374 
375 	rcu_assign_pointer(*p, (*p)->next);
376 
377 	return 0;
378 }
379 
/*
 * Register @export on @list: the matching static keys are incremented
 * first, then the export is linked into the list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
387 
/*
 * Remove @export from @list and drop the static key counts it holds.
 *
 * Returns 0 on success, or -1 if @export was not on the list. In the
 * failure case the static keys are left alone: decrementing them for an
 * export that was never registered would unbalance the increments done
 * by add_ftrace_export().
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
398 
399 int register_ftrace_export(struct trace_export *export)
400 {
401 	if (WARN_ON_ONCE(!export->write))
402 		return -1;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	add_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413 
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416 	int ret;
417 
418 	mutex_lock(&ftrace_export_lock);
419 
420 	ret = rm_ftrace_export(&ftrace_exports_list, export);
421 
422 	mutex_unlock(&ftrace_export_lock);
423 
424 	return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
427 
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS						\
430 	(FUNCTION_DEFAULT_FLAGS |					\
431 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
432 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
433 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
434 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
435 	 TRACE_ITER_HASH_PTR)
436 
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
439 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440 
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444 
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450 	.trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452 
453 LIST_HEAD(ftrace_trace_arrays);
454 
455 int trace_array_get(struct trace_array *this_tr)
456 {
457 	struct trace_array *tr;
458 	int ret = -ENODEV;
459 
460 	mutex_lock(&trace_types_lock);
461 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462 		if (tr == this_tr) {
463 			tr->ref++;
464 			ret = 0;
465 			break;
466 		}
467 	}
468 	mutex_unlock(&trace_types_lock);
469 
470 	return ret;
471 }
472 
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475 	WARN_ON(!this_tr->ref);
476 	this_tr->ref--;
477 }
478 
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490 	if (!this_tr)
491 		return;
492 
493 	mutex_lock(&trace_types_lock);
494 	__trace_array_put(this_tr);
495 	mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498 
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501 	int ret;
502 
503 	ret = security_locked_down(LOCKDOWN_TRACEFS);
504 	if (ret)
505 		return ret;
506 
507 	if (tracing_disabled)
508 		return -ENODEV;
509 
510 	if (tr && trace_array_get(tr) < 0)
511 		return -ENODEV;
512 
513 	return 0;
514 }
515 
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517 			      struct trace_buffer *buffer,
518 			      struct ring_buffer_event *event)
519 {
520 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521 	    !filter_match_preds(call->filter, rec)) {
522 		__trace_event_discard_commit(buffer, event);
523 		return 1;
524 	}
525 
526 	return 0;
527 }
528 
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539 	return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541 
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554 		       struct trace_pid_list *filtered_no_pids,
555 		       struct task_struct *task)
556 {
557 	/*
558 	 * If filtered_no_pids is not empty, and the task's pid is listed
559 	 * in filtered_no_pids, then return true.
560 	 * Otherwise, if filtered_pids is empty, that means we can
561 	 * trace all tasks. If it has content, then only trace pids
562 	 * within filtered_pids.
563 	 */
564 
565 	return (filtered_pids &&
566 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
567 		(filtered_no_pids &&
568 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570 
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584 				  struct task_struct *self,
585 				  struct task_struct *task)
586 {
587 	if (!pid_list)
588 		return;
589 
590 	/* For forks, we only add if the forking task is listed */
591 	if (self) {
592 		if (!trace_find_filtered_pid(pid_list, self->pid))
593 			return;
594 	}
595 
596 	/* "self" is set for forks, and NULL for exits */
597 	if (self)
598 		trace_pid_list_set(pid_list, task->pid);
599 	else
600 		trace_pid_list_clear(pid_list, task->pid);
601 }
602 
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617 	long pid = (unsigned long)v;
618 	unsigned int next;
619 
620 	(*pos)++;
621 
622 	/* pid already is +1 of the actual previous bit */
623 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
624 		return NULL;
625 
626 	pid = next;
627 
628 	/* Return pid + 1 to allow zero to be represented */
629 	return (void *)(pid + 1);
630 }
631 
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645 	unsigned long pid;
646 	unsigned int first;
647 	loff_t l = 0;
648 
649 	if (trace_pid_list_first(pid_list, &first) < 0)
650 		return NULL;
651 
652 	pid = first;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* @v carries pid + 1; undo the offset before printing */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
/*
 * trace_pid_write - build a new pid list from pids written by user space
 * @filtered_pids: The current pid list, or NULL; its pids are copied into
 *                 the new list before the user supplied ones are added
 * @new_pid_list: On success, receives the newly built list, or NULL when
 *                the resulting list holds no pids
 * @ubuf: User space buffer holding the pids to add
 * @cnt: Number of bytes available in @ubuf
 *
 * Returns the number of bytes consumed from @ubuf on success, or a
 * negative value on failure. On failure the new list is freed and
 * @new_pid_list is not written.
 */
int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	/* Parse the user buffer one token at a time */
	ret = 0;
	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		/* Account for what the parser consumed */
		read += ret;
		ubuf += ret;
		cnt -= ret;

		/* Nothing (more) to parse: stop with ret >= 0 */
		if (!trace_parser_loaded(&parser))
			break;

		/* Preset the error for a token that is not a number */
		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		/*
		 * NOTE(review): val is an unsigned long that is narrowed to
		 * pid_t here without a range check of its own; presumably
		 * trace_pid_list_set() rejects out-of-range pids, but a value
		 * that wraps during the cast would not be caught - confirm.
		 */
		pid = (pid_t)val;

		/*
		 * NOTE(review): a bare -1 is returned rather than the error
		 * reported by trace_pid_list_set() - confirm this is intended.
		 */
		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	/* All or nothing: throw the new list away on any error */
	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
766 
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 	u64 ts;
770 
771 	/* Early boot up does not have a buffer yet */
772 	if (!buf->buffer)
773 		return trace_clock_local();
774 
775 	ts = ring_buffer_time_stamp(buf->buffer);
776 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777 
778 	return ts;
779 }
780 
781 u64 ftrace_now(int cpu)
782 {
783 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785 
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797 	/*
798 	 * For quick access (irqsoff uses this in fast path), just
799 	 * return the mirror variable of the state of the ring buffer.
800 	 * It's a little racy, but we don't really care.
801 	 */
802 	smp_rmb();
803 	return !global_trace.buffer_disabled;
804 }
805 
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If the dump on oops happens, it will be much appreciated
813  * to not have to wait for all that output. Anyway this can be
814  * boot time and run time configurable.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
817 
818 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer		*trace_types __read_mostly;
822 
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827 
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-CPU
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
849 
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 
854 static inline void trace_access_lock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		/* gain it for accessing the whole ring buffer. */
858 		down_write(&all_cpu_access_lock);
859 	} else {
860 		/* gain it for accessing a cpu ring buffer. */
861 
862 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 		down_read(&all_cpu_access_lock);
864 
865 		/* Secondly block other access to this @cpu ring buffer. */
866 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 	}
868 }
869 
870 static inline void trace_access_unlock(int cpu)
871 {
872 	if (cpu == RING_BUFFER_ALL_CPUS) {
873 		up_write(&all_cpu_access_lock);
874 	} else {
875 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 		up_read(&all_cpu_access_lock);
877 	}
878 }
879 
880 static inline void trace_access_lock_init(void)
881 {
882 	int cpu;
883 
884 	for_each_possible_cpu(cpu)
885 		mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887 
888 #else
889 
890 static DEFINE_MUTEX(access_lock);
891 
892 static inline void trace_access_lock(int cpu)
893 {
894 	(void)cpu;
895 	mutex_lock(&access_lock);
896 }
897 
898 static inline void trace_access_unlock(int cpu)
899 {
900 	(void)cpu;
901 	mutex_unlock(&access_lock);
902 }
903 
904 static inline void trace_access_lock_init(void)
905 {
906 }
907 
908 #endif
909 
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 				 unsigned int trace_ctx,
913 				 int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 				      struct trace_buffer *buffer,
916 				      unsigned int trace_ctx,
917 				      int skip, struct pt_regs *regs);
918 
919 #else
/* !CONFIG_STACKTRACE: recording a stack trace is a no-op. */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}

/*
 * !CONFIG_STACKTRACE: no-op stub.
 *
 * @trace_ctx is an unsigned int so that the stub has the same signature
 * as the CONFIG_STACKTRACE prototype and matches the unsigned int
 * trace_ctx that the callers pass in.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
931 
932 #endif
933 
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 		  int type, unsigned int trace_ctx)
937 {
938 	struct trace_entry *ent = ring_buffer_event_data(event);
939 
940 	tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942 
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 			  int type,
946 			  unsigned long len,
947 			  unsigned int trace_ctx)
948 {
949 	struct ring_buffer_event *event;
950 
951 	event = ring_buffer_lock_reserve(buffer, len);
952 	if (event != NULL)
953 		trace_event_setup(event, type, trace_ctx);
954 
955 	return event;
956 }
957 
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 	if (tr->array_buffer.buffer)
961 		ring_buffer_record_on(tr->array_buffer.buffer);
962 	/*
963 	 * This flag is looked at when buffers haven't been allocated
964 	 * yet, or by some tracers (like irqsoff), that just want to
965 	 * know if the ring buffer has been disabled, but it can handle
966 	 * races of where it gets disabled but we still do a record.
967 	 * As the check is in the fast path of the tracers, it is more
968 	 * important to be fast than accurate.
969 	 */
970 	tr->buffer_disabled = 0;
971 	/* Make the flag seen by readers */
972 	smp_wmb();
973 }
974 
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983 	tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986 
987 
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 	__this_cpu_write(trace_taskinfo_save, true);
992 
993 	/* If this is the temp buffer, we need to commit fully */
994 	if (this_cpu_read(trace_buffered_event) == event) {
995 		/* Length is in event->array[0] */
996 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 		/* Release the temp buffer */
998 		this_cpu_dec(trace_buffered_event_cnt);
999 		/* ring_buffer_unlock_commit() enables preemption */
1000 		preempt_enable_notrace();
1001 	} else
1002 		ring_buffer_unlock_commit(buffer, event);
1003 }
1004 
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:	   The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013 	struct ring_buffer_event *event;
1014 	struct trace_buffer *buffer;
1015 	struct print_entry *entry;
1016 	unsigned int trace_ctx;
1017 	int alloc;
1018 
1019 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020 		return 0;
1021 
1022 	if (unlikely(tracing_selftest_running || tracing_disabled))
1023 		return 0;
1024 
1025 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026 
1027 	trace_ctx = tracing_gen_ctx();
1028 	buffer = global_trace.array_buffer.buffer;
1029 	ring_buffer_nest_start(buffer);
1030 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031 					    trace_ctx);
1032 	if (!event) {
1033 		size = 0;
1034 		goto out;
1035 	}
1036 
1037 	entry = ring_buffer_event_data(event);
1038 	entry->ip = ip;
1039 
1040 	memcpy(&entry->buf, str, size);
1041 
1042 	/* Add a newline if necessary */
1043 	if (entry->buf[size - 1] != '\n') {
1044 		entry->buf[size] = '\n';
1045 		entry->buf[size + 1] = '\0';
1046 	} else
1047 		entry->buf[size] = '\0';
1048 
1049 	__buffer_unlock_commit(buffer, event);
1050 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052 	ring_buffer_nest_end(buffer);
1053 	return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056 
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:	   The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064 	struct ring_buffer_event *event;
1065 	struct trace_buffer *buffer;
1066 	struct bputs_entry *entry;
1067 	unsigned int trace_ctx;
1068 	int size = sizeof(struct bputs_entry);
1069 	int ret = 0;
1070 
1071 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072 		return 0;
1073 
1074 	if (unlikely(tracing_selftest_running || tracing_disabled))
1075 		return 0;
1076 
1077 	trace_ctx = tracing_gen_ctx();
1078 	buffer = global_trace.array_buffer.buffer;
1079 
1080 	ring_buffer_nest_start(buffer);
1081 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082 					    trace_ctx);
1083 	if (!event)
1084 		goto out;
1085 
1086 	entry = ring_buffer_event_data(event);
1087 	entry->ip			= ip;
1088 	entry->str			= str;
1089 
1090 	__buffer_unlock_commit(buffer, event);
1091 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 
1093 	ret = 1;
1094  out:
1095 	ring_buffer_nest_end(buffer);
1096 	return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099 
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102 					   void *cond_data)
1103 {
1104 	struct tracer *tracer = tr->current_trace;
1105 	unsigned long flags;
1106 
1107 	if (in_nmi()) {
1108 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1110 		return;
1111 	}
1112 
1113 	if (!tr->allocated_snapshot) {
1114 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115 		internal_trace_puts("*** stopping trace here!   ***\n");
1116 		tracing_off();
1117 		return;
1118 	}
1119 
1120 	/* Note, snapshot can not be used when the tracer uses it */
1121 	if (tracer->use_max_tr) {
1122 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 		return;
1125 	}
1126 
1127 	local_irq_save(flags);
1128 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1129 	local_irq_restore(flags);
1130 }
1131 
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134 	tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136 
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing.
1149  * Basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153 	struct trace_array *tr = &global_trace;
1154 
1155 	tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Works like tracing_snapshot(), but the snapshot is conditional: it only
 * happens if the cond_snapshot.update() implementation, which is handed
 * @cond_data, returns true. In other words the trace array's
 * cond_snapshot update() operation uses @cond_data to decide whether the
 * snapshot should be taken and, if so, presumably saves it along with the
 * snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177 
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:		The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194 	void *cond_data = NULL;
1195 
1196 	local_irq_disable();
1197 	arch_spin_lock(&tr->max_lock);
1198 
1199 	if (tr->cond_snapshot)
1200 		cond_data = tr->cond_snapshot->cond_data;
1201 
1202 	arch_spin_unlock(&tr->max_lock);
1203 	local_irq_enable();
1204 
1205 	return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208 
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210 					struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212 
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215 	int ret;
1216 
1217 	if (!tr->allocated_snapshot) {
1218 
1219 		/* allocate spare buffer */
1220 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222 		if (ret < 0)
1223 			return ret;
1224 
1225 		tr->allocated_snapshot = true;
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233 	/*
1234 	 * We don't free the ring buffer. instead, resize it because
1235 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1236 	 * we want preserve it.
1237 	 */
1238 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239 	set_buffer_entries(&tr->max_buffer, 1);
1240 	tracing_reset_online_cpus(&tr->max_buffer);
1241 	tr->allocated_snapshot = false;
1242 }
1243 
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256 	struct trace_array *tr = &global_trace;
1257 	int ret;
1258 
1259 	ret = tracing_alloc_snapshot_instance(tr);
1260 	WARN_ON(ret < 0);
1261 
1262 	return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first if it isn't already. Use this only where it is safe to sleep, as
 * the allocation may sleep. If the allocation fails no snapshot is taken.
 *
 * The snapshot swaps the snapshot buffer with the current live tracing
 * buffer, so the live trace can be captured when some condition is
 * triggered while tracing continues.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1288 
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:		The tracing instance
1292  * @cond_data:	User data to associate with the snapshot
1293  * @update:	Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303 				 cond_update_fn_t update)
1304 {
1305 	struct cond_snapshot *cond_snapshot;
1306 	int ret = 0;
1307 
1308 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309 	if (!cond_snapshot)
1310 		return -ENOMEM;
1311 
1312 	cond_snapshot->cond_data = cond_data;
1313 	cond_snapshot->update = update;
1314 
1315 	mutex_lock(&trace_types_lock);
1316 
1317 	ret = tracing_alloc_snapshot_instance(tr);
1318 	if (ret)
1319 		goto fail_unlock;
1320 
1321 	if (tr->current_trace->use_max_tr) {
1322 		ret = -EBUSY;
1323 		goto fail_unlock;
1324 	}
1325 
1326 	/*
1327 	 * The cond_snapshot can only change to NULL without the
1328 	 * trace_types_lock. We don't care if we race with it going
1329 	 * to NULL, but we want to make sure that it's not set to
1330 	 * something other than NULL when we get here, which we can
1331 	 * do safely with only holding the trace_types_lock and not
1332 	 * having to take the max_lock.
1333 	 */
1334 	if (tr->cond_snapshot) {
1335 		ret = -EBUSY;
1336 		goto fail_unlock;
1337 	}
1338 
1339 	local_irq_disable();
1340 	arch_spin_lock(&tr->max_lock);
1341 	tr->cond_snapshot = cond_snapshot;
1342 	arch_spin_unlock(&tr->max_lock);
1343 	local_irq_enable();
1344 
1345 	mutex_unlock(&trace_types_lock);
1346 
1347 	return ret;
1348 
1349  fail_unlock:
1350 	mutex_unlock(&trace_types_lock);
1351 	kfree(cond_snapshot);
1352 	return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1355 
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:		The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368 	int ret = 0;
1369 
1370 	local_irq_disable();
1371 	arch_spin_lock(&tr->max_lock);
1372 
1373 	if (!tr->cond_snapshot)
1374 		ret = -EINVAL;
1375 	else {
1376 		kfree(tr->cond_snapshot);
1377 		tr->cond_snapshot = NULL;
1378 	}
1379 
1380 	arch_spin_unlock(&tr->max_lock);
1381 	local_irq_enable();
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
/* Stub for !CONFIG_TRACER_SNAPSHOT: only warns (once) that snapshots are unavailable. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub for !CONFIG_TRACER_SNAPSHOT: both arguments are ignored, only a one-time warning is raised. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub for !CONFIG_TRACER_SNAPSHOT. */
void tracing_snapshot_alloc(void)
{
	/* Nothing to allocate; call the stub snapshot just for its warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/* Stub for !CONFIG_TRACER_SNAPSHOT: nothing to disable, always returns 0. */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	/* Same value as the former "return false", spelled as an int */
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races of where it gets disabled but we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/*
1496 	 * nr_entries can not be zero and the startup
1497 	 * tests require some buffer space. Therefore
1498 	 * ensure we have at least 4096 bytes of buffer.
1499 	 */
1500 	trace_buf_size = max(4096UL, buf_size);
1501 	return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504 
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507 	unsigned long threshold;
1508 	int ret;
1509 
1510 	if (!str)
1511 		return 0;
1512 	ret = kstrtoul(str, 0, &threshold);
1513 	if (ret < 0)
1514 		return 0;
1515 	tracing_thresh = threshold * 1000;
1516 	return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519 
/* Convert nanoseconds to whole microseconds (the remainder is dropped). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1524 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/*
 * Option name strings produced by expanding TRACE_FLAGS with the C()
 * definition above, followed by a NULL terminator.
 * These must match the bit positions in trace_iterator_flags.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1539 
/*
 * Table of trace clocks. Each entry pairs a clock function with a name
 * string and a flag telling whether the clock counts in nanoseconds.
 * trace_clock_in_ns() indexes this table with tr->clock_id.
 * ARCH_TRACE_CLOCKS is expanded at the end of the table; what it adds is
 * not visible here (presumably architecture-specific clocks - confirm).
 */
static struct {
	u64 (*func)(void);	/* clock function; takes no arguments, returns a u64 */
	const char *name;	/* name string of this clock */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1556 
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559 	if (trace_clocks[tr->clock_id].in_ns)
1560 		return true;
1561 
1562 	return false;
1563 }
1564 
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570 	memset(parser, 0, sizeof(*parser));
1571 
1572 	parser->buffer = kmalloc(size, GFP_KERNEL);
1573 	if (!parser->buffer)
1574 		return 1;
1575 
1576 	parser->size = size;
1577 	return 0;
1578 }
1579 
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585 	kfree(parser->buffer);
1586 	parser->buffer = NULL;
1587 }
1588 
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 * @parser: parser state; receives the characters of the string
 * @ubuf:   user-space buffer to read from
 * @cnt:    number of bytes available in @ubuf
 * @ppos:   file position; advanced by the number of bytes read on success
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns. If the input ends in the middle of a string,
 * parser->cont is set and the next call keeps appending to the same
 * string instead of skipping leading white space.
 *
 * One byte is always fetched from @ubuf before @cnt is looked at, so
 * callers are presumably expected to pass a non-zero @cnt (confirm).
 *
 * Returns number of bytes read. On failure returns the non-zero result of
 * get_user(), or -EINVAL when the string does not fit into
 * parser->size - 1 characters.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A write at position zero starts with a cleared parser */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		/* Always keep one byte free for the terminating '\0' */
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Input ran out mid-string: store the last character read too */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}
1676 
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680 	int len;
1681 
1682 	if (trace_seq_used(s) <= s->seq.readpos)
1683 		return -EBUSY;
1684 
1685 	len = trace_seq_used(s) - s->seq.readpos;
1686 	if (cnt > len)
1687 		cnt = len;
1688 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689 
1690 	s->seq.readpos += cnt;
1691 	return cnt;
1692 }
1693 
1694 unsigned long __read_mostly	tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696 
1697 #ifdef LATENCY_FS_NOTIFY
1698 
1699 static struct workqueue_struct *fsnotify_wq;
1700 
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703 	struct trace_array *tr = container_of(work, struct trace_array,
1704 					      fsnotify_work);
1705 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707 
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710 	struct trace_array *tr = container_of(iwork, struct trace_array,
1711 					      fsnotify_irqwork);
1712 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714 
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716 				     struct dentry *d_tracer)
1717 {
1718 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1721 					      TRACE_MODE_WRITE,
1722 					      d_tracer, &tr->max_latency,
1723 					      &tracing_max_lat_fops);
1724 }
1725 
1726 __init static int latency_fsnotify_init(void)
1727 {
1728 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1730 	if (!fsnotify_wq) {
1731 		pr_err("Unable to allocate tr_max_lat_wq\n");
1732 		return -ENOMEM;
1733 	}
1734 	return 0;
1735 }
1736 
1737 late_initcall_sync(latency_fsnotify_init);
1738 
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741 	if (!fsnotify_wq)
1742 		return;
1743 	/*
1744 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745 	 * possible that we are called from __schedule() or do_idle(), which
1746 	 * could cause a deadlock.
1747 	 */
1748 	irq_work_queue(&tr->fsnotify_irqwork);
1749 }
1750 
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1752 	|| defined(CONFIG_OSNOISE_TRACER)
1753 
1754 #define trace_create_maxlat_file(tr, d_tracer)				\
1755 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1756 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757 
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1760 #endif
1761 
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771 	struct array_buffer *trace_buf = &tr->array_buffer;
1772 	struct array_buffer *max_buf = &tr->max_buffer;
1773 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775 
1776 	max_buf->cpu = cpu;
1777 	max_buf->time_start = data->preempt_timestamp;
1778 
1779 	max_data->saved_latency = tr->max_latency;
1780 	max_data->critical_start = data->critical_start;
1781 	max_data->critical_end = data->critical_end;
1782 
1783 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784 	max_data->pid = tsk->pid;
1785 	/*
1786 	 * If tsk == current, then use current_uid(), as that does not use
1787 	 * RCU. The irq tracer can be called out of RCU scope.
1788 	 */
1789 	if (tsk == current)
1790 		max_data->uid = current_uid();
1791 	else
1792 		max_data->uid = task_uid(tsk);
1793 
1794 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795 	max_data->policy = tsk->policy;
1796 	max_data->rt_priority = tsk->rt_priority;
1797 
1798 	/* record this tasks comm */
1799 	tracing_record_cmdline(tsk);
1800 	latency_fsnotify(tr);
1801 }
1802 
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815 	      void *cond_data)
1816 {
1817 	if (tr->stop_count)
1818 		return;
1819 
1820 	WARN_ON_ONCE(!irqs_disabled());
1821 
1822 	if (!tr->allocated_snapshot) {
1823 		/* Only the nop tracer should hit this when disabling */
1824 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825 		return;
1826 	}
1827 
1828 	arch_spin_lock(&tr->max_lock);
1829 
1830 	/* Inherit the recordable setting from array_buffer */
1831 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832 		ring_buffer_record_on(tr->max_buffer.buffer);
1833 	else
1834 		ring_buffer_record_off(tr->max_buffer.buffer);
1835 
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838 		goto out_unlock;
1839 #endif
1840 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841 
1842 	__update_max_tr(tr, tsk, cpu);
1843 
1844  out_unlock:
1845 	arch_spin_unlock(&tr->max_lock);
1846 }
1847 
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859 	int ret;
1860 
1861 	if (tr->stop_count)
1862 		return;
1863 
1864 	WARN_ON_ONCE(!irqs_disabled());
1865 	if (!tr->allocated_snapshot) {
1866 		/* Only the nop tracer should hit this when disabling */
1867 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868 		return;
1869 	}
1870 
1871 	arch_spin_lock(&tr->max_lock);
1872 
1873 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874 
1875 	if (ret == -EBUSY) {
1876 		/*
1877 		 * We failed to swap the buffer due to a commit taking
1878 		 * place on this CPU. We fail to record, but we reset
1879 		 * the max trace buffer (no one writes directly to it)
1880 		 * and flag that it failed.
1881 		 */
1882 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883 			"Failed to swap buffers due to commit in progress\n");
1884 	}
1885 
1886 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887 
1888 	__update_max_tr(tr, tsk, cpu);
1889 	arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892 
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895 	/* Iterators are static, they should be filled or empty */
1896 	if (trace_buffer_iter(iter, iter->cpu_file))
1897 		return 0;
1898 
1899 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900 				full);
1901 }
1902 
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905 
/* One tracer whose selftest was postponed; queued on postponed_selftests. */
struct trace_selftests {
	struct list_head		list;	/* link in postponed_selftests */
	struct tracer			*type;	/* tracer to run the selftest for */
};
1910 
1911 static LIST_HEAD(postponed_selftests);
1912 
1913 static int save_selftest(struct tracer *type)
1914 {
1915 	struct trace_selftests *selftest;
1916 
1917 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918 	if (!selftest)
1919 		return -ENOMEM;
1920 
1921 	selftest->type = type;
1922 	list_add(&selftest->list, &postponed_selftests);
1923 	return 0;
1924 }
1925 
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928 	struct trace_array *tr = &global_trace;
1929 	struct tracer *saved_tracer = tr->current_trace;
1930 	int ret;
1931 
1932 	if (!type->selftest || tracing_selftest_disabled)
1933 		return 0;
1934 
1935 	/*
1936 	 * If a tracer registers early in boot up (before scheduling is
1937 	 * initialized and such), then do not run its selftests yet.
1938 	 * Instead, run it a little later in the boot process.
1939 	 */
1940 	if (!selftests_can_run)
1941 		return save_selftest(type);
1942 
1943 	if (!tracing_is_on()) {
1944 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945 			type->name);
1946 		return 0;
1947 	}
1948 
1949 	/*
1950 	 * Run a selftest on this tracer.
1951 	 * Here we reset the trace buffer, and set the current
1952 	 * tracer to be this tracer. The tracer can then run some
1953 	 * internal tracing to verify that everything is in order.
1954 	 * If we fail, we do not register this tracer.
1955 	 */
1956 	tracing_reset_online_cpus(&tr->array_buffer);
1957 
1958 	tr->current_trace = type;
1959 
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961 	if (type->use_max_tr) {
1962 		/* If we expanded the buffers, make sure the max is expanded too */
1963 		if (ring_buffer_expanded)
1964 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965 					   RING_BUFFER_ALL_CPUS);
1966 		tr->allocated_snapshot = true;
1967 	}
1968 #endif
1969 
1970 	/* the test is responsible for initializing and enabling */
1971 	pr_info("Testing tracer %s: ", type->name);
1972 	ret = type->selftest(type, tr);
1973 	/* the test is responsible for resetting too */
1974 	tr->current_trace = saved_tracer;
1975 	if (ret) {
1976 		printk(KERN_CONT "FAILED!\n");
1977 		/* Add the warning after printing 'FAILED' */
1978 		WARN_ON(1);
1979 		return -1;
1980 	}
1981 	/* Only reset on passing, to avoid touching corrupted buffers */
1982 	tracing_reset_online_cpus(&tr->array_buffer);
1983 
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985 	if (type->use_max_tr) {
1986 		tr->allocated_snapshot = false;
1987 
1988 		/* Shrink the max buffer again */
1989 		if (ring_buffer_expanded)
1990 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1991 					   RING_BUFFER_ALL_CPUS);
1992 	}
1993 #endif
1994 
1995 	printk(KERN_CONT "PASSED\n");
1996 	return 0;
1997 }
1998 
1999 static __init int init_trace_selftests(void)
2000 {
2001 	struct trace_selftests *p, *n;
2002 	struct tracer *t, **last;
2003 	int ret;
2004 
2005 	selftests_can_run = true;
2006 
2007 	mutex_lock(&trace_types_lock);
2008 
2009 	if (list_empty(&postponed_selftests))
2010 		goto out;
2011 
2012 	pr_info("Running postponed tracer tests:\n");
2013 
2014 	tracing_selftest_running = true;
2015 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016 		/* This loop can take minutes when sanitizers are enabled, so
2017 		 * lets make sure we allow RCU processing.
2018 		 */
2019 		cond_resched();
2020 		ret = run_tracer_selftest(p->type);
2021 		/* If the test fails, then warn and remove from available_tracers */
2022 		if (ret < 0) {
2023 			WARN(1, "tracer: %s failed selftest, disabling\n",
2024 			     p->type->name);
2025 			last = &trace_types;
2026 			for (t = trace_types; t; t = t->next) {
2027 				if (t == p->type) {
2028 					*last = t->next;
2029 					break;
2030 				}
2031 				last = &t->next;
2032 			}
2033 		}
2034 		list_del(&p->list);
2035 		kfree(p);
2036 	}
2037 	tracing_selftest_running = false;
2038 
2039  out:
2040 	mutex_unlock(&trace_types_lock);
2041 
2042 	return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest to run: report success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051 
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053 
2054 static void __init apply_trace_boot_options(void);
2055 
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064 	struct tracer *t;
2065 	int ret = 0;
2066 
2067 	if (!type->name) {
2068 		pr_info("Tracer must have a name\n");
2069 		return -1;
2070 	}
2071 
2072 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074 		return -1;
2075 	}
2076 
2077 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078 		pr_warn("Can not register tracer %s due to lockdown\n",
2079 			   type->name);
2080 		return -EPERM;
2081 	}
2082 
2083 	mutex_lock(&trace_types_lock);
2084 
2085 	tracing_selftest_running = true;
2086 
2087 	for (t = trace_types; t; t = t->next) {
2088 		if (strcmp(type->name, t->name) == 0) {
2089 			/* already found */
2090 			pr_info("Tracer %s already registered\n",
2091 				type->name);
2092 			ret = -1;
2093 			goto out;
2094 		}
2095 	}
2096 
2097 	if (!type->set_flag)
2098 		type->set_flag = &dummy_set_flag;
2099 	if (!type->flags) {
2100 		/*allocate a dummy tracer_flags*/
2101 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102 		if (!type->flags) {
2103 			ret = -ENOMEM;
2104 			goto out;
2105 		}
2106 		type->flags->val = 0;
2107 		type->flags->opts = dummy_tracer_opt;
2108 	} else
2109 		if (!type->flags->opts)
2110 			type->flags->opts = dummy_tracer_opt;
2111 
2112 	/* store the tracer for __set_tracer_option */
2113 	type->flags->trace = type;
2114 
2115 	ret = run_tracer_selftest(type);
2116 	if (ret < 0)
2117 		goto out;
2118 
2119 	type->next = trace_types;
2120 	trace_types = type;
2121 	add_tracer_options(&global_trace, type);
2122 
2123  out:
2124 	tracing_selftest_running = false;
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	if (ret || !default_bootup_tracer)
2128 		goto out_unlock;
2129 
2130 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131 		goto out_unlock;
2132 
2133 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134 	/* Do we want this tracer to start on bootup? */
2135 	tracing_set_tracer(&global_trace, type->name);
2136 	default_bootup_tracer = NULL;
2137 
2138 	apply_trace_boot_options();
2139 
2140 	/* disable other selftests, since this will break it. */
2141 	disable_tracing_selftest("running a tracer");
2142 
2143  out_unlock:
2144 	return ret;
2145 }
2146 
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149 	struct trace_buffer *buffer = buf->buffer;
2150 
2151 	if (!buffer)
2152 		return;
2153 
2154 	ring_buffer_record_disable(buffer);
2155 
2156 	/* Make sure all commits have finished */
2157 	synchronize_rcu();
2158 	ring_buffer_reset_cpu(buffer, cpu);
2159 
2160 	ring_buffer_record_enable(buffer);
2161 }
2162 
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165 	struct trace_buffer *buffer = buf->buffer;
2166 
2167 	if (!buffer)
2168 		return;
2169 
2170 	ring_buffer_record_disable(buffer);
2171 
2172 	/* Make sure all commits have finished */
2173 	synchronize_rcu();
2174 
2175 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176 
2177 	ring_buffer_reset_online_cpus(buffer);
2178 
2179 	ring_buffer_record_enable(buffer);
2180 }
2181 
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus(void)
2184 {
2185 	struct trace_array *tr;
2186 
2187 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2188 		if (!tr->clear_trace)
2189 			continue;
2190 		tr->clear_trace = false;
2191 		tracing_reset_online_cpus(&tr->array_buffer);
2192 #ifdef CONFIG_TRACER_MAX_TRACE
2193 		tracing_reset_online_cpus(&tr->max_buffer);
2194 #endif
2195 	}
2196 }
2197 
2198 /*
2199  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2200  * is the tgid last observed corresponding to pid=i.
2201  */
2202 static int *tgid_map;
2203 
2204 /* The maximum valid index into tgid_map. */
2205 static size_t tgid_map_max;
2206 
/* Number of comm slots trace_create_savedcmd() asks for. */
2207 #define SAVED_CMDLINES_DEFAULT 128
/* Marks an unused entry in either direction of the pid <-> cmdline maps. */
2208 #define NO_CMDLINE_MAP UINT_MAX
2209 /*
2210  * Preemption must be disabled before acquiring trace_cmdline_lock.
2211  * The various trace_arrays' max_lock must be acquired in a context
2212  * where interrupt is disabled.
2213  */
2214 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/*
 * Cache of recently seen task comms. A pid is reduced to an index with
 * (pid & (PID_MAX_DEFAULT - 1)); that index selects a slot number, and
 * the slot holds the comm together with the full pid that owns it.
 */
2215 struct saved_cmdlines_buffer {
	/* reduced pid -> slot number, or NO_CMDLINE_MAP */
2216 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* slot number -> pid last saved into that slot */
2217 	unsigned *map_cmdline_to_pid;
	/* number of slots allocated */
2218 	unsigned cmdline_num;
	/* slot handed out most recently; the next new pid takes the one after */
2219 	int cmdline_idx;
	/* cmdline_num slots of TASK_COMM_LEN bytes each */
2220 	char *saved_cmdlines;
2221 };
/* The single cache instance, allocated by trace_create_savedcmd(). */
2222 static struct saved_cmdlines_buffer *savedcmd;
2223 
2224 static inline char *get_saved_cmdlines(int idx)
2225 {
2226 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2227 }
2228 
2229 static inline void set_cmdline(int idx, const char *cmdline)
2230 {
2231 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2232 }
2233 
2234 static int allocate_cmdlines_buffer(unsigned int val,
2235 				    struct saved_cmdlines_buffer *s)
2236 {
2237 	s->map_cmdline_to_pid = kmalloc_array(val,
2238 					      sizeof(*s->map_cmdline_to_pid),
2239 					      GFP_KERNEL);
2240 	if (!s->map_cmdline_to_pid)
2241 		return -ENOMEM;
2242 
2243 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2244 	if (!s->saved_cmdlines) {
2245 		kfree(s->map_cmdline_to_pid);
2246 		return -ENOMEM;
2247 	}
2248 
2249 	s->cmdline_idx = 0;
2250 	s->cmdline_num = val;
2251 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2252 	       sizeof(s->map_pid_to_cmdline));
2253 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2254 	       val * sizeof(*s->map_cmdline_to_pid));
2255 
2256 	return 0;
2257 }
2258 
2259 static int trace_create_savedcmd(void)
2260 {
2261 	int ret;
2262 
2263 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2264 	if (!savedcmd)
2265 		return -ENOMEM;
2266 
2267 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2268 	if (ret < 0) {
2269 		kfree(savedcmd);
2270 		savedcmd = NULL;
2271 		return -ENOMEM;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 int is_tracing_stopped(void)
2278 {
2279 	return global_trace.stop_count;
2280 }
2281 
2282 /**
2283  * tracing_start - quick start of the tracer
2284  *
2285  * If tracing is enabled but was stopped by tracing_stop,
2286  * this will start the tracer back up.
2287  */
2288 void tracing_start(void)
2289 {
2290 	struct trace_buffer *buffer;
2291 	unsigned long flags;
2292 
2293 	if (tracing_disabled)
2294 		return;
2295 
2296 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2297 	if (--global_trace.stop_count) {
2298 		if (global_trace.stop_count < 0) {
2299 			/* Someone screwed up their debugging */
2300 			WARN_ON_ONCE(1);
2301 			global_trace.stop_count = 0;
2302 		}
2303 		goto out;
2304 	}
2305 
2306 	/* Prevent the buffers from switching */
2307 	arch_spin_lock(&global_trace.max_lock);
2308 
2309 	buffer = global_trace.array_buffer.buffer;
2310 	if (buffer)
2311 		ring_buffer_record_enable(buffer);
2312 
2313 #ifdef CONFIG_TRACER_MAX_TRACE
2314 	buffer = global_trace.max_buffer.buffer;
2315 	if (buffer)
2316 		ring_buffer_record_enable(buffer);
2317 #endif
2318 
2319 	arch_spin_unlock(&global_trace.max_lock);
2320 
2321  out:
2322 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2323 }
2324 
2325 static void tracing_start_tr(struct trace_array *tr)
2326 {
2327 	struct trace_buffer *buffer;
2328 	unsigned long flags;
2329 
2330 	if (tracing_disabled)
2331 		return;
2332 
2333 	/* If global, we need to also start the max tracer */
2334 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2335 		return tracing_start();
2336 
2337 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2338 
2339 	if (--tr->stop_count) {
2340 		if (tr->stop_count < 0) {
2341 			/* Someone screwed up their debugging */
2342 			WARN_ON_ONCE(1);
2343 			tr->stop_count = 0;
2344 		}
2345 		goto out;
2346 	}
2347 
2348 	buffer = tr->array_buffer.buffer;
2349 	if (buffer)
2350 		ring_buffer_record_enable(buffer);
2351 
2352  out:
2353 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2354 }
2355 
2356 /**
2357  * tracing_stop - quick stop of the tracer
2358  *
2359  * Light weight way to stop tracing. Use in conjunction with
2360  * tracing_start.
2361  */
2362 void tracing_stop(void)
2363 {
2364 	struct trace_buffer *buffer;
2365 	unsigned long flags;
2366 
2367 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2368 	if (global_trace.stop_count++)
2369 		goto out;
2370 
2371 	/* Prevent the buffers from switching */
2372 	arch_spin_lock(&global_trace.max_lock);
2373 
2374 	buffer = global_trace.array_buffer.buffer;
2375 	if (buffer)
2376 		ring_buffer_record_disable(buffer);
2377 
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379 	buffer = global_trace.max_buffer.buffer;
2380 	if (buffer)
2381 		ring_buffer_record_disable(buffer);
2382 #endif
2383 
2384 	arch_spin_unlock(&global_trace.max_lock);
2385 
2386  out:
2387 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2388 }
2389 
2390 static void tracing_stop_tr(struct trace_array *tr)
2391 {
2392 	struct trace_buffer *buffer;
2393 	unsigned long flags;
2394 
2395 	/* If global, we need to also stop the max tracer */
2396 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2397 		return tracing_stop();
2398 
2399 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2400 	if (tr->stop_count++)
2401 		goto out;
2402 
2403 	buffer = tr->array_buffer.buffer;
2404 	if (buffer)
2405 		ring_buffer_record_disable(buffer);
2406 
2407  out:
2408 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2409 }
2410 
2411 static int trace_save_cmdline(struct task_struct *tsk)
2412 {
2413 	unsigned tpid, idx;
2414 
2415 	/* treat recording of idle task as a success */
2416 	if (!tsk->pid)
2417 		return 1;
2418 
2419 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2420 
2421 	/*
2422 	 * It's not the end of the world if we don't get
2423 	 * the lock, but we also don't want to spin
2424 	 * nor do we want to disable interrupts,
2425 	 * so if we miss here, then better luck next time.
2426 	 *
2427 	 * This is called within the scheduler and wake up, so interrupts
2428 	 * had better been disabled and run queue lock been held.
2429 	 */
2430 	lockdep_assert_preemption_disabled();
2431 	if (!arch_spin_trylock(&trace_cmdline_lock))
2432 		return 0;
2433 
2434 	idx = savedcmd->map_pid_to_cmdline[tpid];
2435 	if (idx == NO_CMDLINE_MAP) {
2436 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2437 
2438 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2439 		savedcmd->cmdline_idx = idx;
2440 	}
2441 
2442 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443 	set_cmdline(idx, tsk->comm);
2444 
2445 	arch_spin_unlock(&trace_cmdline_lock);
2446 
2447 	return 1;
2448 }
2449 
2450 static void __trace_find_cmdline(int pid, char comm[])
2451 {
2452 	unsigned map;
2453 	int tpid;
2454 
2455 	if (!pid) {
2456 		strcpy(comm, "<idle>");
2457 		return;
2458 	}
2459 
2460 	if (WARN_ON_ONCE(pid < 0)) {
2461 		strcpy(comm, "<XXX>");
2462 		return;
2463 	}
2464 
2465 	tpid = pid & (PID_MAX_DEFAULT - 1);
2466 	map = savedcmd->map_pid_to_cmdline[tpid];
2467 	if (map != NO_CMDLINE_MAP) {
2468 		tpid = savedcmd->map_cmdline_to_pid[map];
2469 		if (tpid == pid) {
2470 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2471 			return;
2472 		}
2473 	}
2474 	strcpy(comm, "<...>");
2475 }
2476 
2477 void trace_find_cmdline(int pid, char comm[])
2478 {
2479 	preempt_disable();
2480 	arch_spin_lock(&trace_cmdline_lock);
2481 
2482 	__trace_find_cmdline(pid, comm);
2483 
2484 	arch_spin_unlock(&trace_cmdline_lock);
2485 	preempt_enable();
2486 }
2487 
2488 static int *trace_find_tgid_ptr(int pid)
2489 {
2490 	/*
2491 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492 	 * if we observe a non-NULL tgid_map then we also observe the correct
2493 	 * tgid_map_max.
2494 	 */
2495 	int *map = smp_load_acquire(&tgid_map);
2496 
2497 	if (unlikely(!map || pid > tgid_map_max))
2498 		return NULL;
2499 
2500 	return &map[pid];
2501 }
2502 
/* Return the tgid recorded for @pid, or 0 when there is no map entry. */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
2509 
2510 static int trace_save_tgid(struct task_struct *tsk)
2511 {
2512 	int *ptr;
2513 
2514 	/* treat recording of idle task as a success */
2515 	if (!tsk->pid)
2516 		return 1;
2517 
2518 	ptr = trace_find_tgid_ptr(tsk->pid);
2519 	if (!ptr)
2520 		return 0;
2521 
2522 	*ptr = tsk->tgid;
2523 	return 1;
2524 }
2525 
2526 static bool tracing_record_taskinfo_skip(int flags)
2527 {
2528 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2529 		return true;
2530 	if (!__this_cpu_read(trace_taskinfo_save))
2531 		return true;
2532 	return false;
2533 }
2534 
2535 /**
2536  * tracing_record_taskinfo - record the task info of a task
2537  *
2538  * @task:  task to record
2539  * @flags: TRACE_RECORD_CMDLINE for recording comm
2540  *         TRACE_RECORD_TGID for recording tgid
2541  */
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2543 {
2544 	bool done;
2545 
2546 	if (tracing_record_taskinfo_skip(flags))
2547 		return;
2548 
2549 	/*
2550 	 * Record as much task information as possible. If some fail, continue
2551 	 * to try to record the others.
2552 	 */
2553 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2555 
2556 	/* If recording any information failed, retry again soon. */
2557 	if (!done)
2558 		return;
2559 
2560 	__this_cpu_write(trace_taskinfo_save, false);
2561 }
2562 
2563 /**
2564  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2565  *
2566  * @prev: previous task during sched_switch
2567  * @next: next task during sched_switch
2568  * @flags: TRACE_RECORD_CMDLINE for recording comm
2569  *         TRACE_RECORD_TGID for recording tgid
2570  */
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572 					  struct task_struct *next, int flags)
2573 {
2574 	bool done;
2575 
2576 	if (tracing_record_taskinfo_skip(flags))
2577 		return;
2578 
2579 	/*
2580 	 * Record as much task information as possible. If some fail, continue
2581 	 * to try to record the others.
2582 	 */
2583 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2587 
2588 	/* If recording any information failed, retry again soon. */
2589 	if (!done)
2590 		return;
2591 
2592 	__this_cpu_write(trace_taskinfo_save, false);
2593 }
2594 
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2597 {
2598 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2599 }
2600 
2601 void tracing_record_tgid(struct task_struct *task)
2602 {
2603 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2604 }
2605 
2606 /*
2607  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609  * simplifies those functions and keeps them in sync.
2610  */
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2612 {
2613 	return trace_seq_has_overflowed(s) ?
2614 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2615 }
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
2617 
/*
 * Return current->migration_disabled on CONFIG_SMP builds and 0 on
 * everything else.
 */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2626 
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2628 {
2629 	unsigned int trace_flags = irqs_status;
2630 	unsigned int pc;
2631 
2632 	pc = preempt_count();
2633 
2634 	if (pc & NMI_MASK)
2635 		trace_flags |= TRACE_FLAG_NMI;
2636 	if (pc & HARDIRQ_MASK)
2637 		trace_flags |= TRACE_FLAG_HARDIRQ;
2638 	if (in_serving_softirq())
2639 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2640 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641 		trace_flags |= TRACE_FLAG_BH_OFF;
2642 
2643 	if (tif_need_resched())
2644 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645 	if (test_preempt_need_resched())
2646 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2649 }
2650 
/*
 * Non-inline entry point that forwards straight to
 * __trace_buffer_lock_reserve() and returns its result unchanged.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);

	return event;
}
2659 
/*
 * Per-CPU temporary event: a page allocated by
 * trace_buffered_event_enable() and freed (pointer reset to NULL) by
 * trace_buffered_event_disable().
 */
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/*
 * Per-CPU use counter for the temporary event.
 * trace_event_buffer_lock_reserve() only takes the temporary event when
 * its increment of this counter returns 1.
 */
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/*
 * Number of trace_buffered_event_enable() calls not yet balanced by
 * trace_buffered_event_disable(); both functions warn if event_mutex is
 * not held when they modify it.
 */
2662 static int trace_buffered_event_ref;
2663 
2664 /**
2665  * trace_buffered_event_enable - enable buffering events
2666  *
2667  * When events are being filtered, it is quicker to use a temporary
2668  * buffer to write the event data into if there's a likely chance
2669  * that it will not be committed. The discard of the ring buffer
2670  * is not as fast as committing, and is much slower than copying
2671  * a commit.
2672  *
2673  * When an event is to be filtered, allocate per cpu buffers to
2674  * write the event data into, and if the event is filtered and discarded
2675  * it is simply dropped, otherwise, the entire data is to be committed
2676  * in one shot.
2677  */
2678 void trace_buffered_event_enable(void)
2679 {
2680 	struct ring_buffer_event *event;
2681 	struct page *page;
2682 	int cpu;
2683 
2684 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2685 
2686 	if (trace_buffered_event_ref++)
2687 		return;
2688 
2689 	for_each_tracing_cpu(cpu) {
2690 		page = alloc_pages_node(cpu_to_node(cpu),
2691 					GFP_KERNEL | __GFP_NORETRY, 0);
2692 		if (!page)
2693 			goto failed;
2694 
2695 		event = page_address(page);
2696 		memset(event, 0, sizeof(*event));
2697 
2698 		per_cpu(trace_buffered_event, cpu) = event;
2699 
2700 		preempt_disable();
2701 		if (cpu == smp_processor_id() &&
2702 		    __this_cpu_read(trace_buffered_event) !=
2703 		    per_cpu(trace_buffered_event, cpu))
2704 			WARN_ON_ONCE(1);
2705 		preempt_enable();
2706 	}
2707 
2708 	return;
2709  failed:
2710 	trace_buffered_event_disable();
2711 }
2712 
2713 static void enable_trace_buffered_event(void *data)
2714 {
2715 	/* Probably not needed, but do it anyway */
2716 	smp_rmb();
2717 	this_cpu_dec(trace_buffered_event_cnt);
2718 }
2719 
2720 static void disable_trace_buffered_event(void *data)
2721 {
2722 	this_cpu_inc(trace_buffered_event_cnt);
2723 }
2724 
2725 /**
2726  * trace_buffered_event_disable - disable buffering events
2727  *
2728  * When a filter is removed, it is faster to not use the buffered
2729  * events, and to commit directly into the ring buffer. Free up
2730  * the temp buffers when there are no more users. This requires
2731  * special synchronization with current events.
2732  */
2733 void trace_buffered_event_disable(void)
2734 {
2735 	int cpu;
2736 
2737 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738 
2739 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2740 		return;
2741 
2742 	if (--trace_buffered_event_ref)
2743 		return;
2744 
2745 	preempt_disable();
2746 	/* For each CPU, set the buffer as used. */
2747 	smp_call_function_many(tracing_buffer_mask,
2748 			       disable_trace_buffered_event, NULL, 1);
2749 	preempt_enable();
2750 
2751 	/* Wait for all current users to finish */
2752 	synchronize_rcu();
2753 
2754 	for_each_tracing_cpu(cpu) {
2755 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756 		per_cpu(trace_buffered_event, cpu) = NULL;
2757 	}
2758 	/*
2759 	 * Make sure trace_buffered_event is NULL before clearing
2760 	 * trace_buffered_event_cnt.
2761 	 */
2762 	smp_wmb();
2763 
2764 	preempt_disable();
2765 	/* Do the work on each cpu */
2766 	smp_call_function_many(tracing_buffer_mask,
2767 			       enable_trace_buffered_event, NULL, 1);
2768 	preempt_enable();
2769 }
2770 
/*
 * Fallback buffer for trace_event_buffer_lock_reserve(): used when the
 * reserve on the trace array's own buffer fails but the event file has
 * EVENT_FILE_FL_TRIGGER_COND set, so a trigger can still read the event.
 */
2771 static struct trace_buffer *temp_buffer;
2772 
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775 			  struct trace_event_file *trace_file,
2776 			  int type, unsigned long len,
2777 			  unsigned int trace_ctx)
2778 {
2779 	struct ring_buffer_event *entry;
2780 	struct trace_array *tr = trace_file->tr;
2781 	int val;
2782 
2783 	*current_rb = tr->array_buffer.buffer;
2784 
2785 	if (!tr->no_filter_buffering_ref &&
2786 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787 		preempt_disable_notrace();
2788 		/*
2789 		 * Filtering is on, so try to use the per cpu buffer first.
2790 		 * This buffer will simulate a ring_buffer_event,
2791 		 * where the type_len is zero and the array[0] will
2792 		 * hold the full length.
2793 		 * (see include/linux/ring-buffer.h for details on
2794 		 *  how the ring_buffer_event is structured).
2795 		 *
2796 		 * Using a temp buffer during filtering and copying it
2797 		 * on a matched filter is quicker than writing directly
2798 		 * into the ring buffer and then discarding it when
2799 		 * it doesn't match. That is because the discard
2800 		 * requires several atomic operations to get right.
2801 		 * Copying on match and doing nothing on a failed match
2802 		 * is still quicker than no copy on match, but having
2803 		 * to discard out of the ring buffer on a failed match.
2804 		 */
2805 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2806 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2807 
2808 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2809 
2810 			/*
2811 			 * Preemption is disabled, but interrupts and NMIs
2812 			 * can still come in now. If that happens after
2813 			 * the above increment, then it will have to go
2814 			 * back to the old method of allocating the event
2815 			 * on the ring buffer, and if the filter fails, it
2816 			 * will have to call ring_buffer_discard_commit()
2817 			 * to remove it.
2818 			 *
2819 			 * Need to also check the unlikely case that the
2820 			 * length is bigger than the temp buffer size.
2821 			 * If that happens, then the reserve is pretty much
2822 			 * guaranteed to fail, as the ring buffer currently
2823 			 * only allows events less than a page. But that may
2824 			 * change in the future, so let the ring buffer reserve
2825 			 * handle the failure in that case.
2826 			 */
2827 			if (val == 1 && likely(len <= max_len)) {
2828 				trace_event_setup(entry, type, trace_ctx);
2829 				entry->array[0] = len;
2830 				/* Return with preemption disabled */
2831 				return entry;
2832 			}
2833 			this_cpu_dec(trace_buffered_event_cnt);
2834 		}
2835 		/* __trace_buffer_lock_reserve() disables preemption */
2836 		preempt_enable_notrace();
2837 	}
2838 
2839 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2840 					    trace_ctx);
2841 	/*
2842 	 * If tracing is off, but we have triggers enabled
2843 	 * we still need to look at the event data. Use the temp_buffer
2844 	 * to store the trace event for the trigger to use. It's recursive
2845 	 * safe and will not be recorded anywhere.
2846 	 */
2847 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848 		*current_rb = temp_buffer;
2849 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2850 						    trace_ctx);
2851 	}
2852 	return entry;
2853 }
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2855 
/* Held by output_printk() while it formats into tracepoint_print_iter. */
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Held by tracepoint_printk_sysctl() for its whole update of tracepoint_printk. */
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2858 
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2860 {
2861 	struct trace_event_call *event_call;
2862 	struct trace_event_file *file;
2863 	struct trace_event *event;
2864 	unsigned long flags;
2865 	struct trace_iterator *iter = tracepoint_print_iter;
2866 
2867 	/* We should never get here if iter is NULL */
2868 	if (WARN_ON_ONCE(!iter))
2869 		return;
2870 
2871 	event_call = fbuffer->trace_file->event_call;
2872 	if (!event_call || !event_call->event.funcs ||
2873 	    !event_call->event.funcs->trace)
2874 		return;
2875 
2876 	file = fbuffer->trace_file;
2877 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879 	     !filter_match_preds(file->filter, fbuffer->entry)))
2880 		return;
2881 
2882 	event = &fbuffer->trace_file->event_call->event;
2883 
2884 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885 	trace_seq_init(&iter->seq);
2886 	iter->ent = fbuffer->entry;
2887 	event_call->event.funcs->trace(iter, 0, event);
2888 	trace_seq_putc(&iter->seq, 0);
2889 	printk("%s", iter->seq.buffer);
2890 
2891 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2892 }
2893 
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895 			     void *buffer, size_t *lenp,
2896 			     loff_t *ppos)
2897 {
2898 	int save_tracepoint_printk;
2899 	int ret;
2900 
2901 	mutex_lock(&tracepoint_printk_mutex);
2902 	save_tracepoint_printk = tracepoint_printk;
2903 
2904 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2905 
2906 	/*
2907 	 * This will force exiting early, as tracepoint_printk
2908 	 * is always zero when tracepoint_printk_iter is not allocated
2909 	 */
2910 	if (!tracepoint_print_iter)
2911 		tracepoint_printk = 0;
2912 
2913 	if (save_tracepoint_printk == tracepoint_printk)
2914 		goto out;
2915 
2916 	if (tracepoint_printk)
2917 		static_key_enable(&tracepoint_printk_key.key);
2918 	else
2919 		static_key_disable(&tracepoint_printk_key.key);
2920 
2921  out:
2922 	mutex_unlock(&tracepoint_printk_mutex);
2923 
2924 	return ret;
2925 }
2926 
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2928 {
2929 	enum event_trigger_type tt = ETT_NONE;
2930 	struct trace_event_file *file = fbuffer->trace_file;
2931 
2932 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933 			fbuffer->entry, &tt))
2934 		goto discard;
2935 
2936 	if (static_key_false(&tracepoint_printk_key.key))
2937 		output_printk(fbuffer);
2938 
2939 	if (static_branch_unlikely(&trace_event_exports_enabled))
2940 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2941 
2942 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2944 
2945 discard:
2946 	if (tt)
2947 		event_triggers_post_call(file, tt);
2948 
2949 }
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2951 
2952 /*
2953  * Skip 3:
2954  *
2955  *   trace_buffer_unlock_commit_regs()
2956  *   trace_event_buffer_commit()
2957  *   trace_event_raw_event_xxx()
2958  */
/*
 * trace_buffer_unlock_commit_regs() passes this as the skip count to
 * ftrace_trace_stack() when it was given no pt_regs.
 */
2959 # define STACK_SKIP 3
2960 
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962 				     struct trace_buffer *buffer,
2963 				     struct ring_buffer_event *event,
2964 				     unsigned int trace_ctx,
2965 				     struct pt_regs *regs)
2966 {
2967 	__buffer_unlock_commit(buffer, event);
2968 
2969 	/*
2970 	 * If regs is not set, then skip the necessary functions.
2971 	 * Note, we can still get here via blktrace, wakeup tracer
2972 	 * and mmiotrace, but that's ok if they lose a function or
2973 	 * two. They are not that meaningful.
2974 	 */
2975 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2977 }
2978 
/*
 * Commit @event to @buffer without recording any stack trace; the
 * stack-less counterpart of trace_buffer_unlock_commit_regs().
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	/* Only the commit itself, nothing else. */
	__buffer_unlock_commit(buffer, event);
}
2988 
2989 void
2990 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2991 	       parent_ip, unsigned int trace_ctx)
2992 {
2993 	struct trace_event_call *call = &event_function;
2994 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2995 	struct ring_buffer_event *event;
2996 	struct ftrace_entry *entry;
2997 
2998 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2999 					    trace_ctx);
3000 	if (!event)
3001 		return;
3002 	entry	= ring_buffer_event_data(event);
3003 	entry->ip			= ip;
3004 	entry->parent_ip		= parent_ip;
3005 
3006 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3007 		if (static_branch_unlikely(&trace_function_exports_enabled))
3008 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009 		__buffer_unlock_commit(buffer, event);
3010 	}
3011 }
3012 
3013 #ifdef CONFIG_STACKTRACE
3014 
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING	4
3017 
/* Number of return addresses each nesting level can hold. */
3018 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3019 
/* Scratch area one stack trace is saved into before it is copied out. */
3020 struct ftrace_stack {
3021 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3022 };
3023 
3024 
/* One scratch stack per nesting level. */
3025 struct ftrace_stacks {
3026 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3027 };
3028 
/* Per-CPU scratch stacks used by __ftrace_trace_stack(). */
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/*
 * Per-CPU nesting depth: __ftrace_trace_stack() increments it to pick its
 * scratch stack and decrements it again on the way out.
 */
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3031 
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033 				 unsigned int trace_ctx,
3034 				 int skip, struct pt_regs *regs)
3035 {
3036 	struct trace_event_call *call = &event_kernel_stack;
3037 	struct ring_buffer_event *event;
3038 	unsigned int size, nr_entries;
3039 	struct ftrace_stack *fstack;
3040 	struct stack_entry *entry;
3041 	int stackidx;
3042 
3043 	/*
3044 	 * Add one, for this function and the call to save_stack_trace()
3045 	 * If regs is set, then these functions will not be in the way.
3046 	 */
3047 #ifndef CONFIG_UNWINDER_ORC
3048 	if (!regs)
3049 		skip++;
3050 #endif
3051 
3052 	preempt_disable_notrace();
3053 
3054 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3055 
3056 	/* This should never happen. If it does, yell once and skip */
3057 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3058 		goto out;
3059 
3060 	/*
3061 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062 	 * interrupt will either see the value pre increment or post
3063 	 * increment. If the interrupt happens pre increment it will have
3064 	 * restored the counter when it returns.  We just need a barrier to
3065 	 * keep gcc from moving things around.
3066 	 */
3067 	barrier();
3068 
3069 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070 	size = ARRAY_SIZE(fstack->calls);
3071 
3072 	if (regs) {
3073 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3074 						   size, skip);
3075 	} else {
3076 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3077 	}
3078 
3079 	size = nr_entries * sizeof(unsigned long);
3080 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3082 				    trace_ctx);
3083 	if (!event)
3084 		goto out;
3085 	entry = ring_buffer_event_data(event);
3086 
3087 	memcpy(&entry->caller, fstack->calls, size);
3088 	entry->size = nr_entries;
3089 
3090 	if (!call_filter_check_discard(call, entry, buffer, event))
3091 		__buffer_unlock_commit(buffer, event);
3092 
3093  out:
3094 	/* Again, don't let gcc optimize things here */
3095 	barrier();
3096 	__this_cpu_dec(ftrace_stack_reserve);
3097 	preempt_enable_notrace();
3098 
3099 }
3100 
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102 				      struct trace_buffer *buffer,
3103 				      unsigned int trace_ctx,
3104 				      int skip, struct pt_regs *regs)
3105 {
3106 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3107 		return;
3108 
3109 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3110 }
3111 
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3113 		   int skip)
3114 {
3115 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3116 
3117 	if (rcu_is_watching()) {
3118 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3119 		return;
3120 	}
3121 
3122 	/*
3123 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124 	 * but if the above rcu_is_watching() failed, then the NMI
3125 	 * triggered someplace critical, and ct_irq_enter() should
3126 	 * not be called from NMI.
3127 	 */
3128 	if (unlikely(in_nmi()))
3129 		return;
3130 
3131 	ct_irq_enter_irqson();
3132 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133 	ct_irq_exit_irqson();
3134 }
3135 
3136 /**
3137  * trace_dump_stack - record a stack back trace in the trace buffer
3138  * @skip: Number of functions to skip (helper handlers)
3139  */
3140 void trace_dump_stack(int skip)
3141 {
3142 	if (tracing_disabled || tracing_selftest_running)
3143 		return;
3144 
3145 #ifndef CONFIG_UNWINDER_ORC
3146 	/* Skip 1 to skip this function. */
3147 	skip++;
3148 #endif
3149 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3150 			     tracing_gen_ctx(), skip, NULL);
3151 }
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
3153 
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU guard: non-zero while a user stack event is being recorded */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the user space stack of the current task into @buffer as a
 * TRACE_USER_STACK event. Nothing is recorded unless @tr has
 * TRACE_ITER_USERSTACKTRACE set, and never from NMI context.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * Saving the user stack can (and often does) fault, and NMIs
	 * can not handle page faults, even with fix ups.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * Saving the user stack may trigger other kernel events; the
	 * per-CPU count keeps those from recursing back into here.
	 */
	preempt_disable();
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), trace_ctx);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			memset(&entry->caller, 0, sizeof(entry->caller));
			stack_trace_save_user(entry->caller,
					      FTRACE_STACK_ENTRIES);

			if (!call_filter_check_discard(&event_user_stack, entry,
						       buffer, event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}
	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Without user stack trace support, recording is a no-op */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3210 
3211 #endif /* CONFIG_STACKTRACE */
3212 
3213 static inline void
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215 			  unsigned long long delta)
3216 {
3217 	entry->bottom_delta_ts = delta & U32_MAX;
3218 	entry->top_delta_ts = (delta >> 32);
3219 }
3220 
3221 void trace_last_func_repeats(struct trace_array *tr,
3222 			     struct trace_func_repeats *last_info,
3223 			     unsigned int trace_ctx)
3224 {
3225 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3226 	struct func_repeats_entry *entry;
3227 	struct ring_buffer_event *event;
3228 	u64 delta;
3229 
3230 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231 					    sizeof(*entry), trace_ctx);
3232 	if (!event)
3233 		return;
3234 
3235 	delta = ring_buffer_event_time_stamp(buffer, event) -
3236 		last_info->ts_last_call;
3237 
3238 	entry = ring_buffer_event_data(event);
3239 	entry->ip = last_info->ip;
3240 	entry->parent_ip = last_info->parent_ip;
3241 	entry->count = last_info->count;
3242 	func_repeats_set_delta_ts(entry, delta);
3243 
3244 	__buffer_unlock_commit(buffer, event);
3245 }
3246 
/*
 * Per-CPU scratch space used to format trace printk messages before they
 * are copied into the ring buffer; created for use with alloc_percpu.
 */
struct trace_buffer_struct {
	/* number of buffer[] slots currently handed out by get_trace_buf() */
	int nesting;
	/* one TRACE_BUF_SIZE slot per nesting level */
	char buffer[4][TRACE_BUF_SIZE];
};

/* NULL until alloc_percpu_trace_buffer() has succeeded */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3254 
3255 /*
3256  * This allows for lockless recording.  If we're nested too deeply, then
3257  * this returns NULL.
3258  */
3259 static char *get_trace_buf(void)
3260 {
3261 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3262 
3263 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3264 		return NULL;
3265 
3266 	buffer->nesting++;
3267 
3268 	/* Interrupts must see nesting incremented before we use the buffer */
3269 	barrier();
3270 	return &buffer->buffer[buffer->nesting - 1][0];
3271 }
3272 
/*
 * Give back the slot obtained from get_trace_buf() by dropping this
 * CPU's nesting count by one.
 */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3279 
3280 static int alloc_percpu_trace_buffer(void)
3281 {
3282 	struct trace_buffer_struct __percpu *buffers;
3283 
3284 	if (trace_percpu_buffer)
3285 		return 0;
3286 
3287 	buffers = alloc_percpu(struct trace_buffer_struct);
3288 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3289 		return -ENOMEM;
3290 
3291 	trace_percpu_buffer = buffers;
3292 	return 0;
3293 }
3294 
3295 static int buffers_allocated;
3296 
3297 void trace_printk_init_buffers(void)
3298 {
3299 	if (buffers_allocated)
3300 		return;
3301 
3302 	if (alloc_percpu_trace_buffer())
3303 		return;
3304 
3305 	/* trace_printk() is for debug use only. Don't use it in production. */
3306 
3307 	pr_warn("\n");
3308 	pr_warn("**********************************************************\n");
3309 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3310 	pr_warn("**                                                      **\n");
3311 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3312 	pr_warn("**                                                      **\n");
3313 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3314 	pr_warn("** unsafe for production use.                           **\n");
3315 	pr_warn("**                                                      **\n");
3316 	pr_warn("** If you see this message and you are not debugging    **\n");
3317 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3318 	pr_warn("**                                                      **\n");
3319 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3320 	pr_warn("**********************************************************\n");
3321 
3322 	/* Expand the buffers to set size */
3323 	tracing_update_buffers();
3324 
3325 	buffers_allocated = 1;
3326 
3327 	/*
3328 	 * trace_printk_init_buffers() can be called by modules.
3329 	 * If that happens, then we need to start cmdline recording
3330 	 * directly here. If the global_trace.buffer is already
3331 	 * allocated here, then this was called by module code.
3332 	 */
3333 	if (global_trace.array_buffer.buffer)
3334 		tracing_start_cmdline_record();
3335 }
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3337 
3338 void trace_printk_start_comm(void)
3339 {
3340 	/* Start tracing comms if trace printk is set */
3341 	if (!buffers_allocated)
3342 		return;
3343 	tracing_start_cmdline_record();
3344 }
3345 
3346 static void trace_printk_start_stop_comm(int enabled)
3347 {
3348 	if (!buffers_allocated)
3349 		return;
3350 
3351 	if (enabled)
3352 		tracing_start_cmdline_record();
3353 	else
3354 		tracing_stop_cmdline_record();
3355 }
3356 
3357 /**
3358  * trace_vbprintk - write binary msg to tracing buffer
3359  * @ip:    The address of the caller
3360  * @fmt:   The string format to write to the buffer
3361  * @args:  Arguments for @fmt
3362  */
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3364 {
3365 	struct trace_event_call *call = &event_bprint;
3366 	struct ring_buffer_event *event;
3367 	struct trace_buffer *buffer;
3368 	struct trace_array *tr = &global_trace;
3369 	struct bprint_entry *entry;
3370 	unsigned int trace_ctx;
3371 	char *tbuffer;
3372 	int len = 0, size;
3373 
3374 	if (unlikely(tracing_selftest_running || tracing_disabled))
3375 		return 0;
3376 
3377 	/* Don't pollute graph traces with trace_vprintk internals */
3378 	pause_graph_tracing();
3379 
3380 	trace_ctx = tracing_gen_ctx();
3381 	preempt_disable_notrace();
3382 
3383 	tbuffer = get_trace_buf();
3384 	if (!tbuffer) {
3385 		len = 0;
3386 		goto out_nobuffer;
3387 	}
3388 
3389 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3390 
3391 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3392 		goto out_put;
3393 
3394 	size = sizeof(*entry) + sizeof(u32) * len;
3395 	buffer = tr->array_buffer.buffer;
3396 	ring_buffer_nest_start(buffer);
3397 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3398 					    trace_ctx);
3399 	if (!event)
3400 		goto out;
3401 	entry = ring_buffer_event_data(event);
3402 	entry->ip			= ip;
3403 	entry->fmt			= fmt;
3404 
3405 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3407 		__buffer_unlock_commit(buffer, event);
3408 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3409 	}
3410 
3411 out:
3412 	ring_buffer_nest_end(buffer);
3413 out_put:
3414 	put_trace_buf();
3415 
3416 out_nobuffer:
3417 	preempt_enable_notrace();
3418 	unpause_graph_tracing();
3419 
3420 	return len;
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
3423 
3424 __printf(3, 0)
3425 static int
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427 		      unsigned long ip, const char *fmt, va_list args)
3428 {
3429 	struct trace_event_call *call = &event_print;
3430 	struct ring_buffer_event *event;
3431 	int len = 0, size;
3432 	struct print_entry *entry;
3433 	unsigned int trace_ctx;
3434 	char *tbuffer;
3435 
3436 	if (tracing_disabled || tracing_selftest_running)
3437 		return 0;
3438 
3439 	/* Don't pollute graph traces with trace_vprintk internals */
3440 	pause_graph_tracing();
3441 
3442 	trace_ctx = tracing_gen_ctx();
3443 	preempt_disable_notrace();
3444 
3445 
3446 	tbuffer = get_trace_buf();
3447 	if (!tbuffer) {
3448 		len = 0;
3449 		goto out_nobuffer;
3450 	}
3451 
3452 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3453 
3454 	size = sizeof(*entry) + len + 1;
3455 	ring_buffer_nest_start(buffer);
3456 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3457 					    trace_ctx);
3458 	if (!event)
3459 		goto out;
3460 	entry = ring_buffer_event_data(event);
3461 	entry->ip = ip;
3462 
3463 	memcpy(&entry->buf, tbuffer, len + 1);
3464 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3465 		__buffer_unlock_commit(buffer, event);
3466 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3467 	}
3468 
3469 out:
3470 	ring_buffer_nest_end(buffer);
3471 	put_trace_buf();
3472 
3473 out_nobuffer:
3474 	preempt_enable_notrace();
3475 	unpause_graph_tracing();
3476 
3477 	return len;
3478 }
3479 
3480 __printf(3, 0)
3481 int trace_array_vprintk(struct trace_array *tr,
3482 			unsigned long ip, const char *fmt, va_list args)
3483 {
3484 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3485 }
3486 
3487 /**
3488  * trace_array_printk - Print a message to a specific instance
3489  * @tr: The instance trace_array descriptor
3490  * @ip: The instruction pointer that this is called from.
3491  * @fmt: The format to print (printf format)
3492  *
3493  * If a subsystem sets up its own instance, they have the right to
3494  * printk strings into their tracing instance buffer using this
3495  * function. Note, this function will not write into the top level
3496  * buffer (use trace_printk() for that), as writing into the top level
3497  * buffer should only have events that can be individually disabled.
3498  * trace_printk() is only used for debugging a kernel, and should not
3499  * be ever incorporated in normal use.
3500  *
3501  * trace_array_printk() can be used, as it will not add noise to the
3502  * top level tracing buffer.
3503  *
3504  * Note, trace_array_init_printk() must be called on @tr before this
3505  * can be used.
3506  */
3507 __printf(3, 0)
3508 int trace_array_printk(struct trace_array *tr,
3509 		       unsigned long ip, const char *fmt, ...)
3510 {
3511 	int ret;
3512 	va_list ap;
3513 
3514 	if (!tr)
3515 		return -ENOENT;
3516 
3517 	/* This is only allowed for created instances */
3518 	if (tr == &global_trace)
3519 		return 0;
3520 
3521 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3522 		return 0;
3523 
3524 	va_start(ap, fmt);
3525 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3526 	va_end(ap);
3527 	return ret;
3528 }
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3530 
3531 /**
3532  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533  * @tr: The trace array to initialize the buffers for
3534  *
3535  * As trace_array_printk() only writes into instances, they are OK to
3536  * have in the kernel (unlike trace_printk()). This needs to be called
3537  * before trace_array_printk() can be used on a trace_array.
3538  */
3539 int trace_array_init_printk(struct trace_array *tr)
3540 {
3541 	if (!tr)
3542 		return -ENOENT;
3543 
3544 	/* This is only allowed for created instances */
3545 	if (tr == &global_trace)
3546 		return -EINVAL;
3547 
3548 	return alloc_percpu_trace_buffer();
3549 }
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3551 
3552 __printf(3, 4)
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554 			   unsigned long ip, const char *fmt, ...)
3555 {
3556 	int ret;
3557 	va_list ap;
3558 
3559 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3560 		return 0;
3561 
3562 	va_start(ap, fmt);
3563 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3564 	va_end(ap);
3565 	return ret;
3566 }
3567 
3568 __printf(2, 0)
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3570 {
3571 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3572 }
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3574 
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3576 {
3577 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3578 
3579 	iter->idx++;
3580 	if (buf_iter)
3581 		ring_buffer_iter_advance(buf_iter);
3582 }
3583 
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586 		unsigned long *lost_events)
3587 {
3588 	struct ring_buffer_event *event;
3589 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3590 
3591 	if (buf_iter) {
3592 		event = ring_buffer_iter_peek(buf_iter, ts);
3593 		if (lost_events)
3594 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595 				(unsigned long)-1 : 0;
3596 	} else {
3597 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3598 					 lost_events);
3599 	}
3600 
3601 	if (event) {
3602 		iter->ent_size = ring_buffer_event_length(event);
3603 		return ring_buffer_event_data(event);
3604 	}
3605 	iter->ent_size = 0;
3606 	return NULL;
3607 }
3608 
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611 		  unsigned long *missing_events, u64 *ent_ts)
3612 {
3613 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3614 	struct trace_entry *ent, *next = NULL;
3615 	unsigned long lost_events = 0, next_lost = 0;
3616 	int cpu_file = iter->cpu_file;
3617 	u64 next_ts = 0, ts;
3618 	int next_cpu = -1;
3619 	int next_size = 0;
3620 	int cpu;
3621 
3622 	/*
3623 	 * If we are in a per_cpu trace file, don't bother by iterating over
3624 	 * all cpu and peek directly.
3625 	 */
3626 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3628 			return NULL;
3629 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3630 		if (ent_cpu)
3631 			*ent_cpu = cpu_file;
3632 
3633 		return ent;
3634 	}
3635 
3636 	for_each_tracing_cpu(cpu) {
3637 
3638 		if (ring_buffer_empty_cpu(buffer, cpu))
3639 			continue;
3640 
3641 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3642 
3643 		/*
3644 		 * Pick the entry with the smallest timestamp:
3645 		 */
3646 		if (ent && (!next || ts < next_ts)) {
3647 			next = ent;
3648 			next_cpu = cpu;
3649 			next_ts = ts;
3650 			next_lost = lost_events;
3651 			next_size = iter->ent_size;
3652 		}
3653 	}
3654 
3655 	iter->ent_size = next_size;
3656 
3657 	if (ent_cpu)
3658 		*ent_cpu = next_cpu;
3659 
3660 	if (ent_ts)
3661 		*ent_ts = next_ts;
3662 
3663 	if (missing_events)
3664 		*missing_events = next_lost;
3665 
3666 	return next;
3667 }
3668 
3669 #define STATIC_FMT_BUF_SIZE	128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3671 
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3673 {
3674 	char *tmp;
3675 
3676 	/*
3677 	 * iter->tr is NULL when used with tp_printk, which makes
3678 	 * this get called where it is not safe to call krealloc().
3679 	 */
3680 	if (!iter->tr || iter->fmt == static_fmt_buf)
3681 		return NULL;
3682 
3683 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3684 		       GFP_KERNEL);
3685 	if (tmp) {
3686 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3687 		iter->fmt = tmp;
3688 	}
3689 
3690 	return tmp;
3691 }
3692 
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3695 			   bool star, int len)
3696 {
3697 	unsigned long addr = (unsigned long)str;
3698 	struct trace_event *trace_event;
3699 	struct trace_event_call *event;
3700 
3701 	/* Ignore strings with no length */
3702 	if (star && !len)
3703 		return true;
3704 
3705 	/* OK if part of the event data */
3706 	if ((addr >= (unsigned long)iter->ent) &&
3707 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3708 		return true;
3709 
3710 	/* OK if part of the temp seq buffer */
3711 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3713 		return true;
3714 
3715 	/* Core rodata can not be freed */
3716 	if (is_kernel_rodata(addr))
3717 		return true;
3718 
3719 	if (trace_is_tracepoint_string(str))
3720 		return true;
3721 
3722 	/*
3723 	 * Now this could be a module event, referencing core module
3724 	 * data, which is OK.
3725 	 */
3726 	if (!iter->ent)
3727 		return false;
3728 
3729 	trace_event = ftrace_find_event(iter->ent->type);
3730 	if (!trace_event)
3731 		return false;
3732 
3733 	event = container_of(trace_event, struct trace_event_call, event);
3734 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3735 		return false;
3736 
3737 	/* Would rather have rodata, but this will suffice */
3738 	if (within_module_core(addr, event->module))
3739 		return true;
3740 
3741 	return false;
3742 }
3743 
3744 static const char *show_buffer(struct trace_seq *s)
3745 {
3746 	struct seq_buf *seq = &s->seq;
3747 
3748 	seq_buf_terminate(seq);
3749 
3750 	return seq->buffer;
3751 }
3752 
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3754 
3755 static int test_can_verify_check(const char *fmt, ...)
3756 {
3757 	char buf[16];
3758 	va_list ap;
3759 	int ret;
3760 
3761 	/*
3762 	 * The verifier is dependent on vsnprintf() modifies the va_list
3763 	 * passed to it, where it is sent as a reference. Some architectures
3764 	 * (like x86_32) passes it by value, which means that vsnprintf()
3765 	 * does not modify the va_list passed to it, and the verifier
3766 	 * would then need to be able to understand all the values that
3767 	 * vsnprintf can use. If it is passed by value, then the verifier
3768 	 * is disabled.
3769 	 */
3770 	va_start(ap, fmt);
3771 	vsnprintf(buf, 16, "%d", ap);
3772 	ret = va_arg(ap, int);
3773 	va_end(ap);
3774 
3775 	return ret;
3776 }
3777 
3778 static void test_can_verify(void)
3779 {
3780 	if (!test_can_verify_check("%d %d", 0, 1)) {
3781 		pr_info("trace event string verifier disabled\n");
3782 		static_branch_inc(&trace_no_verify);
3783 	}
3784 }
3785 
/**
 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 * @fmt: The format used to print the event
 * @ap: The va_list holding the data to print from @fmt.
 *
 * This writes the data into the @iter->seq buffer using the data from
 * @fmt and @ap. If the format has a %s, then the source of the string
 * is examined to make sure it is safe to print, otherwise it will
 * warn, print the pointer value (followed by the string contents when
 * they could be read without faulting) in parentheses, and then print
 * "[UNSAFE-MEMORY]" in place of the dereferenced string pointer.
 *
 * The check is skipped, and @fmt is printed as is, when the verifier has
 * been disabled (trace_no_verify) or when @iter uses the static format
 * buffer.
 */
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
			 va_list ap)
{
	/* p marks the start of the part of fmt that has not been printed yet */
	const char *p = fmt;
	const char *str;
	int i, j;

	if (WARN_ON_ONCE(!fmt))
		return;

	if (static_branch_unlikely(&trace_no_verify))
		goto print;

	/* Don't bother checking when doing a ftrace_dump() */
	if (iter->fmt == static_fmt_buf)
		goto print;

	while (*p) {
		bool star = false;
		int len = 0;

		j = 0;

		/*
		 * We only care about %s and variants. On leaving this loop
		 * via break, p[i] is the '%' and p[i+j] the 's' of the
		 * conversion that was found.
		 */
		for (i = 0; p[i]; i++) {
			if (i + 1 >= iter->fmt_size) {
				/*
				 * If we can't expand the copy buffer,
				 * just print it.
				 */
				if (!trace_iter_expand_format(iter))
					goto print;
			}

			/* Step over the character following a backslash */
			if (p[i] == '\\' && p[i+1]) {
				i++;
				continue;
			}
			if (p[i] == '%') {
				/* Need to test cases like %08.*s */
				for (j = 1; p[i+j]; j++) {
					if (isdigit(p[i+j]) ||
					    p[i+j] == '.')
						continue;
					if (p[i+j] == '*') {
						star = true;
						continue;
					}
					break;
				}
				if (p[i+j] == 's')
					break;
				/* Not a string conversion; forget any '*' seen */
				star = false;
			}
			j = 0;
		}
		/* If no %s found then just print normally */
		if (!p[i])
			break;

		/* Copy up to the %s, and print that */
		strncpy(iter->fmt, p, i);
		iter->fmt[i] = '\0';
		trace_seq_vprintf(&iter->seq, iter->fmt, ap);

		/*
		 * If iter->seq is full, the above call no longer guarantees
		 * that ap is in sync with fmt processing, and further calls
		 * to va_arg() can return wrong positional arguments.
		 *
		 * Ensure that ap is no longer used in this case.
		 */
		if (iter->seq.full) {
			p = "";
			break;
		}

		/* A '*' in the conversion takes its length from an int argument */
		if (star)
			len = va_arg(ap, int);

		/* The ap now points to the string data of the %s */
		str = va_arg(ap, const char *);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
			      "fmt: '%s' current_buffer: '%s'",
			      fmt, show_buffer(&iter->seq))) {
			int ret;

			/* Try to safely read the string */
			if (star) {
				/* Clamp len so the copy and its NUL fit in iter->fmt */
				if (len + 1 > iter->fmt_size)
					len = iter->fmt_size - 1;
				if (len < 0)
					len = 0;
				ret = copy_from_kernel_nofault(iter->fmt, str, len);
				iter->fmt[len] = 0;
				/* The replacement below is printed with a plain "%s" */
				star = false;
			} else {
				ret = strncpy_from_kernel_nofault(iter->fmt, str,
								  iter->fmt_size);
			}
			if (ret < 0)
				trace_seq_printf(&iter->seq, "(0x%px)", str);
			else
				trace_seq_printf(&iter->seq, "(0x%px:%s)",
						 str, iter->fmt);
			str = "[UNSAFE-MEMORY]";
			strcpy(iter->fmt, "%s");
		} else {
			/* Safe: print it with the original conversion, '%' through 's' */
			strncpy(iter->fmt, p + i, j + 1);
			iter->fmt[j+1] = '\0';
		}
		if (star)
			trace_seq_printf(&iter->seq, iter->fmt, len, str);
		else
			trace_seq_printf(&iter->seq, iter->fmt, str);

		/* Continue right after the 's' of the conversion just handled */
		p += i + j + 1;
	}
 print:
	/* Whatever remains holds no %s to verify; print it in one go */
	if (*p)
		trace_seq_vprintf(&iter->seq, p, ap);
}
3930 
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3932 {
3933 	const char *p, *new_fmt;
3934 	char *q;
3935 
3936 	if (WARN_ON_ONCE(!fmt))
3937 		return fmt;
3938 
3939 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3940 		return fmt;
3941 
3942 	p = fmt;
3943 	new_fmt = q = iter->fmt;
3944 	while (*p) {
3945 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946 			if (!trace_iter_expand_format(iter))
3947 				return fmt;
3948 
3949 			q += iter->fmt - new_fmt;
3950 			new_fmt = iter->fmt;
3951 		}
3952 
3953 		*q++ = *p++;
3954 
3955 		/* Replace %p with %px */
3956 		if (p[-1] == '%') {
3957 			if (p[0] == '%') {
3958 				*q++ = *p++;
3959 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3960 				*q++ = *p++;
3961 				*q++ = 'x';
3962 			}
3963 		}
3964 	}
3965 	*q = '\0';
3966 
3967 	return new_fmt;
3968 }
3969 
3970 #define STATIC_TEMP_BUF_SIZE	128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3972 
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975 					  int *ent_cpu, u64 *ent_ts)
3976 {
3977 	/* __find_next_entry will reset ent_size */
3978 	int ent_size = iter->ent_size;
3979 	struct trace_entry *entry;
3980 
3981 	/*
3982 	 * If called from ftrace_dump(), then the iter->temp buffer
3983 	 * will be the static_temp_buf and not created from kmalloc.
3984 	 * If the entry size is greater than the buffer, we can
3985 	 * not save it. Just return NULL in that case. This is only
3986 	 * used to add markers when two consecutive events' time
3987 	 * stamps have a large delta. See trace_print_lat_context()
3988 	 */
3989 	if (iter->temp == static_temp_buf &&
3990 	    STATIC_TEMP_BUF_SIZE < ent_size)
3991 		return NULL;
3992 
3993 	/*
3994 	 * The __find_next_entry() may call peek_next_entry(), which may
3995 	 * call ring_buffer_peek() that may make the contents of iter->ent
3996 	 * undefined. Need to copy iter->ent now.
3997 	 */
3998 	if (iter->ent && iter->ent != iter->temp) {
3999 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4001 			void *temp;
4002 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4003 			if (!temp)
4004 				return NULL;
4005 			kfree(iter->temp);
4006 			iter->temp = temp;
4007 			iter->temp_size = iter->ent_size;
4008 		}
4009 		memcpy(iter->temp, iter->ent, iter->ent_size);
4010 		iter->ent = iter->temp;
4011 	}
4012 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013 	/* Put back the original ent_size */
4014 	iter->ent_size = ent_size;
4015 
4016 	return entry;
4017 }
4018 
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4021 {
4022 	iter->ent = __find_next_entry(iter, &iter->cpu,
4023 				      &iter->lost_events, &iter->ts);
4024 
4025 	if (iter->ent)
4026 		trace_iterator_increment(iter);
4027 
4028 	return iter->ent ? iter : NULL;
4029 }
4030 
4031 static void trace_consume(struct trace_iterator *iter)
4032 {
4033 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034 			    &iter->lost_events);
4035 }
4036 
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4038 {
4039 	struct trace_iterator *iter = m->private;
4040 	int i = (int)*pos;
4041 	void *ent;
4042 
4043 	WARN_ON_ONCE(iter->leftover);
4044 
4045 	(*pos)++;
4046 
4047 	/* can't go backwards */
4048 	if (iter->idx > i)
4049 		return NULL;
4050 
4051 	if (iter->idx < 0)
4052 		ent = trace_find_next_entry_inc(iter);
4053 	else
4054 		ent = iter;
4055 
4056 	while (ent && iter->idx < i)
4057 		ent = trace_find_next_entry_inc(iter);
4058 
4059 	iter->pos = *pos;
4060 
4061 	return ent;
4062 }
4063 
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4065 {
4066 	struct ring_buffer_iter *buf_iter;
4067 	unsigned long entries = 0;
4068 	u64 ts;
4069 
4070 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4071 
4072 	buf_iter = trace_buffer_iter(iter, cpu);
4073 	if (!buf_iter)
4074 		return;
4075 
4076 	ring_buffer_iter_reset(buf_iter);
4077 
4078 	/*
4079 	 * We could have the case with the max latency tracers
4080 	 * that a reset never took place on a cpu. This is evident
4081 	 * by the timestamp being before the start of the buffer.
4082 	 */
4083 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084 		if (ts >= iter->array_buffer->time_start)
4085 			break;
4086 		entries++;
4087 		ring_buffer_iter_advance(buf_iter);
4088 	}
4089 
4090 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4091 }
4092 
/*
 * seq_file ->start callback for reading the trace.
 *
 * The current tracer is copied to avoid a global locking
 * all around.
 *
 * Positions @iter at entry *@pos, rewinding and re-walking the buffer
 * when *@pos is not where the previous read stopped. On the normal path
 * this returns with the trace event read lock and the trace access lock
 * held. With CONFIG_TRACER_MAX_TRACE, a snapshot iterator on a tracer
 * that uses the max buffer gets ERR_PTR(-EBUSY) instead, and no lock is
 * taken on that path.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/*
	 * copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace, the pointer to the
	 * name may be used instead of a strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
	 */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
		*iter->trace = *tr->current_trace;
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Returns before any read lock is taken; s_stop() has the same test */
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
#endif

	if (*pos != iter->pos) {
		/* Not continuing the previous read: start over from the beginning */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		/* Walk forward until entry *pos is reached or the entries run out */
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			/* s_next() increments the position, so start one before *pos */
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
4154 
/*
 * seq_file ->stop callback: release the trace access lock and the trace
 * event read lock that s_start() took.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* s_start() returned -EBUSY without locking in this case */
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
4167 
4168 static void
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170 		      unsigned long *entries, int cpu)
4171 {
4172 	unsigned long count;
4173 
4174 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4175 	/*
4176 	 * If this buffer has skipped entries, then we hold all
4177 	 * entries for the trace and we need to ignore the
4178 	 * ones before the time stamp.
4179 	 */
4180 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182 		/* total is the same as the entries */
4183 		*total = count;
4184 	} else
4185 		*total = count +
4186 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4187 	*entries = count;
4188 }
4189 
4190 static void
4191 get_total_entries(struct array_buffer *buf,
4192 		  unsigned long *total, unsigned long *entries)
4193 {
4194 	unsigned long t, e;
4195 	int cpu;
4196 
4197 	*total = 0;
4198 	*entries = 0;
4199 
4200 	for_each_tracing_cpu(cpu) {
4201 		get_total_entries_cpu(buf, &t, &e, cpu);
4202 		*total += t;
4203 		*entries += e;
4204 	}
4205 }
4206 
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4208 {
4209 	unsigned long total, entries;
4210 
4211 	if (!tr)
4212 		tr = &global_trace;
4213 
4214 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4215 
4216 	return entries;
4217 }
4218 
4219 unsigned long trace_total_entries(struct trace_array *tr)
4220 {
4221 	unsigned long total, entries;
4222 
4223 	if (!tr)
4224 		tr = &global_trace;
4225 
4226 	get_total_entries(&tr->array_buffer, &total, &entries);
4227 
4228 	return entries;
4229 }
4230 
/* Emit the column legend that heads latency format output */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off/BH-disabled\n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| / _-=> migrate-disable \n"
		"#                  ||||| /     delay           \n"
		"#  cmd     pid     |||||| time  |   caller     \n"
		"#     \\   /        ||||||  \\    |    /       \n";

	seq_puts(m, legend);
}
4243 
/*
 * Emit the header line giving the entries in @buf, the entries written,
 * and the number of online CPUs.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long in_buffer;
	unsigned long written;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4254 
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4256 				   unsigned int flags)
4257 {
4258 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4259 
4260 	print_event_info(buf, m);
4261 
4262 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4263 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4264 }
4265 
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4267 				       unsigned int flags)
4268 {
4269 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270 	static const char space[] = "            ";
4271 	int prec = tgid ? 12 : 2;
4272 
4273 	print_event_info(buf, m);
4274 
4275 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4276 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4277 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4278 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4279 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4280 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4281 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4282 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4283 }
4284 
4285 void
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4287 {
4288 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289 	struct array_buffer *buf = iter->array_buffer;
4290 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291 	struct tracer *type = iter->trace;
4292 	unsigned long entries;
4293 	unsigned long total;
4294 	const char *name = type->name;
4295 
4296 	get_total_entries(buf, &total, &entries);
4297 
4298 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4299 		   name, UTS_RELEASE);
4300 	seq_puts(m, "# -----------------------------------"
4301 		 "---------------------------------\n");
4302 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304 		   nsecs_to_usecs(data->saved_latency),
4305 		   entries,
4306 		   total,
4307 		   buf->cpu,
4308 		   preempt_model_none()      ? "server" :
4309 		   preempt_model_voluntary() ? "desktop" :
4310 		   preempt_model_full()      ? "preempt" :
4311 		   preempt_model_rt()        ? "preempt_rt" :
4312 		   "unknown",
4313 		   /* These are reserved for later use */
4314 		   0, 0, 0, 0);
4315 #ifdef CONFIG_SMP
4316 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4317 #else
4318 	seq_puts(m, ")\n");
4319 #endif
4320 	seq_puts(m, "#    -----------------\n");
4321 	seq_printf(m, "#    | task: %.16s-%d "
4322 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323 		   data->comm, data->pid,
4324 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325 		   data->policy, data->rt_priority);
4326 	seq_puts(m, "#    -----------------\n");
4327 
4328 	if (data->critical_start) {
4329 		seq_puts(m, "#  => started at: ");
4330 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331 		trace_print_seq(m, &iter->seq);
4332 		seq_puts(m, "\n#  => ended at:   ");
4333 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334 		trace_print_seq(m, &iter->seq);
4335 		seq_puts(m, "\n#\n");
4336 	}
4337 
4338 	seq_puts(m, "#\n");
4339 }
4340 
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4342 {
4343 	struct trace_seq *s = &iter->seq;
4344 	struct trace_array *tr = iter->tr;
4345 
4346 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4347 		return;
4348 
4349 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4350 		return;
4351 
4352 	if (cpumask_available(iter->started) &&
4353 	    cpumask_test_cpu(iter->cpu, iter->started))
4354 		return;
4355 
4356 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4357 		return;
4358 
4359 	if (cpumask_available(iter->started))
4360 		cpumask_set_cpu(iter->cpu, iter->started);
4361 
4362 	/* Don't print started cpu buffer for the first entry of the trace */
4363 	if (iter->idx > 1)
4364 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4365 				iter->cpu);
4366 }
4367 
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4369 {
4370 	struct trace_array *tr = iter->tr;
4371 	struct trace_seq *s = &iter->seq;
4372 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373 	struct trace_entry *entry;
4374 	struct trace_event *event;
4375 
4376 	entry = iter->ent;
4377 
4378 	test_cpu_buff_start(iter);
4379 
4380 	event = ftrace_find_event(entry->type);
4381 
4382 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384 			trace_print_lat_context(iter);
4385 		else
4386 			trace_print_context(iter);
4387 	}
4388 
4389 	if (trace_seq_has_overflowed(s))
4390 		return TRACE_TYPE_PARTIAL_LINE;
4391 
4392 	if (event)
4393 		return event->funcs->trace(iter, sym_flags, event);
4394 
4395 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4396 
4397 	return trace_handle_return(s);
4398 }
4399 
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4401 {
4402 	struct trace_array *tr = iter->tr;
4403 	struct trace_seq *s = &iter->seq;
4404 	struct trace_entry *entry;
4405 	struct trace_event *event;
4406 
4407 	entry = iter->ent;
4408 
4409 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410 		trace_seq_printf(s, "%d %d %llu ",
4411 				 entry->pid, iter->cpu, iter->ts);
4412 
4413 	if (trace_seq_has_overflowed(s))
4414 		return TRACE_TYPE_PARTIAL_LINE;
4415 
4416 	event = ftrace_find_event(entry->type);
4417 	if (event)
4418 		return event->funcs->raw(iter, 0, event);
4419 
4420 	trace_seq_printf(s, "%d ?\n", entry->type);
4421 
4422 	return trace_handle_return(s);
4423 }
4424 
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4426 {
4427 	struct trace_array *tr = iter->tr;
4428 	struct trace_seq *s = &iter->seq;
4429 	unsigned char newline = '\n';
4430 	struct trace_entry *entry;
4431 	struct trace_event *event;
4432 
4433 	entry = iter->ent;
4434 
4435 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4437 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4439 		if (trace_seq_has_overflowed(s))
4440 			return TRACE_TYPE_PARTIAL_LINE;
4441 	}
4442 
4443 	event = ftrace_find_event(entry->type);
4444 	if (event) {
4445 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446 		if (ret != TRACE_TYPE_HANDLED)
4447 			return ret;
4448 	}
4449 
4450 	SEQ_PUT_FIELD(s, newline);
4451 
4452 	return trace_handle_return(s);
4453 }
4454 
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4456 {
4457 	struct trace_array *tr = iter->tr;
4458 	struct trace_seq *s = &iter->seq;
4459 	struct trace_entry *entry;
4460 	struct trace_event *event;
4461 
4462 	entry = iter->ent;
4463 
4464 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465 		SEQ_PUT_FIELD(s, entry->pid);
4466 		SEQ_PUT_FIELD(s, iter->cpu);
4467 		SEQ_PUT_FIELD(s, iter->ts);
4468 		if (trace_seq_has_overflowed(s))
4469 			return TRACE_TYPE_PARTIAL_LINE;
4470 	}
4471 
4472 	event = ftrace_find_event(entry->type);
4473 	return event ? event->funcs->binary(iter, 0, event) :
4474 		TRACE_TYPE_HANDLED;
4475 }
4476 
4477 int trace_empty(struct trace_iterator *iter)
4478 {
4479 	struct ring_buffer_iter *buf_iter;
4480 	int cpu;
4481 
4482 	/* If we are looking at one CPU buffer, only check that one */
4483 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484 		cpu = iter->cpu_file;
4485 		buf_iter = trace_buffer_iter(iter, cpu);
4486 		if (buf_iter) {
4487 			if (!ring_buffer_iter_empty(buf_iter))
4488 				return 0;
4489 		} else {
4490 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491 				return 0;
4492 		}
4493 		return 1;
4494 	}
4495 
4496 	for_each_tracing_cpu(cpu) {
4497 		buf_iter = trace_buffer_iter(iter, cpu);
4498 		if (buf_iter) {
4499 			if (!ring_buffer_iter_empty(buf_iter))
4500 				return 0;
4501 		} else {
4502 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4503 				return 0;
4504 		}
4505 	}
4506 
4507 	return 1;
4508 }
4509 
4510 /*  Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4512 {
4513 	struct trace_array *tr = iter->tr;
4514 	unsigned long trace_flags = tr->trace_flags;
4515 	enum print_line_t ret;
4516 
4517 	if (iter->lost_events) {
4518 		if (iter->lost_events == (unsigned long)-1)
4519 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4520 					 iter->cpu);
4521 		else
4522 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523 					 iter->cpu, iter->lost_events);
4524 		if (trace_seq_has_overflowed(&iter->seq))
4525 			return TRACE_TYPE_PARTIAL_LINE;
4526 	}
4527 
4528 	if (iter->trace && iter->trace->print_line) {
4529 		ret = iter->trace->print_line(iter);
4530 		if (ret != TRACE_TYPE_UNHANDLED)
4531 			return ret;
4532 	}
4533 
4534 	if (iter->ent->type == TRACE_BPUTS &&
4535 			trace_flags & TRACE_ITER_PRINTK &&
4536 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537 		return trace_print_bputs_msg_only(iter);
4538 
4539 	if (iter->ent->type == TRACE_BPRINT &&
4540 			trace_flags & TRACE_ITER_PRINTK &&
4541 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542 		return trace_print_bprintk_msg_only(iter);
4543 
4544 	if (iter->ent->type == TRACE_PRINT &&
4545 			trace_flags & TRACE_ITER_PRINTK &&
4546 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547 		return trace_print_printk_msg_only(iter);
4548 
4549 	if (trace_flags & TRACE_ITER_BIN)
4550 		return print_bin_fmt(iter);
4551 
4552 	if (trace_flags & TRACE_ITER_HEX)
4553 		return print_hex_fmt(iter);
4554 
4555 	if (trace_flags & TRACE_ITER_RAW)
4556 		return print_raw_fmt(iter);
4557 
4558 	return print_trace_fmt(iter);
4559 }
4560 
4561 void trace_latency_header(struct seq_file *m)
4562 {
4563 	struct trace_iterator *iter = m->private;
4564 	struct trace_array *tr = iter->tr;
4565 
4566 	/* print nothing if the buffers are empty */
4567 	if (trace_empty(iter))
4568 		return;
4569 
4570 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571 		print_trace_header(m, iter);
4572 
4573 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574 		print_lat_help_header(m);
4575 }
4576 
4577 void trace_default_header(struct seq_file *m)
4578 {
4579 	struct trace_iterator *iter = m->private;
4580 	struct trace_array *tr = iter->tr;
4581 	unsigned long trace_flags = tr->trace_flags;
4582 
4583 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4584 		return;
4585 
4586 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587 		/* print nothing if the buffers are empty */
4588 		if (trace_empty(iter))
4589 			return;
4590 		print_trace_header(m, iter);
4591 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4592 			print_lat_help_header(m);
4593 	} else {
4594 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4596 				print_func_help_header_irq(iter->array_buffer,
4597 							   m, trace_flags);
4598 			else
4599 				print_func_help_header(iter->array_buffer, m,
4600 						       trace_flags);
4601 		}
4602 	}
4603 }
4604 
/* Print a warning banner to @m when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4612 
4613 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char main_help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, main_help);
}
4623 
/*
 * Usage text for a per_cpu snapshot file.  The "echo 1" paragraph
 * depends on whether CONFIG_RING_BUFFER_ALLOW_SWAP is enabled.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4638 
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4640 {
4641 	if (iter->tr->allocated_snapshot)
4642 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4643 	else
4644 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4645 
4646 	seq_puts(m, "# Snapshot commands:\n");
4647 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648 		show_snapshot_main_help(m);
4649 	else
4650 		show_snapshot_percpu_help(m);
4651 }
4652 #else
/*
 * No-op stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4655 #endif
4656 
4657 static int s_show(struct seq_file *m, void *v)
4658 {
4659 	struct trace_iterator *iter = v;
4660 	int ret;
4661 
4662 	if (iter->ent == NULL) {
4663 		if (iter->tr) {
4664 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4665 			seq_puts(m, "#\n");
4666 			test_ftrace_alive(m);
4667 		}
4668 		if (iter->snapshot && trace_empty(iter))
4669 			print_snapshot_help(m, iter);
4670 		else if (iter->trace && iter->trace->print_header)
4671 			iter->trace->print_header(m);
4672 		else
4673 			trace_default_header(m);
4674 
4675 	} else if (iter->leftover) {
4676 		/*
4677 		 * If we filled the seq_file buffer earlier, we
4678 		 * want to just show it now.
4679 		 */
4680 		ret = trace_print_seq(m, &iter->seq);
4681 
4682 		/* ret should this time be zero, but you never know */
4683 		iter->leftover = ret;
4684 
4685 	} else {
4686 		print_trace_line(iter);
4687 		ret = trace_print_seq(m, &iter->seq);
4688 		/*
4689 		 * If we overflow the seq_file buffer, then it will
4690 		 * ask us for this data again at start up.
4691 		 * Use that instead.
4692 		 *  ret is 0 if seq_file write succeeded.
4693 		 *        -1 otherwise.
4694 		 */
4695 		iter->leftover = ret;
4696 	}
4697 
4698 	return 0;
4699 }
4700 
4701 /*
4702  * Should be used after trace_array_get(), trace_types_lock
4703  * ensures that i_cdev was already initialized.
4704  */
4705 static inline int tracing_get_cpu(struct inode *inode)
4706 {
4707 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4708 		return (long)inode->i_cdev - 1;
4709 	return RING_BUFFER_ALL_CPUS;
4710 }
4711 
4712 static const struct seq_operations tracer_seq_ops = {
4713 	.start		= s_start,
4714 	.next		= s_next,
4715 	.stop		= s_stop,
4716 	.show		= s_show,
4717 };
4718 
/*
 * Allocate and set up the trace_iterator used to read a trace file.
 *
 * @inode:    inode being opened; i_private is the trace_array and
 *            tracing_get_cpu() selects one CPU or all of them
 * @file:     file being opened; the iterator becomes the seq_file
 *            private data
 * @snapshot: true when opened through the snapshot file; selects
 *            tr->max_buffer (CONFIG_TRACER_MAX_TRACE) and skips the
 *            pause-on-trace stop
 *
 * Returns the iterator, ERR_PTR(-ENODEV) when tracing is disabled, or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU id */
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
	mutex_lock(&trace_types_lock);
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
	if (!iter->trace)
		goto fail;

	*iter->trace = *tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->max_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
		tracing_stop_tr(tr);

	/*
	 * Prepare every reader first, issue a single
	 * ring_buffer_read_prepare_sync(), then start the readers.
	 */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->array_buffer->buffer,
							 cpu, GFP_KERNEL);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->array_buffer->buffer,
						 cpu, GFP_KERNEL);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

	/* Reached only with trace_types_lock held */
 fail:
	mutex_unlock(&trace_types_lock);
	kfree(iter->trace);
	kfree(iter->temp);
	kfree(iter->buffer_iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4841 
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4843 {
4844 	int ret;
4845 
4846 	ret = tracing_check_open_get_tr(NULL);
4847 	if (ret)
4848 		return ret;
4849 
4850 	filp->private_data = inode->i_private;
4851 	return 0;
4852 }
4853 
4854 bool tracing_is_disabled(void)
4855 {
4856 	return (tracing_disabled) ? true: false;
4857 }
4858 
4859 /*
4860  * Open and update trace_array ref count.
4861  * Must have the current trace_array passed to it.
4862  */
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4864 {
4865 	struct trace_array *tr = inode->i_private;
4866 	int ret;
4867 
4868 	ret = tracing_check_open_get_tr(tr);
4869 	if (ret)
4870 		return ret;
4871 
4872 	filp->private_data = inode->i_private;
4873 
4874 	return 0;
4875 }
4876 
/* Open as a stream file (stream_open()), then do the generic trace_array open. */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int err;

	stream_open(inode, filp);
	err = tracing_open_generic_tr(inode, filp);

	return err;
}
4882 
4883 static int tracing_release(struct inode *inode, struct file *file)
4884 {
4885 	struct trace_array *tr = inode->i_private;
4886 	struct seq_file *m = file->private_data;
4887 	struct trace_iterator *iter;
4888 	int cpu;
4889 
4890 	if (!(file->f_mode & FMODE_READ)) {
4891 		trace_array_put(tr);
4892 		return 0;
4893 	}
4894 
4895 	/* Writes do not use seq_file */
4896 	iter = m->private;
4897 	mutex_lock(&trace_types_lock);
4898 
4899 	for_each_tracing_cpu(cpu) {
4900 		if (iter->buffer_iter[cpu])
4901 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4902 	}
4903 
4904 	if (iter->trace && iter->trace->close)
4905 		iter->trace->close(iter);
4906 
4907 	if (!iter->snapshot && tr->stop_count)
4908 		/* reenable tracing if it was previously enabled */
4909 		tracing_start_tr(tr);
4910 
4911 	__trace_array_put(tr);
4912 
4913 	mutex_unlock(&trace_types_lock);
4914 
4915 	mutex_destroy(&iter->mutex);
4916 	free_cpumask_var(iter->started);
4917 	kfree(iter->fmt);
4918 	kfree(iter->temp);
4919 	kfree(iter->trace);
4920 	kfree(iter->buffer_iter);
4921 	seq_release_private(inode, file);
4922 
4923 	return 0;
4924 }
4925 
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4927 {
4928 	struct trace_array *tr = inode->i_private;
4929 
4930 	trace_array_put(tr);
4931 	return 0;
4932 }
4933 
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4935 {
4936 	struct trace_array *tr = inode->i_private;
4937 
4938 	trace_array_put(tr);
4939 
4940 	return single_release(inode, file);
4941 }
4942 
4943 static int tracing_open(struct inode *inode, struct file *file)
4944 {
4945 	struct trace_array *tr = inode->i_private;
4946 	struct trace_iterator *iter;
4947 	int ret;
4948 
4949 	ret = tracing_check_open_get_tr(tr);
4950 	if (ret)
4951 		return ret;
4952 
4953 	/* If this file was open for write, then erase contents */
4954 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955 		int cpu = tracing_get_cpu(inode);
4956 		struct array_buffer *trace_buf = &tr->array_buffer;
4957 
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959 		if (tr->current_trace->print_max)
4960 			trace_buf = &tr->max_buffer;
4961 #endif
4962 
4963 		if (cpu == RING_BUFFER_ALL_CPUS)
4964 			tracing_reset_online_cpus(trace_buf);
4965 		else
4966 			tracing_reset_cpu(trace_buf, cpu);
4967 	}
4968 
4969 	if (file->f_mode & FMODE_READ) {
4970 		iter = __tracing_open(inode, file, false);
4971 		if (IS_ERR(iter))
4972 			ret = PTR_ERR(iter);
4973 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4975 	}
4976 
4977 	if (ret < 0)
4978 		trace_array_put(tr);
4979 
4980 	return ret;
4981 }
4982 
4983 /*
4984  * Some tracers are not suitable for instance buffers.
4985  * A tracer is always available for the global array (toplevel)
4986  * or if it explicitly states that it is.
4987  */
4988 static bool
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4990 {
4991 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4992 }
4993 
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4997 {
4998 	while (t && !trace_ok_for_array(t, tr))
4999 		t = t->next;
5000 
5001 	return t;
5002 }
5003 
5004 static void *
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5006 {
5007 	struct trace_array *tr = m->private;
5008 	struct tracer *t = v;
5009 
5010 	(*pos)++;
5011 
5012 	if (t)
5013 		t = get_tracer_for_array(tr, t->next);
5014 
5015 	return t;
5016 }
5017 
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5019 {
5020 	struct trace_array *tr = m->private;
5021 	struct tracer *t;
5022 	loff_t l = 0;
5023 
5024 	mutex_lock(&trace_types_lock);
5025 
5026 	t = get_tracer_for_array(tr, trace_types);
5027 	for (; t && l < *pos; t = t_next(m, t, &l))
5028 			;
5029 
5030 	return t;
5031 }
5032 
5033 static void t_stop(struct seq_file *m, void *p)
5034 {
5035 	mutex_unlock(&trace_types_lock);
5036 }
5037 
5038 static int t_show(struct seq_file *m, void *v)
5039 {
5040 	struct tracer *t = v;
5041 
5042 	if (!t)
5043 		return 0;
5044 
5045 	seq_puts(m, t->name);
5046 	if (t->next)
5047 		seq_putc(m, ' ');
5048 	else
5049 		seq_putc(m, '\n');
5050 
5051 	return 0;
5052 }
5053 
5054 static const struct seq_operations show_traces_seq_ops = {
5055 	.start		= t_start,
5056 	.next		= t_next,
5057 	.stop		= t_stop,
5058 	.show		= t_show,
5059 };
5060 
5061 static int show_traces_open(struct inode *inode, struct file *file)
5062 {
5063 	struct trace_array *tr = inode->i_private;
5064 	struct seq_file *m;
5065 	int ret;
5066 
5067 	ret = tracing_check_open_get_tr(tr);
5068 	if (ret)
5069 		return ret;
5070 
5071 	ret = seq_open(file, &show_traces_seq_ops);
5072 	if (ret) {
5073 		trace_array_put(tr);
5074 		return ret;
5075 	}
5076 
5077 	m = file->private_data;
5078 	m->private = tr;
5079 
5080 	return 0;
5081 }
5082 
5083 static int show_traces_release(struct inode *inode, struct file *file)
5084 {
5085 	struct trace_array *tr = inode->i_private;
5086 
5087 	trace_array_put(tr);
5088 	return seq_release(inode, file);
5089 }
5090 
5091 static ssize_t
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093 		   size_t count, loff_t *ppos)
5094 {
5095 	return count;
5096 }
5097 
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5099 {
5100 	int ret;
5101 
5102 	if (file->f_mode & FMODE_READ)
5103 		ret = seq_lseek(file, offset, whence);
5104 	else
5105 		file->f_pos = ret = 0;
5106 
5107 	return ret;
5108 }
5109 
5110 static const struct file_operations tracing_fops = {
5111 	.open		= tracing_open,
5112 	.read		= seq_read,
5113 	.write		= tracing_write_stub,
5114 	.llseek		= tracing_lseek,
5115 	.release	= tracing_release,
5116 };
5117 
5118 static const struct file_operations show_traces_fops = {
5119 	.open		= show_traces_open,
5120 	.read		= seq_read,
5121 	.llseek		= seq_lseek,
5122 	.release	= show_traces_release,
5123 };
5124 
5125 static ssize_t
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127 		     size_t count, loff_t *ppos)
5128 {
5129 	struct trace_array *tr = file_inode(filp)->i_private;
5130 	char *mask_str;
5131 	int len;
5132 
5133 	len = snprintf(NULL, 0, "%*pb\n",
5134 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135 	mask_str = kmalloc(len, GFP_KERNEL);
5136 	if (!mask_str)
5137 		return -ENOMEM;
5138 
5139 	len = snprintf(mask_str, len, "%*pb\n",
5140 		       cpumask_pr_args(tr->tracing_cpumask));
5141 	if (len >= count) {
5142 		count = -EINVAL;
5143 		goto out_err;
5144 	}
5145 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5146 
5147 out_err:
5148 	kfree(mask_str);
5149 
5150 	return count;
5151 }
5152 
5153 int tracing_set_cpumask(struct trace_array *tr,
5154 			cpumask_var_t tracing_cpumask_new)
5155 {
5156 	int cpu;
5157 
5158 	if (!tr)
5159 		return -EINVAL;
5160 
5161 	local_irq_disable();
5162 	arch_spin_lock(&tr->max_lock);
5163 	for_each_tracing_cpu(cpu) {
5164 		/*
5165 		 * Increase/decrease the disabled counter if we are
5166 		 * about to flip a bit in the cpumask:
5167 		 */
5168 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5172 		}
5173 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5177 		}
5178 	}
5179 	arch_spin_unlock(&tr->max_lock);
5180 	local_irq_enable();
5181 
5182 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5183 
5184 	return 0;
5185 }
5186 
5187 static ssize_t
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189 		      size_t count, loff_t *ppos)
5190 {
5191 	struct trace_array *tr = file_inode(filp)->i_private;
5192 	cpumask_var_t tracing_cpumask_new;
5193 	int err;
5194 
5195 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5196 		return -ENOMEM;
5197 
5198 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5199 	if (err)
5200 		goto err_free;
5201 
5202 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5203 	if (err)
5204 		goto err_free;
5205 
5206 	free_cpumask_var(tracing_cpumask_new);
5207 
5208 	return count;
5209 
5210 err_free:
5211 	free_cpumask_var(tracing_cpumask_new);
5212 
5213 	return err;
5214 }
5215 
5216 static const struct file_operations tracing_cpumask_fops = {
5217 	.open		= tracing_open_generic_tr,
5218 	.read		= tracing_cpumask_read,
5219 	.write		= tracing_cpumask_write,
5220 	.release	= tracing_release_generic_tr,
5221 	.llseek		= generic_file_llseek,
5222 };
5223 
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5225 {
5226 	struct tracer_opt *trace_opts;
5227 	struct trace_array *tr = m->private;
5228 	u32 tracer_flags;
5229 	int i;
5230 
5231 	mutex_lock(&trace_types_lock);
5232 	tracer_flags = tr->current_trace->flags->val;
5233 	trace_opts = tr->current_trace->flags->opts;
5234 
5235 	for (i = 0; trace_options[i]; i++) {
5236 		if (tr->trace_flags & (1 << i))
5237 			seq_printf(m, "%s\n", trace_options[i]);
5238 		else
5239 			seq_printf(m, "no%s\n", trace_options[i]);
5240 	}
5241 
5242 	for (i = 0; trace_opts[i].name; i++) {
5243 		if (tracer_flags & trace_opts[i].bit)
5244 			seq_printf(m, "%s\n", trace_opts[i].name);
5245 		else
5246 			seq_printf(m, "no%s\n", trace_opts[i].name);
5247 	}
5248 	mutex_unlock(&trace_types_lock);
5249 
5250 	return 0;
5251 }
5252 
5253 static int __set_tracer_option(struct trace_array *tr,
5254 			       struct tracer_flags *tracer_flags,
5255 			       struct tracer_opt *opts, int neg)
5256 {
5257 	struct tracer *trace = tracer_flags->trace;
5258 	int ret;
5259 
5260 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5261 	if (ret)
5262 		return ret;
5263 
5264 	if (neg)
5265 		tracer_flags->val &= ~opts->bit;
5266 	else
5267 		tracer_flags->val |= opts->bit;
5268 	return 0;
5269 }
5270 
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5273 {
5274 	struct tracer *trace = tr->current_trace;
5275 	struct tracer_flags *tracer_flags = trace->flags;
5276 	struct tracer_opt *opts = NULL;
5277 	int i;
5278 
5279 	for (i = 0; tracer_flags->opts[i].name; i++) {
5280 		opts = &tracer_flags->opts[i];
5281 
5282 		if (strcmp(cmp, opts->name) == 0)
5283 			return __set_tracer_option(tr, trace->flags, opts, neg);
5284 	}
5285 
5286 	return -EINVAL;
5287 }
5288 
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5291 {
5292 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5293 		return -1;
5294 
5295 	return 0;
5296 }
5297 
/*
 * Set (@enabled != 0) or clear (@enabled == 0) the trace option @mask
 * on @tr and run the side effects tied to that option.
 *
 * Returns 0 on success or when the flag already has the requested
 * state, -EINVAL when the current tracer's ->flag_changed() hook
 * rejects the change, or -ENOMEM when the tgid map cannot be allocated
 * for TRACE_ITER_RECORD_TGID.
 *
 * event_mutex must be held for TRACE_ITER_RECORD_TGID and
 * TRACE_ITER_RECORD_CMD (checked with lockdep).
 */
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
	int *map;

	if ((mask == TRACE_ITER_RECORD_TGID) ||
	    (mask == TRACE_ITER_RECORD_CMD))
		lockdep_assert_held(&event_mutex);

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* The flag is updated first; the option specific hooks follow */
	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);

	if (mask == TRACE_ITER_RECORD_TGID) {
		/* The tgid map is allocated on first use, sized from pid_max */
		if (!tgid_map) {
			tgid_map_max = pid_max;
			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
				       GFP_KERNEL);

			/*
			 * Pairs with smp_load_acquire() in
			 * trace_find_tgid_ptr() to ensure that if it observes
			 * the tgid_map we just allocated then it also observes
			 * the corresponding tgid_map_max value.
			 */
			smp_store_release(&tgid_map, map);
		}
		/* Allocation failed: undo the flag update made above */
		if (!tgid_map) {
			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
	}

	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_FUNC_FORK)
		ftrace_pid_follow_fork(tr, enabled);

	/* Overwrite mode is applied to the main and (if built) max buffer */
	if (mask == TRACE_ITER_OVERWRITE) {
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
#endif
	}

	if (mask == TRACE_ITER_PRINTK) {
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
	}

	return 0;
}
5365 
5366 int trace_set_options(struct trace_array *tr, char *option)
5367 {
5368 	char *cmp;
5369 	int neg = 0;
5370 	int ret;
5371 	size_t orig_len = strlen(option);
5372 	int len;
5373 
5374 	cmp = strstrip(option);
5375 
5376 	len = str_has_prefix(cmp, "no");
5377 	if (len)
5378 		neg = 1;
5379 
5380 	cmp += len;
5381 
5382 	mutex_lock(&event_mutex);
5383 	mutex_lock(&trace_types_lock);
5384 
5385 	ret = match_string(trace_options, -1, cmp);
5386 	/* If no option could be set, test the specific tracer options */
5387 	if (ret < 0)
5388 		ret = set_tracer_option(tr, cmp, neg);
5389 	else
5390 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5391 
5392 	mutex_unlock(&trace_types_lock);
5393 	mutex_unlock(&event_mutex);
5394 
5395 	/*
5396 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5397 	 * turn it back into a space.
5398 	 */
5399 	if (orig_len > strlen(option))
5400 		option[strlen(option)] = ' ';
5401 
5402 	return ret;
5403 }
5404 
5405 static void __init apply_trace_boot_options(void)
5406 {
5407 	char *buf = trace_boot_options_buf;
5408 	char *option;
5409 
5410 	while (true) {
5411 		option = strsep(&buf, ",");
5412 
5413 		if (!option)
5414 			break;
5415 
5416 		if (*option)
5417 			trace_set_options(&global_trace, option);
5418 
5419 		/* Put back the comma to allow this to be called again */
5420 		if (buf)
5421 			*(buf - 1) = ',';
5422 	}
5423 }
5424 
5425 static ssize_t
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427 			size_t cnt, loff_t *ppos)
5428 {
5429 	struct seq_file *m = filp->private_data;
5430 	struct trace_array *tr = m->private;
5431 	char buf[64];
5432 	int ret;
5433 
5434 	if (cnt >= sizeof(buf))
5435 		return -EINVAL;
5436 
5437 	if (copy_from_user(buf, ubuf, cnt))
5438 		return -EFAULT;
5439 
5440 	buf[cnt] = 0;
5441 
5442 	ret = trace_set_options(tr, buf);
5443 	if (ret < 0)
5444 		return ret;
5445 
5446 	*ppos += cnt;
5447 
5448 	return cnt;
5449 }
5450 
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5452 {
5453 	struct trace_array *tr = inode->i_private;
5454 	int ret;
5455 
5456 	ret = tracing_check_open_get_tr(tr);
5457 	if (ret)
5458 		return ret;
5459 
5460 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5461 	if (ret < 0)
5462 		trace_array_put(tr);
5463 
5464 	return ret;
5465 }
5466 
5467 static const struct file_operations tracing_iter_fops = {
5468 	.open		= tracing_trace_options_open,
5469 	.read		= seq_read,
5470 	.llseek		= seq_lseek,
5471 	.release	= tracing_single_release_tr,
5472 	.write		= tracing_trace_options_write,
5473 };
5474 
/*
 * Help text handed out verbatim by tracing_readme_read().  Sections that
 * describe optional features are only compiled in when the matching
 * CONFIG_* symbol is defined.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
	"\t            of the <attached-group>/<attached-event>.\n"
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .buckets=size  display values in groups of size rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
#endif
#endif
;
5750 
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753 		       size_t cnt, loff_t *ppos)
5754 {
5755 	return simple_read_from_buffer(ubuf, cnt, ppos,
5756 					readme_msg, strlen(readme_msg));
5757 }
5758 
5759 static const struct file_operations tracing_readme_fops = {
5760 	.open		= tracing_open_generic,
5761 	.read		= tracing_readme_read,
5762 	.llseek		= generic_file_llseek,
5763 };
5764 
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767 	int pid = ++(*pos);
5768 
5769 	return trace_find_tgid_ptr(pid);
5770 }
5771 
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5773 {
5774 	int pid = *pos;
5775 
5776 	return trace_find_tgid_ptr(pid);
5777 }
5778 
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5780 {
5781 }
5782 
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5784 {
5785 	int *entry = (int *)v;
5786 	int pid = entry - tgid_map;
5787 	int tgid = *entry;
5788 
5789 	if (tgid == 0)
5790 		return SEQ_SKIP;
5791 
5792 	seq_printf(m, "%d %d\n", pid, tgid);
5793 	return 0;
5794 }
5795 
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797 	.start		= saved_tgids_start,
5798 	.stop		= saved_tgids_stop,
5799 	.next		= saved_tgids_next,
5800 	.show		= saved_tgids_show,
5801 };
5802 
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5804 {
5805 	int ret;
5806 
5807 	ret = tracing_check_open_get_tr(NULL);
5808 	if (ret)
5809 		return ret;
5810 
5811 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5812 }
5813 
5814 
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816 	.open		= tracing_saved_tgids_open,
5817 	.read		= seq_read,
5818 	.llseek		= seq_lseek,
5819 	.release	= seq_release,
5820 };
5821 
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5823 {
5824 	unsigned int *ptr = v;
5825 
5826 	if (*pos || m->count)
5827 		ptr++;
5828 
5829 	(*pos)++;
5830 
5831 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5832 	     ptr++) {
5833 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5834 			continue;
5835 
5836 		return ptr;
5837 	}
5838 
5839 	return NULL;
5840 }
5841 
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5843 {
5844 	void *v;
5845 	loff_t l = 0;
5846 
5847 	preempt_disable();
5848 	arch_spin_lock(&trace_cmdline_lock);
5849 
5850 	v = &savedcmd->map_cmdline_to_pid[0];
5851 	while (l <= *pos) {
5852 		v = saved_cmdlines_next(m, v, &l);
5853 		if (!v)
5854 			return NULL;
5855 	}
5856 
5857 	return v;
5858 }
5859 
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5861 {
5862 	arch_spin_unlock(&trace_cmdline_lock);
5863 	preempt_enable();
5864 }
5865 
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5867 {
5868 	char buf[TASK_COMM_LEN];
5869 	unsigned int *pid = v;
5870 
5871 	__trace_find_cmdline(*pid, buf);
5872 	seq_printf(m, "%d %s\n", *pid, buf);
5873 	return 0;
5874 }
5875 
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877 	.start		= saved_cmdlines_start,
5878 	.next		= saved_cmdlines_next,
5879 	.stop		= saved_cmdlines_stop,
5880 	.show		= saved_cmdlines_show,
5881 };
5882 
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5884 {
5885 	int ret;
5886 
5887 	ret = tracing_check_open_get_tr(NULL);
5888 	if (ret)
5889 		return ret;
5890 
5891 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5892 }
5893 
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895 	.open		= tracing_saved_cmdlines_open,
5896 	.read		= seq_read,
5897 	.llseek		= seq_lseek,
5898 	.release	= seq_release,
5899 };
5900 
5901 static ssize_t
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903 				 size_t cnt, loff_t *ppos)
5904 {
5905 	char buf[64];
5906 	int r;
5907 
5908 	preempt_disable();
5909 	arch_spin_lock(&trace_cmdline_lock);
5910 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911 	arch_spin_unlock(&trace_cmdline_lock);
5912 	preempt_enable();
5913 
5914 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916 
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5918 {
5919 	kfree(s->saved_cmdlines);
5920 	kfree(s->map_cmdline_to_pid);
5921 	kfree(s);
5922 }
5923 
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5925 {
5926 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5927 
5928 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5929 	if (!s)
5930 		return -ENOMEM;
5931 
5932 	if (allocate_cmdlines_buffer(val, s) < 0) {
5933 		kfree(s);
5934 		return -ENOMEM;
5935 	}
5936 
5937 	preempt_disable();
5938 	arch_spin_lock(&trace_cmdline_lock);
5939 	savedcmd_temp = savedcmd;
5940 	savedcmd = s;
5941 	arch_spin_unlock(&trace_cmdline_lock);
5942 	preempt_enable();
5943 	free_saved_cmdlines_buffer(savedcmd_temp);
5944 
5945 	return 0;
5946 }
5947 
5948 static ssize_t
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950 				  size_t cnt, loff_t *ppos)
5951 {
5952 	unsigned long val;
5953 	int ret;
5954 
5955 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5956 	if (ret)
5957 		return ret;
5958 
5959 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5960 	if (!val || val > PID_MAX_DEFAULT)
5961 		return -EINVAL;
5962 
5963 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5964 	if (ret < 0)
5965 		return ret;
5966 
5967 	*ppos += cnt;
5968 
5969 	return cnt;
5970 }
5971 
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973 	.open		= tracing_open_generic,
5974 	.read		= tracing_saved_cmdlines_size_read,
5975 	.write		= tracing_saved_cmdlines_size_write,
5976 };
5977 
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5981 {
5982 	if (!ptr->map.eval_string) {
5983 		if (ptr->tail.next) {
5984 			ptr = ptr->tail.next;
5985 			/* Set ptr to the next real item (skip head) */
5986 			ptr++;
5987 		} else
5988 			return NULL;
5989 	}
5990 	return ptr;
5991 }
5992 
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5994 {
5995 	union trace_eval_map_item *ptr = v;
5996 
5997 	/*
5998 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5999 	 * This really should never happen.
6000 	 */
6001 	(*pos)++;
6002 	ptr = update_eval_map(ptr);
6003 	if (WARN_ON_ONCE(!ptr))
6004 		return NULL;
6005 
6006 	ptr++;
6007 	ptr = update_eval_map(ptr);
6008 
6009 	return ptr;
6010 }
6011 
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6013 {
6014 	union trace_eval_map_item *v;
6015 	loff_t l = 0;
6016 
6017 	mutex_lock(&trace_eval_mutex);
6018 
6019 	v = trace_eval_maps;
6020 	if (v)
6021 		v++;
6022 
6023 	while (v && l < *pos) {
6024 		v = eval_map_next(m, v, &l);
6025 	}
6026 
6027 	return v;
6028 }
6029 
6030 static void eval_map_stop(struct seq_file *m, void *v)
6031 {
6032 	mutex_unlock(&trace_eval_mutex);
6033 }
6034 
6035 static int eval_map_show(struct seq_file *m, void *v)
6036 {
6037 	union trace_eval_map_item *ptr = v;
6038 
6039 	seq_printf(m, "%s %ld (%s)\n",
6040 		   ptr->map.eval_string, ptr->map.eval_value,
6041 		   ptr->map.system);
6042 
6043 	return 0;
6044 }
6045 
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047 	.start		= eval_map_start,
6048 	.next		= eval_map_next,
6049 	.stop		= eval_map_stop,
6050 	.show		= eval_map_show,
6051 };
6052 
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6054 {
6055 	int ret;
6056 
6057 	ret = tracing_check_open_get_tr(NULL);
6058 	if (ret)
6059 		return ret;
6060 
6061 	return seq_open(filp, &tracing_eval_map_seq_ops);
6062 }
6063 
6064 static const struct file_operations tracing_eval_map_fops = {
6065 	.open		= tracing_eval_map_open,
6066 	.read		= seq_read,
6067 	.llseek		= seq_lseek,
6068 	.release	= seq_release,
6069 };
6070 
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6073 {
6074 	/* Return tail of array given the head */
6075 	return ptr + ptr->head.length + 1;
6076 }
6077 
6078 static void
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6080 			   int len)
6081 {
6082 	struct trace_eval_map **stop;
6083 	struct trace_eval_map **map;
6084 	union trace_eval_map_item *map_array;
6085 	union trace_eval_map_item *ptr;
6086 
6087 	stop = start + len;
6088 
6089 	/*
6090 	 * The trace_eval_maps contains the map plus a head and tail item,
6091 	 * where the head holds the module and length of array, and the
6092 	 * tail holds a pointer to the next list.
6093 	 */
6094 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6095 	if (!map_array) {
6096 		pr_warn("Unable to allocate trace eval mapping\n");
6097 		return;
6098 	}
6099 
6100 	mutex_lock(&trace_eval_mutex);
6101 
6102 	if (!trace_eval_maps)
6103 		trace_eval_maps = map_array;
6104 	else {
6105 		ptr = trace_eval_maps;
6106 		for (;;) {
6107 			ptr = trace_eval_jmp_to_tail(ptr);
6108 			if (!ptr->tail.next)
6109 				break;
6110 			ptr = ptr->tail.next;
6111 
6112 		}
6113 		ptr->tail.next = map_array;
6114 	}
6115 	map_array->head.mod = mod;
6116 	map_array->head.length = len;
6117 	map_array++;
6118 
6119 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120 		map_array->map = **map;
6121 		map_array++;
6122 	}
6123 	memset(map_array, 0, sizeof(*map_array));
6124 
6125 	mutex_unlock(&trace_eval_mutex);
6126 }
6127 
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6129 {
6130 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131 			  NULL, &tracing_eval_map_fops);
6132 }
6133 
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stubs used when CONFIG_TRACE_EVAL_MAP_FILE is not set */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}

static inline void trace_insert_eval_map_file(struct module *mod,
					      struct trace_eval_map **start,
					      int len)
{
}
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6139 
/*
 * Hand the @len eval maps at @start to trace_event_eval_update() and then
 * to trace_insert_eval_map_file().  Does nothing when @len is not positive.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len > 0) {
		trace_event_eval_update(start, len);
		trace_insert_eval_map_file(mod, start, len);
	}
}
6154 
6155 static ssize_t
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157 		       size_t cnt, loff_t *ppos)
6158 {
6159 	struct trace_array *tr = filp->private_data;
6160 	char buf[MAX_TRACER_SIZE+2];
6161 	int r;
6162 
6163 	mutex_lock(&trace_types_lock);
6164 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6165 	mutex_unlock(&trace_types_lock);
6166 
6167 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6168 }
6169 
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6171 {
6172 	tracing_reset_online_cpus(&tr->array_buffer);
6173 	return t->init(tr);
6174 }
6175 
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6177 {
6178 	int cpu;
6179 
6180 	for_each_tracing_cpu(cpu)
6181 		per_cpu_ptr(buf->data, cpu)->entries = val;
6182 }
6183 
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @tr's buffer to the size of @size_tr's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187 					struct array_buffer *size_buf, int cpu_id)
6188 {
6189 	int cpu, ret = 0;
6190 
6191 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192 		for_each_tracing_cpu(cpu) {
6193 			ret = ring_buffer_resize(trace_buf->buffer,
6194 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6195 			if (ret < 0)
6196 				break;
6197 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6198 				per_cpu_ptr(size_buf->data, cpu)->entries;
6199 		}
6200 	} else {
6201 		ret = ring_buffer_resize(trace_buf->buffer,
6202 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6203 		if (ret == 0)
6204 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6206 	}
6207 
6208 	return ret;
6209 }
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6211 
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213 					unsigned long size, int cpu)
6214 {
6215 	int ret;
6216 
6217 	/*
6218 	 * If kernel or user changes the size of the ring buffer
6219 	 * we use the size that was given, and we can forget about
6220 	 * expanding it later.
6221 	 */
6222 	ring_buffer_expanded = true;
6223 
6224 	/* May be called before buffers are initialized */
6225 	if (!tr->array_buffer.buffer)
6226 		return 0;
6227 
6228 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6229 	if (ret < 0)
6230 		return ret;
6231 
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234 	    !tr->current_trace->use_max_tr)
6235 		goto out;
6236 
6237 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6238 	if (ret < 0) {
6239 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240 						     &tr->array_buffer, cpu);
6241 		if (r < 0) {
6242 			/*
6243 			 * AARGH! We are left with different
6244 			 * size max buffer!!!!
6245 			 * The max buffer is our "snapshot" buffer.
6246 			 * When a tracer needs a snapshot (one of the
6247 			 * latency tracers), it swaps the max buffer
6248 			 * with the saved snap shot. We succeeded to
6249 			 * update the size of the main buffer, but failed to
6250 			 * update the size of the max buffer. But when we tried
6251 			 * to reset the main buffer to the original size, we
6252 			 * failed there too. This is very unlikely to
6253 			 * happen, but if it does, warn and kill all
6254 			 * tracing.
6255 			 */
6256 			WARN_ON(1);
6257 			tracing_disabled = 1;
6258 		}
6259 		return ret;
6260 	}
6261 
6262 	if (cpu == RING_BUFFER_ALL_CPUS)
6263 		set_buffer_entries(&tr->max_buffer, size);
6264 	else
6265 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6266 
6267  out:
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6269 
6270 	if (cpu == RING_BUFFER_ALL_CPUS)
6271 		set_buffer_entries(&tr->array_buffer, size);
6272 	else
6273 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6274 
6275 	return ret;
6276 }
6277 
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279 				  unsigned long size, int cpu_id)
6280 {
6281 	int ret;
6282 
6283 	mutex_lock(&trace_types_lock);
6284 
6285 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286 		/* make sure, this cpu is enabled in the mask */
6287 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6288 			ret = -EINVAL;
6289 			goto out;
6290 		}
6291 	}
6292 
6293 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6294 	if (ret < 0)
6295 		ret = -ENOMEM;
6296 
6297 out:
6298 	mutex_unlock(&trace_types_lock);
6299 
6300 	return ret;
6301 }
6302 
6303 
6304 /**
6305  * tracing_update_buffers - used by tracing facility to expand ring buffers
6306  *
6307  * To save on memory when the tracing is never used on a system with it
6308  * configured in. The ring buffers are set to a minimum size. But once
6309  * a user starts to use the tracing facility, then they need to grow
6310  * to their default size.
6311  *
6312  * This function is to be called when a tracer is about to be used.
6313  */
6314 int tracing_update_buffers(void)
6315 {
6316 	int ret = 0;
6317 
6318 	mutex_lock(&trace_types_lock);
6319 	if (!ring_buffer_expanded)
6320 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321 						RING_BUFFER_ALL_CPUS);
6322 	mutex_unlock(&trace_types_lock);
6323 
6324 	return ret;
6325 }
6326 
6327 struct trace_option_dentry;
6328 
6329 static void
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6331 
6332 /*
6333  * Used to clear out the tracer before deletion of an instance.
6334  * Must have trace_types_lock held.
6335  */
6336 static void tracing_set_nop(struct trace_array *tr)
6337 {
6338 	if (tr->current_trace == &nop_trace)
6339 		return;
6340 
6341 	tr->current_trace->enabled--;
6342 
6343 	if (tr->current_trace->reset)
6344 		tr->current_trace->reset(tr);
6345 
6346 	tr->current_trace = &nop_trace;
6347 }
6348 
6349 static bool tracer_options_updated;
6350 
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6352 {
6353 	/* Only enable if the directory has been created already. */
6354 	if (!tr->dir)
6355 		return;
6356 
6357 	/* Only create trace option files after update_tracer_options finish */
6358 	if (!tracer_options_updated)
6359 		return;
6360 
6361 	create_trace_option_files(tr, t);
6362 }
6363 
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6365 {
6366 	struct tracer *t;
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368 	bool had_max_tr;
6369 #endif
6370 	int ret = 0;
6371 
6372 	mutex_lock(&trace_types_lock);
6373 
6374 	if (!ring_buffer_expanded) {
6375 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376 						RING_BUFFER_ALL_CPUS);
6377 		if (ret < 0)
6378 			goto out;
6379 		ret = 0;
6380 	}
6381 
6382 	for (t = trace_types; t; t = t->next) {
6383 		if (strcmp(t->name, buf) == 0)
6384 			break;
6385 	}
6386 	if (!t) {
6387 		ret = -EINVAL;
6388 		goto out;
6389 	}
6390 	if (t == tr->current_trace)
6391 		goto out;
6392 
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394 	if (t->use_max_tr) {
6395 		local_irq_disable();
6396 		arch_spin_lock(&tr->max_lock);
6397 		if (tr->cond_snapshot)
6398 			ret = -EBUSY;
6399 		arch_spin_unlock(&tr->max_lock);
6400 		local_irq_enable();
6401 		if (ret)
6402 			goto out;
6403 	}
6404 #endif
6405 	/* Some tracers won't work on kernel command line */
6406 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6407 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6408 			t->name);
6409 		goto out;
6410 	}
6411 
6412 	/* Some tracers are only allowed for the top level buffer */
6413 	if (!trace_ok_for_array(t, tr)) {
6414 		ret = -EINVAL;
6415 		goto out;
6416 	}
6417 
6418 	/* If trace pipe files are being read, we can't change the tracer */
6419 	if (tr->trace_ref) {
6420 		ret = -EBUSY;
6421 		goto out;
6422 	}
6423 
6424 	trace_branch_disable();
6425 
6426 	tr->current_trace->enabled--;
6427 
6428 	if (tr->current_trace->reset)
6429 		tr->current_trace->reset(tr);
6430 
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432 	had_max_tr = tr->current_trace->use_max_tr;
6433 
6434 	/* Current trace needs to be nop_trace before synchronize_rcu */
6435 	tr->current_trace = &nop_trace;
6436 
6437 	if (had_max_tr && !t->use_max_tr) {
6438 		/*
6439 		 * We need to make sure that the update_max_tr sees that
6440 		 * current_trace changed to nop_trace to keep it from
6441 		 * swapping the buffers after we resize it.
6442 		 * The update_max_tr is called from interrupts disabled
6443 		 * so a synchronized_sched() is sufficient.
6444 		 */
6445 		synchronize_rcu();
6446 		free_snapshot(tr);
6447 	}
6448 
6449 	if (t->use_max_tr && !tr->allocated_snapshot) {
6450 		ret = tracing_alloc_snapshot_instance(tr);
6451 		if (ret < 0)
6452 			goto out;
6453 	}
6454 #else
6455 	tr->current_trace = &nop_trace;
6456 #endif
6457 
6458 	if (t->init) {
6459 		ret = tracer_init(t, tr);
6460 		if (ret)
6461 			goto out;
6462 	}
6463 
6464 	tr->current_trace = t;
6465 	tr->current_trace->enabled++;
6466 	trace_branch_enable(tr);
6467  out:
6468 	mutex_unlock(&trace_types_lock);
6469 
6470 	return ret;
6471 }
6472 
6473 static ssize_t
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475 			size_t cnt, loff_t *ppos)
6476 {
6477 	struct trace_array *tr = filp->private_data;
6478 	char buf[MAX_TRACER_SIZE+1];
6479 	char *name;
6480 	size_t ret;
6481 	int err;
6482 
6483 	ret = cnt;
6484 
6485 	if (cnt > MAX_TRACER_SIZE)
6486 		cnt = MAX_TRACER_SIZE;
6487 
6488 	if (copy_from_user(buf, ubuf, cnt))
6489 		return -EFAULT;
6490 
6491 	buf[cnt] = 0;
6492 
6493 	name = strim(buf);
6494 
6495 	err = tracing_set_tracer(tr, name);
6496 	if (err)
6497 		return err;
6498 
6499 	*ppos += ret;
6500 
6501 	return ret;
6502 }
6503 
6504 static ssize_t
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506 		   size_t cnt, loff_t *ppos)
6507 {
6508 	char buf[64];
6509 	int r;
6510 
6511 	r = snprintf(buf, sizeof(buf), "%ld\n",
6512 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513 	if (r > sizeof(buf))
6514 		r = sizeof(buf);
6515 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6516 }
6517 
6518 static ssize_t
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520 		    size_t cnt, loff_t *ppos)
6521 {
6522 	unsigned long val;
6523 	int ret;
6524 
6525 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6526 	if (ret)
6527 		return ret;
6528 
6529 	*ptr = val * 1000;
6530 
6531 	return cnt;
6532 }
6533 
6534 static ssize_t
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536 		    size_t cnt, loff_t *ppos)
6537 {
6538 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6539 }
6540 
6541 static ssize_t
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543 		     size_t cnt, loff_t *ppos)
6544 {
6545 	struct trace_array *tr = filp->private_data;
6546 	int ret;
6547 
6548 	mutex_lock(&trace_types_lock);
6549 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6550 	if (ret < 0)
6551 		goto out;
6552 
6553 	if (tr->current_trace->update_thresh) {
6554 		ret = tr->current_trace->update_thresh(tr);
6555 		if (ret < 0)
6556 			goto out;
6557 	}
6558 
6559 	ret = cnt;
6560 out:
6561 	mutex_unlock(&trace_types_lock);
6562 
6563 	return ret;
6564 }
6565 
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6567 
6568 static ssize_t
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570 		     size_t cnt, loff_t *ppos)
6571 {
6572 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6573 }
6574 
6575 static ssize_t
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577 		      size_t cnt, loff_t *ppos)
6578 {
6579 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6580 }
6581 
6582 #endif
6583 
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6585 {
6586 	struct trace_array *tr = inode->i_private;
6587 	struct trace_iterator *iter;
6588 	int ret;
6589 
6590 	ret = tracing_check_open_get_tr(tr);
6591 	if (ret)
6592 		return ret;
6593 
6594 	mutex_lock(&trace_types_lock);
6595 
6596 	/* create a buffer to store the information to pass to userspace */
6597 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6598 	if (!iter) {
6599 		ret = -ENOMEM;
6600 		__trace_array_put(tr);
6601 		goto out;
6602 	}
6603 
6604 	trace_seq_init(&iter->seq);
6605 	iter->trace = tr->current_trace;
6606 
6607 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6608 		ret = -ENOMEM;
6609 		goto fail;
6610 	}
6611 
6612 	/* trace pipe does not show start of buffer */
6613 	cpumask_setall(iter->started);
6614 
6615 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6617 
6618 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619 	if (trace_clocks[tr->clock_id].in_ns)
6620 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6621 
6622 	iter->tr = tr;
6623 	iter->array_buffer = &tr->array_buffer;
6624 	iter->cpu_file = tracing_get_cpu(inode);
6625 	mutex_init(&iter->mutex);
6626 	filp->private_data = iter;
6627 
6628 	if (iter->trace->pipe_open)
6629 		iter->trace->pipe_open(iter);
6630 
6631 	nonseekable_open(inode, filp);
6632 
6633 	tr->trace_ref++;
6634 out:
6635 	mutex_unlock(&trace_types_lock);
6636 	return ret;
6637 
6638 fail:
6639 	kfree(iter);
6640 	__trace_array_put(tr);
6641 	mutex_unlock(&trace_types_lock);
6642 	return ret;
6643 }
6644 
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6646 {
6647 	struct trace_iterator *iter = file->private_data;
6648 	struct trace_array *tr = inode->i_private;
6649 
6650 	mutex_lock(&trace_types_lock);
6651 
6652 	tr->trace_ref--;
6653 
6654 	if (iter->trace->pipe_close)
6655 		iter->trace->pipe_close(iter);
6656 
6657 	mutex_unlock(&trace_types_lock);
6658 
6659 	free_cpumask_var(iter->started);
6660 	mutex_destroy(&iter->mutex);
6661 	kfree(iter);
6662 
6663 	trace_array_put(tr);
6664 
6665 	return 0;
6666 }
6667 
6668 static __poll_t
6669 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6670 {
6671 	struct trace_array *tr = iter->tr;
6672 
6673 	/* Iterators are static, they should be filled or empty */
6674 	if (trace_buffer_iter(iter, iter->cpu_file))
6675 		return EPOLLIN | EPOLLRDNORM;
6676 
6677 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6678 		/*
6679 		 * Always select as readable when in blocking mode
6680 		 */
6681 		return EPOLLIN | EPOLLRDNORM;
6682 	else
6683 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6684 					     filp, poll_table);
6685 }
6686 
6687 static __poll_t
6688 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6689 {
6690 	struct trace_iterator *iter = filp->private_data;
6691 
6692 	return trace_poll(iter, filp, poll_table);
6693 }
6694 
6695 /* Must be called with iter->mutex held. */
6696 static int tracing_wait_pipe(struct file *filp)
6697 {
6698 	struct trace_iterator *iter = filp->private_data;
6699 	int ret;
6700 
6701 	while (trace_empty(iter)) {
6702 
6703 		if ((filp->f_flags & O_NONBLOCK)) {
6704 			return -EAGAIN;
6705 		}
6706 
6707 		/*
6708 		 * We block until we read something and tracing is disabled.
6709 		 * We still block if tracing is disabled, but we have never
6710 		 * read anything. This allows a user to cat this file, and
6711 		 * then enable tracing. But after we have read something,
6712 		 * we give an EOF when tracing is again disabled.
6713 		 *
6714 		 * iter->pos will be 0 if we haven't read anything.
6715 		 */
6716 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6717 			break;
6718 
6719 		mutex_unlock(&iter->mutex);
6720 
6721 		ret = wait_on_pipe(iter, 0);
6722 
6723 		mutex_lock(&iter->mutex);
6724 
6725 		if (ret)
6726 			return ret;
6727 	}
6728 
6729 	return 1;
6730 }
6731 
6732 /*
6733  * Consumer reader.
6734  */
6735 static ssize_t
6736 tracing_read_pipe(struct file *filp, char __user *ubuf,
6737 		  size_t cnt, loff_t *ppos)
6738 {
6739 	struct trace_iterator *iter = filp->private_data;
6740 	ssize_t sret;
6741 
6742 	/*
6743 	 * Avoid more than one consumer on a single file descriptor
6744 	 * This is just a matter of traces coherency, the ring buffer itself
6745 	 * is protected.
6746 	 */
6747 	mutex_lock(&iter->mutex);
6748 
6749 	/* return any leftover data */
6750 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6751 	if (sret != -EBUSY)
6752 		goto out;
6753 
6754 	trace_seq_init(&iter->seq);
6755 
6756 	if (iter->trace->read) {
6757 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6758 		if (sret)
6759 			goto out;
6760 	}
6761 
6762 waitagain:
6763 	sret = tracing_wait_pipe(filp);
6764 	if (sret <= 0)
6765 		goto out;
6766 
6767 	/* stop when tracing is finished */
6768 	if (trace_empty(iter)) {
6769 		sret = 0;
6770 		goto out;
6771 	}
6772 
6773 	if (cnt >= PAGE_SIZE)
6774 		cnt = PAGE_SIZE - 1;
6775 
6776 	/* reset all but tr, trace, and overruns */
6777 	trace_iterator_reset(iter);
6778 	cpumask_clear(iter->started);
6779 	trace_seq_init(&iter->seq);
6780 
6781 	trace_event_read_lock();
6782 	trace_access_lock(iter->cpu_file);
6783 	while (trace_find_next_entry_inc(iter) != NULL) {
6784 		enum print_line_t ret;
6785 		int save_len = iter->seq.seq.len;
6786 
6787 		ret = print_trace_line(iter);
6788 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789 			/* don't print partial lines */
6790 			iter->seq.seq.len = save_len;
6791 			break;
6792 		}
6793 		if (ret != TRACE_TYPE_NO_CONSUME)
6794 			trace_consume(iter);
6795 
6796 		if (trace_seq_used(&iter->seq) >= cnt)
6797 			break;
6798 
6799 		/*
6800 		 * Setting the full flag means we reached the trace_seq buffer
6801 		 * size and we should leave by partial output condition above.
6802 		 * One of the trace_seq_* functions is not used properly.
6803 		 */
6804 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6805 			  iter->ent->type);
6806 	}
6807 	trace_access_unlock(iter->cpu_file);
6808 	trace_event_read_unlock();
6809 
6810 	/* Now copy what we have to the user */
6811 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6812 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6813 		trace_seq_init(&iter->seq);
6814 
6815 	/*
6816 	 * If there was nothing to send to user, in spite of consuming trace
6817 	 * entries, go back to wait for more entries.
6818 	 */
6819 	if (sret == -EBUSY)
6820 		goto waitagain;
6821 
6822 out:
6823 	mutex_unlock(&iter->mutex);
6824 
6825 	return sret;
6826 }
6827 
6828 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6829 				     unsigned int idx)
6830 {
6831 	__free_page(spd->pages[idx]);
6832 }
6833 
6834 static size_t
6835 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6836 {
6837 	size_t count;
6838 	int save_len;
6839 	int ret;
6840 
6841 	/* Seq buffer is page-sized, exactly what we need. */
6842 	for (;;) {
6843 		save_len = iter->seq.seq.len;
6844 		ret = print_trace_line(iter);
6845 
6846 		if (trace_seq_has_overflowed(&iter->seq)) {
6847 			iter->seq.seq.len = save_len;
6848 			break;
6849 		}
6850 
6851 		/*
6852 		 * This should not be hit, because it should only
6853 		 * be set if the iter->seq overflowed. But check it
6854 		 * anyway to be safe.
6855 		 */
6856 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6857 			iter->seq.seq.len = save_len;
6858 			break;
6859 		}
6860 
6861 		count = trace_seq_used(&iter->seq) - save_len;
6862 		if (rem < count) {
6863 			rem = 0;
6864 			iter->seq.seq.len = save_len;
6865 			break;
6866 		}
6867 
6868 		if (ret != TRACE_TYPE_NO_CONSUME)
6869 			trace_consume(iter);
6870 		rem -= count;
6871 		if (!trace_find_next_entry_inc(iter))	{
6872 			rem = 0;
6873 			iter->ent = NULL;
6874 			break;
6875 		}
6876 	}
6877 
6878 	return rem;
6879 }
6880 
6881 static ssize_t tracing_splice_read_pipe(struct file *filp,
6882 					loff_t *ppos,
6883 					struct pipe_inode_info *pipe,
6884 					size_t len,
6885 					unsigned int flags)
6886 {
6887 	struct page *pages_def[PIPE_DEF_BUFFERS];
6888 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6889 	struct trace_iterator *iter = filp->private_data;
6890 	struct splice_pipe_desc spd = {
6891 		.pages		= pages_def,
6892 		.partial	= partial_def,
6893 		.nr_pages	= 0, /* This gets updated below. */
6894 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6895 		.ops		= &default_pipe_buf_ops,
6896 		.spd_release	= tracing_spd_release_pipe,
6897 	};
6898 	ssize_t ret;
6899 	size_t rem;
6900 	unsigned int i;
6901 
6902 	if (splice_grow_spd(pipe, &spd))
6903 		return -ENOMEM;
6904 
6905 	mutex_lock(&iter->mutex);
6906 
6907 	if (iter->trace->splice_read) {
6908 		ret = iter->trace->splice_read(iter, filp,
6909 					       ppos, pipe, len, flags);
6910 		if (ret)
6911 			goto out_err;
6912 	}
6913 
6914 	ret = tracing_wait_pipe(filp);
6915 	if (ret <= 0)
6916 		goto out_err;
6917 
6918 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6919 		ret = -EFAULT;
6920 		goto out_err;
6921 	}
6922 
6923 	trace_event_read_lock();
6924 	trace_access_lock(iter->cpu_file);
6925 
6926 	/* Fill as many pages as possible. */
6927 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6928 		spd.pages[i] = alloc_page(GFP_KERNEL);
6929 		if (!spd.pages[i])
6930 			break;
6931 
6932 		rem = tracing_fill_pipe_page(rem, iter);
6933 
6934 		/* Copy the data into the page, so we can start over. */
6935 		ret = trace_seq_to_buffer(&iter->seq,
6936 					  page_address(spd.pages[i]),
6937 					  trace_seq_used(&iter->seq));
6938 		if (ret < 0) {
6939 			__free_page(spd.pages[i]);
6940 			break;
6941 		}
6942 		spd.partial[i].offset = 0;
6943 		spd.partial[i].len = trace_seq_used(&iter->seq);
6944 
6945 		trace_seq_init(&iter->seq);
6946 	}
6947 
6948 	trace_access_unlock(iter->cpu_file);
6949 	trace_event_read_unlock();
6950 	mutex_unlock(&iter->mutex);
6951 
6952 	spd.nr_pages = i;
6953 
6954 	if (i)
6955 		ret = splice_to_pipe(pipe, &spd);
6956 	else
6957 		ret = 0;
6958 out:
6959 	splice_shrink_spd(&spd);
6960 	return ret;
6961 
6962 out_err:
6963 	mutex_unlock(&iter->mutex);
6964 	goto out;
6965 }
6966 
6967 static ssize_t
6968 tracing_entries_read(struct file *filp, char __user *ubuf,
6969 		     size_t cnt, loff_t *ppos)
6970 {
6971 	struct inode *inode = file_inode(filp);
6972 	struct trace_array *tr = inode->i_private;
6973 	int cpu = tracing_get_cpu(inode);
6974 	char buf[64];
6975 	int r = 0;
6976 	ssize_t ret;
6977 
6978 	mutex_lock(&trace_types_lock);
6979 
6980 	if (cpu == RING_BUFFER_ALL_CPUS) {
6981 		int cpu, buf_size_same;
6982 		unsigned long size;
6983 
6984 		size = 0;
6985 		buf_size_same = 1;
6986 		/* check if all cpu sizes are same */
6987 		for_each_tracing_cpu(cpu) {
6988 			/* fill in the size from first enabled cpu */
6989 			if (size == 0)
6990 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6991 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6992 				buf_size_same = 0;
6993 				break;
6994 			}
6995 		}
6996 
6997 		if (buf_size_same) {
6998 			if (!ring_buffer_expanded)
6999 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7000 					    size >> 10,
7001 					    trace_buf_size >> 10);
7002 			else
7003 				r = sprintf(buf, "%lu\n", size >> 10);
7004 		} else
7005 			r = sprintf(buf, "X\n");
7006 	} else
7007 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7008 
7009 	mutex_unlock(&trace_types_lock);
7010 
7011 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012 	return ret;
7013 }
7014 
7015 static ssize_t
7016 tracing_entries_write(struct file *filp, const char __user *ubuf,
7017 		      size_t cnt, loff_t *ppos)
7018 {
7019 	struct inode *inode = file_inode(filp);
7020 	struct trace_array *tr = inode->i_private;
7021 	unsigned long val;
7022 	int ret;
7023 
7024 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7025 	if (ret)
7026 		return ret;
7027 
7028 	/* must have at least 1 entry */
7029 	if (!val)
7030 		return -EINVAL;
7031 
7032 	/* value is in KB */
7033 	val <<= 10;
7034 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7035 	if (ret < 0)
7036 		return ret;
7037 
7038 	*ppos += cnt;
7039 
7040 	return cnt;
7041 }
7042 
7043 static ssize_t
7044 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7045 				size_t cnt, loff_t *ppos)
7046 {
7047 	struct trace_array *tr = filp->private_data;
7048 	char buf[64];
7049 	int r, cpu;
7050 	unsigned long size = 0, expanded_size = 0;
7051 
7052 	mutex_lock(&trace_types_lock);
7053 	for_each_tracing_cpu(cpu) {
7054 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7055 		if (!ring_buffer_expanded)
7056 			expanded_size += trace_buf_size >> 10;
7057 	}
7058 	if (ring_buffer_expanded)
7059 		r = sprintf(buf, "%lu\n", size);
7060 	else
7061 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7062 	mutex_unlock(&trace_types_lock);
7063 
7064 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7065 }
7066 
7067 static ssize_t
7068 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7069 			  size_t cnt, loff_t *ppos)
7070 {
7071 	/*
7072 	 * There is no need to read what the user has written, this function
7073 	 * is just to make sure that there is no error when "echo" is used
7074 	 */
7075 
7076 	*ppos += cnt;
7077 
7078 	return cnt;
7079 }
7080 
7081 static int
7082 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7083 {
7084 	struct trace_array *tr = inode->i_private;
7085 
7086 	/* disable tracing ? */
7087 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7088 		tracer_tracing_off(tr);
7089 	/* resize the ring buffer to 0 */
7090 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7091 
7092 	trace_array_put(tr);
7093 
7094 	return 0;
7095 }
7096 
7097 static ssize_t
7098 tracing_mark_write(struct file *filp, const char __user *ubuf,
7099 					size_t cnt, loff_t *fpos)
7100 {
7101 	struct trace_array *tr = filp->private_data;
7102 	struct ring_buffer_event *event;
7103 	enum event_trigger_type tt = ETT_NONE;
7104 	struct trace_buffer *buffer;
7105 	struct print_entry *entry;
7106 	ssize_t written;
7107 	int size;
7108 	int len;
7109 
7110 /* Used in tracing_mark_raw_write() as well */
7111 #define FAULTED_STR "<faulted>"
7112 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7113 
7114 	if (tracing_disabled)
7115 		return -EINVAL;
7116 
7117 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7118 		return -EINVAL;
7119 
7120 	if (cnt > TRACE_BUF_SIZE)
7121 		cnt = TRACE_BUF_SIZE;
7122 
7123 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7124 
7125 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7126 
7127 	/* If less than "<faulted>", then make sure we can still add that */
7128 	if (cnt < FAULTED_SIZE)
7129 		size += FAULTED_SIZE - cnt;
7130 
7131 	buffer = tr->array_buffer.buffer;
7132 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7133 					    tracing_gen_ctx());
7134 	if (unlikely(!event))
7135 		/* Ring buffer disabled, return as if not open for write */
7136 		return -EBADF;
7137 
7138 	entry = ring_buffer_event_data(event);
7139 	entry->ip = _THIS_IP_;
7140 
7141 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7142 	if (len) {
7143 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7144 		cnt = FAULTED_SIZE;
7145 		written = -EFAULT;
7146 	} else
7147 		written = cnt;
7148 
7149 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7150 		/* do not add \n before testing triggers, but add \0 */
7151 		entry->buf[cnt] = '\0';
7152 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7153 	}
7154 
7155 	if (entry->buf[cnt - 1] != '\n') {
7156 		entry->buf[cnt] = '\n';
7157 		entry->buf[cnt + 1] = '\0';
7158 	} else
7159 		entry->buf[cnt] = '\0';
7160 
7161 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7162 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7163 	__buffer_unlock_commit(buffer, event);
7164 
7165 	if (tt)
7166 		event_triggers_post_call(tr->trace_marker_file, tt);
7167 
7168 	return written;
7169 }
7170 
7171 /* Limit it for now to 3K (including tag) */
7172 #define RAW_DATA_MAX_SIZE (1024*3)
7173 
7174 static ssize_t
7175 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7176 					size_t cnt, loff_t *fpos)
7177 {
7178 	struct trace_array *tr = filp->private_data;
7179 	struct ring_buffer_event *event;
7180 	struct trace_buffer *buffer;
7181 	struct raw_data_entry *entry;
7182 	ssize_t written;
7183 	int size;
7184 	int len;
7185 
7186 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7187 
7188 	if (tracing_disabled)
7189 		return -EINVAL;
7190 
7191 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7192 		return -EINVAL;
7193 
7194 	/* The marker must at least have a tag id */
7195 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7196 		return -EINVAL;
7197 
7198 	if (cnt > TRACE_BUF_SIZE)
7199 		cnt = TRACE_BUF_SIZE;
7200 
7201 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7202 
7203 	size = sizeof(*entry) + cnt;
7204 	if (cnt < FAULT_SIZE_ID)
7205 		size += FAULT_SIZE_ID - cnt;
7206 
7207 	buffer = tr->array_buffer.buffer;
7208 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7209 					    tracing_gen_ctx());
7210 	if (!event)
7211 		/* Ring buffer disabled, return as if not open for write */
7212 		return -EBADF;
7213 
7214 	entry = ring_buffer_event_data(event);
7215 
7216 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7217 	if (len) {
7218 		entry->id = -1;
7219 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7220 		written = -EFAULT;
7221 	} else
7222 		written = cnt;
7223 
7224 	__buffer_unlock_commit(buffer, event);
7225 
7226 	return written;
7227 }
7228 
7229 static int tracing_clock_show(struct seq_file *m, void *v)
7230 {
7231 	struct trace_array *tr = m->private;
7232 	int i;
7233 
7234 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7235 		seq_printf(m,
7236 			"%s%s%s%s", i ? " " : "",
7237 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7238 			i == tr->clock_id ? "]" : "");
7239 	seq_putc(m, '\n');
7240 
7241 	return 0;
7242 }
7243 
7244 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7245 {
7246 	int i;
7247 
7248 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7249 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7250 			break;
7251 	}
7252 	if (i == ARRAY_SIZE(trace_clocks))
7253 		return -EINVAL;
7254 
7255 	mutex_lock(&trace_types_lock);
7256 
7257 	tr->clock_id = i;
7258 
7259 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7260 
7261 	/*
7262 	 * New clock may not be consistent with the previous clock.
7263 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7264 	 */
7265 	tracing_reset_online_cpus(&tr->array_buffer);
7266 
7267 #ifdef CONFIG_TRACER_MAX_TRACE
7268 	if (tr->max_buffer.buffer)
7269 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7270 	tracing_reset_online_cpus(&tr->max_buffer);
7271 #endif
7272 
7273 	mutex_unlock(&trace_types_lock);
7274 
7275 	return 0;
7276 }
7277 
7278 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7279 				   size_t cnt, loff_t *fpos)
7280 {
7281 	struct seq_file *m = filp->private_data;
7282 	struct trace_array *tr = m->private;
7283 	char buf[64];
7284 	const char *clockstr;
7285 	int ret;
7286 
7287 	if (cnt >= sizeof(buf))
7288 		return -EINVAL;
7289 
7290 	if (copy_from_user(buf, ubuf, cnt))
7291 		return -EFAULT;
7292 
7293 	buf[cnt] = 0;
7294 
7295 	clockstr = strstrip(buf);
7296 
7297 	ret = tracing_set_clock(tr, clockstr);
7298 	if (ret)
7299 		return ret;
7300 
7301 	*fpos += cnt;
7302 
7303 	return cnt;
7304 }
7305 
7306 static int tracing_clock_open(struct inode *inode, struct file *file)
7307 {
7308 	struct trace_array *tr = inode->i_private;
7309 	int ret;
7310 
7311 	ret = tracing_check_open_get_tr(tr);
7312 	if (ret)
7313 		return ret;
7314 
7315 	ret = single_open(file, tracing_clock_show, inode->i_private);
7316 	if (ret < 0)
7317 		trace_array_put(tr);
7318 
7319 	return ret;
7320 }
7321 
7322 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7323 {
7324 	struct trace_array *tr = m->private;
7325 
7326 	mutex_lock(&trace_types_lock);
7327 
7328 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7329 		seq_puts(m, "delta [absolute]\n");
7330 	else
7331 		seq_puts(m, "[delta] absolute\n");
7332 
7333 	mutex_unlock(&trace_types_lock);
7334 
7335 	return 0;
7336 }
7337 
7338 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7339 {
7340 	struct trace_array *tr = inode->i_private;
7341 	int ret;
7342 
7343 	ret = tracing_check_open_get_tr(tr);
7344 	if (ret)
7345 		return ret;
7346 
7347 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7348 	if (ret < 0)
7349 		trace_array_put(tr);
7350 
7351 	return ret;
7352 }
7353 
7354 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7355 {
7356 	if (rbe == this_cpu_read(trace_buffered_event))
7357 		return ring_buffer_time_stamp(buffer);
7358 
7359 	return ring_buffer_event_time_stamp(buffer, rbe);
7360 }
7361 
7362 /*
7363  * Set or disable using the per CPU trace_buffer_event when possible.
7364  */
7365 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7366 {
7367 	int ret = 0;
7368 
7369 	mutex_lock(&trace_types_lock);
7370 
7371 	if (set && tr->no_filter_buffering_ref++)
7372 		goto out;
7373 
7374 	if (!set) {
7375 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7376 			ret = -EINVAL;
7377 			goto out;
7378 		}
7379 
7380 		--tr->no_filter_buffering_ref;
7381 	}
7382  out:
7383 	mutex_unlock(&trace_types_lock);
7384 
7385 	return ret;
7386 }
7387 
/*
 * Per-open state for the raw buffer files (allocated in tracing_buffers_open()
 * and stored in filp->private_data).
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;		/* tr, cpu_file, trace and array_buffer of this open */
	void			*spare;		/* page from ring_buffer_alloc_read_page(), NULL until first read */
	unsigned int		spare_cpu;	/* cpu_file value @spare was allocated with */
	unsigned int		read;		/* offset into @spare already copied out; (unsigned int)-1 forces a refill */
};
7394 
7395 #ifdef CONFIG_TRACER_SNAPSHOT
7396 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7397 {
7398 	struct trace_array *tr = inode->i_private;
7399 	struct trace_iterator *iter;
7400 	struct seq_file *m;
7401 	int ret;
7402 
7403 	ret = tracing_check_open_get_tr(tr);
7404 	if (ret)
7405 		return ret;
7406 
7407 	if (file->f_mode & FMODE_READ) {
7408 		iter = __tracing_open(inode, file, true);
7409 		if (IS_ERR(iter))
7410 			ret = PTR_ERR(iter);
7411 	} else {
7412 		/* Writes still need the seq_file to hold the private data */
7413 		ret = -ENOMEM;
7414 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7415 		if (!m)
7416 			goto out;
7417 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7418 		if (!iter) {
7419 			kfree(m);
7420 			goto out;
7421 		}
7422 		ret = 0;
7423 
7424 		iter->tr = tr;
7425 		iter->array_buffer = &tr->max_buffer;
7426 		iter->cpu_file = tracing_get_cpu(inode);
7427 		m->private = iter;
7428 		file->private_data = m;
7429 	}
7430 out:
7431 	if (ret < 0)
7432 		trace_array_put(tr);
7433 
7434 	return ret;
7435 }
7436 
/*
 * Write handler used by snapshot_fops.
 *
 * The user buffer is parsed as a decimal unsigned long and acted on:
 *   0     - free the snapshot buffer if one is allocated (rejected with
 *           -EINVAL unless the file covers RING_BUFFER_ALL_CPUS)
 *   1     - make sure the snapshot buffer exists and matches the main
 *           buffer's size, then swap (all CPUs or a single CPU)
 *   other - reset the contents of the snapshot buffer if it is allocated
 *
 * Returns @cnt (and advances *@ppos) on success, -EBUSY when the current
 * tracer has use_max_tr set or tr->cond_snapshot is set, or another
 * negative error from the helpers.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	/* Refuse while the current tracer itself uses the max buffer */
	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	/* tr->cond_snapshot is sampled under max_lock with interrupts off */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		goto out;

	switch (val) {
	case 0:
		/* Freeing is only allowed through the all-CPU file */
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		/* Resize an existing snapshot buffer, otherwise allocate one */
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->max_buffer,
					&tr->array_buffer, iter->cpu_file);
		else
			ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			break;
		local_irq_disable();
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
			update_max_tr(tr, current, smp_processor_id(), NULL);
		else
			update_max_tr_single(tr, current, iter->cpu_file);
		local_irq_enable();
		break;
	default:
		/* Any other value clears the snapshot buffer contents */
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	/* On success the whole write is reported as consumed */
	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
7521 
7522 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7523 {
7524 	struct seq_file *m = file->private_data;
7525 	int ret;
7526 
7527 	ret = tracing_release(inode, file);
7528 
7529 	if (file->f_mode & FMODE_READ)
7530 		return ret;
7531 
7532 	/* If write only, the seq_file is just a stub */
7533 	if (m)
7534 		kfree(m->private);
7535 	kfree(m);
7536 
7537 	return 0;
7538 }
7539 
/*
 * Forward declarations: these handlers are defined further down in this file
 * but are already needed by snapshot_raw_open() and snapshot_raw_fops.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7546 
7547 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7548 {
7549 	struct ftrace_buffer_info *info;
7550 	int ret;
7551 
7552 	/* The following checks for tracefs lockdown */
7553 	ret = tracing_buffers_open(inode, filp);
7554 	if (ret < 0)
7555 		return ret;
7556 
7557 	info = filp->private_data;
7558 
7559 	if (info->iter.trace->use_max_tr) {
7560 		tracing_buffers_release(inode, filp);
7561 		return -EBUSY;
7562 	}
7563 
7564 	info->iter.snapshot = true;
7565 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7566 
7567 	return ret;
7568 }
7569 
7570 #endif /* CONFIG_TRACER_SNAPSHOT */
7571 
7572 
/*
 * File operations backed by tracing_thresh_read()/tracing_thresh_write();
 * seekable, opened through tracing_open_generic().
 */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};
7579 
7580 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
/*
 * File operations backed by tracing_max_lat_read()/tracing_max_lat_write();
 * only built when CONFIG_TRACER_MAX_TRACE or CONFIG_HWLAT_TRACER is set.
 */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
7587 #endif
7588 
/*
 * File operations backed by tracing_set_trace_read()/tracing_set_trace_write();
 * seekable, opened through tracing_open_generic().
 */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};
7595 
/*
 * File operations built from the tracing_*_pipe handlers: read-only
 * (no .write), pollable, spliceable and not seekable (no_llseek).
 */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};
7604 
/*
 * File operations backed by tracing_entries_read()/tracing_entries_write().
 * Open and release go through the *_generic_tr helpers.
 */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7612 
/*
 * Read-only file operations backed by tracing_total_entries_read().
 * Open and release go through the *_generic_tr helpers.
 */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7619 
/*
 * Write-only file operations: writes go to tracing_free_buffer_write() and
 * the file has its own release handler, tracing_free_buffer_release().
 */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
7625 
/* Write-only file operations: writes are handled by tracing_mark_write(). */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_release_generic_tr,
};
7631 
/*
 * Write-only file operations: same open/release as tracing_mark_fops, but
 * writes are handled by tracing_mark_raw_write().
 */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_release_generic_tr,
};
7637 
/*
 * File operations for the trace clock file: reads go through the seq_file
 * set up by tracing_clock_open(), writes through tracing_clock_write().
 */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
7645 
/*
 * Read-only file operations: reads go through the seq_file set up by
 * tracing_time_stamp_mode_open().
 */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};
7652 
7653 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * File operations for the snapshot file: seq_file based reads, control
 * writes through tracing_snapshot_write() (0 = free, 1 = take snapshot,
 * anything else = clear).
 */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};
7661 
/*
 * File operations for raw access to the snapshot buffer: reuses the
 * tracing_buffers_* read/splice/release handlers, with snapshot_raw_open()
 * pointing the iterator at the max buffer.  Not seekable.
 */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
7669 
7670 #endif /* CONFIG_TRACER_SNAPSHOT */
7671 
7672 /*
7673  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7674  * @filp: The active open file structure
7675  * @ubuf: The userspace provided buffer to read value into
7676  * @cnt: The maximum number of bytes to read
7677  * @ppos: The current "file" position
7678  *
7679  * This function implements the write interface for a struct trace_min_max_param.
7680  * The filp->private_data must point to a trace_min_max_param structure that
7681  * defines where to write the value, the min and the max acceptable values,
7682  * and a lock to protect the write.
7683  */
7684 static ssize_t
7685 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7686 {
7687 	struct trace_min_max_param *param = filp->private_data;
7688 	u64 val;
7689 	int err;
7690 
7691 	if (!param)
7692 		return -EFAULT;
7693 
7694 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7695 	if (err)
7696 		return err;
7697 
7698 	if (param->lock)
7699 		mutex_lock(param->lock);
7700 
7701 	if (param->min && val < *param->min)
7702 		err = -EINVAL;
7703 
7704 	if (param->max && val > *param->max)
7705 		err = -EINVAL;
7706 
7707 	if (!err)
7708 		*param->val = val;
7709 
7710 	if (param->lock)
7711 		mutex_unlock(param->lock);
7712 
7713 	if (err)
7714 		return err;
7715 
7716 	return cnt;
7717 }
7718 
7719 /*
7720  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7721  * @filp: The active open file structure
7722  * @ubuf: The userspace provided buffer to read value into
7723  * @cnt: The maximum number of bytes to read
7724  * @ppos: The current "file" position
7725  *
7726  * This function implements the read interface for a struct trace_min_max_param.
7727  * The filp->private_data must point to a trace_min_max_param struct with valid
7728  * data.
7729  */
7730 static ssize_t
7731 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7732 {
7733 	struct trace_min_max_param *param = filp->private_data;
7734 	char buf[U64_STR_SIZE];
7735 	int len;
7736 	u64 val;
7737 
7738 	if (!param)
7739 		return -EFAULT;
7740 
7741 	val = *param->val;
7742 
7743 	if (cnt > sizeof(buf))
7744 		cnt = sizeof(buf);
7745 
7746 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7747 
7748 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7749 }
7750 
/*
 * Non-static file operations for a bounded u64 parameter: both handlers
 * expect filp->private_data to point to a struct trace_min_max_param.
 */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
7756 
/* Entries kept per trace array error log before the oldest one is recycled */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the fixed "<loc>: error: " buffer in struct tracing_log_err */
#define TRACING_LOG_LOC_MAX	128

/* Printed in front of the logged command; the caret line is indented by its length */
#define CMD_PREFIX "  Command: "
7761 
/* Details of one logged tracing error, apart from its location and command text. */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};
7768 
/* One entry of a trace array's error log (linked on tr->err_log). */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};
7775 
/* Held while an err_log list is modified, cleared or walked by the seq_file iterator */
static DEFINE_MUTEX(tracing_err_log_lock);
7777 
7778 static struct tracing_log_err *alloc_tracing_log_err(int len)
7779 {
7780 	struct tracing_log_err *err;
7781 
7782 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7783 	if (!err)
7784 		return ERR_PTR(-ENOMEM);
7785 
7786 	err->cmd = kzalloc(len, GFP_KERNEL);
7787 	if (!err->cmd) {
7788 		kfree(err);
7789 		return ERR_PTR(-ENOMEM);
7790 	}
7791 
7792 	return err;
7793 }
7794 
/* Free an error log entry and the command buffer it owns. */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
7800 
7801 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7802 						   int len)
7803 {
7804 	struct tracing_log_err *err;
7805 
7806 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7807 		err = alloc_tracing_log_err(len);
7808 		if (PTR_ERR(err) != -ENOMEM)
7809 			tr->n_err_log_entries++;
7810 
7811 		return err;
7812 	}
7813 
7814 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7815 	kfree(err->cmd);
7816 	err->cmd = kzalloc(len, GFP_KERNEL);
7817 	if (!err->cmd)
7818 		return ERR_PTR(-ENOMEM);
7819 	list_del(&err->list);
7820 
7821 	return err;
7822 }
7823 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str within @cmd.  The result is meant
 * to be handed to tracing_log_err() as the caret position within @cmd.
 * An empty @cmd triggers a warning.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found (or @cmd is empty).
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *found;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	found = strstr(cmd, str);

	return found ? found - cmd : 0;
}
7849 
7850 /**
7851  * tracing_log_err - write an error to the tracing error log
7852  * @tr: The associated trace array for the error (NULL for top level array)
7853  * @loc: A string describing where the error occurred
7854  * @cmd: The tracing command that caused the error
7855  * @errs: The array of loc-specific static error strings
7856  * @type: The index into errs[], which produces the specific static err string
7857  * @pos: The position the caret should be placed in the cmd
7858  *
7859  * Writes an error into tracing/error_log of the form:
7860  *
7861  * <loc>: error: <text>
7862  *   Command: <cmd>
7863  *              ^
7864  *
7865  * tracing/error_log is a small log file containing the last
7866  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7867  * unless there has been a tracing error, and the error log can be
7868  * cleared and have its memory freed by writing the empty string in
7869  * truncation mode to it i.e. echo > tracing/error_log.
7870  *
7871  * NOTE: the @errs array along with the @type param are used to
7872  * produce a static error string - this string is not copied and saved
7873  * when the error is logged - only a pointer to it is saved.  See
7874  * existing callers for examples of how static strings are typically
7875  * defined for use with tracing_log_err().
7876  */
7877 void tracing_log_err(struct trace_array *tr,
7878 		     const char *loc, const char *cmd,
7879 		     const char **errs, u8 type, u16 pos)
7880 {
7881 	struct tracing_log_err *err;
7882 	int len = 0;
7883 
7884 	if (!tr)
7885 		tr = &global_trace;
7886 
7887 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7888 
7889 	mutex_lock(&tracing_err_log_lock);
7890 	err = get_tracing_log_err(tr, len);
7891 	if (PTR_ERR(err) == -ENOMEM) {
7892 		mutex_unlock(&tracing_err_log_lock);
7893 		return;
7894 	}
7895 
7896 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7897 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7898 
7899 	err->info.errs = errs;
7900 	err->info.type = type;
7901 	err->info.pos = pos;
7902 	err->info.ts = local_clock();
7903 
7904 	list_add_tail(&err->list, &tr->err_log);
7905 	mutex_unlock(&tracing_err_log_lock);
7906 }
7907 
7908 static void clear_tracing_err_log(struct trace_array *tr)
7909 {
7910 	struct tracing_log_err *err, *next;
7911 
7912 	mutex_lock(&tracing_err_log_lock);
7913 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7914 		list_del(&err->list);
7915 		free_tracing_log_err(err);
7916 	}
7917 
7918 	tr->n_err_log_entries = 0;
7919 	mutex_unlock(&tracing_err_log_lock);
7920 }
7921 
7922 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7923 {
7924 	struct trace_array *tr = m->private;
7925 
7926 	mutex_lock(&tracing_err_log_lock);
7927 
7928 	return seq_list_start(&tr->err_log, *pos);
7929 }
7930 
7931 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7932 {
7933 	struct trace_array *tr = m->private;
7934 
7935 	return seq_list_next(v, &tr->err_log, pos);
7936 }
7937 
/* seq_file ->stop: drop the lock taken in tracing_err_log_seq_start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7942 
7943 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7944 {
7945 	u16 i;
7946 
7947 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7948 		seq_putc(m, ' ');
7949 	for (i = 0; i < pos; i++)
7950 		seq_putc(m, ' ');
7951 	seq_puts(m, "^\n");
7952 }
7953 
7954 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7955 {
7956 	struct tracing_log_err *err = v;
7957 
7958 	if (err) {
7959 		const char *err_text = err->info.errs[err->info.type];
7960 		u64 sec = err->info.ts;
7961 		u32 nsec;
7962 
7963 		nsec = do_div(sec, NSEC_PER_SEC);
7964 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7965 			   err->loc, err_text);
7966 		seq_printf(m, "%s", err->cmd);
7967 		tracing_err_log_show_pos(m, err->info.pos);
7968 	}
7969 
7970 	return 0;
7971 }
7972 
/* seq_file iterator over a trace array's error log; start/stop bracket the walk with tracing_err_log_lock. */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
7979 
7980 static int tracing_err_log_open(struct inode *inode, struct file *file)
7981 {
7982 	struct trace_array *tr = inode->i_private;
7983 	int ret = 0;
7984 
7985 	ret = tracing_check_open_get_tr(tr);
7986 	if (ret)
7987 		return ret;
7988 
7989 	/* If this file was opened for write, then erase contents */
7990 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7991 		clear_tracing_err_log(tr);
7992 
7993 	if (file->f_mode & FMODE_READ) {
7994 		ret = seq_open(file, &tracing_err_log_seq_ops);
7995 		if (!ret) {
7996 			struct seq_file *m = file->private_data;
7997 			m->private = tr;
7998 		} else {
7999 			trace_array_put(tr);
8000 		}
8001 	}
8002 	return ret;
8003 }
8004 
/*
 * Write handler used by tracing_err_log_fops: the data is ignored and the
 * whole write reported as consumed.  Clearing the log happens at open time
 * (O_TRUNC), see tracing_err_log_open().
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8011 
/*
 * Release handler used by tracing_err_log_fops: releases the trace array
 * and, for files opened for reading, the seq_file set up at open time.
 */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	/* The seq_file only exists if the file was opened for reading */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
8023 
/* File operations for the error log: seq_file reads, no-op writes, clearing on open with O_TRUNC. */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = tracing_err_log_release,
};
8031 
8032 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8033 {
8034 	struct trace_array *tr = inode->i_private;
8035 	struct ftrace_buffer_info *info;
8036 	int ret;
8037 
8038 	ret = tracing_check_open_get_tr(tr);
8039 	if (ret)
8040 		return ret;
8041 
8042 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8043 	if (!info) {
8044 		trace_array_put(tr);
8045 		return -ENOMEM;
8046 	}
8047 
8048 	mutex_lock(&trace_types_lock);
8049 
8050 	info->iter.tr		= tr;
8051 	info->iter.cpu_file	= tracing_get_cpu(inode);
8052 	info->iter.trace	= tr->current_trace;
8053 	info->iter.array_buffer = &tr->array_buffer;
8054 	info->spare		= NULL;
8055 	/* Force reading ring buffer for first read */
8056 	info->read		= (unsigned int)-1;
8057 
8058 	filp->private_data = info;
8059 
8060 	tr->trace_ref++;
8061 
8062 	mutex_unlock(&trace_types_lock);
8063 
8064 	ret = nonseekable_open(inode, filp);
8065 	if (ret < 0)
8066 		trace_array_put(tr);
8067 
8068 	return ret;
8069 }
8070 
8071 static __poll_t
8072 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8073 {
8074 	struct ftrace_buffer_info *info = filp->private_data;
8075 	struct trace_iterator *iter = &info->iter;
8076 
8077 	return trace_poll(iter, filp, poll_table);
8078 }
8079 
/*
 * Read handler for the raw buffer files: copy ring buffer page data to user
 * space.
 *
 * A spare page is allocated on the first read and kept in info->spare.
 * info->read is the offset within that page that has already been handed to
 * user space: while it is below PAGE_SIZE the leftover data is served,
 * otherwise the page is refilled with ring_buffer_read_page().
 *
 * Returns the number of bytes copied, 0 if @count is 0 or the page read
 * failed while the buffer is not empty, -EAGAIN for an empty buffer on a
 * non-blocking file, -EFAULT if nothing could be copied, -EBUSY for a
 * snapshot iterator while the current tracer uses the max buffer, or the
 * error from the spare page allocation / wait_on_pipe().
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Lazily allocate the spare page and remember which CPU it belongs to */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing read: block for data unless empty + O_NONBLOCK */
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* Fresh page: start handing it out from the beginning */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* Account only for the bytes that made it to user space */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8153 
/*
 * Release handler for the raw buffer files; undoes tracing_buffers_open().
 *
 * Under trace_types_lock: drops tr->trace_ref and the trace array reference,
 * bumps the iterator's wait_index and wakes the ring buffer waiters, then
 * frees the spare read page (if one was allocated) and the per-open info.
 */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	mutex_lock(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	iter->wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	/* The spare page is returned for the CPU it was allocated with */
	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	mutex_unlock(&trace_types_lock);

	return 0;
}
8180 
/*
 * Reference counted handle for one ring buffer read page handed to a pipe
 * by tracing_buffers_splice_read().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer @page was allocated from */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu value @page was allocated with */
	refcount_t		refcount;	/* page and ref are freed when this drops to zero */
};
8187 
8188 static void buffer_ref_release(struct buffer_ref *ref)
8189 {
8190 	if (!refcount_dec_and_test(&ref->refcount))
8191 		return;
8192 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8193 	kfree(ref);
8194 }
8195 
/*
 * pipe_buf_operations ->release: drop the pipe buffer's reference on the
 * buffer_ref stored in buf->private and clear the stale pointer.
 */
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	buffer_ref_release(ref);
	buf->private = 0;
}
8204 
8205 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8206 				struct pipe_buffer *buf)
8207 {
8208 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8209 
8210 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8211 		return false;
8212 
8213 	refcount_inc(&ref->refcount);
8214 	return true;
8215 }
8216 
/*
 * Pipe buffer operations for a buffer.
 * Both callbacks operate on the struct buffer_ref kept in buf->private.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
8222 
/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 *
 * Drops the reference on the buffer_ref stored in partial[i].private and
 * clears the stale pointer.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}
8235 
/*
 * splice_read handler for the raw buffer files: move whole ring buffer pages
 * into @pipe.
 *
 * *@ppos must be page aligned.  @len is rounded down to a multiple of
 * PAGE_SIZE; an unaligned @len smaller than one page is rejected with
 * -EINVAL.  Each page handed to the pipe is wrapped in a buffer_ref that is
 * released through buffer_pipe_buf_ops / buffer_spd_release().
 *
 * If no page could be read, the function returns the allocation error seen
 * in the loop (if any), -EAGAIN for non-blocking callers, or waits for data
 * with wait_on_pipe() and retries.
 *
 * Returns the result of splice_to_pipe() or a negative error.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	/* Only whole pages are spliced: round an unaligned length down */
	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One iteration per page, while there is room, length and data left */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		/* This initial reference is handed over to the pipe buffer */
		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing read into this page: give it back and stop */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the pages filled in this pass (a break skips the i++) */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		long wait_index;

		/* Report an allocation failure from the loop above */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		/* Sampled before sleeping so a bump during the wait is noticed */
		wait_index = READ_ONCE(iter->wait_index);

		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Make sure we see the new wait_index */
		smp_rmb();
		if (wait_index != iter->wait_index)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8357 
/*
 * An ioctl call with cmd 0 to the ring buffer file will wake up all waiters
 *
 * Any other @cmd is rejected with -ENOIOCTLCMD; @arg is not used.  The
 * wait_index bump lets a woken tracing_buffers_splice_read() notice that it
 * was woken on purpose and return instead of waiting again.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	if (cmd)
		return -ENOIOCTLCMD;

	mutex_lock(&trace_types_lock);

	iter->wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	mutex_unlock(&trace_types_lock);
	return 0;
}
8378 
/*
 * File operations for raw ring buffer access: page based read and splice,
 * poll, and an ioctl (cmd 0) that wakes up waiters.  Not seekable.
 */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.llseek		= no_llseek,
};
8388 
/*
 * Read handler used by tracing_stats_fops: format the ring buffer statistics
 * of one CPU (taken from the inode) of the trace array's main buffer into a
 * temporary trace_seq and copy the requested window to user space.
 *
 * Returns the result of simple_read_from_buffer() or -ENOMEM.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	/* The trace_seq is heap allocated and freed before returning */
	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	/* Clocks counting in ns are shown as <sec>.<usec>, others as raw values */
	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8452 
/* Read-only file operations for the per-CPU ring buffer statistics produced by tracing_stats_read(). */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
8459 
8460 #ifdef CONFIG_DYNAMIC_FTRACE
8461 
8462 static ssize_t
8463 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8464 		  size_t cnt, loff_t *ppos)
8465 {
8466 	ssize_t ret;
8467 	char *buf;
8468 	int r;
8469 
8470 	/* 256 should be plenty to hold the amount needed */
8471 	buf = kmalloc(256, GFP_KERNEL);
8472 	if (!buf)
8473 		return -ENOMEM;
8474 
8475 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8476 		      ftrace_update_tot_cnt,
8477 		      ftrace_number_of_pages,
8478 		      ftrace_number_of_groups);
8479 
8480 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8481 	kfree(buf);
8482 	return ret;
8483 }
8484 
/* Read-only file operations for the dynamic ftrace counters (CONFIG_DYNAMIC_FTRACE only). */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
8490 #endif /* CONFIG_DYNAMIC_FTRACE */
8491 
8492 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Probe callback for the uncounted "snapshot" command: every invocation
 * takes a snapshot of @tr. The ip, parent_ip, ops and data arguments are
 * not used.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8500 
8501 static void
8502 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8503 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8504 		      void *data)
8505 {
8506 	struct ftrace_func_mapper *mapper = data;
8507 	long *count = NULL;
8508 
8509 	if (mapper)
8510 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8511 
8512 	if (count) {
8513 
8514 		if (*count <= 0)
8515 			return;
8516 
8517 		(*count)--;
8518 	}
8519 
8520 	tracing_snapshot_instance(tr);
8521 }
8522 
8523 static int
8524 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8525 		      struct ftrace_probe_ops *ops, void *data)
8526 {
8527 	struct ftrace_func_mapper *mapper = data;
8528 	long *count = NULL;
8529 
8530 	seq_printf(m, "%ps:", (void *)ip);
8531 
8532 	seq_puts(m, "snapshot");
8533 
8534 	if (mapper)
8535 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8536 
8537 	if (count)
8538 		seq_printf(m, ":count=%ld\n", *count);
8539 	else
8540 		seq_puts(m, ":unlimited\n");
8541 
8542 	return 0;
8543 }
8544 
8545 static int
8546 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8547 		     unsigned long ip, void *init_data, void **data)
8548 {
8549 	struct ftrace_func_mapper *mapper = *data;
8550 
8551 	if (!mapper) {
8552 		mapper = allocate_ftrace_func_mapper();
8553 		if (!mapper)
8554 			return -ENOMEM;
8555 		*data = mapper;
8556 	}
8557 
8558 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8559 }
8560 
8561 static void
8562 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8563 		     unsigned long ip, void *data)
8564 {
8565 	struct ftrace_func_mapper *mapper = data;
8566 
8567 	if (!ip) {
8568 		if (!mapper)
8569 			return;
8570 		free_ftrace_func_mapper(mapper, NULL);
8571 		return;
8572 	}
8573 
8574 	ftrace_func_mapper_remove_ip(mapper, ip);
8575 }
8576 
/* Probe ops used when the "snapshot" command is given without a count. */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};
8581 
/*
 * Probe ops used when the "snapshot" command is given a parameter; the
 * init/free callbacks manage the per-ip counter mapper.
 */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
	.init			= ftrace_snapshot_init,
	.free			= ftrace_snapshot_free,
};
8588 
8589 static int
8590 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8591 			       char *glob, char *cmd, char *param, int enable)
8592 {
8593 	struct ftrace_probe_ops *ops;
8594 	void *count = (void *)-1;
8595 	char *number;
8596 	int ret;
8597 
8598 	if (!tr)
8599 		return -ENODEV;
8600 
8601 	/* hash funcs only work with set_ftrace_filter */
8602 	if (!enable)
8603 		return -EINVAL;
8604 
8605 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8606 
8607 	if (glob[0] == '!')
8608 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8609 
8610 	if (!param)
8611 		goto out_reg;
8612 
8613 	number = strsep(&param, ":");
8614 
8615 	if (!strlen(number))
8616 		goto out_reg;
8617 
8618 	/*
8619 	 * We use the callback data field (which is a pointer)
8620 	 * as our counter.
8621 	 */
8622 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8623 	if (ret)
8624 		return ret;
8625 
8626  out_reg:
8627 	ret = tracing_alloc_snapshot_instance(tr);
8628 	if (ret < 0)
8629 		goto out;
8630 
8631 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8632 
8633  out:
8634 	return ret < 0 ? ret : 0;
8635 }
8636 
/* Binds the "snapshot" command name to its handler. */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
8641 
/*
 * Register the "snapshot" ftrace command; returns the result of
 * register_ftrace_command().
 */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
8646 #else
/* Stub used unless both CONFIG_TRACER_SNAPSHOT and CONFIG_DYNAMIC_FTRACE are set. */
static inline __init int register_snapshot_cmd(void) { return 0; }
8648 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8649 
8650 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8651 {
8652 	if (WARN_ON(!tr->dir))
8653 		return ERR_PTR(-ENODEV);
8654 
8655 	/* Top directory uses NULL as the parent */
8656 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8657 		return NULL;
8658 
8659 	/* All sub buffers have a descriptor */
8660 	return tr->dir;
8661 }
8662 
8663 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8664 {
8665 	struct dentry *d_tracer;
8666 
8667 	if (tr->percpu_dir)
8668 		return tr->percpu_dir;
8669 
8670 	d_tracer = tracing_get_dentry(tr);
8671 	if (IS_ERR(d_tracer))
8672 		return NULL;
8673 
8674 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8675 
8676 	MEM_FAIL(!tr->percpu_dir,
8677 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8678 
8679 	return tr->percpu_dir;
8680 }
8681 
8682 static struct dentry *
8683 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8684 		      void *data, long cpu, const struct file_operations *fops)
8685 {
8686 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8687 
8688 	if (ret) /* See tracing_get_cpu() */
8689 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8690 	return ret;
8691 }
8692 
8693 static void
8694 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8695 {
8696 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8697 	struct dentry *d_cpu;
8698 	char cpu_dir[30]; /* 30 characters should be more than enough */
8699 
8700 	if (!d_percpu)
8701 		return;
8702 
8703 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8704 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8705 	if (!d_cpu) {
8706 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8707 		return;
8708 	}
8709 
8710 	/* per cpu trace_pipe */
8711 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8712 				tr, cpu, &tracing_pipe_fops);
8713 
8714 	/* per cpu trace */
8715 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8716 				tr, cpu, &tracing_fops);
8717 
8718 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8719 				tr, cpu, &tracing_buffers_fops);
8720 
8721 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8722 				tr, cpu, &tracing_stats_fops);
8723 
8724 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8725 				tr, cpu, &tracing_entries_fops);
8726 
8727 #ifdef CONFIG_TRACER_SNAPSHOT
8728 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8729 				tr, cpu, &snapshot_fops);
8730 
8731 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8732 				tr, cpu, &snapshot_raw_fops);
8733 #endif
8734 }
8735 
8736 #ifdef CONFIG_FTRACE_SELFTEST
8737 /* Let selftest have access to static functions in this file */
8738 #include "trace_selftest.c"
8739 #endif
8740 
8741 static ssize_t
8742 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8743 			loff_t *ppos)
8744 {
8745 	struct trace_option_dentry *topt = filp->private_data;
8746 	char *buf;
8747 
8748 	if (topt->flags->val & topt->opt->bit)
8749 		buf = "1\n";
8750 	else
8751 		buf = "0\n";
8752 
8753 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8754 }
8755 
8756 static ssize_t
8757 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8758 			 loff_t *ppos)
8759 {
8760 	struct trace_option_dentry *topt = filp->private_data;
8761 	unsigned long val;
8762 	int ret;
8763 
8764 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8765 	if (ret)
8766 		return ret;
8767 
8768 	if (val != 0 && val != 1)
8769 		return -EINVAL;
8770 
8771 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8772 		mutex_lock(&trace_types_lock);
8773 		ret = __set_tracer_option(topt->tr, topt->flags,
8774 					  topt->opt, !val);
8775 		mutex_unlock(&trace_types_lock);
8776 		if (ret)
8777 			return ret;
8778 	}
8779 
8780 	*ppos += cnt;
8781 
8782 	return cnt;
8783 }
8784 
8785 
/* File operations for the tracer-specific option files (see create_trace_option_file()). */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_generic,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
};
8792 
8793 /*
8794  * In order to pass in both the trace_array descriptor as well as the index
8795  * to the flag that the trace option file represents, the trace_array
8796  * has a character array of trace_flags_index[], which holds the index
8797  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8798  * The address of this character array is passed to the flag option file
8799  * read/write callbacks.
8800  *
8801  * In order to extract both the index and the trace_array descriptor,
8802  * get_tr_index() uses the following algorithm.
8803  *
8804  *   idx = *ptr;
8805  *
8806  * As the pointer itself contains the address of the index (remember
8807  * index[1] == 1).
8808  *
8809  * Then to get the trace_array descriptor, by subtracting that index
8810  * from the ptr, we get to the start of the index itself.
8811  *
8812  *   ptr - idx == &index[0]
8813  *
8814  * Then a simple container_of() from that pointer gets us to the
8815  * trace_array descriptor.
8816  */
8817 static void get_tr_index(void *data, struct trace_array **ptr,
8818 			 unsigned int *pindex)
8819 {
8820 	*pindex = *(unsigned char *)data;
8821 
8822 	*ptr = container_of(data - *pindex, struct trace_array,
8823 			    trace_flags_index);
8824 }
8825 
8826 static ssize_t
8827 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8828 			loff_t *ppos)
8829 {
8830 	void *tr_index = filp->private_data;
8831 	struct trace_array *tr;
8832 	unsigned int index;
8833 	char *buf;
8834 
8835 	get_tr_index(tr_index, &tr, &index);
8836 
8837 	if (tr->trace_flags & (1 << index))
8838 		buf = "1\n";
8839 	else
8840 		buf = "0\n";
8841 
8842 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8843 }
8844 
8845 static ssize_t
8846 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8847 			 loff_t *ppos)
8848 {
8849 	void *tr_index = filp->private_data;
8850 	struct trace_array *tr;
8851 	unsigned int index;
8852 	unsigned long val;
8853 	int ret;
8854 
8855 	get_tr_index(tr_index, &tr, &index);
8856 
8857 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8858 	if (ret)
8859 		return ret;
8860 
8861 	if (val != 0 && val != 1)
8862 		return -EINVAL;
8863 
8864 	mutex_lock(&event_mutex);
8865 	mutex_lock(&trace_types_lock);
8866 	ret = set_tracer_flag(tr, 1 << index, val);
8867 	mutex_unlock(&trace_types_lock);
8868 	mutex_unlock(&event_mutex);
8869 
8870 	if (ret < 0)
8871 		return ret;
8872 
8873 	*ppos += cnt;
8874 
8875 	return cnt;
8876 }
8877 
/* File operations for the core option files (see create_trace_option_core_file()). */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8884 
8885 struct dentry *trace_create_file(const char *name,
8886 				 umode_t mode,
8887 				 struct dentry *parent,
8888 				 void *data,
8889 				 const struct file_operations *fops)
8890 {
8891 	struct dentry *ret;
8892 
8893 	ret = tracefs_create_file(name, mode, parent, data, fops);
8894 	if (!ret)
8895 		pr_warn("Could not create tracefs '%s' entry\n", name);
8896 
8897 	return ret;
8898 }
8899 
8900 
8901 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8902 {
8903 	struct dentry *d_tracer;
8904 
8905 	if (tr->options)
8906 		return tr->options;
8907 
8908 	d_tracer = tracing_get_dentry(tr);
8909 	if (IS_ERR(d_tracer))
8910 		return NULL;
8911 
8912 	tr->options = tracefs_create_dir("options", d_tracer);
8913 	if (!tr->options) {
8914 		pr_warn("Could not create tracefs directory 'options'\n");
8915 		return NULL;
8916 	}
8917 
8918 	return tr->options;
8919 }
8920 
8921 static void
8922 create_trace_option_file(struct trace_array *tr,
8923 			 struct trace_option_dentry *topt,
8924 			 struct tracer_flags *flags,
8925 			 struct tracer_opt *opt)
8926 {
8927 	struct dentry *t_options;
8928 
8929 	t_options = trace_options_init_dentry(tr);
8930 	if (!t_options)
8931 		return;
8932 
8933 	topt->flags = flags;
8934 	topt->opt = opt;
8935 	topt->tr = tr;
8936 
8937 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8938 					t_options, topt, &trace_options_fops);
8939 
8940 }
8941 
8942 static void
8943 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8944 {
8945 	struct trace_option_dentry *topts;
8946 	struct trace_options *tr_topts;
8947 	struct tracer_flags *flags;
8948 	struct tracer_opt *opts;
8949 	int cnt;
8950 	int i;
8951 
8952 	if (!tracer)
8953 		return;
8954 
8955 	flags = tracer->flags;
8956 
8957 	if (!flags || !flags->opts)
8958 		return;
8959 
8960 	/*
8961 	 * If this is an instance, only create flags for tracers
8962 	 * the instance may have.
8963 	 */
8964 	if (!trace_ok_for_array(tracer, tr))
8965 		return;
8966 
8967 	for (i = 0; i < tr->nr_topts; i++) {
8968 		/* Make sure there's no duplicate flags. */
8969 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8970 			return;
8971 	}
8972 
8973 	opts = flags->opts;
8974 
8975 	for (cnt = 0; opts[cnt].name; cnt++)
8976 		;
8977 
8978 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8979 	if (!topts)
8980 		return;
8981 
8982 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8983 			    GFP_KERNEL);
8984 	if (!tr_topts) {
8985 		kfree(topts);
8986 		return;
8987 	}
8988 
8989 	tr->topts = tr_topts;
8990 	tr->topts[tr->nr_topts].tracer = tracer;
8991 	tr->topts[tr->nr_topts].topts = topts;
8992 	tr->nr_topts++;
8993 
8994 	for (cnt = 0; opts[cnt].name; cnt++) {
8995 		create_trace_option_file(tr, &topts[cnt], flags,
8996 					 &opts[cnt]);
8997 		MEM_FAIL(topts[cnt].entry == NULL,
8998 			  "Failed to create trace option: %s",
8999 			  opts[cnt].name);
9000 	}
9001 }
9002 
9003 static struct dentry *
9004 create_trace_option_core_file(struct trace_array *tr,
9005 			      const char *option, long index)
9006 {
9007 	struct dentry *t_options;
9008 
9009 	t_options = trace_options_init_dentry(tr);
9010 	if (!t_options)
9011 		return NULL;
9012 
9013 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9014 				 (void *)&tr->trace_flags_index[index],
9015 				 &trace_options_core_fops);
9016 }
9017 
9018 static void create_trace_options_dir(struct trace_array *tr)
9019 {
9020 	struct dentry *t_options;
9021 	bool top_level = tr == &global_trace;
9022 	int i;
9023 
9024 	t_options = trace_options_init_dentry(tr);
9025 	if (!t_options)
9026 		return;
9027 
9028 	for (i = 0; trace_options[i]; i++) {
9029 		if (top_level ||
9030 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9031 			create_trace_option_core_file(tr, trace_options[i], i);
9032 	}
9033 }
9034 
9035 static ssize_t
9036 rb_simple_read(struct file *filp, char __user *ubuf,
9037 	       size_t cnt, loff_t *ppos)
9038 {
9039 	struct trace_array *tr = filp->private_data;
9040 	char buf[64];
9041 	int r;
9042 
9043 	r = tracer_tracing_is_on(tr);
9044 	r = sprintf(buf, "%d\n", r);
9045 
9046 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9047 }
9048 
9049 static ssize_t
9050 rb_simple_write(struct file *filp, const char __user *ubuf,
9051 		size_t cnt, loff_t *ppos)
9052 {
9053 	struct trace_array *tr = filp->private_data;
9054 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9055 	unsigned long val;
9056 	int ret;
9057 
9058 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9059 	if (ret)
9060 		return ret;
9061 
9062 	if (buffer) {
9063 		mutex_lock(&trace_types_lock);
9064 		if (!!val == tracer_tracing_is_on(tr)) {
9065 			val = 0; /* do nothing */
9066 		} else if (val) {
9067 			tracer_tracing_on(tr);
9068 			if (tr->current_trace->start)
9069 				tr->current_trace->start(tr);
9070 		} else {
9071 			tracer_tracing_off(tr);
9072 			if (tr->current_trace->stop)
9073 				tr->current_trace->stop(tr);
9074 			/* Wake up any waiters */
9075 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9076 		}
9077 		mutex_unlock(&trace_types_lock);
9078 	}
9079 
9080 	(*ppos)++;
9081 
9082 	return cnt;
9083 }
9084 
/* File operations for the "tracing_on" file (see init_tracer_tracefs()). */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9092 
9093 static ssize_t
9094 buffer_percent_read(struct file *filp, char __user *ubuf,
9095 		    size_t cnt, loff_t *ppos)
9096 {
9097 	struct trace_array *tr = filp->private_data;
9098 	char buf[64];
9099 	int r;
9100 
9101 	r = tr->buffer_percent;
9102 	r = sprintf(buf, "%d\n", r);
9103 
9104 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9105 }
9106 
9107 static ssize_t
9108 buffer_percent_write(struct file *filp, const char __user *ubuf,
9109 		     size_t cnt, loff_t *ppos)
9110 {
9111 	struct trace_array *tr = filp->private_data;
9112 	unsigned long val;
9113 	int ret;
9114 
9115 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9116 	if (ret)
9117 		return ret;
9118 
9119 	if (val > 100)
9120 		return -EINVAL;
9121 
9122 	if (!val)
9123 		val = 1;
9124 
9125 	tr->buffer_percent = val;
9126 
9127 	(*ppos)++;
9128 
9129 	return cnt;
9130 }
9131 
/* File operations for the "buffer_percent" file; both read and write are supported. */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9139 
/*
 * The "instances" directory, set by create_trace_instances(). While it is
 * NULL, trace_array_create() does not create a tracefs directory for the
 * new instance.
 */
static struct dentry *trace_instance_dir;

/* Forward declaration: defined below, used by trace_array_create_dir(). */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9144 
9145 static int
9146 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9147 {
9148 	enum ring_buffer_flags rb_flags;
9149 
9150 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9151 
9152 	buf->tr = tr;
9153 
9154 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9155 	if (!buf->buffer)
9156 		return -ENOMEM;
9157 
9158 	buf->data = alloc_percpu(struct trace_array_cpu);
9159 	if (!buf->data) {
9160 		ring_buffer_free(buf->buffer);
9161 		buf->buffer = NULL;
9162 		return -ENOMEM;
9163 	}
9164 
9165 	/* Allocate the first page for all buffers */
9166 	set_buffer_entries(&tr->array_buffer,
9167 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9168 
9169 	return 0;
9170 }
9171 
9172 static void free_trace_buffer(struct array_buffer *buf)
9173 {
9174 	if (buf->buffer) {
9175 		ring_buffer_free(buf->buffer);
9176 		buf->buffer = NULL;
9177 		free_percpu(buf->data);
9178 		buf->data = NULL;
9179 	}
9180 }
9181 
9182 static int allocate_trace_buffers(struct trace_array *tr, int size)
9183 {
9184 	int ret;
9185 
9186 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9187 	if (ret)
9188 		return ret;
9189 
9190 #ifdef CONFIG_TRACER_MAX_TRACE
9191 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9192 				    allocate_snapshot ? size : 1);
9193 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9194 		free_trace_buffer(&tr->array_buffer);
9195 		return -ENOMEM;
9196 	}
9197 	tr->allocated_snapshot = allocate_snapshot;
9198 
9199 	/*
9200 	 * Only the top level trace array gets its snapshot allocated
9201 	 * from the kernel command line.
9202 	 */
9203 	allocate_snapshot = false;
9204 #endif
9205 
9206 	return 0;
9207 }
9208 
9209 static void free_trace_buffers(struct trace_array *tr)
9210 {
9211 	if (!tr)
9212 		return;
9213 
9214 	free_trace_buffer(&tr->array_buffer);
9215 
9216 #ifdef CONFIG_TRACER_MAX_TRACE
9217 	free_trace_buffer(&tr->max_buffer);
9218 #endif
9219 }
9220 
9221 static void init_trace_flags_index(struct trace_array *tr)
9222 {
9223 	int i;
9224 
9225 	/* Used by the trace options files */
9226 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9227 		tr->trace_flags_index[i] = i;
9228 }
9229 
9230 static void __update_tracer_options(struct trace_array *tr)
9231 {
9232 	struct tracer *t;
9233 
9234 	for (t = trace_types; t; t = t->next)
9235 		add_tracer_options(tr, t);
9236 }
9237 
/*
 * Locked variant of __update_tracer_options(): takes trace_types_lock,
 * sets tracer_options_updated, and adds the options of all registered
 * tracers to @tr.
 */
static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	tracer_options_updated = true;
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}
9245 
9246 /* Must have trace_types_lock held */
9247 struct trace_array *trace_array_find(const char *instance)
9248 {
9249 	struct trace_array *tr, *found = NULL;
9250 
9251 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9252 		if (tr->name && strcmp(tr->name, instance) == 0) {
9253 			found = tr;
9254 			break;
9255 		}
9256 	}
9257 
9258 	return found;
9259 }
9260 
9261 struct trace_array *trace_array_find_get(const char *instance)
9262 {
9263 	struct trace_array *tr;
9264 
9265 	mutex_lock(&trace_types_lock);
9266 	tr = trace_array_find(instance);
9267 	if (tr)
9268 		tr->ref++;
9269 	mutex_unlock(&trace_types_lock);
9270 
9271 	return tr;
9272 }
9273 
9274 static int trace_array_create_dir(struct trace_array *tr)
9275 {
9276 	int ret;
9277 
9278 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9279 	if (!tr->dir)
9280 		return -EINVAL;
9281 
9282 	ret = event_trace_add_tracer(tr->dir, tr);
9283 	if (ret) {
9284 		tracefs_remove(tr->dir);
9285 		return ret;
9286 	}
9287 
9288 	init_tracer_tracefs(tr, tr->dir);
9289 	__update_tracer_options(tr);
9290 
9291 	return ret;
9292 }
9293 
9294 static struct trace_array *trace_array_create(const char *name)
9295 {
9296 	struct trace_array *tr;
9297 	int ret;
9298 
9299 	ret = -ENOMEM;
9300 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9301 	if (!tr)
9302 		return ERR_PTR(ret);
9303 
9304 	tr->name = kstrdup(name, GFP_KERNEL);
9305 	if (!tr->name)
9306 		goto out_free_tr;
9307 
9308 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9309 		goto out_free_tr;
9310 
9311 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9312 
9313 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9314 
9315 	raw_spin_lock_init(&tr->start_lock);
9316 
9317 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9318 
9319 	tr->current_trace = &nop_trace;
9320 
9321 	INIT_LIST_HEAD(&tr->systems);
9322 	INIT_LIST_HEAD(&tr->events);
9323 	INIT_LIST_HEAD(&tr->hist_vars);
9324 	INIT_LIST_HEAD(&tr->err_log);
9325 
9326 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9327 		goto out_free_tr;
9328 
9329 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9330 		goto out_free_tr;
9331 
9332 	ftrace_init_trace_array(tr);
9333 
9334 	init_trace_flags_index(tr);
9335 
9336 	if (trace_instance_dir) {
9337 		ret = trace_array_create_dir(tr);
9338 		if (ret)
9339 			goto out_free_tr;
9340 	} else
9341 		__trace_early_add_events(tr);
9342 
9343 	list_add(&tr->list, &ftrace_trace_arrays);
9344 
9345 	tr->ref++;
9346 
9347 	return tr;
9348 
9349  out_free_tr:
9350 	ftrace_free_ftrace_ops(tr);
9351 	free_trace_buffers(tr);
9352 	free_cpumask_var(tr->tracing_cpumask);
9353 	kfree(tr->name);
9354 	kfree(tr);
9355 
9356 	return ERR_PTR(ret);
9357 }
9358 
9359 static int instance_mkdir(const char *name)
9360 {
9361 	struct trace_array *tr;
9362 	int ret;
9363 
9364 	mutex_lock(&event_mutex);
9365 	mutex_lock(&trace_types_lock);
9366 
9367 	ret = -EEXIST;
9368 	if (trace_array_find(name))
9369 		goto out_unlock;
9370 
9371 	tr = trace_array_create(name);
9372 
9373 	ret = PTR_ERR_OR_ZERO(tr);
9374 
9375 out_unlock:
9376 	mutex_unlock(&trace_types_lock);
9377 	mutex_unlock(&event_mutex);
9378 	return ret;
9379 }
9380 
9381 /**
9382  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9383  * @name: The name of the trace array to be looked up/created.
9384  *
9385  * Returns pointer to trace array with given name.
9386  * NULL, if it cannot be created.
9387  *
9388  * NOTE: This function increments the reference counter associated with the
9389  * trace array returned. This makes sure it cannot be freed while in use.
9390  * Use trace_array_put() once the trace array is no longer needed.
9391  * If the trace_array is to be freed, trace_array_destroy() needs to
9392  * be called after the trace_array_put(), or simply let user space delete
9393  * it from the tracefs instances directory. But until the
9394  * trace_array_put() is called, user space can not delete it.
9395  *
9396  */
9397 struct trace_array *trace_array_get_by_name(const char *name)
9398 {
9399 	struct trace_array *tr;
9400 
9401 	mutex_lock(&event_mutex);
9402 	mutex_lock(&trace_types_lock);
9403 
9404 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9405 		if (tr->name && strcmp(tr->name, name) == 0)
9406 			goto out_unlock;
9407 	}
9408 
9409 	tr = trace_array_create(name);
9410 
9411 	if (IS_ERR(tr))
9412 		tr = NULL;
9413 out_unlock:
9414 	if (tr)
9415 		tr->ref++;
9416 
9417 	mutex_unlock(&trace_types_lock);
9418 	mutex_unlock(&event_mutex);
9419 	return tr;
9420 }
9421 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9422 
9423 static int __remove_instance(struct trace_array *tr)
9424 {
9425 	int i;
9426 
9427 	/* Reference counter for a newly created trace array = 1. */
9428 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9429 		return -EBUSY;
9430 
9431 	list_del(&tr->list);
9432 
9433 	/* Disable all the flags that were enabled coming in */
9434 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9435 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9436 			set_tracer_flag(tr, 1 << i, 0);
9437 	}
9438 
9439 	tracing_set_nop(tr);
9440 	clear_ftrace_function_probes(tr);
9441 	event_trace_del_tracer(tr);
9442 	ftrace_clear_pids(tr);
9443 	ftrace_destroy_function_files(tr);
9444 	tracefs_remove(tr->dir);
9445 	free_percpu(tr->last_func_repeats);
9446 	free_trace_buffers(tr);
9447 
9448 	for (i = 0; i < tr->nr_topts; i++) {
9449 		kfree(tr->topts[i].topts);
9450 	}
9451 	kfree(tr->topts);
9452 
9453 	free_cpumask_var(tr->tracing_cpumask);
9454 	kfree(tr->name);
9455 	kfree(tr);
9456 
9457 	return 0;
9458 }
9459 
9460 int trace_array_destroy(struct trace_array *this_tr)
9461 {
9462 	struct trace_array *tr;
9463 	int ret;
9464 
9465 	if (!this_tr)
9466 		return -EINVAL;
9467 
9468 	mutex_lock(&event_mutex);
9469 	mutex_lock(&trace_types_lock);
9470 
9471 	ret = -ENODEV;
9472 
9473 	/* Making sure trace array exists before destroying it. */
9474 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9475 		if (tr == this_tr) {
9476 			ret = __remove_instance(tr);
9477 			break;
9478 		}
9479 	}
9480 
9481 	mutex_unlock(&trace_types_lock);
9482 	mutex_unlock(&event_mutex);
9483 
9484 	return ret;
9485 }
9486 EXPORT_SYMBOL_GPL(trace_array_destroy);
9487 
9488 static int instance_rmdir(const char *name)
9489 {
9490 	struct trace_array *tr;
9491 	int ret;
9492 
9493 	mutex_lock(&event_mutex);
9494 	mutex_lock(&trace_types_lock);
9495 
9496 	ret = -ENODEV;
9497 	tr = trace_array_find(name);
9498 	if (tr)
9499 		ret = __remove_instance(tr);
9500 
9501 	mutex_unlock(&trace_types_lock);
9502 	mutex_unlock(&event_mutex);
9503 
9504 	return ret;
9505 }
9506 
9507 static __init void create_trace_instances(struct dentry *d_tracer)
9508 {
9509 	struct trace_array *tr;
9510 
9511 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9512 							 instance_mkdir,
9513 							 instance_rmdir);
9514 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9515 		return;
9516 
9517 	mutex_lock(&event_mutex);
9518 	mutex_lock(&trace_types_lock);
9519 
9520 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9521 		if (!tr->name)
9522 			continue;
9523 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9524 			     "Failed to create instance directory\n"))
9525 			break;
9526 	}
9527 
9528 	mutex_unlock(&trace_types_lock);
9529 	mutex_unlock(&event_mutex);
9530 }
9531 
9532 static void
9533 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9534 {
9535 	struct trace_event_file *file;
9536 	int cpu;
9537 
9538 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9539 			tr, &show_traces_fops);
9540 
9541 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9542 			tr, &set_tracer_fops);
9543 
9544 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9545 			  tr, &tracing_cpumask_fops);
9546 
9547 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9548 			  tr, &tracing_iter_fops);
9549 
9550 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9551 			  tr, &tracing_fops);
9552 
9553 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9554 			  tr, &tracing_pipe_fops);
9555 
9556 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9557 			  tr, &tracing_entries_fops);
9558 
9559 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9560 			  tr, &tracing_total_entries_fops);
9561 
9562 	trace_create_file("free_buffer", 0200, d_tracer,
9563 			  tr, &tracing_free_buffer_fops);
9564 
9565 	trace_create_file("trace_marker", 0220, d_tracer,
9566 			  tr, &tracing_mark_fops);
9567 
9568 	file = __find_event_file(tr, "ftrace", "print");
9569 	if (file && file->dir)
9570 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9571 				  file, &event_trigger_fops);
9572 	tr->trace_marker_file = file;
9573 
9574 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9575 			  tr, &tracing_mark_raw_fops);
9576 
9577 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9578 			  &trace_clock_fops);
9579 
9580 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9581 			  tr, &rb_simple_fops);
9582 
9583 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9584 			  &trace_time_stamp_mode_fops);
9585 
9586 	tr->buffer_percent = 50;
9587 
9588 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9589 			tr, &buffer_percent_fops);
9590 
9591 	create_trace_options_dir(tr);
9592 
9593 	trace_create_maxlat_file(tr, d_tracer);
9594 
9595 	if (ftrace_create_function_files(tr, d_tracer))
9596 		MEM_FAIL(1, "Could not allocate function filter files");
9597 
9598 #ifdef CONFIG_TRACER_SNAPSHOT
9599 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9600 			  tr, &snapshot_fops);
9601 #endif
9602 
9603 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9604 			  tr, &tracing_err_log_fops);
9605 
9606 	for_each_tracing_cpu(cpu)
9607 		tracing_init_tracefs_percpu(tr, cpu);
9608 
9609 	ftrace_init_tracefs(tr, d_tracer);
9610 }
9611 
9612 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9613 {
9614 	struct vfsmount *mnt;
9615 	struct file_system_type *type;
9616 
9617 	/*
9618 	 * To maintain backward compatibility for tools that mount
9619 	 * debugfs to get to the tracing facility, tracefs is automatically
9620 	 * mounted to the debugfs/tracing directory.
9621 	 */
9622 	type = get_fs_type("tracefs");
9623 	if (!type)
9624 		return NULL;
9625 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9626 	put_filesystem(type);
9627 	if (IS_ERR(mnt))
9628 		return NULL;
9629 	mntget(mnt);
9630 
9631 	return mnt;
9632 }
9633 
9634 /**
9635  * tracing_init_dentry - initialize top level trace array
9636  *
9637  * This is called when creating files or directories in the tracing
9638  * directory. It is called via fs_initcall() by any of the boot up code
9639  * and expects to return the dentry of the top level tracing directory.
9640  */
9641 int tracing_init_dentry(void)
9642 {
9643 	struct trace_array *tr = &global_trace;
9644 
9645 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9646 		pr_warn("Tracing disabled due to lockdown\n");
9647 		return -EPERM;
9648 	}
9649 
9650 	/* The top level trace array uses  NULL as parent */
9651 	if (tr->dir)
9652 		return 0;
9653 
9654 	if (WARN_ON(!tracefs_initialized()))
9655 		return -ENODEV;
9656 
9657 	/*
9658 	 * As there may still be users that expect the tracing
9659 	 * files to exist in debugfs/tracing, we must automount
9660 	 * the tracefs file system there, so older tools still
9661 	 * work with the newer kernel.
9662 	 */
9663 	tr->dir = debugfs_create_automount("tracing", NULL,
9664 					   trace_automount, NULL);
9665 
9666 	return 0;
9667 }
9668 
9669 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9670 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9671 
9672 static struct workqueue_struct *eval_map_wq __initdata;
9673 static struct work_struct eval_map_work __initdata;
9674 static struct work_struct tracerfs_init_work __initdata;
9675 
9676 static void __init eval_map_work_func(struct work_struct *work)
9677 {
9678 	int len;
9679 
9680 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9681 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9682 }
9683 
9684 static int __init trace_eval_init(void)
9685 {
9686 	INIT_WORK(&eval_map_work, eval_map_work_func);
9687 
9688 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9689 	if (!eval_map_wq) {
9690 		pr_err("Unable to allocate eval_map_wq\n");
9691 		/* Do work here */
9692 		eval_map_work_func(&eval_map_work);
9693 		return -ENOMEM;
9694 	}
9695 
9696 	queue_work(eval_map_wq, &eval_map_work);
9697 	return 0;
9698 }
9699 
9700 subsys_initcall(trace_eval_init);
9701 
9702 static int __init trace_eval_sync(void)
9703 {
9704 	/* Make sure the eval map updates are finished */
9705 	if (eval_map_wq)
9706 		destroy_workqueue(eval_map_wq);
9707 	return 0;
9708 }
9709 
9710 late_initcall_sync(trace_eval_sync);
9711 
9712 
9713 #ifdef CONFIG_MODULES
9714 static void trace_module_add_evals(struct module *mod)
9715 {
9716 	if (!mod->num_trace_evals)
9717 		return;
9718 
9719 	/*
9720 	 * Modules with bad taint do not have events created, do
9721 	 * not bother with enums either.
9722 	 */
9723 	if (trace_module_has_bad_taint(mod))
9724 		return;
9725 
9726 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9727 }
9728 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map section belonging to @mod, if there is
 * one on the trace_eval_maps list. The list is walked and modified
 * under trace_eval_mutex.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *item;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	for (item = trace_eval_maps; item; item = item->tail.next) {
		if (item->head.mod == mod)
			break;
		/* Skip to this section's tail; it holds the next link */
		item = trace_eval_jmp_to_tail(item);
		link = &item->tail.next;
	}

	if (item) {
		/* Bypass the section in the list, then release it */
		*link = trace_eval_jmp_to_tail(item)->tail.next;
		kfree(item);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9760 
9761 static int trace_module_notify(struct notifier_block *self,
9762 			       unsigned long val, void *data)
9763 {
9764 	struct module *mod = data;
9765 
9766 	switch (val) {
9767 	case MODULE_STATE_COMING:
9768 		trace_module_add_evals(mod);
9769 		break;
9770 	case MODULE_STATE_GOING:
9771 		trace_module_remove_evals(mod);
9772 		break;
9773 	}
9774 
9775 	return NOTIFY_OK;
9776 }
9777 
9778 static struct notifier_block trace_module_nb = {
9779 	.notifier_call = trace_module_notify,
9780 	.priority = 0,
9781 };
9782 #endif /* CONFIG_MODULES */
9783 
9784 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9785 {
9786 
9787 	event_trace_init();
9788 
9789 	init_tracer_tracefs(&global_trace, NULL);
9790 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9791 
9792 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9793 			&global_trace, &tracing_thresh_fops);
9794 
9795 	trace_create_file("README", TRACE_MODE_READ, NULL,
9796 			NULL, &tracing_readme_fops);
9797 
9798 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9799 			NULL, &tracing_saved_cmdlines_fops);
9800 
9801 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9802 			  NULL, &tracing_saved_cmdlines_size_fops);
9803 
9804 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9805 			NULL, &tracing_saved_tgids_fops);
9806 
9807 	trace_create_eval_file(NULL);
9808 
9809 #ifdef CONFIG_MODULES
9810 	register_module_notifier(&trace_module_nb);
9811 #endif
9812 
9813 #ifdef CONFIG_DYNAMIC_FTRACE
9814 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9815 			NULL, &tracing_dyn_info_fops);
9816 #endif
9817 
9818 	create_trace_instances(NULL);
9819 
9820 	update_tracer_options(&global_trace);
9821 }
9822 
9823 static __init int tracer_init_tracefs(void)
9824 {
9825 	int ret;
9826 
9827 	trace_access_lock_init();
9828 
9829 	ret = tracing_init_dentry();
9830 	if (ret)
9831 		return 0;
9832 
9833 	if (eval_map_wq) {
9834 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9835 		queue_work(eval_map_wq, &tracerfs_init_work);
9836 	} else {
9837 		tracer_init_tracefs_work_func(NULL);
9838 	}
9839 
9840 	rv_init_interface();
9841 
9842 	return 0;
9843 }
9844 
9845 fs_initcall(tracer_init_tracefs);
9846 
9847 static int trace_panic_handler(struct notifier_block *this,
9848 			       unsigned long event, void *unused)
9849 {
9850 	if (ftrace_dump_on_oops)
9851 		ftrace_dump(ftrace_dump_on_oops);
9852 	return NOTIFY_OK;
9853 }
9854 
9855 static struct notifier_block trace_panic_notifier = {
9856 	.notifier_call  = trace_panic_handler,
9857 	.next           = NULL,
9858 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9859 };
9860 
9861 static int trace_die_handler(struct notifier_block *self,
9862 			     unsigned long val,
9863 			     void *data)
9864 {
9865 	switch (val) {
9866 	case DIE_OOPS:
9867 		if (ftrace_dump_on_oops)
9868 			ftrace_dump(ftrace_dump_on_oops);
9869 		break;
9870 	default:
9871 		break;
9872 	}
9873 	return NOTIFY_OK;
9874 }
9875 
9876 static struct notifier_block trace_die_notifier = {
9877 	.notifier_call = trace_die_handler,
9878 	.priority = 200
9879 };
9880 
9881 /*
9882  * printk is set to max of 1024, we really don't need it that big.
9883  * Nothing should be printing 1000 characters anyway.
9884  */
9885 #define TRACE_MAX_PRINT		1000
9886 
9887 /*
9888  * Define here KERN_TRACE so that we have one place to modify
9889  * it if we decide to change what log level the ftrace dump
9890  * should be at.
9891  */
9892 #define KERN_TRACE		KERN_EMERG
9893 
9894 void
9895 trace_printk_seq(struct trace_seq *s)
9896 {
9897 	/* Probably should print a warning here. */
9898 	if (s->seq.len >= TRACE_MAX_PRINT)
9899 		s->seq.len = TRACE_MAX_PRINT;
9900 
9901 	/*
9902 	 * More paranoid code. Although the buffer size is set to
9903 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9904 	 * an extra layer of protection.
9905 	 */
9906 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9907 		s->seq.len = s->seq.size - 1;
9908 
9909 	/* should be zero ended, but we are paranoid. */
9910 	s->buffer[s->seq.len] = 0;
9911 
9912 	printk(KERN_TRACE "%s", s->buffer);
9913 
9914 	trace_seq_init(s);
9915 }
9916 
9917 void trace_init_global_iter(struct trace_iterator *iter)
9918 {
9919 	iter->tr = &global_trace;
9920 	iter->trace = iter->tr->current_trace;
9921 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9922 	iter->array_buffer = &global_trace.array_buffer;
9923 
9924 	if (iter->trace && iter->trace->open)
9925 		iter->trace->open(iter);
9926 
9927 	/* Annotate start of buffers if we had overruns */
9928 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9929 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9930 
9931 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9932 	if (trace_clocks[iter->tr->clock_id].in_ns)
9933 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9934 
9935 	/* Can not use kmalloc for iter.temp and iter.fmt */
9936 	iter->temp = static_temp_buf;
9937 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
9938 	iter->fmt = static_fmt_buf;
9939 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
9940 }
9941 
9942 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9943 {
9944 	/* use static because iter can be a bit big for the stack */
9945 	static struct trace_iterator iter;
9946 	static atomic_t dump_running;
9947 	struct trace_array *tr = &global_trace;
9948 	unsigned int old_userobj;
9949 	unsigned long flags;
9950 	int cnt = 0, cpu;
9951 
9952 	/* Only allow one dump user at a time. */
9953 	if (atomic_inc_return(&dump_running) != 1) {
9954 		atomic_dec(&dump_running);
9955 		return;
9956 	}
9957 
9958 	/*
9959 	 * Always turn off tracing when we dump.
9960 	 * We don't need to show trace output of what happens
9961 	 * between multiple crashes.
9962 	 *
9963 	 * If the user does a sysrq-z, then they can re-enable
9964 	 * tracing with echo 1 > tracing_on.
9965 	 */
9966 	tracing_off();
9967 
9968 	local_irq_save(flags);
9969 
9970 	/* Simulate the iterator */
9971 	trace_init_global_iter(&iter);
9972 
9973 	for_each_tracing_cpu(cpu) {
9974 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9975 	}
9976 
9977 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9978 
9979 	/* don't look at user memory in panic mode */
9980 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9981 
9982 	switch (oops_dump_mode) {
9983 	case DUMP_ALL:
9984 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9985 		break;
9986 	case DUMP_ORIG:
9987 		iter.cpu_file = raw_smp_processor_id();
9988 		break;
9989 	case DUMP_NONE:
9990 		goto out_enable;
9991 	default:
9992 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9993 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9994 	}
9995 
9996 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9997 
9998 	/* Did function tracer already get disabled? */
9999 	if (ftrace_is_dead()) {
10000 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10001 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10002 	}
10003 
10004 	/*
10005 	 * We need to stop all tracing on all CPUS to read
10006 	 * the next buffer. This is a bit expensive, but is
10007 	 * not done often. We fill all what we can read,
10008 	 * and then release the locks again.
10009 	 */
10010 
10011 	while (!trace_empty(&iter)) {
10012 
10013 		if (!cnt)
10014 			printk(KERN_TRACE "---------------------------------\n");
10015 
10016 		cnt++;
10017 
10018 		trace_iterator_reset(&iter);
10019 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10020 
10021 		if (trace_find_next_entry_inc(&iter) != NULL) {
10022 			int ret;
10023 
10024 			ret = print_trace_line(&iter);
10025 			if (ret != TRACE_TYPE_NO_CONSUME)
10026 				trace_consume(&iter);
10027 		}
10028 		touch_nmi_watchdog();
10029 
10030 		trace_printk_seq(&iter.seq);
10031 	}
10032 
10033 	if (!cnt)
10034 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10035 	else
10036 		printk(KERN_TRACE "---------------------------------\n");
10037 
10038  out_enable:
10039 	tr->trace_flags |= old_userobj;
10040 
10041 	for_each_tracing_cpu(cpu) {
10042 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10043 	}
10044 	atomic_dec(&dump_running);
10045 	local_irq_restore(flags);
10046 }
10047 EXPORT_SYMBOL_GPL(ftrace_dump);
10048 
10049 #define WRITE_BUFSIZE  4096
10050 
10051 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10052 				size_t count, loff_t *ppos,
10053 				int (*createfn)(const char *))
10054 {
10055 	char *kbuf, *buf, *tmp;
10056 	int ret = 0;
10057 	size_t done = 0;
10058 	size_t size;
10059 
10060 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10061 	if (!kbuf)
10062 		return -ENOMEM;
10063 
10064 	while (done < count) {
10065 		size = count - done;
10066 
10067 		if (size >= WRITE_BUFSIZE)
10068 			size = WRITE_BUFSIZE - 1;
10069 
10070 		if (copy_from_user(kbuf, buffer + done, size)) {
10071 			ret = -EFAULT;
10072 			goto out;
10073 		}
10074 		kbuf[size] = '\0';
10075 		buf = kbuf;
10076 		do {
10077 			tmp = strchr(buf, '\n');
10078 			if (tmp) {
10079 				*tmp = '\0';
10080 				size = tmp - buf + 1;
10081 			} else {
10082 				size = strlen(buf);
10083 				if (done + size < count) {
10084 					if (buf != kbuf)
10085 						break;
10086 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10087 					pr_warn("Line length is too long: Should be less than %d\n",
10088 						WRITE_BUFSIZE - 2);
10089 					ret = -EINVAL;
10090 					goto out;
10091 				}
10092 			}
10093 			done += size;
10094 
10095 			/* Remove comments */
10096 			tmp = strchr(buf, '#');
10097 
10098 			if (tmp)
10099 				*tmp = '\0';
10100 
10101 			ret = createfn(buf);
10102 			if (ret)
10103 				goto out;
10104 			buf += size;
10105 
10106 		} while (done < count);
10107 	}
10108 	ret = done;
10109 
10110 out:
10111 	kfree(kbuf);
10112 
10113 	return ret;
10114 }
10115 
10116 __init static int tracer_alloc_buffers(void)
10117 {
10118 	int ring_buf_size;
10119 	int ret = -ENOMEM;
10120 
10121 
10122 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10123 		pr_warn("Tracing disabled due to lockdown\n");
10124 		return -EPERM;
10125 	}
10126 
10127 	/*
10128 	 * Make sure we don't accidentally add more trace options
10129 	 * than we have bits for.
10130 	 */
10131 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10132 
10133 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10134 		goto out;
10135 
10136 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10137 		goto out_free_buffer_mask;
10138 
10139 	/* Only allocate trace_printk buffers if a trace_printk exists */
10140 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10141 		/* Must be called before global_trace.buffer is allocated */
10142 		trace_printk_init_buffers();
10143 
10144 	/* To save memory, keep the ring buffer size to its minimum */
10145 	if (ring_buffer_expanded)
10146 		ring_buf_size = trace_buf_size;
10147 	else
10148 		ring_buf_size = 1;
10149 
10150 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10151 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10152 
10153 	raw_spin_lock_init(&global_trace.start_lock);
10154 
10155 	/*
10156 	 * The prepare callbacks allocates some memory for the ring buffer. We
10157 	 * don't free the buffer if the CPU goes down. If we were to free
10158 	 * the buffer, then the user would lose any trace that was in the
10159 	 * buffer. The memory will be removed once the "instance" is removed.
10160 	 */
10161 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10162 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10163 				      NULL);
10164 	if (ret < 0)
10165 		goto out_free_cpumask;
10166 	/* Used for event triggers */
10167 	ret = -ENOMEM;
10168 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10169 	if (!temp_buffer)
10170 		goto out_rm_hp_state;
10171 
10172 	if (trace_create_savedcmd() < 0)
10173 		goto out_free_temp_buffer;
10174 
10175 	/* TODO: make the number of buffers hot pluggable with CPUS */
10176 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10177 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10178 		goto out_free_savedcmd;
10179 	}
10180 
10181 	if (global_trace.buffer_disabled)
10182 		tracing_off();
10183 
10184 	if (trace_boot_clock) {
10185 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10186 		if (ret < 0)
10187 			pr_warn("Trace clock %s not defined, going back to default\n",
10188 				trace_boot_clock);
10189 	}
10190 
10191 	/*
10192 	 * register_tracer() might reference current_trace, so it
10193 	 * needs to be set before we register anything. This is
10194 	 * just a bootstrap of current_trace anyway.
10195 	 */
10196 	global_trace.current_trace = &nop_trace;
10197 
10198 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10199 
10200 	ftrace_init_global_array_ops(&global_trace);
10201 
10202 	init_trace_flags_index(&global_trace);
10203 
10204 	register_tracer(&nop_trace);
10205 
10206 	/* Function tracing may start here (via kernel command line) */
10207 	init_function_trace();
10208 
10209 	/* All seems OK, enable tracing */
10210 	tracing_disabled = 0;
10211 
10212 	atomic_notifier_chain_register(&panic_notifier_list,
10213 				       &trace_panic_notifier);
10214 
10215 	register_die_notifier(&trace_die_notifier);
10216 
10217 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10218 
10219 	INIT_LIST_HEAD(&global_trace.systems);
10220 	INIT_LIST_HEAD(&global_trace.events);
10221 	INIT_LIST_HEAD(&global_trace.hist_vars);
10222 	INIT_LIST_HEAD(&global_trace.err_log);
10223 	list_add(&global_trace.list, &ftrace_trace_arrays);
10224 
10225 	apply_trace_boot_options();
10226 
10227 	register_snapshot_cmd();
10228 
10229 	test_can_verify();
10230 
10231 	return 0;
10232 
10233 out_free_savedcmd:
10234 	free_saved_cmdlines_buffer(savedcmd);
10235 out_free_temp_buffer:
10236 	ring_buffer_free(temp_buffer);
10237 out_rm_hp_state:
10238 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10239 out_free_cpumask:
10240 	free_cpumask_var(global_trace.tracing_cpumask);
10241 out_free_buffer_mask:
10242 	free_cpumask_var(tracing_buffer_mask);
10243 out:
10244 	return ret;
10245 }
10246 
10247 void __init ftrace_boot_snapshot(void)
10248 {
10249 	if (snapshot_at_boot) {
10250 		tracing_snapshot();
10251 		internal_trace_puts("** Boot snapshot taken **\n");
10252 	}
10253 }
10254 
10255 void __init early_trace_init(void)
10256 {
10257 	if (tracepoint_printk) {
10258 		tracepoint_print_iter =
10259 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10260 		if (MEM_FAIL(!tracepoint_print_iter,
10261 			     "Failed to allocate trace iterator\n"))
10262 			tracepoint_printk = 0;
10263 		else
10264 			static_key_enable(&tracepoint_printk_key.key);
10265 	}
10266 	tracer_alloc_buffers();
10267 }
10268 
10269 void __init trace_init(void)
10270 {
10271 	trace_event_init();
10272 }
10273 
10274 __init static void clear_boot_tracer(void)
10275 {
10276 	/*
10277 	 * The default tracer at boot buffer is an init section.
10278 	 * This function is called in lateinit. If we did not
10279 	 * find the boot tracer, then clear it out, to prevent
10280 	 * later registration from accessing the buffer that is
10281 	 * about to be freed.
10282 	 */
10283 	if (!default_bootup_tracer)
10284 		return;
10285 
10286 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10287 	       default_bootup_tracer);
10288 	default_bootup_tracer = NULL;
10289 }
10290 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the global trace array to the "global" clock when no clock
 * was chosen at boot and the sched clock is not stable. Does nothing
 * but warn if tracefs is locked down.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
10312 
10313 __init static int late_trace_init(void)
10314 {
10315 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10316 		static_key_disable(&tracepoint_printk_key.key);
10317 		tracepoint_printk = 0;
10318 	}
10319 
10320 	tracing_set_default_clock();
10321 	clear_boot_tracer();
10322 	return 0;
10323 }
10324 
10325 late_initcall_sync(late_trace_init);
10326