xref: /openbmc/linux/kernel/trace/trace.c (revision 7fde9d6e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94 	{ }
95 };
96 
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100 	return 0;
101 }
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * has occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputing it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  */
135 
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137 
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140 
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144 	struct module			*mod;
145 	unsigned long			length;
146 };
147 
148 union trace_eval_map_item;
149 
150 struct trace_eval_map_tail {
151 	/*
152 	 * "end" is first and points to NULL as it must be different
153 	 * from "mod" or "eval_string"
154 	 */
155 	union trace_eval_map_item	*next;
156 	const char			*end;	/* points to NULL */
157 };
158 
159 static DEFINE_MUTEX(trace_eval_mutex);
160 
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169 	struct trace_eval_map		map;
170 	struct trace_eval_map_head	head;
171 	struct trace_eval_map_tail	tail;
172 };
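
/*
 * Illustrative layout of one saved array (a sketch of the scheme described
 * above, not a structure defined by this file):
 *
 *	item[0]       head:  .length = N, .mod = owning module (or NULL)
 *	item[1..N]    map:   the N saved trace_eval_map entries
 *	item[N+1]     tail:  .next -> the next saved array, or NULL if this
 *	                     is the last one
 */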
173 
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179 				   struct trace_buffer *buffer,
180 				   unsigned int trace_ctx);
181 
182 #define MAX_TRACER_SIZE		100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185 
186 static bool allocate_snapshot;
187 
188 static int __init set_cmdline_ftrace(char *str)
189 {
190 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191 	default_bootup_tracer = bootup_tracer_buf;
192 	/* We are using ftrace early, expand it */
193 	ring_buffer_expanded = true;
194 	return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197 
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200 	if (*str++ != '=' || !*str) {
201 		ftrace_dump_on_oops = DUMP_ALL;
202 		return 1;
203 	}
204 
205 	if (!strcmp("orig_cpu", str)) {
206 		ftrace_dump_on_oops = DUMP_ORIG;
207 		return 1;
208 	}
209 
210 	return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213 
214 static int __init stop_trace_on_warning(char *str)
215 {
216 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217 		__disable_trace_on_warning = 1;
218 	return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221 
222 static int __init boot_alloc_snapshot(char *str)
223 {
224 	allocate_snapshot = true;
225 	/* We also need the main ring buffer expanded */
226 	ring_buffer_expanded = true;
227 	return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230 
231 
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233 
234 static int __init set_trace_boot_options(char *str)
235 {
236 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237 	return 0;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240 
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243 
244 static int __init set_trace_boot_clock(char *str)
245 {
246 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247 	trace_boot_clock = trace_boot_clock_buf;
248 	return 0;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251 
252 static int __init set_tracepoint_printk(char *str)
253 {
254 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
255 		tracepoint_printk = 1;
256 	return 1;
257 }
258 __setup("tp_printk", set_tracepoint_printk);
259 
260 unsigned long long ns2usecs(u64 nsec)
261 {
262 	nsec += 500;
263 	do_div(nsec, 1000);
264 	return nsec;
265 }
266 
267 static void
268 trace_process_export(struct trace_export *export,
269 	       struct ring_buffer_event *event, int flag)
270 {
271 	struct trace_entry *entry;
272 	unsigned int size = 0;
273 
274 	if (export->flags & flag) {
275 		entry = ring_buffer_event_data(event);
276 		size = ring_buffer_event_length(event);
277 		export->write(export, entry, size);
278 	}
279 }
280 
281 static DEFINE_MUTEX(ftrace_export_lock);
282 
283 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
284 
285 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
287 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
288 
289 static inline void ftrace_exports_enable(struct trace_export *export)
290 {
291 	if (export->flags & TRACE_EXPORT_FUNCTION)
292 		static_branch_inc(&trace_function_exports_enabled);
293 
294 	if (export->flags & TRACE_EXPORT_EVENT)
295 		static_branch_inc(&trace_event_exports_enabled);
296 
297 	if (export->flags & TRACE_EXPORT_MARKER)
298 		static_branch_inc(&trace_marker_exports_enabled);
299 }
300 
301 static inline void ftrace_exports_disable(struct trace_export *export)
302 {
303 	if (export->flags & TRACE_EXPORT_FUNCTION)
304 		static_branch_dec(&trace_function_exports_enabled);
305 
306 	if (export->flags & TRACE_EXPORT_EVENT)
307 		static_branch_dec(&trace_event_exports_enabled);
308 
309 	if (export->flags & TRACE_EXPORT_MARKER)
310 		static_branch_dec(&trace_marker_exports_enabled);
311 }
312 
313 static void ftrace_exports(struct ring_buffer_event *event, int flag)
314 {
315 	struct trace_export *export;
316 
317 	preempt_disable_notrace();
318 
319 	export = rcu_dereference_raw_check(ftrace_exports_list);
320 	while (export) {
321 		trace_process_export(export, event, flag);
322 		export = rcu_dereference_raw_check(export->next);
323 	}
324 
325 	preempt_enable_notrace();
326 }
327 
328 static inline void
329 add_trace_export(struct trace_export **list, struct trace_export *export)
330 {
331 	rcu_assign_pointer(export->next, *list);
332 	/*
333 	 * We are adding export to the list, but another
334 	 * CPU might be walking that list. We need to make sure
335 	 * the export->next pointer is valid before another CPU sees
336 	 * the export pointer added to the list.
337 	 */
338 	rcu_assign_pointer(*list, export);
339 }
340 
341 static inline int
342 rm_trace_export(struct trace_export **list, struct trace_export *export)
343 {
344 	struct trace_export **p;
345 
346 	for (p = list; *p != NULL; p = &(*p)->next)
347 		if (*p == export)
348 			break;
349 
350 	if (*p != export)
351 		return -1;
352 
353 	rcu_assign_pointer(*p, (*p)->next);
354 
355 	return 0;
356 }
357 
358 static inline void
359 add_ftrace_export(struct trace_export **list, struct trace_export *export)
360 {
361 	ftrace_exports_enable(export);
362 
363 	add_trace_export(list, export);
364 }
365 
366 static inline int
367 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	int ret;
370 
371 	ret = rm_trace_export(list, export);
372 	ftrace_exports_disable(export);
373 
374 	return ret;
375 }
376 
377 int register_ftrace_export(struct trace_export *export)
378 {
379 	if (WARN_ON_ONCE(!export->write))
380 		return -1;
381 
382 	mutex_lock(&ftrace_export_lock);
383 
384 	add_ftrace_export(&ftrace_exports_list, export);
385 
386 	mutex_unlock(&ftrace_export_lock);
387 
388 	return 0;
389 }
390 EXPORT_SYMBOL_GPL(register_ftrace_export);
391 
392 int unregister_ftrace_export(struct trace_export *export)
393 {
394 	int ret;
395 
396 	mutex_lock(&ftrace_export_lock);
397 
398 	ret = rm_ftrace_export(&ftrace_exports_list, export);
399 
400 	mutex_unlock(&ftrace_export_lock);
401 
402 	return ret;
403 }
404 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
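
/*
 * A minimal sketch of a trace_export user (hypothetical names, not code
 * from this file): fill in a write() callback and the flags of interest,
 * then register it so the ftrace_exports() walk above hands it every
 * matching event.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// push the raw entry to some out-of-band channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */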
405 
406 /* trace_flags holds trace_options default values */
407 #define TRACE_DEFAULT_FLAGS						\
408 	(FUNCTION_DEFAULT_FLAGS |					\
409 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
410 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
411 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
412 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
413 	 TRACE_ITER_HASH_PTR)
414 
415 /* trace_options that are only supported by global_trace */
416 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
417 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
418 
419 /* trace_flags that are default zero for instances */
420 #define ZEROED_TRACE_FLAGS \
421 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
422 
423 /*
424  * The global_trace is the descriptor that holds the top-level tracing
425  * buffers for the live tracing.
426  */
427 static struct trace_array global_trace = {
428 	.trace_flags = TRACE_DEFAULT_FLAGS,
429 };
430 
431 LIST_HEAD(ftrace_trace_arrays);
432 
433 int trace_array_get(struct trace_array *this_tr)
434 {
435 	struct trace_array *tr;
436 	int ret = -ENODEV;
437 
438 	mutex_lock(&trace_types_lock);
439 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
440 		if (tr == this_tr) {
441 			tr->ref++;
442 			ret = 0;
443 			break;
444 		}
445 	}
446 	mutex_unlock(&trace_types_lock);
447 
448 	return ret;
449 }
450 
451 static void __trace_array_put(struct trace_array *this_tr)
452 {
453 	WARN_ON(!this_tr->ref);
454 	this_tr->ref--;
455 }
456 
457 /**
458  * trace_array_put - Decrement the reference counter for this trace array.
459  * @this_tr : pointer to the trace array
460  *
461  * NOTE: Use this when we no longer need the trace array returned by
462  * trace_array_get_by_name(). This ensures the trace array can be later
463  * destroyed.
464  *
465  */
466 void trace_array_put(struct trace_array *this_tr)
467 {
468 	if (!this_tr)
469 		return;
470 
471 	mutex_lock(&trace_types_lock);
472 	__trace_array_put(this_tr);
473 	mutex_unlock(&trace_types_lock);
474 }
475 EXPORT_SYMBOL_GPL(trace_array_put);
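
/*
 * A minimal sketch of the get/put pairing described above (hypothetical
 * instance name, not code from this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	// ... use the instance: enable events on it, write into it, etc. ...
 *	trace_array_put(tr);
 */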
476 
477 int tracing_check_open_get_tr(struct trace_array *tr)
478 {
479 	int ret;
480 
481 	ret = security_locked_down(LOCKDOWN_TRACEFS);
482 	if (ret)
483 		return ret;
484 
485 	if (tracing_disabled)
486 		return -ENODEV;
487 
488 	if (tr && trace_array_get(tr) < 0)
489 		return -ENODEV;
490 
491 	return 0;
492 }
493 
494 int call_filter_check_discard(struct trace_event_call *call, void *rec,
495 			      struct trace_buffer *buffer,
496 			      struct ring_buffer_event *event)
497 {
498 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
499 	    !filter_match_preds(call->filter, rec)) {
500 		__trace_event_discard_commit(buffer, event);
501 		return 1;
502 	}
503 
504 	return 0;
505 }
506 
507 void trace_free_pid_list(struct trace_pid_list *pid_list)
508 {
509 	vfree(pid_list->pids);
510 	kfree(pid_list);
511 }
512 
513 /**
514  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
515  * @filtered_pids: The list of pids to check
516  * @search_pid: The PID to find in @filtered_pids
517  *
518  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
519  */
520 bool
521 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
522 {
523 	/*
524 	 * If pid_max changed after filtered_pids was created, we
525 	 * by default ignore all pids greater than the previous pid_max.
526 	 */
527 	if (search_pid >= filtered_pids->pid_max)
528 		return false;
529 
530 	return test_bit(search_pid, filtered_pids->pids);
531 }
532 
533 /**
534  * trace_ignore_this_task - should a task be ignored for tracing
535  * @filtered_pids: The list of pids to check
536  * @filtered_no_pids: The list of pids not to be traced
537  * @task: The task that should be ignored if not filtered
538  *
539  * Checks if @task should be traced or not from @filtered_pids.
540  * Returns true if @task should *NOT* be traced.
541  * Returns false if @task should be traced.
542  */
543 bool
544 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
545 		       struct trace_pid_list *filtered_no_pids,
546 		       struct task_struct *task)
547 {
548 	/*
549 	 * If filtered_no_pids is not empty, and the task's pid is listed
550 	 * in filtered_no_pids, then return true.
551 	 * Otherwise, if filtered_pids is empty, that means we can
552 	 * trace all tasks. If it has content, then only trace pids
553 	 * within filtered_pids.
554 	 */
555 
556 	return (filtered_pids &&
557 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 		(filtered_no_pids &&
559 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
560 }
561 
562 /**
563  * trace_filter_add_remove_task - Add or remove a task from a pid_list
564  * @pid_list: The list to modify
565  * @self: The current task for fork or NULL for exit
566  * @task: The task to add or remove
567  *
568  * If adding a task, if @self is defined, the task is only added if @self
569  * is also included in @pid_list. This happens on fork and tasks should
570  * only be added when the parent is listed. If @self is NULL, then the
571  * @task pid will be removed from the list, which would happen on exit
572  * of a task.
573  */
574 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
575 				  struct task_struct *self,
576 				  struct task_struct *task)
577 {
578 	if (!pid_list)
579 		return;
580 
581 	/* For forks, we only add if the forking task is listed */
582 	if (self) {
583 		if (!trace_find_filtered_pid(pid_list, self->pid))
584 			return;
585 	}
586 
587 	/* Sorry, but we don't support pid_max changing after setting */
588 	if (task->pid >= pid_list->pid_max)
589 		return;
590 
591 	/* "self" is set for forks, and NULL for exits */
592 	if (self)
593 		set_bit(task->pid, pid_list->pids);
594 	else
595 		clear_bit(task->pid, pid_list->pids);
596 }
597 
598 /**
599  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
600  * @pid_list: The pid list to show
601  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
602  * @pos: The position of the file
603  *
604  * This is used by the seq_file "next" operation to iterate the pids
605  * listed in a trace_pid_list structure.
606  *
607  * Returns the pid+1 as we want to display pid of zero, but NULL would
608  * stop the iteration.
609  */
610 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 {
612 	unsigned long pid = (unsigned long)v;
613 
614 	(*pos)++;
615 
616 	/* pid already is +1 of the actual previous bit */
617 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618 
619 	/* Return pid + 1 to allow zero to be represented */
620 	if (pid < pid_list->pid_max)
621 		return (void *)(pid + 1);
622 
623 	return NULL;
624 }
625 
626 /**
627  * trace_pid_start - Used for seq_file to start reading pid lists
628  * @pid_list: The pid list to show
629  * @pos: The position of the file
630  *
631  * This is used by seq_file "start" operation to start the iteration
632  * of listing pids.
633  *
634  * Returns the pid+1 as we want to display pid of zero, but NULL would
635  * stop the iteration.
636  */
637 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
638 {
639 	unsigned long pid;
640 	loff_t l = 0;
641 
642 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
643 	if (pid >= pid_list->pid_max)
644 		return NULL;
645 
646 	/* Return pid + 1 so that zero can be the exit value */
647 	for (pid++; pid && l < *pos;
648 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 		;
650 	return (void *)pid;
651 }
652 
653 /**
654  * trace_pid_show - show the current pid in seq_file processing
655  * @m: The seq_file structure to write into
656  * @v: A void pointer of the pid (+1) value to display
657  *
658  * Can be directly used by seq_file operations to display the current
659  * pid value.
660  */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 	unsigned long pid = (unsigned long)v - 1;
664 
665 	seq_printf(m, "%lu\n", pid);
666 	return 0;
667 }
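
/*
 * A sketch of how these three helpers are typically wired into a seq_file
 * (hypothetical names; real users also take whatever locks protect their
 * pid list in .start/.stop):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */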
668 
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE		127
671 
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 		    struct trace_pid_list **new_pid_list,
674 		    const char __user *ubuf, size_t cnt)
675 {
676 	struct trace_pid_list *pid_list;
677 	struct trace_parser parser;
678 	unsigned long val;
679 	int nr_pids = 0;
680 	ssize_t read = 0;
681 	ssize_t ret = 0;
682 	loff_t pos;
683 	pid_t pid;
684 
685 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 		return -ENOMEM;
687 
688 	/*
689 	 * Always recreate a new array. The write is an all or nothing
690 	 * operation. Always create a new array when the user adds
691 	 * new pids. If the operation fails, then the current list is
692 	 * not modified.
693 	 */
694 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 	if (!pid_list) {
696 		trace_parser_put(&parser);
697 		return -ENOMEM;
698 	}
699 
700 	pid_list->pid_max = READ_ONCE(pid_max);
701 
702 	/* Only truncating will shrink pid_max */
703 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
704 		pid_list->pid_max = filtered_pids->pid_max;
705 
706 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
707 	if (!pid_list->pids) {
708 		trace_parser_put(&parser);
709 		kfree(pid_list);
710 		return -ENOMEM;
711 	}
712 
713 	if (filtered_pids) {
714 		/* copy the current bits to the new max */
715 		for_each_set_bit(pid, filtered_pids->pids,
716 				 filtered_pids->pid_max) {
717 			set_bit(pid, pid_list->pids);
718 			nr_pids++;
719 		}
720 	}
721 
722 	while (cnt > 0) {
723 
724 		pos = 0;
725 
726 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
727 		if (ret < 0 || !trace_parser_loaded(&parser))
728 			break;
729 
730 		read += ret;
731 		ubuf += ret;
732 		cnt -= ret;
733 
734 		ret = -EINVAL;
735 		if (kstrtoul(parser.buffer, 0, &val))
736 			break;
737 		if (val >= pid_list->pid_max)
738 			break;
739 
740 		pid = (pid_t)val;
741 
742 		set_bit(pid, pid_list->pids);
743 		nr_pids++;
744 
745 		trace_parser_clear(&parser);
746 		ret = 0;
747 	}
748 	trace_parser_put(&parser);
749 
750 	if (ret < 0) {
751 		trace_free_pid_list(pid_list);
752 		return ret;
753 	}
754 
755 	if (!nr_pids) {
756 		/* Cleared the list of pids */
757 		trace_free_pid_list(pid_list);
758 		read = ret;
759 		pid_list = NULL;
760 	}
761 
762 	*new_pid_list = pid_list;
763 
764 	return read;
765 }
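
/*
 * A sketch of how a pid file's write handler uses the helper above
 * (hypothetical pointer name): the freshly built list only replaces the
 * old one after the whole write succeeded, keeping the all-or-nothing
 * semantics described in the comment above.
 *
 *	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(my_filtered_pids, new_list);
 *	synchronize_rcu();
 *	if (old_list)
 *		trace_free_pid_list(old_list);
 */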
766 
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 	u64 ts;
770 
771 	/* Early boot up does not have a buffer yet */
772 	if (!buf->buffer)
773 		return trace_clock_local();
774 
775 	ts = ring_buffer_time_stamp(buf->buffer);
776 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777 
778 	return ts;
779 }
780 
781 u64 ftrace_now(int cpu)
782 {
783 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785 
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which can be checked in fast paths such
791  * as the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797 	/*
798 	 * For quick access (irqsoff uses this in fast path), just
799 	 * return the mirror variable of the state of the ring buffer.
800 	 * It's a little racy, but we don't really care.
801 	 */
802 	smp_rmb();
803 	return !global_trace.buffer_disabled;
804 }
805 
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If a dump on oops happens, not having to wait for all that
813  * output is much appreciated. Anyway, this is configurable at
814  * both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
817 
818 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer		*trace_types __read_mostly;
822 
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827 
828 /*
829  * serialize the access of the ring buffer
830  *
831  * The ring buffer serializes readers, but that is only low level protection.
832  * The validity of the events (returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be rewritten
839  *      by the events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849 
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 
854 static inline void trace_access_lock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		/* gain it for accessing the whole ring buffer. */
858 		down_write(&all_cpu_access_lock);
859 	} else {
860 		/* gain it for accessing a cpu ring buffer. */
861 
862 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 		down_read(&all_cpu_access_lock);
864 
865 		/* Secondly block other access to this @cpu ring buffer. */
866 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 	}
868 }
869 
870 static inline void trace_access_unlock(int cpu)
871 {
872 	if (cpu == RING_BUFFER_ALL_CPUS) {
873 		up_write(&all_cpu_access_lock);
874 	} else {
875 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 		up_read(&all_cpu_access_lock);
877 	}
878 }
879 
880 static inline void trace_access_lock_init(void)
881 {
882 	int cpu;
883 
884 	for_each_possible_cpu(cpu)
885 		mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887 
888 #else
889 
890 static DEFINE_MUTEX(access_lock);
891 
892 static inline void trace_access_lock(int cpu)
893 {
894 	(void)cpu;
895 	mutex_lock(&access_lock);
896 }
897 
898 static inline void trace_access_unlock(int cpu)
899 {
900 	(void)cpu;
901 	mutex_unlock(&access_lock);
902 }
903 
904 static inline void trace_access_lock_init(void)
905 {
906 }
907 
908 #endif
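
/*
 * A minimal sketch of how the readers in this file use the primitives
 * above (iter is a struct trace_iterator, as in the pipe read paths):
 *
 *	trace_access_lock(iter->cpu_file);
 *	// ... consume or peek at events for iter->cpu_file ...
 *	trace_access_unlock(iter->cpu_file);
 */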
909 
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 				 unsigned int trace_ctx,
913 				 int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 				      struct trace_buffer *buffer,
916 				      unsigned int trace_ctx,
917 				      int skip, struct pt_regs *regs);
918 
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 					unsigned int trace_ctx,
922 					int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 				      struct trace_buffer *buffer,
927 				      unsigned long trace_ctx,
928 				      int skip, struct pt_regs *regs)
929 {
930 }
931 
932 #endif
933 
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 		  int type, unsigned int trace_ctx)
937 {
938 	struct trace_entry *ent = ring_buffer_event_data(event);
939 
940 	tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942 
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 			  int type,
946 			  unsigned long len,
947 			  unsigned int trace_ctx)
948 {
949 	struct ring_buffer_event *event;
950 
951 	event = ring_buffer_lock_reserve(buffer, len);
952 	if (event != NULL)
953 		trace_event_setup(event, type, trace_ctx);
954 
955 	return event;
956 }
957 
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 	if (tr->array_buffer.buffer)
961 		ring_buffer_record_on(tr->array_buffer.buffer);
962 	/*
963 	 * This flag is looked at when buffers haven't been allocated
964 	 * yet, or by some tracers (like irqsoff), that just want to
965 	 * know if the ring buffer has been disabled, but it can handle
966 	 * races where it gets disabled but we still do a record.
967 	 * As the check is in the fast path of the tracers, it is more
968 	 * important to be fast than accurate.
969 	 */
970 	tr->buffer_disabled = 0;
971 	/* Make the flag seen by readers */
972 	smp_wmb();
973 }
974 
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983 	tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986 
987 
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 	__this_cpu_write(trace_taskinfo_save, true);
992 
993 	/* If this is the temp buffer, we need to commit fully */
994 	if (this_cpu_read(trace_buffered_event) == event) {
995 		/* Length is in event->array[0] */
996 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 		/* Release the temp buffer */
998 		this_cpu_dec(trace_buffered_event_cnt);
999 	} else
1000 		ring_buffer_unlock_commit(buffer, event);
1001 }
1002 
1003 /**
1004  * __trace_puts - write a constant string into the trace buffer.
1005  * @ip:	   The address of the caller
1006  * @str:   The constant string to write
1007  * @size:  The size of the string.
1008  */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011 	struct ring_buffer_event *event;
1012 	struct trace_buffer *buffer;
1013 	struct print_entry *entry;
1014 	unsigned int trace_ctx;
1015 	int alloc;
1016 
1017 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1018 		return 0;
1019 
1020 	if (unlikely(tracing_selftest_running || tracing_disabled))
1021 		return 0;
1022 
1023 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024 
1025 	trace_ctx = tracing_gen_ctx();
1026 	buffer = global_trace.array_buffer.buffer;
1027 	ring_buffer_nest_start(buffer);
1028 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1029 					    trace_ctx);
1030 	if (!event) {
1031 		size = 0;
1032 		goto out;
1033 	}
1034 
1035 	entry = ring_buffer_event_data(event);
1036 	entry->ip = ip;
1037 
1038 	memcpy(&entry->buf, str, size);
1039 
1040 	/* Add a newline if necessary */
1041 	if (entry->buf[size - 1] != '\n') {
1042 		entry->buf[size] = '\n';
1043 		entry->buf[size + 1] = '\0';
1044 	} else
1045 		entry->buf[size] = '\0';
1046 
1047 	__buffer_unlock_commit(buffer, event);
1048 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049  out:
1050 	ring_buffer_nest_end(buffer);
1051 	return size;
1052 }
1053 EXPORT_SYMBOL_GPL(__trace_puts);
1054 
1055 /**
1056  * __trace_bputs - write the pointer to a constant string into trace buffer
1057  * @ip:	   The address of the caller
1058  * @str:   The constant string whose address is written into the buffer
1059  */
1060 int __trace_bputs(unsigned long ip, const char *str)
1061 {
1062 	struct ring_buffer_event *event;
1063 	struct trace_buffer *buffer;
1064 	struct bputs_entry *entry;
1065 	unsigned int trace_ctx;
1066 	int size = sizeof(struct bputs_entry);
1067 	int ret = 0;
1068 
1069 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070 		return 0;
1071 
1072 	if (unlikely(tracing_selftest_running || tracing_disabled))
1073 		return 0;
1074 
1075 	trace_ctx = tracing_gen_ctx();
1076 	buffer = global_trace.array_buffer.buffer;
1077 
1078 	ring_buffer_nest_start(buffer);
1079 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1080 					    trace_ctx);
1081 	if (!event)
1082 		goto out;
1083 
1084 	entry = ring_buffer_event_data(event);
1085 	entry->ip			= ip;
1086 	entry->str			= str;
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1090 
1091 	ret = 1;
1092  out:
1093 	ring_buffer_nest_end(buffer);
1094 	return ret;
1095 }
1096 EXPORT_SYMBOL_GPL(__trace_bputs);
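
/*
 * Callers normally reach the two helpers above through the trace_puts()
 * macro rather than calling them directly; a quick illustrative use
 * (sketch, not code from this file):
 *
 *	trace_puts("reached the slow path\n");
 *
 * With a string literal the macro ends up in __trace_bputs() (only the
 * pointer is recorded); otherwise it falls back to __trace_puts().
 */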
1097 
1098 #ifdef CONFIG_TRACER_SNAPSHOT
1099 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1100 					   void *cond_data)
1101 {
1102 	struct tracer *tracer = tr->current_trace;
1103 	unsigned long flags;
1104 
1105 	if (in_nmi()) {
1106 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1107 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1108 		return;
1109 	}
1110 
1111 	if (!tr->allocated_snapshot) {
1112 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1113 		internal_trace_puts("*** stopping trace here!   ***\n");
1114 		tracing_off();
1115 		return;
1116 	}
1117 
1118 	/* Note, snapshot can not be used when the tracer uses it */
1119 	if (tracer->use_max_tr) {
1120 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1121 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1122 		return;
1123 	}
1124 
1125 	local_irq_save(flags);
1126 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1127 	local_irq_restore(flags);
1128 }
1129 
1130 void tracing_snapshot_instance(struct trace_array *tr)
1131 {
1132 	tracing_snapshot_instance_cond(tr, NULL);
1133 }
1134 
1135 /**
1136  * tracing_snapshot - take a snapshot of the current buffer.
1137  *
1138  * This causes a swap between the snapshot buffer and the current live
1139  * tracing buffer. You can use this to take snapshots of the live
1140  * trace when some condition is triggered, but continue to trace.
1141  *
1142  * Note, make sure to allocate the snapshot either with
1143  * tracing_snapshot_alloc(), or manually with:
1144  * echo 1 > /sys/kernel/debug/tracing/snapshot
1145  *
1146  * If the snapshot buffer is not allocated, this will stop tracing,
1147  * basically making a permanent snapshot.
1148  */
1149 void tracing_snapshot(void)
1150 {
1151 	struct trace_array *tr = &global_trace;
1152 
1153 	tracing_snapshot_instance(tr);
1154 }
1155 EXPORT_SYMBOL_GPL(tracing_snapshot);
1156 
1157 /**
1158  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1159  * @tr:		The tracing instance to snapshot
1160  * @cond_data:	The data to be tested conditionally, and possibly saved
1161  *
1162  * This is the same as tracing_snapshot() except that the snapshot is
1163  * conditional - the snapshot will only happen if the
1164  * cond_snapshot.update() implementation receiving the cond_data
1165  * returns true, which means that the trace array's cond_snapshot
1166  * update() operation used the cond_data to determine whether the
1167  * snapshot should be taken, and if it was, presumably saved it along
1168  * with the snapshot.
1169  */
1170 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 {
1172 	tracing_snapshot_instance_cond(tr, cond_data);
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1175 
1176 /**
1177  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1178  * @tr:		The tracing instance
1179  *
1180  * When the user enables a conditional snapshot using
1181  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1182  * with the snapshot.  This accessor is used to retrieve it.
1183  *
1184  * Should not be called from cond_snapshot.update(), since it takes
1185  * the tr->max_lock lock, which the code calling
1186  * cond_snapshot.update() has already done.
1187  *
1188  * Returns the cond_data associated with the trace array's snapshot.
1189  */
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192 	void *cond_data = NULL;
1193 
1194 	arch_spin_lock(&tr->max_lock);
1195 
1196 	if (tr->cond_snapshot)
1197 		cond_data = tr->cond_snapshot->cond_data;
1198 
1199 	arch_spin_unlock(&tr->max_lock);
1200 
1201 	return cond_data;
1202 }
1203 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204 
1205 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1206 					struct array_buffer *size_buf, int cpu_id);
1207 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208 
1209 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1210 {
1211 	int ret;
1212 
1213 	if (!tr->allocated_snapshot) {
1214 
1215 		/* allocate spare buffer */
1216 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1217 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1218 		if (ret < 0)
1219 			return ret;
1220 
1221 		tr->allocated_snapshot = true;
1222 	}
1223 
1224 	return 0;
1225 }
1226 
1227 static void free_snapshot(struct trace_array *tr)
1228 {
1229 	/*
1230 	 * We don't free the ring buffer; instead, we resize it because
1231 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1232 	 * we want to preserve it.
1233 	 */
1234 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1235 	set_buffer_entries(&tr->max_buffer, 1);
1236 	tracing_reset_online_cpus(&tr->max_buffer);
1237 	tr->allocated_snapshot = false;
1238 }
1239 
1240 /**
1241  * tracing_alloc_snapshot - allocate snapshot buffer.
1242  *
1243  * This only allocates the snapshot buffer if it isn't already
1244  * allocated - it doesn't also take a snapshot.
1245  *
1246  * This is meant to be used in cases where the snapshot buffer needs
1247  * to be set up for events that can't sleep but need to be able to
1248  * trigger a snapshot.
1249  */
1250 int tracing_alloc_snapshot(void)
1251 {
1252 	struct trace_array *tr = &global_trace;
1253 	int ret;
1254 
1255 	ret = tracing_alloc_snapshot_instance(tr);
1256 	WARN_ON(ret < 0);
1257 
1258 	return ret;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1261 
1262 /**
1263  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264  *
1265  * This is similar to tracing_snapshot(), but it will allocate the
1266  * snapshot buffer if it isn't already allocated. Use this only
1267  * where it is safe to sleep, as the allocation may sleep.
1268  *
1269  * This causes a swap between the snapshot buffer and the current live
1270  * tracing buffer. You can use this to take snapshots of the live
1271  * trace when some condition is triggered, but continue to trace.
1272  */
1273 void tracing_snapshot_alloc(void)
1274 {
1275 	int ret;
1276 
1277 	ret = tracing_alloc_snapshot();
1278 	if (ret < 0)
1279 		return;
1280 
1281 	tracing_snapshot();
1282 }
1283 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
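
/*
 * A minimal usage sketch (hypothetical condition name): allocate the spare
 * buffer up front where sleeping is allowed, then snapshot from wherever
 * the interesting condition is detected, even atomic context.
 *
 *	tracing_alloc_snapshot();		// e.g. at init time
 *	...
 *	if (hit_suspicious_condition)
 *		tracing_snapshot();		// swap live buffer and snapshot
 */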
1284 
1285 /**
1286  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1287  * @tr:		The tracing instance
1288  * @cond_data:	User data to associate with the snapshot
1289  * @update:	Implementation of the cond_snapshot update function
1290  *
1291  * Check whether the conditional snapshot for the given instance has
1292  * already been enabled, or if the current tracer is already using a
1293  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1294  * save the cond_data and update function inside.
1295  *
1296  * Returns 0 if successful, error otherwise.
1297  */
1298 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1299 				 cond_update_fn_t update)
1300 {
1301 	struct cond_snapshot *cond_snapshot;
1302 	int ret = 0;
1303 
1304 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1305 	if (!cond_snapshot)
1306 		return -ENOMEM;
1307 
1308 	cond_snapshot->cond_data = cond_data;
1309 	cond_snapshot->update = update;
1310 
1311 	mutex_lock(&trace_types_lock);
1312 
1313 	ret = tracing_alloc_snapshot_instance(tr);
1314 	if (ret)
1315 		goto fail_unlock;
1316 
1317 	if (tr->current_trace->use_max_tr) {
1318 		ret = -EBUSY;
1319 		goto fail_unlock;
1320 	}
1321 
1322 	/*
1323 	 * The cond_snapshot can only change to NULL without the
1324 	 * trace_types_lock. We don't care if we race with it going
1325 	 * to NULL, but we want to make sure that it's not set to
1326 	 * something other than NULL when we get here, which we can
1327 	 * do safely with only holding the trace_types_lock and not
1328 	 * having to take the max_lock.
1329 	 */
1330 	if (tr->cond_snapshot) {
1331 		ret = -EBUSY;
1332 		goto fail_unlock;
1333 	}
1334 
1335 	arch_spin_lock(&tr->max_lock);
1336 	tr->cond_snapshot = cond_snapshot;
1337 	arch_spin_unlock(&tr->max_lock);
1338 
1339 	mutex_unlock(&trace_types_lock);
1340 
1341 	return ret;
1342 
1343  fail_unlock:
1344 	mutex_unlock(&trace_types_lock);
1345 	kfree(cond_snapshot);
1346 	return ret;
1347 }
1348 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
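
/*
 * A minimal sketch of a conditional-snapshot user (hypothetical names,
 * not code from this file):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_ctx *ctx = cond_data;
 *
 *		return ctx->hit_threshold;	// snapshot only when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_ctx, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_ctx);	// swaps only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */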
1349 
1350 /**
1351  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1352  * @tr:		The tracing instance
1353  *
1354  * Check whether the conditional snapshot for the given instance is
1355  * enabled; if so, free the cond_snapshot associated with it,
1356  * otherwise return -EINVAL.
1357  *
1358  * Returns 0 if successful, error otherwise.
1359  */
1360 int tracing_snapshot_cond_disable(struct trace_array *tr)
1361 {
1362 	int ret = 0;
1363 
1364 	arch_spin_lock(&tr->max_lock);
1365 
1366 	if (!tr->cond_snapshot)
1367 		ret = -EINVAL;
1368 	else {
1369 		kfree(tr->cond_snapshot);
1370 		tr->cond_snapshot = NULL;
1371 	}
1372 
1373 	arch_spin_unlock(&tr->max_lock);
1374 
1375 	return ret;
1376 }
1377 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1378 #else
1379 void tracing_snapshot(void)
1380 {
1381 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 }
1383 EXPORT_SYMBOL_GPL(tracing_snapshot);
1384 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 {
1386 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1389 int tracing_alloc_snapshot(void)
1390 {
1391 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1392 	return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1395 void tracing_snapshot_alloc(void)
1396 {
1397 	/* Give warning */
1398 	tracing_snapshot();
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1401 void *tracing_cond_snapshot_data(struct trace_array *tr)
1402 {
1403 	return NULL;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1406 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1407 {
1408 	return -ENODEV;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1411 int tracing_snapshot_cond_disable(struct trace_array *tr)
1412 {
1413 	return false;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1416 #endif /* CONFIG_TRACER_SNAPSHOT */
1417 
1418 void tracer_tracing_off(struct trace_array *tr)
1419 {
1420 	if (tr->array_buffer.buffer)
1421 		ring_buffer_record_off(tr->array_buffer.buffer);
1422 	/*
1423 	 * This flag is looked at when buffers haven't been allocated
1424 	 * yet, or by some tracers (like irqsoff), that just want to
1425 	 * know if the ring buffer has been disabled, but it can handle
1426 	 * races where it gets disabled but we still do a record.
1427 	 * As the check is in the fast path of the tracers, it is more
1428 	 * important to be fast than accurate.
1429 	 */
1430 	tr->buffer_disabled = 1;
1431 	/* Make the flag seen by readers */
1432 	smp_wmb();
1433 }
1434 
1435 /**
1436  * tracing_off - turn off tracing buffers
1437  *
1438  * This function stops the tracing buffers from recording data.
1439  * It does not disable any overhead the tracers themselves may
1440  * be causing. This function simply causes all recording to
1441  * the ring buffers to fail.
1442  */
1443 void tracing_off(void)
1444 {
1445 	tracer_tracing_off(&global_trace);
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_off);
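
/*
 * A common pattern (sketch, hypothetical condition): freeze the buffers
 * when a problem is detected so the events leading up to it are preserved
 * for post-mortem reading; recording can be re-enabled later with
 * tracing_on() or by writing 1 to the tracing_on file.
 *
 *	if (detected_data_corruption)
 *		tracing_off();
 */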
1448 
1449 void disable_trace_on_warning(void)
1450 {
1451 	if (__disable_trace_on_warning) {
1452 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1453 			"Disabling tracing due to warning\n");
1454 		tracing_off();
1455 	}
1456 }
1457 
1458 /**
1459  * tracer_tracing_is_on - show real state of ring buffer enabled
1460  * @tr : the trace array to know if ring buffer is enabled
1461  *
1462  * Shows real state of the ring buffer if it is enabled or not.
1463  */
1464 bool tracer_tracing_is_on(struct trace_array *tr)
1465 {
1466 	if (tr->array_buffer.buffer)
1467 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1468 	return !tr->buffer_disabled;
1469 }
1470 
1471 /**
1472  * tracing_is_on - show state of ring buffers enabled
1473  */
1474 int tracing_is_on(void)
1475 {
1476 	return tracer_tracing_is_on(&global_trace);
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_is_on);
1479 
1480 static int __init set_buf_size(char *str)
1481 {
1482 	unsigned long buf_size;
1483 
1484 	if (!str)
1485 		return 0;
1486 	buf_size = memparse(str, &str);
1487 	/* nr_entries can not be zero */
1488 	if (buf_size == 0)
1489 		return 0;
1490 	trace_buf_size = buf_size;
1491 	return 1;
1492 }
1493 __setup("trace_buf_size=", set_buf_size);
1494 
1495 static int __init set_tracing_thresh(char *str)
1496 {
1497 	unsigned long threshold;
1498 	int ret;
1499 
1500 	if (!str)
1501 		return 0;
1502 	ret = kstrtoul(str, 0, &threshold);
1503 	if (ret < 0)
1504 		return 0;
1505 	tracing_thresh = threshold * 1000;
1506 	return 1;
1507 }
1508 __setup("tracing_thresh=", set_tracing_thresh);
1509 
1510 unsigned long nsecs_to_usecs(unsigned long nsecs)
1511 {
1512 	return nsecs / 1000;
1513 }
1514 
1515 /*
1516  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1517  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1518  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1519  * of strings in the order that the evals (enum) were defined.
1520  */
1521 #undef C
1522 #define C(a, b) b
1523 
1524 /* These must match the bit positions in trace_iterator_flags */
1525 static const char *trace_options[] = {
1526 	TRACE_FLAGS
1527 	NULL
1528 };
1529 
1530 static struct {
1531 	u64 (*func)(void);
1532 	const char *name;
1533 	int in_ns;		/* is this clock in nanoseconds? */
1534 } trace_clocks[] = {
1535 	{ trace_clock_local,		"local",	1 },
1536 	{ trace_clock_global,		"global",	1 },
1537 	{ trace_clock_counter,		"counter",	0 },
1538 	{ trace_clock_jiffies,		"uptime",	0 },
1539 	{ trace_clock,			"perf",		1 },
1540 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1541 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1542 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1543 	ARCH_TRACE_CLOCKS
1544 };
1545 
1546 bool trace_clock_in_ns(struct trace_array *tr)
1547 {
1548 	if (trace_clocks[tr->clock_id].in_ns)
1549 		return true;
1550 
1551 	return false;
1552 }
1553 
1554 /*
1555  * trace_parser_get_init - gets the buffer for trace parser
1556  */
1557 int trace_parser_get_init(struct trace_parser *parser, int size)
1558 {
1559 	memset(parser, 0, sizeof(*parser));
1560 
1561 	parser->buffer = kmalloc(size, GFP_KERNEL);
1562 	if (!parser->buffer)
1563 		return 1;
1564 
1565 	parser->size = size;
1566 	return 0;
1567 }
1568 
1569 /*
1570  * trace_parser_put - frees the buffer for trace parser
1571  */
1572 void trace_parser_put(struct trace_parser *parser)
1573 {
1574 	kfree(parser->buffer);
1575 	parser->buffer = NULL;
1576 }
1577 
1578 /*
1579  * trace_get_user - reads the user input string separated by space
1580  * (matched by isspace(ch))
1581  *
1582  * For each string found the 'struct trace_parser' is updated,
1583  * and the function returns.
1584  *
1585  * Returns number of bytes read.
1586  *
1587  * See kernel/trace/trace.h for 'struct trace_parser' details.
1588  */
1589 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1590 	size_t cnt, loff_t *ppos)
1591 {
1592 	char ch;
1593 	size_t read = 0;
1594 	ssize_t ret;
1595 
1596 	if (!*ppos)
1597 		trace_parser_clear(parser);
1598 
1599 	ret = get_user(ch, ubuf++);
1600 	if (ret)
1601 		goto out;
1602 
1603 	read++;
1604 	cnt--;
1605 
1606 	/*
1607 	 * The parser is not finished with the last write,
1608 	 * continue reading the user input without skipping spaces.
1609 	 */
1610 	if (!parser->cont) {
1611 		/* skip white space */
1612 		while (cnt && isspace(ch)) {
1613 			ret = get_user(ch, ubuf++);
1614 			if (ret)
1615 				goto out;
1616 			read++;
1617 			cnt--;
1618 		}
1619 
1620 		parser->idx = 0;
1621 
1622 		/* only spaces were written */
1623 		if (isspace(ch) || !ch) {
1624 			*ppos += read;
1625 			ret = read;
1626 			goto out;
1627 		}
1628 	}
1629 
1630 	/* read the non-space input */
1631 	while (cnt && !isspace(ch) && ch) {
1632 		if (parser->idx < parser->size - 1)
1633 			parser->buffer[parser->idx++] = ch;
1634 		else {
1635 			ret = -EINVAL;
1636 			goto out;
1637 		}
1638 		ret = get_user(ch, ubuf++);
1639 		if (ret)
1640 			goto out;
1641 		read++;
1642 		cnt--;
1643 	}
1644 
1645 	/* We either have the complete input or we have to wait for another call. */
1646 	if (isspace(ch) || !ch) {
1647 		parser->buffer[parser->idx] = 0;
1648 		parser->cont = false;
1649 	} else if (parser->idx < parser->size - 1) {
1650 		parser->cont = true;
1651 		parser->buffer[parser->idx++] = ch;
1652 		/* Make sure the parsed string always terminates with '\0'. */
1653 		parser->buffer[parser->idx] = 0;
1654 	} else {
1655 		ret = -EINVAL;
1656 		goto out;
1657 	}
1658 
1659 	*ppos += read;
1660 	ret = read;
1661 
1662 out:
1663 	return ret;
1664 }
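
/*
 * A condensed sketch of the usual parser loop (see trace_pid_write() above
 * for a complete in-tree user; MY_BUF_SIZE is a hypothetical size):
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		// parser.buffer now holds one whitespace-separated token
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */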
1665 
1666 /* TODO add a seq_buf_to_buffer() */
1667 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1668 {
1669 	int len;
1670 
1671 	if (trace_seq_used(s) <= s->seq.readpos)
1672 		return -EBUSY;
1673 
1674 	len = trace_seq_used(s) - s->seq.readpos;
1675 	if (cnt > len)
1676 		cnt = len;
1677 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1678 
1679 	s->seq.readpos += cnt;
1680 	return cnt;
1681 }
1682 
1683 unsigned long __read_mostly	tracing_thresh;
1684 static const struct file_operations tracing_max_lat_fops;
1685 
1686 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1687 	defined(CONFIG_FSNOTIFY)
1688 
1689 static struct workqueue_struct *fsnotify_wq;
1690 
1691 static void latency_fsnotify_workfn(struct work_struct *work)
1692 {
1693 	struct trace_array *tr = container_of(work, struct trace_array,
1694 					      fsnotify_work);
1695 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1696 }
1697 
1698 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1699 {
1700 	struct trace_array *tr = container_of(iwork, struct trace_array,
1701 					      fsnotify_irqwork);
1702 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1703 }
1704 
1705 static void trace_create_maxlat_file(struct trace_array *tr,
1706 				     struct dentry *d_tracer)
1707 {
1708 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1709 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1710 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1711 					      d_tracer, &tr->max_latency,
1712 					      &tracing_max_lat_fops);
1713 }
1714 
1715 __init static int latency_fsnotify_init(void)
1716 {
1717 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1718 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1719 	if (!fsnotify_wq) {
1720 		pr_err("Unable to allocate tr_max_lat_wq\n");
1721 		return -ENOMEM;
1722 	}
1723 	return 0;
1724 }
1725 
1726 late_initcall_sync(latency_fsnotify_init);
1727 
1728 void latency_fsnotify(struct trace_array *tr)
1729 {
1730 	if (!fsnotify_wq)
1731 		return;
1732 	/*
1733 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1734 	 * possible that we are called from __schedule() or do_idle(), which
1735 	 * could cause a deadlock.
1736 	 */
1737 	irq_work_queue(&tr->fsnotify_irqwork);
1738 }
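
/*
 * The resulting notification path is:
 *
 *	latency_fsnotify()		(may run from __schedule()/do_idle())
 *	  -> irq_work_queue()		(gets us out of the scheduler path)
 *	    -> latency_fsnotify_workfn_irq()
 *	      -> queue_work(fsnotify_wq)
 *	        -> latency_fsnotify_workfn()
 *	          -> fsnotify_inode() on the tracing_max_latency dentry
 */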
1739 
1740 /*
1741  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1742  *  defined(CONFIG_FSNOTIFY)
1743  */
1744 #else
1745 
1746 #define trace_create_maxlat_file(tr, d_tracer)				\
1747 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1748 			  &tr->max_latency, &tracing_max_lat_fops)
1749 
1750 #endif
1751 
1752 #ifdef CONFIG_TRACER_MAX_TRACE
1753 /*
1754  * Copy the new maximum trace into the separate maximum-trace
1755  * structure. (this way the maximum trace is permanently saved,
1756  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1757  */
1758 static void
1759 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1760 {
1761 	struct array_buffer *trace_buf = &tr->array_buffer;
1762 	struct array_buffer *max_buf = &tr->max_buffer;
1763 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1764 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1765 
1766 	max_buf->cpu = cpu;
1767 	max_buf->time_start = data->preempt_timestamp;
1768 
1769 	max_data->saved_latency = tr->max_latency;
1770 	max_data->critical_start = data->critical_start;
1771 	max_data->critical_end = data->critical_end;
1772 
1773 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1774 	max_data->pid = tsk->pid;
1775 	/*
1776 	 * If tsk == current, then use current_uid(), as that does not use
1777 	 * RCU. The irq tracer can be called out of RCU scope.
1778 	 */
1779 	if (tsk == current)
1780 		max_data->uid = current_uid();
1781 	else
1782 		max_data->uid = task_uid(tsk);
1783 
1784 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1785 	max_data->policy = tsk->policy;
1786 	max_data->rt_priority = tsk->rt_priority;
1787 
1788 	/* record this tasks comm */
1789 	tracing_record_cmdline(tsk);
1790 	latency_fsnotify(tr);
1791 }
1792 
1793 /**
1794  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1795  * @tr: tracer
1796  * @tsk: the task with the latency
1797  * @cpu: The cpu that initiated the trace.
1798  * @cond_data: User data associated with a conditional snapshot
1799  *
1800  * Flip the buffers between the @tr and the max_tr and record information
1801  * about which task was the cause of this latency.
1802  */
1803 void
1804 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1805 	      void *cond_data)
1806 {
1807 	if (tr->stop_count)
1808 		return;
1809 
1810 	WARN_ON_ONCE(!irqs_disabled());
1811 
1812 	if (!tr->allocated_snapshot) {
1813 		/* Only the nop tracer should hit this when disabling */
1814 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1815 		return;
1816 	}
1817 
1818 	arch_spin_lock(&tr->max_lock);
1819 
1820 	/* Inherit the recordable setting from array_buffer */
1821 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1822 		ring_buffer_record_on(tr->max_buffer.buffer);
1823 	else
1824 		ring_buffer_record_off(tr->max_buffer.buffer);
1825 
1826 #ifdef CONFIG_TRACER_SNAPSHOT
1827 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1828 		goto out_unlock;
1829 #endif
1830 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1831 
1832 	__update_max_tr(tr, tsk, cpu);
1833 
1834  out_unlock:
1835 	arch_spin_unlock(&tr->max_lock);
1836 }
1837 
1838 /**
1839  * update_max_tr_single - only copy one trace over, and reset the rest
1840  * @tr: tracer
1841  * @tsk: task with the latency
1842  * @cpu: the cpu of the buffer to copy.
1843  *
1844  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1845  */
1846 void
1847 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1848 {
1849 	int ret;
1850 
1851 	if (tr->stop_count)
1852 		return;
1853 
1854 	WARN_ON_ONCE(!irqs_disabled());
1855 	if (!tr->allocated_snapshot) {
1856 		/* Only the nop tracer should hit this when disabling */
1857 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1858 		return;
1859 	}
1860 
1861 	arch_spin_lock(&tr->max_lock);
1862 
1863 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1864 
1865 	if (ret == -EBUSY) {
1866 		/*
1867 		 * We failed to swap the buffer due to a commit taking
1868 		 * place on this CPU. We fail to record, but we reset
1869 		 * the max trace buffer (no one writes directly to it)
1870 		 * and flag that it failed.
1871 		 */
1872 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1873 			"Failed to swap buffers due to commit in progress\n");
1874 	}
1875 
1876 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1877 
1878 	__update_max_tr(tr, tsk, cpu);
1879 	arch_spin_unlock(&tr->max_lock);
1880 }
1881 #endif /* CONFIG_TRACER_MAX_TRACE */
1882 
1883 static int wait_on_pipe(struct trace_iterator *iter, int full)
1884 {
1885 	/* Iterators are static; they should be filled or empty */
1886 	if (trace_buffer_iter(iter, iter->cpu_file))
1887 		return 0;
1888 
1889 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1890 				full);
1891 }
1892 
1893 #ifdef CONFIG_FTRACE_STARTUP_TEST
1894 static bool selftests_can_run;
1895 
1896 struct trace_selftests {
1897 	struct list_head		list;
1898 	struct tracer			*type;
1899 };
1900 
1901 static LIST_HEAD(postponed_selftests);
1902 
1903 static int save_selftest(struct tracer *type)
1904 {
1905 	struct trace_selftests *selftest;
1906 
1907 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1908 	if (!selftest)
1909 		return -ENOMEM;
1910 
1911 	selftest->type = type;
1912 	list_add(&selftest->list, &postponed_selftests);
1913 	return 0;
1914 }
1915 
1916 static int run_tracer_selftest(struct tracer *type)
1917 {
1918 	struct trace_array *tr = &global_trace;
1919 	struct tracer *saved_tracer = tr->current_trace;
1920 	int ret;
1921 
1922 	if (!type->selftest || tracing_selftest_disabled)
1923 		return 0;
1924 
1925 	/*
1926 	 * If a tracer registers early in boot up (before scheduling is
1927 	 * initialized and such), then do not run its selftests yet.
1928 	 * Instead, run them a little later in the boot process.
1929 	 */
1930 	if (!selftests_can_run)
1931 		return save_selftest(type);
1932 
1933 	if (!tracing_is_on()) {
1934 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1935 			type->name);
1936 		return 0;
1937 	}
1938 
1939 	/*
1940 	 * Run a selftest on this tracer.
1941 	 * Here we reset the trace buffer, and set the current
1942 	 * tracer to be this tracer. The tracer can then run some
1943 	 * internal tracing to verify that everything is in order.
1944 	 * If we fail, we do not register this tracer.
1945 	 */
1946 	tracing_reset_online_cpus(&tr->array_buffer);
1947 
1948 	tr->current_trace = type;
1949 
1950 #ifdef CONFIG_TRACER_MAX_TRACE
1951 	if (type->use_max_tr) {
1952 		/* If we expanded the buffers, make sure the max is expanded too */
1953 		if (ring_buffer_expanded)
1954 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1955 					   RING_BUFFER_ALL_CPUS);
1956 		tr->allocated_snapshot = true;
1957 	}
1958 #endif
1959 
1960 	/* the test is responsible for initializing and enabling */
1961 	pr_info("Testing tracer %s: ", type->name);
1962 	ret = type->selftest(type, tr);
1963 	/* the test is responsible for resetting too */
1964 	tr->current_trace = saved_tracer;
1965 	if (ret) {
1966 		printk(KERN_CONT "FAILED!\n");
1967 		/* Add the warning after printing 'FAILED' */
1968 		WARN_ON(1);
1969 		return -1;
1970 	}
1971 	/* Only reset on passing, to avoid touching corrupted buffers */
1972 	tracing_reset_online_cpus(&tr->array_buffer);
1973 
1974 #ifdef CONFIG_TRACER_MAX_TRACE
1975 	if (type->use_max_tr) {
1976 		tr->allocated_snapshot = false;
1977 
1978 		/* Shrink the max buffer again */
1979 		if (ring_buffer_expanded)
1980 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1981 					   RING_BUFFER_ALL_CPUS);
1982 	}
1983 #endif
1984 
1985 	printk(KERN_CONT "PASSED\n");
1986 	return 0;
1987 }
1988 
1989 static __init int init_trace_selftests(void)
1990 {
1991 	struct trace_selftests *p, *n;
1992 	struct tracer *t, **last;
1993 	int ret;
1994 
1995 	selftests_can_run = true;
1996 
1997 	mutex_lock(&trace_types_lock);
1998 
1999 	if (list_empty(&postponed_selftests))
2000 		goto out;
2001 
2002 	pr_info("Running postponed tracer tests:\n");
2003 
2004 	tracing_selftest_running = true;
2005 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2006 		/* This loop can take minutes when sanitizers are enabled, so
2007 		 * let's make sure we allow RCU processing.
2008 		 */
2009 		cond_resched();
2010 		ret = run_tracer_selftest(p->type);
2011 		/* If the test fails, then warn and remove from available_tracers */
2012 		if (ret < 0) {
2013 			WARN(1, "tracer: %s failed selftest, disabling\n",
2014 			     p->type->name);
2015 			last = &trace_types;
2016 			for (t = trace_types; t; t = t->next) {
2017 				if (t == p->type) {
2018 					*last = t->next;
2019 					break;
2020 				}
2021 				last = &t->next;
2022 			}
2023 		}
2024 		list_del(&p->list);
2025 		kfree(p);
2026 	}
2027 	tracing_selftest_running = false;
2028 
2029  out:
2030 	mutex_unlock(&trace_types_lock);
2031 
2032 	return 0;
2033 }
2034 core_initcall(init_trace_selftests);
2035 #else
2036 static inline int run_tracer_selftest(struct tracer *type)
2037 {
2038 	return 0;
2039 }
2040 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2041 
2042 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2043 
2044 static void __init apply_trace_boot_options(void);
2045 
2046 /**
2047  * register_tracer - register a tracer with the ftrace system.
2048  * @type: the plugin for the tracer
2049  *
2050  * Register a new plugin tracer.
2051  */
2052 int __init register_tracer(struct tracer *type)
2053 {
2054 	struct tracer *t;
2055 	int ret = 0;
2056 
2057 	if (!type->name) {
2058 		pr_info("Tracer must have a name\n");
2059 		return -1;
2060 	}
2061 
2062 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2063 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2064 		return -1;
2065 	}
2066 
2067 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2068 		pr_warn("Can not register tracer %s due to lockdown\n",
2069 			   type->name);
2070 		return -EPERM;
2071 	}
2072 
2073 	mutex_lock(&trace_types_lock);
2074 
2075 	tracing_selftest_running = true;
2076 
2077 	for (t = trace_types; t; t = t->next) {
2078 		if (strcmp(type->name, t->name) == 0) {
2079 			/* already found */
2080 			pr_info("Tracer %s already registered\n",
2081 				type->name);
2082 			ret = -1;
2083 			goto out;
2084 		}
2085 	}
2086 
2087 	if (!type->set_flag)
2088 		type->set_flag = &dummy_set_flag;
2089 	if (!type->flags) {
2090 		/* Allocate a dummy tracer_flags */
2091 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2092 		if (!type->flags) {
2093 			ret = -ENOMEM;
2094 			goto out;
2095 		}
2096 		type->flags->val = 0;
2097 		type->flags->opts = dummy_tracer_opt;
2098 	} else if (!type->flags->opts)
2099 		type->flags->opts = dummy_tracer_opt;
2101 
2102 	/* store the tracer for __set_tracer_option */
2103 	type->flags->trace = type;
2104 
2105 	ret = run_tracer_selftest(type);
2106 	if (ret < 0)
2107 		goto out;
2108 
2109 	type->next = trace_types;
2110 	trace_types = type;
2111 	add_tracer_options(&global_trace, type);
2112 
2113  out:
2114 	tracing_selftest_running = false;
2115 	mutex_unlock(&trace_types_lock);
2116 
2117 	if (ret || !default_bootup_tracer)
2118 		goto out_unlock;
2119 
2120 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2121 		goto out_unlock;
2122 
2123 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2124 	/* Do we want this tracer to start on bootup? */
2125 	tracing_set_tracer(&global_trace, type->name);
2126 	default_bootup_tracer = NULL;
2127 
2128 	apply_trace_boot_options();
2129 
2130 	/* Disable other selftests, since running this tracer will break them. */
2131 	disable_tracing_selftest("running a tracer");
2132 
2133  out_unlock:
2134 	return ret;
2135 }
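/*
 * A minimal registration sketch, kept out of the build. The tracer name and
 * callbacks are hypothetical; since register_tracer() is __init, in-tree
 * tracers call it from an initcall during boot.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
	/* an optional .selftest callback is run at boot by
	 * run_tracer_selftest() when CONFIG_FTRACE_STARTUP_TEST is set */
};

static __init int example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);
#endif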
2136 
2137 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2138 {
2139 	struct trace_buffer *buffer = buf->buffer;
2140 
2141 	if (!buffer)
2142 		return;
2143 
2144 	ring_buffer_record_disable(buffer);
2145 
2146 	/* Make sure all commits have finished */
2147 	synchronize_rcu();
2148 	ring_buffer_reset_cpu(buffer, cpu);
2149 
2150 	ring_buffer_record_enable(buffer);
2151 }
2152 
2153 void tracing_reset_online_cpus(struct array_buffer *buf)
2154 {
2155 	struct trace_buffer *buffer = buf->buffer;
2156 
2157 	if (!buffer)
2158 		return;
2159 
2160 	ring_buffer_record_disable(buffer);
2161 
2162 	/* Make sure all commits have finished */
2163 	synchronize_rcu();
2164 
2165 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2166 
2167 	ring_buffer_reset_online_cpus(buffer);
2168 
2169 	ring_buffer_record_enable(buffer);
2170 }
2171 
2172 /* Must have trace_types_lock held */
2173 void tracing_reset_all_online_cpus(void)
2174 {
2175 	struct trace_array *tr;
2176 
2177 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2178 		if (!tr->clear_trace)
2179 			continue;
2180 		tr->clear_trace = false;
2181 		tracing_reset_online_cpus(&tr->array_buffer);
2182 #ifdef CONFIG_TRACER_MAX_TRACE
2183 		tracing_reset_online_cpus(&tr->max_buffer);
2184 #endif
2185 	}
2186 }
2187 
2188 static int *tgid_map;
2189 
2190 #define SAVED_CMDLINES_DEFAULT 128
2191 #define NO_CMDLINE_MAP UINT_MAX
2192 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2193 struct saved_cmdlines_buffer {
2194 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2195 	unsigned *map_cmdline_to_pid;
2196 	unsigned cmdline_num;
2197 	int cmdline_idx;
2198 	char *saved_cmdlines;
2199 };
2200 static struct saved_cmdlines_buffer *savedcmd;
2201 
2202 static inline char *get_saved_cmdlines(int idx)
2203 {
2204 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2205 }
2206 
2207 static inline void set_cmdline(int idx, const char *cmdline)
2208 {
2209 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2210 }
2211 
2212 static int allocate_cmdlines_buffer(unsigned int val,
2213 				    struct saved_cmdlines_buffer *s)
2214 {
2215 	s->map_cmdline_to_pid = kmalloc_array(val,
2216 					      sizeof(*s->map_cmdline_to_pid),
2217 					      GFP_KERNEL);
2218 	if (!s->map_cmdline_to_pid)
2219 		return -ENOMEM;
2220 
2221 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2222 	if (!s->saved_cmdlines) {
2223 		kfree(s->map_cmdline_to_pid);
2224 		return -ENOMEM;
2225 	}
2226 
2227 	s->cmdline_idx = 0;
2228 	s->cmdline_num = val;
2229 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2230 	       sizeof(s->map_pid_to_cmdline));
2231 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2232 	       val * sizeof(*s->map_cmdline_to_pid));
2233 
2234 	return 0;
2235 }
2236 
2237 static int trace_create_savedcmd(void)
2238 {
2239 	int ret;
2240 
2241 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2242 	if (!savedcmd)
2243 		return -ENOMEM;
2244 
2245 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2246 	if (ret < 0) {
2247 		kfree(savedcmd);
2248 		savedcmd = NULL;
2249 		return -ENOMEM;
2250 	}
2251 
2252 	return 0;
2253 }
2254 
2255 int is_tracing_stopped(void)
2256 {
2257 	return global_trace.stop_count;
2258 }
2259 
2260 /**
2261  * tracing_start - quick start of the tracer
2262  *
2263  * If tracing is enabled but was stopped by tracing_stop,
2264  * this will start the tracer back up.
2265  */
2266 void tracing_start(void)
2267 {
2268 	struct trace_buffer *buffer;
2269 	unsigned long flags;
2270 
2271 	if (tracing_disabled)
2272 		return;
2273 
2274 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2275 	if (--global_trace.stop_count) {
2276 		if (global_trace.stop_count < 0) {
2277 			/* Someone screwed up their debugging */
2278 			WARN_ON_ONCE(1);
2279 			global_trace.stop_count = 0;
2280 		}
2281 		goto out;
2282 	}
2283 
2284 	/* Prevent the buffers from switching */
2285 	arch_spin_lock(&global_trace.max_lock);
2286 
2287 	buffer = global_trace.array_buffer.buffer;
2288 	if (buffer)
2289 		ring_buffer_record_enable(buffer);
2290 
2291 #ifdef CONFIG_TRACER_MAX_TRACE
2292 	buffer = global_trace.max_buffer.buffer;
2293 	if (buffer)
2294 		ring_buffer_record_enable(buffer);
2295 #endif
2296 
2297 	arch_spin_unlock(&global_trace.max_lock);
2298 
2299  out:
2300 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2301 }
2302 
2303 static void tracing_start_tr(struct trace_array *tr)
2304 {
2305 	struct trace_buffer *buffer;
2306 	unsigned long flags;
2307 
2308 	if (tracing_disabled)
2309 		return;
2310 
2311 	/* If global, we need to also start the max tracer */
2312 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2313 		return tracing_start();
2314 
2315 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2316 
2317 	if (--tr->stop_count) {
2318 		if (tr->stop_count < 0) {
2319 			/* Someone screwed up their debugging */
2320 			WARN_ON_ONCE(1);
2321 			tr->stop_count = 0;
2322 		}
2323 		goto out;
2324 	}
2325 
2326 	buffer = tr->array_buffer.buffer;
2327 	if (buffer)
2328 		ring_buffer_record_enable(buffer);
2329 
2330  out:
2331 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2332 }
2333 
2334 /**
2335  * tracing_stop - quick stop of the tracer
2336  *
2337  * Lightweight way to stop tracing. Use in conjunction with
2338  * tracing_start().
2339  */
2340 void tracing_stop(void)
2341 {
2342 	struct trace_buffer *buffer;
2343 	unsigned long flags;
2344 
2345 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2346 	if (global_trace.stop_count++)
2347 		goto out;
2348 
2349 	/* Prevent the buffers from switching */
2350 	arch_spin_lock(&global_trace.max_lock);
2351 
2352 	buffer = global_trace.array_buffer.buffer;
2353 	if (buffer)
2354 		ring_buffer_record_disable(buffer);
2355 
2356 #ifdef CONFIG_TRACER_MAX_TRACE
2357 	buffer = global_trace.max_buffer.buffer;
2358 	if (buffer)
2359 		ring_buffer_record_disable(buffer);
2360 #endif
2361 
2362 	arch_spin_unlock(&global_trace.max_lock);
2363 
2364  out:
2365 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2366 }
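/*
 * Usage sketch, kept out of the build: debugger-style callers freeze the
 * ring buffers around an inspection and resume afterwards.
 */
#if 0
static void example_freeze_and_inspect(void)
{
	tracing_stop();
	/* ... read or dump the now-quiescent trace buffers ... */
	tracing_start();
}
#endif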
2367 
2368 static void tracing_stop_tr(struct trace_array *tr)
2369 {
2370 	struct trace_buffer *buffer;
2371 	unsigned long flags;
2372 
2373 	/* If global, we need to also stop the max tracer */
2374 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2375 		return tracing_stop();
2376 
2377 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2378 	if (tr->stop_count++)
2379 		goto out;
2380 
2381 	buffer = tr->array_buffer.buffer;
2382 	if (buffer)
2383 		ring_buffer_record_disable(buffer);
2384 
2385  out:
2386 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2387 }
2388 
2389 static int trace_save_cmdline(struct task_struct *tsk)
2390 {
2391 	unsigned tpid, idx;
2392 
2393 	/* treat recording of idle task as a success */
2394 	if (!tsk->pid)
2395 		return 1;
2396 
2397 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2398 
2399 	/*
2400 	 * It's not the end of the world if we don't get
2401 	 * the lock, but we also don't want to spin
2402 	 * nor do we want to disable interrupts,
2403 	 * so if we miss here, then better luck next time.
2404 	 */
2405 	if (!arch_spin_trylock(&trace_cmdline_lock))
2406 		return 0;
2407 
2408 	idx = savedcmd->map_pid_to_cmdline[tpid];
2409 	if (idx == NO_CMDLINE_MAP) {
2410 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2411 
2412 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2413 		savedcmd->cmdline_idx = idx;
2414 	}
2415 
2416 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2417 	set_cmdline(idx, tsk->comm);
2418 
2419 	arch_spin_unlock(&trace_cmdline_lock);
2420 
2421 	return 1;
2422 }
2423 
2424 static void __trace_find_cmdline(int pid, char comm[])
2425 {
2426 	unsigned map;
2427 	int tpid;
2428 
2429 	if (!pid) {
2430 		strcpy(comm, "<idle>");
2431 		return;
2432 	}
2433 
2434 	if (WARN_ON_ONCE(pid < 0)) {
2435 		strcpy(comm, "<XXX>");
2436 		return;
2437 	}
2438 
2439 	tpid = pid & (PID_MAX_DEFAULT - 1);
2440 	map = savedcmd->map_pid_to_cmdline[tpid];
2441 	if (map != NO_CMDLINE_MAP) {
2442 		tpid = savedcmd->map_cmdline_to_pid[map];
2443 		if (tpid == pid) {
2444 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2445 			return;
2446 		}
2447 	}
2448 	strcpy(comm, "<...>");
2449 }
2450 
2451 void trace_find_cmdline(int pid, char comm[])
2452 {
2453 	preempt_disable();
2454 	arch_spin_lock(&trace_cmdline_lock);
2455 
2456 	__trace_find_cmdline(pid, comm);
2457 
2458 	arch_spin_unlock(&trace_cmdline_lock);
2459 	preempt_enable();
2460 }
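/*
 * Usage sketch, kept out of the build: output code resolves a recorded pid
 * back to a command name; "<...>" comes back when the pid was never cached.
 * The trace_seq "s" and the pid argument are hypothetical.
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}
#endif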
2461 
2462 int trace_find_tgid(int pid)
2463 {
2464 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2465 		return 0;
2466 
2467 	return tgid_map[pid];
2468 }
2469 
2470 static int trace_save_tgid(struct task_struct *tsk)
2471 {
2472 	/* treat recording of idle task as a success */
2473 	if (!tsk->pid)
2474 		return 1;
2475 
2476 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2477 		return 0;
2478 
2479 	tgid_map[tsk->pid] = tsk->tgid;
2480 	return 1;
2481 }
2482 
2483 static bool tracing_record_taskinfo_skip(int flags)
2484 {
2485 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2486 		return true;
2487 	if (!__this_cpu_read(trace_taskinfo_save))
2488 		return true;
2489 	return false;
2490 }
2491 
2492 /**
2493  * tracing_record_taskinfo - record the task info of a task
2494  *
2495  * @task:  task to record
2496  * @flags: TRACE_RECORD_CMDLINE for recording comm
2497  *         TRACE_RECORD_TGID for recording tgid
2498  */
2499 void tracing_record_taskinfo(struct task_struct *task, int flags)
2500 {
2501 	bool done;
2502 
2503 	if (tracing_record_taskinfo_skip(flags))
2504 		return;
2505 
2506 	/*
2507 	 * Record as much task information as possible. If some fail, continue
2508 	 * to try to record the others.
2509 	 */
2510 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2511 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2512 
2513 	/* If recording any information failed, retry again soon. */
2514 	if (!done)
2515 		return;
2516 
2517 	__this_cpu_write(trace_taskinfo_save, false);
2518 }
2519 
2520 /**
2521  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2522  *
2523  * @prev: previous task during sched_switch
2524  * @next: next task during sched_switch
2525  * @flags: TRACE_RECORD_CMDLINE for recording comm
2526  *         TRACE_RECORD_TGID for recording tgid
2527  */
2528 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2529 					  struct task_struct *next, int flags)
2530 {
2531 	bool done;
2532 
2533 	if (tracing_record_taskinfo_skip(flags))
2534 		return;
2535 
2536 	/*
2537 	 * Record as much task information as possible. If some fail, continue
2538 	 * to try to record the others.
2539 	 */
2540 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2541 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2543 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2544 
2545 	/* If recording any information failed, retry again soon. */
2546 	if (!done)
2547 		return;
2548 
2549 	__this_cpu_write(trace_taskinfo_save, false);
2550 }
2551 
2552 /* Helpers to record a specific task information */
2553 void tracing_record_cmdline(struct task_struct *task)
2554 {
2555 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2556 }
2557 
2558 void tracing_record_tgid(struct task_struct *task)
2559 {
2560 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2561 }
2562 
2563 /*
2564  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2565  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2566  * simplifies those functions and keeps them in sync.
2567  */
2568 enum print_line_t trace_handle_return(struct trace_seq *s)
2569 {
2570 	return trace_seq_has_overflowed(s) ?
2571 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2572 }
2573 EXPORT_SYMBOL_GPL(trace_handle_return);
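/*
 * Usage sketch, kept out of the build: an event output callback ends with
 * trace_handle_return() so a trace_seq overflow is reported consistently.
 * The callback itself is hypothetical.
 */
#if 0
static enum print_line_t example_event_output(struct trace_iterator *iter,
					      int flags,
					      struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}
#endif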
2574 
2575 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2576 {
2577 	unsigned int trace_flags = irqs_status;
2578 	unsigned int pc;
2579 
2580 	pc = preempt_count();
2581 
2582 	if (pc & NMI_MASK)
2583 		trace_flags |= TRACE_FLAG_NMI;
2584 	if (pc & HARDIRQ_MASK)
2585 		trace_flags |= TRACE_FLAG_HARDIRQ;
2586 	if (in_serving_softirq())
2587 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2588 
2589 	if (tif_need_resched())
2590 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2591 	if (test_preempt_need_resched())
2592 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2593 	return (trace_flags << 16) | (pc & 0xff);
2594 }
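/*
 * Layout sketch, kept out of the build: the preempt count lives in the low
 * byte of the returned value and the TRACE_FLAG_* bits start at bit 16,
 * matching how the entry update and output code unpack it.
 */
#if 0
static void example_unpack_trace_ctx(unsigned int trace_ctx)
{
	unsigned char preempt_count = trace_ctx & 0xff;
	unsigned short flags = trace_ctx >> 16;
}
#endif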
2595 
2596 struct ring_buffer_event *
2597 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2598 			  int type,
2599 			  unsigned long len,
2600 			  unsigned int trace_ctx)
2601 {
2602 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2603 }
2604 
2605 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2606 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2607 static int trace_buffered_event_ref;
2608 
2609 /**
2610  * trace_buffered_event_enable - enable buffering events
2611  *
2612  * When events are being filtered, it is quicker to write the event
2613  * data into a temporary buffer if there is a likely chance that it
2614  * will not be committed. Discarding an event from the ring buffer
2615  * is not as fast as committing one, and is much slower than copying
2616  * the data over and committing the copy.
2617  *
2618  * When an event is to be filtered, allocate per cpu buffers to
2619  * write the event data into, and if the event is filtered and discarded
2620  * it is simply dropped, otherwise, the entire data is to be committed
2621  * in one shot.
2622  */
2623 void trace_buffered_event_enable(void)
2624 {
2625 	struct ring_buffer_event *event;
2626 	struct page *page;
2627 	int cpu;
2628 
2629 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2630 
2631 	if (trace_buffered_event_ref++)
2632 		return;
2633 
2634 	for_each_tracing_cpu(cpu) {
2635 		page = alloc_pages_node(cpu_to_node(cpu),
2636 					GFP_KERNEL | __GFP_NORETRY, 0);
2637 		if (!page)
2638 			goto failed;
2639 
2640 		event = page_address(page);
2641 		memset(event, 0, sizeof(*event));
2642 
2643 		per_cpu(trace_buffered_event, cpu) = event;
2644 
2645 		preempt_disable();
2646 		if (cpu == smp_processor_id() &&
2647 		    __this_cpu_read(trace_buffered_event) !=
2648 		    per_cpu(trace_buffered_event, cpu))
2649 			WARN_ON_ONCE(1);
2650 		preempt_enable();
2651 	}
2652 
2653 	return;
2654  failed:
2655 	trace_buffered_event_disable();
2656 }
2657 
2658 static void enable_trace_buffered_event(void *data)
2659 {
2660 	/* Probably not needed, but do it anyway */
2661 	smp_rmb();
2662 	this_cpu_dec(trace_buffered_event_cnt);
2663 }
2664 
2665 static void disable_trace_buffered_event(void *data)
2666 {
2667 	this_cpu_inc(trace_buffered_event_cnt);
2668 }
2669 
2670 /**
2671  * trace_buffered_event_disable - disable buffering events
2672  *
2673  * When a filter is removed, it is faster to not use the buffered
2674  * events, and to commit directly into the ring buffer. Free up
2675  * the temp buffers when there are no more users. This requires
2676  * special synchronization with current events.
2677  */
2678 void trace_buffered_event_disable(void)
2679 {
2680 	int cpu;
2681 
2682 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2683 
2684 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2685 		return;
2686 
2687 	if (--trace_buffered_event_ref)
2688 		return;
2689 
2690 	preempt_disable();
2691 	/* For each CPU, set the buffer as used. */
2692 	smp_call_function_many(tracing_buffer_mask,
2693 			       disable_trace_buffered_event, NULL, 1);
2694 	preempt_enable();
2695 
2696 	/* Wait for all current users to finish */
2697 	synchronize_rcu();
2698 
2699 	for_each_tracing_cpu(cpu) {
2700 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2701 		per_cpu(trace_buffered_event, cpu) = NULL;
2702 	}
2703 	/*
2704 	 * Make sure trace_buffered_event is NULL before clearing
2705 	 * trace_buffered_event_cnt.
2706 	 */
2707 	smp_wmb();
2708 
2709 	preempt_disable();
2710 	/* Do the work on each cpu */
2711 	smp_call_function_many(tracing_buffer_mask,
2712 			       enable_trace_buffered_event, NULL, 1);
2713 	preempt_enable();
2714 }
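/*
 * Usage sketch, kept out of the build: the calls are refcounted and must be
 * made under event_mutex, typically when an event filter is attached and
 * later removed again.
 */
#if 0
static void example_filter_lifetime(void)
{
	/* while installing a filter on an event file (event_mutex held): */
	trace_buffered_event_enable();

	/* ... and the matching call when that filter is removed: */
	trace_buffered_event_disable();
}
#endif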
2715 
2716 static struct trace_buffer *temp_buffer;
2717 
2718 struct ring_buffer_event *
2719 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2720 			  struct trace_event_file *trace_file,
2721 			  int type, unsigned long len,
2722 			  unsigned int trace_ctx)
2723 {
2724 	struct ring_buffer_event *entry;
2725 	struct trace_array *tr = trace_file->tr;
2726 	int val;
2727 
2728 	*current_rb = tr->array_buffer.buffer;
2729 
2730 	if (!tr->no_filter_buffering_ref &&
2731 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2732 	    (entry = this_cpu_read(trace_buffered_event))) {
2733 		/* Try to use the per cpu buffer first */
2734 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2735 		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2736 			trace_event_setup(entry, type, trace_ctx);
2737 			entry->array[0] = len;
2738 			return entry;
2739 		}
2740 		this_cpu_dec(trace_buffered_event_cnt);
2741 	}
2742 
2743 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2744 					    trace_ctx);
2745 	/*
2746 	 * If tracing is off, but we have triggers enabled,
2747 	 * we still need to look at the event data. Use the temp_buffer
2748 	 * to store the trace event for the trigger to use. It's recursion
2749 	 * safe and will not be recorded anywhere.
2750 	 */
2751 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2752 		*current_rb = temp_buffer;
2753 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2754 						    trace_ctx);
2755 	}
2756 	return entry;
2757 }
2758 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2759 
2760 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2761 static DEFINE_MUTEX(tracepoint_printk_mutex);
2762 
2763 static void output_printk(struct trace_event_buffer *fbuffer)
2764 {
2765 	struct trace_event_call *event_call;
2766 	struct trace_event_file *file;
2767 	struct trace_event *event;
2768 	unsigned long flags;
2769 	struct trace_iterator *iter = tracepoint_print_iter;
2770 
2771 	/* We should never get here if iter is NULL */
2772 	if (WARN_ON_ONCE(!iter))
2773 		return;
2774 
2775 	event_call = fbuffer->trace_file->event_call;
2776 	if (!event_call || !event_call->event.funcs ||
2777 	    !event_call->event.funcs->trace)
2778 		return;
2779 
2780 	file = fbuffer->trace_file;
2781 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2782 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2783 	     !filter_match_preds(file->filter, fbuffer->entry)))
2784 		return;
2785 
2786 	event = &fbuffer->trace_file->event_call->event;
2787 
2788 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2789 	trace_seq_init(&iter->seq);
2790 	iter->ent = fbuffer->entry;
2791 	event_call->event.funcs->trace(iter, 0, event);
2792 	trace_seq_putc(&iter->seq, 0);
2793 	printk("%s", iter->seq.buffer);
2794 
2795 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2796 }
2797 
2798 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2799 			     void *buffer, size_t *lenp,
2800 			     loff_t *ppos)
2801 {
2802 	int save_tracepoint_printk;
2803 	int ret;
2804 
2805 	mutex_lock(&tracepoint_printk_mutex);
2806 	save_tracepoint_printk = tracepoint_printk;
2807 
2808 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2809 
2810 	/*
2811 	 * This will force exiting early, as tracepoint_printk
2812 	 * is always zero when tracepoint_print_iter is not allocated.
2813 	 */
2814 	if (!tracepoint_print_iter)
2815 		tracepoint_printk = 0;
2816 
2817 	if (save_tracepoint_printk == tracepoint_printk)
2818 		goto out;
2819 
2820 	if (tracepoint_printk)
2821 		static_key_enable(&tracepoint_printk_key.key);
2822 	else
2823 		static_key_disable(&tracepoint_printk_key.key);
2824 
2825  out:
2826 	mutex_unlock(&tracepoint_printk_mutex);
2827 
2828 	return ret;
2829 }
2830 
2831 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2832 {
2833 	if (static_key_false(&tracepoint_printk_key.key))
2834 		output_printk(fbuffer);
2835 
2836 	if (static_branch_unlikely(&trace_event_exports_enabled))
2837 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2838 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2839 				    fbuffer->event, fbuffer->entry,
2840 				    fbuffer->trace_ctx, fbuffer->regs);
2841 }
2842 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2843 
2844 /*
2845  * Skip 3:
2846  *
2847  *   trace_buffer_unlock_commit_regs()
2848  *   trace_event_buffer_commit()
2849  *   trace_event_raw_event_xxx()
2850  */
2851 # define STACK_SKIP 3
2852 
2853 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2854 				     struct trace_buffer *buffer,
2855 				     struct ring_buffer_event *event,
2856 				     unsigned int trace_ctx,
2857 				     struct pt_regs *regs)
2858 {
2859 	__buffer_unlock_commit(buffer, event);
2860 
2861 	/*
2862 	 * If regs is not set, then skip the necessary functions.
2863 	 * Note, we can still get here via blktrace, wakeup tracer
2864 	 * and mmiotrace, but that's ok if they lose a function or
2865 	 * two. They are not that meaningful.
2866 	 */
2867 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2868 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2869 }
2870 
2871 /*
2872  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2873  */
2874 void
2875 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2876 				   struct ring_buffer_event *event)
2877 {
2878 	__buffer_unlock_commit(buffer, event);
2879 }
2880 
2881 void
2882 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2883 	       parent_ip, unsigned int trace_ctx)
2884 {
2885 	struct trace_event_call *call = &event_function;
2886 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2887 	struct ring_buffer_event *event;
2888 	struct ftrace_entry *entry;
2889 
2890 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2891 					    trace_ctx);
2892 	if (!event)
2893 		return;
2894 	entry	= ring_buffer_event_data(event);
2895 	entry->ip			= ip;
2896 	entry->parent_ip		= parent_ip;
2897 
2898 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2899 		if (static_branch_unlikely(&trace_function_exports_enabled))
2900 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2901 		__buffer_unlock_commit(buffer, event);
2902 	}
2903 }
2904 
2905 #ifdef CONFIG_STACKTRACE
2906 
2907 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2908 #define FTRACE_KSTACK_NESTING	4
2909 
2910 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2911 
2912 struct ftrace_stack {
2913 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2914 };
2915 
2916 
2917 struct ftrace_stacks {
2918 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2919 };
2920 
2921 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2922 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2923 
2924 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2925 				 unsigned int trace_ctx,
2926 				 int skip, struct pt_regs *regs)
2927 {
2928 	struct trace_event_call *call = &event_kernel_stack;
2929 	struct ring_buffer_event *event;
2930 	unsigned int size, nr_entries;
2931 	struct ftrace_stack *fstack;
2932 	struct stack_entry *entry;
2933 	int stackidx;
2934 
2935 	/*
2936 	 * Add one, for this function and the call to stack_trace_save().
2937 	 * If regs is set, then these functions will not be in the way.
2938 	 */
2939 #ifndef CONFIG_UNWINDER_ORC
2940 	if (!regs)
2941 		skip++;
2942 #endif
2943 
2944 	preempt_disable_notrace();
2945 
2946 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2947 
2948 	/* This should never happen. If it does, yell once and skip */
2949 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2950 		goto out;
2951 
2952 	/*
2953 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2954 	 * interrupt will either see the value pre increment or post
2955 	 * increment. If the interrupt happens pre increment it will have
2956 	 * restored the counter when it returns.  We just need a barrier to
2957 	 * keep gcc from moving things around.
2958 	 */
2959 	barrier();
2960 
2961 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2962 	size = ARRAY_SIZE(fstack->calls);
2963 
2964 	if (regs) {
2965 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2966 						   size, skip);
2967 	} else {
2968 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2969 	}
2970 
2971 	size = nr_entries * sizeof(unsigned long);
2972 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2973 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2974 				    trace_ctx);
2975 	if (!event)
2976 		goto out;
2977 	entry = ring_buffer_event_data(event);
2978 
2979 	memcpy(&entry->caller, fstack->calls, size);
2980 	entry->size = nr_entries;
2981 
2982 	if (!call_filter_check_discard(call, entry, buffer, event))
2983 		__buffer_unlock_commit(buffer, event);
2984 
2985  out:
2986 	/* Again, don't let gcc optimize things here */
2987 	barrier();
2988 	__this_cpu_dec(ftrace_stack_reserve);
2989 	preempt_enable_notrace();
2990 
2991 }
2992 
2993 static inline void ftrace_trace_stack(struct trace_array *tr,
2994 				      struct trace_buffer *buffer,
2995 				      unsigned int trace_ctx,
2996 				      int skip, struct pt_regs *regs)
2997 {
2998 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2999 		return;
3000 
3001 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3002 }
3003 
3004 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3005 		   int skip)
3006 {
3007 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3008 
3009 	if (rcu_is_watching()) {
3010 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3011 		return;
3012 	}
3013 
3014 	/*
3015 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3016 	 * but if the above rcu_is_watching() failed, then the NMI
3017 	 * triggered someplace critical, and rcu_irq_enter() should
3018 	 * not be called from NMI.
3019 	 */
3020 	if (unlikely(in_nmi()))
3021 		return;
3022 
3023 	rcu_irq_enter_irqson();
3024 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3025 	rcu_irq_exit_irqson();
3026 }
3027 
3028 /**
3029  * trace_dump_stack - record a stack back trace in the trace buffer
3030  * @skip: Number of functions to skip (helper handlers)
3031  */
3032 void trace_dump_stack(int skip)
3033 {
3034 	if (tracing_disabled || tracing_selftest_running)
3035 		return;
3036 
3037 #ifndef CONFIG_UNWINDER_ORC
3038 	/* Skip 1 to skip this function. */
3039 	skip++;
3040 #endif
3041 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3042 			     tracing_gen_ctx(), skip, NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
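/*
 * Usage sketch, kept out of the build: drop the caller's kernel stack into
 * the top level trace buffer; a non-zero skip value trims helper frames.
 */
#if 0
static void example_mark_point(void)
{
	trace_dump_stack(0);
}
#endif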
3045 
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048 
3049 static void
3050 ftrace_trace_userstack(struct trace_array *tr,
3051 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3052 {
3053 	struct trace_event_call *call = &event_user_stack;
3054 	struct ring_buffer_event *event;
3055 	struct userstack_entry *entry;
3056 
3057 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3058 		return;
3059 
3060 	/*
3061 	 * NMIs can not handle page faults, even with fixups.
3062 	 * Saving the user stack can (and often does) fault.
3063 	 */
3064 	if (unlikely(in_nmi()))
3065 		return;
3066 
3067 	/*
3068 	 * prevent recursion, since the user stack tracing may
3069 	 * trigger other kernel events.
3070 	 */
3071 	preempt_disable();
3072 	if (__this_cpu_read(user_stack_count))
3073 		goto out;
3074 
3075 	__this_cpu_inc(user_stack_count);
3076 
3077 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3078 					    sizeof(*entry), trace_ctx);
3079 	if (!event)
3080 		goto out_drop_count;
3081 	entry	= ring_buffer_event_data(event);
3082 
3083 	entry->tgid		= current->tgid;
3084 	memset(&entry->caller, 0, sizeof(entry->caller));
3085 
3086 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3087 	if (!call_filter_check_discard(call, entry, buffer, event))
3088 		__buffer_unlock_commit(buffer, event);
3089 
3090  out_drop_count:
3091 	__this_cpu_dec(user_stack_count);
3092  out:
3093 	preempt_enable();
3094 }
3095 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3096 static void ftrace_trace_userstack(struct trace_array *tr,
3097 				   struct trace_buffer *buffer,
3098 				   unsigned int trace_ctx)
3099 {
3100 }
3101 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3102 
3103 #endif /* CONFIG_STACKTRACE */
3104 
3105 static inline void
3106 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3107 			  unsigned long long delta)
3108 {
3109 	entry->bottom_delta_ts = delta & U32_MAX;
3110 	entry->top_delta_ts = (delta >> 32);
3111 }
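/*
 * Reader-side sketch, kept out of the build: the 64-bit delta is put back
 * together from the two 32-bit halves stored by the helper above.
 */
#if 0
static u64 example_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}
#endif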
3112 
3113 void trace_last_func_repeats(struct trace_array *tr,
3114 			     struct trace_func_repeats *last_info,
3115 			     unsigned int trace_ctx)
3116 {
3117 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3118 	struct func_repeats_entry *entry;
3119 	struct ring_buffer_event *event;
3120 	u64 delta;
3121 
3122 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3123 					    sizeof(*entry), trace_ctx);
3124 	if (!event)
3125 		return;
3126 
3127 	delta = ring_buffer_event_time_stamp(buffer, event) -
3128 		last_info->ts_last_call;
3129 
3130 	entry = ring_buffer_event_data(event);
3131 	entry->ip = last_info->ip;
3132 	entry->parent_ip = last_info->parent_ip;
3133 	entry->count = last_info->count;
3134 	func_repeats_set_delta_ts(entry, delta);
3135 
3136 	__buffer_unlock_commit(buffer, event);
3137 }
3138 
3139 /* created for use with alloc_percpu */
3140 struct trace_buffer_struct {
3141 	int nesting;
3142 	char buffer[4][TRACE_BUF_SIZE];
3143 };
3144 
3145 static struct trace_buffer_struct *trace_percpu_buffer;
3146 
3147 /*
3148  * This allows for lockless recording.  If we're nested too deeply, then
3149  * this returns NULL.
3150  */
3151 static char *get_trace_buf(void)
3152 {
3153 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3154 
3155 	if (!buffer || buffer->nesting >= 4)
3156 		return NULL;
3157 
3158 	buffer->nesting++;
3159 
3160 	/* Interrupts must see nesting incremented before we use the buffer */
3161 	barrier();
3162 	return &buffer->buffer[buffer->nesting - 1][0];
3163 }
3164 
3165 static void put_trace_buf(void)
3166 {
3167 	/* Don't let the decrement of nesting leak before this */
3168 	barrier();
3169 	this_cpu_dec(trace_percpu_buffer->nesting);
3170 }
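/*
 * Usage sketch, kept out of the build: a per-CPU printk buffer is claimed
 * with preemption disabled and released in the reverse order, mirroring
 * trace_vbprintk() below.
 */
#if 0
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();
	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* format at most TRACE_BUF_SIZE bytes into tbuffer */
		put_trace_buf();
	}
	preempt_enable_notrace();
}
#endif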
3171 
3172 static int alloc_percpu_trace_buffer(void)
3173 {
3174 	struct trace_buffer_struct *buffers;
3175 
3176 	if (trace_percpu_buffer)
3177 		return 0;
3178 
3179 	buffers = alloc_percpu(struct trace_buffer_struct);
3180 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3181 		return -ENOMEM;
3182 
3183 	trace_percpu_buffer = buffers;
3184 	return 0;
3185 }
3186 
3187 static int buffers_allocated;
3188 
3189 void trace_printk_init_buffers(void)
3190 {
3191 	if (buffers_allocated)
3192 		return;
3193 
3194 	if (alloc_percpu_trace_buffer())
3195 		return;
3196 
3197 	/* trace_printk() is for debug use only. Don't use it in production. */
3198 
3199 	pr_warn("\n");
3200 	pr_warn("**********************************************************\n");
3201 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3202 	pr_warn("**                                                      **\n");
3203 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3204 	pr_warn("**                                                      **\n");
3205 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3206 	pr_warn("** unsafe for production use.                           **\n");
3207 	pr_warn("**                                                      **\n");
3208 	pr_warn("** If you see this message and you are not debugging    **\n");
3209 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3210 	pr_warn("**                                                      **\n");
3211 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3212 	pr_warn("**********************************************************\n");
3213 
3214 	/* Expand the buffers to set size */
3215 	tracing_update_buffers();
3216 
3217 	buffers_allocated = 1;
3218 
3219 	/*
3220 	 * trace_printk_init_buffers() can be called by modules.
3221 	 * If that happens, then we need to start cmdline recording
3222 	 * directly here. If the global_trace.buffer is already
3223 	 * allocated here, then this was called by module code.
3224 	 */
3225 	if (global_trace.array_buffer.buffer)
3226 		tracing_start_cmdline_record();
3227 }
3228 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3229 
3230 void trace_printk_start_comm(void)
3231 {
3232 	/* Start tracing comms if trace printk is set */
3233 	if (!buffers_allocated)
3234 		return;
3235 	tracing_start_cmdline_record();
3236 }
3237 
3238 static void trace_printk_start_stop_comm(int enabled)
3239 {
3240 	if (!buffers_allocated)
3241 		return;
3242 
3243 	if (enabled)
3244 		tracing_start_cmdline_record();
3245 	else
3246 		tracing_stop_cmdline_record();
3247 }
3248 
3249 /**
3250  * trace_vbprintk - write binary msg to tracing buffer
3251  * @ip:    The address of the caller
3252  * @fmt:   The string format to write to the buffer
3253  * @args:  Arguments for @fmt
3254  */
3255 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3256 {
3257 	struct trace_event_call *call = &event_bprint;
3258 	struct ring_buffer_event *event;
3259 	struct trace_buffer *buffer;
3260 	struct trace_array *tr = &global_trace;
3261 	struct bprint_entry *entry;
3262 	unsigned int trace_ctx;
3263 	char *tbuffer;
3264 	int len = 0, size;
3265 
3266 	if (unlikely(tracing_selftest_running || tracing_disabled))
3267 		return 0;
3268 
3269 	/* Don't pollute graph traces with trace_vprintk internals */
3270 	pause_graph_tracing();
3271 
3272 	trace_ctx = tracing_gen_ctx();
3273 	preempt_disable_notrace();
3274 
3275 	tbuffer = get_trace_buf();
3276 	if (!tbuffer) {
3277 		len = 0;
3278 		goto out_nobuffer;
3279 	}
3280 
3281 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3282 
3283 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3284 		goto out_put;
3285 
3286 	size = sizeof(*entry) + sizeof(u32) * len;
3287 	buffer = tr->array_buffer.buffer;
3288 	ring_buffer_nest_start(buffer);
3289 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3290 					    trace_ctx);
3291 	if (!event)
3292 		goto out;
3293 	entry = ring_buffer_event_data(event);
3294 	entry->ip			= ip;
3295 	entry->fmt			= fmt;
3296 
3297 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3298 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3299 		__buffer_unlock_commit(buffer, event);
3300 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3301 	}
3302 
3303 out:
3304 	ring_buffer_nest_end(buffer);
3305 out_put:
3306 	put_trace_buf();
3307 
3308 out_nobuffer:
3309 	preempt_enable_notrace();
3310 	unpause_graph_tracing();
3311 
3312 	return len;
3313 }
3314 EXPORT_SYMBOL_GPL(trace_vbprintk);
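/*
 * Usage sketch, kept out of the build: trace_vbprintk() is normally reached
 * through the trace_printk() macro, which takes the binary (bprintk) path
 * for constant format strings. The value printed here is hypothetical.
 */
#if 0
static void example_debug_print(u64 delta_ns)
{
	trace_printk("irq latency: %llu ns\n", delta_ns);
}
#endif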
3315 
3316 __printf(3, 0)
3317 static int
3318 __trace_array_vprintk(struct trace_buffer *buffer,
3319 		      unsigned long ip, const char *fmt, va_list args)
3320 {
3321 	struct trace_event_call *call = &event_print;
3322 	struct ring_buffer_event *event;
3323 	int len = 0, size;
3324 	struct print_entry *entry;
3325 	unsigned int trace_ctx;
3326 	char *tbuffer;
3327 
3328 	if (tracing_disabled || tracing_selftest_running)
3329 		return 0;
3330 
3331 	/* Don't pollute graph traces with trace_vprintk internals */
3332 	pause_graph_tracing();
3333 
3334 	trace_ctx = tracing_gen_ctx();
3335 	preempt_disable_notrace();
3336 
3337 
3338 	tbuffer = get_trace_buf();
3339 	if (!tbuffer) {
3340 		len = 0;
3341 		goto out_nobuffer;
3342 	}
3343 
3344 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3345 
3346 	size = sizeof(*entry) + len + 1;
3347 	ring_buffer_nest_start(buffer);
3348 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3349 					    trace_ctx);
3350 	if (!event)
3351 		goto out;
3352 	entry = ring_buffer_event_data(event);
3353 	entry->ip = ip;
3354 
3355 	memcpy(&entry->buf, tbuffer, len + 1);
3356 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3357 		__buffer_unlock_commit(buffer, event);
3358 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3359 	}
3360 
3361 out:
3362 	ring_buffer_nest_end(buffer);
3363 	put_trace_buf();
3364 
3365 out_nobuffer:
3366 	preempt_enable_notrace();
3367 	unpause_graph_tracing();
3368 
3369 	return len;
3370 }
3371 
3372 __printf(3, 0)
3373 int trace_array_vprintk(struct trace_array *tr,
3374 			unsigned long ip, const char *fmt, va_list args)
3375 {
3376 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3377 }
3378 
3379 /**
3380  * trace_array_printk - Print a message to a specific instance
3381  * @tr: The instance trace_array descriptor
3382  * @ip: The instruction pointer that this is called from.
3383  * @fmt: The format to print (printf format)
3384  *
3385  * If a subsystem sets up its own instance, it has the right to
3386  * printk strings into its tracing instance buffer using this
3387  * function. Note, this function will not write into the top level
3388  * buffer (use trace_printk() for that), as writing into the top level
3389  * buffer should only have events that can be individually disabled.
3390  * trace_printk() is only used for debugging a kernel, and should never
3391  * be incorporated into normal use.
3392  *
3393  * trace_array_printk() can be used, as it will not add noise to the
3394  * top level tracing buffer.
3395  *
3396  * Note, trace_array_init_printk() must be called on @tr before this
3397  * can be used.
3398  */
3399 __printf(3, 0)
3400 int trace_array_printk(struct trace_array *tr,
3401 		       unsigned long ip, const char *fmt, ...)
3402 {
3403 	int ret;
3404 	va_list ap;
3405 
3406 	if (!tr)
3407 		return -ENOENT;
3408 
3409 	/* This is only allowed for created instances */
3410 	if (tr == &global_trace)
3411 		return 0;
3412 
3413 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3414 		return 0;
3415 
3416 	va_start(ap, fmt);
3417 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3418 	va_end(ap);
3419 	return ret;
3420 }
3421 EXPORT_SYMBOL_GPL(trace_array_printk);
3422 
3423 /**
3424  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3425  * @tr: The trace array to initialize the buffers for
3426  *
3427  * As trace_array_printk() only writes into instances, they are OK to
3428  * have in the kernel (unlike trace_printk()). This needs to be called
3429  * before trace_array_printk() can be used on a trace_array.
3430  */
3431 int trace_array_init_printk(struct trace_array *tr)
3432 {
3433 	if (!tr)
3434 		return -ENOENT;
3435 
3436 	/* This is only allowed for created instances */
3437 	if (tr == &global_trace)
3438 		return -EINVAL;
3439 
3440 	return alloc_percpu_trace_buffer();
3441 }
3442 EXPORT_SYMBOL_GPL(trace_array_init_printk);
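/*
 * Usage sketch, kept out of the build: a subsystem writes into its own
 * instance rather than the top level buffer. The instance name is
 * hypothetical; trace_array_get_by_name() looks it up or creates it.
 */
#if 0
static void example_instance_printk(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_subsys");
	if (tr && !trace_array_init_printk(tr))
		trace_array_printk(tr, _THIS_IP_, "hello from %s\n",
				   "example_subsys");
}
#endif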
3443 
3444 __printf(3, 4)
3445 int trace_array_printk_buf(struct trace_buffer *buffer,
3446 			   unsigned long ip, const char *fmt, ...)
3447 {
3448 	int ret;
3449 	va_list ap;
3450 
3451 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3452 		return 0;
3453 
3454 	va_start(ap, fmt);
3455 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3456 	va_end(ap);
3457 	return ret;
3458 }
3459 
3460 __printf(2, 0)
3461 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3462 {
3463 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3464 }
3465 EXPORT_SYMBOL_GPL(trace_vprintk);
3466 
3467 static void trace_iterator_increment(struct trace_iterator *iter)
3468 {
3469 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3470 
3471 	iter->idx++;
3472 	if (buf_iter)
3473 		ring_buffer_iter_advance(buf_iter);
3474 }
3475 
3476 static struct trace_entry *
3477 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3478 		unsigned long *lost_events)
3479 {
3480 	struct ring_buffer_event *event;
3481 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3482 
3483 	if (buf_iter) {
3484 		event = ring_buffer_iter_peek(buf_iter, ts);
3485 		if (lost_events)
3486 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3487 				(unsigned long)-1 : 0;
3488 	} else {
3489 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3490 					 lost_events);
3491 	}
3492 
3493 	if (event) {
3494 		iter->ent_size = ring_buffer_event_length(event);
3495 		return ring_buffer_event_data(event);
3496 	}
3497 	iter->ent_size = 0;
3498 	return NULL;
3499 }
3500 
3501 static struct trace_entry *
3502 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3503 		  unsigned long *missing_events, u64 *ent_ts)
3504 {
3505 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3506 	struct trace_entry *ent, *next = NULL;
3507 	unsigned long lost_events = 0, next_lost = 0;
3508 	int cpu_file = iter->cpu_file;
3509 	u64 next_ts = 0, ts;
3510 	int next_cpu = -1;
3511 	int next_size = 0;
3512 	int cpu;
3513 
3514 	/*
3515 	 * If we are in a per_cpu trace file, don't bother iterating over
3516 	 * all the CPUs; just peek at that CPU directly.
3517 	 */
3518 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3519 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3520 			return NULL;
3521 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3522 		if (ent_cpu)
3523 			*ent_cpu = cpu_file;
3524 
3525 		return ent;
3526 	}
3527 
3528 	for_each_tracing_cpu(cpu) {
3529 
3530 		if (ring_buffer_empty_cpu(buffer, cpu))
3531 			continue;
3532 
3533 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3534 
3535 		/*
3536 		 * Pick the entry with the smallest timestamp:
3537 		 */
3538 		if (ent && (!next || ts < next_ts)) {
3539 			next = ent;
3540 			next_cpu = cpu;
3541 			next_ts = ts;
3542 			next_lost = lost_events;
3543 			next_size = iter->ent_size;
3544 		}
3545 	}
3546 
3547 	iter->ent_size = next_size;
3548 
3549 	if (ent_cpu)
3550 		*ent_cpu = next_cpu;
3551 
3552 	if (ent_ts)
3553 		*ent_ts = next_ts;
3554 
3555 	if (missing_events)
3556 		*missing_events = next_lost;
3557 
3558 	return next;
3559 }
3560 
3561 #define STATIC_FMT_BUF_SIZE	128
3562 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3563 
3564 static char *trace_iter_expand_format(struct trace_iterator *iter)
3565 {
3566 	char *tmp;
3567 
3568 	/*
3569 	 * iter->tr is NULL when used with tp_printk, which means this can
3570 	 * get called where it is not safe to call krealloc().
3571 	 */
3572 	if (!iter->tr || iter->fmt == static_fmt_buf)
3573 		return NULL;
3574 
3575 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3576 		       GFP_KERNEL);
3577 	if (tmp) {
3578 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3579 		iter->fmt = tmp;
3580 	}
3581 
3582 	return tmp;
3583 }
3584 
3585 /* Returns true if the string is safe to dereference from an event */
3586 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3587 {
3588 	unsigned long addr = (unsigned long)str;
3589 	struct trace_event *trace_event;
3590 	struct trace_event_call *event;
3591 
3592 	/* OK if part of the event data */
3593 	if ((addr >= (unsigned long)iter->ent) &&
3594 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3595 		return true;
3596 
3597 	/* OK if part of the temp seq buffer */
3598 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3599 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3600 		return true;
3601 
3602 	/* Core rodata can not be freed */
3603 	if (is_kernel_rodata(addr))
3604 		return true;
3605 
3606 	if (trace_is_tracepoint_string(str))
3607 		return true;
3608 
3609 	/*
3610 	 * Now this could be a module event, referencing core module
3611 	 * data, which is OK.
3612 	 */
3613 	if (!iter->ent)
3614 		return false;
3615 
3616 	trace_event = ftrace_find_event(iter->ent->type);
3617 	if (!trace_event)
3618 		return false;
3619 
3620 	event = container_of(trace_event, struct trace_event_call, event);
3621 	if (!event->mod)
3622 		return false;
3623 
3624 	/* Would rather have rodata, but this will suffice */
3625 	if (within_module_core(addr, event->mod))
3626 		return true;
3627 
3628 	return false;
3629 }
3630 
3631 static const char *show_buffer(struct trace_seq *s)
3632 {
3633 	struct seq_buf *seq = &s->seq;
3634 
3635 	seq_buf_terminate(seq);
3636 
3637 	return seq->buffer;
3638 }
3639 
3640 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3641 
3642 static int test_can_verify_check(const char *fmt, ...)
3643 {
3644 	char buf[16];
3645 	va_list ap;
3646 	int ret;
3647 
3648 	/*
3649 	 * The verifier depends on vsnprintf() modifying the va_list that is
3650 	 * passed to it, which only happens when the va_list is passed by
3651 	 * reference. Some architectures (like x86_32) pass it by value,
3652 	 * which means that vsnprintf() does not modify the va_list passed
3653 	 * to it, and the verifier would then need to understand all the
3654 	 * values that vsnprintf can use. If the va_list is passed by value,
3655 	 * then the verifier is disabled.
3656 	 */
3657 	va_start(ap, fmt);
3658 	vsnprintf(buf, 16, "%d", ap);
3659 	ret = va_arg(ap, int);
3660 	va_end(ap);
3661 
3662 	return ret;
3663 }
3664 
3665 static void test_can_verify(void)
3666 {
3667 	if (!test_can_verify_check("%d %d", 0, 1)) {
3668 		pr_info("trace event string verifier disabled\n");
3669 		static_branch_inc(&trace_no_verify);
3670 	}
3671 }
3672 
3673 /**
3674  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3675  * @iter: The iterator that holds the seq buffer and the event being printed
3676  * @fmt: The format used to print the event
3677  * @ap: The va_list holding the data to print from @fmt.
3678  *
3679  * This writes the data into the @iter->seq buffer using the data from
3680  * @fmt and @ap. If the format has a %s, then the source of the string
3681  * is examined to make sure it is safe to print, otherwise it will
3682  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3683  * pointer.
3684  */
3685 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3686 			 va_list ap)
3687 {
3688 	const char *p = fmt;
3689 	const char *str;
3690 	int i, j;
3691 
3692 	if (WARN_ON_ONCE(!fmt))
3693 		return;
3694 
3695 	if (static_branch_unlikely(&trace_no_verify))
3696 		goto print;
3697 
3698 	/* Don't bother checking when doing a ftrace_dump() */
3699 	if (iter->fmt == static_fmt_buf)
3700 		goto print;
3701 
3702 	while (*p) {
3703 		bool star = false;
3704 		int len = 0;
3705 
3706 		j = 0;
3707 
3708 		/* We only care about %s and variants */
3709 		for (i = 0; p[i]; i++) {
3710 			if (i + 1 >= iter->fmt_size) {
3711 				/*
3712 				 * If we can't expand the copy buffer,
3713 				 * just print it.
3714 				 */
3715 				if (!trace_iter_expand_format(iter))
3716 					goto print;
3717 			}
3718 
3719 			if (p[i] == '\\' && p[i+1]) {
3720 				i++;
3721 				continue;
3722 			}
3723 			if (p[i] == '%') {
3724 				/* Need to test cases like %08.*s */
3725 				for (j = 1; p[i+j]; j++) {
3726 					if (isdigit(p[i+j]) ||
3727 					    p[i+j] == '.')
3728 						continue;
3729 					if (p[i+j] == '*') {
3730 						star = true;
3731 						continue;
3732 					}
3733 					break;
3734 				}
3735 				if (p[i+j] == 's')
3736 					break;
3737 				star = false;
3738 			}
3739 			j = 0;
3740 		}
3741 		/* If no %s is found, then just print normally */
3742 		if (!p[i])
3743 			break;
3744 
3745 		/* Copy up to the %s, and print that */
3746 		strncpy(iter->fmt, p, i);
3747 		iter->fmt[i] = '\0';
3748 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3749 
3750 		if (star)
3751 			len = va_arg(ap, int);
3752 
3753 		/* The ap now points to the string data of the %s */
3754 		str = va_arg(ap, const char *);
3755 
3756 		/*
3757 		 * If you hit this warning, it is likely that the
3758 		 * trace event in question used %s on a string that
3759 		 * was saved at the time of the event, but may not be
3760 		 * around when the trace is read. Use __string(),
3761 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3762 		 * instead. See samples/trace_events/trace-events-sample.h
3763 		 * for reference.
3764 		 */
3765 		if (WARN_ONCE(!trace_safe_str(iter, str),
3766 			      "fmt: '%s' current_buffer: '%s'",
3767 			      fmt, show_buffer(&iter->seq))) {
3768 			int ret;
3769 
3770 			/* Try to safely read the string */
3771 			if (star) {
3772 				if (len + 1 > iter->fmt_size)
3773 					len = iter->fmt_size - 1;
3774 				if (len < 0)
3775 					len = 0;
3776 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3777 				iter->fmt[len] = 0;
3778 				star = false;
3779 			} else {
3780 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3781 								  iter->fmt_size);
3782 			}
3783 			if (ret < 0)
3784 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3785 			else
3786 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3787 						 str, iter->fmt);
3788 			str = "[UNSAFE-MEMORY]";
3789 			strcpy(iter->fmt, "%s");
3790 		} else {
3791 			strncpy(iter->fmt, p + i, j + 1);
3792 			iter->fmt[j+1] = '\0';
3793 		}
3794 		if (star)
3795 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3796 		else
3797 			trace_seq_printf(&iter->seq, iter->fmt, str);
3798 
3799 		p += i + j + 1;
3800 	}
3801  print:
3802 	if (*p)
3803 		trace_seq_vprintf(&iter->seq, p, ap);
3804 }
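
/*
 * Purely illustrative, not used by this file: a minimal TRACE_EVENT()
 * sketch with hypothetical event and field names, loosely based on
 * samples/trace_events/trace-events-sample.h, showing the __string()/
 * __assign_str()/__get_str() helpers that the warning above points to.
 * A string recorded this way lives inside the ring buffer entry itself,
 * so the %s dereference is always safe when the trace is read later:
 *
 *	TRACE_EVENT(sample_str_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * The three helpers reserve space in the entry, copy the string at event
 * time, and read it back for TP_printk(), respectively.
 */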
3805 
3806 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3807 {
3808 	const char *p, *new_fmt;
3809 	char *q;
3810 
3811 	if (WARN_ON_ONCE(!fmt))
3812 		return fmt;
3813 
3814 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3815 		return fmt;
3816 
3817 	p = fmt;
3818 	new_fmt = q = iter->fmt;
3819 	while (*p) {
3820 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3821 			if (!trace_iter_expand_format(iter))
3822 				return fmt;
3823 
3824 			q += iter->fmt - new_fmt;
3825 			new_fmt = iter->fmt;
3826 		}
3827 
3828 		*q++ = *p++;
3829 
3830 		/* Replace %p with %px */
3831 		if (p[-1] == '%') {
3832 			if (p[0] == '%') {
3833 				*q++ = *p++;
3834 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3835 				*q++ = *p++;
3836 				*q++ = 'x';
3837 			}
3838 		}
3839 	}
3840 	*q = '\0';
3841 
3842 	return new_fmt;
3843 }
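
/*
 * Illustrative examples only (assuming the hash-ptr trace flag is not
 * set): a few hypothetical format strings and how the loop above
 * rewrites them:
 *
 *	"ptr=%p"	->  "ptr=%px"		(bare %p gains the 'x' modifier)
 *	"rate=100%%p"	->  "rate=100%%p"	(%% is an escape; the 'p' is plain text)
 *	"func=%ps"	->  "func=%ps"		(%p followed by an alnum is left alone)
 */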
3844 
3845 #define STATIC_TEMP_BUF_SIZE	128
3846 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3847 
3848 /* Find the next real entry, without updating the iterator itself */
3849 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3850 					  int *ent_cpu, u64 *ent_ts)
3851 {
3852 	/* __find_next_entry will reset ent_size */
3853 	int ent_size = iter->ent_size;
3854 	struct trace_entry *entry;
3855 
3856 	/*
3857 	 * If called from ftrace_dump(), then the iter->temp buffer
3858 	 * will be the static_temp_buf and not created from kmalloc.
3859 	 * If the entry size is greater than the buffer, we cannot
3860 	 * save it. Just return NULL in that case. This is only
3861 	 * used to add markers when two consecutive events' time
3862 	 * stamps have a large delta. See trace_print_lat_context().
3863 	 */
3864 	if (iter->temp == static_temp_buf &&
3865 	    STATIC_TEMP_BUF_SIZE < ent_size)
3866 		return NULL;
3867 
3868 	/*
3869 	 * __find_next_entry() may call peek_next_entry(), which may call
3870 	 * ring_buffer_peek(); that can leave the contents of iter->ent
3871 	 * undefined. We need to copy iter->ent now.
3872 	 */
3873 	if (iter->ent && iter->ent != iter->temp) {
3874 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3875 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3876 			void *temp;
3877 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3878 			if (!temp)
3879 				return NULL;
3880 			kfree(iter->temp);
3881 			iter->temp = temp;
3882 			iter->temp_size = iter->ent_size;
3883 		}
3884 		memcpy(iter->temp, iter->ent, iter->ent_size);
3885 		iter->ent = iter->temp;
3886 	}
3887 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3888 	/* Put back the original ent_size */
3889 	iter->ent_size = ent_size;
3890 
3891 	return entry;
3892 }
3893 
3894 /* Find the next real entry, and increment the iterator to the next entry */
3895 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3896 {
3897 	iter->ent = __find_next_entry(iter, &iter->cpu,
3898 				      &iter->lost_events, &iter->ts);
3899 
3900 	if (iter->ent)
3901 		trace_iterator_increment(iter);
3902 
3903 	return iter->ent ? iter : NULL;
3904 }
3905 
3906 static void trace_consume(struct trace_iterator *iter)
3907 {
3908 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3909 			    &iter->lost_events);
3910 }
3911 
3912 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3913 {
3914 	struct trace_iterator *iter = m->private;
3915 	int i = (int)*pos;
3916 	void *ent;
3917 
3918 	WARN_ON_ONCE(iter->leftover);
3919 
3920 	(*pos)++;
3921 
3922 	/* can't go backwards */
3923 	if (iter->idx > i)
3924 		return NULL;
3925 
3926 	if (iter->idx < 0)
3927 		ent = trace_find_next_entry_inc(iter);
3928 	else
3929 		ent = iter;
3930 
3931 	while (ent && iter->idx < i)
3932 		ent = trace_find_next_entry_inc(iter);
3933 
3934 	iter->pos = *pos;
3935 
3936 	return ent;
3937 }
3938 
3939 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3940 {
3941 	struct ring_buffer_iter *buf_iter;
3942 	unsigned long entries = 0;
3943 	u64 ts;
3944 
3945 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3946 
3947 	buf_iter = trace_buffer_iter(iter, cpu);
3948 	if (!buf_iter)
3949 		return;
3950 
3951 	ring_buffer_iter_reset(buf_iter);
3952 
3953 	/*
3954 	 * With the max latency tracers, it is possible that a reset
3955 	 * never took place on a cpu. This is evidenced by the
3956 	 * timestamp being before the start of the buffer.
3957 	 */
3958 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3959 		if (ts >= iter->array_buffer->time_start)
3960 			break;
3961 		entries++;
3962 		ring_buffer_iter_advance(buf_iter);
3963 	}
3964 
3965 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3966 }
3967 
3968 /*
3969  * The current tracer is copied to avoid taking a global lock
3970  * all around.
3971  */
3972 static void *s_start(struct seq_file *m, loff_t *pos)
3973 {
3974 	struct trace_iterator *iter = m->private;
3975 	struct trace_array *tr = iter->tr;
3976 	int cpu_file = iter->cpu_file;
3977 	void *p = NULL;
3978 	loff_t l = 0;
3979 	int cpu;
3980 
3981 	/*
3982 	 * Copy the tracer to avoid using a global lock all around.
3983 	 * iter->trace is a copy of current_trace; the name pointer may
3984 	 * be compared instead of using strcmp(), as iter->trace->name
3985 	 * will point to the same string as current_trace->name.
3986 	 */
3987 	mutex_lock(&trace_types_lock);
3988 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3989 		*iter->trace = *tr->current_trace;
3990 	mutex_unlock(&trace_types_lock);
3991 
3992 #ifdef CONFIG_TRACER_MAX_TRACE
3993 	if (iter->snapshot && iter->trace->use_max_tr)
3994 		return ERR_PTR(-EBUSY);
3995 #endif
3996 
3997 	if (*pos != iter->pos) {
3998 		iter->ent = NULL;
3999 		iter->cpu = 0;
4000 		iter->idx = -1;
4001 
4002 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4003 			for_each_tracing_cpu(cpu)
4004 				tracing_iter_reset(iter, cpu);
4005 		} else
4006 			tracing_iter_reset(iter, cpu_file);
4007 
4008 		iter->leftover = 0;
4009 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4010 			;
4011 
4012 	} else {
4013 		/*
4014 		 * If we overflowed the seq_file before, then we want
4015 		 * to just reuse the trace_seq buffer again.
4016 		 */
4017 		if (iter->leftover)
4018 			p = iter;
4019 		else {
4020 			l = *pos - 1;
4021 			p = s_next(m, p, &l);
4022 		}
4023 	}
4024 
4025 	trace_event_read_lock();
4026 	trace_access_lock(cpu_file);
4027 	return p;
4028 }
4029 
4030 static void s_stop(struct seq_file *m, void *p)
4031 {
4032 	struct trace_iterator *iter = m->private;
4033 
4034 #ifdef CONFIG_TRACER_MAX_TRACE
4035 	if (iter->snapshot && iter->trace->use_max_tr)
4036 		return;
4037 #endif
4038 
4039 	trace_access_unlock(iter->cpu_file);
4040 	trace_event_read_unlock();
4041 }
4042 
4043 static void
4044 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4045 		      unsigned long *entries, int cpu)
4046 {
4047 	unsigned long count;
4048 
4049 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4050 	/*
4051 	 * If this buffer has skipped entries, then we hold all
4052 	 * entries for the trace and we need to ignore the
4053 	 * ones before the time stamp.
4054 	 */
4055 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4056 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4057 		/* total is the same as the entries */
4058 		*total = count;
4059 	} else
4060 		*total = count +
4061 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4062 	*entries = count;
4063 }
4064 
4065 static void
4066 get_total_entries(struct array_buffer *buf,
4067 		  unsigned long *total, unsigned long *entries)
4068 {
4069 	unsigned long t, e;
4070 	int cpu;
4071 
4072 	*total = 0;
4073 	*entries = 0;
4074 
4075 	for_each_tracing_cpu(cpu) {
4076 		get_total_entries_cpu(buf, &t, &e, cpu);
4077 		*total += t;
4078 		*entries += e;
4079 	}
4080 }
4081 
4082 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4083 {
4084 	unsigned long total, entries;
4085 
4086 	if (!tr)
4087 		tr = &global_trace;
4088 
4089 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4090 
4091 	return entries;
4092 }
4093 
4094 unsigned long trace_total_entries(struct trace_array *tr)
4095 {
4096 	unsigned long total, entries;
4097 
4098 	if (!tr)
4099 		tr = &global_trace;
4100 
4101 	get_total_entries(&tr->array_buffer, &total, &entries);
4102 
4103 	return entries;
4104 }
4105 
4106 static void print_lat_help_header(struct seq_file *m)
4107 {
4108 	seq_puts(m, "#                    _------=> CPU#            \n"
4109 		    "#                   / _-----=> irqs-off        \n"
4110 		    "#                  | / _----=> need-resched    \n"
4111 		    "#                  || / _---=> hardirq/softirq \n"
4112 		    "#                  ||| / _--=> preempt-depth   \n"
4113 		    "#                  |||| /     delay            \n"
4114 		    "#  cmd     pid     ||||| time  |   caller      \n"
4115 		    "#     \\   /        |||||  \\    |   /         \n");
4116 }
4117 
4118 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4119 {
4120 	unsigned long total;
4121 	unsigned long entries;
4122 
4123 	get_total_entries(buf, &total, &entries);
4124 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4125 		   entries, total, num_online_cpus());
4126 	seq_puts(m, "#\n");
4127 }
4128 
4129 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4130 				   unsigned int flags)
4131 {
4132 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4133 
4134 	print_event_info(buf, m);
4135 
4136 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4137 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4138 }
4139 
4140 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4141 				       unsigned int flags)
4142 {
4143 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4144 	const char *space = "            ";
4145 	int prec = tgid ? 12 : 2;
4146 
4147 	print_event_info(buf, m);
4148 
4149 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4150 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4151 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4152 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4153 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4154 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4155 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4156 }
4157 
4158 void
4159 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4160 {
4161 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4162 	struct array_buffer *buf = iter->array_buffer;
4163 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4164 	struct tracer *type = iter->trace;
4165 	unsigned long entries;
4166 	unsigned long total;
4167 	const char *name = "preemption";
4168 
4169 	name = type->name;
4170 
4171 	get_total_entries(buf, &total, &entries);
4172 
4173 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4174 		   name, UTS_RELEASE);
4175 	seq_puts(m, "# -----------------------------------"
4176 		 "---------------------------------\n");
4177 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4178 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4179 		   nsecs_to_usecs(data->saved_latency),
4180 		   entries,
4181 		   total,
4182 		   buf->cpu,
4183 #if defined(CONFIG_PREEMPT_NONE)
4184 		   "server",
4185 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4186 		   "desktop",
4187 #elif defined(CONFIG_PREEMPT)
4188 		   "preempt",
4189 #elif defined(CONFIG_PREEMPT_RT)
4190 		   "preempt_rt",
4191 #else
4192 		   "unknown",
4193 #endif
4194 		   /* These are reserved for later use */
4195 		   0, 0, 0, 0);
4196 #ifdef CONFIG_SMP
4197 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4198 #else
4199 	seq_puts(m, ")\n");
4200 #endif
4201 	seq_puts(m, "#    -----------------\n");
4202 	seq_printf(m, "#    | task: %.16s-%d "
4203 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4204 		   data->comm, data->pid,
4205 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4206 		   data->policy, data->rt_priority);
4207 	seq_puts(m, "#    -----------------\n");
4208 
4209 	if (data->critical_start) {
4210 		seq_puts(m, "#  => started at: ");
4211 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4212 		trace_print_seq(m, &iter->seq);
4213 		seq_puts(m, "\n#  => ended at:   ");
4214 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4215 		trace_print_seq(m, &iter->seq);
4216 		seq_puts(m, "\n#\n");
4217 	}
4218 
4219 	seq_puts(m, "#\n");
4220 }
4221 
4222 static void test_cpu_buff_start(struct trace_iterator *iter)
4223 {
4224 	struct trace_seq *s = &iter->seq;
4225 	struct trace_array *tr = iter->tr;
4226 
4227 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4228 		return;
4229 
4230 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4231 		return;
4232 
4233 	if (cpumask_available(iter->started) &&
4234 	    cpumask_test_cpu(iter->cpu, iter->started))
4235 		return;
4236 
4237 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4238 		return;
4239 
4240 	if (cpumask_available(iter->started))
4241 		cpumask_set_cpu(iter->cpu, iter->started);
4242 
4243 	/* Don't print started cpu buffer for the first entry of the trace */
4244 	if (iter->idx > 1)
4245 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4246 				iter->cpu);
4247 }
4248 
4249 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4250 {
4251 	struct trace_array *tr = iter->tr;
4252 	struct trace_seq *s = &iter->seq;
4253 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4254 	struct trace_entry *entry;
4255 	struct trace_event *event;
4256 
4257 	entry = iter->ent;
4258 
4259 	test_cpu_buff_start(iter);
4260 
4261 	event = ftrace_find_event(entry->type);
4262 
4263 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4264 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4265 			trace_print_lat_context(iter);
4266 		else
4267 			trace_print_context(iter);
4268 	}
4269 
4270 	if (trace_seq_has_overflowed(s))
4271 		return TRACE_TYPE_PARTIAL_LINE;
4272 
4273 	if (event)
4274 		return event->funcs->trace(iter, sym_flags, event);
4275 
4276 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4277 
4278 	return trace_handle_return(s);
4279 }
4280 
4281 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4282 {
4283 	struct trace_array *tr = iter->tr;
4284 	struct trace_seq *s = &iter->seq;
4285 	struct trace_entry *entry;
4286 	struct trace_event *event;
4287 
4288 	entry = iter->ent;
4289 
4290 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4291 		trace_seq_printf(s, "%d %d %llu ",
4292 				 entry->pid, iter->cpu, iter->ts);
4293 
4294 	if (trace_seq_has_overflowed(s))
4295 		return TRACE_TYPE_PARTIAL_LINE;
4296 
4297 	event = ftrace_find_event(entry->type);
4298 	if (event)
4299 		return event->funcs->raw(iter, 0, event);
4300 
4301 	trace_seq_printf(s, "%d ?\n", entry->type);
4302 
4303 	return trace_handle_return(s);
4304 }
4305 
4306 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4307 {
4308 	struct trace_array *tr = iter->tr;
4309 	struct trace_seq *s = &iter->seq;
4310 	unsigned char newline = '\n';
4311 	struct trace_entry *entry;
4312 	struct trace_event *event;
4313 
4314 	entry = iter->ent;
4315 
4316 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4317 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4318 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4319 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4320 		if (trace_seq_has_overflowed(s))
4321 			return TRACE_TYPE_PARTIAL_LINE;
4322 	}
4323 
4324 	event = ftrace_find_event(entry->type);
4325 	if (event) {
4326 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4327 		if (ret != TRACE_TYPE_HANDLED)
4328 			return ret;
4329 	}
4330 
4331 	SEQ_PUT_FIELD(s, newline);
4332 
4333 	return trace_handle_return(s);
4334 }
4335 
4336 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4337 {
4338 	struct trace_array *tr = iter->tr;
4339 	struct trace_seq *s = &iter->seq;
4340 	struct trace_entry *entry;
4341 	struct trace_event *event;
4342 
4343 	entry = iter->ent;
4344 
4345 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4346 		SEQ_PUT_FIELD(s, entry->pid);
4347 		SEQ_PUT_FIELD(s, iter->cpu);
4348 		SEQ_PUT_FIELD(s, iter->ts);
4349 		if (trace_seq_has_overflowed(s))
4350 			return TRACE_TYPE_PARTIAL_LINE;
4351 	}
4352 
4353 	event = ftrace_find_event(entry->type);
4354 	return event ? event->funcs->binary(iter, 0, event) :
4355 		TRACE_TYPE_HANDLED;
4356 }
4357 
4358 int trace_empty(struct trace_iterator *iter)
4359 {
4360 	struct ring_buffer_iter *buf_iter;
4361 	int cpu;
4362 
4363 	/* If we are looking at one CPU buffer, only check that one */
4364 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4365 		cpu = iter->cpu_file;
4366 		buf_iter = trace_buffer_iter(iter, cpu);
4367 		if (buf_iter) {
4368 			if (!ring_buffer_iter_empty(buf_iter))
4369 				return 0;
4370 		} else {
4371 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4372 				return 0;
4373 		}
4374 		return 1;
4375 	}
4376 
4377 	for_each_tracing_cpu(cpu) {
4378 		buf_iter = trace_buffer_iter(iter, cpu);
4379 		if (buf_iter) {
4380 			if (!ring_buffer_iter_empty(buf_iter))
4381 				return 0;
4382 		} else {
4383 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4384 				return 0;
4385 		}
4386 	}
4387 
4388 	return 1;
4389 }
4390 
4391 /*  Called with trace_event_read_lock() held. */
4392 enum print_line_t print_trace_line(struct trace_iterator *iter)
4393 {
4394 	struct trace_array *tr = iter->tr;
4395 	unsigned long trace_flags = tr->trace_flags;
4396 	enum print_line_t ret;
4397 
4398 	if (iter->lost_events) {
4399 		if (iter->lost_events == (unsigned long)-1)
4400 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4401 					 iter->cpu);
4402 		else
4403 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4404 					 iter->cpu, iter->lost_events);
4405 		if (trace_seq_has_overflowed(&iter->seq))
4406 			return TRACE_TYPE_PARTIAL_LINE;
4407 	}
4408 
4409 	if (iter->trace && iter->trace->print_line) {
4410 		ret = iter->trace->print_line(iter);
4411 		if (ret != TRACE_TYPE_UNHANDLED)
4412 			return ret;
4413 	}
4414 
4415 	if (iter->ent->type == TRACE_BPUTS &&
4416 			trace_flags & TRACE_ITER_PRINTK &&
4417 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4418 		return trace_print_bputs_msg_only(iter);
4419 
4420 	if (iter->ent->type == TRACE_BPRINT &&
4421 			trace_flags & TRACE_ITER_PRINTK &&
4422 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4423 		return trace_print_bprintk_msg_only(iter);
4424 
4425 	if (iter->ent->type == TRACE_PRINT &&
4426 			trace_flags & TRACE_ITER_PRINTK &&
4427 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4428 		return trace_print_printk_msg_only(iter);
4429 
4430 	if (trace_flags & TRACE_ITER_BIN)
4431 		return print_bin_fmt(iter);
4432 
4433 	if (trace_flags & TRACE_ITER_HEX)
4434 		return print_hex_fmt(iter);
4435 
4436 	if (trace_flags & TRACE_ITER_RAW)
4437 		return print_raw_fmt(iter);
4438 
4439 	return print_trace_fmt(iter);
4440 }
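
/*
 * Note, derived from the checks above: when several output-format flags
 * are set at once, bin takes precedence over hex, hex over raw, and raw
 * over the default formatting. The tracer's own print_line() callback
 * and the printk-msg-only cases are consulted before any of them.
 */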
4441 
4442 void trace_latency_header(struct seq_file *m)
4443 {
4444 	struct trace_iterator *iter = m->private;
4445 	struct trace_array *tr = iter->tr;
4446 
4447 	/* print nothing if the buffers are empty */
4448 	if (trace_empty(iter))
4449 		return;
4450 
4451 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4452 		print_trace_header(m, iter);
4453 
4454 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4455 		print_lat_help_header(m);
4456 }
4457 
4458 void trace_default_header(struct seq_file *m)
4459 {
4460 	struct trace_iterator *iter = m->private;
4461 	struct trace_array *tr = iter->tr;
4462 	unsigned long trace_flags = tr->trace_flags;
4463 
4464 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4465 		return;
4466 
4467 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4468 		/* print nothing if the buffers are empty */
4469 		if (trace_empty(iter))
4470 			return;
4471 		print_trace_header(m, iter);
4472 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4473 			print_lat_help_header(m);
4474 	} else {
4475 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4476 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4477 				print_func_help_header_irq(iter->array_buffer,
4478 							   m, trace_flags);
4479 			else
4480 				print_func_help_header(iter->array_buffer, m,
4481 						       trace_flags);
4482 		}
4483 	}
4484 }
4485 
4486 static void test_ftrace_alive(struct seq_file *m)
4487 {
4488 	if (!ftrace_is_dead())
4489 		return;
4490 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4491 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4492 }
4493 
4494 #ifdef CONFIG_TRACER_MAX_TRACE
4495 static void show_snapshot_main_help(struct seq_file *m)
4496 {
4497 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4498 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4499 		    "#                      Takes a snapshot of the main buffer.\n"
4500 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4501 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4502 		    "#                       is not a '0' or '1')\n");
4503 }
4504 
4505 static void show_snapshot_percpu_help(struct seq_file *m)
4506 {
4507 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4508 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4509 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4510 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4511 #else
4512 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4513 		    "#                     Must use main snapshot file to allocate.\n");
4514 #endif
4515 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4516 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4517 		    "#                       is not a '0' or '1')\n");
4518 }
4519 
4520 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4521 {
4522 	if (iter->tr->allocated_snapshot)
4523 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4524 	else
4525 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4526 
4527 	seq_puts(m, "# Snapshot commands:\n");
4528 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4529 		show_snapshot_main_help(m);
4530 	else
4531 		show_snapshot_percpu_help(m);
4532 }
4533 #else
4534 /* Should never be called */
4535 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4536 #endif
4537 
4538 static int s_show(struct seq_file *m, void *v)
4539 {
4540 	struct trace_iterator *iter = v;
4541 	int ret;
4542 
4543 	if (iter->ent == NULL) {
4544 		if (iter->tr) {
4545 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4546 			seq_puts(m, "#\n");
4547 			test_ftrace_alive(m);
4548 		}
4549 		if (iter->snapshot && trace_empty(iter))
4550 			print_snapshot_help(m, iter);
4551 		else if (iter->trace && iter->trace->print_header)
4552 			iter->trace->print_header(m);
4553 		else
4554 			trace_default_header(m);
4555 
4556 	} else if (iter->leftover) {
4557 		/*
4558 		 * If we filled the seq_file buffer earlier, we
4559 		 * want to just show it now.
4560 		 */
4561 		ret = trace_print_seq(m, &iter->seq);
4562 
4563 		/* ret should this time be zero, but you never know */
4564 		iter->leftover = ret;
4565 
4566 	} else {
4567 		print_trace_line(iter);
4568 		ret = trace_print_seq(m, &iter->seq);
4569 		/*
4570 		 * If we overflow the seq_file buffer, then it will
4571 		 * ask us for this data again at start up.
4572 		 * Use that instead.
4573 		 *  ret is 0 if seq_file write succeeded.
4574 		 *        -1 otherwise.
4575 		 */
4576 		iter->leftover = ret;
4577 	}
4578 
4579 	return 0;
4580 }
4581 
4582 /*
4583  * Should be used after trace_array_get(); trace_types_lock
4584  * ensures that i_cdev was already initialized.
4585  */
4586 static inline int tracing_get_cpu(struct inode *inode)
4587 {
4588 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4589 		return (long)inode->i_cdev - 1;
4590 	return RING_BUFFER_ALL_CPUS;
4591 }
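
/*
 * For reference (see trace_create_cpu_file()): per-CPU tracing files
 * store (cpu + 1) in i_cdev when they are created, so a NULL i_cdev
 * unambiguously means "no specific CPU". For example, the file for
 * CPU 0 stores (void *)1, which decodes back to 0 above.
 */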
4592 
4593 static const struct seq_operations tracer_seq_ops = {
4594 	.start		= s_start,
4595 	.next		= s_next,
4596 	.stop		= s_stop,
4597 	.show		= s_show,
4598 };
4599 
4600 static struct trace_iterator *
4601 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4602 {
4603 	struct trace_array *tr = inode->i_private;
4604 	struct trace_iterator *iter;
4605 	int cpu;
4606 
4607 	if (tracing_disabled)
4608 		return ERR_PTR(-ENODEV);
4609 
4610 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4611 	if (!iter)
4612 		return ERR_PTR(-ENOMEM);
4613 
4614 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4615 				    GFP_KERNEL);
4616 	if (!iter->buffer_iter)
4617 		goto release;
4618 
4619 	/*
4620 	 * trace_find_next_entry() may need to save off iter->ent.
4621 	 * It will place it into the iter->temp buffer. As most
4622 	 * events are less than 128 bytes, allocate a buffer of that size.
4623 	 * If one is greater, then trace_find_next_entry() will
4624 	 * allocate a new buffer to adjust for the bigger iter->ent.
4625 	 * It's not critical if it fails to get allocated here.
4626 	 */
4627 	iter->temp = kmalloc(128, GFP_KERNEL);
4628 	if (iter->temp)
4629 		iter->temp_size = 128;
4630 
4631 	/*
4632 	 * trace_event_printf() may need to modify the given format
4633 	 * string to replace %p with %px so that it shows the real address
4634 	 * instead of a hash value. However, that is only needed for event
4635 	 * tracing; other tracers may not need it. Defer the allocation
4636 	 * until it is needed.
4637 	 */
4638 	iter->fmt = NULL;
4639 	iter->fmt_size = 0;
4640 
4641 	/*
4642 	 * We make a copy of the current tracer to avoid concurrent
4643 	 * changes on it while we are reading.
4644 	 */
4645 	mutex_lock(&trace_types_lock);
4646 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4647 	if (!iter->trace)
4648 		goto fail;
4649 
4650 	*iter->trace = *tr->current_trace;
4651 
4652 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4653 		goto fail;
4654 
4655 	iter->tr = tr;
4656 
4657 #ifdef CONFIG_TRACER_MAX_TRACE
4658 	/* Currently only the top directory has a snapshot */
4659 	if (tr->current_trace->print_max || snapshot)
4660 		iter->array_buffer = &tr->max_buffer;
4661 	else
4662 #endif
4663 		iter->array_buffer = &tr->array_buffer;
4664 	iter->snapshot = snapshot;
4665 	iter->pos = -1;
4666 	iter->cpu_file = tracing_get_cpu(inode);
4667 	mutex_init(&iter->mutex);
4668 
4669 	/* Notify the tracer early, before we stop tracing. */
4670 	if (iter->trace->open)
4671 		iter->trace->open(iter);
4672 
4673 	/* Annotate start of buffers if we had overruns */
4674 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4675 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676 
4677 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4678 	if (trace_clocks[tr->clock_id].in_ns)
4679 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4680 
4681 	/*
4682 	 * If pause-on-trace is enabled, then stop the trace while
4683 	 * dumping, unless this is the "snapshot" file.
4684 	 */
4685 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4686 		tracing_stop_tr(tr);
4687 
4688 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4689 		for_each_tracing_cpu(cpu) {
4690 			iter->buffer_iter[cpu] =
4691 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4692 							 cpu, GFP_KERNEL);
4693 		}
4694 		ring_buffer_read_prepare_sync();
4695 		for_each_tracing_cpu(cpu) {
4696 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4697 			tracing_iter_reset(iter, cpu);
4698 		}
4699 	} else {
4700 		cpu = iter->cpu_file;
4701 		iter->buffer_iter[cpu] =
4702 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4703 						 cpu, GFP_KERNEL);
4704 		ring_buffer_read_prepare_sync();
4705 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4706 		tracing_iter_reset(iter, cpu);
4707 	}
4708 
4709 	mutex_unlock(&trace_types_lock);
4710 
4711 	return iter;
4712 
4713  fail:
4714 	mutex_unlock(&trace_types_lock);
4715 	kfree(iter->trace);
4716 	kfree(iter->temp);
4717 	kfree(iter->buffer_iter);
4718 release:
4719 	seq_release_private(inode, file);
4720 	return ERR_PTR(-ENOMEM);
4721 }
4722 
4723 int tracing_open_generic(struct inode *inode, struct file *filp)
4724 {
4725 	int ret;
4726 
4727 	ret = tracing_check_open_get_tr(NULL);
4728 	if (ret)
4729 		return ret;
4730 
4731 	filp->private_data = inode->i_private;
4732 	return 0;
4733 }
4734 
4735 bool tracing_is_disabled(void)
4736 {
4737 	return (tracing_disabled) ? true : false;
4738 }
4739 
4740 /*
4741  * Open and update trace_array ref count.
4742  * Must have the current trace_array passed to it.
4743  */
4744 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4745 {
4746 	struct trace_array *tr = inode->i_private;
4747 	int ret;
4748 
4749 	ret = tracing_check_open_get_tr(tr);
4750 	if (ret)
4751 		return ret;
4752 
4753 	filp->private_data = inode->i_private;
4754 
4755 	return 0;
4756 }
4757 
4758 static int tracing_release(struct inode *inode, struct file *file)
4759 {
4760 	struct trace_array *tr = inode->i_private;
4761 	struct seq_file *m = file->private_data;
4762 	struct trace_iterator *iter;
4763 	int cpu;
4764 
4765 	if (!(file->f_mode & FMODE_READ)) {
4766 		trace_array_put(tr);
4767 		return 0;
4768 	}
4769 
4770 	/* Writes do not use seq_file */
4771 	iter = m->private;
4772 	mutex_lock(&trace_types_lock);
4773 
4774 	for_each_tracing_cpu(cpu) {
4775 		if (iter->buffer_iter[cpu])
4776 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4777 	}
4778 
4779 	if (iter->trace && iter->trace->close)
4780 		iter->trace->close(iter);
4781 
4782 	if (!iter->snapshot && tr->stop_count)
4783 		/* reenable tracing if it was previously enabled */
4784 		tracing_start_tr(tr);
4785 
4786 	__trace_array_put(tr);
4787 
4788 	mutex_unlock(&trace_types_lock);
4789 
4790 	mutex_destroy(&iter->mutex);
4791 	free_cpumask_var(iter->started);
4792 	kfree(iter->fmt);
4793 	kfree(iter->temp);
4794 	kfree(iter->trace);
4795 	kfree(iter->buffer_iter);
4796 	seq_release_private(inode, file);
4797 
4798 	return 0;
4799 }
4800 
4801 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4802 {
4803 	struct trace_array *tr = inode->i_private;
4804 
4805 	trace_array_put(tr);
4806 	return 0;
4807 }
4808 
4809 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4810 {
4811 	struct trace_array *tr = inode->i_private;
4812 
4813 	trace_array_put(tr);
4814 
4815 	return single_release(inode, file);
4816 }
4817 
4818 static int tracing_open(struct inode *inode, struct file *file)
4819 {
4820 	struct trace_array *tr = inode->i_private;
4821 	struct trace_iterator *iter;
4822 	int ret;
4823 
4824 	ret = tracing_check_open_get_tr(tr);
4825 	if (ret)
4826 		return ret;
4827 
4828 	/* If this file was opened for write, then erase its contents */
4829 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4830 		int cpu = tracing_get_cpu(inode);
4831 		struct array_buffer *trace_buf = &tr->array_buffer;
4832 
4833 #ifdef CONFIG_TRACER_MAX_TRACE
4834 		if (tr->current_trace->print_max)
4835 			trace_buf = &tr->max_buffer;
4836 #endif
4837 
4838 		if (cpu == RING_BUFFER_ALL_CPUS)
4839 			tracing_reset_online_cpus(trace_buf);
4840 		else
4841 			tracing_reset_cpu(trace_buf, cpu);
4842 	}
4843 
4844 	if (file->f_mode & FMODE_READ) {
4845 		iter = __tracing_open(inode, file, false);
4846 		if (IS_ERR(iter))
4847 			ret = PTR_ERR(iter);
4848 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4849 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4850 	}
4851 
4852 	if (ret < 0)
4853 		trace_array_put(tr);
4854 
4855 	return ret;
4856 }
4857 
4858 /*
4859  * Some tracers are not suitable for instance buffers.
4860  * A tracer is always available for the global array (toplevel)
4861  * or if it explicitly states that it is.
4862  */
4863 static bool
4864 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4865 {
4866 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4867 }
4868 
4869 /* Find the next tracer that this trace array may use */
4870 static struct tracer *
4871 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4872 {
4873 	while (t && !trace_ok_for_array(t, tr))
4874 		t = t->next;
4875 
4876 	return t;
4877 }
4878 
4879 static void *
4880 t_next(struct seq_file *m, void *v, loff_t *pos)
4881 {
4882 	struct trace_array *tr = m->private;
4883 	struct tracer *t = v;
4884 
4885 	(*pos)++;
4886 
4887 	if (t)
4888 		t = get_tracer_for_array(tr, t->next);
4889 
4890 	return t;
4891 }
4892 
4893 static void *t_start(struct seq_file *m, loff_t *pos)
4894 {
4895 	struct trace_array *tr = m->private;
4896 	struct tracer *t;
4897 	loff_t l = 0;
4898 
4899 	mutex_lock(&trace_types_lock);
4900 
4901 	t = get_tracer_for_array(tr, trace_types);
4902 	for (; t && l < *pos; t = t_next(m, t, &l))
4903 		;
4904 
4905 	return t;
4906 }
4907 
4908 static void t_stop(struct seq_file *m, void *p)
4909 {
4910 	mutex_unlock(&trace_types_lock);
4911 }
4912 
4913 static int t_show(struct seq_file *m, void *v)
4914 {
4915 	struct tracer *t = v;
4916 
4917 	if (!t)
4918 		return 0;
4919 
4920 	seq_puts(m, t->name);
4921 	if (t->next)
4922 		seq_putc(m, ' ');
4923 	else
4924 		seq_putc(m, '\n');
4925 
4926 	return 0;
4927 }
4928 
4929 static const struct seq_operations show_traces_seq_ops = {
4930 	.start		= t_start,
4931 	.next		= t_next,
4932 	.stop		= t_stop,
4933 	.show		= t_show,
4934 };
4935 
4936 static int show_traces_open(struct inode *inode, struct file *file)
4937 {
4938 	struct trace_array *tr = inode->i_private;
4939 	struct seq_file *m;
4940 	int ret;
4941 
4942 	ret = tracing_check_open_get_tr(tr);
4943 	if (ret)
4944 		return ret;
4945 
4946 	ret = seq_open(file, &show_traces_seq_ops);
4947 	if (ret) {
4948 		trace_array_put(tr);
4949 		return ret;
4950 	}
4951 
4952 	m = file->private_data;
4953 	m->private = tr;
4954 
4955 	return 0;
4956 }
4957 
4958 static int show_traces_release(struct inode *inode, struct file *file)
4959 {
4960 	struct trace_array *tr = inode->i_private;
4961 
4962 	trace_array_put(tr);
4963 	return seq_release(inode, file);
4964 }
4965 
4966 static ssize_t
4967 tracing_write_stub(struct file *filp, const char __user *ubuf,
4968 		   size_t count, loff_t *ppos)
4969 {
4970 	return count;
4971 }
4972 
4973 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4974 {
4975 	int ret;
4976 
4977 	if (file->f_mode & FMODE_READ)
4978 		ret = seq_lseek(file, offset, whence);
4979 	else
4980 		file->f_pos = ret = 0;
4981 
4982 	return ret;
4983 }
4984 
4985 static const struct file_operations tracing_fops = {
4986 	.open		= tracing_open,
4987 	.read		= seq_read,
4988 	.write		= tracing_write_stub,
4989 	.llseek		= tracing_lseek,
4990 	.release	= tracing_release,
4991 };
4992 
4993 static const struct file_operations show_traces_fops = {
4994 	.open		= show_traces_open,
4995 	.read		= seq_read,
4996 	.llseek		= seq_lseek,
4997 	.release	= show_traces_release,
4998 };
4999 
5000 static ssize_t
5001 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5002 		     size_t count, loff_t *ppos)
5003 {
5004 	struct trace_array *tr = file_inode(filp)->i_private;
5005 	char *mask_str;
5006 	int len;
5007 
5008 	len = snprintf(NULL, 0, "%*pb\n",
5009 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5010 	mask_str = kmalloc(len, GFP_KERNEL);
5011 	if (!mask_str)
5012 		return -ENOMEM;
5013 
5014 	len = snprintf(mask_str, len, "%*pb\n",
5015 		       cpumask_pr_args(tr->tracing_cpumask));
5016 	if (len >= count) {
5017 		count = -EINVAL;
5018 		goto out_err;
5019 	}
5020 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5021 
5022 out_err:
5023 	kfree(mask_str);
5024 
5025 	return count;
5026 }
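
/*
 * Illustrative output only, assuming a 4-CPU system: with all CPUs in
 * the mask the read above returns "f\n"; with only CPU 2 in the mask it
 * returns "4\n". The %*pb specifier prints the cpumask as a hex bitmap.
 */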
5027 
5028 int tracing_set_cpumask(struct trace_array *tr,
5029 			cpumask_var_t tracing_cpumask_new)
5030 {
5031 	int cpu;
5032 
5033 	if (!tr)
5034 		return -EINVAL;
5035 
5036 	local_irq_disable();
5037 	arch_spin_lock(&tr->max_lock);
5038 	for_each_tracing_cpu(cpu) {
5039 		/*
5040 		 * Increase/decrease the disabled counter if we are
5041 		 * about to flip a bit in the cpumask:
5042 		 */
5043 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5044 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5045 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5046 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5047 		}
5048 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5049 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5050 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5051 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5052 		}
5053 	}
5054 	arch_spin_unlock(&tr->max_lock);
5055 	local_irq_enable();
5056 
5057 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5058 
5059 	return 0;
5060 }
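
/*
 * A minimal usage sketch (hypothetical in-kernel caller, shown only for
 * illustration). tracing_set_cpumask() copies the mask, so the caller
 * keeps ownership of its cpumask_var_t:
 *
 *	cpumask_var_t new_mask;
 *	int ret;
 *
 *	if (!zalloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(0, new_mask);
 *	cpumask_set_cpu(2, new_mask);
 *	ret = tracing_set_cpumask(tr, new_mask);
 *	free_cpumask_var(new_mask);
 *
 * After this, only events occurring on CPUs 0 and 2 are recorded.
 */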
5061 
5062 static ssize_t
5063 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5064 		      size_t count, loff_t *ppos)
5065 {
5066 	struct trace_array *tr = file_inode(filp)->i_private;
5067 	cpumask_var_t tracing_cpumask_new;
5068 	int err;
5069 
5070 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5071 		return -ENOMEM;
5072 
5073 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5074 	if (err)
5075 		goto err_free;
5076 
5077 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5078 	if (err)
5079 		goto err_free;
5080 
5081 	free_cpumask_var(tracing_cpumask_new);
5082 
5083 	return count;
5084 
5085 err_free:
5086 	free_cpumask_var(tracing_cpumask_new);
5087 
5088 	return err;
5089 }
5090 
5091 static const struct file_operations tracing_cpumask_fops = {
5092 	.open		= tracing_open_generic_tr,
5093 	.read		= tracing_cpumask_read,
5094 	.write		= tracing_cpumask_write,
5095 	.release	= tracing_release_generic_tr,
5096 	.llseek		= generic_file_llseek,
5097 };
5098 
5099 static int tracing_trace_options_show(struct seq_file *m, void *v)
5100 {
5101 	struct tracer_opt *trace_opts;
5102 	struct trace_array *tr = m->private;
5103 	u32 tracer_flags;
5104 	int i;
5105 
5106 	mutex_lock(&trace_types_lock);
5107 	tracer_flags = tr->current_trace->flags->val;
5108 	trace_opts = tr->current_trace->flags->opts;
5109 
5110 	for (i = 0; trace_options[i]; i++) {
5111 		if (tr->trace_flags & (1 << i))
5112 			seq_printf(m, "%s\n", trace_options[i]);
5113 		else
5114 			seq_printf(m, "no%s\n", trace_options[i]);
5115 	}
5116 
5117 	for (i = 0; trace_opts[i].name; i++) {
5118 		if (tracer_flags & trace_opts[i].bit)
5119 			seq_printf(m, "%s\n", trace_opts[i].name);
5120 		else
5121 			seq_printf(m, "no%s\n", trace_opts[i].name);
5122 	}
5123 	mutex_unlock(&trace_types_lock);
5124 
5125 	return 0;
5126 }
5127 
5128 static int __set_tracer_option(struct trace_array *tr,
5129 			       struct tracer_flags *tracer_flags,
5130 			       struct tracer_opt *opts, int neg)
5131 {
5132 	struct tracer *trace = tracer_flags->trace;
5133 	int ret;
5134 
5135 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5136 	if (ret)
5137 		return ret;
5138 
5139 	if (neg)
5140 		tracer_flags->val &= ~opts->bit;
5141 	else
5142 		tracer_flags->val |= opts->bit;
5143 	return 0;
5144 }
5145 
5146 /* Try to assign a tracer specific option */
5147 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5148 {
5149 	struct tracer *trace = tr->current_trace;
5150 	struct tracer_flags *tracer_flags = trace->flags;
5151 	struct tracer_opt *opts = NULL;
5152 	int i;
5153 
5154 	for (i = 0; tracer_flags->opts[i].name; i++) {
5155 		opts = &tracer_flags->opts[i];
5156 
5157 		if (strcmp(cmp, opts->name) == 0)
5158 			return __set_tracer_option(tr, trace->flags, opts, neg);
5159 	}
5160 
5161 	return -EINVAL;
5162 }
5163 
5164 /* Some tracers require overwrite to stay enabled */
5165 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5166 {
5167 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5168 		return -1;
5169 
5170 	return 0;
5171 }
5172 
5173 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5174 {
5175 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5176 	    (mask == TRACE_ITER_RECORD_CMD))
5177 		lockdep_assert_held(&event_mutex);
5178 
5179 	/* do nothing if flag is already set */
5180 	if (!!(tr->trace_flags & mask) == !!enabled)
5181 		return 0;
5182 
5183 	/* Give the tracer a chance to approve the change */
5184 	if (tr->current_trace->flag_changed)
5185 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5186 			return -EINVAL;
5187 
5188 	if (enabled)
5189 		tr->trace_flags |= mask;
5190 	else
5191 		tr->trace_flags &= ~mask;
5192 
5193 	if (mask == TRACE_ITER_RECORD_CMD)
5194 		trace_event_enable_cmd_record(enabled);
5195 
5196 	if (mask == TRACE_ITER_RECORD_TGID) {
5197 		if (!tgid_map)
5198 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5199 					   sizeof(*tgid_map),
5200 					   GFP_KERNEL);
5201 		if (!tgid_map) {
5202 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5203 			return -ENOMEM;
5204 		}
5205 
5206 		trace_event_enable_tgid_record(enabled);
5207 	}
5208 
5209 	if (mask == TRACE_ITER_EVENT_FORK)
5210 		trace_event_follow_fork(tr, enabled);
5211 
5212 	if (mask == TRACE_ITER_FUNC_FORK)
5213 		ftrace_pid_follow_fork(tr, enabled);
5214 
5215 	if (mask == TRACE_ITER_OVERWRITE) {
5216 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5217 #ifdef CONFIG_TRACER_MAX_TRACE
5218 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5219 #endif
5220 	}
5221 
5222 	if (mask == TRACE_ITER_PRINTK) {
5223 		trace_printk_start_stop_comm(enabled);
5224 		trace_printk_control(enabled);
5225 	}
5226 
5227 	return 0;
5228 }
5229 
5230 int trace_set_options(struct trace_array *tr, char *option)
5231 {
5232 	char *cmp;
5233 	int neg = 0;
5234 	int ret;
5235 	size_t orig_len = strlen(option);
5236 	int len;
5237 
5238 	cmp = strstrip(option);
5239 
5240 	len = str_has_prefix(cmp, "no");
5241 	if (len)
5242 		neg = 1;
5243 
5244 	cmp += len;
5245 
5246 	mutex_lock(&event_mutex);
5247 	mutex_lock(&trace_types_lock);
5248 
5249 	ret = match_string(trace_options, -1, cmp);
5250 	/* If it is not a core option, test the tracer-specific options */
5251 	if (ret < 0)
5252 		ret = set_tracer_option(tr, cmp, neg);
5253 	else
5254 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5255 
5256 	mutex_unlock(&trace_types_lock);
5257 	mutex_unlock(&event_mutex);
5258 
5259 	/*
5260 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5261 	 * turn it back into a space.
5262 	 */
5263 	if (orig_len > strlen(option))
5264 		option[strlen(option)] = ' ';
5265 
5266 	return ret;
5267 }
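
/*
 * A hedged usage sketch (hypothetical call site): option strings follow
 * the trace_options file syntax, and a "no" prefix clears a flag. Note
 * that trace_set_options() may modify the buffer it is given, so a
 * writable array is used rather than a string literal:
 *
 *	char set_opt[] = "sym-offset";
 *	char clr_opt[] = "nooverwrite";
 *
 *	trace_set_options(&global_trace, set_opt);
 *	trace_set_options(&global_trace, clr_opt);
 */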
5268 
5269 static void __init apply_trace_boot_options(void)
5270 {
5271 	char *buf = trace_boot_options_buf;
5272 	char *option;
5273 
5274 	while (true) {
5275 		option = strsep(&buf, ",");
5276 
5277 		if (!option)
5278 			break;
5279 
5280 		if (*option)
5281 			trace_set_options(&global_trace, option);
5282 
5283 		/* Put back the comma to allow this to be called again */
5284 		if (buf)
5285 			*(buf - 1) = ',';
5286 	}
5287 }
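
/*
 * For reference: trace_boot_options_buf is filled from the optional
 * "trace_options=" kernel command line parameter, so a boot line such
 * as the following (illustrative values only):
 *
 *	trace_options=sym-offset,nooverwrite,stacktrace
 *
 * results in each comma-separated token being handed to
 * trace_set_options() above.
 */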
5288 
5289 static ssize_t
5290 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5291 			size_t cnt, loff_t *ppos)
5292 {
5293 	struct seq_file *m = filp->private_data;
5294 	struct trace_array *tr = m->private;
5295 	char buf[64];
5296 	int ret;
5297 
5298 	if (cnt >= sizeof(buf))
5299 		return -EINVAL;
5300 
5301 	if (copy_from_user(buf, ubuf, cnt))
5302 		return -EFAULT;
5303 
5304 	buf[cnt] = 0;
5305 
5306 	ret = trace_set_options(tr, buf);
5307 	if (ret < 0)
5308 		return ret;
5309 
5310 	*ppos += cnt;
5311 
5312 	return cnt;
5313 }
5314 
5315 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5316 {
5317 	struct trace_array *tr = inode->i_private;
5318 	int ret;
5319 
5320 	ret = tracing_check_open_get_tr(tr);
5321 	if (ret)
5322 		return ret;
5323 
5324 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5325 	if (ret < 0)
5326 		trace_array_put(tr);
5327 
5328 	return ret;
5329 }
5330 
5331 static const struct file_operations tracing_iter_fops = {
5332 	.open		= tracing_trace_options_open,
5333 	.read		= seq_read,
5334 	.llseek		= seq_lseek,
5335 	.release	= tracing_single_release_tr,
5336 	.write		= tracing_trace_options_write,
5337 };
5338 
5339 static const char readme_msg[] =
5340 	"tracing mini-HOWTO:\n\n"
5341 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5342 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5343 	" Important files:\n"
5344 	"  trace\t\t\t- The static contents of the buffer\n"
5345 	"\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5346 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5347 	"  current_tracer\t- function and latency tracers\n"
5348 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5349 	"  error_log\t- error log for failed commands (that support it)\n"
5350 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5351 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5352 	"  trace_clock\t\t- change the clock used to order events\n"
5353 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5354 	"      global:   Synced across CPUs but slows tracing down.\n"
5355 	"     counter:   Not a clock, but just an increment\n"
5356 	"      uptime:   Jiffy counter from time of boot\n"
5357 	"        perf:   Same clock that perf events use\n"
5358 #ifdef CONFIG_X86_64
5359 	"     x86-tsc:   TSC cycle counter\n"
5360 #endif
5361 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5362 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5363 	"    absolute:   Absolute (standalone) timestamp\n"
5364 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5365 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5366 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5367 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5368 	"\t\t\t  Remove sub-buffer with rmdir\n"
5369 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5370 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5371 	"\t\t\t  option name\n"
5372 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5373 #ifdef CONFIG_DYNAMIC_FTRACE
5374 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5375 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5376 	"\t\t\t  functions\n"
5377 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5378 	"\t     modules: Can select a group via module\n"
5379 	"\t      Format: :mod:<module-name>\n"
5380 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5381 	"\t    triggers: a command to perform when function is hit\n"
5382 	"\t      Format: <function>:<trigger>[:count]\n"
5383 	"\t     trigger: traceon, traceoff\n"
5384 	"\t\t      enable_event:<system>:<event>\n"
5385 	"\t\t      disable_event:<system>:<event>\n"
5386 #ifdef CONFIG_STACKTRACE
5387 	"\t\t      stacktrace\n"
5388 #endif
5389 #ifdef CONFIG_TRACER_SNAPSHOT
5390 	"\t\t      snapshot\n"
5391 #endif
5392 	"\t\t      dump\n"
5393 	"\t\t      cpudump\n"
5394 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5395 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5396 	"\t     The first one will disable tracing every time do_fault is hit\n"
5397 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5398 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5399 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5400 	"\t       the counter will not decrement. It only decrements when the\n"
5401 	"\t       trigger did work.\n"
5402 	"\t     To remove a trigger without a count:\n"
5403 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5404 	"\t     To remove a trigger with a count:\n"
5405 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5406 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5407 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5408 	"\t    modules: Can select a group via module command :mod:\n"
5409 	"\t    Does not accept triggers\n"
5410 #endif /* CONFIG_DYNAMIC_FTRACE */
5411 #ifdef CONFIG_FUNCTION_TRACER
5412 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5413 	"\t\t    (function)\n"
5414 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5415 	"\t\t    (function)\n"
5416 #endif
5417 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5418 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5419 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5420 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5421 #endif
5422 #ifdef CONFIG_TRACER_SNAPSHOT
5423 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5424 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5425 	"\t\t\t  information\n"
5426 #endif
5427 #ifdef CONFIG_STACK_TRACER
5428 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5429 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5430 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5431 	"\t\t\t  new trace)\n"
5432 #ifdef CONFIG_DYNAMIC_FTRACE
5433 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5434 	"\t\t\t  traces\n"
5435 #endif
5436 #endif /* CONFIG_STACK_TRACER */
5437 #ifdef CONFIG_DYNAMIC_EVENTS
5438 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5439 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5440 #endif
5441 #ifdef CONFIG_KPROBE_EVENTS
5442 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5443 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5444 #endif
5445 #ifdef CONFIG_UPROBE_EVENTS
5446 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5447 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5448 #endif
5449 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5450 	"\t  accepts: event-definitions (one definition per line)\n"
5451 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5452 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5453 #ifdef CONFIG_HIST_TRIGGERS
5454 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5455 #endif
5456 	"\t           -:[<group>/]<event>\n"
5457 #ifdef CONFIG_KPROBE_EVENTS
5458 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5459   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5460 #endif
5461 #ifdef CONFIG_UPROBE_EVENTS
5462   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5463 #endif
5464 	"\t     args: <name>=fetcharg[:type]\n"
5465 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5466 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5467 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5468 #else
5469 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5470 #endif
5471 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5472 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5473 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5474 	"\t           <type>\\[<array-size>\\]\n"
5475 #ifdef CONFIG_HIST_TRIGGERS
5476 	"\t    field: <stype> <name>;\n"
5477 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5478 	"\t           [unsigned] char/int/long\n"
5479 #endif
5480 #endif
5481 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5482 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5483 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5484 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5485 	"\t\t\t  events\n"
5486 	"      filter\t\t- If set, only events passing filter are traced\n"
5487 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5488 	"\t\t\t  <event>:\n"
5489 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5490 	"      filter\t\t- If set, only events passing filter are traced\n"
5491 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5492 	"\t    Format: <trigger>[:count][if <filter>]\n"
5493 	"\t   trigger: traceon, traceoff\n"
5494 	"\t            enable_event:<system>:<event>\n"
5495 	"\t            disable_event:<system>:<event>\n"
5496 #ifdef CONFIG_HIST_TRIGGERS
5497 	"\t            enable_hist:<system>:<event>\n"
5498 	"\t            disable_hist:<system>:<event>\n"
5499 #endif
5500 #ifdef CONFIG_STACKTRACE
5501 	"\t\t    stacktrace\n"
5502 #endif
5503 #ifdef CONFIG_TRACER_SNAPSHOT
5504 	"\t\t    snapshot\n"
5505 #endif
5506 #ifdef CONFIG_HIST_TRIGGERS
5507 	"\t\t    hist (see below)\n"
5508 #endif
5509 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5510 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5511 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5512 	"\t                  events/block/block_unplug/trigger\n"
5513 	"\t   The first disables tracing every time block_unplug is hit.\n"
5514 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5515 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5516 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5517 	"\t   Like function triggers, the counter is only decremented if it\n"
5518 	"\t    enabled or disabled tracing.\n"
5519 	"\t   To remove a trigger without a count:\n"
5520 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5521 	"\t   To remove a trigger with a count:\n"
5522 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5523 	"\t   Filters can be ignored when removing a trigger.\n"
5524 #ifdef CONFIG_HIST_TRIGGERS
5525 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5526 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5527 	"\t            [:values=<field1[,field2,...]>]\n"
5528 	"\t            [:sort=<field1[,field2,...]>]\n"
5529 	"\t            [:size=#entries]\n"
5530 	"\t            [:pause][:continue][:clear]\n"
5531 	"\t            [:name=histname1]\n"
5532 	"\t            [:<handler>.<action>]\n"
5533 	"\t            [if <filter>]\n\n"
5534 	"\t    When a matching event is hit, an entry is added to a hash\n"
5535 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5536 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5537 	"\t    correspond to fields in the event's format description.  Keys\n"
5538 	"\t    can be any field, or the special string 'stacktrace'.\n"
5539 	"\t    Compound keys consisting of up to two fields can be specified\n"
5540 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5541 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5542 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5543 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5544 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5545 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5546 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5547 	"\t    its histogram data will be shared with other triggers of the\n"
5548 	"\t    same name, and trigger hits will update this common data.\n\n"
5549 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5550 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5551 	"\t    triggers attached to an event, there will be a table for each\n"
5552 	"\t    trigger in the output.  The table displayed for a named\n"
5553 	"\t    trigger will be the same as any other instance having the\n"
5554 	"\t    same name.  The default format used to display a given field\n"
5555 	"\t    can be modified by appending any of the following modifiers\n"
5556 	"\t    to the field name, as applicable:\n\n"
5557 	"\t            .hex        display a number as a hex value\n"
5558 	"\t            .sym        display an address as a symbol\n"
5559 	"\t            .sym-offset display an address as a symbol and offset\n"
5560 	"\t            .execname   display a common_pid as a program name\n"
5561 	"\t            .syscall    display a syscall id as a syscall name\n"
5562 	"\t            .log2       display log2 value rather than raw number\n"
5563 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5564 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5565 	"\t    trigger or to start a hist trigger but not log any events\n"
5566 	"\t    until told to do so.  'continue' can be used to start or\n"
5567 	"\t    restart a paused hist trigger.\n\n"
5568 	"\t    The 'clear' parameter will clear the contents of a running\n"
5569 	"\t    hist trigger and leave its current paused/active state\n"
5570 	"\t    unchanged.\n\n"
5571 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5572 	"\t    have one event conditionally start and stop another event's\n"
5573 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5574 	"\t    the enable_event and disable_event triggers.\n\n"
5575 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5576 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5577 	"\t        <handler>.<action>\n\n"
5578 	"\t    The available handlers are:\n\n"
5579 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5580 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5581 	"\t        onchange(var)            - invoke action if var changes\n\n"
5582 	"\t    The available actions are:\n\n"
5583 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5584 	"\t        save(field,...)                      - save current event fields\n"
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5587 #endif
5588 #ifdef CONFIG_SYNTH_EVENTS
5589 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5590 	"\t  Write into this file to define/undefine new synthetic events.\n"
5591 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5592 #endif
5593 #endif
5594 ;
5595 
5596 static ssize_t
5597 tracing_readme_read(struct file *filp, char __user *ubuf,
5598 		       size_t cnt, loff_t *ppos)
5599 {
5600 	return simple_read_from_buffer(ubuf, cnt, ppos,
5601 					readme_msg, strlen(readme_msg));
5602 }
5603 
5604 static const struct file_operations tracing_readme_fops = {
5605 	.open		= tracing_open_generic,
5606 	.read		= tracing_readme_read,
5607 	.llseek		= generic_file_llseek,
5608 };
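
/*
 * Usage sketch (not part of the build): readme_msg above is exposed as
 * the tracefs "README" file, so the whole help text can be dumped from
 * userspace with e.g.
 *
 *	cat /sys/kernel/tracing/README
 *
 * assuming tracefs is mounted at /sys/kernel/tracing.
 */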
5609 
5610 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5611 {
5612 	int *ptr = v;
5613 
5614 	if (*pos || m->count)
5615 		ptr++;
5616 
5617 	(*pos)++;
5618 
5619 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5620 		if (trace_find_tgid(*ptr))
5621 			return ptr;
5622 	}
5623 
5624 	return NULL;
5625 }
5626 
5627 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5628 {
5629 	void *v;
5630 	loff_t l = 0;
5631 
5632 	if (!tgid_map)
5633 		return NULL;
5634 
5635 	v = &tgid_map[0];
5636 	while (l <= *pos) {
5637 		v = saved_tgids_next(m, v, &l);
5638 		if (!v)
5639 			return NULL;
5640 	}
5641 
5642 	return v;
5643 }
5644 
5645 static void saved_tgids_stop(struct seq_file *m, void *v)
5646 {
5647 }
5648 
5649 static int saved_tgids_show(struct seq_file *m, void *v)
5650 {
5651 	int pid = (int *)v - tgid_map;
5652 
5653 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5654 	return 0;
5655 }
5656 
5657 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5658 	.start		= saved_tgids_start,
5659 	.stop		= saved_tgids_stop,
5660 	.next		= saved_tgids_next,
5661 	.show		= saved_tgids_show,
5662 };
5663 
5664 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5665 {
5666 	int ret;
5667 
5668 	ret = tracing_check_open_get_tr(NULL);
5669 	if (ret)
5670 		return ret;
5671 
5672 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5673 }
5674 
5675 
5676 static const struct file_operations tracing_saved_tgids_fops = {
5677 	.open		= tracing_saved_tgids_open,
5678 	.read		= seq_read,
5679 	.llseek		= seq_lseek,
5680 	.release	= seq_release,
5681 };
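
/*
 * Usage sketch: these seq_ops back the tracefs "saved_tgids" file.
 * Each output line is "<pid> <tgid>" as printed by saved_tgids_show(),
 * e.g. (illustrative values only):
 *
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1234 1230
 */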
5682 
5683 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5684 {
5685 	unsigned int *ptr = v;
5686 
5687 	if (*pos || m->count)
5688 		ptr++;
5689 
5690 	(*pos)++;
5691 
5692 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5693 	     ptr++) {
5694 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5695 			continue;
5696 
5697 		return ptr;
5698 	}
5699 
5700 	return NULL;
5701 }
5702 
5703 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5704 {
5705 	void *v;
5706 	loff_t l = 0;
5707 
5708 	preempt_disable();
5709 	arch_spin_lock(&trace_cmdline_lock);
5710 
5711 	v = &savedcmd->map_cmdline_to_pid[0];
5712 	while (l <= *pos) {
5713 		v = saved_cmdlines_next(m, v, &l);
5714 		if (!v)
5715 			return NULL;
5716 	}
5717 
5718 	return v;
5719 }
5720 
5721 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5722 {
5723 	arch_spin_unlock(&trace_cmdline_lock);
5724 	preempt_enable();
5725 }
5726 
5727 static int saved_cmdlines_show(struct seq_file *m, void *v)
5728 {
5729 	char buf[TASK_COMM_LEN];
5730 	unsigned int *pid = v;
5731 
5732 	__trace_find_cmdline(*pid, buf);
5733 	seq_printf(m, "%d %s\n", *pid, buf);
5734 	return 0;
5735 }
5736 
5737 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5738 	.start		= saved_cmdlines_start,
5739 	.next		= saved_cmdlines_next,
5740 	.stop		= saved_cmdlines_stop,
5741 	.show		= saved_cmdlines_show,
5742 };
5743 
5744 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5745 {
5746 	int ret;
5747 
5748 	ret = tracing_check_open_get_tr(NULL);
5749 	if (ret)
5750 		return ret;
5751 
5752 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5753 }
5754 
5755 static const struct file_operations tracing_saved_cmdlines_fops = {
5756 	.open		= tracing_saved_cmdlines_open,
5757 	.read		= seq_read,
5758 	.llseek		= seq_lseek,
5759 	.release	= seq_release,
5760 };
5761 
5762 static ssize_t
5763 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5764 				 size_t cnt, loff_t *ppos)
5765 {
5766 	char buf[64];
5767 	int r;
5768 
5769 	arch_spin_lock(&trace_cmdline_lock);
5770 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5771 	arch_spin_unlock(&trace_cmdline_lock);
5772 
5773 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 }
5775 
5776 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5777 {
5778 	kfree(s->saved_cmdlines);
5779 	kfree(s->map_cmdline_to_pid);
5780 	kfree(s);
5781 }
5782 
5783 static int tracing_resize_saved_cmdlines(unsigned int val)
5784 {
5785 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5786 
5787 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5788 	if (!s)
5789 		return -ENOMEM;
5790 
5791 	if (allocate_cmdlines_buffer(val, s) < 0) {
5792 		kfree(s);
5793 		return -ENOMEM;
5794 	}
5795 
5796 	arch_spin_lock(&trace_cmdline_lock);
5797 	savedcmd_temp = savedcmd;
5798 	savedcmd = s;
5799 	arch_spin_unlock(&trace_cmdline_lock);
5800 	free_saved_cmdlines_buffer(savedcmd_temp);
5801 
5802 	return 0;
5803 }
5804 
5805 static ssize_t
5806 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5807 				  size_t cnt, loff_t *ppos)
5808 {
5809 	unsigned long val;
5810 	int ret;
5811 
5812 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5813 	if (ret)
5814 		return ret;
5815 
5816 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5817 	if (!val || val > PID_MAX_DEFAULT)
5818 		return -EINVAL;
5819 
5820 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5821 	if (ret < 0)
5822 		return ret;
5823 
5824 	*ppos += cnt;
5825 
5826 	return cnt;
5827 }
5828 
5829 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5830 	.open		= tracing_open_generic,
5831 	.read		= tracing_saved_cmdlines_size_read,
5832 	.write		= tracing_saved_cmdlines_size_write,
5833 };
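
/*
 * Usage sketch: "saved_cmdlines_size" reports and resizes how many
 * cmdline slots are kept for PID -> comm resolution, e.g.
 *
 *	echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL by
 * tracing_saved_cmdlines_size_write() above.
 */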
5834 
5835 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5836 static union trace_eval_map_item *
5837 update_eval_map(union trace_eval_map_item *ptr)
5838 {
5839 	if (!ptr->map.eval_string) {
5840 		if (ptr->tail.next) {
5841 			ptr = ptr->tail.next;
5842 			/* Set ptr to the next real item (skip head) */
5843 			ptr++;
5844 		} else
5845 			return NULL;
5846 	}
5847 	return ptr;
5848 }
5849 
5850 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5851 {
5852 	union trace_eval_map_item *ptr = v;
5853 
5854 	/*
5855 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5856 	 * This really should never happen.
5857 	 */
5858 	(*pos)++;
5859 	ptr = update_eval_map(ptr);
5860 	if (WARN_ON_ONCE(!ptr))
5861 		return NULL;
5862 
5863 	ptr++;
5864 	ptr = update_eval_map(ptr);
5865 
5866 	return ptr;
5867 }
5868 
5869 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5870 {
5871 	union trace_eval_map_item *v;
5872 	loff_t l = 0;
5873 
5874 	mutex_lock(&trace_eval_mutex);
5875 
5876 	v = trace_eval_maps;
5877 	if (v)
5878 		v++;
5879 
5880 	while (v && l < *pos) {
5881 		v = eval_map_next(m, v, &l);
5882 	}
5883 
5884 	return v;
5885 }
5886 
5887 static void eval_map_stop(struct seq_file *m, void *v)
5888 {
5889 	mutex_unlock(&trace_eval_mutex);
5890 }
5891 
5892 static int eval_map_show(struct seq_file *m, void *v)
5893 {
5894 	union trace_eval_map_item *ptr = v;
5895 
5896 	seq_printf(m, "%s %ld (%s)\n",
5897 		   ptr->map.eval_string, ptr->map.eval_value,
5898 		   ptr->map.system);
5899 
5900 	return 0;
5901 }
5902 
5903 static const struct seq_operations tracing_eval_map_seq_ops = {
5904 	.start		= eval_map_start,
5905 	.next		= eval_map_next,
5906 	.stop		= eval_map_stop,
5907 	.show		= eval_map_show,
5908 };
5909 
5910 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5911 {
5912 	int ret;
5913 
5914 	ret = tracing_check_open_get_tr(NULL);
5915 	if (ret)
5916 		return ret;
5917 
5918 	return seq_open(filp, &tracing_eval_map_seq_ops);
5919 }
5920 
5921 static const struct file_operations tracing_eval_map_fops = {
5922 	.open		= tracing_eval_map_open,
5923 	.read		= seq_read,
5924 	.llseek		= seq_lseek,
5925 	.release	= seq_release,
5926 };
5927 
5928 static inline union trace_eval_map_item *
5929 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5930 {
5931 	/* Return tail of array given the head */
5932 	return ptr + ptr->head.length + 1;
5933 }
5934 
5935 static void
5936 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5937 			   int len)
5938 {
5939 	struct trace_eval_map **stop;
5940 	struct trace_eval_map **map;
5941 	union trace_eval_map_item *map_array;
5942 	union trace_eval_map_item *ptr;
5943 
5944 	stop = start + len;
5945 
5946 	/*
5947 	 * The trace_eval_maps contains the map plus a head and tail item,
5948 	 * where the head holds the module and length of array, and the
5949 	 * tail holds a pointer to the next list.
5950 	 */
5951 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5952 	if (!map_array) {
5953 		pr_warn("Unable to allocate trace eval mapping\n");
5954 		return;
5955 	}
5956 
5957 	mutex_lock(&trace_eval_mutex);
5958 
5959 	if (!trace_eval_maps)
5960 		trace_eval_maps = map_array;
5961 	else {
5962 		ptr = trace_eval_maps;
5963 		for (;;) {
5964 			ptr = trace_eval_jmp_to_tail(ptr);
5965 			if (!ptr->tail.next)
5966 				break;
5967 			ptr = ptr->tail.next;
5968 
5969 		}
5970 		ptr->tail.next = map_array;
5971 	}
5972 	map_array->head.mod = mod;
5973 	map_array->head.length = len;
5974 	map_array++;
5975 
5976 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5977 		map_array->map = **map;
5978 		map_array++;
5979 	}
5980 	memset(map_array, 0, sizeof(*map_array));
5981 
5982 	mutex_unlock(&trace_eval_mutex);
5983 }
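
/*
 * Layout sketch of the array built above, shown for len == 2:
 *
 *	[ head: mod, length ][ map 0 ][ map 1 ][ tail: next ]
 *
 * The final memset() zeroes the tail element; its tail.next is filled
 * in when the next module's array is appended by the loop above, and
 * trace_eval_jmp_to_tail() is what skips from a head to its tail.
 */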
5984 
5985 static void trace_create_eval_file(struct dentry *d_tracer)
5986 {
5987 	trace_create_file("eval_map", 0444, d_tracer,
5988 			  NULL, &tracing_eval_map_fops);
5989 }
5990 
5991 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5992 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5993 static inline void trace_insert_eval_map_file(struct module *mod,
5994 			      struct trace_eval_map **start, int len) { }
5995 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5996 
5997 static void trace_insert_eval_map(struct module *mod,
5998 				  struct trace_eval_map **start, int len)
5999 {
6000 	struct trace_eval_map **map;
6001 
6002 	if (len <= 0)
6003 		return;
6004 
6005 	map = start;
6006 
6007 	trace_event_eval_update(map, len);
6008 
6009 	trace_insert_eval_map_file(mod, start, len);
6010 }
6011 
6012 static ssize_t
6013 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6014 		       size_t cnt, loff_t *ppos)
6015 {
6016 	struct trace_array *tr = filp->private_data;
6017 	char buf[MAX_TRACER_SIZE+2];
6018 	int r;
6019 
6020 	mutex_lock(&trace_types_lock);
6021 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6022 	mutex_unlock(&trace_types_lock);
6023 
6024 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6025 }
6026 
6027 int tracer_init(struct tracer *t, struct trace_array *tr)
6028 {
6029 	tracing_reset_online_cpus(&tr->array_buffer);
6030 	return t->init(tr);
6031 }
6032 
6033 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6034 {
6035 	int cpu;
6036 
6037 	for_each_tracing_cpu(cpu)
6038 		per_cpu_ptr(buf->data, cpu)->entries = val;
6039 }
6040 
6041 #ifdef CONFIG_TRACER_MAX_TRACE
6042 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6043 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6044 					struct array_buffer *size_buf, int cpu_id)
6045 {
6046 	int cpu, ret = 0;
6047 
6048 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6049 		for_each_tracing_cpu(cpu) {
6050 			ret = ring_buffer_resize(trace_buf->buffer,
6051 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6052 			if (ret < 0)
6053 				break;
6054 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6055 				per_cpu_ptr(size_buf->data, cpu)->entries;
6056 		}
6057 	} else {
6058 		ret = ring_buffer_resize(trace_buf->buffer,
6059 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6060 		if (ret == 0)
6061 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6062 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6063 	}
6064 
6065 	return ret;
6066 }
6067 #endif /* CONFIG_TRACER_MAX_TRACE */
6068 
6069 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6070 					unsigned long size, int cpu)
6071 {
6072 	int ret;
6073 
6074 	/*
6075 	 * If kernel or user changes the size of the ring buffer
6076 	 * we use the size that was given, and we can forget about
6077 	 * expanding it later.
6078 	 */
6079 	ring_buffer_expanded = true;
6080 
6081 	/* May be called before buffers are initialized */
6082 	if (!tr->array_buffer.buffer)
6083 		return 0;
6084 
6085 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6086 	if (ret < 0)
6087 		return ret;
6088 
6089 #ifdef CONFIG_TRACER_MAX_TRACE
6090 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6091 	    !tr->current_trace->use_max_tr)
6092 		goto out;
6093 
6094 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6095 	if (ret < 0) {
6096 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6097 						     &tr->array_buffer, cpu);
6098 		if (r < 0) {
6099 			/*
6100 			 * AARGH! We are left with different
6101 			 * size max buffer!!!!
6102 			 * The max buffer is our "snapshot" buffer.
6103 			 * When a tracer needs a snapshot (one of the
6104 			 * latency tracers), it swaps the max buffer
6105 			 * with the saved snapshot. We succeeded in updating
6106 			 * the size of the main buffer, but failed to update
6107 			 * the size of the max buffer. But when we tried
6108 			 * to reset the main buffer to the original size, we
6109 			 * failed there too. This is very unlikely to
6110 			 * happen, but if it does, warn and kill all
6111 			 * tracing.
6112 			 */
6113 			WARN_ON(1);
6114 			tracing_disabled = 1;
6115 		}
6116 		return ret;
6117 	}
6118 
6119 	if (cpu == RING_BUFFER_ALL_CPUS)
6120 		set_buffer_entries(&tr->max_buffer, size);
6121 	else
6122 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6123 
6124  out:
6125 #endif /* CONFIG_TRACER_MAX_TRACE */
6126 
6127 	if (cpu == RING_BUFFER_ALL_CPUS)
6128 		set_buffer_entries(&tr->array_buffer, size);
6129 	else
6130 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6131 
6132 	return ret;
6133 }
6134 
6135 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6136 				  unsigned long size, int cpu_id)
6137 {
6138 	int ret = size;
6139 
6140 	mutex_lock(&trace_types_lock);
6141 
6142 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6143 		/* make sure this CPU is enabled in the mask */
6144 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6145 			ret = -EINVAL;
6146 			goto out;
6147 		}
6148 	}
6149 
6150 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6151 	if (ret < 0)
6152 		ret = -ENOMEM;
6153 
6154 out:
6155 	mutex_unlock(&trace_types_lock);
6156 
6157 	return ret;
6158 }
6159 
6160 
6161 /**
6162  * tracing_update_buffers - used by tracing facility to expand ring buffers
6163  *
6164  * To save memory on systems where tracing is configured in but never
6165  * used, the ring buffers are initially set to a minimum size. Once a
6166  * user starts to use the tracing facility, the buffers need to grow
6167  * to their default size.
6168  *
6169  * This function is to be called when a tracer is about to be used.
6170  */
6171 int tracing_update_buffers(void)
6172 {
6173 	int ret = 0;
6174 
6175 	mutex_lock(&trace_types_lock);
6176 	if (!ring_buffer_expanded)
6177 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6178 						RING_BUFFER_ALL_CPUS);
6179 	mutex_unlock(&trace_types_lock);
6180 
6181 	return ret;
6182 }
6183 
6184 struct trace_option_dentry;
6185 
6186 static void
6187 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6188 
6189 /*
6190  * Used to clear out the tracer before deletion of an instance.
6191  * Must have trace_types_lock held.
6192  */
6193 static void tracing_set_nop(struct trace_array *tr)
6194 {
6195 	if (tr->current_trace == &nop_trace)
6196 		return;
6197 
6198 	tr->current_trace->enabled--;
6199 
6200 	if (tr->current_trace->reset)
6201 		tr->current_trace->reset(tr);
6202 
6203 	tr->current_trace = &nop_trace;
6204 }
6205 
6206 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6207 {
6208 	/* Only enable if the directory has been created already. */
6209 	if (!tr->dir)
6210 		return;
6211 
6212 	create_trace_option_files(tr, t);
6213 }
6214 
6215 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6216 {
6217 	struct tracer *t;
6218 #ifdef CONFIG_TRACER_MAX_TRACE
6219 	bool had_max_tr;
6220 #endif
6221 	int ret = 0;
6222 
6223 	mutex_lock(&trace_types_lock);
6224 
6225 	if (!ring_buffer_expanded) {
6226 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6227 						RING_BUFFER_ALL_CPUS);
6228 		if (ret < 0)
6229 			goto out;
6230 		ret = 0;
6231 	}
6232 
6233 	for (t = trace_types; t; t = t->next) {
6234 		if (strcmp(t->name, buf) == 0)
6235 			break;
6236 	}
6237 	if (!t) {
6238 		ret = -EINVAL;
6239 		goto out;
6240 	}
6241 	if (t == tr->current_trace)
6242 		goto out;
6243 
6244 #ifdef CONFIG_TRACER_SNAPSHOT
6245 	if (t->use_max_tr) {
6246 		arch_spin_lock(&tr->max_lock);
6247 		if (tr->cond_snapshot)
6248 			ret = -EBUSY;
6249 		arch_spin_unlock(&tr->max_lock);
6250 		if (ret)
6251 			goto out;
6252 	}
6253 #endif
6254 	/* Some tracers won't work on kernel command line */
6255 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6256 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6257 			t->name);
6258 		goto out;
6259 	}
6260 
6261 	/* Some tracers are only allowed for the top level buffer */
6262 	if (!trace_ok_for_array(t, tr)) {
6263 		ret = -EINVAL;
6264 		goto out;
6265 	}
6266 
6267 	/* If trace pipe files are being read, we can't change the tracer */
6268 	if (tr->trace_ref) {
6269 		ret = -EBUSY;
6270 		goto out;
6271 	}
6272 
6273 	trace_branch_disable();
6274 
6275 	tr->current_trace->enabled--;
6276 
6277 	if (tr->current_trace->reset)
6278 		tr->current_trace->reset(tr);
6279 
6280 	/* Current trace needs to be nop_trace before synchronize_rcu */
6281 	tr->current_trace = &nop_trace;
6282 
6283 #ifdef CONFIG_TRACER_MAX_TRACE
6284 	had_max_tr = tr->allocated_snapshot;
6285 
6286 	if (had_max_tr && !t->use_max_tr) {
6287 		/*
6288 		 * We need to make sure that the update_max_tr sees that
6289 		 * current_trace changed to nop_trace to keep it from
6290 		 * swapping the buffers after we resize it.
6291 		 * update_max_tr() is called with interrupts disabled,
6292 		 * so a synchronize_rcu() is sufficient.
6293 		 */
6294 		synchronize_rcu();
6295 		free_snapshot(tr);
6296 	}
6297 #endif
6298 
6299 #ifdef CONFIG_TRACER_MAX_TRACE
6300 	if (t->use_max_tr && !had_max_tr) {
6301 		ret = tracing_alloc_snapshot_instance(tr);
6302 		if (ret < 0)
6303 			goto out;
6304 	}
6305 #endif
6306 
6307 	if (t->init) {
6308 		ret = tracer_init(t, tr);
6309 		if (ret)
6310 			goto out;
6311 	}
6312 
6313 	tr->current_trace = t;
6314 	tr->current_trace->enabled++;
6315 	trace_branch_enable(tr);
6316  out:
6317 	mutex_unlock(&trace_types_lock);
6318 
6319 	return ret;
6320 }
6321 
6322 static ssize_t
6323 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6324 			size_t cnt, loff_t *ppos)
6325 {
6326 	struct trace_array *tr = filp->private_data;
6327 	char buf[MAX_TRACER_SIZE+1];
6328 	int i;
6329 	size_t ret;
6330 	int err;
6331 
6332 	ret = cnt;
6333 
6334 	if (cnt > MAX_TRACER_SIZE)
6335 		cnt = MAX_TRACER_SIZE;
6336 
6337 	if (copy_from_user(buf, ubuf, cnt))
6338 		return -EFAULT;
6339 
6340 	buf[cnt] = 0;
6341 
6342 	/* strip trailing whitespace. */
6343 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6344 		buf[i] = 0;
6345 
6346 	err = tracing_set_tracer(tr, buf);
6347 	if (err)
6348 		return err;
6349 
6350 	*ppos += ret;
6351 
6352 	return ret;
6353 }
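
/*
 * Usage sketch: tracing_set_trace_write() services writes to the
 * tracefs "current_tracer" file, e.g.
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 *
 * The first selects the function tracer (if it is configured in); the
 * second switches back to the no-op tracer.
 */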
6354 
6355 static ssize_t
6356 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6357 		   size_t cnt, loff_t *ppos)
6358 {
6359 	char buf[64];
6360 	int r;
6361 
6362 	r = snprintf(buf, sizeof(buf), "%ld\n",
6363 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6364 	if (r > sizeof(buf))
6365 		r = sizeof(buf);
6366 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6367 }
6368 
6369 static ssize_t
6370 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6371 		    size_t cnt, loff_t *ppos)
6372 {
6373 	unsigned long val;
6374 	int ret;
6375 
6376 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6377 	if (ret)
6378 		return ret;
6379 
6380 	*ptr = val * 1000;
6381 
6382 	return cnt;
6383 }
6384 
6385 static ssize_t
6386 tracing_thresh_read(struct file *filp, char __user *ubuf,
6387 		    size_t cnt, loff_t *ppos)
6388 {
6389 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6390 }
6391 
6392 static ssize_t
6393 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6394 		     size_t cnt, loff_t *ppos)
6395 {
6396 	struct trace_array *tr = filp->private_data;
6397 	int ret;
6398 
6399 	mutex_lock(&trace_types_lock);
6400 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6401 	if (ret < 0)
6402 		goto out;
6403 
6404 	if (tr->current_trace->update_thresh) {
6405 		ret = tr->current_trace->update_thresh(tr);
6406 		if (ret < 0)
6407 			goto out;
6408 	}
6409 
6410 	ret = cnt;
6411 out:
6412 	mutex_unlock(&trace_types_lock);
6413 
6414 	return ret;
6415 }
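
/*
 * Usage sketch: tracing_thresh is exposed as the tracefs
 * "tracing_thresh" file.  Values are written in microseconds and
 * stored in nanoseconds (see tracing_nsecs_write() above), e.g.
 *
 *	echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets a 100 usec threshold for the latency tracers that honor it.
 */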
6416 
6417 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6418 
6419 static ssize_t
6420 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6421 		     size_t cnt, loff_t *ppos)
6422 {
6423 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6424 }
6425 
6426 static ssize_t
6427 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6428 		      size_t cnt, loff_t *ppos)
6429 {
6430 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6431 }
6432 
6433 #endif
6434 
6435 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6436 {
6437 	struct trace_array *tr = inode->i_private;
6438 	struct trace_iterator *iter;
6439 	int ret;
6440 
6441 	ret = tracing_check_open_get_tr(tr);
6442 	if (ret)
6443 		return ret;
6444 
6445 	mutex_lock(&trace_types_lock);
6446 
6447 	/* create a buffer to store the information to pass to userspace */
6448 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6449 	if (!iter) {
6450 		ret = -ENOMEM;
6451 		__trace_array_put(tr);
6452 		goto out;
6453 	}
6454 
6455 	trace_seq_init(&iter->seq);
6456 	iter->trace = tr->current_trace;
6457 
6458 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6459 		ret = -ENOMEM;
6460 		goto fail;
6461 	}
6462 
6463 	/* trace pipe does not show start of buffer */
6464 	cpumask_setall(iter->started);
6465 
6466 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6467 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6468 
6469 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6470 	if (trace_clocks[tr->clock_id].in_ns)
6471 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6472 
6473 	iter->tr = tr;
6474 	iter->array_buffer = &tr->array_buffer;
6475 	iter->cpu_file = tracing_get_cpu(inode);
6476 	mutex_init(&iter->mutex);
6477 	filp->private_data = iter;
6478 
6479 	if (iter->trace->pipe_open)
6480 		iter->trace->pipe_open(iter);
6481 
6482 	nonseekable_open(inode, filp);
6483 
6484 	tr->trace_ref++;
6485 out:
6486 	mutex_unlock(&trace_types_lock);
6487 	return ret;
6488 
6489 fail:
6490 	kfree(iter);
6491 	__trace_array_put(tr);
6492 	mutex_unlock(&trace_types_lock);
6493 	return ret;
6494 }
6495 
6496 static int tracing_release_pipe(struct inode *inode, struct file *file)
6497 {
6498 	struct trace_iterator *iter = file->private_data;
6499 	struct trace_array *tr = inode->i_private;
6500 
6501 	mutex_lock(&trace_types_lock);
6502 
6503 	tr->trace_ref--;
6504 
6505 	if (iter->trace->pipe_close)
6506 		iter->trace->pipe_close(iter);
6507 
6508 	mutex_unlock(&trace_types_lock);
6509 
6510 	free_cpumask_var(iter->started);
6511 	mutex_destroy(&iter->mutex);
6512 	kfree(iter);
6513 
6514 	trace_array_put(tr);
6515 
6516 	return 0;
6517 }
6518 
6519 static __poll_t
6520 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6521 {
6522 	struct trace_array *tr = iter->tr;
6523 
6524 	/* Iterators are static, they should be filled or empty */
6525 	if (trace_buffer_iter(iter, iter->cpu_file))
6526 		return EPOLLIN | EPOLLRDNORM;
6527 
6528 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6529 		/*
6530 		 * Always select as readable when in blocking mode
6531 		 */
6532 		return EPOLLIN | EPOLLRDNORM;
6533 	else
6534 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6535 					     filp, poll_table);
6536 }
6537 
6538 static __poll_t
6539 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6540 {
6541 	struct trace_iterator *iter = filp->private_data;
6542 
6543 	return trace_poll(iter, filp, poll_table);
6544 }
6545 
6546 /* Must be called with iter->mutex held. */
6547 static int tracing_wait_pipe(struct file *filp)
6548 {
6549 	struct trace_iterator *iter = filp->private_data;
6550 	int ret;
6551 
6552 	while (trace_empty(iter)) {
6553 
6554 		if ((filp->f_flags & O_NONBLOCK)) {
6555 			return -EAGAIN;
6556 		}
6557 
6558 		/*
6559 		 * We block until we read something and tracing is disabled.
6560 		 * We still block if tracing is disabled, but we have never
6561 		 * read anything. This allows a user to cat this file, and
6562 		 * then enable tracing. But after we have read something,
6563 		 * we give an EOF when tracing is again disabled.
6564 		 *
6565 		 * iter->pos will be 0 if we haven't read anything.
6566 		 */
6567 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6568 			break;
6569 
6570 		mutex_unlock(&iter->mutex);
6571 
6572 		ret = wait_on_pipe(iter, 0);
6573 
6574 		mutex_lock(&iter->mutex);
6575 
6576 		if (ret)
6577 			return ret;
6578 	}
6579 
6580 	return 1;
6581 }
6582 
6583 /*
6584  * Consumer reader.
6585  */
6586 static ssize_t
6587 tracing_read_pipe(struct file *filp, char __user *ubuf,
6588 		  size_t cnt, loff_t *ppos)
6589 {
6590 	struct trace_iterator *iter = filp->private_data;
6591 	ssize_t sret;
6592 
6593 	/*
6594 	 * Avoid more than one consumer on a single file descriptor.
6595 	 * This is just a matter of trace coherency; the ring buffer itself
6596 	 * is protected.
6597 	 */
6598 	mutex_lock(&iter->mutex);
6599 
6600 	/* return any leftover data */
6601 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6602 	if (sret != -EBUSY)
6603 		goto out;
6604 
6605 	trace_seq_init(&iter->seq);
6606 
6607 	if (iter->trace->read) {
6608 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6609 		if (sret)
6610 			goto out;
6611 	}
6612 
6613 waitagain:
6614 	sret = tracing_wait_pipe(filp);
6615 	if (sret <= 0)
6616 		goto out;
6617 
6618 	/* stop when tracing is finished */
6619 	if (trace_empty(iter)) {
6620 		sret = 0;
6621 		goto out;
6622 	}
6623 
6624 	if (cnt >= PAGE_SIZE)
6625 		cnt = PAGE_SIZE - 1;
6626 
6627 	/* reset all but tr, trace, and overruns */
6628 	memset(&iter->seq, 0,
6629 	       sizeof(struct trace_iterator) -
6630 	       offsetof(struct trace_iterator, seq));
6631 	cpumask_clear(iter->started);
6632 	trace_seq_init(&iter->seq);
6633 	iter->pos = -1;
6634 
6635 	trace_event_read_lock();
6636 	trace_access_lock(iter->cpu_file);
6637 	while (trace_find_next_entry_inc(iter) != NULL) {
6638 		enum print_line_t ret;
6639 		int save_len = iter->seq.seq.len;
6640 
6641 		ret = print_trace_line(iter);
6642 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6643 			/* don't print partial lines */
6644 			iter->seq.seq.len = save_len;
6645 			break;
6646 		}
6647 		if (ret != TRACE_TYPE_NO_CONSUME)
6648 			trace_consume(iter);
6649 
6650 		if (trace_seq_used(&iter->seq) >= cnt)
6651 			break;
6652 
6653 		/*
6654 		 * Setting the full flag means we reached the trace_seq buffer
6655 		 * size and we should have left via the partial-line condition above.
6656 		 * One of the trace_seq_* functions is not used properly.
6657 		 */
6658 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6659 			  iter->ent->type);
6660 	}
6661 	trace_access_unlock(iter->cpu_file);
6662 	trace_event_read_unlock();
6663 
6664 	/* Now copy what we have to the user */
6665 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6666 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6667 		trace_seq_init(&iter->seq);
6668 
6669 	/*
6670 	 * If there was nothing to send to user, in spite of consuming trace
6671 	 * entries, go back to wait for more entries.
6672 	 */
6673 	if (sret == -EBUSY)
6674 		goto waitagain;
6675 
6676 out:
6677 	mutex_unlock(&iter->mutex);
6678 
6679 	return sret;
6680 }
6681 
6682 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6683 				     unsigned int idx)
6684 {
6685 	__free_page(spd->pages[idx]);
6686 }
6687 
6688 static size_t
6689 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6690 {
6691 	size_t count;
6692 	int save_len;
6693 	int ret;
6694 
6695 	/* Seq buffer is page-sized, exactly what we need. */
6696 	for (;;) {
6697 		save_len = iter->seq.seq.len;
6698 		ret = print_trace_line(iter);
6699 
6700 		if (trace_seq_has_overflowed(&iter->seq)) {
6701 			iter->seq.seq.len = save_len;
6702 			break;
6703 		}
6704 
6705 		/*
6706 		 * This should not be hit, because it should only
6707 		 * be set if the iter->seq overflowed. But check it
6708 		 * anyway to be safe.
6709 		 */
6710 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6711 			iter->seq.seq.len = save_len;
6712 			break;
6713 		}
6714 
6715 		count = trace_seq_used(&iter->seq) - save_len;
6716 		if (rem < count) {
6717 			rem = 0;
6718 			iter->seq.seq.len = save_len;
6719 			break;
6720 		}
6721 
6722 		if (ret != TRACE_TYPE_NO_CONSUME)
6723 			trace_consume(iter);
6724 		rem -= count;
6725 		if (!trace_find_next_entry_inc(iter))	{
6726 			rem = 0;
6727 			iter->ent = NULL;
6728 			break;
6729 		}
6730 	}
6731 
6732 	return rem;
6733 }
6734 
6735 static ssize_t tracing_splice_read_pipe(struct file *filp,
6736 					loff_t *ppos,
6737 					struct pipe_inode_info *pipe,
6738 					size_t len,
6739 					unsigned int flags)
6740 {
6741 	struct page *pages_def[PIPE_DEF_BUFFERS];
6742 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6743 	struct trace_iterator *iter = filp->private_data;
6744 	struct splice_pipe_desc spd = {
6745 		.pages		= pages_def,
6746 		.partial	= partial_def,
6747 		.nr_pages	= 0, /* This gets updated below. */
6748 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6749 		.ops		= &default_pipe_buf_ops,
6750 		.spd_release	= tracing_spd_release_pipe,
6751 	};
6752 	ssize_t ret;
6753 	size_t rem;
6754 	unsigned int i;
6755 
6756 	if (splice_grow_spd(pipe, &spd))
6757 		return -ENOMEM;
6758 
6759 	mutex_lock(&iter->mutex);
6760 
6761 	if (iter->trace->splice_read) {
6762 		ret = iter->trace->splice_read(iter, filp,
6763 					       ppos, pipe, len, flags);
6764 		if (ret)
6765 			goto out_err;
6766 	}
6767 
6768 	ret = tracing_wait_pipe(filp);
6769 	if (ret <= 0)
6770 		goto out_err;
6771 
6772 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6773 		ret = -EFAULT;
6774 		goto out_err;
6775 	}
6776 
6777 	trace_event_read_lock();
6778 	trace_access_lock(iter->cpu_file);
6779 
6780 	/* Fill as many pages as possible. */
6781 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6782 		spd.pages[i] = alloc_page(GFP_KERNEL);
6783 		if (!spd.pages[i])
6784 			break;
6785 
6786 		rem = tracing_fill_pipe_page(rem, iter);
6787 
6788 		/* Copy the data into the page, so we can start over. */
6789 		ret = trace_seq_to_buffer(&iter->seq,
6790 					  page_address(spd.pages[i]),
6791 					  trace_seq_used(&iter->seq));
6792 		if (ret < 0) {
6793 			__free_page(spd.pages[i]);
6794 			break;
6795 		}
6796 		spd.partial[i].offset = 0;
6797 		spd.partial[i].len = trace_seq_used(&iter->seq);
6798 
6799 		trace_seq_init(&iter->seq);
6800 	}
6801 
6802 	trace_access_unlock(iter->cpu_file);
6803 	trace_event_read_unlock();
6804 	mutex_unlock(&iter->mutex);
6805 
6806 	spd.nr_pages = i;
6807 
6808 	if (i)
6809 		ret = splice_to_pipe(pipe, &spd);
6810 	else
6811 		ret = 0;
6812 out:
6813 	splice_shrink_spd(&spd);
6814 	return ret;
6815 
6816 out_err:
6817 	mutex_unlock(&iter->mutex);
6818 	goto out;
6819 }
6820 
6821 static ssize_t
6822 tracing_entries_read(struct file *filp, char __user *ubuf,
6823 		     size_t cnt, loff_t *ppos)
6824 {
6825 	struct inode *inode = file_inode(filp);
6826 	struct trace_array *tr = inode->i_private;
6827 	int cpu = tracing_get_cpu(inode);
6828 	char buf[64];
6829 	int r = 0;
6830 	ssize_t ret;
6831 
6832 	mutex_lock(&trace_types_lock);
6833 
6834 	if (cpu == RING_BUFFER_ALL_CPUS) {
6835 		int cpu, buf_size_same;
6836 		unsigned long size;
6837 
6838 		size = 0;
6839 		buf_size_same = 1;
6840 		/* check if all cpu sizes are same */
6841 		for_each_tracing_cpu(cpu) {
6842 			/* fill in the size from first enabled cpu */
6843 			if (size == 0)
6844 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6845 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6846 				buf_size_same = 0;
6847 				break;
6848 			}
6849 		}
6850 
6851 		if (buf_size_same) {
6852 			if (!ring_buffer_expanded)
6853 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6854 					    size >> 10,
6855 					    trace_buf_size >> 10);
6856 			else
6857 				r = sprintf(buf, "%lu\n", size >> 10);
6858 		} else
6859 			r = sprintf(buf, "X\n");
6860 	} else
6861 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6862 
6863 	mutex_unlock(&trace_types_lock);
6864 
6865 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6866 	return ret;
6867 }
6868 
6869 static ssize_t
6870 tracing_entries_write(struct file *filp, const char __user *ubuf,
6871 		      size_t cnt, loff_t *ppos)
6872 {
6873 	struct inode *inode = file_inode(filp);
6874 	struct trace_array *tr = inode->i_private;
6875 	unsigned long val;
6876 	int ret;
6877 
6878 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6879 	if (ret)
6880 		return ret;
6881 
6882 	/* must have at least 1 entry */
6883 	if (!val)
6884 		return -EINVAL;
6885 
6886 	/* value is in KB */
6887 	val <<= 10;
6888 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6889 	if (ret < 0)
6890 		return ret;
6891 
6892 	*ppos += cnt;
6893 
6894 	return cnt;
6895 }
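
/*
 * Usage sketch: this read/write pair backs the tracefs
 * "buffer_size_kb" file.  Values are per-CPU kilobytes (note the
 * "val <<= 10" above), e.g.
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes each per-CPU ring buffer to 4 MB.
 */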
6896 
6897 static ssize_t
6898 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6899 				size_t cnt, loff_t *ppos)
6900 {
6901 	struct trace_array *tr = filp->private_data;
6902 	char buf[64];
6903 	int r, cpu;
6904 	unsigned long size = 0, expanded_size = 0;
6905 
6906 	mutex_lock(&trace_types_lock);
6907 	for_each_tracing_cpu(cpu) {
6908 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6909 		if (!ring_buffer_expanded)
6910 			expanded_size += trace_buf_size >> 10;
6911 	}
6912 	if (ring_buffer_expanded)
6913 		r = sprintf(buf, "%lu\n", size);
6914 	else
6915 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6916 	mutex_unlock(&trace_types_lock);
6917 
6918 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6919 }
6920 
6921 static ssize_t
6922 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6923 			  size_t cnt, loff_t *ppos)
6924 {
6925 	/*
6926 	 * There is no need to read what the user has written; this function
6927 	 * only exists so that using "echo" on this file does not return an error.
6928 	 */
6929 
6930 	*ppos += cnt;
6931 
6932 	return cnt;
6933 }
6934 
6935 static int
6936 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6937 {
6938 	struct trace_array *tr = inode->i_private;
6939 
6940 	/* disable tracing ? */
6941 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6942 		tracer_tracing_off(tr);
6943 	/* resize the ring buffer to 0 */
6944 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6945 
6946 	trace_array_put(tr);
6947 
6948 	return 0;
6949 }
6950 
6951 static ssize_t
6952 tracing_mark_write(struct file *filp, const char __user *ubuf,
6953 					size_t cnt, loff_t *fpos)
6954 {
6955 	struct trace_array *tr = filp->private_data;
6956 	struct ring_buffer_event *event;
6957 	enum event_trigger_type tt = ETT_NONE;
6958 	struct trace_buffer *buffer;
6959 	struct print_entry *entry;
6960 	ssize_t written;
6961 	int size;
6962 	int len;
6963 
6964 /* Used in tracing_mark_raw_write() as well */
6965 #define FAULTED_STR "<faulted>"
6966 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6967 
6968 	if (tracing_disabled)
6969 		return -EINVAL;
6970 
6971 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6972 		return -EINVAL;
6973 
6974 	if (cnt > TRACE_BUF_SIZE)
6975 		cnt = TRACE_BUF_SIZE;
6976 
6977 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6978 
6979 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6980 
6981 	/* If less than "<faulted>", then make sure we can still add that */
6982 	if (cnt < FAULTED_SIZE)
6983 		size += FAULTED_SIZE - cnt;
6984 
6985 	buffer = tr->array_buffer.buffer;
6986 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6987 					    tracing_gen_ctx());
6988 	if (unlikely(!event))
6989 		/* Ring buffer disabled, return as if not open for write */
6990 		return -EBADF;
6991 
6992 	entry = ring_buffer_event_data(event);
6993 	entry->ip = _THIS_IP_;
6994 
6995 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6996 	if (len) {
6997 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6998 		cnt = FAULTED_SIZE;
6999 		written = -EFAULT;
7000 	} else
7001 		written = cnt;
7002 
7003 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7004 		/* do not add \n before testing triggers, but add \0 */
7005 		entry->buf[cnt] = '\0';
7006 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7007 	}
7008 
7009 	if (entry->buf[cnt - 1] != '\n') {
7010 		entry->buf[cnt] = '\n';
7011 		entry->buf[cnt + 1] = '\0';
7012 	} else
7013 		entry->buf[cnt] = '\0';
7014 
7015 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7016 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7017 	__buffer_unlock_commit(buffer, event);
7018 
7019 	if (tt)
7020 		event_triggers_post_call(tr->trace_marker_file, tt);
7021 
7022 	if (written > 0)
7023 		*fpos += written;
7024 
7025 	return written;
7026 }
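
/*
 * Usage sketch (userspace side, assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello from user space", 21);
 *
 * Each write becomes one TRACE_PRINT entry; the handler above caps the
 * size at TRACE_BUF_SIZE and guarantees a trailing newline and NUL.
 */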
7027 
7028 /* Limit it for now to 3K (including tag) */
7029 #define RAW_DATA_MAX_SIZE (1024*3)
7030 
7031 static ssize_t
7032 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7033 					size_t cnt, loff_t *fpos)
7034 {
7035 	struct trace_array *tr = filp->private_data;
7036 	struct ring_buffer_event *event;
7037 	struct trace_buffer *buffer;
7038 	struct raw_data_entry *entry;
7039 	ssize_t written;
7040 	int size;
7041 	int len;
7042 
7043 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7044 
7045 	if (tracing_disabled)
7046 		return -EINVAL;
7047 
7048 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7049 		return -EINVAL;
7050 
7051 	/* The marker must at least have a tag id */
7052 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7053 		return -EINVAL;
7054 
7055 	if (cnt > TRACE_BUF_SIZE)
7056 		cnt = TRACE_BUF_SIZE;
7057 
7058 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7059 
7060 	size = sizeof(*entry) + cnt;
7061 	if (cnt < FAULT_SIZE_ID)
7062 		size += FAULT_SIZE_ID - cnt;
7063 
7064 	buffer = tr->array_buffer.buffer;
7065 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7066 					    tracing_gen_ctx());
7067 	if (!event)
7068 		/* Ring buffer disabled, return as if not open for write */
7069 		return -EBADF;
7070 
7071 	entry = ring_buffer_event_data(event);
7072 
7073 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7074 	if (len) {
7075 		entry->id = -1;
7076 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7077 		written = -EFAULT;
7078 	} else
7079 		written = cnt;
7080 
7081 	__buffer_unlock_commit(buffer, event);
7082 
7083 	if (written > 0)
7084 		*fpos += written;
7085 
7086 	return written;
7087 }
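
/*
 * Usage sketch for the raw marker ("trace_marker_raw" in tracefs): the
 * payload must begin with a 4-byte tag id followed by arbitrary binary
 * data, e.g. from userspace:
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL above.
 */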
7088 
7089 static int tracing_clock_show(struct seq_file *m, void *v)
7090 {
7091 	struct trace_array *tr = m->private;
7092 	int i;
7093 
7094 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7095 		seq_printf(m,
7096 			"%s%s%s%s", i ? " " : "",
7097 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7098 			i == tr->clock_id ? "]" : "");
7099 	seq_putc(m, '\n');
7100 
7101 	return 0;
7102 }
7103 
7104 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7105 {
7106 	int i;
7107 
7108 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7109 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7110 			break;
7111 	}
7112 	if (i == ARRAY_SIZE(trace_clocks))
7113 		return -EINVAL;
7114 
7115 	mutex_lock(&trace_types_lock);
7116 
7117 	tr->clock_id = i;
7118 
7119 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7120 
7121 	/*
7122 	 * New clock may not be consistent with the previous clock.
7123 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7124 	 */
7125 	tracing_reset_online_cpus(&tr->array_buffer);
7126 
7127 #ifdef CONFIG_TRACER_MAX_TRACE
7128 	if (tr->max_buffer.buffer)
7129 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7130 	tracing_reset_online_cpus(&tr->max_buffer);
7131 #endif
7132 
7133 	mutex_unlock(&trace_types_lock);
7134 
7135 	return 0;
7136 }
7137 
7138 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7139 				   size_t cnt, loff_t *fpos)
7140 {
7141 	struct seq_file *m = filp->private_data;
7142 	struct trace_array *tr = m->private;
7143 	char buf[64];
7144 	const char *clockstr;
7145 	int ret;
7146 
7147 	if (cnt >= sizeof(buf))
7148 		return -EINVAL;
7149 
7150 	if (copy_from_user(buf, ubuf, cnt))
7151 		return -EFAULT;
7152 
7153 	buf[cnt] = 0;
7154 
7155 	clockstr = strstrip(buf);
7156 
7157 	ret = tracing_set_clock(tr, clockstr);
7158 	if (ret)
7159 		return ret;
7160 
7161 	*fpos += cnt;
7162 
7163 	return cnt;
7164 }
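
/*
 * Usage sketch: these handlers back the tracefs "trace_clock" file.
 * Reading lists the available clocks with the current one in brackets
 * (see tracing_clock_show()); writing a clock name switches to it and
 * resets the buffers, e.g.
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */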
7165 
7166 static int tracing_clock_open(struct inode *inode, struct file *file)
7167 {
7168 	struct trace_array *tr = inode->i_private;
7169 	int ret;
7170 
7171 	ret = tracing_check_open_get_tr(tr);
7172 	if (ret)
7173 		return ret;
7174 
7175 	ret = single_open(file, tracing_clock_show, inode->i_private);
7176 	if (ret < 0)
7177 		trace_array_put(tr);
7178 
7179 	return ret;
7180 }
7181 
7182 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7183 {
7184 	struct trace_array *tr = m->private;
7185 
7186 	mutex_lock(&trace_types_lock);
7187 
7188 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7189 		seq_puts(m, "delta [absolute]\n");
7190 	else
7191 		seq_puts(m, "[delta] absolute\n");
7192 
7193 	mutex_unlock(&trace_types_lock);
7194 
7195 	return 0;
7196 }
7197 
7198 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7199 {
7200 	struct trace_array *tr = inode->i_private;
7201 	int ret;
7202 
7203 	ret = tracing_check_open_get_tr(tr);
7204 	if (ret)
7205 		return ret;
7206 
7207 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7208 	if (ret < 0)
7209 		trace_array_put(tr);
7210 
7211 	return ret;
7212 }
7213 
7214 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7215 {
7216 	if (rbe == this_cpu_read(trace_buffered_event))
7217 		return ring_buffer_time_stamp(buffer);
7218 
7219 	return ring_buffer_event_time_stamp(buffer, rbe);
7220 }
7221 
7222 /*
7223  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7224  */
7225 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7226 {
7227 	int ret = 0;
7228 
7229 	mutex_lock(&trace_types_lock);
7230 
7231 	if (set && tr->no_filter_buffering_ref++)
7232 		goto out;
7233 
7234 	if (!set) {
7235 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7236 			ret = -EINVAL;
7237 			goto out;
7238 		}
7239 
7240 		--tr->no_filter_buffering_ref;
7241 	}
7242  out:
7243 	mutex_unlock(&trace_types_lock);
7244 
7245 	return ret;
7246 }
7247 
7248 struct ftrace_buffer_info {
7249 	struct trace_iterator	iter;
7250 	void			*spare;
7251 	unsigned int		spare_cpu;
7252 	unsigned int		read;
7253 };
7254 
7255 #ifdef CONFIG_TRACER_SNAPSHOT
7256 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7257 {
7258 	struct trace_array *tr = inode->i_private;
7259 	struct trace_iterator *iter;
7260 	struct seq_file *m;
7261 	int ret;
7262 
7263 	ret = tracing_check_open_get_tr(tr);
7264 	if (ret)
7265 		return ret;
7266 
7267 	if (file->f_mode & FMODE_READ) {
7268 		iter = __tracing_open(inode, file, true);
7269 		if (IS_ERR(iter))
7270 			ret = PTR_ERR(iter);
7271 	} else {
7272 		/* Writes still need the seq_file to hold the private data */
7273 		ret = -ENOMEM;
7274 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7275 		if (!m)
7276 			goto out;
7277 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7278 		if (!iter) {
7279 			kfree(m);
7280 			goto out;
7281 		}
7282 		ret = 0;
7283 
7284 		iter->tr = tr;
7285 		iter->array_buffer = &tr->max_buffer;
7286 		iter->cpu_file = tracing_get_cpu(inode);
7287 		m->private = iter;
7288 		file->private_data = m;
7289 	}
7290 out:
7291 	if (ret < 0)
7292 		trace_array_put(tr);
7293 
7294 	return ret;
7295 }
7296 
7297 static ssize_t
7298 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7299 		       loff_t *ppos)
7300 {
7301 	struct seq_file *m = filp->private_data;
7302 	struct trace_iterator *iter = m->private;
7303 	struct trace_array *tr = iter->tr;
7304 	unsigned long val;
7305 	int ret;
7306 
7307 	ret = tracing_update_buffers();
7308 	if (ret < 0)
7309 		return ret;
7310 
7311 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7312 	if (ret)
7313 		return ret;
7314 
7315 	mutex_lock(&trace_types_lock);
7316 
7317 	if (tr->current_trace->use_max_tr) {
7318 		ret = -EBUSY;
7319 		goto out;
7320 	}
7321 
7322 	arch_spin_lock(&tr->max_lock);
7323 	if (tr->cond_snapshot)
7324 		ret = -EBUSY;
7325 	arch_spin_unlock(&tr->max_lock);
7326 	if (ret)
7327 		goto out;
7328 
7329 	switch (val) {
7330 	case 0:
7331 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7332 			ret = -EINVAL;
7333 			break;
7334 		}
7335 		if (tr->allocated_snapshot)
7336 			free_snapshot(tr);
7337 		break;
7338 	case 1:
7339 /* Only allow per-cpu swap if the ring buffer supports it */
7340 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7341 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7342 			ret = -EINVAL;
7343 			break;
7344 		}
7345 #endif
7346 		if (tr->allocated_snapshot)
7347 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7348 					&tr->array_buffer, iter->cpu_file);
7349 		else
7350 			ret = tracing_alloc_snapshot_instance(tr);
7351 		if (ret < 0)
7352 			break;
7353 		local_irq_disable();
7354 		/* Now, we're going to swap */
7355 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7356 			update_max_tr(tr, current, smp_processor_id(), NULL);
7357 		else
7358 			update_max_tr_single(tr, current, iter->cpu_file);
7359 		local_irq_enable();
7360 		break;
7361 	default:
7362 		if (tr->allocated_snapshot) {
7363 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7364 				tracing_reset_online_cpus(&tr->max_buffer);
7365 			else
7366 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7367 		}
7368 		break;
7369 	}
7370 
7371 	if (ret >= 0) {
7372 		*ppos += cnt;
7373 		ret = cnt;
7374 	}
7375 out:
7376 	mutex_unlock(&trace_types_lock);
7377 	return ret;
7378 }
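
/*
 * Usage sketch for the "snapshot" file handled by the switch above:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	allocate (if needed) and swap
 *	echo 0 > /sys/kernel/tracing/snapshot	free the snapshot buffer
 *	echo 2 > /sys/kernel/tracing/snapshot	clear its contents, keep it
 *
 * corresponding to case 1, case 0 and the default case respectively.
 */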
7379 
7380 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7381 {
7382 	struct seq_file *m = file->private_data;
7383 	int ret;
7384 
7385 	ret = tracing_release(inode, file);
7386 
7387 	if (file->f_mode & FMODE_READ)
7388 		return ret;
7389 
7390 	/* If write only, the seq_file is just a stub */
7391 	if (m)
7392 		kfree(m->private);
7393 	kfree(m);
7394 
7395 	return 0;
7396 }
7397 
7398 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7399 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7400 				    size_t count, loff_t *ppos);
7401 static int tracing_buffers_release(struct inode *inode, struct file *file);
7402 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7403 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7404 
7405 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7406 {
7407 	struct ftrace_buffer_info *info;
7408 	int ret;
7409 
7410 	/* The following checks for tracefs lockdown */
7411 	ret = tracing_buffers_open(inode, filp);
7412 	if (ret < 0)
7413 		return ret;
7414 
7415 	info = filp->private_data;
7416 
7417 	if (info->iter.trace->use_max_tr) {
7418 		tracing_buffers_release(inode, filp);
7419 		return -EBUSY;
7420 	}
7421 
7422 	info->iter.snapshot = true;
7423 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7424 
7425 	return ret;
7426 }
7427 
7428 #endif /* CONFIG_TRACER_SNAPSHOT */
7429 
7430 
7431 static const struct file_operations tracing_thresh_fops = {
7432 	.open		= tracing_open_generic,
7433 	.read		= tracing_thresh_read,
7434 	.write		= tracing_thresh_write,
7435 	.llseek		= generic_file_llseek,
7436 };
7437 
7438 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7439 static const struct file_operations tracing_max_lat_fops = {
7440 	.open		= tracing_open_generic,
7441 	.read		= tracing_max_lat_read,
7442 	.write		= tracing_max_lat_write,
7443 	.llseek		= generic_file_llseek,
7444 };
7445 #endif
7446 
7447 static const struct file_operations set_tracer_fops = {
7448 	.open		= tracing_open_generic,
7449 	.read		= tracing_set_trace_read,
7450 	.write		= tracing_set_trace_write,
7451 	.llseek		= generic_file_llseek,
7452 };
7453 
7454 static const struct file_operations tracing_pipe_fops = {
7455 	.open		= tracing_open_pipe,
7456 	.poll		= tracing_poll_pipe,
7457 	.read		= tracing_read_pipe,
7458 	.splice_read	= tracing_splice_read_pipe,
7459 	.release	= tracing_release_pipe,
7460 	.llseek		= no_llseek,
7461 };
7462 
7463 static const struct file_operations tracing_entries_fops = {
7464 	.open		= tracing_open_generic_tr,
7465 	.read		= tracing_entries_read,
7466 	.write		= tracing_entries_write,
7467 	.llseek		= generic_file_llseek,
7468 	.release	= tracing_release_generic_tr,
7469 };
7470 
7471 static const struct file_operations tracing_total_entries_fops = {
7472 	.open		= tracing_open_generic_tr,
7473 	.read		= tracing_total_entries_read,
7474 	.llseek		= generic_file_llseek,
7475 	.release	= tracing_release_generic_tr,
7476 };
7477 
7478 static const struct file_operations tracing_free_buffer_fops = {
7479 	.open		= tracing_open_generic_tr,
7480 	.write		= tracing_free_buffer_write,
7481 	.release	= tracing_free_buffer_release,
7482 };
7483 
7484 static const struct file_operations tracing_mark_fops = {
7485 	.open		= tracing_open_generic_tr,
7486 	.write		= tracing_mark_write,
7487 	.llseek		= generic_file_llseek,
7488 	.release	= tracing_release_generic_tr,
7489 };
7490 
7491 static const struct file_operations tracing_mark_raw_fops = {
7492 	.open		= tracing_open_generic_tr,
7493 	.write		= tracing_mark_raw_write,
7494 	.llseek		= generic_file_llseek,
7495 	.release	= tracing_release_generic_tr,
7496 };
7497 
7498 static const struct file_operations trace_clock_fops = {
7499 	.open		= tracing_clock_open,
7500 	.read		= seq_read,
7501 	.llseek		= seq_lseek,
7502 	.release	= tracing_single_release_tr,
7503 	.write		= tracing_clock_write,
7504 };
7505 
7506 static const struct file_operations trace_time_stamp_mode_fops = {
7507 	.open		= tracing_time_stamp_mode_open,
7508 	.read		= seq_read,
7509 	.llseek		= seq_lseek,
7510 	.release	= tracing_single_release_tr,
7511 };
7512 
7513 #ifdef CONFIG_TRACER_SNAPSHOT
7514 static const struct file_operations snapshot_fops = {
7515 	.open		= tracing_snapshot_open,
7516 	.read		= seq_read,
7517 	.write		= tracing_snapshot_write,
7518 	.llseek		= tracing_lseek,
7519 	.release	= tracing_snapshot_release,
7520 };
7521 
7522 static const struct file_operations snapshot_raw_fops = {
7523 	.open		= snapshot_raw_open,
7524 	.read		= tracing_buffers_read,
7525 	.release	= tracing_buffers_release,
7526 	.splice_read	= tracing_buffers_splice_read,
7527 	.llseek		= no_llseek,
7528 };
7529 
7530 #endif /* CONFIG_TRACER_SNAPSHOT */
7531 
7532 #define TRACING_LOG_ERRS_MAX	8
7533 #define TRACING_LOG_LOC_MAX	128
7534 
7535 #define CMD_PREFIX "  Command: "
7536 
7537 struct err_info {
7538 	const char	**errs;	/* ptr to loc-specific array of err strings */
7539 	u8		type;	/* index into errs -> specific err string */
7540 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7541 	u64		ts;
7542 };
7543 
7544 struct tracing_log_err {
7545 	struct list_head	list;
7546 	struct err_info		info;
7547 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7548 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7549 };
7550 
7551 static DEFINE_MUTEX(tracing_err_log_lock);
7552 
7553 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7554 {
7555 	struct tracing_log_err *err;
7556 
7557 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7558 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7559 		if (!err)
7560 			err = ERR_PTR(-ENOMEM);
7561 		tr->n_err_log_entries++;
7562 
7563 		return err;
7564 	}
7565 
7566 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7567 	list_del(&err->list);
7568 
7569 	return err;
7570 }
7571 
7572 /**
7573  * err_pos - find the position of a string within a command for error careting
7574  * @cmd: The tracing command that caused the error
7575  * @str: The string to position the caret at within @cmd
7576  *
7577  * Finds the position of the first occurrence of @str within @cmd.  The
7578  * return value can be passed to tracing_log_err() for caret placement
7579  * within @cmd.
7580  *
7581  * Returns the index within @cmd of the first occurrence of @str or 0
7582  * if @str was not found.
7583  */
7584 unsigned int err_pos(char *cmd, const char *str)
7585 {
7586 	char *found;
7587 
7588 	if (WARN_ON(!strlen(cmd)))
7589 		return 0;
7590 
7591 	found = strstr(cmd, str);
7592 	if (found)
7593 		return found - cmd;
7594 
7595 	return 0;
7596 }
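
/*
 * A quick worked example (the strings are made up): with
 * cmd = "keys=common_pid" and str = "common_pid", err_pos() returns 5,
 * the index just past the "keys=" prefix, so the caret lands under the
 * 'c' of "common_pid" in the error_log output.
 */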
7597 
7598 /**
7599  * tracing_log_err - write an error to the tracing error log
7600  * @tr: The associated trace array for the error (NULL for top level array)
7601  * @loc: A string describing where the error occurred
7602  * @cmd: The tracing command that caused the error
7603  * @errs: The array of loc-specific static error strings
7604  * @type: The index into errs[], which produces the specific static err string
7605  * @pos: The position the caret should be placed in the cmd
7606  *
7607  * Writes an error into tracing/error_log of the form:
7608  *
7609  * <loc>: error: <text>
7610  *   Command: <cmd>
7611  *              ^
7612  *
7613  * tracing/error_log is a small log file containing the last
7614  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7615  * unless there has been a tracing error, and the error log can be
7616  * cleared, and its memory freed, by writing the empty string to it
7617  * in truncation mode, i.e. echo > tracing/error_log.
7618  *
7619  * NOTE: the @errs array along with the @type param are used to
7620  * produce a static error string - this string is not copied and saved
7621  * when the error is logged - only a pointer to it is saved.  See
7622  * existing callers for examples of how static strings are typically
7623  * defined for use with tracing_log_err().
7624  */
7625 void tracing_log_err(struct trace_array *tr,
7626 		     const char *loc, const char *cmd,
7627 		     const char **errs, u8 type, u8 pos)
7628 {
7629 	struct tracing_log_err *err;
7630 
7631 	if (!tr)
7632 		tr = &global_trace;
7633 
7634 	mutex_lock(&tracing_err_log_lock);
7635 	err = get_tracing_log_err(tr);
7636 	if (PTR_ERR(err) == -ENOMEM) {
7637 		mutex_unlock(&tracing_err_log_lock);
7638 		return;
7639 	}
7640 
7641 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7642 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7643 
7644 	err->info.errs = errs;
7645 	err->info.type = type;
7646 	err->info.pos = pos;
7647 	err->info.ts = local_clock();
7648 
7649 	list_add_tail(&err->list, &tr->err_log);
7650 	mutex_unlock(&tracing_err_log_lock);
7651 }
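
/*
 * Sketch of a typical caller (illustrative only -- "foo", the error
 * strings and the command below are made up, not an existing tracer):
 *
 *	static const char *foo_cmd_errs[] = {
 *		"Duplicate key",		// type == 0
 *		"Unknown field name",		// type == 1
 *	};
 *
 *	static void foo_parse_error(struct trace_array *tr, char *cmd,
 *				    const char *bad_tok)
 *	{
 *		tracing_log_err(tr, "foo: parse", cmd, foo_cmd_errs,
 *				1, err_pos(cmd, bad_tok));
 *	}
 *
 * With cmd = "keys=bogus_field" and bad_tok = "bogus_field", reading
 * tracing/error_log would then show something like:
 *
 *	[  123.456789] foo: parse: error: Unknown field name
 *	  Command: keys=bogus_field
 *	                ^
 */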
7652 
7653 static void clear_tracing_err_log(struct trace_array *tr)
7654 {
7655 	struct tracing_log_err *err, *next;
7656 
7657 	mutex_lock(&tracing_err_log_lock);
7658 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7659 		list_del(&err->list);
7660 		kfree(err);
7661 	}
7662 
7663 	tr->n_err_log_entries = 0;
7664 	mutex_unlock(&tracing_err_log_lock);
7665 }
7666 
7667 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7668 {
7669 	struct trace_array *tr = m->private;
7670 
7671 	mutex_lock(&tracing_err_log_lock);
7672 
7673 	return seq_list_start(&tr->err_log, *pos);
7674 }
7675 
7676 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7677 {
7678 	struct trace_array *tr = m->private;
7679 
7680 	return seq_list_next(v, &tr->err_log, pos);
7681 }
7682 
7683 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7684 {
7685 	mutex_unlock(&tracing_err_log_lock);
7686 }
7687 
7688 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7689 {
7690 	u8 i;
7691 
7692 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7693 		seq_putc(m, ' ');
7694 	for (i = 0; i < pos; i++)
7695 		seq_putc(m, ' ');
7696 	seq_puts(m, "^\n");
7697 }
7698 
7699 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7700 {
7701 	struct tracing_log_err *err = v;
7702 
7703 	if (err) {
7704 		const char *err_text = err->info.errs[err->info.type];
7705 		u64 sec = err->info.ts;
7706 		u32 nsec;
7707 
7708 		nsec = do_div(sec, NSEC_PER_SEC);
7709 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7710 			   err->loc, err_text);
7711 		seq_printf(m, "%s", err->cmd);
7712 		tracing_err_log_show_pos(m, err->info.pos);
7713 	}
7714 
7715 	return 0;
7716 }
7717 
7718 static const struct seq_operations tracing_err_log_seq_ops = {
7719 	.start  = tracing_err_log_seq_start,
7720 	.next   = tracing_err_log_seq_next,
7721 	.stop   = tracing_err_log_seq_stop,
7722 	.show   = tracing_err_log_seq_show
7723 };
7724 
7725 static int tracing_err_log_open(struct inode *inode, struct file *file)
7726 {
7727 	struct trace_array *tr = inode->i_private;
7728 	int ret = 0;
7729 
7730 	ret = tracing_check_open_get_tr(tr);
7731 	if (ret)
7732 		return ret;
7733 
7734 	/* If this file was opened for write, then erase contents */
7735 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7736 		clear_tracing_err_log(tr);
7737 
7738 	if (file->f_mode & FMODE_READ) {
7739 		ret = seq_open(file, &tracing_err_log_seq_ops);
7740 		if (!ret) {
7741 			struct seq_file *m = file->private_data;
7742 			m->private = tr;
7743 		} else {
7744 			trace_array_put(tr);
7745 		}
7746 	}
7747 	return ret;
7748 }
7749 
7750 static ssize_t tracing_err_log_write(struct file *file,
7751 				     const char __user *buffer,
7752 				     size_t count, loff_t *ppos)
7753 {
7754 	return count;
7755 }
7756 
7757 static int tracing_err_log_release(struct inode *inode, struct file *file)
7758 {
7759 	struct trace_array *tr = inode->i_private;
7760 
7761 	trace_array_put(tr);
7762 
7763 	if (file->f_mode & FMODE_READ)
7764 		seq_release(inode, file);
7765 
7766 	return 0;
7767 }
7768 
7769 static const struct file_operations tracing_err_log_fops = {
7770 	.open           = tracing_err_log_open,
7771 	.write		= tracing_err_log_write,
7772 	.read           = seq_read,
7773 	.llseek         = seq_lseek,
7774 	.release        = tracing_err_log_release,
7775 };
7776 
7777 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7778 {
7779 	struct trace_array *tr = inode->i_private;
7780 	struct ftrace_buffer_info *info;
7781 	int ret;
7782 
7783 	ret = tracing_check_open_get_tr(tr);
7784 	if (ret)
7785 		return ret;
7786 
7787 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7788 	if (!info) {
7789 		trace_array_put(tr);
7790 		return -ENOMEM;
7791 	}
7792 
7793 	mutex_lock(&trace_types_lock);
7794 
7795 	info->iter.tr		= tr;
7796 	info->iter.cpu_file	= tracing_get_cpu(inode);
7797 	info->iter.trace	= tr->current_trace;
7798 	info->iter.array_buffer = &tr->array_buffer;
7799 	info->spare		= NULL;
7800 	/* Force reading ring buffer for first read */
7801 	info->read		= (unsigned int)-1;
7802 
7803 	filp->private_data = info;
7804 
7805 	tr->trace_ref++;
7806 
7807 	mutex_unlock(&trace_types_lock);
7808 
7809 	ret = nonseekable_open(inode, filp);
7810 	if (ret < 0)
7811 		trace_array_put(tr);
7812 
7813 	return ret;
7814 }
7815 
7816 static __poll_t
7817 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7818 {
7819 	struct ftrace_buffer_info *info = filp->private_data;
7820 	struct trace_iterator *iter = &info->iter;
7821 
7822 	return trace_poll(iter, filp, poll_table);
7823 }
7824 
7825 static ssize_t
7826 tracing_buffers_read(struct file *filp, char __user *ubuf,
7827 		     size_t count, loff_t *ppos)
7828 {
7829 	struct ftrace_buffer_info *info = filp->private_data;
7830 	struct trace_iterator *iter = &info->iter;
7831 	ssize_t ret = 0;
7832 	ssize_t size;
7833 
7834 	if (!count)
7835 		return 0;
7836 
7837 #ifdef CONFIG_TRACER_MAX_TRACE
7838 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7839 		return -EBUSY;
7840 #endif
7841 
7842 	if (!info->spare) {
7843 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7844 							  iter->cpu_file);
7845 		if (IS_ERR(info->spare)) {
7846 			ret = PTR_ERR(info->spare);
7847 			info->spare = NULL;
7848 		} else {
7849 			info->spare_cpu = iter->cpu_file;
7850 		}
7851 	}
7852 	if (!info->spare)
7853 		return ret;
7854 
7855 	/* Do we have previous read data to read? */
7856 	if (info->read < PAGE_SIZE)
7857 		goto read;
7858 
7859  again:
7860 	trace_access_lock(iter->cpu_file);
7861 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7862 				    &info->spare,
7863 				    count,
7864 				    iter->cpu_file, 0);
7865 	trace_access_unlock(iter->cpu_file);
7866 
7867 	if (ret < 0) {
7868 		if (trace_empty(iter)) {
7869 			if ((filp->f_flags & O_NONBLOCK))
7870 				return -EAGAIN;
7871 
7872 			ret = wait_on_pipe(iter, 0);
7873 			if (ret)
7874 				return ret;
7875 
7876 			goto again;
7877 		}
7878 		return 0;
7879 	}
7880 
7881 	info->read = 0;
7882  read:
7883 	size = PAGE_SIZE - info->read;
7884 	if (size > count)
7885 		size = count;
7886 
7887 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7888 	if (ret == size)
7889 		return -EFAULT;
7890 
7891 	size -= ret;
7892 
7893 	*ppos += size;
7894 	info->read += size;
7895 
7896 	return size;
7897 }
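
/*
 * A minimal sketch of a user-space consumer of this file (illustrative,
 * not part of the kernel; consume_subbuffer() is a made-up helper).
 * The per_cpu/cpuN/trace_pipe_raw file hands back ring-buffer pages,
 * so readers normally pull page-sized chunks:
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	char buf[4096];
 *	ssize_t r;
 *
 *	while ((r = read(fd, buf, sizeof(buf))) > 0)
 *		consume_subbuffer(buf, r);	// e.g. feed to a parser
 */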
7898 
7899 static int tracing_buffers_release(struct inode *inode, struct file *file)
7900 {
7901 	struct ftrace_buffer_info *info = file->private_data;
7902 	struct trace_iterator *iter = &info->iter;
7903 
7904 	mutex_lock(&trace_types_lock);
7905 
7906 	iter->tr->trace_ref--;
7907 
7908 	__trace_array_put(iter->tr);
7909 
7910 	if (info->spare)
7911 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7912 					   info->spare_cpu, info->spare);
7913 	kvfree(info);
7914 
7915 	mutex_unlock(&trace_types_lock);
7916 
7917 	return 0;
7918 }
7919 
7920 struct buffer_ref {
7921 	struct trace_buffer	*buffer;
7922 	void			*page;
7923 	int			cpu;
7924 	refcount_t		refcount;
7925 };
7926 
7927 static void buffer_ref_release(struct buffer_ref *ref)
7928 {
7929 	if (!refcount_dec_and_test(&ref->refcount))
7930 		return;
7931 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7932 	kfree(ref);
7933 }
7934 
7935 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7936 				    struct pipe_buffer *buf)
7937 {
7938 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7939 
7940 	buffer_ref_release(ref);
7941 	buf->private = 0;
7942 }
7943 
7944 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7945 				struct pipe_buffer *buf)
7946 {
7947 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7948 
7949 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7950 		return false;
7951 
7952 	refcount_inc(&ref->refcount);
7953 	return true;
7954 }
7955 
7956 /* Pipe buffer operations for a buffer. */
7957 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7958 	.release		= buffer_pipe_buf_release,
7959 	.get			= buffer_pipe_buf_get,
7960 };
7961 
7962 /*
7963  * Callback from splice_to_pipe(); used to release the pages left at the
7964  * end of the spd in case we errored out while filling the pipe.
7965  */
7966 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7967 {
7968 	struct buffer_ref *ref =
7969 		(struct buffer_ref *)spd->partial[i].private;
7970 
7971 	buffer_ref_release(ref);
7972 	spd->partial[i].private = 0;
7973 }
7974 
7975 static ssize_t
7976 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7977 			    struct pipe_inode_info *pipe, size_t len,
7978 			    unsigned int flags)
7979 {
7980 	struct ftrace_buffer_info *info = file->private_data;
7981 	struct trace_iterator *iter = &info->iter;
7982 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7983 	struct page *pages_def[PIPE_DEF_BUFFERS];
7984 	struct splice_pipe_desc spd = {
7985 		.pages		= pages_def,
7986 		.partial	= partial_def,
7987 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7988 		.ops		= &buffer_pipe_buf_ops,
7989 		.spd_release	= buffer_spd_release,
7990 	};
7991 	struct buffer_ref *ref;
7992 	int entries, i;
7993 	ssize_t ret = 0;
7994 
7995 #ifdef CONFIG_TRACER_MAX_TRACE
7996 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7997 		return -EBUSY;
7998 #endif
7999 
8000 	if (*ppos & (PAGE_SIZE - 1))
8001 		return -EINVAL;
8002 
8003 	if (len & (PAGE_SIZE - 1)) {
8004 		if (len < PAGE_SIZE)
8005 			return -EINVAL;
8006 		len &= PAGE_MASK;
8007 	}
8008 
8009 	if (splice_grow_spd(pipe, &spd))
8010 		return -ENOMEM;
8011 
8012  again:
8013 	trace_access_lock(iter->cpu_file);
8014 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8015 
8016 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8017 		struct page *page;
8018 		int r;
8019 
8020 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8021 		if (!ref) {
8022 			ret = -ENOMEM;
8023 			break;
8024 		}
8025 
8026 		refcount_set(&ref->refcount, 1);
8027 		ref->buffer = iter->array_buffer->buffer;
8028 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8029 		if (IS_ERR(ref->page)) {
8030 			ret = PTR_ERR(ref->page);
8031 			ref->page = NULL;
8032 			kfree(ref);
8033 			break;
8034 		}
8035 		ref->cpu = iter->cpu_file;
8036 
8037 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8038 					  len, iter->cpu_file, 1);
8039 		if (r < 0) {
8040 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8041 						   ref->page);
8042 			kfree(ref);
8043 			break;
8044 		}
8045 
8046 		page = virt_to_page(ref->page);
8047 
8048 		spd.pages[i] = page;
8049 		spd.partial[i].len = PAGE_SIZE;
8050 		spd.partial[i].offset = 0;
8051 		spd.partial[i].private = (unsigned long)ref;
8052 		spd.nr_pages++;
8053 		*ppos += PAGE_SIZE;
8054 
8055 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8056 	}
8057 
8058 	trace_access_unlock(iter->cpu_file);
8059 	spd.nr_pages = i;
8060 
8061 	/* did we read anything? */
8062 	if (!spd.nr_pages) {
8063 		if (ret)
8064 			goto out;
8065 
8066 		ret = -EAGAIN;
8067 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8068 			goto out;
8069 
8070 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8071 		if (ret)
8072 			goto out;
8073 
8074 		goto again;
8075 	}
8076 
8077 	ret = splice_to_pipe(pipe, &spd);
8078 out:
8079 	splice_shrink_spd(&spd);
8080 
8081 	return ret;
8082 }
8083 
8084 static const struct file_operations tracing_buffers_fops = {
8085 	.open		= tracing_buffers_open,
8086 	.read		= tracing_buffers_read,
8087 	.poll		= tracing_buffers_poll,
8088 	.release	= tracing_buffers_release,
8089 	.splice_read	= tracing_buffers_splice_read,
8090 	.llseek		= no_llseek,
8091 };
8092 
8093 static ssize_t
8094 tracing_stats_read(struct file *filp, char __user *ubuf,
8095 		   size_t count, loff_t *ppos)
8096 {
8097 	struct inode *inode = file_inode(filp);
8098 	struct trace_array *tr = inode->i_private;
8099 	struct array_buffer *trace_buf = &tr->array_buffer;
8100 	int cpu = tracing_get_cpu(inode);
8101 	struct trace_seq *s;
8102 	unsigned long cnt;
8103 	unsigned long long t;
8104 	unsigned long usec_rem;
8105 
8106 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8107 	if (!s)
8108 		return -ENOMEM;
8109 
8110 	trace_seq_init(s);
8111 
8112 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8113 	trace_seq_printf(s, "entries: %ld\n", cnt);
8114 
8115 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8116 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8117 
8118 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8119 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8120 
8121 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8122 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8123 
8124 	if (trace_clocks[tr->clock_id].in_ns) {
8125 		/* local or global for trace_clock */
8126 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8127 		usec_rem = do_div(t, USEC_PER_SEC);
8128 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8129 								t, usec_rem);
8130 
8131 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8132 		usec_rem = do_div(t, USEC_PER_SEC);
8133 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8134 	} else {
8135 		/* counter or tsc mode for trace_clock */
8136 		trace_seq_printf(s, "oldest event ts: %llu\n",
8137 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8138 
8139 		trace_seq_printf(s, "now ts: %llu\n",
8140 				ring_buffer_time_stamp(trace_buf->buffer));
8141 	}
8142 
8143 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8144 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8145 
8146 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8147 	trace_seq_printf(s, "read events: %ld\n", cnt);
8148 
8149 	count = simple_read_from_buffer(ubuf, count, ppos,
8150 					s->buffer, trace_seq_used(s));
8151 
8152 	kfree(s);
8153 
8154 	return count;
8155 }
8156 
8157 static const struct file_operations tracing_stats_fops = {
8158 	.open		= tracing_open_generic_tr,
8159 	.read		= tracing_stats_read,
8160 	.llseek		= generic_file_llseek,
8161 	.release	= tracing_release_generic_tr,
8162 };
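
/*
 * Reading per_cpu/cpuN/stats produces output along these lines (the
 * numbers here are illustrative only):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 65536
 *	oldest event ts:  1234.567890
 *	now ts:  1234.987654
 *	dropped events: 0
 *	read events: 512
 */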
8163 
8164 #ifdef CONFIG_DYNAMIC_FTRACE
8165 
8166 static ssize_t
8167 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8168 		  size_t cnt, loff_t *ppos)
8169 {
8170 	ssize_t ret;
8171 	char *buf;
8172 	int r;
8173 
8174 	/* 256 bytes should be plenty to hold the three counts below */
8175 	buf = kmalloc(256, GFP_KERNEL);
8176 	if (!buf)
8177 		return -ENOMEM;
8178 
8179 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8180 		      ftrace_update_tot_cnt,
8181 		      ftrace_number_of_pages,
8182 		      ftrace_number_of_groups);
8183 
8184 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8185 	kfree(buf);
8186 	return ret;
8187 }
8188 
8189 static const struct file_operations tracing_dyn_info_fops = {
8190 	.open		= tracing_open_generic,
8191 	.read		= tracing_read_dyn_info,
8192 	.llseek		= generic_file_llseek,
8193 };
8194 #endif /* CONFIG_DYNAMIC_FTRACE */
8195 
8196 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8197 static void
8198 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8199 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8200 		void *data)
8201 {
8202 	tracing_snapshot_instance(tr);
8203 }
8204 
8205 static void
8206 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8207 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8208 		      void *data)
8209 {
8210 	struct ftrace_func_mapper *mapper = data;
8211 	long *count = NULL;
8212 
8213 	if (mapper)
8214 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8215 
8216 	if (count) {
8217 
8218 		if (*count <= 0)
8219 			return;
8220 
8221 		(*count)--;
8222 	}
8223 
8224 	tracing_snapshot_instance(tr);
8225 }
8226 
8227 static int
8228 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8229 		      struct ftrace_probe_ops *ops, void *data)
8230 {
8231 	struct ftrace_func_mapper *mapper = data;
8232 	long *count = NULL;
8233 
8234 	seq_printf(m, "%ps:", (void *)ip);
8235 
8236 	seq_puts(m, "snapshot");
8237 
8238 	if (mapper)
8239 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8240 
8241 	if (count)
8242 		seq_printf(m, ":count=%ld\n", *count);
8243 	else
8244 		seq_puts(m, ":unlimited\n");
8245 
8246 	return 0;
8247 }
8248 
8249 static int
8250 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8251 		     unsigned long ip, void *init_data, void **data)
8252 {
8253 	struct ftrace_func_mapper *mapper = *data;
8254 
8255 	if (!mapper) {
8256 		mapper = allocate_ftrace_func_mapper();
8257 		if (!mapper)
8258 			return -ENOMEM;
8259 		*data = mapper;
8260 	}
8261 
8262 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8263 }
8264 
8265 static void
8266 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8267 		     unsigned long ip, void *data)
8268 {
8269 	struct ftrace_func_mapper *mapper = data;
8270 
8271 	if (!ip) {
8272 		if (!mapper)
8273 			return;
8274 		free_ftrace_func_mapper(mapper, NULL);
8275 		return;
8276 	}
8277 
8278 	ftrace_func_mapper_remove_ip(mapper, ip);
8279 }
8280 
8281 static struct ftrace_probe_ops snapshot_probe_ops = {
8282 	.func			= ftrace_snapshot,
8283 	.print			= ftrace_snapshot_print,
8284 };
8285 
8286 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8287 	.func			= ftrace_count_snapshot,
8288 	.print			= ftrace_snapshot_print,
8289 	.init			= ftrace_snapshot_init,
8290 	.free			= ftrace_snapshot_free,
8291 };
8292 
8293 static int
8294 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8295 			       char *glob, char *cmd, char *param, int enable)
8296 {
8297 	struct ftrace_probe_ops *ops;
8298 	void *count = (void *)-1;
8299 	char *number;
8300 	int ret;
8301 
8302 	if (!tr)
8303 		return -ENODEV;
8304 
8305 	/* hash funcs only work with set_ftrace_filter */
8306 	if (!enable)
8307 		return -EINVAL;
8308 
8309 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8310 
8311 	if (glob[0] == '!')
8312 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8313 
8314 	if (!param)
8315 		goto out_reg;
8316 
8317 	number = strsep(&param, ":");
8318 
8319 	if (!strlen(number))
8320 		goto out_reg;
8321 
8322 	/*
8323 	 * We use the callback data field (which is a pointer)
8324 	 * as our counter.
8325 	 */
8326 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8327 	if (ret)
8328 		return ret;
8329 
8330  out_reg:
8331 	ret = tracing_alloc_snapshot_instance(tr);
8332 	if (ret < 0)
8333 		goto out;
8334 
8335 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8336 
8337  out:
8338 	return ret < 0 ? ret : 0;
8339 }
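
/*
 * Usage sketch for the "snapshot" command registered below (writes go
 * to set_ftrace_filter in the tracefs directory; the function name is
 * just an example):
 *
 *	# take a snapshot every time do_page_fault() is hit
 *	echo 'do_page_fault:snapshot' > set_ftrace_filter
 *
 *	# only take a snapshot on the first 5 hits
 *	echo 'do_page_fault:snapshot:5' > set_ftrace_filter
 *
 *	# remove the probe again
 *	echo '!do_page_fault:snapshot' > set_ftrace_filter
 */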
8340 
8341 static struct ftrace_func_command ftrace_snapshot_cmd = {
8342 	.name			= "snapshot",
8343 	.func			= ftrace_trace_snapshot_callback,
8344 };
8345 
8346 static __init int register_snapshot_cmd(void)
8347 {
8348 	return register_ftrace_command(&ftrace_snapshot_cmd);
8349 }
8350 #else
8351 static inline __init int register_snapshot_cmd(void) { return 0; }
8352 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8353 
8354 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8355 {
8356 	if (WARN_ON(!tr->dir))
8357 		return ERR_PTR(-ENODEV);
8358 
8359 	/* Top directory uses NULL as the parent */
8360 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8361 		return NULL;
8362 
8363 	/* All sub buffers have a descriptor */
8364 	return tr->dir;
8365 }
8366 
8367 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8368 {
8369 	struct dentry *d_tracer;
8370 
8371 	if (tr->percpu_dir)
8372 		return tr->percpu_dir;
8373 
8374 	d_tracer = tracing_get_dentry(tr);
8375 	if (IS_ERR(d_tracer))
8376 		return NULL;
8377 
8378 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8379 
8380 	MEM_FAIL(!tr->percpu_dir,
8381 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8382 
8383 	return tr->percpu_dir;
8384 }
8385 
8386 static struct dentry *
8387 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8388 		      void *data, long cpu, const struct file_operations *fops)
8389 {
8390 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8391 
8392 	if (ret) /* See tracing_get_cpu() */
8393 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8394 	return ret;
8395 }
8396 
8397 static void
8398 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8399 {
8400 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8401 	struct dentry *d_cpu;
8402 	char cpu_dir[30]; /* 30 characters should be more than enough */
8403 
8404 	if (!d_percpu)
8405 		return;
8406 
8407 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8408 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8409 	if (!d_cpu) {
8410 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8411 		return;
8412 	}
8413 
8414 	/* per cpu trace_pipe */
8415 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8416 				tr, cpu, &tracing_pipe_fops);
8417 
8418 	/* per cpu trace */
8419 	trace_create_cpu_file("trace", 0644, d_cpu,
8420 				tr, cpu, &tracing_fops);
8421 
8422 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8423 				tr, cpu, &tracing_buffers_fops);
8424 
8425 	trace_create_cpu_file("stats", 0444, d_cpu,
8426 				tr, cpu, &tracing_stats_fops);
8427 
8428 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8429 				tr, cpu, &tracing_entries_fops);
8430 
8431 #ifdef CONFIG_TRACER_SNAPSHOT
8432 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8433 				tr, cpu, &snapshot_fops);
8434 
8435 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8436 				tr, cpu, &snapshot_raw_fops);
8437 #endif
8438 }
8439 
8440 #ifdef CONFIG_FTRACE_SELFTEST
8441 /* Let selftest have access to static functions in this file */
8442 #include "trace_selftest.c"
8443 #endif
8444 
8445 static ssize_t
8446 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8447 			loff_t *ppos)
8448 {
8449 	struct trace_option_dentry *topt = filp->private_data;
8450 	char *buf;
8451 
8452 	if (topt->flags->val & topt->opt->bit)
8453 		buf = "1\n";
8454 	else
8455 		buf = "0\n";
8456 
8457 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8458 }
8459 
8460 static ssize_t
8461 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8462 			 loff_t *ppos)
8463 {
8464 	struct trace_option_dentry *topt = filp->private_data;
8465 	unsigned long val;
8466 	int ret;
8467 
8468 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8469 	if (ret)
8470 		return ret;
8471 
8472 	if (val != 0 && val != 1)
8473 		return -EINVAL;
8474 
8475 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8476 		mutex_lock(&trace_types_lock);
8477 		ret = __set_tracer_option(topt->tr, topt->flags,
8478 					  topt->opt, !val);
8479 		mutex_unlock(&trace_types_lock);
8480 		if (ret)
8481 			return ret;
8482 	}
8483 
8484 	*ppos += cnt;
8485 
8486 	return cnt;
8487 }
8488 
8489 
8490 static const struct file_operations trace_options_fops = {
8491 	.open = tracing_open_generic,
8492 	.read = trace_options_read,
8493 	.write = trace_options_write,
8494 	.llseek	= generic_file_llseek,
8495 };
8496 
8497 /*
8498  * In order to pass in both the trace_array descriptor as well as the index
8499  * to the flag that the trace option file represents, the trace_array
8500  * has a character array of trace_flags_index[], which holds the index
8501  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8502  * The address of the corresponding element of this array is passed to
8503  * the flag option file read/write callbacks.
8504  *
8505  * In order to extract both the index and the trace_array descriptor,
8506  * get_tr_index() uses the following algorithm.
8507  *
8508  *   idx = *ptr;
8509  *
8510  * This works because the pointer passed in is the address of an index
8511  * entry, and that entry holds its own index (remember, index[1] == 1).
8512  *
8513  * Subtracting that index from the pointer then gives the start of the
8514  * index array:
8515  *
8516  *   ptr - idx == &index[0]
8517  *
8518  * Then a simple container_of() from that pointer gets us to the
8519  * trace_array descriptor.
8520  */
8521 static void get_tr_index(void *data, struct trace_array **ptr,
8522 			 unsigned int *pindex)
8523 {
8524 	*pindex = *(unsigned char *)data;
8525 
8526 	*ptr = container_of(data - *pindex, struct trace_array,
8527 			    trace_flags_index);
8528 }
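
/*
 * Worked example: if @data points at tr->trace_flags_index[3], then
 * *pindex becomes 3, data - 3 is &tr->trace_flags_index[0], and the
 * container_of() hands back tr itself.
 */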
8529 
8530 static ssize_t
8531 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8532 			loff_t *ppos)
8533 {
8534 	void *tr_index = filp->private_data;
8535 	struct trace_array *tr;
8536 	unsigned int index;
8537 	char *buf;
8538 
8539 	get_tr_index(tr_index, &tr, &index);
8540 
8541 	if (tr->trace_flags & (1 << index))
8542 		buf = "1\n";
8543 	else
8544 		buf = "0\n";
8545 
8546 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8547 }
8548 
8549 static ssize_t
8550 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8551 			 loff_t *ppos)
8552 {
8553 	void *tr_index = filp->private_data;
8554 	struct trace_array *tr;
8555 	unsigned int index;
8556 	unsigned long val;
8557 	int ret;
8558 
8559 	get_tr_index(tr_index, &tr, &index);
8560 
8561 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8562 	if (ret)
8563 		return ret;
8564 
8565 	if (val != 0 && val != 1)
8566 		return -EINVAL;
8567 
8568 	mutex_lock(&event_mutex);
8569 	mutex_lock(&trace_types_lock);
8570 	ret = set_tracer_flag(tr, 1 << index, val);
8571 	mutex_unlock(&trace_types_lock);
8572 	mutex_unlock(&event_mutex);
8573 
8574 	if (ret < 0)
8575 		return ret;
8576 
8577 	*ppos += cnt;
8578 
8579 	return cnt;
8580 }
8581 
8582 static const struct file_operations trace_options_core_fops = {
8583 	.open = tracing_open_generic,
8584 	.read = trace_options_core_read,
8585 	.write = trace_options_core_write,
8586 	.llseek = generic_file_llseek,
8587 };
8588 
8589 struct dentry *trace_create_file(const char *name,
8590 				 umode_t mode,
8591 				 struct dentry *parent,
8592 				 void *data,
8593 				 const struct file_operations *fops)
8594 {
8595 	struct dentry *ret;
8596 
8597 	ret = tracefs_create_file(name, mode, parent, data, fops);
8598 	if (!ret)
8599 		pr_warn("Could not create tracefs '%s' entry\n", name);
8600 
8601 	return ret;
8602 }
8603 
8604 
8605 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8606 {
8607 	struct dentry *d_tracer;
8608 
8609 	if (tr->options)
8610 		return tr->options;
8611 
8612 	d_tracer = tracing_get_dentry(tr);
8613 	if (IS_ERR(d_tracer))
8614 		return NULL;
8615 
8616 	tr->options = tracefs_create_dir("options", d_tracer);
8617 	if (!tr->options) {
8618 		pr_warn("Could not create tracefs directory 'options'\n");
8619 		return NULL;
8620 	}
8621 
8622 	return tr->options;
8623 }
8624 
8625 static void
8626 create_trace_option_file(struct trace_array *tr,
8627 			 struct trace_option_dentry *topt,
8628 			 struct tracer_flags *flags,
8629 			 struct tracer_opt *opt)
8630 {
8631 	struct dentry *t_options;
8632 
8633 	t_options = trace_options_init_dentry(tr);
8634 	if (!t_options)
8635 		return;
8636 
8637 	topt->flags = flags;
8638 	topt->opt = opt;
8639 	topt->tr = tr;
8640 
8641 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8642 				    &trace_options_fops);
8643 
8644 }
8645 
8646 static void
8647 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8648 {
8649 	struct trace_option_dentry *topts;
8650 	struct trace_options *tr_topts;
8651 	struct tracer_flags *flags;
8652 	struct tracer_opt *opts;
8653 	int cnt;
8654 	int i;
8655 
8656 	if (!tracer)
8657 		return;
8658 
8659 	flags = tracer->flags;
8660 
8661 	if (!flags || !flags->opts)
8662 		return;
8663 
8664 	/*
8665 	 * If this is an instance, only create flags for tracers
8666 	 * the instance may have.
8667 	 */
8668 	if (!trace_ok_for_array(tracer, tr))
8669 		return;
8670 
8671 	for (i = 0; i < tr->nr_topts; i++) {
8672 		/* Make sure there are no duplicate flags. */
8673 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8674 			return;
8675 	}
8676 
8677 	opts = flags->opts;
8678 
8679 	for (cnt = 0; opts[cnt].name; cnt++)
8680 		;
8681 
8682 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8683 	if (!topts)
8684 		return;
8685 
8686 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8687 			    GFP_KERNEL);
8688 	if (!tr_topts) {
8689 		kfree(topts);
8690 		return;
8691 	}
8692 
8693 	tr->topts = tr_topts;
8694 	tr->topts[tr->nr_topts].tracer = tracer;
8695 	tr->topts[tr->nr_topts].topts = topts;
8696 	tr->nr_topts++;
8697 
8698 	for (cnt = 0; opts[cnt].name; cnt++) {
8699 		create_trace_option_file(tr, &topts[cnt], flags,
8700 					 &opts[cnt]);
8701 		MEM_FAIL(topts[cnt].entry == NULL,
8702 			  "Failed to create trace option: %s",
8703 			  opts[cnt].name);
8704 	}
8705 }
8706 
8707 static struct dentry *
8708 create_trace_option_core_file(struct trace_array *tr,
8709 			      const char *option, long index)
8710 {
8711 	struct dentry *t_options;
8712 
8713 	t_options = trace_options_init_dentry(tr);
8714 	if (!t_options)
8715 		return NULL;
8716 
8717 	return trace_create_file(option, 0644, t_options,
8718 				 (void *)&tr->trace_flags_index[index],
8719 				 &trace_options_core_fops);
8720 }
8721 
8722 static void create_trace_options_dir(struct trace_array *tr)
8723 {
8724 	struct dentry *t_options;
8725 	bool top_level = tr == &global_trace;
8726 	int i;
8727 
8728 	t_options = trace_options_init_dentry(tr);
8729 	if (!t_options)
8730 		return;
8731 
8732 	for (i = 0; trace_options[i]; i++) {
8733 		if (top_level ||
8734 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8735 			create_trace_option_core_file(tr, trace_options[i], i);
8736 	}
8737 }
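
/*
 * The files created above are simple boolean knobs; from user space
 * they are flipped with writes such as (flag name is just an example):
 *
 *	echo 1 > options/sym-offset
 *	echo 0 > options/sym-offset
 */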
8738 
8739 static ssize_t
8740 rb_simple_read(struct file *filp, char __user *ubuf,
8741 	       size_t cnt, loff_t *ppos)
8742 {
8743 	struct trace_array *tr = filp->private_data;
8744 	char buf[64];
8745 	int r;
8746 
8747 	r = tracer_tracing_is_on(tr);
8748 	r = sprintf(buf, "%d\n", r);
8749 
8750 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8751 }
8752 
8753 static ssize_t
8754 rb_simple_write(struct file *filp, const char __user *ubuf,
8755 		size_t cnt, loff_t *ppos)
8756 {
8757 	struct trace_array *tr = filp->private_data;
8758 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8759 	unsigned long val;
8760 	int ret;
8761 
8762 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8763 	if (ret)
8764 		return ret;
8765 
8766 	if (buffer) {
8767 		mutex_lock(&trace_types_lock);
8768 		if (!!val == tracer_tracing_is_on(tr)) {
8769 			val = 0; /* do nothing */
8770 		} else if (val) {
8771 			tracer_tracing_on(tr);
8772 			if (tr->current_trace->start)
8773 				tr->current_trace->start(tr);
8774 		} else {
8775 			tracer_tracing_off(tr);
8776 			if (tr->current_trace->stop)
8777 				tr->current_trace->stop(tr);
8778 		}
8779 		mutex_unlock(&trace_types_lock);
8780 	}
8781 
8782 	(*ppos)++;
8783 
8784 	return cnt;
8785 }
8786 
8787 static const struct file_operations rb_simple_fops = {
8788 	.open		= tracing_open_generic_tr,
8789 	.read		= rb_simple_read,
8790 	.write		= rb_simple_write,
8791 	.release	= tracing_release_generic_tr,
8792 	.llseek		= default_llseek,
8793 };
8794 
8795 static ssize_t
8796 buffer_percent_read(struct file *filp, char __user *ubuf,
8797 		    size_t cnt, loff_t *ppos)
8798 {
8799 	struct trace_array *tr = filp->private_data;
8800 	char buf[64];
8801 	int r;
8802 
8803 	r = tr->buffer_percent;
8804 	r = sprintf(buf, "%d\n", r);
8805 
8806 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8807 }
8808 
8809 static ssize_t
8810 buffer_percent_write(struct file *filp, const char __user *ubuf,
8811 		     size_t cnt, loff_t *ppos)
8812 {
8813 	struct trace_array *tr = filp->private_data;
8814 	unsigned long val;
8815 	int ret;
8816 
8817 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8818 	if (ret)
8819 		return ret;
8820 
8821 	if (val > 100)
8822 		return -EINVAL;
8823 
8824 	if (!val)
8825 		val = 1;
8826 
8827 	tr->buffer_percent = val;
8828 
8829 	(*ppos)++;
8830 
8831 	return cnt;
8832 }
8833 
8834 static const struct file_operations buffer_percent_fops = {
8835 	.open		= tracing_open_generic_tr,
8836 	.read		= buffer_percent_read,
8837 	.write		= buffer_percent_write,
8838 	.release	= tracing_release_generic_tr,
8839 	.llseek		= default_llseek,
8840 };
8841 
8842 static struct dentry *trace_instance_dir;
8843 
8844 static void
8845 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8846 
8847 static int
8848 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8849 {
8850 	enum ring_buffer_flags rb_flags;
8851 
8852 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8853 
8854 	buf->tr = tr;
8855 
8856 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8857 	if (!buf->buffer)
8858 		return -ENOMEM;
8859 
8860 	buf->data = alloc_percpu(struct trace_array_cpu);
8861 	if (!buf->data) {
8862 		ring_buffer_free(buf->buffer);
8863 		buf->buffer = NULL;
8864 		return -ENOMEM;
8865 	}
8866 
8867 	/* Allocate the first page for all buffers */
8868 	set_buffer_entries(&tr->array_buffer,
8869 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8870 
8871 	return 0;
8872 }
8873 
8874 static int allocate_trace_buffers(struct trace_array *tr, int size)
8875 {
8876 	int ret;
8877 
8878 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8879 	if (ret)
8880 		return ret;
8881 
8882 #ifdef CONFIG_TRACER_MAX_TRACE
8883 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8884 				    allocate_snapshot ? size : 1);
8885 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8886 		ring_buffer_free(tr->array_buffer.buffer);
8887 		tr->array_buffer.buffer = NULL;
8888 		free_percpu(tr->array_buffer.data);
8889 		tr->array_buffer.data = NULL;
8890 		return -ENOMEM;
8891 	}
8892 	tr->allocated_snapshot = allocate_snapshot;
8893 
8894 	/*
8895 	 * Only the top level trace array gets its snapshot allocated
8896 	 * from the kernel command line.
8897 	 */
8898 	allocate_snapshot = false;
8899 #endif
8900 
8901 	return 0;
8902 }
8903 
8904 static void free_trace_buffer(struct array_buffer *buf)
8905 {
8906 	if (buf->buffer) {
8907 		ring_buffer_free(buf->buffer);
8908 		buf->buffer = NULL;
8909 		free_percpu(buf->data);
8910 		buf->data = NULL;
8911 	}
8912 }
8913 
8914 static void free_trace_buffers(struct trace_array *tr)
8915 {
8916 	if (!tr)
8917 		return;
8918 
8919 	free_trace_buffer(&tr->array_buffer);
8920 
8921 #ifdef CONFIG_TRACER_MAX_TRACE
8922 	free_trace_buffer(&tr->max_buffer);
8923 #endif
8924 }
8925 
8926 static void init_trace_flags_index(struct trace_array *tr)
8927 {
8928 	int i;
8929 
8930 	/* Used by the trace options files */
8931 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8932 		tr->trace_flags_index[i] = i;
8933 }
8934 
8935 static void __update_tracer_options(struct trace_array *tr)
8936 {
8937 	struct tracer *t;
8938 
8939 	for (t = trace_types; t; t = t->next)
8940 		add_tracer_options(tr, t);
8941 }
8942 
8943 static void update_tracer_options(struct trace_array *tr)
8944 {
8945 	mutex_lock(&trace_types_lock);
8946 	__update_tracer_options(tr);
8947 	mutex_unlock(&trace_types_lock);
8948 }
8949 
8950 /* Must have trace_types_lock held */
8951 struct trace_array *trace_array_find(const char *instance)
8952 {
8953 	struct trace_array *tr, *found = NULL;
8954 
8955 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8956 		if (tr->name && strcmp(tr->name, instance) == 0) {
8957 			found = tr;
8958 			break;
8959 		}
8960 	}
8961 
8962 	return found;
8963 }
8964 
8965 struct trace_array *trace_array_find_get(const char *instance)
8966 {
8967 	struct trace_array *tr;
8968 
8969 	mutex_lock(&trace_types_lock);
8970 	tr = trace_array_find(instance);
8971 	if (tr)
8972 		tr->ref++;
8973 	mutex_unlock(&trace_types_lock);
8974 
8975 	return tr;
8976 }
8977 
8978 static int trace_array_create_dir(struct trace_array *tr)
8979 {
8980 	int ret;
8981 
8982 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8983 	if (!tr->dir)
8984 		return -EINVAL;
8985 
8986 	ret = event_trace_add_tracer(tr->dir, tr);
8987 	if (ret) {
8988 		tracefs_remove(tr->dir);
8989 		return ret;
8990 	}

8990 	init_tracer_tracefs(tr, tr->dir);
8991 	__update_tracer_options(tr);
8992 
8993 	return ret;
8994 }
8995 
8996 static struct trace_array *trace_array_create(const char *name)
8997 {
8998 	struct trace_array *tr;
8999 	int ret;
9000 
9001 	ret = -ENOMEM;
9002 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9003 	if (!tr)
9004 		return ERR_PTR(ret);
9005 
9006 	tr->name = kstrdup(name, GFP_KERNEL);
9007 	if (!tr->name)
9008 		goto out_free_tr;
9009 
9010 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9011 		goto out_free_tr;
9012 
9013 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9014 
9015 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9016 
9017 	raw_spin_lock_init(&tr->start_lock);
9018 
9019 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9020 
9021 	tr->current_trace = &nop_trace;
9022 
9023 	INIT_LIST_HEAD(&tr->systems);
9024 	INIT_LIST_HEAD(&tr->events);
9025 	INIT_LIST_HEAD(&tr->hist_vars);
9026 	INIT_LIST_HEAD(&tr->err_log);
9027 
9028 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9029 		goto out_free_tr;
9030 
9031 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9032 		goto out_free_tr;
9033 
9034 	ftrace_init_trace_array(tr);
9035 
9036 	init_trace_flags_index(tr);
9037 
9038 	if (trace_instance_dir) {
9039 		ret = trace_array_create_dir(tr);
9040 		if (ret)
9041 			goto out_free_tr;
9042 	} else
9043 		__trace_early_add_events(tr);
9044 
9045 	list_add(&tr->list, &ftrace_trace_arrays);
9046 
9047 	tr->ref++;
9048 
9049 	return tr;
9050 
9051  out_free_tr:
9052 	ftrace_free_ftrace_ops(tr);
9053 	free_trace_buffers(tr);
9054 	free_cpumask_var(tr->tracing_cpumask);
9055 	kfree(tr->name);
9056 	kfree(tr);
9057 
9058 	return ERR_PTR(ret);
9059 }
9060 
9061 static int instance_mkdir(const char *name)
9062 {
9063 	struct trace_array *tr;
9064 	int ret;
9065 
9066 	mutex_lock(&event_mutex);
9067 	mutex_lock(&trace_types_lock);
9068 
9069 	ret = -EEXIST;
9070 	if (trace_array_find(name))
9071 		goto out_unlock;
9072 
9073 	tr = trace_array_create(name);
9074 
9075 	ret = PTR_ERR_OR_ZERO(tr);
9076 
9077 out_unlock:
9078 	mutex_unlock(&trace_types_lock);
9079 	mutex_unlock(&event_mutex);
9080 	return ret;
9081 }
9082 
9083 /**
9084  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9085  * @name: The name of the trace array to be looked up/created.
9086  *
9087  * Returns a pointer to the trace array with the given name, or NULL
9088  * if it cannot be created.
9089  *
9090  * NOTE: This function increments the reference counter associated with the
9091  * trace array returned. This makes sure it cannot be freed while in use.
9092  * Use trace_array_put() once the trace array is no longer needed.
9093  * If the trace_array is to be freed, trace_array_destroy() needs to
9094  * be called after the trace_array_put(), or simply let user space delete
9095  * it from the tracefs instances directory. But until the
9096  * trace_array_put() is called, user space cannot delete it.
9097  *
9098  */
9099 struct trace_array *trace_array_get_by_name(const char *name)
9100 {
9101 	struct trace_array *tr;
9102 
9103 	mutex_lock(&event_mutex);
9104 	mutex_lock(&trace_types_lock);
9105 
9106 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9107 		if (tr->name && strcmp(tr->name, name) == 0)
9108 			goto out_unlock;
9109 	}
9110 
9111 	tr = trace_array_create(name);
9112 
9113 	if (IS_ERR(tr))
9114 		tr = NULL;
9115 out_unlock:
9116 	if (tr)
9117 		tr->ref++;
9118 
9119 	mutex_unlock(&trace_types_lock);
9120 	mutex_unlock(&event_mutex);
9121 	return tr;
9122 }
9123 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
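
/*
 * A minimal sketch of the calling pattern expected from a module
 * (illustrative; "myinst" is a made-up instance name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("myinst");
 *	if (!tr)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);		// drop the reference taken above
 *	trace_array_destroy(tr);	// optionally remove the instance too
 */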
9124 
9125 static int __remove_instance(struct trace_array *tr)
9126 {
9127 	int i;
9128 
9129 	/* Reference counter for a newly created trace array = 1. */
9130 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9131 		return -EBUSY;
9132 
9133 	list_del(&tr->list);
9134 
9135 	/* Disable all the flags that were enabled coming in */
9136 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9137 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9138 			set_tracer_flag(tr, 1 << i, 0);
9139 	}
9140 
9141 	tracing_set_nop(tr);
9142 	clear_ftrace_function_probes(tr);
9143 	event_trace_del_tracer(tr);
9144 	ftrace_clear_pids(tr);
9145 	ftrace_destroy_function_files(tr);
9146 	tracefs_remove(tr->dir);
9147 	free_percpu(tr->last_func_repeats);
9148 	free_trace_buffers(tr);
9149 
9150 	for (i = 0; i < tr->nr_topts; i++) {
9151 		kfree(tr->topts[i].topts);
9152 	}
9153 	kfree(tr->topts);
9154 
9155 	free_cpumask_var(tr->tracing_cpumask);
9156 	kfree(tr->name);
9157 	kfree(tr);
9158 
9159 	return 0;
9160 }
9161 
9162 int trace_array_destroy(struct trace_array *this_tr)
9163 {
9164 	struct trace_array *tr;
9165 	int ret;
9166 
9167 	if (!this_tr)
9168 		return -EINVAL;
9169 
9170 	mutex_lock(&event_mutex);
9171 	mutex_lock(&trace_types_lock);
9172 
9173 	ret = -ENODEV;
9174 
9175 	/* Make sure the trace array exists before destroying it. */
9176 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9177 		if (tr == this_tr) {
9178 			ret = __remove_instance(tr);
9179 			break;
9180 		}
9181 	}
9182 
9183 	mutex_unlock(&trace_types_lock);
9184 	mutex_unlock(&event_mutex);
9185 
9186 	return ret;
9187 }
9188 EXPORT_SYMBOL_GPL(trace_array_destroy);
9189 
9190 static int instance_rmdir(const char *name)
9191 {
9192 	struct trace_array *tr;
9193 	int ret;
9194 
9195 	mutex_lock(&event_mutex);
9196 	mutex_lock(&trace_types_lock);
9197 
9198 	ret = -ENODEV;
9199 	tr = trace_array_find(name);
9200 	if (tr)
9201 		ret = __remove_instance(tr);
9202 
9203 	mutex_unlock(&trace_types_lock);
9204 	mutex_unlock(&event_mutex);
9205 
9206 	return ret;
9207 }
9208 
9209 static __init void create_trace_instances(struct dentry *d_tracer)
9210 {
9211 	struct trace_array *tr;
9212 
9213 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9214 							 instance_mkdir,
9215 							 instance_rmdir);
9216 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9217 		return;
9218 
9219 	mutex_lock(&event_mutex);
9220 	mutex_lock(&trace_types_lock);
9221 
9222 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9223 		if (!tr->name)
9224 			continue;
9225 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9226 			     "Failed to create instance directory\n"))
9227 			break;
9228 	}
9229 
9230 	mutex_unlock(&trace_types_lock);
9231 	mutex_unlock(&event_mutex);
9232 }
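
/*
 * With the "instances" directory in place, user space can create and
 * remove trace arrays directly ("foo" is just an example name):
 *
 *	mkdir instances/foo		# calls instance_mkdir("foo")
 *	rmdir instances/foo		# calls instance_rmdir("foo")
 */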
9233 
9234 static void
9235 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9236 {
9237 	struct trace_event_file *file;
9238 	int cpu;
9239 
9240 	trace_create_file("available_tracers", 0444, d_tracer,
9241 			tr, &show_traces_fops);
9242 
9243 	trace_create_file("current_tracer", 0644, d_tracer,
9244 			tr, &set_tracer_fops);
9245 
9246 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9247 			  tr, &tracing_cpumask_fops);
9248 
9249 	trace_create_file("trace_options", 0644, d_tracer,
9250 			  tr, &tracing_iter_fops);
9251 
9252 	trace_create_file("trace", 0644, d_tracer,
9253 			  tr, &tracing_fops);
9254 
9255 	trace_create_file("trace_pipe", 0444, d_tracer,
9256 			  tr, &tracing_pipe_fops);
9257 
9258 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9259 			  tr, &tracing_entries_fops);
9260 
9261 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9262 			  tr, &tracing_total_entries_fops);
9263 
9264 	trace_create_file("free_buffer", 0200, d_tracer,
9265 			  tr, &tracing_free_buffer_fops);
9266 
9267 	trace_create_file("trace_marker", 0220, d_tracer,
9268 			  tr, &tracing_mark_fops);
9269 
9270 	file = __find_event_file(tr, "ftrace", "print");
9271 	if (file && file->dir)
9272 		trace_create_file("trigger", 0644, file->dir, file,
9273 				  &event_trigger_fops);
9274 	tr->trace_marker_file = file;
9275 
9276 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9277 			  tr, &tracing_mark_raw_fops);
9278 
9279 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9280 			  &trace_clock_fops);
9281 
9282 	trace_create_file("tracing_on", 0644, d_tracer,
9283 			  tr, &rb_simple_fops);
9284 
9285 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9286 			  &trace_time_stamp_mode_fops);
9287 
9288 	tr->buffer_percent = 50;
9289 
9290 	trace_create_file("buffer_percent", 0444, d_tracer,
9291 			tr, &buffer_percent_fops);
9292 
9293 	create_trace_options_dir(tr);
9294 
9295 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9296 	trace_create_maxlat_file(tr, d_tracer);
9297 #endif
9298 
9299 	if (ftrace_create_function_files(tr, d_tracer))
9300 		MEM_FAIL(1, "Could not allocate function filter files");
9301 
9302 #ifdef CONFIG_TRACER_SNAPSHOT
9303 	trace_create_file("snapshot", 0644, d_tracer,
9304 			  tr, &snapshot_fops);
9305 #endif
9306 
9307 	trace_create_file("error_log", 0644, d_tracer,
9308 			  tr, &tracing_err_log_fops);
9309 
9310 	for_each_tracing_cpu(cpu)
9311 		tracing_init_tracefs_percpu(tr, cpu);
9312 
9313 	ftrace_init_tracefs(tr, d_tracer);
9314 }
9315 
9316 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9317 {
9318 	struct vfsmount *mnt;
9319 	struct file_system_type *type;
9320 
9321 	/*
9322 	 * To maintain backward compatibility for tools that mount
9323 	 * debugfs to get to the tracing facility, tracefs is automatically
9324 	 * mounted to the debugfs/tracing directory.
9325 	 */
9326 	type = get_fs_type("tracefs");
9327 	if (!type)
9328 		return NULL;
9329 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9330 	put_filesystem(type);
9331 	if (IS_ERR(mnt))
9332 		return NULL;
9333 	mntget(mnt);
9334 
9335 	return mnt;
9336 }
9337 
9338 /**
9339  * tracing_init_dentry - initialize top level trace array
9340  *
9341  * This is called when creating files or directories in the tracing
9342  * directory. It is called via fs_initcall() by any of the boot up code.
9343  * Returns 0 on success, or a negative errno if that directory can't be set up.
9344  */
9345 int tracing_init_dentry(void)
9346 {
9347 	struct trace_array *tr = &global_trace;
9348 
9349 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9350 		pr_warn("Tracing disabled due to lockdown\n");
9351 		return -EPERM;
9352 	}
9353 
9354 	/* The top level trace array uses NULL as parent */
9355 	if (tr->dir)
9356 		return 0;
9357 
9358 	if (WARN_ON(!tracefs_initialized()))
9359 		return -ENODEV;
9360 
9361 	/*
9362 	 * As there may still be users that expect the tracing
9363 	 * files to exist in debugfs/tracing, we must automount
9364 	 * the tracefs file system there, so older tools still
9365 	 * work with the newer kernel.
9366 	 */
9367 	tr->dir = debugfs_create_automount("tracing", NULL,
9368 					   trace_automount, NULL);
9369 
9370 	return 0;
9371 }
9372 
9373 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9374 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9375 
9376 static struct workqueue_struct *eval_map_wq __initdata;
9377 static struct work_struct eval_map_work __initdata;
9378 
9379 static void __init eval_map_work_func(struct work_struct *work)
9380 {
9381 	int len;
9382 
9383 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9384 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9385 }
9386 
9387 static int __init trace_eval_init(void)
9388 {
9389 	INIT_WORK(&eval_map_work, eval_map_work_func);
9390 
9391 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9392 	if (!eval_map_wq) {
9393 		pr_err("Unable to allocate eval_map_wq\n");
9394 		/* Do work here */
9395 		eval_map_work_func(&eval_map_work);
9396 		return -ENOMEM;
9397 	}
9398 
9399 	queue_work(eval_map_wq, &eval_map_work);
9400 	return 0;
9401 }
9402 
9403 static int __init trace_eval_sync(void)
9404 {
9405 	/* Make sure the eval map updates are finished */
9406 	if (eval_map_wq)
9407 		destroy_workqueue(eval_map_wq);
9408 	return 0;
9409 }
9410 
9411 late_initcall_sync(trace_eval_sync);
9412 
9413 
9414 #ifdef CONFIG_MODULES
9415 static void trace_module_add_evals(struct module *mod)
9416 {
9417 	if (!mod->num_trace_evals)
9418 		return;
9419 
9420 	/*
9421 	 * Modules with bad taint do not have events created; do not
9422 	 * bother with their eval maps (enums) either.
9423 	 */
9424 	if (trace_module_has_bad_taint(mod))
9425 		return;
9426 
9427 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9428 }
9429 
9430 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9431 static void trace_module_remove_evals(struct module *mod)
9432 {
9433 	union trace_eval_map_item *map;
9434 	union trace_eval_map_item **last = &trace_eval_maps;
9435 
9436 	if (!mod->num_trace_evals)
9437 		return;
9438 
9439 	mutex_lock(&trace_eval_mutex);
9440 
9441 	map = trace_eval_maps;
9442 
9443 	while (map) {
9444 		if (map->head.mod == mod)
9445 			break;
9446 		map = trace_eval_jmp_to_tail(map);
9447 		last = &map->tail.next;
9448 		map = map->tail.next;
9449 	}
9450 	if (!map)
9451 		goto out;
9452 
9453 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9454 	kfree(map);
9455  out:
9456 	mutex_unlock(&trace_eval_mutex);
9457 }
9458 #else
9459 static inline void trace_module_remove_evals(struct module *mod) { }
9460 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9461 
9462 static int trace_module_notify(struct notifier_block *self,
9463 			       unsigned long val, void *data)
9464 {
9465 	struct module *mod = data;
9466 
9467 	switch (val) {
9468 	case MODULE_STATE_COMING:
9469 		trace_module_add_evals(mod);
9470 		break;
9471 	case MODULE_STATE_GOING:
9472 		trace_module_remove_evals(mod);
9473 		break;
9474 	}
9475 
9476 	return NOTIFY_OK;
9477 }
9478 
9479 static struct notifier_block trace_module_nb = {
9480 	.notifier_call = trace_module_notify,
9481 	.priority = 0,
9482 };
9483 #endif /* CONFIG_MODULES */
9484 
9485 static __init int tracer_init_tracefs(void)
9486 {
9487 	int ret;
9488 
9489 	trace_access_lock_init();
9490 
9491 	ret = tracing_init_dentry();
9492 	if (ret)
9493 		return 0;
9494 
9495 	event_trace_init();
9496 
9497 	init_tracer_tracefs(&global_trace, NULL);
9498 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9499 
9500 	trace_create_file("tracing_thresh", 0644, NULL,
9501 			&global_trace, &tracing_thresh_fops);
9502 
9503 	trace_create_file("README", 0444, NULL,
9504 			NULL, &tracing_readme_fops);
9505 
9506 	trace_create_file("saved_cmdlines", 0444, NULL,
9507 			NULL, &tracing_saved_cmdlines_fops);
9508 
9509 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9510 			  NULL, &tracing_saved_cmdlines_size_fops);
9511 
9512 	trace_create_file("saved_tgids", 0444, NULL,
9513 			NULL, &tracing_saved_tgids_fops);
9514 
9515 	trace_eval_init();
9516 
9517 	trace_create_eval_file(NULL);
9518 
9519 #ifdef CONFIG_MODULES
9520 	register_module_notifier(&trace_module_nb);
9521 #endif
9522 
9523 #ifdef CONFIG_DYNAMIC_FTRACE
9524 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9525 			NULL, &tracing_dyn_info_fops);
9526 #endif
9527 
9528 	create_trace_instances(NULL);
9529 
9530 	update_tracer_options(&global_trace);
9531 
9532 	return 0;
9533 }
9534 
9535 static int trace_panic_handler(struct notifier_block *this,
9536 			       unsigned long event, void *unused)
9537 {
9538 	if (ftrace_dump_on_oops)
9539 		ftrace_dump(ftrace_dump_on_oops);
9540 	return NOTIFY_OK;
9541 }
9542 
9543 static struct notifier_block trace_panic_notifier = {
9544 	.notifier_call  = trace_panic_handler,
9545 	.next           = NULL,
9546 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9547 };
9548 
9549 static int trace_die_handler(struct notifier_block *self,
9550 			     unsigned long val,
9551 			     void *data)
9552 {
9553 	switch (val) {
9554 	case DIE_OOPS:
9555 		if (ftrace_dump_on_oops)
9556 			ftrace_dump(ftrace_dump_on_oops);
9557 		break;
9558 	default:
9559 		break;
9560 	}
9561 	return NOTIFY_OK;
9562 }
9563 
9564 static struct notifier_block trace_die_notifier = {
9565 	.notifier_call = trace_die_handler,
9566 	.priority = 200
9567 };
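
/*
 * Both notifiers above only dump when ftrace_dump_on_oops is set, e.g.
 * via the "ftrace_dump_on_oops" kernel command line parameter or by
 * writing the sysctl before the crash:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */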
9568 
9569 /*
9570  * printk is set to a max of 1024, and we really don't need it that big.
9571  * Nothing should be printing 1000 characters anyway.
9572  */
9573 #define TRACE_MAX_PRINT		1000
9574 
9575 /*
9576  * Define here KERN_TRACE so that we have one place to modify
9577  * it if we decide to change what log level the ftrace dump
9578  * should be at.
9579  */
9580 #define KERN_TRACE		KERN_EMERG
9581 
9582 void
9583 trace_printk_seq(struct trace_seq *s)
9584 {
9585 	/* Probably should print a warning here. */
9586 	if (s->seq.len >= TRACE_MAX_PRINT)
9587 		s->seq.len = TRACE_MAX_PRINT;
9588 
9589 	/*
9590 	 * More paranoid code. Although the buffer size is set to
9591 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9592 	 * an extra layer of protection.
9593 	 */
9594 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9595 		s->seq.len = s->seq.size - 1;
9596 
9597 	/* should already be NUL-terminated, but we are paranoid. */
9598 	s->buffer[s->seq.len] = 0;
9599 
9600 	printk(KERN_TRACE "%s", s->buffer);
9601 
9602 	trace_seq_init(s);
9603 }
9604 
9605 void trace_init_global_iter(struct trace_iterator *iter)
9606 {
9607 	iter->tr = &global_trace;
9608 	iter->trace = iter->tr->current_trace;
9609 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9610 	iter->array_buffer = &global_trace.array_buffer;
9611 
9612 	if (iter->trace && iter->trace->open)
9613 		iter->trace->open(iter);
9614 
9615 	/* Annotate start of buffers if we had overruns */
9616 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9617 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9618 
9619 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9620 	if (trace_clocks[iter->tr->clock_id].in_ns)
9621 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9622 }
9623 
9624 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9625 {
9626 	/* use static because iter can be a bit big for the stack */
9627 	static struct trace_iterator iter;
9628 	static atomic_t dump_running;
9629 	struct trace_array *tr = &global_trace;
9630 	unsigned int old_userobj;
9631 	unsigned long flags;
9632 	int cnt = 0, cpu;
9633 
9634 	/* Only allow one dump user at a time. */
9635 	if (atomic_inc_return(&dump_running) != 1) {
9636 		atomic_dec(&dump_running);
9637 		return;
9638 	}
9639 
9640 	/*
9641 	 * Always turn off tracing when we dump.
9642 	 * We don't need to show trace output of what happens
9643 	 * between multiple crashes.
9644 	 *
9645 	 * If the user does a sysrq-z, then they can re-enable
9646 	 * tracing with echo 1 > tracing_on.
9647 	 */
9648 	tracing_off();
9649 
9650 	local_irq_save(flags);
9651 	printk_nmi_direct_enter();
9652 
9653 	/* Simulate the iterator */
9654 	trace_init_global_iter(&iter);
9655 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9656 	iter.temp = static_temp_buf;
9657 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9658 	iter.fmt = static_fmt_buf;
9659 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9660 
9661 	for_each_tracing_cpu(cpu) {
9662 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9663 	}
9664 
9665 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9666 
9667 	/* don't look at user memory in panic mode */
9668 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9669 
9670 	switch (oops_dump_mode) {
9671 	case DUMP_ALL:
9672 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9673 		break;
9674 	case DUMP_ORIG:
9675 		iter.cpu_file = raw_smp_processor_id();
9676 		break;
9677 	case DUMP_NONE:
9678 		goto out_enable;
9679 	default:
9680 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9681 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9682 	}
9683 
9684 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9685 
9686 	/* Did function tracer already get disabled? */
9687 	if (ftrace_is_dead()) {
9688 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9689 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9690 	}
9691 
9692 	/*
9693 	 * We need to stop all tracing on all CPUs to read
9694 	 * the next buffer. This is a bit expensive, but it is
9695 	 * not done often. We print everything we can read,
9696 	 * and then release the locks again.
9697 	 */
9698 
9699 	while (!trace_empty(&iter)) {
9700 
9701 		if (!cnt)
9702 			printk(KERN_TRACE "---------------------------------\n");
9703 
9704 		cnt++;
9705 
9706 		trace_iterator_reset(&iter);
9707 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9708 
9709 		if (trace_find_next_entry_inc(&iter) != NULL) {
9710 			int ret;
9711 
9712 			ret = print_trace_line(&iter);
9713 			if (ret != TRACE_TYPE_NO_CONSUME)
9714 				trace_consume(&iter);
9715 		}
9716 		touch_nmi_watchdog();
9717 
9718 		trace_printk_seq(&iter.seq);
9719 	}
9720 
9721 	if (!cnt)
9722 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9723 	else
9724 		printk(KERN_TRACE "---------------------------------\n");
9725 
9726  out_enable:
9727 	tr->trace_flags |= old_userobj;
9728 
9729 	for_each_tracing_cpu(cpu) {
9730 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9731 	}
9732 	atomic_dec(&dump_running);
9733 	printk_nmi_direct_exit();
9734 	local_irq_restore(flags);
9735 }
9736 EXPORT_SYMBOL_GPL(ftrace_dump);
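/*
 * Since ftrace_dump() is exported (GPL), a module may dump the trace
 * buffers from its own fatal-error path. A minimal sketch, with a
 * hypothetical my_fatal_error() handler (ftrace_dump() and the dump
 * modes come in via linux/kernel.h in this tree):
 *
 *	#include <linux/kernel.h>
 *
 *	static void my_fatal_error(void)
 *	{
 *		pr_emerg("my_driver: unrecoverable state, dumping ftrace\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 *
 * DUMP_ORIG can be used instead to dump only the calling CPU's buffer.
 * Note that ftrace_dump() turns tracing off (see tracing_off() above);
 * re-enable it with "echo 1 > tracing_on" if tracing is still wanted.
 */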
9737 
9738 #define WRITE_BUFSIZE  4096
9739 
9740 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9741 				size_t count, loff_t *ppos,
9742 				int (*createfn)(const char *))
9743 {
9744 	char *kbuf, *buf, *tmp;
9745 	int ret = 0;
9746 	size_t done = 0;
9747 	size_t size;
9748 
9749 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9750 	if (!kbuf)
9751 		return -ENOMEM;
9752 
9753 	while (done < count) {
9754 		size = count - done;
9755 
9756 		if (size >= WRITE_BUFSIZE)
9757 			size = WRITE_BUFSIZE - 1;
9758 
9759 		if (copy_from_user(kbuf, buffer + done, size)) {
9760 			ret = -EFAULT;
9761 			goto out;
9762 		}
9763 		kbuf[size] = '\0';
9764 		buf = kbuf;
9765 		do {
9766 			tmp = strchr(buf, '\n');
9767 			if (tmp) {
9768 				*tmp = '\0';
9769 				size = tmp - buf + 1;
9770 			} else {
9771 				size = strlen(buf);
9772 				if (done + size < count) {
9773 					if (buf != kbuf)
9774 						break;
9775 					/* A line may hold at most WRITE_BUFSIZE - 2 chars ('\n' + '\0' take the rest) */
9776 					pr_warn("Line length is too long: Should be less than %d\n",
9777 						WRITE_BUFSIZE - 2);
9778 					ret = -EINVAL;
9779 					goto out;
9780 				}
9781 			}
9782 			done += size;
9783 
9784 			/* Remove comments */
9785 			tmp = strchr(buf, '#');
9786 
9787 			if (tmp)
9788 				*tmp = '\0';
9789 
9790 			ret = createfn(buf);
9791 			if (ret)
9792 				goto out;
9793 			buf += size;
9794 
9795 		} while (done < count);
9796 	}
9797 	ret = done;
9798 
9799 out:
9800 	kfree(kbuf);
9801 
9802 	return ret;
9803 }
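/*
 * trace_parse_run_command() splits a user write into '\n'-separated
 * commands, strips '#' comments, and hands each command to @createfn.
 * A minimal sketch of a caller (my_create_cmd/my_write are hypothetical;
 * the kprobe_events and dynamic-event writers use this helper similarly):
 *
 *	static int my_create_cmd(const char *cmd)
 *	{
 *		pr_debug("got command: %s\n", cmd);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, cnt, ppos,
 *					       my_create_cmd);
 *	}
 *
 * Returning a negative error from the callback aborts the whole write.
 */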
9804 
9805 __init static int tracer_alloc_buffers(void)
9806 {
9807 	int ring_buf_size;
9808 	int ret = -ENOMEM;
9809 
9810 
9811 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9812 		pr_warn("Tracing disabled due to lockdown\n");
9813 		return -EPERM;
9814 	}
9815 
9816 	/*
9817 	 * Make sure we don't accidentally add more trace options
9818 	 * than we have bits for.
9819 	 */
9820 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9821 
9822 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9823 		goto out;
9824 
9825 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9826 		goto out_free_buffer_mask;
9827 
9828 	/* Only allocate trace_printk buffers if a trace_printk exists */
9829 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9830 		/* Must be called before global_trace.buffer is allocated */
9831 		trace_printk_init_buffers();
9832 
9833 	/* To save memory, keep the ring buffer size at its minimum */
9834 	if (ring_buffer_expanded)
9835 		ring_buf_size = trace_buf_size;
9836 	else
9837 		ring_buf_size = 1;
9838 
9839 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9840 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9841 
9842 	raw_spin_lock_init(&global_trace.start_lock);
9843 
9844 	/*
9845 	 * The prepare callback allocates some memory for the ring buffer. We
9846 	 * don't free the buffer if the CPU goes down. If we were to free
9847 	 * the buffer, then the user would lose any trace that was in the
9848 	 * buffer. The memory will be removed once the "instance" is removed.
9849 	 */
9850 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9851 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9852 				      NULL);
9853 	if (ret < 0)
9854 		goto out_free_cpumask;
9855 	/* Used for event triggers */
9856 	ret = -ENOMEM;
9857 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9858 	if (!temp_buffer)
9859 		goto out_rm_hp_state;
9860 
9861 	if (trace_create_savedcmd() < 0)
9862 		goto out_free_temp_buffer;
9863 
9864 	/* TODO: make the number of buffers hot pluggable with CPUs */
9865 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9866 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9867 		goto out_free_savedcmd;
9868 	}
9869 
9870 	if (global_trace.buffer_disabled)
9871 		tracing_off();
9872 
9873 	if (trace_boot_clock) {
9874 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9875 		if (ret < 0)
9876 			pr_warn("Trace clock %s not defined, going back to default\n",
9877 				trace_boot_clock);
9878 	}
9879 
9880 	/*
9881 	 * register_tracer() might reference current_trace, so it
9882 	 * needs to be set before we register anything. This is
9883 	 * just a bootstrap of current_trace anyway.
9884 	 */
9885 	global_trace.current_trace = &nop_trace;
9886 
9887 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9888 
9889 	ftrace_init_global_array_ops(&global_trace);
9890 
9891 	init_trace_flags_index(&global_trace);
9892 
9893 	register_tracer(&nop_trace);
9894 
9895 	/* Function tracing may start here (via kernel command line) */
9896 	init_function_trace();
9897 
9898 	/* All seems OK, enable tracing */
9899 	tracing_disabled = 0;
9900 
9901 	atomic_notifier_chain_register(&panic_notifier_list,
9902 				       &trace_panic_notifier);
9903 
9904 	register_die_notifier(&trace_die_notifier);
9905 
9906 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9907 
9908 	INIT_LIST_HEAD(&global_trace.systems);
9909 	INIT_LIST_HEAD(&global_trace.events);
9910 	INIT_LIST_HEAD(&global_trace.hist_vars);
9911 	INIT_LIST_HEAD(&global_trace.err_log);
9912 	list_add(&global_trace.list, &ftrace_trace_arrays);
9913 
9914 	apply_trace_boot_options();
9915 
9916 	register_snapshot_cmd();
9917 
9918 	test_can_verify();
9919 
9920 	return 0;
9921 
9922 out_free_savedcmd:
9923 	free_saved_cmdlines_buffer(savedcmd);
9924 out_free_temp_buffer:
9925 	ring_buffer_free(temp_buffer);
9926 out_rm_hp_state:
9927 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9928 out_free_cpumask:
9929 	free_cpumask_var(global_trace.tracing_cpumask);
9930 out_free_buffer_mask:
9931 	free_cpumask_var(tracing_buffer_mask);
9932 out:
9933 	return ret;
9934 }
9935 
9936 void __init early_trace_init(void)
9937 {
9938 	if (tracepoint_printk) {
9939 		tracepoint_print_iter =
9940 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9941 		if (MEM_FAIL(!tracepoint_print_iter,
9942 			     "Failed to allocate trace iterator\n"))
9943 			tracepoint_printk = 0;
9944 		else
9945 			static_key_enable(&tracepoint_printk_key.key);
9946 	}
9947 	tracer_alloc_buffers();
9948 }
9949 
9950 void __init trace_init(void)
9951 {
9952 	trace_event_init();
9953 }
9954 
9955 __init static int clear_boot_tracer(void)
9956 {
9957 	/*
9958 	 * The default bootup tracer name points into an init section
9959 	 * that will be freed after boot. This function is called as a
9960 	 * late initcall. If the boot tracer was not found by then, clear
9961 	 * the pointer out to prevent a later registration from accessing
9962 	 * the buffer that is about to be freed.
9963 	 */
9964 	if (!default_bootup_tracer)
9965 		return 0;
9966 
9967 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9968 	       default_bootup_tracer);
9969 	default_bootup_tracer = NULL;
9970 
9971 	return 0;
9972 }
9973 
9974 fs_initcall(tracer_init_tracefs);
9975 late_initcall_sync(clear_boot_tracer);
9976 
9977 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9978 __init static int tracing_set_default_clock(void)
9979 {
9980 	/* sched_clock_stable() is determined in late_initcall */
9981 	if (!trace_boot_clock && !sched_clock_stable()) {
9982 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9983 			pr_warn("Can not set tracing clock due to lockdown\n");
9984 			return -EPERM;
9985 		}
9986 
9987 		printk(KERN_WARNING
9988 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9989 		       "If you want to keep using the local clock, then add:\n"
9990 		       "  \"trace_clock=local\"\n"
9991 		       "on the kernel command line\n");
9992 		tracing_set_clock(&global_trace, "global");
9993 	}
9994 
9995 	return 0;
9996 }
9997 late_initcall_sync(tracing_set_default_clock);
9998 #endif
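/*
 * The trace clock can also be changed at run time. A usage sketch
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/trace_clock
 *		lists the available clocks, with the current one in [brackets]
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * or pass "trace_clock=<name>" on the kernel command line, as the
 * warning above suggests for keeping the "local" clock.
 */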
9999