xref: /openbmc/linux/kernel/trace/trace.c (revision 35267cea)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest looks into the ring buffer to count the entries
65  * inserted during the selftest, although concurrent insertions
66  * into the ring buffer, such as trace_printk(), could occur at
67  * the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" points to NULL so that a tail entry can be told apart
154 	 * from a normal map entry, whose "eval_string" is never NULL
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208 		return 1;
209 	}
210 
211 	return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
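
/*
 * Example (illustrative only, not exhaustive): the boot parameters handled
 * above can be combined on the kernel command line, for instance:
 *
 *	ftrace=function trace_options=sym-addr trace_clock=global
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning tp_printk
 *
 * See Documentation/admin-guide/kernel-parameters.txt for the
 * authoritative list and semantics.
 */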
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
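
/*
 * Example: the +500 above rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */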
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are adding export to the list, but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer inserted into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
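
/*
 * Example (illustrative sketch, not used by this file): a minimal
 * trace_export that receives the binary payload of every trace event.
 * The write callback body is hypothetical; only the trace_export
 * structure, the TRACE_EXPORT_* flags and the register/unregister
 * functions above are real.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int len)
{
	/* A real user would forward @entry (of @len bytes) to its backend. */
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * Registration and removal would then be:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */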
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
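
/*
 * Example (sketch): a typical user of instances pairs
 * trace_array_get_by_name() with trace_array_put() once it is done with
 * the instance. The instance name below is made up.
 */
static void __maybe_unused example_instance_user(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_instance");
	if (!tr)
		return;

	trace_array_printk(tr, _THIS_IP_, "using the instance\n");
	trace_array_put(tr);
}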
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
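
/*
 * Example (sketch): how the three helpers above are typically wired into
 * a seq_file interface for a "set_*_pid" style tracefs file. Fetching the
 * pid list from m->private is an assumption of this sketch; real users in
 * this file look it up from their trace_array under the proper locking.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops __maybe_unused = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};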
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always create a new array. The write is an all-or-nothing
698 	 * operation: a new array is built whenever the user adds new
699 	 * pids, and if the operation fails, the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low value of 16384;
820  * if the dump on oops happens, it is much appreciated not
821  * to have to wait for all that output. This is configurable
822  * at both boot time and run time anyway.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * serialize the access of the ring buffer
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow another process to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be
847  *      rewritten by the event producer.
848  *   B) the page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multiple processes to access different per-cpu
852  * ring buffers concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
912 static inline void trace_access_lock_init(void)
913 {
914 }
915 
916 #endif
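
/*
 * Example (sketch): readers later in this file bracket any consuming
 * access to a ring buffer with these primitives:
 *
 *	trace_access_lock(cpu_file);
 *	... consume events, e.g. via ring_buffer_consume() ...
 *	trace_access_unlock(cpu_file);
 *
 * where cpu_file is either a CPU number or RING_BUFFER_ALL_CPUS.
 */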
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929 					unsigned int trace_ctx,
930 					int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934 				      struct trace_buffer *buffer,
935 				      unsigned long trace_ctx,
936 				      int skip, struct pt_regs *regs)
937 {
938 }
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff), that just want to
973 	 * know if the ring buffer has been disabled, but it can handle
974 	 * races where it gets disabled while we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
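
/*
 * Example (sketch): callers normally do not invoke __trace_puts() or
 * __trace_bputs() directly but use the trace_puts() macro:
 *
 *	trace_puts("reached the slow path\n");
 *
 * which resolves to __trace_bputs() for build-time constant strings and
 * to __trace_puts() otherwise.
 */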
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, snapshot can not be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot either with
1151  * tracing_snapshot_alloc(), or manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, this will stop tracing,
1155  * basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164 
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:		The tracing instance to snapshot
1168  * @cond_data:	The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 	tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already taken.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer; instead, we resize it because
1239 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want to preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
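
/*
 * Example (sketch): a common debugging pattern is to allocate the spare
 * buffer once from sleepable context and then snapshot on an interesting
 * condition from anywhere:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (condition_of_interest())
 *		tracing_snapshot();
 *
 * condition_of_interest() is a placeholder for the caller's own check.
 */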
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
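
/*
 * Example (illustrative sketch): a hypothetical user of the conditional
 * snapshot API. The threshold logic is made up; only the update callback
 * signature and the tracing_snapshot_cond*() calls are real.
 */
struct example_cond_data {
	u64	threshold;
};

static bool __maybe_unused
example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	struct example_cond_data *d = cond_data;

	/* Only allow the snapshot when the caller's value is large enough. */
	return d && d->threshold > 1000;
}

/*
 * Enabling and triggering would then look like:
 *	tracing_snapshot_cond_enable(tr, &data, example_snapshot_update);
 *	...
 *	tracing_snapshot_cond(tr, &data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */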
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races where it gets disabled while we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of the ring buffer
1468  * @tr : the trace array to check if its ring buffer is enabled
1469  *
1470  * Shows the real state of the ring buffer: whether it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
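
/*
 * Example (sketch): tracing_off() is often used to freeze the buffer as
 * soon as a problem is detected, so that the events leading up to it are
 * preserved:
 *
 *	if (suspicious_condition())
 *		tracing_off();
 *
 * The same can be done from user space by writing 0 to
 * /sys/kernel/tracing/tracing_on (suspicious_condition() is a placeholder).
 */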
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
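
/*
 * Example (sketch): the clock used by an instance is selected at run time
 * through the "trace_clock" tracefs file, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * Of the entries above, "counter" and "uptime" are the ones not in
 * nanoseconds, which is what trace_clock_in_ns() reports.
 */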
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 /* LATENCY_FS_NOTIFY is defined when:
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752 
1753 #define trace_create_maxlat_file(tr, d_tracer)				\
1754 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1755 			  &tr->max_latency, &tracing_max_lat_fops)
1756 
1757 #endif
1758 
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768 	struct array_buffer *trace_buf = &tr->array_buffer;
1769 	struct array_buffer *max_buf = &tr->max_buffer;
1770 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772 
1773 	max_buf->cpu = cpu;
1774 	max_buf->time_start = data->preempt_timestamp;
1775 
1776 	max_data->saved_latency = tr->max_latency;
1777 	max_data->critical_start = data->critical_start;
1778 	max_data->critical_end = data->critical_end;
1779 
1780 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781 	max_data->pid = tsk->pid;
1782 	/*
1783 	 * If tsk == current, then use current_uid(), as that does not use
1784 	 * RCU. The irq tracer can be called out of RCU scope.
1785 	 */
1786 	if (tsk == current)
1787 		max_data->uid = current_uid();
1788 	else
1789 		max_data->uid = task_uid(tsk);
1790 
1791 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792 	max_data->policy = tsk->policy;
1793 	max_data->rt_priority = tsk->rt_priority;
1794 
1795 	/* record this task's comm */
1796 	tracing_record_cmdline(tsk);
1797 	latency_fsnotify(tr);
1798 }
1799 
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812 	      void *cond_data)
1813 {
1814 	if (tr->stop_count)
1815 		return;
1816 
1817 	WARN_ON_ONCE(!irqs_disabled());
1818 
1819 	if (!tr->allocated_snapshot) {
1820 		/* Only the nop tracer should hit this when disabling */
1821 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822 		return;
1823 	}
1824 
1825 	arch_spin_lock(&tr->max_lock);
1826 
1827 	/* Inherit the recordable setting from array_buffer */
1828 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829 		ring_buffer_record_on(tr->max_buffer.buffer);
1830 	else
1831 		ring_buffer_record_off(tr->max_buffer.buffer);
1832 
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835 		goto out_unlock;
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841  out_unlock:
1842 	arch_spin_unlock(&tr->max_lock);
1843 }
1844 
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856 	int ret;
1857 
1858 	if (tr->stop_count)
1859 		return;
1860 
1861 	WARN_ON_ONCE(!irqs_disabled());
1862 	if (!tr->allocated_snapshot) {
1863 		/* Only the nop tracer should hit this when disabling */
1864 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865 		return;
1866 	}
1867 
1868 	arch_spin_lock(&tr->max_lock);
1869 
1870 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871 
1872 	if (ret == -EBUSY) {
1873 		/*
1874 		 * We failed to swap the buffer due to a commit taking
1875 		 * place on this CPU. We fail to record, but we reset
1876 		 * the max trace buffer (no one writes directly to it)
1877 		 * and flag that it failed.
1878 		 */
1879 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880 			"Failed to swap buffers due to commit in progress\n");
1881 	}
1882 
1883 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884 
1885 	__update_max_tr(tr, tsk, cpu);
1886 	arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889 
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892 	/* Iterators are static, they should be filled or empty */
1893 	if (trace_buffer_iter(iter, iter->cpu_file))
1894 		return 0;
1895 
1896 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897 				full);
1898 }
1899 
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902 
1903 struct trace_selftests {
1904 	struct list_head		list;
1905 	struct tracer			*type;
1906 };
1907 
1908 static LIST_HEAD(postponed_selftests);
1909 
1910 static int save_selftest(struct tracer *type)
1911 {
1912 	struct trace_selftests *selftest;
1913 
1914 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915 	if (!selftest)
1916 		return -ENOMEM;
1917 
1918 	selftest->type = type;
1919 	list_add(&selftest->list, &postponed_selftests);
1920 	return 0;
1921 }
1922 
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925 	struct trace_array *tr = &global_trace;
1926 	struct tracer *saved_tracer = tr->current_trace;
1927 	int ret;
1928 
1929 	if (!type->selftest || tracing_selftest_disabled)
1930 		return 0;
1931 
1932 	/*
1933 	 * If a tracer registers early in boot up (before scheduling is
1934 	 * initialized and such), then do not run its selftests yet.
1935 	 * Instead, run it a little later in the boot process.
1936 	 */
1937 	if (!selftests_can_run)
1938 		return save_selftest(type);
1939 
1940 	if (!tracing_is_on()) {
1941 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942 			type->name);
1943 		return 0;
1944 	}
1945 
1946 	/*
1947 	 * Run a selftest on this tracer.
1948 	 * Here we reset the trace buffer, and set the current
1949 	 * tracer to be this tracer. The tracer can then run some
1950 	 * internal tracing to verify that everything is in order.
1951 	 * If we fail, we do not register this tracer.
1952 	 */
1953 	tracing_reset_online_cpus(&tr->array_buffer);
1954 
1955 	tr->current_trace = type;
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		/* If we expanded the buffers, make sure the max is expanded too */
1960 		if (ring_buffer_expanded)
1961 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962 					   RING_BUFFER_ALL_CPUS);
1963 		tr->allocated_snapshot = true;
1964 	}
1965 #endif
1966 
1967 	/* the test is responsible for initializing and enabling */
1968 	pr_info("Testing tracer %s: ", type->name);
1969 	ret = type->selftest(type, tr);
1970 	/* the test is responsible for resetting too */
1971 	tr->current_trace = saved_tracer;
1972 	if (ret) {
1973 		printk(KERN_CONT "FAILED!\n");
1974 		/* Add the warning after printing 'FAILED' */
1975 		WARN_ON(1);
1976 		return -1;
1977 	}
1978 	/* Only reset on passing, to avoid touching corrupted buffers */
1979 	tracing_reset_online_cpus(&tr->array_buffer);
1980 
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982 	if (type->use_max_tr) {
1983 		tr->allocated_snapshot = false;
1984 
1985 		/* Shrink the max buffer again */
1986 		if (ring_buffer_expanded)
1987 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1988 					   RING_BUFFER_ALL_CPUS);
1989 	}
1990 #endif
1991 
1992 	printk(KERN_CONT "PASSED\n");
1993 	return 0;
1994 }
1995 
1996 static __init int init_trace_selftests(void)
1997 {
1998 	struct trace_selftests *p, *n;
1999 	struct tracer *t, **last;
2000 	int ret;
2001 
2002 	selftests_can_run = true;
2003 
2004 	mutex_lock(&trace_types_lock);
2005 
2006 	if (list_empty(&postponed_selftests))
2007 		goto out;
2008 
2009 	pr_info("Running postponed tracer tests:\n");
2010 
2011 	tracing_selftest_running = true;
2012 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013 		/* This loop can take minutes when sanitizers are enabled, so
2014 		 * let's make sure we allow RCU processing.
2015 		 */
2016 		cond_resched();
2017 		ret = run_tracer_selftest(p->type);
2018 		/* If the test fails, then warn and remove from available_tracers */
2019 		if (ret < 0) {
2020 			WARN(1, "tracer: %s failed selftest, disabling\n",
2021 			     p->type->name);
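			/* Unlink the failed tracer from the singly linked trace_types list */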
2022 			last = &trace_types;
2023 			for (t = trace_types; t; t = t->next) {
2024 				if (t == p->type) {
2025 					*last = t->next;
2026 					break;
2027 				}
2028 				last = &t->next;
2029 			}
2030 		}
2031 		list_del(&p->list);
2032 		kfree(p);
2033 	}
2034 	tracing_selftest_running = false;
2035 
2036  out:
2037 	mutex_unlock(&trace_types_lock);
2038 
2039 	return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045 	return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048 
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050 
2051 static void __init apply_trace_boot_options(void);
2052 
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061 	struct tracer *t;
2062 	int ret = 0;
2063 
2064 	if (!type->name) {
2065 		pr_info("Tracer must have a name\n");
2066 		return -1;
2067 	}
2068 
2069 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071 		return -1;
2072 	}
2073 
2074 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075 		pr_warn("Can not register tracer %s due to lockdown\n",
2076 			   type->name);
2077 		return -EPERM;
2078 	}
2079 
2080 	mutex_lock(&trace_types_lock);
2081 
2082 	tracing_selftest_running = true;
2083 
2084 	for (t = trace_types; t; t = t->next) {
2085 		if (strcmp(type->name, t->name) == 0) {
2086 			/* already found */
2087 			pr_info("Tracer %s already registered\n",
2088 				type->name);
2089 			ret = -1;
2090 			goto out;
2091 		}
2092 	}
2093 
2094 	if (!type->set_flag)
2095 		type->set_flag = &dummy_set_flag;
2096 	if (!type->flags) {
2097 		/* allocate a dummy tracer_flags */
2098 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099 		if (!type->flags) {
2100 			ret = -ENOMEM;
2101 			goto out;
2102 		}
2103 		type->flags->val = 0;
2104 		type->flags->opts = dummy_tracer_opt;
2105 	} else
2106 		if (!type->flags->opts)
2107 			type->flags->opts = dummy_tracer_opt;
2108 
2109 	/* store the tracer for __set_tracer_option */
2110 	type->flags->trace = type;
2111 
2112 	ret = run_tracer_selftest(type);
2113 	if (ret < 0)
2114 		goto out;
2115 
2116 	type->next = trace_types;
2117 	trace_types = type;
2118 	add_tracer_options(&global_trace, type);
2119 
2120  out:
2121 	tracing_selftest_running = false;
2122 	mutex_unlock(&trace_types_lock);
2123 
2124 	if (ret || !default_bootup_tracer)
2125 		goto out_unlock;
2126 
2127 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128 		goto out_unlock;
2129 
2130 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131 	/* Do we want this tracer to start on bootup? */
2132 	tracing_set_tracer(&global_trace, type->name);
2133 	default_bootup_tracer = NULL;
2134 
2135 	apply_trace_boot_options();
2136 
2137 	/* disable other selftests, since this will break them. */
2138 	disable_tracing_selftest("running a tracer");
2139 
2140  out_unlock:
2141 	return ret;
2142 }
2143 
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146 	struct trace_buffer *buffer = buf->buffer;
2147 
2148 	if (!buffer)
2149 		return;
2150 
2151 	ring_buffer_record_disable(buffer);
2152 
2153 	/* Make sure all commits have finished */
2154 	synchronize_rcu();
2155 	ring_buffer_reset_cpu(buffer, cpu);
2156 
2157 	ring_buffer_record_enable(buffer);
2158 }
2159 
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 
2172 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173 
2174 	ring_buffer_reset_online_cpus(buffer);
2175 
2176 	ring_buffer_record_enable(buffer);
2177 }
2178 
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182 	struct trace_array *tr;
2183 
2184 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185 		if (!tr->clear_trace)
2186 			continue;
2187 		tr->clear_trace = false;
2188 		tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190 		tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192 	}
2193 }
2194 
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200 
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203 
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
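/*
 * Cache of recently recorded comms: map_pid_to_cmdline hashes a pid (masked
 * with PID_MAX_DEFAULT - 1) to a slot in saved_cmdlines, and
 * map_cmdline_to_pid records which pid currently owns each slot.
 */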
2207 struct saved_cmdlines_buffer {
2208 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209 	unsigned *map_cmdline_to_pid;
2210 	unsigned cmdline_num;
2211 	int cmdline_idx;
2212 	char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215 
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220 
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225 
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227 				    struct saved_cmdlines_buffer *s)
2228 {
2229 	s->map_cmdline_to_pid = kmalloc_array(val,
2230 					      sizeof(*s->map_cmdline_to_pid),
2231 					      GFP_KERNEL);
2232 	if (!s->map_cmdline_to_pid)
2233 		return -ENOMEM;
2234 
2235 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236 	if (!s->saved_cmdlines) {
2237 		kfree(s->map_cmdline_to_pid);
2238 		return -ENOMEM;
2239 	}
2240 
2241 	s->cmdline_idx = 0;
2242 	s->cmdline_num = val;
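	/*
	 * NO_CMDLINE_MAP is UINT_MAX (every byte 0xff), so memset()'s byte
	 * fill sets each entry of both maps to NO_CMDLINE_MAP.
	 */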
2243 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244 	       sizeof(s->map_pid_to_cmdline));
2245 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246 	       val * sizeof(*s->map_cmdline_to_pid));
2247 
2248 	return 0;
2249 }
2250 
2251 static int trace_create_savedcmd(void)
2252 {
2253 	int ret;
2254 
2255 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256 	if (!savedcmd)
2257 		return -ENOMEM;
2258 
2259 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260 	if (ret < 0) {
2261 		kfree(savedcmd);
2262 		savedcmd = NULL;
2263 		return -ENOMEM;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 int is_tracing_stopped(void)
2270 {
2271 	return global_trace.stop_count;
2272 }
2273 
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282 	struct trace_buffer *buffer;
2283 	unsigned long flags;
2284 
2285 	if (tracing_disabled)
2286 		return;
2287 
2288 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289 	if (--global_trace.stop_count) {
2290 		if (global_trace.stop_count < 0) {
2291 			/* Someone screwed up their debugging */
2292 			WARN_ON_ONCE(1);
2293 			global_trace.stop_count = 0;
2294 		}
2295 		goto out;
2296 	}
2297 
2298 	/* Prevent the buffers from switching */
2299 	arch_spin_lock(&global_trace.max_lock);
2300 
2301 	buffer = global_trace.array_buffer.buffer;
2302 	if (buffer)
2303 		ring_buffer_record_enable(buffer);
2304 
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306 	buffer = global_trace.max_buffer.buffer;
2307 	if (buffer)
2308 		ring_buffer_record_enable(buffer);
2309 #endif
2310 
2311 	arch_spin_unlock(&global_trace.max_lock);
2312 
2313  out:
2314 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316 
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319 	struct trace_buffer *buffer;
2320 	unsigned long flags;
2321 
2322 	if (tracing_disabled)
2323 		return;
2324 
2325 	/* If global, we need to also start the max tracer */
2326 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327 		return tracing_start();
2328 
2329 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2330 
2331 	if (--tr->stop_count) {
2332 		if (tr->stop_count < 0) {
2333 			/* Someone screwed up their debugging */
2334 			WARN_ON_ONCE(1);
2335 			tr->stop_count = 0;
2336 		}
2337 		goto out;
2338 	}
2339 
2340 	buffer = tr->array_buffer.buffer;
2341 	if (buffer)
2342 		ring_buffer_record_enable(buffer);
2343 
2344  out:
2345 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347 
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Lightweight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356 	struct trace_buffer *buffer;
2357 	unsigned long flags;
2358 
2359 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360 	if (global_trace.stop_count++)
2361 		goto out;
2362 
2363 	/* Prevent the buffers from switching */
2364 	arch_spin_lock(&global_trace.max_lock);
2365 
2366 	buffer = global_trace.array_buffer.buffer;
2367 	if (buffer)
2368 		ring_buffer_record_disable(buffer);
2369 
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371 	buffer = global_trace.max_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 #endif
2375 
2376 	arch_spin_unlock(&global_trace.max_lock);
2377 
2378  out:
2379 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381 
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384 	struct trace_buffer *buffer;
2385 	unsigned long flags;
2386 
2387 	/* If global, we need to also stop the max tracer */
2388 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389 		return tracing_stop();
2390 
2391 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2392 	if (tr->stop_count++)
2393 		goto out;
2394 
2395 	buffer = tr->array_buffer.buffer;
2396 	if (buffer)
2397 		ring_buffer_record_disable(buffer);
2398 
2399  out:
2400 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
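	/*
	 * Pids are hashed into a fixed-size table by masking with
	 * PID_MAX_DEFAULT - 1; a colliding pid simply takes over the slot.
	 */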
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 */
2419 	if (!arch_spin_trylock(&trace_cmdline_lock))
2420 		return 0;
2421 
2422 	idx = savedcmd->map_pid_to_cmdline[tpid];
2423 	if (idx == NO_CMDLINE_MAP) {
2424 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425 
2426 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2427 		savedcmd->cmdline_idx = idx;
2428 	}
2429 
2430 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431 	set_cmdline(idx, tsk->comm);
2432 
2433 	arch_spin_unlock(&trace_cmdline_lock);
2434 
2435 	return 1;
2436 }
2437 
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440 	unsigned map;
2441 	int tpid;
2442 
2443 	if (!pid) {
2444 		strcpy(comm, "<idle>");
2445 		return;
2446 	}
2447 
2448 	if (WARN_ON_ONCE(pid < 0)) {
2449 		strcpy(comm, "<XXX>");
2450 		return;
2451 	}
2452 
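	/*
	 * Mask the pid the same way it was hashed when saved, then check that
	 * the slot still belongs to this pid and was not reused by a
	 * colliding one.
	 */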
2453 	tpid = pid & (PID_MAX_DEFAULT - 1);
2454 	map = savedcmd->map_pid_to_cmdline[tpid];
2455 	if (map != NO_CMDLINE_MAP) {
2456 		tpid = savedcmd->map_cmdline_to_pid[map];
2457 		if (tpid == pid) {
2458 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459 			return;
2460 		}
2461 	}
2462 	strcpy(comm, "<...>");
2463 }
2464 
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467 	preempt_disable();
2468 	arch_spin_lock(&trace_cmdline_lock);
2469 
2470 	__trace_find_cmdline(pid, comm);
2471 
2472 	arch_spin_unlock(&trace_cmdline_lock);
2473 	preempt_enable();
2474 }
2475 
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478 	/*
2479 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480 	 * if we observe a non-NULL tgid_map then we also observe the correct
2481 	 * tgid_map_max.
2482 	 */
2483 	int *map = smp_load_acquire(&tgid_map);
2484 
2485 	if (unlikely(!map || pid > tgid_map_max))
2486 		return NULL;
2487 
2488 	return &map[pid];
2489 }
2490 
2491 int trace_find_tgid(int pid)
2492 {
2493 	int *ptr = trace_find_tgid_ptr(pid);
2494 
2495 	return ptr ? *ptr : 0;
2496 }
2497 
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500 	int *ptr;
2501 
2502 	/* treat recording of idle task as a success */
2503 	if (!tsk->pid)
2504 		return 1;
2505 
2506 	ptr = trace_find_tgid_ptr(tsk->pid);
2507 	if (!ptr)
2508 		return 0;
2509 
2510 	*ptr = tsk->tgid;
2511 	return 1;
2512 }
2513 
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517 		return true;
2518 	if (!__this_cpu_read(trace_taskinfo_save))
2519 		return true;
2520 	return false;
2521 }
2522 
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532 	bool done;
2533 
2534 	if (tracing_record_taskinfo_skip(flags))
2535 		return;
2536 
2537 	/*
2538 	 * Record as much task information as possible. If some fail, continue
2539 	 * to try to record the others.
2540 	 */
2541 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543 
2544 	/* If recording any information failed, retry again soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560 					  struct task_struct *next, int flags)
2561 {
2562 	bool done;
2563 
2564 	if (tracing_record_taskinfo_skip(flags))
2565 		return;
2566 
2567 	/*
2568 	 * Record as much task information as possible. If some fail, continue
2569 	 * to try to record the others.
2570 	 */
2571 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575 
2576 	/* If recording any information failed, retry again soon. */
2577 	if (!done)
2578 		return;
2579 
2580 	__this_cpu_write(trace_taskinfo_save, false);
2581 }
2582 
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588 
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593 
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601 	return trace_seq_has_overflowed(s) ?
2602 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605 
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608 	unsigned int trace_flags = irqs_status;
2609 	unsigned int pc;
2610 
2611 	pc = preempt_count();
2612 
2613 	if (pc & NMI_MASK)
2614 		trace_flags |= TRACE_FLAG_NMI;
2615 	if (pc & HARDIRQ_MASK)
2616 		trace_flags |= TRACE_FLAG_HARDIRQ;
2617 	if (in_serving_softirq())
2618 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2619 
2620 	if (tif_need_resched())
2621 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622 	if (test_preempt_need_resched())
2623 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
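	/* Flags go in the upper 16 bits; the low byte keeps the preempt count */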
2624 	return (trace_flags << 16) | (pc & 0xff);
2625 }
2626 
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629 			  int type,
2630 			  unsigned long len,
2631 			  unsigned int trace_ctx)
2632 {
2633 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635 
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639 
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * that it will not be committed. Discarding an event from the ring
2647  * buffer is not as fast as committing one, and is much slower than
2648  * copying the data into a temporary buffer and committing that.
2649  *
2650  * When an event is to be filtered, allocate per cpu buffers to
2651  * write the event data into. If the event is filtered and discarded,
2652  * it is simply dropped; otherwise the entire event is committed
2653  * in one shot.
2654 void trace_buffered_event_enable(void)
2655 {
2656 	struct ring_buffer_event *event;
2657 	struct page *page;
2658 	int cpu;
2659 
2660 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661 
2662 	if (trace_buffered_event_ref++)
2663 		return;
2664 
2665 	for_each_tracing_cpu(cpu) {
2666 		page = alloc_pages_node(cpu_to_node(cpu),
2667 					GFP_KERNEL | __GFP_NORETRY, 0);
2668 		if (!page)
2669 			goto failed;
2670 
2671 		event = page_address(page);
2672 		memset(event, 0, sizeof(*event));
2673 
2674 		per_cpu(trace_buffered_event, cpu) = event;
2675 
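		/*
		 * Sanity check: on the current CPU, __this_cpu_read() must see
		 * the pointer that was just published via per_cpu().
		 */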
2676 		preempt_disable();
2677 		if (cpu == smp_processor_id() &&
2678 		    __this_cpu_read(trace_buffered_event) !=
2679 		    per_cpu(trace_buffered_event, cpu))
2680 			WARN_ON_ONCE(1);
2681 		preempt_enable();
2682 	}
2683 
2684 	return;
2685  failed:
2686 	trace_buffered_event_disable();
2687 }
2688 
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691 	/* Probably not needed, but do it anyway */
2692 	smp_rmb();
2693 	this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695 
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698 	this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700 
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711 	int cpu;
2712 
2713 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714 
2715 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716 		return;
2717 
2718 	if (--trace_buffered_event_ref)
2719 		return;
2720 
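	/*
	 * Bumping trace_buffered_event_cnt on every CPU makes the
	 * this_cpu_inc_return() check in trace_event_buffer_lock_reserve()
	 * see a count greater than one, so new events bypass the temp
	 * buffers while they are being freed.
	 */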
2721 	preempt_disable();
2722 	/* For each CPU, set the buffer as used. */
2723 	smp_call_function_many(tracing_buffer_mask,
2724 			       disable_trace_buffered_event, NULL, 1);
2725 	preempt_enable();
2726 
2727 	/* Wait for all current users to finish */
2728 	synchronize_rcu();
2729 
2730 	for_each_tracing_cpu(cpu) {
2731 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732 		per_cpu(trace_buffered_event, cpu) = NULL;
2733 	}
2734 	/*
2735 	 * Make sure trace_buffered_event is NULL before clearing
2736 	 * trace_buffered_event_cnt.
2737 	 */
2738 	smp_wmb();
2739 
2740 	preempt_disable();
2741 	/* Do the work on each cpu */
2742 	smp_call_function_many(tracing_buffer_mask,
2743 			       enable_trace_buffered_event, NULL, 1);
2744 	preempt_enable();
2745 }
2746 
2747 static struct trace_buffer *temp_buffer;
2748 
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751 			  struct trace_event_file *trace_file,
2752 			  int type, unsigned long len,
2753 			  unsigned int trace_ctx)
2754 {
2755 	struct ring_buffer_event *entry;
2756 	struct trace_array *tr = trace_file->tr;
2757 	int val;
2758 
2759 	*current_rb = tr->array_buffer.buffer;
2760 
2761 	if (!tr->no_filter_buffering_ref &&
2762 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763 	    (entry = this_cpu_read(trace_buffered_event))) {
2764 		/*
2765 		 * Filtering is on, so try to use the per cpu buffer first.
2766 		 * This buffer will simulate a ring_buffer_event,
2767 		 * where the type_len is zero and the array[0] will
2768 		 * hold the full length.
2769 		 * (see include/linux/ring_buffer.h for details on
2770 		 *  how the ring_buffer_event is structured).
2771 		 *
2772 		 * Using a temp buffer during filtering and copying it
2773 		 * on a matched filter is quicker than writing directly
2774 		 * into the ring buffer and then discarding it when
2775 		 * it doesn't match. That is because the discard
2776 		 * requires several atomic operations to get right.
2777 		 * Copying on a match and doing nothing on a failed match
2778 		 * is still quicker than writing directly and then having to
2779 		 * discard out of the ring buffer on a failed match.
2780 		 */
2781 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782 
2783 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2784 
2785 		/*
2786 		 * Preemption is disabled, but interrupts and NMIs
2787 		 * can still come in now. If that happens after
2788 		 * the above increment, then it will have to go
2789 		 * back to the old method of allocating the event
2790 		 * on the ring buffer, and if the filter fails, it
2791 		 * will have to call ring_buffer_discard_commit()
2792 		 * to remove it.
2793 		 *
2794 		 * Need to also check the unlikely case that the
2795 		 * length is bigger than the temp buffer size.
2796 		 * If that happens, then the reserve is pretty much
2797 		 * guaranteed to fail, as the ring buffer currently
2798 		 * only allows events less than a page. But that may
2799 		 * change in the future, so let the ring buffer reserve
2800 		 * handle the failure in that case.
2801 		 */
2802 		if (val == 1 && likely(len <= max_len)) {
2803 			trace_event_setup(entry, type, trace_ctx);
2804 			entry->array[0] = len;
2805 			return entry;
2806 		}
2807 		this_cpu_dec(trace_buffered_event_cnt);
2808 	}
2809 
2810 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811 					    trace_ctx);
2812 	/*
2813 	 * If tracing is off, but we have triggers enabled
2814 	 * we still need to look at the event data. Use the temp_buffer
2815 	 * to store the trace event for the trigger to use. It's recursion
2816 	 * safe and will not be recorded anywhere.
2817 	 */
2818 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819 		*current_rb = temp_buffer;
2820 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821 						    trace_ctx);
2822 	}
2823 	return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826 
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829 
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832 	struct trace_event_call *event_call;
2833 	struct trace_event_file *file;
2834 	struct trace_event *event;
2835 	unsigned long flags;
2836 	struct trace_iterator *iter = tracepoint_print_iter;
2837 
2838 	/* We should never get here if iter is NULL */
2839 	if (WARN_ON_ONCE(!iter))
2840 		return;
2841 
2842 	event_call = fbuffer->trace_file->event_call;
2843 	if (!event_call || !event_call->event.funcs ||
2844 	    !event_call->event.funcs->trace)
2845 		return;
2846 
2847 	file = fbuffer->trace_file;
2848 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850 	     !filter_match_preds(file->filter, fbuffer->entry)))
2851 		return;
2852 
2853 	event = &fbuffer->trace_file->event_call->event;
2854 
2855 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856 	trace_seq_init(&iter->seq);
2857 	iter->ent = fbuffer->entry;
2858 	event_call->event.funcs->trace(iter, 0, event);
2859 	trace_seq_putc(&iter->seq, 0);
2860 	printk("%s", iter->seq.buffer);
2861 
2862 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864 
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866 			     void *buffer, size_t *lenp,
2867 			     loff_t *ppos)
2868 {
2869 	int save_tracepoint_printk;
2870 	int ret;
2871 
2872 	mutex_lock(&tracepoint_printk_mutex);
2873 	save_tracepoint_printk = tracepoint_printk;
2874 
2875 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876 
2877 	/*
2878 	 * This will force exiting early, as tracepoint_printk
2879 	 * is always zero when tracepoint_print_iter is not allocated.
2880 	 */
2881 	if (!tracepoint_print_iter)
2882 		tracepoint_printk = 0;
2883 
2884 	if (save_tracepoint_printk == tracepoint_printk)
2885 		goto out;
2886 
2887 	if (tracepoint_printk)
2888 		static_key_enable(&tracepoint_printk_key.key);
2889 	else
2890 		static_key_disable(&tracepoint_printk_key.key);
2891 
2892  out:
2893 	mutex_unlock(&tracepoint_printk_mutex);
2894 
2895 	return ret;
2896 }
2897 
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900 	if (static_key_false(&tracepoint_printk_key.key))
2901 		output_printk(fbuffer);
2902 
2903 	if (static_branch_unlikely(&trace_event_exports_enabled))
2904 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2905 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2906 				    fbuffer->event, fbuffer->entry,
2907 				    fbuffer->trace_ctx, fbuffer->regs);
2908 }
2909 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2910 
2911 /*
2912  * Skip 3:
2913  *
2914  *   trace_buffer_unlock_commit_regs()
2915  *   trace_event_buffer_commit()
2916  *   trace_event_raw_event_xxx()
2917  */
2918 # define STACK_SKIP 3
2919 
2920 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2921 				     struct trace_buffer *buffer,
2922 				     struct ring_buffer_event *event,
2923 				     unsigned int trace_ctx,
2924 				     struct pt_regs *regs)
2925 {
2926 	__buffer_unlock_commit(buffer, event);
2927 
2928 	/*
2929 	 * If regs is not set, then skip the necessary functions.
2930 	 * Note, we can still get here via blktrace, wakeup tracer
2931 	 * and mmiotrace, but that's ok if they lose a function or
2932 	 * two. They are not that meaningful.
2933 	 */
2934 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2935 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2936 }
2937 
2938 /*
2939  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2940  */
2941 void
2942 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2943 				   struct ring_buffer_event *event)
2944 {
2945 	__buffer_unlock_commit(buffer, event);
2946 }
2947 
2948 void
2949 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2950 	       parent_ip, unsigned int trace_ctx)
2951 {
2952 	struct trace_event_call *call = &event_function;
2953 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2954 	struct ring_buffer_event *event;
2955 	struct ftrace_entry *entry;
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2958 					    trace_ctx);
2959 	if (!event)
2960 		return;
2961 	entry	= ring_buffer_event_data(event);
2962 	entry->ip			= ip;
2963 	entry->parent_ip		= parent_ip;
2964 
2965 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2966 		if (static_branch_unlikely(&trace_function_exports_enabled))
2967 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2968 		__buffer_unlock_commit(buffer, event);
2969 	}
2970 }
2971 
2972 #ifdef CONFIG_STACKTRACE
2973 
2974 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2975 #define FTRACE_KSTACK_NESTING	4
2976 
2977 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2978 
2979 struct ftrace_stack {
2980 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2981 };
2982 
2983 
2984 struct ftrace_stacks {
2985 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2986 };
2987 
2988 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2989 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2990 
2991 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2992 				 unsigned int trace_ctx,
2993 				 int skip, struct pt_regs *regs)
2994 {
2995 	struct trace_event_call *call = &event_kernel_stack;
2996 	struct ring_buffer_event *event;
2997 	unsigned int size, nr_entries;
2998 	struct ftrace_stack *fstack;
2999 	struct stack_entry *entry;
3000 	int stackidx;
3001 
3002 	/*
3003 	 * Add one, for this function and the call to stack_trace_save().
3004 	 * If regs is set, then these functions will not be in the way.
3005 	 */
3006 #ifndef CONFIG_UNWINDER_ORC
3007 	if (!regs)
3008 		skip++;
3009 #endif
3010 
3011 	preempt_disable_notrace();
3012 
3013 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3014 
3015 	/* This should never happen. If it does, yell once and skip */
3016 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3017 		goto out;
3018 
3019 	/*
3020 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3021 	 * interrupt will either see the value pre increment or post
3022 	 * increment. If the interrupt happens pre increment it will have
3023 	 * restored the counter when it returns.  We just need a barrier to
3024 	 * keep gcc from moving things around.
3025 	 */
3026 	barrier();
3027 
3028 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3029 	size = ARRAY_SIZE(fstack->calls);
3030 
3031 	if (regs) {
3032 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3033 						   size, skip);
3034 	} else {
3035 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3036 	}
3037 
3038 	size = nr_entries * sizeof(unsigned long);
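	/*
	 * Size the event for just the entries captured: the static caller[]
	 * array of struct stack_entry is replaced by 'size' bytes of saved
	 * addresses.
	 */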
3039 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3040 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3041 				    trace_ctx);
3042 	if (!event)
3043 		goto out;
3044 	entry = ring_buffer_event_data(event);
3045 
3046 	memcpy(&entry->caller, fstack->calls, size);
3047 	entry->size = nr_entries;
3048 
3049 	if (!call_filter_check_discard(call, entry, buffer, event))
3050 		__buffer_unlock_commit(buffer, event);
3051 
3052  out:
3053 	/* Again, don't let gcc optimize things here */
3054 	barrier();
3055 	__this_cpu_dec(ftrace_stack_reserve);
3056 	preempt_enable_notrace();
3057 
3058 }
3059 
3060 static inline void ftrace_trace_stack(struct trace_array *tr,
3061 				      struct trace_buffer *buffer,
3062 				      unsigned int trace_ctx,
3063 				      int skip, struct pt_regs *regs)
3064 {
3065 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3066 		return;
3067 
3068 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3069 }
3070 
3071 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3072 		   int skip)
3073 {
3074 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3075 
3076 	if (rcu_is_watching()) {
3077 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3078 		return;
3079 	}
3080 
3081 	/*
3082 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3083 	 * but if the above rcu_is_watching() failed, then the NMI
3084 	 * triggered someplace critical, and rcu_irq_enter() should
3085 	 * not be called from NMI.
3086 	 */
3087 	if (unlikely(in_nmi()))
3088 		return;
3089 
3090 	rcu_irq_enter_irqson();
3091 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3092 	rcu_irq_exit_irqson();
3093 }
3094 
3095 /**
3096  * trace_dump_stack - record a stack back trace in the trace buffer
3097  * @skip: Number of functions to skip (helper handlers)
3098  */
3099 void trace_dump_stack(int skip)
3100 {
3101 	if (tracing_disabled || tracing_selftest_running)
3102 		return;
3103 
3104 #ifndef CONFIG_UNWINDER_ORC
3105 	/* Skip 1 to skip this function. */
3106 	skip++;
3107 #endif
3108 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3109 			     tracing_gen_ctx(), skip, NULL);
3110 }
3111 EXPORT_SYMBOL_GPL(trace_dump_stack);
3112 
3113 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3114 static DEFINE_PER_CPU(int, user_stack_count);
3115 
3116 static void
3117 ftrace_trace_userstack(struct trace_array *tr,
3118 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3119 {
3120 	struct trace_event_call *call = &event_user_stack;
3121 	struct ring_buffer_event *event;
3122 	struct userstack_entry *entry;
3123 
3124 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3125 		return;
3126 
3127 	/*
3128 	 * NMIs can not handle page faults, even with fixups.
3129 	 * Saving the user stack can (and often does) fault.
3130 	 */
3131 	if (unlikely(in_nmi()))
3132 		return;
3133 
3134 	/*
3135 	 * Prevent recursion, since the user stack tracing may
3136 	 * trigger other kernel events.
3137 	 */
3138 	preempt_disable();
3139 	if (__this_cpu_read(user_stack_count))
3140 		goto out;
3141 
3142 	__this_cpu_inc(user_stack_count);
3143 
3144 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3145 					    sizeof(*entry), trace_ctx);
3146 	if (!event)
3147 		goto out_drop_count;
3148 	entry	= ring_buffer_event_data(event);
3149 
3150 	entry->tgid		= current->tgid;
3151 	memset(&entry->caller, 0, sizeof(entry->caller));
3152 
3153 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3154 	if (!call_filter_check_discard(call, entry, buffer, event))
3155 		__buffer_unlock_commit(buffer, event);
3156 
3157  out_drop_count:
3158 	__this_cpu_dec(user_stack_count);
3159  out:
3160 	preempt_enable();
3161 }
3162 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3163 static void ftrace_trace_userstack(struct trace_array *tr,
3164 				   struct trace_buffer *buffer,
3165 				   unsigned int trace_ctx)
3166 {
3167 }
3168 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3169 
3170 #endif /* CONFIG_STACKTRACE */
3171 
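/* Split the 64-bit timestamp delta into the entry's two 32-bit halves */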
3172 static inline void
3173 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3174 			  unsigned long long delta)
3175 {
3176 	entry->bottom_delta_ts = delta & U32_MAX;
3177 	entry->top_delta_ts = (delta >> 32);
3178 }
3179 
3180 void trace_last_func_repeats(struct trace_array *tr,
3181 			     struct trace_func_repeats *last_info,
3182 			     unsigned int trace_ctx)
3183 {
3184 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3185 	struct func_repeats_entry *entry;
3186 	struct ring_buffer_event *event;
3187 	u64 delta;
3188 
3189 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3190 					    sizeof(*entry), trace_ctx);
3191 	if (!event)
3192 		return;
3193 
3194 	delta = ring_buffer_event_time_stamp(buffer, event) -
3195 		last_info->ts_last_call;
3196 
3197 	entry = ring_buffer_event_data(event);
3198 	entry->ip = last_info->ip;
3199 	entry->parent_ip = last_info->parent_ip;
3200 	entry->count = last_info->count;
3201 	func_repeats_set_delta_ts(entry, delta);
3202 
3203 	__buffer_unlock_commit(buffer, event);
3204 }
3205 
3206 /* created for use with alloc_percpu */
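/*
 * Four buffers per CPU allow trace_printk() calls that nest (e.g. an
 * interrupt or NMI arriving while a lower context holds a buffer);
 * 'nesting' tracks how many are currently in use on this CPU.
 */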
3207 struct trace_buffer_struct {
3208 	int nesting;
3209 	char buffer[4][TRACE_BUF_SIZE];
3210 };
3211 
3212 static struct trace_buffer_struct *trace_percpu_buffer;
3213 
3214 /*
3215  * This allows for lockless recording.  If we're nested too deeply, then
3216  * this returns NULL.
3217  */
3218 static char *get_trace_buf(void)
3219 {
3220 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3221 
3222 	if (!buffer || buffer->nesting >= 4)
3223 		return NULL;
3224 
3225 	buffer->nesting++;
3226 
3227 	/* Interrupts must see nesting incremented before we use the buffer */
3228 	barrier();
3229 	return &buffer->buffer[buffer->nesting - 1][0];
3230 }
3231 
3232 static void put_trace_buf(void)
3233 {
3234 	/* Don't let the decrement of nesting leak before this */
3235 	barrier();
3236 	this_cpu_dec(trace_percpu_buffer->nesting);
3237 }
3238 
3239 static int alloc_percpu_trace_buffer(void)
3240 {
3241 	struct trace_buffer_struct *buffers;
3242 
3243 	if (trace_percpu_buffer)
3244 		return 0;
3245 
3246 	buffers = alloc_percpu(struct trace_buffer_struct);
3247 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3248 		return -ENOMEM;
3249 
3250 	trace_percpu_buffer = buffers;
3251 	return 0;
3252 }
3253 
3254 static int buffers_allocated;
3255 
3256 void trace_printk_init_buffers(void)
3257 {
3258 	if (buffers_allocated)
3259 		return;
3260 
3261 	if (alloc_percpu_trace_buffer())
3262 		return;
3263 
3264 	/* trace_printk() is for debug use only. Don't use it in production. */
3265 
3266 	pr_warn("\n");
3267 	pr_warn("**********************************************************\n");
3268 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3269 	pr_warn("**                                                      **\n");
3270 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3271 	pr_warn("**                                                      **\n");
3272 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3273 	pr_warn("** unsafe for production use.                           **\n");
3274 	pr_warn("**                                                      **\n");
3275 	pr_warn("** If you see this message and you are not debugging    **\n");
3276 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3277 	pr_warn("**                                                      **\n");
3278 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3279 	pr_warn("**********************************************************\n");
3280 
3281 	/* Expand the buffers to set size */
3282 	tracing_update_buffers();
3283 
3284 	buffers_allocated = 1;
3285 
3286 	/*
3287 	 * trace_printk_init_buffers() can be called by modules.
3288 	 * If that happens, then we need to start cmdline recording
3289 	 * directly here. If the global_trace.buffer is already
3290 	 * allocated here, then this was called by module code.
3291 	 */
3292 	if (global_trace.array_buffer.buffer)
3293 		tracing_start_cmdline_record();
3294 }
3295 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3296 
3297 void trace_printk_start_comm(void)
3298 {
3299 	/* Start tracing comms if trace printk is set */
3300 	if (!buffers_allocated)
3301 		return;
3302 	tracing_start_cmdline_record();
3303 }
3304 
3305 static void trace_printk_start_stop_comm(int enabled)
3306 {
3307 	if (!buffers_allocated)
3308 		return;
3309 
3310 	if (enabled)
3311 		tracing_start_cmdline_record();
3312 	else
3313 		tracing_stop_cmdline_record();
3314 }
3315 
3316 /**
3317  * trace_vbprintk - write binary msg to tracing buffer
3318  * @ip:    The address of the caller
3319  * @fmt:   The string format to write to the buffer
3320  * @args:  Arguments for @fmt
3321  */
3322 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3323 {
3324 	struct trace_event_call *call = &event_bprint;
3325 	struct ring_buffer_event *event;
3326 	struct trace_buffer *buffer;
3327 	struct trace_array *tr = &global_trace;
3328 	struct bprint_entry *entry;
3329 	unsigned int trace_ctx;
3330 	char *tbuffer;
3331 	int len = 0, size;
3332 
3333 	if (unlikely(tracing_selftest_running || tracing_disabled))
3334 		return 0;
3335 
3336 	/* Don't pollute graph traces with trace_vprintk internals */
3337 	pause_graph_tracing();
3338 
3339 	trace_ctx = tracing_gen_ctx();
3340 	preempt_disable_notrace();
3341 
3342 	tbuffer = get_trace_buf();
3343 	if (!tbuffer) {
3344 		len = 0;
3345 		goto out_nobuffer;
3346 	}
3347 
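	/*
	 * vbin_printf() stores only the binary argument values; the format
	 * string pointer is saved in the entry below and combined with them
	 * when the trace is read.
	 */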
3348 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3349 
3350 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3351 		goto out_put;
3352 
3353 	size = sizeof(*entry) + sizeof(u32) * len;
3354 	buffer = tr->array_buffer.buffer;
3355 	ring_buffer_nest_start(buffer);
3356 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3357 					    trace_ctx);
3358 	if (!event)
3359 		goto out;
3360 	entry = ring_buffer_event_data(event);
3361 	entry->ip			= ip;
3362 	entry->fmt			= fmt;
3363 
3364 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3365 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3366 		__buffer_unlock_commit(buffer, event);
3367 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3368 	}
3369 
3370 out:
3371 	ring_buffer_nest_end(buffer);
3372 out_put:
3373 	put_trace_buf();
3374 
3375 out_nobuffer:
3376 	preempt_enable_notrace();
3377 	unpause_graph_tracing();
3378 
3379 	return len;
3380 }
3381 EXPORT_SYMBOL_GPL(trace_vbprintk);
3382 
3383 __printf(3, 0)
3384 static int
3385 __trace_array_vprintk(struct trace_buffer *buffer,
3386 		      unsigned long ip, const char *fmt, va_list args)
3387 {
3388 	struct trace_event_call *call = &event_print;
3389 	struct ring_buffer_event *event;
3390 	int len = 0, size;
3391 	struct print_entry *entry;
3392 	unsigned int trace_ctx;
3393 	char *tbuffer;
3394 
3395 	if (tracing_disabled || tracing_selftest_running)
3396 		return 0;
3397 
3398 	/* Don't pollute graph traces with trace_vprintk internals */
3399 	pause_graph_tracing();
3400 
3401 	trace_ctx = tracing_gen_ctx();
3402 	preempt_disable_notrace();
3403 
3404 
3405 	tbuffer = get_trace_buf();
3406 	if (!tbuffer) {
3407 		len = 0;
3408 		goto out_nobuffer;
3409 	}
3410 
3411 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412 
3413 	size = sizeof(*entry) + len + 1;
3414 	ring_buffer_nest_start(buffer);
3415 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416 					    trace_ctx);
3417 	if (!event)
3418 		goto out;
3419 	entry = ring_buffer_event_data(event);
3420 	entry->ip = ip;
3421 
3422 	memcpy(&entry->buf, tbuffer, len + 1);
3423 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3424 		__buffer_unlock_commit(buffer, event);
3425 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3426 	}
3427 
3428 out:
3429 	ring_buffer_nest_end(buffer);
3430 	put_trace_buf();
3431 
3432 out_nobuffer:
3433 	preempt_enable_notrace();
3434 	unpause_graph_tracing();
3435 
3436 	return len;
3437 }
3438 
3439 __printf(3, 0)
3440 int trace_array_vprintk(struct trace_array *tr,
3441 			unsigned long ip, const char *fmt, va_list args)
3442 {
3443 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3444 }
3445 
3446 /**
3447  * trace_array_printk - Print a message to a specific instance
3448  * @tr: The instance trace_array descriptor
3449  * @ip: The instruction pointer that this is called from.
3450  * @fmt: The format to print (printf format)
3451  *
3452  * If a subsystem sets up its own instance, it has the right to
3453  * printk strings into its tracing instance buffer using this
3454  * function. Note, this function will not write into the top level
3455  * buffer (use trace_printk() for that), as the top level buffer
3456  * should only have events that can be individually disabled.
3457  * trace_printk() is only used for debugging a kernel, and should
3458  * never be incorporated into normal use.
3459  *
3460  * trace_array_printk() can be used, as it will not add noise to the
3461  * top level tracing buffer.
3462  *
3463  * Note, trace_array_init_printk() must be called on @tr before this
3464  * can be used.
3465  */
3466 __printf(3, 0)
3467 int trace_array_printk(struct trace_array *tr,
3468 		       unsigned long ip, const char *fmt, ...)
3469 {
3470 	int ret;
3471 	va_list ap;
3472 
3473 	if (!tr)
3474 		return -ENOENT;
3475 
3476 	/* This is only allowed for created instances */
3477 	if (tr == &global_trace)
3478 		return 0;
3479 
3480 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3481 		return 0;
3482 
3483 	va_start(ap, fmt);
3484 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3485 	va_end(ap);
3486 	return ret;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_array_printk);
3489 
3490 /**
3491  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3492  * @tr: The trace array to initialize the buffers for
3493  *
3494  * As trace_array_printk() only writes into instances, they are OK to
3495  * have in the kernel (unlike trace_printk()). This needs to be called
3496  * before trace_array_printk() can be used on a trace_array.
3497  */
3498 int trace_array_init_printk(struct trace_array *tr)
3499 {
3500 	if (!tr)
3501 		return -ENOENT;
3502 
3503 	/* This is only allowed for created instances */
3504 	if (tr == &global_trace)
3505 		return -EINVAL;
3506 
3507 	return alloc_percpu_trace_buffer();
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3510 
3511 __printf(3, 4)
3512 int trace_array_printk_buf(struct trace_buffer *buffer,
3513 			   unsigned long ip, const char *fmt, ...)
3514 {
3515 	int ret;
3516 	va_list ap;
3517 
3518 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3519 		return 0;
3520 
3521 	va_start(ap, fmt);
3522 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3523 	va_end(ap);
3524 	return ret;
3525 }
3526 
3527 __printf(2, 0)
3528 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3529 {
3530 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3531 }
3532 EXPORT_SYMBOL_GPL(trace_vprintk);
3533 
3534 static void trace_iterator_increment(struct trace_iterator *iter)
3535 {
3536 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3537 
3538 	iter->idx++;
3539 	if (buf_iter)
3540 		ring_buffer_iter_advance(buf_iter);
3541 }
3542 
3543 static struct trace_entry *
3544 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3545 		unsigned long *lost_events)
3546 {
3547 	struct ring_buffer_event *event;
3548 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3549 
3550 	if (buf_iter) {
3551 		event = ring_buffer_iter_peek(buf_iter, ts);
3552 		if (lost_events)
3553 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3554 				(unsigned long)-1 : 0;
3555 	} else {
3556 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3557 					 lost_events);
3558 	}
3559 
3560 	if (event) {
3561 		iter->ent_size = ring_buffer_event_length(event);
3562 		return ring_buffer_event_data(event);
3563 	}
3564 	iter->ent_size = 0;
3565 	return NULL;
3566 }
3567 
3568 static struct trace_entry *
3569 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3570 		  unsigned long *missing_events, u64 *ent_ts)
3571 {
3572 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3573 	struct trace_entry *ent, *next = NULL;
3574 	unsigned long lost_events = 0, next_lost = 0;
3575 	int cpu_file = iter->cpu_file;
3576 	u64 next_ts = 0, ts;
3577 	int next_cpu = -1;
3578 	int next_size = 0;
3579 	int cpu;
3580 
3581 	/*
3582 	 * If we are in a per_cpu trace file, don't bother iterating over
3583 	 * all CPUs; peek at that CPU directly.
3584 	 */
3585 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3586 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3587 			return NULL;
3588 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3589 		if (ent_cpu)
3590 			*ent_cpu = cpu_file;
3591 
3592 		return ent;
3593 	}
3594 
3595 	for_each_tracing_cpu(cpu) {
3596 
3597 		if (ring_buffer_empty_cpu(buffer, cpu))
3598 			continue;
3599 
3600 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3601 
3602 		/*
3603 		 * Pick the entry with the smallest timestamp:
3604 		 */
3605 		if (ent && (!next || ts < next_ts)) {
3606 			next = ent;
3607 			next_cpu = cpu;
3608 			next_ts = ts;
3609 			next_lost = lost_events;
3610 			next_size = iter->ent_size;
3611 		}
3612 	}
3613 
3614 	iter->ent_size = next_size;
3615 
3616 	if (ent_cpu)
3617 		*ent_cpu = next_cpu;
3618 
3619 	if (ent_ts)
3620 		*ent_ts = next_ts;
3621 
3622 	if (missing_events)
3623 		*missing_events = next_lost;
3624 
3625 	return next;
3626 }
3627 
3628 #define STATIC_FMT_BUF_SIZE	128
3629 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3630 
3631 static char *trace_iter_expand_format(struct trace_iterator *iter)
3632 {
3633 	char *tmp;
3634 
3635 	/*
3636 	 * iter->tr is NULL when used with tp_printk, which means
3637 	 * this can get called where it is not safe to call krealloc().
3638 	 */
3639 	if (!iter->tr || iter->fmt == static_fmt_buf)
3640 		return NULL;
3641 
3642 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3643 		       GFP_KERNEL);
3644 	if (tmp) {
3645 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3646 		iter->fmt = tmp;
3647 	}
3648 
3649 	return tmp;
3650 }
3651 
3652 /* Returns true if the string is safe to dereference from an event */
3653 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3654 {
3655 	unsigned long addr = (unsigned long)str;
3656 	struct trace_event *trace_event;
3657 	struct trace_event_call *event;
3658 
3659 	/* OK if part of the event data */
3660 	if ((addr >= (unsigned long)iter->ent) &&
3661 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3662 		return true;
3663 
3664 	/* OK if part of the temp seq buffer */
3665 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3666 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3667 		return true;
3668 
3669 	/* Core rodata can not be freed */
3670 	if (is_kernel_rodata(addr))
3671 		return true;
3672 
3673 	if (trace_is_tracepoint_string(str))
3674 		return true;
3675 
3676 	/*
3677 	 * Now this could be a module event, referencing core module
3678 	 * data, which is OK.
3679 	 */
3680 	if (!iter->ent)
3681 		return false;
3682 
3683 	trace_event = ftrace_find_event(iter->ent->type);
3684 	if (!trace_event)
3685 		return false;
3686 
3687 	event = container_of(trace_event, struct trace_event_call, event);
3688 	if (!event->mod)
3689 		return false;
3690 
3691 	/* Would rather have rodata, but this will suffice */
3692 	if (within_module_core(addr, event->mod))
3693 		return true;
3694 
3695 	return false;
3696 }
3697 
3698 static const char *show_buffer(struct trace_seq *s)
3699 {
3700 	struct seq_buf *seq = &s->seq;
3701 
3702 	seq_buf_terminate(seq);
3703 
3704 	return seq->buffer;
3705 }
3706 
3707 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3708 
3709 static int test_can_verify_check(const char *fmt, ...)
3710 {
3711 	char buf[16];
3712 	va_list ap;
3713 	int ret;
3714 
3715 	/*
3716 	 * The verifier depends on vsnprintf() modifying the va_list
3717 	 * passed to it, i.e. on it being passed by reference. Some
3718 	 * architectures (like x86_32) pass it by value, which means that
3719 	 * vsnprintf() does not modify the va_list passed to it, and the
3720 	 * verifier would then need to be able to understand all the values
3721 	 * that vsnprintf can use. If it is passed by value, then the
3722 	 * verifier is disabled.
3723 	 */
3724 	va_start(ap, fmt);
3725 	vsnprintf(buf, 16, "%d", ap);
3726 	ret = va_arg(ap, int);
3727 	va_end(ap);
3728 
3729 	return ret;
3730 }
3731 
3732 static void test_can_verify(void)
3733 {
3734 	if (!test_can_verify_check("%d %d", 0, 1)) {
3735 		pr_info("trace event string verifier disabled\n");
3736 		static_branch_inc(&trace_no_verify);
3737 	}
3738 }
3739 
3740 /**
3741  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3742  * @iter: The iterator that holds the seq buffer and the event being printed
3743  * @fmt: The format used to print the event
3744  * @ap: The va_list holding the data to print from @fmt.
3745  *
3746  * This writes the data into the @iter->seq buffer using the data from
3747  * @fmt and @ap. If the format has a %s, then the source of the string
3748  * is examined to make sure it is safe to print, otherwise it will
3749  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3750  * pointer.
3751  */
3752 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3753 			 va_list ap)
3754 {
3755 	const char *p = fmt;
3756 	const char *str;
3757 	int i, j;
3758 
3759 	if (WARN_ON_ONCE(!fmt))
3760 		return;
3761 
3762 	if (static_branch_unlikely(&trace_no_verify))
3763 		goto print;
3764 
3765 	/* Don't bother checking when doing a ftrace_dump() */
3766 	if (iter->fmt == static_fmt_buf)
3767 		goto print;
3768 
3769 	while (*p) {
3770 		bool star = false;
3771 		int len = 0;
3772 
3773 		j = 0;
3774 
3775 		/* We only care about %s and variants */
3776 		for (i = 0; p[i]; i++) {
3777 			if (i + 1 >= iter->fmt_size) {
3778 				/*
3779 				 * If we can't expand the copy buffer,
3780 				 * just print it.
3781 				 */
3782 				if (!trace_iter_expand_format(iter))
3783 					goto print;
3784 			}
3785 
3786 			if (p[i] == '\\' && p[i+1]) {
3787 				i++;
3788 				continue;
3789 			}
3790 			if (p[i] == '%') {
3791 				/* Need to test cases like %08.*s */
3792 				for (j = 1; p[i+j]; j++) {
3793 					if (isdigit(p[i+j]) ||
3794 					    p[i+j] == '.')
3795 						continue;
3796 					if (p[i+j] == '*') {
3797 						star = true;
3798 						continue;
3799 					}
3800 					break;
3801 				}
3802 				if (p[i+j] == 's')
3803 					break;
3804 				star = false;
3805 			}
3806 			j = 0;
3807 		}
3808 		/* If no %s found then just print normally */
3809 		if (!p[i])
3810 			break;
3811 
3812 		/* Copy up to the %s, and print that */
3813 		strncpy(iter->fmt, p, i);
3814 		iter->fmt[i] = '\0';
3815 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3816 
3817 		if (star)
3818 			len = va_arg(ap, int);
3819 
3820 		/* The ap now points to the string data of the %s */
3821 		str = va_arg(ap, const char *);
3822 
3823 		/*
3824 		 * If you hit this warning, it is likely that the
3825 		 * trace event in question used %s on a string that
3826 		 * was saved at the time of the event, but may not be
3827 		 * around when the trace is read. Use __string(),
3828 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3829 		 * instead. See samples/trace_events/trace-events-sample.h
3830 		 * for reference.
3831 		 */
3832 		if (WARN_ONCE(!trace_safe_str(iter, str),
3833 			      "fmt: '%s' current_buffer: '%s'",
3834 			      fmt, show_buffer(&iter->seq))) {
3835 			int ret;
3836 
3837 			/* Try to safely read the string */
3838 			if (star) {
3839 				if (len + 1 > iter->fmt_size)
3840 					len = iter->fmt_size - 1;
3841 				if (len < 0)
3842 					len = 0;
3843 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3844 				iter->fmt[len] = 0;
3845 				star = false;
3846 			} else {
3847 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3848 								  iter->fmt_size);
3849 			}
3850 			if (ret < 0)
3851 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3852 			else
3853 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3854 						 str, iter->fmt);
3855 			str = "[UNSAFE-MEMORY]";
3856 			strcpy(iter->fmt, "%s");
3857 		} else {
3858 			strncpy(iter->fmt, p + i, j + 1);
3859 			iter->fmt[j+1] = '\0';
3860 		}
3861 		if (star)
3862 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3863 		else
3864 			trace_seq_printf(&iter->seq, iter->fmt, str);
3865 
3866 		p += i + j + 1;
3867 	}
3868  print:
3869 	if (*p)
3870 		trace_seq_vprintf(&iter->seq, p, ap);
3871 }
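
/*
 * Editor's illustrative sketch (not in the original file): the kind of
 * TRACE_EVENT() definition the warning above points to.  The event and
 * field names are made up; samples/trace_events/trace-events-sample.h is
 * the authoritative reference.
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name, int value),
 *		TP_ARGS(name, value),
 *		TP_STRUCT__entry(
 *			__string(name, name)		// reserve room in the event
 *			__field(int, value)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);	// copy the string at event time
 *			__entry->value = value;
 *		),
 *		TP_printk("name=%s value=%d", __get_str(name), __entry->value)
 *	);
 *
 * Because __get_str() reads the copy stored in the ring buffer entry,
 * trace_safe_str() accepts it even if the original string has been freed.
 */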
3872 
3873 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3874 {
3875 	const char *p, *new_fmt;
3876 	char *q;
3877 
3878 	if (WARN_ON_ONCE(!fmt))
3879 		return fmt;
3880 
3881 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3882 		return fmt;
3883 
3884 	p = fmt;
3885 	new_fmt = q = iter->fmt;
3886 	while (*p) {
3887 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3888 			if (!trace_iter_expand_format(iter))
3889 				return fmt;
3890 
3891 			q += iter->fmt - new_fmt;
3892 			new_fmt = iter->fmt;
3893 		}
3894 
3895 		*q++ = *p++;
3896 
3897 		/* Replace %p with %px */
3898 		if (p[-1] == '%') {
3899 			if (p[0] == '%') {
3900 				*q++ = *p++;
3901 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3902 				*q++ = *p++;
3903 				*q++ = 'x';
3904 			}
3905 		}
3906 	}
3907 	*q = '\0';
3908 
3909 	return new_fmt;
3910 }
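
/*
 * Editor's example (made-up format strings, not part of the original
 * source): with TRACE_ITER_HASH_PTR cleared, trace_event_format() rewrites
 * the format in iter->fmt so that pointers are printed unhashed:
 *
 *	trace_event_format(iter, "skb=%p len=%d");	// returns "skb=%px len=%d"
 *	trace_event_format(iter, "100%% done");		// "%%" is left alone
 *
 * "%pK", "%ps" and other %p variants with an alphanumeric suffix are also
 * left untouched because of the !isalnum(p[1]) check above.
 */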
3911 
3912 #define STATIC_TEMP_BUF_SIZE	128
3913 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3914 
3915 /* Find the next real entry, without updating the iterator itself */
3916 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3917 					  int *ent_cpu, u64 *ent_ts)
3918 {
3919 	/* __find_next_entry will reset ent_size */
3920 	int ent_size = iter->ent_size;
3921 	struct trace_entry *entry;
3922 
3923 	/*
3924 	 * If called from ftrace_dump(), then the iter->temp buffer
3925 	 * will be the static_temp_buf and not created from kmalloc.
3926 	 * If the entry size is greater than the buffer, we cannot
3927 	 * save it. Just return NULL in that case. This is only
3928 	 * used to add markers when two consecutive events'
3929 	 * timestamps have a large delta. See trace_print_lat_context().
3930 	 */
3931 	if (iter->temp == static_temp_buf &&
3932 	    STATIC_TEMP_BUF_SIZE < ent_size)
3933 		return NULL;
3934 
3935 	/*
3936 	 * The __find_next_entry() may call peek_next_entry(), which may
3937 	 * call ring_buffer_peek() that may make the contents of iter->ent
3938 	 * undefined. Need to copy iter->ent now.
3939 	 */
3940 	if (iter->ent && iter->ent != iter->temp) {
3941 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3942 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3943 			void *temp;
3944 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3945 			if (!temp)
3946 				return NULL;
3947 			kfree(iter->temp);
3948 			iter->temp = temp;
3949 			iter->temp_size = iter->ent_size;
3950 		}
3951 		memcpy(iter->temp, iter->ent, iter->ent_size);
3952 		iter->ent = iter->temp;
3953 	}
3954 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3955 	/* Put back the original ent_size */
3956 	iter->ent_size = ent_size;
3957 
3958 	return entry;
3959 }
3960 
3961 /* Find the next real entry, and increment the iterator to the next entry */
3962 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3963 {
3964 	iter->ent = __find_next_entry(iter, &iter->cpu,
3965 				      &iter->lost_events, &iter->ts);
3966 
3967 	if (iter->ent)
3968 		trace_iterator_increment(iter);
3969 
3970 	return iter->ent ? iter : NULL;
3971 }
3972 
3973 static void trace_consume(struct trace_iterator *iter)
3974 {
3975 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3976 			    &iter->lost_events);
3977 }
3978 
3979 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3980 {
3981 	struct trace_iterator *iter = m->private;
3982 	int i = (int)*pos;
3983 	void *ent;
3984 
3985 	WARN_ON_ONCE(iter->leftover);
3986 
3987 	(*pos)++;
3988 
3989 	/* can't go backwards */
3990 	if (iter->idx > i)
3991 		return NULL;
3992 
3993 	if (iter->idx < 0)
3994 		ent = trace_find_next_entry_inc(iter);
3995 	else
3996 		ent = iter;
3997 
3998 	while (ent && iter->idx < i)
3999 		ent = trace_find_next_entry_inc(iter);
4000 
4001 	iter->pos = *pos;
4002 
4003 	return ent;
4004 }
4005 
4006 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4007 {
4008 	struct ring_buffer_iter *buf_iter;
4009 	unsigned long entries = 0;
4010 	u64 ts;
4011 
4012 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4013 
4014 	buf_iter = trace_buffer_iter(iter, cpu);
4015 	if (!buf_iter)
4016 		return;
4017 
4018 	ring_buffer_iter_reset(buf_iter);
4019 
4020 	/*
4021 	 * We could have the case with the max latency tracers
4022 	 * that a reset never took place on a cpu. This is evident
4023 	 * by the timestamp being before the start of the buffer.
4024 	 */
4025 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4026 		if (ts >= iter->array_buffer->time_start)
4027 			break;
4028 		entries++;
4029 		ring_buffer_iter_advance(buf_iter);
4030 	}
4031 
4032 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4033 }
4034 
4035 /*
4036  * The current tracer is copied to avoid taking a global lock
4037  * all around.
4038  */
4039 static void *s_start(struct seq_file *m, loff_t *pos)
4040 {
4041 	struct trace_iterator *iter = m->private;
4042 	struct trace_array *tr = iter->tr;
4043 	int cpu_file = iter->cpu_file;
4044 	void *p = NULL;
4045 	loff_t l = 0;
4046 	int cpu;
4047 
4048 	/*
4049 	 * Copy the tracer to avoid using a global lock all around.
4050 	 * iter->trace is a copy of current_trace; comparing the name
4051 	 * pointers may be used instead of a strcmp(), as iter->trace->name
4052 	 * will point to the same string as current_trace->name.
4053 	 */
4054 	mutex_lock(&trace_types_lock);
4055 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4056 		*iter->trace = *tr->current_trace;
4057 	mutex_unlock(&trace_types_lock);
4058 
4059 #ifdef CONFIG_TRACER_MAX_TRACE
4060 	if (iter->snapshot && iter->trace->use_max_tr)
4061 		return ERR_PTR(-EBUSY);
4062 #endif
4063 
4064 	if (*pos != iter->pos) {
4065 		iter->ent = NULL;
4066 		iter->cpu = 0;
4067 		iter->idx = -1;
4068 
4069 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4070 			for_each_tracing_cpu(cpu)
4071 				tracing_iter_reset(iter, cpu);
4072 		} else
4073 			tracing_iter_reset(iter, cpu_file);
4074 
4075 		iter->leftover = 0;
4076 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4077 			;
4078 
4079 	} else {
4080 		/*
4081 		 * If we overflowed the seq_file before, then we want
4082 		 * to just reuse the trace_seq buffer again.
4083 		 */
4084 		if (iter->leftover)
4085 			p = iter;
4086 		else {
4087 			l = *pos - 1;
4088 			p = s_next(m, p, &l);
4089 		}
4090 	}
4091 
4092 	trace_event_read_lock();
4093 	trace_access_lock(cpu_file);
4094 	return p;
4095 }
4096 
4097 static void s_stop(struct seq_file *m, void *p)
4098 {
4099 	struct trace_iterator *iter = m->private;
4100 
4101 #ifdef CONFIG_TRACER_MAX_TRACE
4102 	if (iter->snapshot && iter->trace->use_max_tr)
4103 		return;
4104 #endif
4105 
4106 	trace_access_unlock(iter->cpu_file);
4107 	trace_event_read_unlock();
4108 }
4109 
4110 static void
4111 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4112 		      unsigned long *entries, int cpu)
4113 {
4114 	unsigned long count;
4115 
4116 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4117 	/*
4118 	 * If this buffer has skipped entries, then we hold all
4119 	 * entries for the trace and we need to ignore the
4120 	 * ones before the time stamp.
4121 	 */
4122 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4123 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4124 		/* total is the same as the entries */
4125 		*total = count;
4126 	} else
4127 		*total = count +
4128 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4129 	*entries = count;
4130 }
4131 
4132 static void
4133 get_total_entries(struct array_buffer *buf,
4134 		  unsigned long *total, unsigned long *entries)
4135 {
4136 	unsigned long t, e;
4137 	int cpu;
4138 
4139 	*total = 0;
4140 	*entries = 0;
4141 
4142 	for_each_tracing_cpu(cpu) {
4143 		get_total_entries_cpu(buf, &t, &e, cpu);
4144 		*total += t;
4145 		*entries += e;
4146 	}
4147 }
4148 
4149 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4150 {
4151 	unsigned long total, entries;
4152 
4153 	if (!tr)
4154 		tr = &global_trace;
4155 
4156 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4157 
4158 	return entries;
4159 }
4160 
4161 unsigned long trace_total_entries(struct trace_array *tr)
4162 {
4163 	unsigned long total, entries;
4164 
4165 	if (!tr)
4166 		tr = &global_trace;
4167 
4168 	get_total_entries(&tr->array_buffer, &total, &entries);
4169 
4170 	return entries;
4171 }
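
/*
 * Editor's usage sketch (assumed caller, not in the original source):
 * both helpers accept NULL to mean the top-level trace instance.
 *
 *	unsigned long all  = trace_total_entries(NULL);		// every CPU, global_trace
 *	unsigned long cpu0 = trace_total_entries_cpu(NULL, 0);	// just CPU 0
 *
 * The returned count excludes overwritten entries (overruns) and, for the
 * latency tracers, entries skipped before the trace start time.
 */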
4172 
4173 static void print_lat_help_header(struct seq_file *m)
4174 {
4175 	seq_puts(m, "#                    _------=> CPU#            \n"
4176 		    "#                   / _-----=> irqs-off        \n"
4177 		    "#                  | / _----=> need-resched    \n"
4178 		    "#                  || / _---=> hardirq/softirq \n"
4179 		    "#                  ||| / _--=> preempt-depth   \n"
4180 		    "#                  |||| /     delay            \n"
4181 		    "#  cmd     pid     ||||| time  |   caller      \n"
4182 		    "#     \\   /        |||||  \\    |   /         \n");
4183 }
4184 
4185 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4186 {
4187 	unsigned long total;
4188 	unsigned long entries;
4189 
4190 	get_total_entries(buf, &total, &entries);
4191 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4192 		   entries, total, num_online_cpus());
4193 	seq_puts(m, "#\n");
4194 }
4195 
4196 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4197 				   unsigned int flags)
4198 {
4199 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4200 
4201 	print_event_info(buf, m);
4202 
4203 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4204 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4205 }
4206 
4207 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4208 				       unsigned int flags)
4209 {
4210 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211 	const char *space = "            ";
4212 	int prec = tgid ? 12 : 2;
4213 
4214 	print_event_info(buf, m);
4215 
4216 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4217 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4218 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4219 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4220 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4221 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4223 }
4224 
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229 	struct array_buffer *buf = iter->array_buffer;
4230 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231 	struct tracer *type = iter->trace;
4232 	unsigned long entries;
4233 	unsigned long total;
4234 	const char *name = type->name;
4237 
4238 	get_total_entries(buf, &total, &entries);
4239 
4240 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241 		   name, UTS_RELEASE);
4242 	seq_puts(m, "# -----------------------------------"
4243 		 "---------------------------------\n");
4244 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246 		   nsecs_to_usecs(data->saved_latency),
4247 		   entries,
4248 		   total,
4249 		   buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251 		   "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253 		   "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255 		   "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257 		   "preempt_rt",
4258 #else
4259 		   "unknown",
4260 #endif
4261 		   /* These are reserved for later use */
4262 		   0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266 	seq_puts(m, ")\n");
4267 #endif
4268 	seq_puts(m, "#    -----------------\n");
4269 	seq_printf(m, "#    | task: %.16s-%d "
4270 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271 		   data->comm, data->pid,
4272 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273 		   data->policy, data->rt_priority);
4274 	seq_puts(m, "#    -----------------\n");
4275 
4276 	if (data->critical_start) {
4277 		seq_puts(m, "#  => started at: ");
4278 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279 		trace_print_seq(m, &iter->seq);
4280 		seq_puts(m, "\n#  => ended at:   ");
4281 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282 		trace_print_seq(m, &iter->seq);
4283 		seq_puts(m, "\n#\n");
4284 	}
4285 
4286 	seq_puts(m, "#\n");
4287 }
4288 
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291 	struct trace_seq *s = &iter->seq;
4292 	struct trace_array *tr = iter->tr;
4293 
4294 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295 		return;
4296 
4297 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298 		return;
4299 
4300 	if (cpumask_available(iter->started) &&
4301 	    cpumask_test_cpu(iter->cpu, iter->started))
4302 		return;
4303 
4304 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305 		return;
4306 
4307 	if (cpumask_available(iter->started))
4308 		cpumask_set_cpu(iter->cpu, iter->started);
4309 
4310 	/* Don't print started cpu buffer for the first entry of the trace */
4311 	if (iter->idx > 1)
4312 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313 				iter->cpu);
4314 }
4315 
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318 	struct trace_array *tr = iter->tr;
4319 	struct trace_seq *s = &iter->seq;
4320 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321 	struct trace_entry *entry;
4322 	struct trace_event *event;
4323 
4324 	entry = iter->ent;
4325 
4326 	test_cpu_buff_start(iter);
4327 
4328 	event = ftrace_find_event(entry->type);
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332 			trace_print_lat_context(iter);
4333 		else
4334 			trace_print_context(iter);
4335 	}
4336 
4337 	if (trace_seq_has_overflowed(s))
4338 		return TRACE_TYPE_PARTIAL_LINE;
4339 
4340 	if (event)
4341 		return event->funcs->trace(iter, sym_flags, event);
4342 
4343 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344 
4345 	return trace_handle_return(s);
4346 }
4347 
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350 	struct trace_array *tr = iter->tr;
4351 	struct trace_seq *s = &iter->seq;
4352 	struct trace_entry *entry;
4353 	struct trace_event *event;
4354 
4355 	entry = iter->ent;
4356 
4357 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358 		trace_seq_printf(s, "%d %d %llu ",
4359 				 entry->pid, iter->cpu, iter->ts);
4360 
4361 	if (trace_seq_has_overflowed(s))
4362 		return TRACE_TYPE_PARTIAL_LINE;
4363 
4364 	event = ftrace_find_event(entry->type);
4365 	if (event)
4366 		return event->funcs->raw(iter, 0, event);
4367 
4368 	trace_seq_printf(s, "%d ?\n", entry->type);
4369 
4370 	return trace_handle_return(s);
4371 }
4372 
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375 	struct trace_array *tr = iter->tr;
4376 	struct trace_seq *s = &iter->seq;
4377 	unsigned char newline = '\n';
4378 	struct trace_entry *entry;
4379 	struct trace_event *event;
4380 
4381 	entry = iter->ent;
4382 
4383 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4385 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4387 		if (trace_seq_has_overflowed(s))
4388 			return TRACE_TYPE_PARTIAL_LINE;
4389 	}
4390 
4391 	event = ftrace_find_event(entry->type);
4392 	if (event) {
4393 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394 		if (ret != TRACE_TYPE_HANDLED)
4395 			return ret;
4396 	}
4397 
4398 	SEQ_PUT_FIELD(s, newline);
4399 
4400 	return trace_handle_return(s);
4401 }
4402 
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405 	struct trace_array *tr = iter->tr;
4406 	struct trace_seq *s = &iter->seq;
4407 	struct trace_entry *entry;
4408 	struct trace_event *event;
4409 
4410 	entry = iter->ent;
4411 
4412 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413 		SEQ_PUT_FIELD(s, entry->pid);
4414 		SEQ_PUT_FIELD(s, iter->cpu);
4415 		SEQ_PUT_FIELD(s, iter->ts);
4416 		if (trace_seq_has_overflowed(s))
4417 			return TRACE_TYPE_PARTIAL_LINE;
4418 	}
4419 
4420 	event = ftrace_find_event(entry->type);
4421 	return event ? event->funcs->binary(iter, 0, event) :
4422 		TRACE_TYPE_HANDLED;
4423 }
4424 
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427 	struct ring_buffer_iter *buf_iter;
4428 	int cpu;
4429 
4430 	/* If we are looking at one CPU buffer, only check that one */
4431 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432 		cpu = iter->cpu_file;
4433 		buf_iter = trace_buffer_iter(iter, cpu);
4434 		if (buf_iter) {
4435 			if (!ring_buffer_iter_empty(buf_iter))
4436 				return 0;
4437 		} else {
4438 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439 				return 0;
4440 		}
4441 		return 1;
4442 	}
4443 
4444 	for_each_tracing_cpu(cpu) {
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 	}
4454 
4455 	return 1;
4456 }
4457 
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461 	struct trace_array *tr = iter->tr;
4462 	unsigned long trace_flags = tr->trace_flags;
4463 	enum print_line_t ret;
4464 
4465 	if (iter->lost_events) {
4466 		if (iter->lost_events == (unsigned long)-1)
4467 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468 					 iter->cpu);
4469 		else
4470 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471 					 iter->cpu, iter->lost_events);
4472 		if (trace_seq_has_overflowed(&iter->seq))
4473 			return TRACE_TYPE_PARTIAL_LINE;
4474 	}
4475 
4476 	if (iter->trace && iter->trace->print_line) {
4477 		ret = iter->trace->print_line(iter);
4478 		if (ret != TRACE_TYPE_UNHANDLED)
4479 			return ret;
4480 	}
4481 
4482 	if (iter->ent->type == TRACE_BPUTS &&
4483 			trace_flags & TRACE_ITER_PRINTK &&
4484 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485 		return trace_print_bputs_msg_only(iter);
4486 
4487 	if (iter->ent->type == TRACE_BPRINT &&
4488 			trace_flags & TRACE_ITER_PRINTK &&
4489 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490 		return trace_print_bprintk_msg_only(iter);
4491 
4492 	if (iter->ent->type == TRACE_PRINT &&
4493 			trace_flags & TRACE_ITER_PRINTK &&
4494 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495 		return trace_print_printk_msg_only(iter);
4496 
4497 	if (trace_flags & TRACE_ITER_BIN)
4498 		return print_bin_fmt(iter);
4499 
4500 	if (trace_flags & TRACE_ITER_HEX)
4501 		return print_hex_fmt(iter);
4502 
4503 	if (trace_flags & TRACE_ITER_RAW)
4504 		return print_raw_fmt(iter);
4505 
4506 	return print_trace_fmt(iter);
4507 }
4508 
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511 	struct trace_iterator *iter = m->private;
4512 	struct trace_array *tr = iter->tr;
4513 
4514 	/* print nothing if the buffers are empty */
4515 	if (trace_empty(iter))
4516 		return;
4517 
4518 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519 		print_trace_header(m, iter);
4520 
4521 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522 		print_lat_help_header(m);
4523 }
4524 
4525 void trace_default_header(struct seq_file *m)
4526 {
4527 	struct trace_iterator *iter = m->private;
4528 	struct trace_array *tr = iter->tr;
4529 	unsigned long trace_flags = tr->trace_flags;
4530 
4531 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532 		return;
4533 
4534 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535 		/* print nothing if the buffers are empty */
4536 		if (trace_empty(iter))
4537 			return;
4538 		print_trace_header(m, iter);
4539 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4540 			print_lat_help_header(m);
4541 	} else {
4542 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4544 				print_func_help_header_irq(iter->array_buffer,
4545 							   m, trace_flags);
4546 			else
4547 				print_func_help_header(iter->array_buffer, m,
4548 						       trace_flags);
4549 		}
4550 	}
4551 }
4552 
4553 static void test_ftrace_alive(struct seq_file *m)
4554 {
4555 	if (!ftrace_is_dead())
4556 		return;
4557 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4558 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4559 }
4560 
4561 #ifdef CONFIG_TRACER_MAX_TRACE
4562 static void show_snapshot_main_help(struct seq_file *m)
4563 {
4564 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4565 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4566 		    "#                      Takes a snapshot of the main buffer.\n"
4567 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4568 		    "#                      (Doesn't have to be '2'; works with any number\n"
4569 		    "#                       that is not a '0' or '1')\n");
4570 }
4571 
4572 static void show_snapshot_percpu_help(struct seq_file *m)
4573 {
4574 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4575 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4576 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4577 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4578 #else
4579 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4580 		    "#                     Must use main snapshot file to allocate.\n");
4581 #endif
4582 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4583 		    "#                      (Doesn't have to be '2'; works with any number\n"
4584 		    "#                       that is not a '0' or '1')\n");
4585 }
4586 
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589 	if (iter->tr->allocated_snapshot)
4590 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591 	else
4592 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593 
4594 	seq_puts(m, "# Snapshot commands:\n");
4595 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596 		show_snapshot_main_help(m);
4597 	else
4598 		show_snapshot_percpu_help(m);
4599 }
4600 #else
4601 /* Should never be called */
4602 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4603 #endif
4604 
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607 	struct trace_iterator *iter = v;
4608 	int ret;
4609 
4610 	if (iter->ent == NULL) {
4611 		if (iter->tr) {
4612 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613 			seq_puts(m, "#\n");
4614 			test_ftrace_alive(m);
4615 		}
4616 		if (iter->snapshot && trace_empty(iter))
4617 			print_snapshot_help(m, iter);
4618 		else if (iter->trace && iter->trace->print_header)
4619 			iter->trace->print_header(m);
4620 		else
4621 			trace_default_header(m);
4622 
4623 	} else if (iter->leftover) {
4624 		/*
4625 		 * If we filled the seq_file buffer earlier, we
4626 		 * want to just show it now.
4627 		 */
4628 		ret = trace_print_seq(m, &iter->seq);
4629 
4630 		/* ret should this time be zero, but you never know */
4631 		iter->leftover = ret;
4632 
4633 	} else {
4634 		print_trace_line(iter);
4635 		ret = trace_print_seq(m, &iter->seq);
4636 		/*
4637 		 * If we overflow the seq_file buffer, then it will
4638 		 * ask us for this data again at start up.
4639 		 * Use that instead.
4640 		 *  ret is 0 if seq_file write succeeded.
4641 		 *        -1 otherwise.
4642 		 */
4643 		iter->leftover = ret;
4644 	}
4645 
4646 	return 0;
4647 }
4648 
4649 /*
4650  * Should be used after trace_array_get(), trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4656 		return (long)inode->i_cdev - 1;
4657 	return RING_BUFFER_ALL_CPUS;
4658 }
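
/*
 * Editor's note (illustrative): trace_create_cpu_file() stores cpu + 1 in
 * i_cdev, so a NULL i_cdev means "no specific CPU":
 *
 *	i_cdev == NULL	-> RING_BUFFER_ALL_CPUS
 *	i_cdev == 3	-> CPU 2  (the per_cpu/cpu2/ files)
 */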
4659 
4660 static const struct seq_operations tracer_seq_ops = {
4661 	.start		= s_start,
4662 	.next		= s_next,
4663 	.stop		= s_stop,
4664 	.show		= s_show,
4665 };
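
/*
 * Editor's note (general seq_file behaviour, not specific to this file):
 * seq_read() drives these callbacks roughly as
 *
 *	p = start(m, &pos);
 *	while (p) {
 *		show(m, p);
 *		p = next(m, p, &pos);
 *	}
 *	stop(m, p);
 *
 * which is why s_start() takes the locks that s_stop() releases.
 */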
4666 
4667 static struct trace_iterator *
4668 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4669 {
4670 	struct trace_array *tr = inode->i_private;
4671 	struct trace_iterator *iter;
4672 	int cpu;
4673 
4674 	if (tracing_disabled)
4675 		return ERR_PTR(-ENODEV);
4676 
4677 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4678 	if (!iter)
4679 		return ERR_PTR(-ENOMEM);
4680 
4681 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4682 				    GFP_KERNEL);
4683 	if (!iter->buffer_iter)
4684 		goto release;
4685 
4686 	/*
4687 	 * trace_find_next_entry() may need to save off iter->ent.
4688 	 * It will place it into the iter->temp buffer. As most
4689 	 * events are less than 128 bytes, allocate a buffer of that size.
4690 	 * If one is greater, then trace_find_next_entry() will
4691 	 * allocate a new buffer to adjust for the bigger iter->ent.
4692 	 * It's not critical if it fails to get allocated here.
4693 	 */
4694 	iter->temp = kmalloc(128, GFP_KERNEL);
4695 	if (iter->temp)
4696 		iter->temp_size = 128;
4697 
4698 	/*
4699 	 * trace_event_printf() may need to modify given format
4700 	 * string to replace %p with %px so that it shows real address
4701 	 * instead of hash value. However, that is only for the event
4702 	 * tracing, other tracer may not need. Defer the allocation
4703 	 * until it is needed.
4704 	 */
4705 	iter->fmt = NULL;
4706 	iter->fmt_size = 0;
4707 
4708 	/*
4709 	 * We make a copy of the current tracer to avoid concurrent
4710 	 * changes on it while we are reading.
4711 	 */
4712 	mutex_lock(&trace_types_lock);
4713 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4714 	if (!iter->trace)
4715 		goto fail;
4716 
4717 	*iter->trace = *tr->current_trace;
4718 
4719 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4720 		goto fail;
4721 
4722 	iter->tr = tr;
4723 
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725 	/* Currently only the top directory has a snapshot */
4726 	if (tr->current_trace->print_max || snapshot)
4727 		iter->array_buffer = &tr->max_buffer;
4728 	else
4729 #endif
4730 		iter->array_buffer = &tr->array_buffer;
4731 	iter->snapshot = snapshot;
4732 	iter->pos = -1;
4733 	iter->cpu_file = tracing_get_cpu(inode);
4734 	mutex_init(&iter->mutex);
4735 
4736 	/* Notify the tracer early; before we stop tracing. */
4737 	if (iter->trace->open)
4738 		iter->trace->open(iter);
4739 
4740 	/* Annotate start of buffers if we had overruns */
4741 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4742 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4743 
4744 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4745 	if (trace_clocks[tr->clock_id].in_ns)
4746 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4747 
4748 	/*
4749 	 * If pause-on-trace is enabled, then stop the trace while
4750 	 * dumping, unless this is the "snapshot" file
4751 	 */
4752 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4753 		tracing_stop_tr(tr);
4754 
4755 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4756 		for_each_tracing_cpu(cpu) {
4757 			iter->buffer_iter[cpu] =
4758 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4759 							 cpu, GFP_KERNEL);
4760 		}
4761 		ring_buffer_read_prepare_sync();
4762 		for_each_tracing_cpu(cpu) {
4763 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4764 			tracing_iter_reset(iter, cpu);
4765 		}
4766 	} else {
4767 		cpu = iter->cpu_file;
4768 		iter->buffer_iter[cpu] =
4769 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4770 						 cpu, GFP_KERNEL);
4771 		ring_buffer_read_prepare_sync();
4772 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4773 		tracing_iter_reset(iter, cpu);
4774 	}
4775 
4776 	mutex_unlock(&trace_types_lock);
4777 
4778 	return iter;
4779 
4780  fail:
4781 	mutex_unlock(&trace_types_lock);
4782 	kfree(iter->trace);
4783 	kfree(iter->temp);
4784 	kfree(iter->buffer_iter);
4785 release:
4786 	seq_release_private(inode, file);
4787 	return ERR_PTR(-ENOMEM);
4788 }
4789 
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792 	int ret;
4793 
4794 	ret = tracing_check_open_get_tr(NULL);
4795 	if (ret)
4796 		return ret;
4797 
4798 	filp->private_data = inode->i_private;
4799 	return 0;
4800 }
4801 
4802 bool tracing_is_disabled(void)
4803 {
4804 	return tracing_disabled ? true : false;
4805 }
4806 
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813 	struct trace_array *tr = inode->i_private;
4814 	int ret;
4815 
4816 	ret = tracing_check_open_get_tr(tr);
4817 	if (ret)
4818 		return ret;
4819 
4820 	filp->private_data = inode->i_private;
4821 
4822 	return 0;
4823 }
4824 
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827 	struct trace_array *tr = inode->i_private;
4828 	struct seq_file *m = file->private_data;
4829 	struct trace_iterator *iter;
4830 	int cpu;
4831 
4832 	if (!(file->f_mode & FMODE_READ)) {
4833 		trace_array_put(tr);
4834 		return 0;
4835 	}
4836 
4837 	/* Writes do not use seq_file */
4838 	iter = m->private;
4839 	mutex_lock(&trace_types_lock);
4840 
4841 	for_each_tracing_cpu(cpu) {
4842 		if (iter->buffer_iter[cpu])
4843 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844 	}
4845 
4846 	if (iter->trace && iter->trace->close)
4847 		iter->trace->close(iter);
4848 
4849 	if (!iter->snapshot && tr->stop_count)
4850 		/* reenable tracing if it was previously enabled */
4851 		tracing_start_tr(tr);
4852 
4853 	__trace_array_put(tr);
4854 
4855 	mutex_unlock(&trace_types_lock);
4856 
4857 	mutex_destroy(&iter->mutex);
4858 	free_cpumask_var(iter->started);
4859 	kfree(iter->fmt);
4860 	kfree(iter->temp);
4861 	kfree(iter->trace);
4862 	kfree(iter->buffer_iter);
4863 	seq_release_private(inode, file);
4864 
4865 	return 0;
4866 }
4867 
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870 	struct trace_array *tr = inode->i_private;
4871 
4872 	trace_array_put(tr);
4873 	return 0;
4874 }
4875 
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878 	struct trace_array *tr = inode->i_private;
4879 
4880 	trace_array_put(tr);
4881 
4882 	return single_release(inode, file);
4883 }
4884 
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887 	struct trace_array *tr = inode->i_private;
4888 	struct trace_iterator *iter;
4889 	int ret;
4890 
4891 	ret = tracing_check_open_get_tr(tr);
4892 	if (ret)
4893 		return ret;
4894 
4895 	/* If this file was open for write, then erase contents */
4896 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897 		int cpu = tracing_get_cpu(inode);
4898 		struct array_buffer *trace_buf = &tr->array_buffer;
4899 
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901 		if (tr->current_trace->print_max)
4902 			trace_buf = &tr->max_buffer;
4903 #endif
4904 
4905 		if (cpu == RING_BUFFER_ALL_CPUS)
4906 			tracing_reset_online_cpus(trace_buf);
4907 		else
4908 			tracing_reset_cpu(trace_buf, cpu);
4909 	}
4910 
4911 	if (file->f_mode & FMODE_READ) {
4912 		iter = __tracing_open(inode, file, false);
4913 		if (IS_ERR(iter))
4914 			ret = PTR_ERR(iter);
4915 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917 	}
4918 
4919 	if (ret < 0)
4920 		trace_array_put(tr);
4921 
4922 	return ret;
4923 }
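
/*
 * Editor's usage note (paths assume tracefs is mounted at
 * /sys/kernel/tracing): opening the "trace" file for write with O_TRUNC
 * clears the buffer, which is exactly what a shell redirection does:
 *
 *	# echo > /sys/kernel/tracing/trace			(clear all CPUs)
 *	# echo > /sys/kernel/tracing/per_cpu/cpu1/trace		(clear only CPU 1)
 */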
4924 
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935 
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940 	while (t && !trace_ok_for_array(t, tr))
4941 		t = t->next;
4942 
4943 	return t;
4944 }
4945 
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949 	struct trace_array *tr = m->private;
4950 	struct tracer *t = v;
4951 
4952 	(*pos)++;
4953 
4954 	if (t)
4955 		t = get_tracer_for_array(tr, t->next);
4956 
4957 	return t;
4958 }
4959 
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962 	struct trace_array *tr = m->private;
4963 	struct tracer *t;
4964 	loff_t l = 0;
4965 
4966 	mutex_lock(&trace_types_lock);
4967 
4968 	t = get_tracer_for_array(tr, trace_types);
4969 	for (; t && l < *pos; t = t_next(m, t, &l))
4970 			;
4971 
4972 	return t;
4973 }
4974 
4975 static void t_stop(struct seq_file *m, void *p)
4976 {
4977 	mutex_unlock(&trace_types_lock);
4978 }
4979 
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982 	struct tracer *t = v;
4983 
4984 	if (!t)
4985 		return 0;
4986 
4987 	seq_puts(m, t->name);
4988 	if (t->next)
4989 		seq_putc(m, ' ');
4990 	else
4991 		seq_putc(m, '\n');
4992 
4993 	return 0;
4994 }
4995 
4996 static const struct seq_operations show_traces_seq_ops = {
4997 	.start		= t_start,
4998 	.next		= t_next,
4999 	.stop		= t_stop,
5000 	.show		= t_show,
5001 };
5002 
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005 	struct trace_array *tr = inode->i_private;
5006 	struct seq_file *m;
5007 	int ret;
5008 
5009 	ret = tracing_check_open_get_tr(tr);
5010 	if (ret)
5011 		return ret;
5012 
5013 	ret = seq_open(file, &show_traces_seq_ops);
5014 	if (ret) {
5015 		trace_array_put(tr);
5016 		return ret;
5017 	}
5018 
5019 	m = file->private_data;
5020 	m->private = tr;
5021 
5022 	return 0;
5023 }
5024 
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027 	struct trace_array *tr = inode->i_private;
5028 
5029 	trace_array_put(tr);
5030 	return seq_release(inode, file);
5031 }
5032 
5033 static ssize_t
5034 tracing_write_stub(struct file *filp, const char __user *ubuf,
5035 		   size_t count, loff_t *ppos)
5036 {
5037 	return count;
5038 }
5039 
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042 	int ret;
5043 
5044 	if (file->f_mode & FMODE_READ)
5045 		ret = seq_lseek(file, offset, whence);
5046 	else
5047 		file->f_pos = ret = 0;
5048 
5049 	return ret;
5050 }
5051 
5052 static const struct file_operations tracing_fops = {
5053 	.open		= tracing_open,
5054 	.read		= seq_read,
5055 	.write		= tracing_write_stub,
5056 	.llseek		= tracing_lseek,
5057 	.release	= tracing_release,
5058 };
5059 
5060 static const struct file_operations show_traces_fops = {
5061 	.open		= show_traces_open,
5062 	.read		= seq_read,
5063 	.llseek		= seq_lseek,
5064 	.release	= show_traces_release,
5065 };
5066 
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069 		     size_t count, loff_t *ppos)
5070 {
5071 	struct trace_array *tr = file_inode(filp)->i_private;
5072 	char *mask_str;
5073 	int len;
5074 
5075 	len = snprintf(NULL, 0, "%*pb\n",
5076 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077 	mask_str = kmalloc(len, GFP_KERNEL);
5078 	if (!mask_str)
5079 		return -ENOMEM;
5080 
5081 	len = snprintf(mask_str, len, "%*pb\n",
5082 		       cpumask_pr_args(tr->tracing_cpumask));
5083 	if (len >= count) {
5084 		count = -EINVAL;
5085 		goto out_err;
5086 	}
5087 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088 
5089 out_err:
5090 	kfree(mask_str);
5091 
5092 	return count;
5093 }
5094 
5095 int tracing_set_cpumask(struct trace_array *tr,
5096 			cpumask_var_t tracing_cpumask_new)
5097 {
5098 	int cpu;
5099 
5100 	if (!tr)
5101 		return -EINVAL;
5102 
5103 	local_irq_disable();
5104 	arch_spin_lock(&tr->max_lock);
5105 	for_each_tracing_cpu(cpu) {
5106 		/*
5107 		 * Increase/decrease the disabled counter if we are
5108 		 * about to flip a bit in the cpumask:
5109 		 */
5110 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114 		}
5115 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119 		}
5120 	}
5121 	arch_spin_unlock(&tr->max_lock);
5122 	local_irq_enable();
5123 
5124 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125 
5126 	return 0;
5127 }
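
/*
 * Editor's sketch of a kernel-side caller (assumption: 'tr' is a valid
 * struct trace_array, e.g. obtained via trace_array_get_by_name()):
 *
 *	cpumask_var_t new_mask;
 *	int err;
 *
 *	if (!zalloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(0, new_mask);
 *	cpumask_set_cpu(1, new_mask);
 *	err = tracing_set_cpumask(tr, new_mask);	// trace only CPUs 0 and 1
 *	free_cpumask_var(new_mask);			// tr keeps its own copy
 */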
5128 
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131 		      size_t count, loff_t *ppos)
5132 {
5133 	struct trace_array *tr = file_inode(filp)->i_private;
5134 	cpumask_var_t tracing_cpumask_new;
5135 	int err;
5136 
5137 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138 		return -ENOMEM;
5139 
5140 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141 	if (err)
5142 		goto err_free;
5143 
5144 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145 	if (err)
5146 		goto err_free;
5147 
5148 	free_cpumask_var(tracing_cpumask_new);
5149 
5150 	return count;
5151 
5152 err_free:
5153 	free_cpumask_var(tracing_cpumask_new);
5154 
5155 	return err;
5156 }
5157 
5158 static const struct file_operations tracing_cpumask_fops = {
5159 	.open		= tracing_open_generic_tr,
5160 	.read		= tracing_cpumask_read,
5161 	.write		= tracing_cpumask_write,
5162 	.release	= tracing_release_generic_tr,
5163 	.llseek		= generic_file_llseek,
5164 };
5165 
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168 	struct tracer_opt *trace_opts;
5169 	struct trace_array *tr = m->private;
5170 	u32 tracer_flags;
5171 	int i;
5172 
5173 	mutex_lock(&trace_types_lock);
5174 	tracer_flags = tr->current_trace->flags->val;
5175 	trace_opts = tr->current_trace->flags->opts;
5176 
5177 	for (i = 0; trace_options[i]; i++) {
5178 		if (tr->trace_flags & (1 << i))
5179 			seq_printf(m, "%s\n", trace_options[i]);
5180 		else
5181 			seq_printf(m, "no%s\n", trace_options[i]);
5182 	}
5183 
5184 	for (i = 0; trace_opts[i].name; i++) {
5185 		if (tracer_flags & trace_opts[i].bit)
5186 			seq_printf(m, "%s\n", trace_opts[i].name);
5187 		else
5188 			seq_printf(m, "no%s\n", trace_opts[i].name);
5189 	}
5190 	mutex_unlock(&trace_types_lock);
5191 
5192 	return 0;
5193 }
5194 
5195 static int __set_tracer_option(struct trace_array *tr,
5196 			       struct tracer_flags *tracer_flags,
5197 			       struct tracer_opt *opts, int neg)
5198 {
5199 	struct tracer *trace = tracer_flags->trace;
5200 	int ret;
5201 
5202 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203 	if (ret)
5204 		return ret;
5205 
5206 	if (neg)
5207 		tracer_flags->val &= ~opts->bit;
5208 	else
5209 		tracer_flags->val |= opts->bit;
5210 	return 0;
5211 }
5212 
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216 	struct tracer *trace = tr->current_trace;
5217 	struct tracer_flags *tracer_flags = trace->flags;
5218 	struct tracer_opt *opts = NULL;
5219 	int i;
5220 
5221 	for (i = 0; tracer_flags->opts[i].name; i++) {
5222 		opts = &tracer_flags->opts[i];
5223 
5224 		if (strcmp(cmp, opts->name) == 0)
5225 			return __set_tracer_option(tr, trace->flags, opts, neg);
5226 	}
5227 
5228 	return -EINVAL;
5229 }
5230 
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235 		return -1;
5236 
5237 	return 0;
5238 }
5239 
5240 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5241 {
5242 	int *map;
5243 
5244 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5245 	    (mask == TRACE_ITER_RECORD_CMD))
5246 		lockdep_assert_held(&event_mutex);
5247 
5248 	/* do nothing if flag is already set */
5249 	if (!!(tr->trace_flags & mask) == !!enabled)
5250 		return 0;
5251 
5252 	/* Give the tracer a chance to approve the change */
5253 	if (tr->current_trace->flag_changed)
5254 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5255 			return -EINVAL;
5256 
5257 	if (enabled)
5258 		tr->trace_flags |= mask;
5259 	else
5260 		tr->trace_flags &= ~mask;
5261 
5262 	if (mask == TRACE_ITER_RECORD_CMD)
5263 		trace_event_enable_cmd_record(enabled);
5264 
5265 	if (mask == TRACE_ITER_RECORD_TGID) {
5266 		if (!tgid_map) {
5267 			tgid_map_max = pid_max;
5268 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5269 				       GFP_KERNEL);
5270 
5271 			/*
5272 			 * Pairs with smp_load_acquire() in
5273 			 * trace_find_tgid_ptr() to ensure that if it observes
5274 			 * the tgid_map we just allocated then it also observes
5275 			 * the corresponding tgid_map_max value.
5276 			 */
5277 			smp_store_release(&tgid_map, map);
5278 		}
5279 		if (!tgid_map) {
5280 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5281 			return -ENOMEM;
5282 		}
5283 
5284 		trace_event_enable_tgid_record(enabled);
5285 	}
5286 
5287 	if (mask == TRACE_ITER_EVENT_FORK)
5288 		trace_event_follow_fork(tr, enabled);
5289 
5290 	if (mask == TRACE_ITER_FUNC_FORK)
5291 		ftrace_pid_follow_fork(tr, enabled);
5292 
5293 	if (mask == TRACE_ITER_OVERWRITE) {
5294 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5297 #endif
5298 	}
5299 
5300 	if (mask == TRACE_ITER_PRINTK) {
5301 		trace_printk_start_stop_comm(enabled);
5302 		trace_printk_control(enabled);
5303 	}
5304 
5305 	return 0;
5306 }
5307 
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310 	char *cmp;
5311 	int neg = 0;
5312 	int ret;
5313 	size_t orig_len = strlen(option);
5314 	int len;
5315 
5316 	cmp = strstrip(option);
5317 
5318 	len = str_has_prefix(cmp, "no");
5319 	if (len)
5320 		neg = 1;
5321 
5322 	cmp += len;
5323 
5324 	mutex_lock(&event_mutex);
5325 	mutex_lock(&trace_types_lock);
5326 
5327 	ret = match_string(trace_options, -1, cmp);
5328 	/* If it is not a core trace option, try the tracer-specific options */
5329 	if (ret < 0)
5330 		ret = set_tracer_option(tr, cmp, neg);
5331 	else
5332 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5333 
5334 	mutex_unlock(&trace_types_lock);
5335 	mutex_unlock(&event_mutex);
5336 
5337 	/*
5338 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5339 	 * turn it back into a space.
5340 	 */
5341 	if (orig_len > strlen(option))
5342 		option[strlen(option)] = ' ';
5343 
5344 	return ret;
5345 }
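
/*
 * Editor's example (illustrative; "overwrite" is a core trace_options name,
 * and the buffer must be writable because the helper edits it in place):
 *
 *	char set_opt[]   = "overwrite";		// sets TRACE_ITER_OVERWRITE
 *	char clear_opt[] = "nooverwrite";	// the "no" prefix clears the flag
 *
 *	trace_set_options(tr, set_opt);
 *	trace_set_options(tr, clear_opt);
 *
 * A name that is not in the core table is handed to set_tracer_option(),
 * which matches it against the current tracer's private flags.
 */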
5346 
5347 static void __init apply_trace_boot_options(void)
5348 {
5349 	char *buf = trace_boot_options_buf;
5350 	char *option;
5351 
5352 	while (true) {
5353 		option = strsep(&buf, ",");
5354 
5355 		if (!option)
5356 			break;
5357 
5358 		if (*option)
5359 			trace_set_options(&global_trace, option);
5360 
5361 		/* Put back the comma to allow this to be called again */
5362 		if (buf)
5363 			*(buf - 1) = ',';
5364 	}
5365 }
5366 
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369 			size_t cnt, loff_t *ppos)
5370 {
5371 	struct seq_file *m = filp->private_data;
5372 	struct trace_array *tr = m->private;
5373 	char buf[64];
5374 	int ret;
5375 
5376 	if (cnt >= sizeof(buf))
5377 		return -EINVAL;
5378 
5379 	if (copy_from_user(buf, ubuf, cnt))
5380 		return -EFAULT;
5381 
5382 	buf[cnt] = 0;
5383 
5384 	ret = trace_set_options(tr, buf);
5385 	if (ret < 0)
5386 		return ret;
5387 
5388 	*ppos += cnt;
5389 
5390 	return cnt;
5391 }
5392 
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395 	struct trace_array *tr = inode->i_private;
5396 	int ret;
5397 
5398 	ret = tracing_check_open_get_tr(tr);
5399 	if (ret)
5400 		return ret;
5401 
5402 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403 	if (ret < 0)
5404 		trace_array_put(tr);
5405 
5406 	return ret;
5407 }
5408 
5409 static const struct file_operations tracing_iter_fops = {
5410 	.open		= tracing_trace_options_open,
5411 	.read		= seq_read,
5412 	.llseek		= seq_lseek,
5413 	.release	= tracing_single_release_tr,
5414 	.write		= tracing_trace_options_write,
5415 };
5416 
5417 static const char readme_msg[] =
5418 	"tracing mini-HOWTO:\n\n"
5419 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5420 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5421 	" Important files:\n"
5422 	"  trace\t\t\t- The static contents of the buffer\n"
5423 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5424 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5425 	"  current_tracer\t- function and latency tracers\n"
5426 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5427 	"  error_log\t- error log for failed commands (that support it)\n"
5428 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5429 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5430 	"  trace_clock\t\t- change the clock used to order events\n"
5431 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5432 	"      global:   Synced across CPUs but slows tracing down.\n"
5433 	"     counter:   Not a clock, but just an increment\n"
5434 	"      uptime:   Jiffy counter from time of boot\n"
5435 	"        perf:   Same clock that perf events use\n"
5436 #ifdef CONFIG_X86_64
5437 	"     x86-tsc:   TSC cycle counter\n"
5438 #endif
5439 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5440 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5441 	"    absolute:   Absolute (standalone) timestamp\n"
5442 	"\n  trace_marker\t\t- Writes to this file are written into the kernel buffer\n"
5443 	"\n  trace_marker_raw\t\t- Writes to this file are written as binary data into the kernel buffer\n"
5444 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5445 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5446 	"\t\t\t  Remove sub-buffer with rmdir\n"
5447 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5448 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5449 	"\t\t\t  option name\n"
5450 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5451 #ifdef CONFIG_DYNAMIC_FTRACE
5452 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5453 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5454 	"\t\t\t  functions\n"
5455 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5456 	"\t     modules: Can select a group via module\n"
5457 	"\t      Format: :mod:<module-name>\n"
5458 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5459 	"\t    triggers: a command to perform when function is hit\n"
5460 	"\t      Format: <function>:<trigger>[:count]\n"
5461 	"\t     trigger: traceon, traceoff\n"
5462 	"\t\t      enable_event:<system>:<event>\n"
5463 	"\t\t      disable_event:<system>:<event>\n"
5464 #ifdef CONFIG_STACKTRACE
5465 	"\t\t      stacktrace\n"
5466 #endif
5467 #ifdef CONFIG_TRACER_SNAPSHOT
5468 	"\t\t      snapshot\n"
5469 #endif
5470 	"\t\t      dump\n"
5471 	"\t\t      cpudump\n"
5472 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5473 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5474 	"\t     The first one will disable tracing every time do_fault is hit\n"
5475 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5476 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5477 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5478 	"\t       the counter will not decrement. It only decrements when the\n"
5479 	"\t       trigger did work\n"
5480 	"\t     To remove trigger without count:\n"
5481 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5482 	"\t     To remove trigger with a count:\n"
5483 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5484 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5485 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5486 	"\t    modules: Can select a group via module command :mod:\n"
5487 	"\t    Does not accept triggers\n"
5488 #endif /* CONFIG_DYNAMIC_FTRACE */
5489 #ifdef CONFIG_FUNCTION_TRACER
5490 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5491 	"\t\t    (function)\n"
5492 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5493 	"\t\t    (function)\n"
5494 #endif
5495 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5496 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5497 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5498 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5499 #endif
5500 #ifdef CONFIG_TRACER_SNAPSHOT
5501 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5502 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5503 	"\t\t\t  information\n"
5504 #endif
5505 #ifdef CONFIG_STACK_TRACER
5506 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5507 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5508 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5509 	"\t\t\t  new trace)\n"
5510 #ifdef CONFIG_DYNAMIC_FTRACE
5511 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5512 	"\t\t\t  traces\n"
5513 #endif
5514 #endif /* CONFIG_STACK_TRACER */
5515 #ifdef CONFIG_DYNAMIC_EVENTS
5516 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5517 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #ifdef CONFIG_KPROBE_EVENTS
5520 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5521 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5522 #endif
5523 #ifdef CONFIG_UPROBE_EVENTS
5524 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5525 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5526 #endif
5527 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5528 	"\t  accepts: event-definitions (one definition per line)\n"
5529 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5530 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5531 #ifdef CONFIG_HIST_TRIGGERS
5532 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5533 #endif
5534 	"\t           -:[<group>/]<event>\n"
5535 #ifdef CONFIG_KPROBE_EVENTS
5536 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5537   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5538 #endif
5539 #ifdef CONFIG_UPROBE_EVENTS
5540   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5541 #endif
5542 	"\t     args: <name>=fetcharg[:type]\n"
5543 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5544 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5545 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5546 #else
5547 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5548 #endif
5549 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5550 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5551 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5552 	"\t           <type>\\[<array-size>\\]\n"
5553 #ifdef CONFIG_HIST_TRIGGERS
5554 	"\t    field: <stype> <name>;\n"
5555 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5556 	"\t           [unsigned] char/int/long\n"
5557 #endif
5558 #endif
5559 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5560 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5561 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5562 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5563 	"\t\t\t  events\n"
5564 	"      filter\t\t- If set, only events passing filter are traced\n"
5565 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5566 	"\t\t\t  <event>:\n"
5567 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5568 	"      filter\t\t- If set, only events passing filter are traced\n"
5569 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5570 	"\t    Format: <trigger>[:count][if <filter>]\n"
5571 	"\t   trigger: traceon, traceoff\n"
5572 	"\t            enable_event:<system>:<event>\n"
5573 	"\t            disable_event:<system>:<event>\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575 	"\t            enable_hist:<system>:<event>\n"
5576 	"\t            disable_hist:<system>:<event>\n"
5577 #endif
5578 #ifdef CONFIG_STACKTRACE
5579 	"\t\t    stacktrace\n"
5580 #endif
5581 #ifdef CONFIG_TRACER_SNAPSHOT
5582 	"\t\t    snapshot\n"
5583 #endif
5584 #ifdef CONFIG_HIST_TRIGGERS
5585 	"\t\t    hist (see below)\n"
5586 #endif
5587 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5588 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5589 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5590 	"\t                  events/block/block_unplug/trigger\n"
5591 	"\t   The first disables tracing every time block_unplug is hit.\n"
5592 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5593 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5594 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5595 	"\t   Like function triggers, the counter is only decremented if it\n"
5596 	"\t    enabled or disabled tracing.\n"
5597 	"\t   To remove a trigger without a count:\n"
5598 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5599 	"\t   To remove a trigger with a count:\n"
5600 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5601 	"\t   Filters can be left out when removing a trigger.\n"
5602 #ifdef CONFIG_HIST_TRIGGERS
5603 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5604 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5605 	"\t            [:values=<field1[,field2,...]>]\n"
5606 	"\t            [:sort=<field1[,field2,...]>]\n"
5607 	"\t            [:size=#entries]\n"
5608 	"\t            [:pause][:continue][:clear]\n"
5609 	"\t            [:name=histname1]\n"
5610 	"\t            [:<handler>.<action>]\n"
5611 	"\t            [if <filter>]\n\n"
5612 	"\t    Note, special fields can be used as well:\n"
5613 	"\t            common_timestamp - to record current timestamp\n"
5614 	"\t            common_cpu - to record the CPU the event happened on\n"
5615 	"\n"
5616 	"\t    When a matching event is hit, an entry is added to a hash\n"
5617 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5618 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5619 	"\t    correspond to fields in the event's format description.  Keys\n"
5620 	"\t    can be any field, or the special string 'stacktrace'.\n"
5621 	"\t    Compound keys consisting of up to two fields can be specified\n"
5622 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5623 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5624 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5625 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5626 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5627 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5628 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5629 	"\t    its histogram data will be shared with other triggers of the\n"
5630 	"\t    same name, and trigger hits will update this common data.\n\n"
5631 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5632 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5633 	"\t    triggers attached to an event, there will be a table for each\n"
5634 	"\t    trigger in the output.  The table displayed for a named\n"
5635 	"\t    trigger will be the same as any other instance having the\n"
5636 	"\t    same name.  The default format used to display a given field\n"
5637 	"\t    can be modified by appending any of the following modifiers\n"
5638 	"\t    to the field name, as applicable:\n\n"
5639 	"\t            .hex        display a number as a hex value\n"
5640 	"\t            .sym        display an address as a symbol\n"
5641 	"\t            .sym-offset display an address as a symbol and offset\n"
5642 	"\t            .execname   display a common_pid as a program name\n"
5643 	"\t            .syscall    display a syscall id as a syscall name\n"
5644 	"\t            .log2       display log2 value rather than raw number\n"
5645 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5646 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5647 	"\t    trigger or to start a hist trigger but not log any events\n"
5648 	"\t    until told to do so.  'continue' can be used to start or\n"
5649 	"\t    restart a paused hist trigger.\n\n"
5650 	"\t    The 'clear' parameter will clear the contents of a running\n"
5651 	"\t    hist trigger and leave its current paused/active state\n"
5652 	"\t    unchanged.\n\n"
5653 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5654 	"\t    have one event conditionally start and stop another event's\n"
5655 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5656 	"\t    the enable_event and disable_event triggers.\n\n"
5657 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5658 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5659 	"\t        <handler>.<action>\n\n"
5660 	"\t    The available handlers are:\n\n"
5661 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5662 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5663 	"\t        onchange(var)            - invoke action if var changes\n\n"
5664 	"\t    The available actions are:\n\n"
5665 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5666 	"\t        save(field,...)                      - save current event fields\n"
5667 #ifdef CONFIG_TRACER_SNAPSHOT
5668 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5669 #endif
5670 #ifdef CONFIG_SYNTH_EVENTS
5671 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5672 	"\t  Write into this file to define/undefine new synthetic events.\n"
5673 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5674 #endif
5675 #endif
5676 ;
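
/*
 * Illustrative example of the dynamic-event syntax documented above
 * (not part of the readme text; "myprobe" and the probed symbol are only
 * examples, and argument access via $arg<N> needs
 * CONFIG_HAVE_FUNCTION_ARG_ACCESS_API):
 *
 *   # echo 'p:myprobe do_sys_open dfd=$arg1 flags=$arg3' >> kprobe_events
 *   # echo '-:myprobe' >> kprobe_events
 *
 * The first line defines a kprobe event fetching two arguments; the
 * second removes it again.
 */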
5677 
5678 static ssize_t
5679 tracing_readme_read(struct file *filp, char __user *ubuf,
5680 		       size_t cnt, loff_t *ppos)
5681 {
5682 	return simple_read_from_buffer(ubuf, cnt, ppos,
5683 					readme_msg, strlen(readme_msg));
5684 }
5685 
5686 static const struct file_operations tracing_readme_fops = {
5687 	.open		= tracing_open_generic,
5688 	.read		= tracing_readme_read,
5689 	.llseek		= generic_file_llseek,
5690 };
5691 
5692 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5693 {
5694 	int pid = ++(*pos);
5695 
5696 	return trace_find_tgid_ptr(pid);
5697 }
5698 
5699 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5700 {
5701 	int pid = *pos;
5702 
5703 	return trace_find_tgid_ptr(pid);
5704 }
5705 
5706 static void saved_tgids_stop(struct seq_file *m, void *v)
5707 {
5708 }
5709 
5710 static int saved_tgids_show(struct seq_file *m, void *v)
5711 {
5712 	int *entry = (int *)v;
5713 	int pid = entry - tgid_map;
5714 	int tgid = *entry;
5715 
5716 	if (tgid == 0)
5717 		return SEQ_SKIP;
5718 
5719 	seq_printf(m, "%d %d\n", pid, tgid);
5720 	return 0;
5721 }
5722 
5723 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5724 	.start		= saved_tgids_start,
5725 	.stop		= saved_tgids_stop,
5726 	.next		= saved_tgids_next,
5727 	.show		= saved_tgids_show,
5728 };
5729 
5730 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5731 {
5732 	int ret;
5733 
5734 	ret = tracing_check_open_get_tr(NULL);
5735 	if (ret)
5736 		return ret;
5737 
5738 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5739 }
5740 
5741 
5742 static const struct file_operations tracing_saved_tgids_fops = {
5743 	.open		= tracing_saved_tgids_open,
5744 	.read		= seq_read,
5745 	.llseek		= seq_lseek,
5746 	.release	= seq_release,
5747 };
5748 
5749 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5750 {
5751 	unsigned int *ptr = v;
5752 
5753 	if (*pos || m->count)
5754 		ptr++;
5755 
5756 	(*pos)++;
5757 
5758 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5759 	     ptr++) {
5760 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5761 			continue;
5762 
5763 		return ptr;
5764 	}
5765 
5766 	return NULL;
5767 }
5768 
5769 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5770 {
5771 	void *v;
5772 	loff_t l = 0;
5773 
5774 	preempt_disable();
5775 	arch_spin_lock(&trace_cmdline_lock);
5776 
5777 	v = &savedcmd->map_cmdline_to_pid[0];
5778 	while (l <= *pos) {
5779 		v = saved_cmdlines_next(m, v, &l);
5780 		if (!v)
5781 			return NULL;
5782 	}
5783 
5784 	return v;
5785 }
5786 
5787 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5788 {
5789 	arch_spin_unlock(&trace_cmdline_lock);
5790 	preempt_enable();
5791 }
5792 
5793 static int saved_cmdlines_show(struct seq_file *m, void *v)
5794 {
5795 	char buf[TASK_COMM_LEN];
5796 	unsigned int *pid = v;
5797 
5798 	__trace_find_cmdline(*pid, buf);
5799 	seq_printf(m, "%d %s\n", *pid, buf);
5800 	return 0;
5801 }
5802 
5803 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5804 	.start		= saved_cmdlines_start,
5805 	.next		= saved_cmdlines_next,
5806 	.stop		= saved_cmdlines_stop,
5807 	.show		= saved_cmdlines_show,
5808 };
5809 
5810 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5811 {
5812 	int ret;
5813 
5814 	ret = tracing_check_open_get_tr(NULL);
5815 	if (ret)
5816 		return ret;
5817 
5818 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5819 }
5820 
5821 static const struct file_operations tracing_saved_cmdlines_fops = {
5822 	.open		= tracing_saved_cmdlines_open,
5823 	.read		= seq_read,
5824 	.llseek		= seq_lseek,
5825 	.release	= seq_release,
5826 };
5827 
5828 static ssize_t
5829 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5830 				 size_t cnt, loff_t *ppos)
5831 {
5832 	char buf[64];
5833 	int r;
5834 
5835 	arch_spin_lock(&trace_cmdline_lock);
5836 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5837 	arch_spin_unlock(&trace_cmdline_lock);
5838 
5839 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5840 }
5841 
5842 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5843 {
5844 	kfree(s->saved_cmdlines);
5845 	kfree(s->map_cmdline_to_pid);
5846 	kfree(s);
5847 }
5848 
5849 static int tracing_resize_saved_cmdlines(unsigned int val)
5850 {
5851 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5852 
5853 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5854 	if (!s)
5855 		return -ENOMEM;
5856 
5857 	if (allocate_cmdlines_buffer(val, s) < 0) {
5858 		kfree(s);
5859 		return -ENOMEM;
5860 	}
5861 
5862 	arch_spin_lock(&trace_cmdline_lock);
5863 	savedcmd_temp = savedcmd;
5864 	savedcmd = s;
5865 	arch_spin_unlock(&trace_cmdline_lock);
5866 	free_saved_cmdlines_buffer(savedcmd_temp);
5867 
5868 	return 0;
5869 }
5870 
5871 static ssize_t
5872 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5873 				  size_t cnt, loff_t *ppos)
5874 {
5875 	unsigned long val;
5876 	int ret;
5877 
5878 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5879 	if (ret)
5880 		return ret;
5881 
5882 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5883 	if (!val || val > PID_MAX_DEFAULT)
5884 		return -EINVAL;
5885 
5886 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5887 	if (ret < 0)
5888 		return ret;
5889 
5890 	*ppos += cnt;
5891 
5892 	return cnt;
5893 }
5894 
5895 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5896 	.open		= tracing_open_generic,
5897 	.read		= tracing_saved_cmdlines_size_read,
5898 	.write		= tracing_saved_cmdlines_size_write,
5899 };
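
/*
 * Illustrative usage of the tracefs file these operations are typically
 * attached to (saved_cmdlines_size; the path assumes tracefs is mounted
 * at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Writes are rejected with -EINVAL unless 1 <= value <= PID_MAX_DEFAULT,
 * as enforced in tracing_saved_cmdlines_size_write() above.
 */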
5900 
5901 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5902 static union trace_eval_map_item *
5903 update_eval_map(union trace_eval_map_item *ptr)
5904 {
5905 	if (!ptr->map.eval_string) {
5906 		if (ptr->tail.next) {
5907 			ptr = ptr->tail.next;
5908 			/* Set ptr to the next real item (skip head) */
5909 			ptr++;
5910 		} else
5911 			return NULL;
5912 	}
5913 	return ptr;
5914 }
5915 
5916 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5917 {
5918 	union trace_eval_map_item *ptr = v;
5919 
5920 	/*
5921 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5922 	 * This really should never happen.
5923 	 */
5924 	(*pos)++;
5925 	ptr = update_eval_map(ptr);
5926 	if (WARN_ON_ONCE(!ptr))
5927 		return NULL;
5928 
5929 	ptr++;
5930 	ptr = update_eval_map(ptr);
5931 
5932 	return ptr;
5933 }
5934 
5935 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5936 {
5937 	union trace_eval_map_item *v;
5938 	loff_t l = 0;
5939 
5940 	mutex_lock(&trace_eval_mutex);
5941 
5942 	v = trace_eval_maps;
5943 	if (v)
5944 		v++;
5945 
5946 	while (v && l < *pos) {
5947 		v = eval_map_next(m, v, &l);
5948 	}
5949 
5950 	return v;
5951 }
5952 
5953 static void eval_map_stop(struct seq_file *m, void *v)
5954 {
5955 	mutex_unlock(&trace_eval_mutex);
5956 }
5957 
5958 static int eval_map_show(struct seq_file *m, void *v)
5959 {
5960 	union trace_eval_map_item *ptr = v;
5961 
5962 	seq_printf(m, "%s %ld (%s)\n",
5963 		   ptr->map.eval_string, ptr->map.eval_value,
5964 		   ptr->map.system);
5965 
5966 	return 0;
5967 }
5968 
5969 static const struct seq_operations tracing_eval_map_seq_ops = {
5970 	.start		= eval_map_start,
5971 	.next		= eval_map_next,
5972 	.stop		= eval_map_stop,
5973 	.show		= eval_map_show,
5974 };
5975 
5976 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5977 {
5978 	int ret;
5979 
5980 	ret = tracing_check_open_get_tr(NULL);
5981 	if (ret)
5982 		return ret;
5983 
5984 	return seq_open(filp, &tracing_eval_map_seq_ops);
5985 }
5986 
5987 static const struct file_operations tracing_eval_map_fops = {
5988 	.open		= tracing_eval_map_open,
5989 	.read		= seq_read,
5990 	.llseek		= seq_lseek,
5991 	.release	= seq_release,
5992 };
5993 
5994 static inline union trace_eval_map_item *
5995 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5996 {
5997 	/* Return tail of array given the head */
5998 	return ptr + ptr->head.length + 1;
5999 }
6000 
6001 static void
6002 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6003 			   int len)
6004 {
6005 	struct trace_eval_map **stop;
6006 	struct trace_eval_map **map;
6007 	union trace_eval_map_item *map_array;
6008 	union trace_eval_map_item *ptr;
6009 
6010 	stop = start + len;
6011 
6012 	/*
6013 	 * The trace_eval_maps contains the map plus a head and tail item,
6014 	 * where the head holds the module and length of array, and the
6015 	 * tail holds a pointer to the next list.
6016 	 */
6017 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6018 	if (!map_array) {
6019 		pr_warn("Unable to allocate trace eval mapping\n");
6020 		return;
6021 	}
6022 
6023 	mutex_lock(&trace_eval_mutex);
6024 
6025 	if (!trace_eval_maps)
6026 		trace_eval_maps = map_array;
6027 	else {
6028 		ptr = trace_eval_maps;
6029 		for (;;) {
6030 			ptr = trace_eval_jmp_to_tail(ptr);
6031 			if (!ptr->tail.next)
6032 				break;
6033 			ptr = ptr->tail.next;
6034 
6035 		}
6036 		ptr->tail.next = map_array;
6037 	}
6038 	map_array->head.mod = mod;
6039 	map_array->head.length = len;
6040 	map_array++;
6041 
6042 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6043 		map_array->map = **map;
6044 		map_array++;
6045 	}
6046 	memset(map_array, 0, sizeof(*map_array));
6047 
6048 	mutex_unlock(&trace_eval_mutex);
6049 }
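
/*
 * Illustrative sketch of the array built by trace_insert_eval_map_file()
 * for a module providing three eval maps (indices only, names hypothetical):
 *
 *   map_array[0].head = { .mod = mod, .length = 3 }
 *   map_array[1].map  = *start[0]
 *   map_array[2].map  = *start[1]
 *   map_array[3].map  = *start[2]
 *   map_array[4]      = zeroed terminator (tail, .next == NULL)
 *
 * trace_eval_jmp_to_tail() steps from a head item over .length map items
 * to the tail, whose .next may chain to the next module's array.
 */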
6050 
6051 static void trace_create_eval_file(struct dentry *d_tracer)
6052 {
6053 	trace_create_file("eval_map", 0444, d_tracer,
6054 			  NULL, &tracing_eval_map_fops);
6055 }
6056 
6057 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6058 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6059 static inline void trace_insert_eval_map_file(struct module *mod,
6060 			      struct trace_eval_map **start, int len) { }
6061 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6062 
6063 static void trace_insert_eval_map(struct module *mod,
6064 				  struct trace_eval_map **start, int len)
6065 {
6066 	struct trace_eval_map **map;
6067 
6068 	if (len <= 0)
6069 		return;
6070 
6071 	map = start;
6072 
6073 	trace_event_eval_update(map, len);
6074 
6075 	trace_insert_eval_map_file(mod, start, len);
6076 }
6077 
6078 static ssize_t
6079 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6080 		       size_t cnt, loff_t *ppos)
6081 {
6082 	struct trace_array *tr = filp->private_data;
6083 	char buf[MAX_TRACER_SIZE+2];
6084 	int r;
6085 
6086 	mutex_lock(&trace_types_lock);
6087 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6088 	mutex_unlock(&trace_types_lock);
6089 
6090 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6091 }
6092 
6093 int tracer_init(struct tracer *t, struct trace_array *tr)
6094 {
6095 	tracing_reset_online_cpus(&tr->array_buffer);
6096 	return t->init(tr);
6097 }
6098 
6099 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6100 {
6101 	int cpu;
6102 
6103 	for_each_tracing_cpu(cpu)
6104 		per_cpu_ptr(buf->data, cpu)->entries = val;
6105 }
6106 
6107 #ifdef CONFIG_TRACER_MAX_TRACE
6108 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6109 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6110 					struct array_buffer *size_buf, int cpu_id)
6111 {
6112 	int cpu, ret = 0;
6113 
6114 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6115 		for_each_tracing_cpu(cpu) {
6116 			ret = ring_buffer_resize(trace_buf->buffer,
6117 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6118 			if (ret < 0)
6119 				break;
6120 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6121 				per_cpu_ptr(size_buf->data, cpu)->entries;
6122 		}
6123 	} else {
6124 		ret = ring_buffer_resize(trace_buf->buffer,
6125 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6126 		if (ret == 0)
6127 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6128 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6129 	}
6130 
6131 	return ret;
6132 }
6133 #endif /* CONFIG_TRACER_MAX_TRACE */
6134 
6135 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6136 					unsigned long size, int cpu)
6137 {
6138 	int ret;
6139 
6140 	/*
6141 	 * If the kernel or the user changes the size of the ring buffer,
6142 	 * we use the size that was given and can forget about
6143 	 * expanding it later.
6144 	 */
6145 	ring_buffer_expanded = true;
6146 
6147 	/* May be called before buffers are initialized */
6148 	if (!tr->array_buffer.buffer)
6149 		return 0;
6150 
6151 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6152 	if (ret < 0)
6153 		return ret;
6154 
6155 #ifdef CONFIG_TRACER_MAX_TRACE
6156 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6157 	    !tr->current_trace->use_max_tr)
6158 		goto out;
6159 
6160 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6161 	if (ret < 0) {
6162 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6163 						     &tr->array_buffer, cpu);
6164 		if (r < 0) {
6165 			/*
6166 			 * AARGH! We are left with different
6167 			 * size max buffer!!!!
6168 			 * The max buffer is our "snapshot" buffer.
6169 			 * When a tracer needs a snapshot (one of the
6170 			 * latency tracers), it swaps the max buffer
6171 		 * with the saved snapshot. We succeeded in updating
6172 		 * the size of the main buffer, but failed to update
6173 		 * the size of the max buffer. Then, when we tried to
6174 		 * reset the main buffer to its original size, we
6175 		 * failed there too. This is very unlikely to
6176 			 * happen, but if it does, warn and kill all
6177 			 * tracing.
6178 			 */
6179 			WARN_ON(1);
6180 			tracing_disabled = 1;
6181 		}
6182 		return ret;
6183 	}
6184 
6185 	if (cpu == RING_BUFFER_ALL_CPUS)
6186 		set_buffer_entries(&tr->max_buffer, size);
6187 	else
6188 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6189 
6190  out:
6191 #endif /* CONFIG_TRACER_MAX_TRACE */
6192 
6193 	if (cpu == RING_BUFFER_ALL_CPUS)
6194 		set_buffer_entries(&tr->array_buffer, size);
6195 	else
6196 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6197 
6198 	return ret;
6199 }
6200 
6201 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6202 				  unsigned long size, int cpu_id)
6203 {
6204 	int ret;
6205 
6206 	mutex_lock(&trace_types_lock);
6207 
6208 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6209 		/* make sure this CPU is enabled in the mask */
6210 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6211 			ret = -EINVAL;
6212 			goto out;
6213 		}
6214 	}
6215 
6216 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6217 	if (ret < 0)
6218 		ret = -ENOMEM;
6219 
6220 out:
6221 	mutex_unlock(&trace_types_lock);
6222 
6223 	return ret;
6224 }
6225 
6226 
6227 /**
6228  * tracing_update_buffers - used by tracing facility to expand ring buffers
6229  *
6230  * To save memory on systems where tracing is compiled in but never
6231  * used, the ring buffers are set to a minimum size. Once a user starts
6232  * to use the tracing facility, they need to grow to their default
6233  * size.
6234  *
6235  * This function is to be called when a tracer is about to be used.
6236  */
6237 int tracing_update_buffers(void)
6238 {
6239 	int ret = 0;
6240 
6241 	mutex_lock(&trace_types_lock);
6242 	if (!ring_buffer_expanded)
6243 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6244 						RING_BUFFER_ALL_CPUS);
6245 	mutex_unlock(&trace_types_lock);
6246 
6247 	return ret;
6248 }
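
/*
 * A minimal sketch of the typical caller pattern (see for example
 * tracing_snapshot_write() below): expand the boot-time minimal buffers
 * before anything starts generating trace data.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */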
6249 
6250 struct trace_option_dentry;
6251 
6252 static void
6253 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6254 
6255 /*
6256  * Used to clear out the tracer before deletion of an instance.
6257  * Must have trace_types_lock held.
6258  */
6259 static void tracing_set_nop(struct trace_array *tr)
6260 {
6261 	if (tr->current_trace == &nop_trace)
6262 		return;
6263 
6264 	tr->current_trace->enabled--;
6265 
6266 	if (tr->current_trace->reset)
6267 		tr->current_trace->reset(tr);
6268 
6269 	tr->current_trace = &nop_trace;
6270 }
6271 
6272 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6273 {
6274 	/* Only enable if the directory has been created already. */
6275 	if (!tr->dir)
6276 		return;
6277 
6278 	create_trace_option_files(tr, t);
6279 }
6280 
6281 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6282 {
6283 	struct tracer *t;
6284 #ifdef CONFIG_TRACER_MAX_TRACE
6285 	bool had_max_tr;
6286 #endif
6287 	int ret = 0;
6288 
6289 	mutex_lock(&trace_types_lock);
6290 
6291 	if (!ring_buffer_expanded) {
6292 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6293 						RING_BUFFER_ALL_CPUS);
6294 		if (ret < 0)
6295 			goto out;
6296 		ret = 0;
6297 	}
6298 
6299 	for (t = trace_types; t; t = t->next) {
6300 		if (strcmp(t->name, buf) == 0)
6301 			break;
6302 	}
6303 	if (!t) {
6304 		ret = -EINVAL;
6305 		goto out;
6306 	}
6307 	if (t == tr->current_trace)
6308 		goto out;
6309 
6310 #ifdef CONFIG_TRACER_SNAPSHOT
6311 	if (t->use_max_tr) {
6312 		arch_spin_lock(&tr->max_lock);
6313 		if (tr->cond_snapshot)
6314 			ret = -EBUSY;
6315 		arch_spin_unlock(&tr->max_lock);
6316 		if (ret)
6317 			goto out;
6318 	}
6319 #endif
6320 	/* Some tracers won't work if enabled from the kernel command line */
6321 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6322 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6323 			t->name);
6324 		goto out;
6325 	}
6326 
6327 	/* Some tracers are only allowed for the top level buffer */
6328 	if (!trace_ok_for_array(t, tr)) {
6329 		ret = -EINVAL;
6330 		goto out;
6331 	}
6332 
6333 	/* If trace pipe files are being read, we can't change the tracer */
6334 	if (tr->trace_ref) {
6335 		ret = -EBUSY;
6336 		goto out;
6337 	}
6338 
6339 	trace_branch_disable();
6340 
6341 	tr->current_trace->enabled--;
6342 
6343 	if (tr->current_trace->reset)
6344 		tr->current_trace->reset(tr);
6345 
6346 	/* Current trace needs to be nop_trace before synchronize_rcu */
6347 	tr->current_trace = &nop_trace;
6348 
6349 #ifdef CONFIG_TRACER_MAX_TRACE
6350 	had_max_tr = tr->allocated_snapshot;
6351 
6352 	if (had_max_tr && !t->use_max_tr) {
6353 		/*
6354 		 * We need to make sure that the update_max_tr sees that
6355 		 * current_trace changed to nop_trace to keep it from
6356 		 * swapping the buffers after we resize it.
6357 		 * The update_max_tr is called with interrupts disabled,
6358 		 * so a synchronize_rcu() is sufficient.
6359 		 */
6360 		synchronize_rcu();
6361 		free_snapshot(tr);
6362 	}
6363 #endif
6364 
6365 #ifdef CONFIG_TRACER_MAX_TRACE
6366 	if (t->use_max_tr && !had_max_tr) {
6367 		ret = tracing_alloc_snapshot_instance(tr);
6368 		if (ret < 0)
6369 			goto out;
6370 	}
6371 #endif
6372 
6373 	if (t->init) {
6374 		ret = tracer_init(t, tr);
6375 		if (ret)
6376 			goto out;
6377 	}
6378 
6379 	tr->current_trace = t;
6380 	tr->current_trace->enabled++;
6381 	trace_branch_enable(tr);
6382  out:
6383 	mutex_unlock(&trace_types_lock);
6384 
6385 	return ret;
6386 }
6387 
6388 static ssize_t
6389 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6390 			size_t cnt, loff_t *ppos)
6391 {
6392 	struct trace_array *tr = filp->private_data;
6393 	char buf[MAX_TRACER_SIZE+1];
6394 	int i;
6395 	size_t ret;
6396 	int err;
6397 
6398 	ret = cnt;
6399 
6400 	if (cnt > MAX_TRACER_SIZE)
6401 		cnt = MAX_TRACER_SIZE;
6402 
6403 	if (copy_from_user(buf, ubuf, cnt))
6404 		return -EFAULT;
6405 
6406 	buf[cnt] = 0;
6407 
6408 	/* strip ending whitespace. */
6409 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6410 		buf[i] = 0;
6411 
6412 	err = tracing_set_tracer(tr, buf);
6413 	if (err)
6414 		return err;
6415 
6416 	*ppos += ret;
6417 
6418 	return ret;
6419 }
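
/*
 * Illustrative use of the "current_tracer" file these handlers are
 * typically wired to (paths relative to the tracefs mount point):
 *
 *   # echo function > current_tracer
 *   # echo nop > current_tracer
 *
 * Trailing whitespace (such as the newline added by echo) is stripped
 * before the name is matched against the registered tracers.
 */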
6420 
6421 static ssize_t
6422 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6423 		   size_t cnt, loff_t *ppos)
6424 {
6425 	char buf[64];
6426 	int r;
6427 
6428 	r = snprintf(buf, sizeof(buf), "%ld\n",
6429 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6430 	if (r > sizeof(buf))
6431 		r = sizeof(buf);
6432 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6433 }
6434 
6435 static ssize_t
6436 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6437 		    size_t cnt, loff_t *ppos)
6438 {
6439 	unsigned long val;
6440 	int ret;
6441 
6442 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6443 	if (ret)
6444 		return ret;
6445 
6446 	*ptr = val * 1000;
6447 
6448 	return cnt;
6449 }
6450 
6451 static ssize_t
6452 tracing_thresh_read(struct file *filp, char __user *ubuf,
6453 		    size_t cnt, loff_t *ppos)
6454 {
6455 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6456 }
6457 
6458 static ssize_t
6459 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6460 		     size_t cnt, loff_t *ppos)
6461 {
6462 	struct trace_array *tr = filp->private_data;
6463 	int ret;
6464 
6465 	mutex_lock(&trace_types_lock);
6466 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6467 	if (ret < 0)
6468 		goto out;
6469 
6470 	if (tr->current_trace->update_thresh) {
6471 		ret = tr->current_trace->update_thresh(tr);
6472 		if (ret < 0)
6473 			goto out;
6474 	}
6475 
6476 	ret = cnt;
6477 out:
6478 	mutex_unlock(&trace_types_lock);
6479 
6480 	return ret;
6481 }
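
/*
 * Illustrative use of the "tracing_thresh" file these handlers are
 * typically attached to. Values are written in microseconds and stored
 * internally in nanoseconds (see tracing_nsecs_write() above):
 *
 *   # echo 100 > tracing_thresh    (record only latencies above 100 usecs)
 *   # echo 0 > tracing_thresh      (disable the threshold)
 */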
6482 
6483 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6484 
6485 static ssize_t
6486 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6487 		     size_t cnt, loff_t *ppos)
6488 {
6489 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6490 }
6491 
6492 static ssize_t
6493 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6494 		      size_t cnt, loff_t *ppos)
6495 {
6496 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6497 }
6498 
6499 #endif
6500 
6501 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6502 {
6503 	struct trace_array *tr = inode->i_private;
6504 	struct trace_iterator *iter;
6505 	int ret;
6506 
6507 	ret = tracing_check_open_get_tr(tr);
6508 	if (ret)
6509 		return ret;
6510 
6511 	mutex_lock(&trace_types_lock);
6512 
6513 	/* create a buffer to store the information to pass to userspace */
6514 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6515 	if (!iter) {
6516 		ret = -ENOMEM;
6517 		__trace_array_put(tr);
6518 		goto out;
6519 	}
6520 
6521 	trace_seq_init(&iter->seq);
6522 	iter->trace = tr->current_trace;
6523 
6524 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6525 		ret = -ENOMEM;
6526 		goto fail;
6527 	}
6528 
6529 	/* trace pipe does not show start of buffer */
6530 	cpumask_setall(iter->started);
6531 
6532 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6533 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6534 
6535 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6536 	if (trace_clocks[tr->clock_id].in_ns)
6537 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6538 
6539 	iter->tr = tr;
6540 	iter->array_buffer = &tr->array_buffer;
6541 	iter->cpu_file = tracing_get_cpu(inode);
6542 	mutex_init(&iter->mutex);
6543 	filp->private_data = iter;
6544 
6545 	if (iter->trace->pipe_open)
6546 		iter->trace->pipe_open(iter);
6547 
6548 	nonseekable_open(inode, filp);
6549 
6550 	tr->trace_ref++;
6551 out:
6552 	mutex_unlock(&trace_types_lock);
6553 	return ret;
6554 
6555 fail:
6556 	kfree(iter);
6557 	__trace_array_put(tr);
6558 	mutex_unlock(&trace_types_lock);
6559 	return ret;
6560 }
6561 
6562 static int tracing_release_pipe(struct inode *inode, struct file *file)
6563 {
6564 	struct trace_iterator *iter = file->private_data;
6565 	struct trace_array *tr = inode->i_private;
6566 
6567 	mutex_lock(&trace_types_lock);
6568 
6569 	tr->trace_ref--;
6570 
6571 	if (iter->trace->pipe_close)
6572 		iter->trace->pipe_close(iter);
6573 
6574 	mutex_unlock(&trace_types_lock);
6575 
6576 	free_cpumask_var(iter->started);
6577 	mutex_destroy(&iter->mutex);
6578 	kfree(iter);
6579 
6580 	trace_array_put(tr);
6581 
6582 	return 0;
6583 }
6584 
6585 static __poll_t
6586 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6587 {
6588 	struct trace_array *tr = iter->tr;
6589 
6590 	/* Iterators are static, they should be filled or empty */
6591 	if (trace_buffer_iter(iter, iter->cpu_file))
6592 		return EPOLLIN | EPOLLRDNORM;
6593 
6594 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6595 		/*
6596 		 * Always select as readable when in blocking mode
6597 		 */
6598 		return EPOLLIN | EPOLLRDNORM;
6599 	else
6600 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6601 					     filp, poll_table);
6602 }
6603 
6604 static __poll_t
6605 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6606 {
6607 	struct trace_iterator *iter = filp->private_data;
6608 
6609 	return trace_poll(iter, filp, poll_table);
6610 }
6611 
6612 /* Must be called with iter->mutex held. */
6613 static int tracing_wait_pipe(struct file *filp)
6614 {
6615 	struct trace_iterator *iter = filp->private_data;
6616 	int ret;
6617 
6618 	while (trace_empty(iter)) {
6619 
6620 		if ((filp->f_flags & O_NONBLOCK)) {
6621 			return -EAGAIN;
6622 		}
6623 
6624 		/*
6625 		 * We block until we have read something and tracing is
6626 		 * disabled. We keep blocking if tracing is disabled but we
6627 		 * have never read anything; this allows a user to cat this
6628 		 * file and then enable tracing. But after we have read
6629 		 * something, we give an EOF when tracing is again disabled.
6630 		 *
6631 		 * iter->pos will be 0 if we haven't read anything.
6632 		 */
6633 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6634 			break;
6635 
6636 		mutex_unlock(&iter->mutex);
6637 
6638 		ret = wait_on_pipe(iter, 0);
6639 
6640 		mutex_lock(&iter->mutex);
6641 
6642 		if (ret)
6643 			return ret;
6644 	}
6645 
6646 	return 1;
6647 }
6648 
6649 /*
6650  * Consumer reader.
6651  */
6652 static ssize_t
6653 tracing_read_pipe(struct file *filp, char __user *ubuf,
6654 		  size_t cnt, loff_t *ppos)
6655 {
6656 	struct trace_iterator *iter = filp->private_data;
6657 	ssize_t sret;
6658 
6659 	/*
6660 	 * Avoid more than one consumer on a single file descriptor
6661 	 * This is just a matter of trace coherency; the ring buffer itself
6662 	 * is protected.
6663 	 */
6664 	mutex_lock(&iter->mutex);
6665 
6666 	/* return any leftover data */
6667 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6668 	if (sret != -EBUSY)
6669 		goto out;
6670 
6671 	trace_seq_init(&iter->seq);
6672 
6673 	if (iter->trace->read) {
6674 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6675 		if (sret)
6676 			goto out;
6677 	}
6678 
6679 waitagain:
6680 	sret = tracing_wait_pipe(filp);
6681 	if (sret <= 0)
6682 		goto out;
6683 
6684 	/* stop when tracing is finished */
6685 	if (trace_empty(iter)) {
6686 		sret = 0;
6687 		goto out;
6688 	}
6689 
6690 	if (cnt >= PAGE_SIZE)
6691 		cnt = PAGE_SIZE - 1;
6692 
6693 	/* reset all but tr, trace, and overruns */
6694 	memset(&iter->seq, 0,
6695 	       sizeof(struct trace_iterator) -
6696 	       offsetof(struct trace_iterator, seq));
6697 	cpumask_clear(iter->started);
6698 	trace_seq_init(&iter->seq);
6699 	iter->pos = -1;
6700 
6701 	trace_event_read_lock();
6702 	trace_access_lock(iter->cpu_file);
6703 	while (trace_find_next_entry_inc(iter) != NULL) {
6704 		enum print_line_t ret;
6705 		int save_len = iter->seq.seq.len;
6706 
6707 		ret = print_trace_line(iter);
6708 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6709 			/* don't print partial lines */
6710 			iter->seq.seq.len = save_len;
6711 			break;
6712 		}
6713 		if (ret != TRACE_TYPE_NO_CONSUME)
6714 			trace_consume(iter);
6715 
6716 		if (trace_seq_used(&iter->seq) >= cnt)
6717 			break;
6718 
6719 		/*
6720 		 * Setting the full flag means we reached the trace_seq buffer
6721 		 * size and we should have left via the partial-line check above.
6722 		 * One of the trace_seq_* functions is not being used properly.
6723 		 */
6724 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6725 			  iter->ent->type);
6726 	}
6727 	trace_access_unlock(iter->cpu_file);
6728 	trace_event_read_unlock();
6729 
6730 	/* Now copy what we have to the user */
6731 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6732 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6733 		trace_seq_init(&iter->seq);
6734 
6735 	/*
6736 	 * If there was nothing to send to user, in spite of consuming trace
6737 	 * entries, go back to wait for more entries.
6738 	 */
6739 	if (sret == -EBUSY)
6740 		goto waitagain;
6741 
6742 out:
6743 	mutex_unlock(&iter->mutex);
6744 
6745 	return sret;
6746 }
6747 
6748 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6749 				     unsigned int idx)
6750 {
6751 	__free_page(spd->pages[idx]);
6752 }
6753 
6754 static size_t
6755 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6756 {
6757 	size_t count;
6758 	int save_len;
6759 	int ret;
6760 
6761 	/* Seq buffer is page-sized, exactly what we need. */
6762 	for (;;) {
6763 		save_len = iter->seq.seq.len;
6764 		ret = print_trace_line(iter);
6765 
6766 		if (trace_seq_has_overflowed(&iter->seq)) {
6767 			iter->seq.seq.len = save_len;
6768 			break;
6769 		}
6770 
6771 		/*
6772 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6773 		 * should only be returned if iter->seq overflowed. But check
6774 		 * it anyway to be safe.
6775 		 */
6776 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6777 			iter->seq.seq.len = save_len;
6778 			break;
6779 		}
6780 
6781 		count = trace_seq_used(&iter->seq) - save_len;
6782 		if (rem < count) {
6783 			rem = 0;
6784 			iter->seq.seq.len = save_len;
6785 			break;
6786 		}
6787 
6788 		if (ret != TRACE_TYPE_NO_CONSUME)
6789 			trace_consume(iter);
6790 		rem -= count;
6791 		if (!trace_find_next_entry_inc(iter))	{
6792 			rem = 0;
6793 			iter->ent = NULL;
6794 			break;
6795 		}
6796 	}
6797 
6798 	return rem;
6799 }
6800 
6801 static ssize_t tracing_splice_read_pipe(struct file *filp,
6802 					loff_t *ppos,
6803 					struct pipe_inode_info *pipe,
6804 					size_t len,
6805 					unsigned int flags)
6806 {
6807 	struct page *pages_def[PIPE_DEF_BUFFERS];
6808 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6809 	struct trace_iterator *iter = filp->private_data;
6810 	struct splice_pipe_desc spd = {
6811 		.pages		= pages_def,
6812 		.partial	= partial_def,
6813 		.nr_pages	= 0, /* This gets updated below. */
6814 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6815 		.ops		= &default_pipe_buf_ops,
6816 		.spd_release	= tracing_spd_release_pipe,
6817 	};
6818 	ssize_t ret;
6819 	size_t rem;
6820 	unsigned int i;
6821 
6822 	if (splice_grow_spd(pipe, &spd))
6823 		return -ENOMEM;
6824 
6825 	mutex_lock(&iter->mutex);
6826 
6827 	if (iter->trace->splice_read) {
6828 		ret = iter->trace->splice_read(iter, filp,
6829 					       ppos, pipe, len, flags);
6830 		if (ret)
6831 			goto out_err;
6832 	}
6833 
6834 	ret = tracing_wait_pipe(filp);
6835 	if (ret <= 0)
6836 		goto out_err;
6837 
6838 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6839 		ret = -EFAULT;
6840 		goto out_err;
6841 	}
6842 
6843 	trace_event_read_lock();
6844 	trace_access_lock(iter->cpu_file);
6845 
6846 	/* Fill as many pages as possible. */
6847 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6848 		spd.pages[i] = alloc_page(GFP_KERNEL);
6849 		if (!spd.pages[i])
6850 			break;
6851 
6852 		rem = tracing_fill_pipe_page(rem, iter);
6853 
6854 		/* Copy the data into the page, so we can start over. */
6855 		ret = trace_seq_to_buffer(&iter->seq,
6856 					  page_address(spd.pages[i]),
6857 					  trace_seq_used(&iter->seq));
6858 		if (ret < 0) {
6859 			__free_page(spd.pages[i]);
6860 			break;
6861 		}
6862 		spd.partial[i].offset = 0;
6863 		spd.partial[i].len = trace_seq_used(&iter->seq);
6864 
6865 		trace_seq_init(&iter->seq);
6866 	}
6867 
6868 	trace_access_unlock(iter->cpu_file);
6869 	trace_event_read_unlock();
6870 	mutex_unlock(&iter->mutex);
6871 
6872 	spd.nr_pages = i;
6873 
6874 	if (i)
6875 		ret = splice_to_pipe(pipe, &spd);
6876 	else
6877 		ret = 0;
6878 out:
6879 	splice_shrink_spd(&spd);
6880 	return ret;
6881 
6882 out_err:
6883 	mutex_unlock(&iter->mutex);
6884 	goto out;
6885 }
6886 
6887 static ssize_t
6888 tracing_entries_read(struct file *filp, char __user *ubuf,
6889 		     size_t cnt, loff_t *ppos)
6890 {
6891 	struct inode *inode = file_inode(filp);
6892 	struct trace_array *tr = inode->i_private;
6893 	int cpu = tracing_get_cpu(inode);
6894 	char buf[64];
6895 	int r = 0;
6896 	ssize_t ret;
6897 
6898 	mutex_lock(&trace_types_lock);
6899 
6900 	if (cpu == RING_BUFFER_ALL_CPUS) {
6901 		int cpu, buf_size_same;
6902 		unsigned long size;
6903 
6904 		size = 0;
6905 		buf_size_same = 1;
6906 		/* check if all cpu sizes are same */
6907 		for_each_tracing_cpu(cpu) {
6908 			/* fill in the size from first enabled cpu */
6909 			if (size == 0)
6910 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6911 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6912 				buf_size_same = 0;
6913 				break;
6914 			}
6915 		}
6916 
6917 		if (buf_size_same) {
6918 			if (!ring_buffer_expanded)
6919 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6920 					    size >> 10,
6921 					    trace_buf_size >> 10);
6922 			else
6923 				r = sprintf(buf, "%lu\n", size >> 10);
6924 		} else
6925 			r = sprintf(buf, "X\n");
6926 	} else
6927 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6928 
6929 	mutex_unlock(&trace_types_lock);
6930 
6931 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6932 	return ret;
6933 }
6934 
6935 static ssize_t
6936 tracing_entries_write(struct file *filp, const char __user *ubuf,
6937 		      size_t cnt, loff_t *ppos)
6938 {
6939 	struct inode *inode = file_inode(filp);
6940 	struct trace_array *tr = inode->i_private;
6941 	unsigned long val;
6942 	int ret;
6943 
6944 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6945 	if (ret)
6946 		return ret;
6947 
6948 	/* must have at least 1 entry */
6949 	if (!val)
6950 		return -EINVAL;
6951 
6952 	/* value is in KB */
6953 	val <<= 10;
6954 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6955 	if (ret < 0)
6956 		return ret;
6957 
6958 	*ppos += cnt;
6959 
6960 	return cnt;
6961 }
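
/*
 * Illustrative use of the "buffer_size_kb" file these handlers are
 * typically attached to; the written value is interpreted in KB per CPU
 * (val <<= 10 above):
 *
 *   # echo 4096 > buffer_size_kb                 (4 MB per CPU)
 *   # echo 1024 > per_cpu/cpu2/buffer_size_kb    (resize only CPU 2)
 */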
6962 
6963 static ssize_t
6964 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6965 				size_t cnt, loff_t *ppos)
6966 {
6967 	struct trace_array *tr = filp->private_data;
6968 	char buf[64];
6969 	int r, cpu;
6970 	unsigned long size = 0, expanded_size = 0;
6971 
6972 	mutex_lock(&trace_types_lock);
6973 	for_each_tracing_cpu(cpu) {
6974 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6975 		if (!ring_buffer_expanded)
6976 			expanded_size += trace_buf_size >> 10;
6977 	}
6978 	if (ring_buffer_expanded)
6979 		r = sprintf(buf, "%lu\n", size);
6980 	else
6981 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6982 	mutex_unlock(&trace_types_lock);
6983 
6984 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6985 }
6986 
6987 static ssize_t
6988 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6989 			  size_t cnt, loff_t *ppos)
6990 {
6991 	/*
6992 	 * There is no need to read what the user has written; this function
6993 	 * exists just to make sure that "echo" into this file does not fail.
6994 	 */
6995 
6996 	*ppos += cnt;
6997 
6998 	return cnt;
6999 }
7000 
7001 static int
7002 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7003 {
7004 	struct trace_array *tr = inode->i_private;
7005 
7006 	/* disable tracing ? */
7007 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7008 		tracer_tracing_off(tr);
7009 	/* resize the ring buffer to 0 */
7010 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7011 
7012 	trace_array_put(tr);
7013 
7014 	return 0;
7015 }
7016 
7017 static ssize_t
7018 tracing_mark_write(struct file *filp, const char __user *ubuf,
7019 					size_t cnt, loff_t *fpos)
7020 {
7021 	struct trace_array *tr = filp->private_data;
7022 	struct ring_buffer_event *event;
7023 	enum event_trigger_type tt = ETT_NONE;
7024 	struct trace_buffer *buffer;
7025 	struct print_entry *entry;
7026 	ssize_t written;
7027 	int size;
7028 	int len;
7029 
7030 /* Used in tracing_mark_raw_write() as well */
7031 #define FAULTED_STR "<faulted>"
7032 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7033 
7034 	if (tracing_disabled)
7035 		return -EINVAL;
7036 
7037 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7038 		return -EINVAL;
7039 
7040 	if (cnt > TRACE_BUF_SIZE)
7041 		cnt = TRACE_BUF_SIZE;
7042 
7043 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7044 
7045 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7046 
7047 	/* If less than "<faulted>", then make sure we can still add that */
7048 	if (cnt < FAULTED_SIZE)
7049 		size += FAULTED_SIZE - cnt;
7050 
7051 	buffer = tr->array_buffer.buffer;
7052 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7053 					    tracing_gen_ctx());
7054 	if (unlikely(!event))
7055 		/* Ring buffer disabled, return as if not open for write */
7056 		return -EBADF;
7057 
7058 	entry = ring_buffer_event_data(event);
7059 	entry->ip = _THIS_IP_;
7060 
7061 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7062 	if (len) {
7063 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7064 		cnt = FAULTED_SIZE;
7065 		written = -EFAULT;
7066 	} else
7067 		written = cnt;
7068 
7069 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7070 		/* do not add \n before testing triggers, but add \0 */
7071 		entry->buf[cnt] = '\0';
7072 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7073 	}
7074 
7075 	if (entry->buf[cnt - 1] != '\n') {
7076 		entry->buf[cnt] = '\n';
7077 		entry->buf[cnt + 1] = '\0';
7078 	} else
7079 		entry->buf[cnt] = '\0';
7080 
7081 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7082 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7083 	__buffer_unlock_commit(buffer, event);
7084 
7085 	if (tt)
7086 		event_triggers_post_call(tr->trace_marker_file, tt);
7087 
7088 	if (written > 0)
7089 		*fpos += written;
7090 
7091 	return written;
7092 }
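
/*
 * Illustrative use of the "trace_marker" file that tracing_mark_write()
 * is typically attached to; anything written is recorded in the ring
 * buffer as a TRACE_PRINT event:
 *
 *   # echo "hello from user space" > trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, and if the user page
 * faults during the copy, the "<faulted>" placeholder is recorded instead.
 */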
7093 
7094 /* Limit it for now to 3K (including tag) */
7095 #define RAW_DATA_MAX_SIZE (1024*3)
7096 
7097 static ssize_t
7098 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7099 					size_t cnt, loff_t *fpos)
7100 {
7101 	struct trace_array *tr = filp->private_data;
7102 	struct ring_buffer_event *event;
7103 	struct trace_buffer *buffer;
7104 	struct raw_data_entry *entry;
7105 	ssize_t written;
7106 	int size;
7107 	int len;
7108 
7109 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7110 
7111 	if (tracing_disabled)
7112 		return -EINVAL;
7113 
7114 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7115 		return -EINVAL;
7116 
7117 	/* The marker must at least have a tag id */
7118 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7119 		return -EINVAL;
7120 
7121 	if (cnt > TRACE_BUF_SIZE)
7122 		cnt = TRACE_BUF_SIZE;
7123 
7124 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7125 
7126 	size = sizeof(*entry) + cnt;
7127 	if (cnt < FAULT_SIZE_ID)
7128 		size += FAULT_SIZE_ID - cnt;
7129 
7130 	buffer = tr->array_buffer.buffer;
7131 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7132 					    tracing_gen_ctx());
7133 	if (!event)
7134 		/* Ring buffer disabled, return as if not open for write */
7135 		return -EBADF;
7136 
7137 	entry = ring_buffer_event_data(event);
7138 
7139 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7140 	if (len) {
7141 		entry->id = -1;
7142 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7143 		written = -EFAULT;
7144 	} else
7145 		written = cnt;
7146 
7147 	__buffer_unlock_commit(buffer, event);
7148 
7149 	if (written > 0)
7150 		*fpos += written;
7151 
7152 	return written;
7153 }
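
/*
 * A minimal user-space sketch for the "trace_marker_raw" interface this
 * handler is typically attached to (the tag value is hypothetical); the
 * payload must start with a 4-byte tag id, matching struct raw_data_entry:
 *
 *	unsigned int buf[2] = { 0x1234, 42 };	(tag id, then raw data)
 *	write(fd, buf, sizeof(buf));
 */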
7154 
7155 static int tracing_clock_show(struct seq_file *m, void *v)
7156 {
7157 	struct trace_array *tr = m->private;
7158 	int i;
7159 
7160 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7161 		seq_printf(m,
7162 			"%s%s%s%s", i ? " " : "",
7163 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7164 			i == tr->clock_id ? "]" : "");
7165 	seq_putc(m, '\n');
7166 
7167 	return 0;
7168 }
7169 
7170 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7171 {
7172 	int i;
7173 
7174 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7175 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7176 			break;
7177 	}
7178 	if (i == ARRAY_SIZE(trace_clocks))
7179 		return -EINVAL;
7180 
7181 	mutex_lock(&trace_types_lock);
7182 
7183 	tr->clock_id = i;
7184 
7185 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7186 
7187 	/*
7188 	 * New clock may not be consistent with the previous clock.
7189 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7190 	 */
7191 	tracing_reset_online_cpus(&tr->array_buffer);
7192 
7193 #ifdef CONFIG_TRACER_MAX_TRACE
7194 	if (tr->max_buffer.buffer)
7195 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7196 	tracing_reset_online_cpus(&tr->max_buffer);
7197 #endif
7198 
7199 	mutex_unlock(&trace_types_lock);
7200 
7201 	return 0;
7202 }
7203 
7204 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7205 				   size_t cnt, loff_t *fpos)
7206 {
7207 	struct seq_file *m = filp->private_data;
7208 	struct trace_array *tr = m->private;
7209 	char buf[64];
7210 	const char *clockstr;
7211 	int ret;
7212 
7213 	if (cnt >= sizeof(buf))
7214 		return -EINVAL;
7215 
7216 	if (copy_from_user(buf, ubuf, cnt))
7217 		return -EFAULT;
7218 
7219 	buf[cnt] = 0;
7220 
7221 	clockstr = strstrip(buf);
7222 
7223 	ret = tracing_set_clock(tr, clockstr);
7224 	if (ret)
7225 		return ret;
7226 
7227 	*fpos += cnt;
7228 
7229 	return cnt;
7230 }
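
/*
 * Illustrative use of the "trace_clock" file these handlers are typically
 * attached to. Reading lists the available clocks with the current one in
 * brackets (the exact list comes from trace_clocks[]); writing selects a
 * new clock and resets the buffers:
 *
 *   # cat trace_clock
 *   [local] global counter uptime ...
 *   # echo global > trace_clock
 */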
7231 
7232 static int tracing_clock_open(struct inode *inode, struct file *file)
7233 {
7234 	struct trace_array *tr = inode->i_private;
7235 	int ret;
7236 
7237 	ret = tracing_check_open_get_tr(tr);
7238 	if (ret)
7239 		return ret;
7240 
7241 	ret = single_open(file, tracing_clock_show, inode->i_private);
7242 	if (ret < 0)
7243 		trace_array_put(tr);
7244 
7245 	return ret;
7246 }
7247 
7248 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7249 {
7250 	struct trace_array *tr = m->private;
7251 
7252 	mutex_lock(&trace_types_lock);
7253 
7254 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7255 		seq_puts(m, "delta [absolute]\n");
7256 	else
7257 		seq_puts(m, "[delta] absolute\n");
7258 
7259 	mutex_unlock(&trace_types_lock);
7260 
7261 	return 0;
7262 }
7263 
7264 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7265 {
7266 	struct trace_array *tr = inode->i_private;
7267 	int ret;
7268 
7269 	ret = tracing_check_open_get_tr(tr);
7270 	if (ret)
7271 		return ret;
7272 
7273 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7274 	if (ret < 0)
7275 		trace_array_put(tr);
7276 
7277 	return ret;
7278 }
7279 
7280 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7281 {
7282 	if (rbe == this_cpu_read(trace_buffered_event))
7283 		return ring_buffer_time_stamp(buffer);
7284 
7285 	return ring_buffer_event_time_stamp(buffer, rbe);
7286 }
7287 
7288 /*
7289  * Set or disable using the per CPU trace_buffered_event when possible.
7290  */
7291 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7292 {
7293 	int ret = 0;
7294 
7295 	mutex_lock(&trace_types_lock);
7296 
7297 	if (set && tr->no_filter_buffering_ref++)
7298 		goto out;
7299 
7300 	if (!set) {
7301 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7302 			ret = -EINVAL;
7303 			goto out;
7304 		}
7305 
7306 		--tr->no_filter_buffering_ref;
7307 	}
7308  out:
7309 	mutex_unlock(&trace_types_lock);
7310 
7311 	return ret;
7312 }
7313 
7314 struct ftrace_buffer_info {
7315 	struct trace_iterator	iter;
7316 	void			*spare;
7317 	unsigned int		spare_cpu;
7318 	unsigned int		read;
7319 };
7320 
7321 #ifdef CONFIG_TRACER_SNAPSHOT
7322 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7323 {
7324 	struct trace_array *tr = inode->i_private;
7325 	struct trace_iterator *iter;
7326 	struct seq_file *m;
7327 	int ret;
7328 
7329 	ret = tracing_check_open_get_tr(tr);
7330 	if (ret)
7331 		return ret;
7332 
7333 	if (file->f_mode & FMODE_READ) {
7334 		iter = __tracing_open(inode, file, true);
7335 		if (IS_ERR(iter))
7336 			ret = PTR_ERR(iter);
7337 	} else {
7338 		/* Writes still need the seq_file to hold the private data */
7339 		ret = -ENOMEM;
7340 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7341 		if (!m)
7342 			goto out;
7343 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7344 		if (!iter) {
7345 			kfree(m);
7346 			goto out;
7347 		}
7348 		ret = 0;
7349 
7350 		iter->tr = tr;
7351 		iter->array_buffer = &tr->max_buffer;
7352 		iter->cpu_file = tracing_get_cpu(inode);
7353 		m->private = iter;
7354 		file->private_data = m;
7355 	}
7356 out:
7357 	if (ret < 0)
7358 		trace_array_put(tr);
7359 
7360 	return ret;
7361 }
7362 
7363 static ssize_t
7364 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7365 		       loff_t *ppos)
7366 {
7367 	struct seq_file *m = filp->private_data;
7368 	struct trace_iterator *iter = m->private;
7369 	struct trace_array *tr = iter->tr;
7370 	unsigned long val;
7371 	int ret;
7372 
7373 	ret = tracing_update_buffers();
7374 	if (ret < 0)
7375 		return ret;
7376 
7377 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7378 	if (ret)
7379 		return ret;
7380 
7381 	mutex_lock(&trace_types_lock);
7382 
7383 	if (tr->current_trace->use_max_tr) {
7384 		ret = -EBUSY;
7385 		goto out;
7386 	}
7387 
7388 	arch_spin_lock(&tr->max_lock);
7389 	if (tr->cond_snapshot)
7390 		ret = -EBUSY;
7391 	arch_spin_unlock(&tr->max_lock);
7392 	if (ret)
7393 		goto out;
7394 
7395 	switch (val) {
7396 	case 0:
7397 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7398 			ret = -EINVAL;
7399 			break;
7400 		}
7401 		if (tr->allocated_snapshot)
7402 			free_snapshot(tr);
7403 		break;
7404 	case 1:
7405 /* Only allow per-cpu swap if the ring buffer supports it */
7406 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7407 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7408 			ret = -EINVAL;
7409 			break;
7410 		}
7411 #endif
7412 		if (tr->allocated_snapshot)
7413 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7414 					&tr->array_buffer, iter->cpu_file);
7415 		else
7416 			ret = tracing_alloc_snapshot_instance(tr);
7417 		if (ret < 0)
7418 			break;
7419 		local_irq_disable();
7420 		/* Now, we're going to swap */
7421 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7422 			update_max_tr(tr, current, smp_processor_id(), NULL);
7423 		else
7424 			update_max_tr_single(tr, current, iter->cpu_file);
7425 		local_irq_enable();
7426 		break;
7427 	default:
7428 		if (tr->allocated_snapshot) {
7429 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7430 				tracing_reset_online_cpus(&tr->max_buffer);
7431 			else
7432 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7433 		}
7434 		break;
7435 	}
7436 
7437 	if (ret >= 0) {
7438 		*ppos += cnt;
7439 		ret = cnt;
7440 	}
7441 out:
7442 	mutex_unlock(&trace_types_lock);
7443 	return ret;
7444 }
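
/*
 * Illustrative use of the "snapshot" file handled above; the values map
 * onto the switch statement in tracing_snapshot_write():
 *
 *   # echo 1 > snapshot    (allocate if needed and take a snapshot)
 *   # cat snapshot         (read the snapshotted buffer)
 *   # echo 2 > snapshot    (clear the snapshot contents, keep it allocated)
 *   # echo 0 > snapshot    (free the snapshot buffer)
 */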
7445 
7446 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7447 {
7448 	struct seq_file *m = file->private_data;
7449 	int ret;
7450 
7451 	ret = tracing_release(inode, file);
7452 
7453 	if (file->f_mode & FMODE_READ)
7454 		return ret;
7455 
7456 	/* If write only, the seq_file is just a stub */
7457 	if (m)
7458 		kfree(m->private);
7459 	kfree(m);
7460 
7461 	return 0;
7462 }
7463 
7464 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7465 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7466 				    size_t count, loff_t *ppos);
7467 static int tracing_buffers_release(struct inode *inode, struct file *file);
7468 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7469 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7470 
7471 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7472 {
7473 	struct ftrace_buffer_info *info;
7474 	int ret;
7475 
7476 	/* The following checks for tracefs lockdown */
7477 	ret = tracing_buffers_open(inode, filp);
7478 	if (ret < 0)
7479 		return ret;
7480 
7481 	info = filp->private_data;
7482 
7483 	if (info->iter.trace->use_max_tr) {
7484 		tracing_buffers_release(inode, filp);
7485 		return -EBUSY;
7486 	}
7487 
7488 	info->iter.snapshot = true;
7489 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7490 
7491 	return ret;
7492 }
7493 
7494 #endif /* CONFIG_TRACER_SNAPSHOT */
7495 
7496 
7497 static const struct file_operations tracing_thresh_fops = {
7498 	.open		= tracing_open_generic,
7499 	.read		= tracing_thresh_read,
7500 	.write		= tracing_thresh_write,
7501 	.llseek		= generic_file_llseek,
7502 };
7503 
7504 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7505 static const struct file_operations tracing_max_lat_fops = {
7506 	.open		= tracing_open_generic,
7507 	.read		= tracing_max_lat_read,
7508 	.write		= tracing_max_lat_write,
7509 	.llseek		= generic_file_llseek,
7510 };
7511 #endif
7512 
7513 static const struct file_operations set_tracer_fops = {
7514 	.open		= tracing_open_generic,
7515 	.read		= tracing_set_trace_read,
7516 	.write		= tracing_set_trace_write,
7517 	.llseek		= generic_file_llseek,
7518 };
7519 
7520 static const struct file_operations tracing_pipe_fops = {
7521 	.open		= tracing_open_pipe,
7522 	.poll		= tracing_poll_pipe,
7523 	.read		= tracing_read_pipe,
7524 	.splice_read	= tracing_splice_read_pipe,
7525 	.release	= tracing_release_pipe,
7526 	.llseek		= no_llseek,
7527 };
7528 
7529 static const struct file_operations tracing_entries_fops = {
7530 	.open		= tracing_open_generic_tr,
7531 	.read		= tracing_entries_read,
7532 	.write		= tracing_entries_write,
7533 	.llseek		= generic_file_llseek,
7534 	.release	= tracing_release_generic_tr,
7535 };
7536 
7537 static const struct file_operations tracing_total_entries_fops = {
7538 	.open		= tracing_open_generic_tr,
7539 	.read		= tracing_total_entries_read,
7540 	.llseek		= generic_file_llseek,
7541 	.release	= tracing_release_generic_tr,
7542 };
7543 
7544 static const struct file_operations tracing_free_buffer_fops = {
7545 	.open		= tracing_open_generic_tr,
7546 	.write		= tracing_free_buffer_write,
7547 	.release	= tracing_free_buffer_release,
7548 };
7549 
7550 static const struct file_operations tracing_mark_fops = {
7551 	.open		= tracing_open_generic_tr,
7552 	.write		= tracing_mark_write,
7553 	.llseek		= generic_file_llseek,
7554 	.release	= tracing_release_generic_tr,
7555 };
7556 
7557 static const struct file_operations tracing_mark_raw_fops = {
7558 	.open		= tracing_open_generic_tr,
7559 	.write		= tracing_mark_raw_write,
7560 	.llseek		= generic_file_llseek,
7561 	.release	= tracing_release_generic_tr,
7562 };
7563 
7564 static const struct file_operations trace_clock_fops = {
7565 	.open		= tracing_clock_open,
7566 	.read		= seq_read,
7567 	.llseek		= seq_lseek,
7568 	.release	= tracing_single_release_tr,
7569 	.write		= tracing_clock_write,
7570 };
7571 
7572 static const struct file_operations trace_time_stamp_mode_fops = {
7573 	.open		= tracing_time_stamp_mode_open,
7574 	.read		= seq_read,
7575 	.llseek		= seq_lseek,
7576 	.release	= tracing_single_release_tr,
7577 };
7578 
7579 #ifdef CONFIG_TRACER_SNAPSHOT
7580 static const struct file_operations snapshot_fops = {
7581 	.open		= tracing_snapshot_open,
7582 	.read		= seq_read,
7583 	.write		= tracing_snapshot_write,
7584 	.llseek		= tracing_lseek,
7585 	.release	= tracing_snapshot_release,
7586 };
7587 
7588 static const struct file_operations snapshot_raw_fops = {
7589 	.open		= snapshot_raw_open,
7590 	.read		= tracing_buffers_read,
7591 	.release	= tracing_buffers_release,
7592 	.splice_read	= tracing_buffers_splice_read,
7593 	.llseek		= no_llseek,
7594 };
7595 
7596 #endif /* CONFIG_TRACER_SNAPSHOT */
7597 
7598 /*
7599  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7600  * @filp: The active open file structure
7601  * @ubuf: The user space provided buffer to read the value from
7602  * @cnt: The maximum number of bytes to read from @ubuf
7603  * @ppos: The current "file" position
7604  *
7605  * This function implements the write interface for a struct trace_min_max_param.
7606  * The filp->private_data must point to a trace_min_max_param structure that
7607  * defines where to write the value, the min and the max acceptable values,
7608  * and a lock to protect the write.
7609  */
7610 static ssize_t
7611 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7612 {
7613 	struct trace_min_max_param *param = filp->private_data;
7614 	u64 val;
7615 	int err;
7616 
7617 	if (!param)
7618 		return -EFAULT;
7619 
7620 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7621 	if (err)
7622 		return err;
7623 
7624 	if (param->lock)
7625 		mutex_lock(param->lock);
7626 
7627 	if (param->min && val < *param->min)
7628 		err = -EINVAL;
7629 
7630 	if (param->max && val > *param->max)
7631 		err = -EINVAL;
7632 
7633 	if (!err)
7634 		*param->val = val;
7635 
7636 	if (param->lock)
7637 		mutex_unlock(param->lock);
7638 
7639 	if (err)
7640 		return err;
7641 
7642 	return cnt;
7643 }
7644 
7645 /*
7646  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7647  * @filp: The active open file structure
7648  * @ubuf: The userspace provided buffer to read value into
7649  * @cnt: The maximum number of bytes to read
7650  * @ppos: The current "file" position
7651  *
7652  * This function implements the read interface for a struct trace_min_max_param.
7653  * The filp->private_data must point to a trace_min_max_param struct with valid
7654  * data.
7655  */
7656 static ssize_t
7657 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7658 {
7659 	struct trace_min_max_param *param = filp->private_data;
7660 	char buf[U64_STR_SIZE];
7661 	int len;
7662 	u64 val;
7663 
7664 	if (!param)
7665 		return -EFAULT;
7666 
7667 	val = *param->val;
7668 
7669 	if (cnt > sizeof(buf))
7670 		cnt = sizeof(buf);
7671 
7672 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7673 
7674 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7675 }
7676 
7677 const struct file_operations trace_min_max_fops = {
7678 	.open		= tracing_open_generic,
7679 	.read		= trace_min_max_read,
7680 	.write		= trace_min_max_write,
7681 };
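
/*
 * Example sketch (hypothetical names, for illustration only): a tracer
 * exposing a bounded u64 tunable through trace_min_max_fops could wire
 * it up roughly like this:
 *
 *	static u64 my_val = 10, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_tunable", 0644, parent, &my_param,
 *			  &trace_min_max_fops);
 *
 * Reads then report the current value, and writes outside the range
 * [my_min, my_max] are rejected with -EINVAL by trace_min_max_write().
 */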
7682 
7683 #define TRACING_LOG_ERRS_MAX	8
7684 #define TRACING_LOG_LOC_MAX	128
7685 
7686 #define CMD_PREFIX "  Command: "
7687 
7688 struct err_info {
7689 	const char	**errs;	/* ptr to loc-specific array of err strings */
7690 	u8		type;	/* index into errs -> specific err string */
7691 	u8		pos;	/* caret position in cmd (< MAX_FILTER_STR_VAL = 256) */
7692 	u64		ts;
7693 };
7694 
7695 struct tracing_log_err {
7696 	struct list_head	list;
7697 	struct err_info		info;
7698 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7699 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7700 };
7701 
7702 static DEFINE_MUTEX(tracing_err_log_lock);
7703 
7704 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7705 {
7706 	struct tracing_log_err *err;
7707 
7708 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7709 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7710 		if (!err)
7711 			err = ERR_PTR(-ENOMEM);
7712 		tr->n_err_log_entries++;
7713 
7714 		return err;
7715 	}
7716 
7717 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7718 	list_del(&err->list);
7719 
7720 	return err;
7721 }
7722 
7723 /**
7724  * err_pos - find the position of a string within a command for error careting
7725  * @cmd: The tracing command that caused the error
7726  * @str: The string to position the caret at within @cmd
7727  *
7728  * Finds the position of the first occurrence of @str within @cmd.  The
7729  * return value can be passed to tracing_log_err() for caret placement
7730  * within @cmd.
7731  *
7732  * Returns the index within @cmd of the first occurrence of @str or 0
7733  * if @str was not found.
7734  */
7735 unsigned int err_pos(char *cmd, const char *str)
7736 {
7737 	char *found;
7738 
7739 	if (WARN_ON(!strlen(cmd)))
7740 		return 0;
7741 
7742 	found = strstr(cmd, str);
7743 	if (found)
7744 		return found - cmd;
7745 
7746 	return 0;
7747 }
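
/*
 * Worked example (hypothetical command, for illustration only): for
 * cmd = "foo:bar" and str = "bar", err_pos() returns 4, which later
 * places the '^' caret under the 'b' of "bar" in tracing/error_log.
 */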
7748 
7749 /**
7750  * tracing_log_err - write an error to the tracing error log
7751  * @tr: The associated trace array for the error (NULL for top level array)
7752  * @loc: A string describing where the error occurred
7753  * @cmd: The tracing command that caused the error
7754  * @errs: The array of loc-specific static error strings
7755  * @type: The index into errs[], which produces the specific static err string
7756  * @pos: The position the caret should be placed in the cmd
7757  *
7758  * Writes an error into tracing/error_log of the form:
7759  *
7760  * <loc>: error: <text>
7761  *   Command: <cmd>
7762  *              ^
7763  *
7764  * tracing/error_log is a small log file containing the last
7765  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7766  * unless there has been a tracing error, and the error log can be
7767  * cleared and have its memory freed by writing the empty string in
7768  * truncation mode to it, i.e. echo > tracing/error_log.
7769  *
7770  * NOTE: the @errs array along with the @type param are used to
7771  * produce a static error string - this string is not copied and saved
7772  * when the error is logged - only a pointer to it is saved.  See
7773  * existing callers for examples of how static strings are typically
7774  * defined for use with tracing_log_err().
7775  */
7776 void tracing_log_err(struct trace_array *tr,
7777 		     const char *loc, const char *cmd,
7778 		     const char **errs, u8 type, u8 pos)
7779 {
7780 	struct tracing_log_err *err;
7781 
7782 	if (!tr)
7783 		tr = &global_trace;
7784 
7785 	mutex_lock(&tracing_err_log_lock);
7786 	err = get_tracing_log_err(tr);
7787 	if (PTR_ERR(err) == -ENOMEM) {
7788 		mutex_unlock(&tracing_err_log_lock);
7789 		return;
7790 	}
7791 
7792 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7793 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7794 
7795 	err->info.errs = errs;
7796 	err->info.type = type;
7797 	err->info.pos = pos;
7798 	err->info.ts = local_clock();
7799 
7800 	list_add_tail(&err->list, &tr->err_log);
7801 	mutex_unlock(&tracing_err_log_lock);
7802 }
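
/*
 * Example sketch (hypothetical names, for illustration only): callers
 * usually keep a static array of error strings indexed by an enum and
 * pass both to tracing_log_err():
 *
 *	enum { MY_ERR_BAD_ARG, MY_ERR_TOO_LONG };
 *
 *	static const char *my_cmd_errs[] = {
 *		"Invalid argument",
 *		"Command too long",
 *	};
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_cmd_errs,
 *			MY_ERR_BAD_ARG, err_pos(cmd, bad_token));
 *
 * Only the pointer into my_cmd_errs[] is stored, so the error strings
 * must have static storage duration.
 */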
7803 
7804 static void clear_tracing_err_log(struct trace_array *tr)
7805 {
7806 	struct tracing_log_err *err, *next;
7807 
7808 	mutex_lock(&tracing_err_log_lock);
7809 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7810 		list_del(&err->list);
7811 		kfree(err);
7812 	}
7813 
7814 	tr->n_err_log_entries = 0;
7815 	mutex_unlock(&tracing_err_log_lock);
7816 }
7817 
7818 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7819 {
7820 	struct trace_array *tr = m->private;
7821 
7822 	mutex_lock(&tracing_err_log_lock);
7823 
7824 	return seq_list_start(&tr->err_log, *pos);
7825 }
7826 
7827 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7828 {
7829 	struct trace_array *tr = m->private;
7830 
7831 	return seq_list_next(v, &tr->err_log, pos);
7832 }
7833 
7834 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7835 {
7836 	mutex_unlock(&tracing_err_log_lock);
7837 }
7838 
7839 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7840 {
7841 	u8 i;
7842 
7843 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7844 		seq_putc(m, ' ');
7845 	for (i = 0; i < pos; i++)
7846 		seq_putc(m, ' ');
7847 	seq_puts(m, "^\n");
7848 }
7849 
7850 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7851 {
7852 	struct tracing_log_err *err = v;
7853 
7854 	if (err) {
7855 		const char *err_text = err->info.errs[err->info.type];
7856 		u64 sec = err->info.ts;
7857 		u32 nsec;
7858 
7859 		nsec = do_div(sec, NSEC_PER_SEC);
7860 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7861 			   err->loc, err_text);
7862 		seq_printf(m, "%s", err->cmd);
7863 		tracing_err_log_show_pos(m, err->info.pos);
7864 	}
7865 
7866 	return 0;
7867 }
7868 
7869 static const struct seq_operations tracing_err_log_seq_ops = {
7870 	.start  = tracing_err_log_seq_start,
7871 	.next   = tracing_err_log_seq_next,
7872 	.stop   = tracing_err_log_seq_stop,
7873 	.show   = tracing_err_log_seq_show
7874 };
7875 
7876 static int tracing_err_log_open(struct inode *inode, struct file *file)
7877 {
7878 	struct trace_array *tr = inode->i_private;
7879 	int ret = 0;
7880 
7881 	ret = tracing_check_open_get_tr(tr);
7882 	if (ret)
7883 		return ret;
7884 
7885 	/* If this file was opened for write, then erase contents */
7886 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7887 		clear_tracing_err_log(tr);
7888 
7889 	if (file->f_mode & FMODE_READ) {
7890 		ret = seq_open(file, &tracing_err_log_seq_ops);
7891 		if (!ret) {
7892 			struct seq_file *m = file->private_data;
7893 			m->private = tr;
7894 		} else {
7895 			trace_array_put(tr);
7896 		}
7897 	}
7898 	return ret;
7899 }
7900 
7901 static ssize_t tracing_err_log_write(struct file *file,
7902 				     const char __user *buffer,
7903 				     size_t count, loff_t *ppos)
7904 {
7905 	return count;
7906 }
7907 
7908 static int tracing_err_log_release(struct inode *inode, struct file *file)
7909 {
7910 	struct trace_array *tr = inode->i_private;
7911 
7912 	trace_array_put(tr);
7913 
7914 	if (file->f_mode & FMODE_READ)
7915 		seq_release(inode, file);
7916 
7917 	return 0;
7918 }
7919 
7920 static const struct file_operations tracing_err_log_fops = {
7921 	.open           = tracing_err_log_open,
7922 	.write		= tracing_err_log_write,
7923 	.read           = seq_read,
7924 	.llseek         = seq_lseek,
7925 	.release        = tracing_err_log_release,
7926 };
7927 
7928 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7929 {
7930 	struct trace_array *tr = inode->i_private;
7931 	struct ftrace_buffer_info *info;
7932 	int ret;
7933 
7934 	ret = tracing_check_open_get_tr(tr);
7935 	if (ret)
7936 		return ret;
7937 
7938 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7939 	if (!info) {
7940 		trace_array_put(tr);
7941 		return -ENOMEM;
7942 	}
7943 
7944 	mutex_lock(&trace_types_lock);
7945 
7946 	info->iter.tr		= tr;
7947 	info->iter.cpu_file	= tracing_get_cpu(inode);
7948 	info->iter.trace	= tr->current_trace;
7949 	info->iter.array_buffer = &tr->array_buffer;
7950 	info->spare		= NULL;
7951 	/* Force reading ring buffer for first read */
7952 	info->read		= (unsigned int)-1;
7953 
7954 	filp->private_data = info;
7955 
7956 	tr->trace_ref++;
7957 
7958 	mutex_unlock(&trace_types_lock);
7959 
7960 	ret = nonseekable_open(inode, filp);
7961 	if (ret < 0)
7962 		trace_array_put(tr);
7963 
7964 	return ret;
7965 }
7966 
7967 static __poll_t
7968 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7969 {
7970 	struct ftrace_buffer_info *info = filp->private_data;
7971 	struct trace_iterator *iter = &info->iter;
7972 
7973 	return trace_poll(iter, filp, poll_table);
7974 }
7975 
7976 static ssize_t
7977 tracing_buffers_read(struct file *filp, char __user *ubuf,
7978 		     size_t count, loff_t *ppos)
7979 {
7980 	struct ftrace_buffer_info *info = filp->private_data;
7981 	struct trace_iterator *iter = &info->iter;
7982 	ssize_t ret = 0;
7983 	ssize_t size;
7984 
7985 	if (!count)
7986 		return 0;
7987 
7988 #ifdef CONFIG_TRACER_MAX_TRACE
7989 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7990 		return -EBUSY;
7991 #endif
7992 
7993 	if (!info->spare) {
7994 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7995 							  iter->cpu_file);
7996 		if (IS_ERR(info->spare)) {
7997 			ret = PTR_ERR(info->spare);
7998 			info->spare = NULL;
7999 		} else {
8000 			info->spare_cpu = iter->cpu_file;
8001 		}
8002 	}
8003 	if (!info->spare)
8004 		return ret;
8005 
8006 	/* Do we have previous read data to read? */
8007 	if (info->read < PAGE_SIZE)
8008 		goto read;
8009 
8010  again:
8011 	trace_access_lock(iter->cpu_file);
8012 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8013 				    &info->spare,
8014 				    count,
8015 				    iter->cpu_file, 0);
8016 	trace_access_unlock(iter->cpu_file);
8017 
8018 	if (ret < 0) {
8019 		if (trace_empty(iter)) {
8020 			if ((filp->f_flags & O_NONBLOCK))
8021 				return -EAGAIN;
8022 
8023 			ret = wait_on_pipe(iter, 0);
8024 			if (ret)
8025 				return ret;
8026 
8027 			goto again;
8028 		}
8029 		return 0;
8030 	}
8031 
8032 	info->read = 0;
8033  read:
8034 	size = PAGE_SIZE - info->read;
8035 	if (size > count)
8036 		size = count;
8037 
8038 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8039 	if (ret == size)
8040 		return -EFAULT;
8041 
8042 	size -= ret;
8043 
8044 	*ppos += size;
8045 	info->read += size;
8046 
8047 	return size;
8048 }
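
/*
 * Example sketch (hypothetical user-space code, assuming 4K pages, no
 * error handling): a consumer of per_cpu/cpuN/trace_pipe_raw reads the
 * raw sub-buffer pages handed out above:
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	char page[4096];
 *	ssize_t r;
 *
 *	while ((r = read(fd, page, sizeof(page))) > 0)
 *		consume_raw_page(page, r);
 *
 * where consume_raw_page() is a hypothetical decoder; the pages contain
 * binary ring-buffer data, so the consumer must understand the ring
 * buffer page format to extract the events.
 */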
8049 
8050 static int tracing_buffers_release(struct inode *inode, struct file *file)
8051 {
8052 	struct ftrace_buffer_info *info = file->private_data;
8053 	struct trace_iterator *iter = &info->iter;
8054 
8055 	mutex_lock(&trace_types_lock);
8056 
8057 	iter->tr->trace_ref--;
8058 
8059 	__trace_array_put(iter->tr);
8060 
8061 	if (info->spare)
8062 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8063 					   info->spare_cpu, info->spare);
8064 	kvfree(info);
8065 
8066 	mutex_unlock(&trace_types_lock);
8067 
8068 	return 0;
8069 }
8070 
8071 struct buffer_ref {
8072 	struct trace_buffer	*buffer;
8073 	void			*page;
8074 	int			cpu;
8075 	refcount_t		refcount;
8076 };
8077 
8078 static void buffer_ref_release(struct buffer_ref *ref)
8079 {
8080 	if (!refcount_dec_and_test(&ref->refcount))
8081 		return;
8082 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8083 	kfree(ref);
8084 }
8085 
8086 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8087 				    struct pipe_buffer *buf)
8088 {
8089 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8090 
8091 	buffer_ref_release(ref);
8092 	buf->private = 0;
8093 }
8094 
8095 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8096 				struct pipe_buffer *buf)
8097 {
8098 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8099 
8100 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8101 		return false;
8102 
8103 	refcount_inc(&ref->refcount);
8104 	return true;
8105 }
8106 
8107 /* Pipe buffer operations for a buffer. */
8108 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8109 	.release		= buffer_pipe_buf_release,
8110 	.get			= buffer_pipe_buf_get,
8111 };
8112 
8113 /*
8114  * Callback from splice_to_pipe(); releases any pages still referenced
8115  * by the spd in case we errored out while filling the pipe.
8116  */
8117 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8118 {
8119 	struct buffer_ref *ref =
8120 		(struct buffer_ref *)spd->partial[i].private;
8121 
8122 	buffer_ref_release(ref);
8123 	spd->partial[i].private = 0;
8124 }
8125 
8126 static ssize_t
8127 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8128 			    struct pipe_inode_info *pipe, size_t len,
8129 			    unsigned int flags)
8130 {
8131 	struct ftrace_buffer_info *info = file->private_data;
8132 	struct trace_iterator *iter = &info->iter;
8133 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8134 	struct page *pages_def[PIPE_DEF_BUFFERS];
8135 	struct splice_pipe_desc spd = {
8136 		.pages		= pages_def,
8137 		.partial	= partial_def,
8138 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8139 		.ops		= &buffer_pipe_buf_ops,
8140 		.spd_release	= buffer_spd_release,
8141 	};
8142 	struct buffer_ref *ref;
8143 	int entries, i;
8144 	ssize_t ret = 0;
8145 
8146 #ifdef CONFIG_TRACER_MAX_TRACE
8147 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8148 		return -EBUSY;
8149 #endif
8150 
8151 	if (*ppos & (PAGE_SIZE - 1))
8152 		return -EINVAL;
8153 
8154 	if (len & (PAGE_SIZE - 1)) {
8155 		if (len < PAGE_SIZE)
8156 			return -EINVAL;
8157 		len &= PAGE_MASK;
8158 	}
8159 
8160 	if (splice_grow_spd(pipe, &spd))
8161 		return -ENOMEM;
8162 
8163  again:
8164 	trace_access_lock(iter->cpu_file);
8165 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8166 
8167 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8168 		struct page *page;
8169 		int r;
8170 
8171 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8172 		if (!ref) {
8173 			ret = -ENOMEM;
8174 			break;
8175 		}
8176 
8177 		refcount_set(&ref->refcount, 1);
8178 		ref->buffer = iter->array_buffer->buffer;
8179 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8180 		if (IS_ERR(ref->page)) {
8181 			ret = PTR_ERR(ref->page);
8182 			ref->page = NULL;
8183 			kfree(ref);
8184 			break;
8185 		}
8186 		ref->cpu = iter->cpu_file;
8187 
8188 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8189 					  len, iter->cpu_file, 1);
8190 		if (r < 0) {
8191 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8192 						   ref->page);
8193 			kfree(ref);
8194 			break;
8195 		}
8196 
8197 		page = virt_to_page(ref->page);
8198 
8199 		spd.pages[i] = page;
8200 		spd.partial[i].len = PAGE_SIZE;
8201 		spd.partial[i].offset = 0;
8202 		spd.partial[i].private = (unsigned long)ref;
8203 		spd.nr_pages++;
8204 		*ppos += PAGE_SIZE;
8205 
8206 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8207 	}
8208 
8209 	trace_access_unlock(iter->cpu_file);
8210 	spd.nr_pages = i;
8211 
8212 	/* did we read anything? */
8213 	if (!spd.nr_pages) {
8214 		if (ret)
8215 			goto out;
8216 
8217 		ret = -EAGAIN;
8218 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8219 			goto out;
8220 
8221 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8222 		if (ret)
8223 			goto out;
8224 
8225 		goto again;
8226 	}
8227 
8228 	ret = splice_to_pipe(pipe, &spd);
8229 out:
8230 	splice_shrink_spd(&spd);
8231 
8232 	return ret;
8233 }
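
/*
 * Example sketch (hypothetical user-space code, page-sized lengths
 * assumed, error handling omitted): tools such as trace-cmd move these
 * pages without copying by splicing trace_pipe_raw through a pipe into
 * a file, roughly:
 *
 *	int p[2];
 *
 *	pipe(p);
 *	splice(raw_fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *	splice(p[0], NULL, out_fd, NULL, 4096, 0);
 *
 * where raw_fd is an open trace_pipe_raw file and out_fd is the
 * destination file descriptor.
 */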
8234 
8235 static const struct file_operations tracing_buffers_fops = {
8236 	.open		= tracing_buffers_open,
8237 	.read		= tracing_buffers_read,
8238 	.poll		= tracing_buffers_poll,
8239 	.release	= tracing_buffers_release,
8240 	.splice_read	= tracing_buffers_splice_read,
8241 	.llseek		= no_llseek,
8242 };
8243 
8244 static ssize_t
8245 tracing_stats_read(struct file *filp, char __user *ubuf,
8246 		   size_t count, loff_t *ppos)
8247 {
8248 	struct inode *inode = file_inode(filp);
8249 	struct trace_array *tr = inode->i_private;
8250 	struct array_buffer *trace_buf = &tr->array_buffer;
8251 	int cpu = tracing_get_cpu(inode);
8252 	struct trace_seq *s;
8253 	unsigned long cnt;
8254 	unsigned long long t;
8255 	unsigned long usec_rem;
8256 
8257 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8258 	if (!s)
8259 		return -ENOMEM;
8260 
8261 	trace_seq_init(s);
8262 
8263 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8264 	trace_seq_printf(s, "entries: %ld\n", cnt);
8265 
8266 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8267 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8268 
8269 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8270 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8271 
8272 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8273 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8274 
8275 	if (trace_clocks[tr->clock_id].in_ns) {
8276 		/* local or global for trace_clock */
8277 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8278 		usec_rem = do_div(t, USEC_PER_SEC);
8279 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8280 								t, usec_rem);
8281 
8282 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8283 		usec_rem = do_div(t, USEC_PER_SEC);
8284 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8285 	} else {
8286 		/* counter or tsc mode for trace_clock */
8287 		trace_seq_printf(s, "oldest event ts: %llu\n",
8288 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8289 
8290 		trace_seq_printf(s, "now ts: %llu\n",
8291 				ring_buffer_time_stamp(trace_buf->buffer));
8292 	}
8293 
8294 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8295 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8296 
8297 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8298 	trace_seq_printf(s, "read events: %ld\n", cnt);
8299 
8300 	count = simple_read_from_buffer(ubuf, count, ppos,
8301 					s->buffer, trace_seq_used(s));
8302 
8303 	kfree(s);
8304 
8305 	return count;
8306 }
8307 
8308 static const struct file_operations tracing_stats_fops = {
8309 	.open		= tracing_open_generic_tr,
8310 	.read		= tracing_stats_read,
8311 	.llseek		= generic_file_llseek,
8312 	.release	= tracing_release_generic_tr,
8313 };
8314 
8315 #ifdef CONFIG_DYNAMIC_FTRACE
8316 
8317 static ssize_t
8318 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8319 		  size_t cnt, loff_t *ppos)
8320 {
8321 	ssize_t ret;
8322 	char *buf;
8323 	int r;
8324 
8325 	/* 256 should be plenty to hold the amount needed */
8326 	buf = kmalloc(256, GFP_KERNEL);
8327 	if (!buf)
8328 		return -ENOMEM;
8329 
8330 	r = scnprintf(buf, 256, "%ld pages: %ld groups: %ld\n",
8331 		      ftrace_update_tot_cnt,
8332 		      ftrace_number_of_pages,
8333 		      ftrace_number_of_groups);
8334 
8335 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8336 	kfree(buf);
8337 	return ret;
8338 }
8339 
8340 static const struct file_operations tracing_dyn_info_fops = {
8341 	.open		= tracing_open_generic,
8342 	.read		= tracing_read_dyn_info,
8343 	.llseek		= generic_file_llseek,
8344 };
8345 #endif /* CONFIG_DYNAMIC_FTRACE */
8346 
8347 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8348 static void
8349 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8350 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8351 		void *data)
8352 {
8353 	tracing_snapshot_instance(tr);
8354 }
8355 
8356 static void
8357 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8358 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8359 		      void *data)
8360 {
8361 	struct ftrace_func_mapper *mapper = data;
8362 	long *count = NULL;
8363 
8364 	if (mapper)
8365 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8366 
8367 	if (count) {
8368 
8369 		if (*count <= 0)
8370 			return;
8371 
8372 		(*count)--;
8373 	}
8374 
8375 	tracing_snapshot_instance(tr);
8376 }
8377 
8378 static int
8379 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8380 		      struct ftrace_probe_ops *ops, void *data)
8381 {
8382 	struct ftrace_func_mapper *mapper = data;
8383 	long *count = NULL;
8384 
8385 	seq_printf(m, "%ps:", (void *)ip);
8386 
8387 	seq_puts(m, "snapshot");
8388 
8389 	if (mapper)
8390 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8391 
8392 	if (count)
8393 		seq_printf(m, ":count=%ld\n", *count);
8394 	else
8395 		seq_puts(m, ":unlimited\n");
8396 
8397 	return 0;
8398 }
8399 
8400 static int
8401 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8402 		     unsigned long ip, void *init_data, void **data)
8403 {
8404 	struct ftrace_func_mapper *mapper = *data;
8405 
8406 	if (!mapper) {
8407 		mapper = allocate_ftrace_func_mapper();
8408 		if (!mapper)
8409 			return -ENOMEM;
8410 		*data = mapper;
8411 	}
8412 
8413 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8414 }
8415 
8416 static void
8417 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8418 		     unsigned long ip, void *data)
8419 {
8420 	struct ftrace_func_mapper *mapper = data;
8421 
8422 	if (!ip) {
8423 		if (!mapper)
8424 			return;
8425 		free_ftrace_func_mapper(mapper, NULL);
8426 		return;
8427 	}
8428 
8429 	ftrace_func_mapper_remove_ip(mapper, ip);
8430 }
8431 
8432 static struct ftrace_probe_ops snapshot_probe_ops = {
8433 	.func			= ftrace_snapshot,
8434 	.print			= ftrace_snapshot_print,
8435 };
8436 
8437 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8438 	.func			= ftrace_count_snapshot,
8439 	.print			= ftrace_snapshot_print,
8440 	.init			= ftrace_snapshot_init,
8441 	.free			= ftrace_snapshot_free,
8442 };
8443 
8444 static int
8445 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8446 			       char *glob, char *cmd, char *param, int enable)
8447 {
8448 	struct ftrace_probe_ops *ops;
8449 	void *count = (void *)-1;
8450 	char *number;
8451 	int ret;
8452 
8453 	if (!tr)
8454 		return -ENODEV;
8455 
8456 	/* hash funcs only work with set_ftrace_filter */
8457 	if (!enable)
8458 		return -EINVAL;
8459 
8460 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8461 
8462 	if (glob[0] == '!')
8463 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8464 
8465 	if (!param)
8466 		goto out_reg;
8467 
8468 	number = strsep(&param, ":");
8469 
8470 	if (!strlen(number))
8471 		goto out_reg;
8472 
8473 	/*
8474 	 * We use the callback data field (which is a pointer)
8475 	 * as our counter.
8476 	 */
8477 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8478 	if (ret)
8479 		return ret;
8480 
8481  out_reg:
8482 	ret = tracing_alloc_snapshot_instance(tr);
8483 	if (ret < 0)
8484 		goto out;
8485 
8486 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8487 
8488  out:
8489 	return ret < 0 ? ret : 0;
8490 }
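
/*
 * Illustrative usage (as described in Documentation/trace/ftrace.rst):
 * once the "snapshot" command is registered below, it is driven through
 * set_ftrace_filter, e.g.:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The first form takes a snapshot on every call to schedule(), the
 * second only for the next five calls (the counter parsed above), and
 * the '!' form removes the probe again.
 */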
8491 
8492 static struct ftrace_func_command ftrace_snapshot_cmd = {
8493 	.name			= "snapshot",
8494 	.func			= ftrace_trace_snapshot_callback,
8495 };
8496 
8497 static __init int register_snapshot_cmd(void)
8498 {
8499 	return register_ftrace_command(&ftrace_snapshot_cmd);
8500 }
8501 #else
8502 static inline __init int register_snapshot_cmd(void) { return 0; }
8503 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8504 
8505 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8506 {
8507 	if (WARN_ON(!tr->dir))
8508 		return ERR_PTR(-ENODEV);
8509 
8510 	/* Top directory uses NULL as the parent */
8511 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8512 		return NULL;
8513 
8514 	/* All sub buffers have a descriptor */
8515 	return tr->dir;
8516 }
8517 
8518 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8519 {
8520 	struct dentry *d_tracer;
8521 
8522 	if (tr->percpu_dir)
8523 		return tr->percpu_dir;
8524 
8525 	d_tracer = tracing_get_dentry(tr);
8526 	if (IS_ERR(d_tracer))
8527 		return NULL;
8528 
8529 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8530 
8531 	MEM_FAIL(!tr->percpu_dir,
8532 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8533 
8534 	return tr->percpu_dir;
8535 }
8536 
8537 static struct dentry *
8538 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8539 		      void *data, long cpu, const struct file_operations *fops)
8540 {
8541 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8542 
8543 	if (ret) /* See tracing_get_cpu() */
8544 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8545 	return ret;
8546 }
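
/*
 * Note: tracing_get_cpu() reverses this encoding: a non-NULL i_cdev
 * decodes to (long)i_cdev - 1, while a NULL i_cdev means "no specific
 * CPU" (RING_BUFFER_ALL_CPUS). Storing cpu + 1 is what keeps CPU 0
 * distinguishable from the NULL case.
 */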
8547 
8548 static void
8549 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8550 {
8551 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8552 	struct dentry *d_cpu;
8553 	char cpu_dir[30]; /* 30 characters should be more than enough */
8554 
8555 	if (!d_percpu)
8556 		return;
8557 
8558 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8559 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8560 	if (!d_cpu) {
8561 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8562 		return;
8563 	}
8564 
8565 	/* per cpu trace_pipe */
8566 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8567 				tr, cpu, &tracing_pipe_fops);
8568 
8569 	/* per cpu trace */
8570 	trace_create_cpu_file("trace", 0644, d_cpu,
8571 				tr, cpu, &tracing_fops);
8572 
8573 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8574 				tr, cpu, &tracing_buffers_fops);
8575 
8576 	trace_create_cpu_file("stats", 0444, d_cpu,
8577 				tr, cpu, &tracing_stats_fops);
8578 
8579 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8580 				tr, cpu, &tracing_entries_fops);
8581 
8582 #ifdef CONFIG_TRACER_SNAPSHOT
8583 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8584 				tr, cpu, &snapshot_fops);
8585 
8586 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8587 				tr, cpu, &snapshot_raw_fops);
8588 #endif
8589 }
8590 
8591 #ifdef CONFIG_FTRACE_SELFTEST
8592 /* Let selftest have access to static functions in this file */
8593 #include "trace_selftest.c"
8594 #endif
8595 
8596 static ssize_t
8597 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8598 			loff_t *ppos)
8599 {
8600 	struct trace_option_dentry *topt = filp->private_data;
8601 	char *buf;
8602 
8603 	if (topt->flags->val & topt->opt->bit)
8604 		buf = "1\n";
8605 	else
8606 		buf = "0\n";
8607 
8608 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8609 }
8610 
8611 static ssize_t
8612 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8613 			 loff_t *ppos)
8614 {
8615 	struct trace_option_dentry *topt = filp->private_data;
8616 	unsigned long val;
8617 	int ret;
8618 
8619 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8620 	if (ret)
8621 		return ret;
8622 
8623 	if (val != 0 && val != 1)
8624 		return -EINVAL;
8625 
8626 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8627 		mutex_lock(&trace_types_lock);
8628 		ret = __set_tracer_option(topt->tr, topt->flags,
8629 					  topt->opt, !val);
8630 		mutex_unlock(&trace_types_lock);
8631 		if (ret)
8632 			return ret;
8633 	}
8634 
8635 	*ppos += cnt;
8636 
8637 	return cnt;
8638 }
8639 
8640 
8641 static const struct file_operations trace_options_fops = {
8642 	.open = tracing_open_generic,
8643 	.read = trace_options_read,
8644 	.write = trace_options_write,
8645 	.llseek	= generic_file_llseek,
8646 };
8647 
8648 /*
8649  * In order to pass in both the trace_array descriptor and the index
8650  * to the flag that the trace option file represents, the trace_array
8651  * has a character array of trace_flags_index[], which holds the index
8652  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8653  * The address of this character array is passed to the flag option file
8654  * read/write callbacks.
8655  *
8656  * In order to extract both the index and the trace_array descriptor,
8657  * get_tr_index() uses the following algorithm.
8658  *
8659  *   idx = *ptr;
8660  *
8661  * Since the array entry that ptr points to holds its own index
8662  * (remember index[1] == 1), dereferencing ptr yields the index.
8663  *
8664  * Then, to get the trace_array descriptor, subtract that index from
8665  * ptr to arrive at the start of the index array itself:
8666  *
8667  *   ptr - idx == &index[0]
8668  *
8669  * Then a simple container_of() from that pointer gets us to the
8670  * trace_array descriptor.
8671  */
8672 static void get_tr_index(void *data, struct trace_array **ptr,
8673 			 unsigned int *pindex)
8674 {
8675 	*pindex = *(unsigned char *)data;
8676 
8677 	*ptr = container_of(data - *pindex, struct trace_array,
8678 			    trace_flags_index);
8679 }
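
/*
 * Illustrative walk-through (hypothetical index value): if data points
 * at tr->trace_flags_index[3], then *data == 3 because the array is set
 * up with index[i] == i (see init_trace_flags_index()), so:
 *
 *	idx = *(unsigned char *)data;		(yields 3)
 *	tr  = container_of(data - idx, struct trace_array,
 *			   trace_flags_index);
 *
 * recovers the enclosing trace_array from nothing but the pointer.
 */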
8680 
8681 static ssize_t
8682 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8683 			loff_t *ppos)
8684 {
8685 	void *tr_index = filp->private_data;
8686 	struct trace_array *tr;
8687 	unsigned int index;
8688 	char *buf;
8689 
8690 	get_tr_index(tr_index, &tr, &index);
8691 
8692 	if (tr->trace_flags & (1 << index))
8693 		buf = "1\n";
8694 	else
8695 		buf = "0\n";
8696 
8697 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8698 }
8699 
8700 static ssize_t
8701 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8702 			 loff_t *ppos)
8703 {
8704 	void *tr_index = filp->private_data;
8705 	struct trace_array *tr;
8706 	unsigned int index;
8707 	unsigned long val;
8708 	int ret;
8709 
8710 	get_tr_index(tr_index, &tr, &index);
8711 
8712 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8713 	if (ret)
8714 		return ret;
8715 
8716 	if (val != 0 && val != 1)
8717 		return -EINVAL;
8718 
8719 	mutex_lock(&event_mutex);
8720 	mutex_lock(&trace_types_lock);
8721 	ret = set_tracer_flag(tr, 1 << index, val);
8722 	mutex_unlock(&trace_types_lock);
8723 	mutex_unlock(&event_mutex);
8724 
8725 	if (ret < 0)
8726 		return ret;
8727 
8728 	*ppos += cnt;
8729 
8730 	return cnt;
8731 }
8732 
8733 static const struct file_operations trace_options_core_fops = {
8734 	.open = tracing_open_generic,
8735 	.read = trace_options_core_read,
8736 	.write = trace_options_core_write,
8737 	.llseek = generic_file_llseek,
8738 };
8739 
8740 struct dentry *trace_create_file(const char *name,
8741 				 umode_t mode,
8742 				 struct dentry *parent,
8743 				 void *data,
8744 				 const struct file_operations *fops)
8745 {
8746 	struct dentry *ret;
8747 
8748 	ret = tracefs_create_file(name, mode, parent, data, fops);
8749 	if (!ret)
8750 		pr_warn("Could not create tracefs '%s' entry\n", name);
8751 
8752 	return ret;
8753 }
8754 
8755 
8756 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8757 {
8758 	struct dentry *d_tracer;
8759 
8760 	if (tr->options)
8761 		return tr->options;
8762 
8763 	d_tracer = tracing_get_dentry(tr);
8764 	if (IS_ERR(d_tracer))
8765 		return NULL;
8766 
8767 	tr->options = tracefs_create_dir("options", d_tracer);
8768 	if (!tr->options) {
8769 		pr_warn("Could not create tracefs directory 'options'\n");
8770 		return NULL;
8771 	}
8772 
8773 	return tr->options;
8774 }
8775 
8776 static void
8777 create_trace_option_file(struct trace_array *tr,
8778 			 struct trace_option_dentry *topt,
8779 			 struct tracer_flags *flags,
8780 			 struct tracer_opt *opt)
8781 {
8782 	struct dentry *t_options;
8783 
8784 	t_options = trace_options_init_dentry(tr);
8785 	if (!t_options)
8786 		return;
8787 
8788 	topt->flags = flags;
8789 	topt->opt = opt;
8790 	topt->tr = tr;
8791 
8792 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8793 				    &trace_options_fops);
8794 
8795 }
8796 
8797 static void
8798 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8799 {
8800 	struct trace_option_dentry *topts;
8801 	struct trace_options *tr_topts;
8802 	struct tracer_flags *flags;
8803 	struct tracer_opt *opts;
8804 	int cnt;
8805 	int i;
8806 
8807 	if (!tracer)
8808 		return;
8809 
8810 	flags = tracer->flags;
8811 
8812 	if (!flags || !flags->opts)
8813 		return;
8814 
8815 	/*
8816 	 * If this is an instance, only create flags for tracers
8817 	 * the instance may have.
8818 	 */
8819 	if (!trace_ok_for_array(tracer, tr))
8820 		return;
8821 
8822 	for (i = 0; i < tr->nr_topts; i++) {
8823 		/* Make sure there are no duplicate flags. */
8824 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8825 			return;
8826 	}
8827 
8828 	opts = flags->opts;
8829 
8830 	for (cnt = 0; opts[cnt].name; cnt++)
8831 		;
8832 
8833 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8834 	if (!topts)
8835 		return;
8836 
8837 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8838 			    GFP_KERNEL);
8839 	if (!tr_topts) {
8840 		kfree(topts);
8841 		return;
8842 	}
8843 
8844 	tr->topts = tr_topts;
8845 	tr->topts[tr->nr_topts].tracer = tracer;
8846 	tr->topts[tr->nr_topts].topts = topts;
8847 	tr->nr_topts++;
8848 
8849 	for (cnt = 0; opts[cnt].name; cnt++) {
8850 		create_trace_option_file(tr, &topts[cnt], flags,
8851 					 &opts[cnt]);
8852 		MEM_FAIL(topts[cnt].entry == NULL,
8853 			  "Failed to create trace option: %s",
8854 			  opts[cnt].name);
8855 	}
8856 }
8857 
8858 static struct dentry *
8859 create_trace_option_core_file(struct trace_array *tr,
8860 			      const char *option, long index)
8861 {
8862 	struct dentry *t_options;
8863 
8864 	t_options = trace_options_init_dentry(tr);
8865 	if (!t_options)
8866 		return NULL;
8867 
8868 	return trace_create_file(option, 0644, t_options,
8869 				 (void *)&tr->trace_flags_index[index],
8870 				 &trace_options_core_fops);
8871 }
8872 
8873 static void create_trace_options_dir(struct trace_array *tr)
8874 {
8875 	struct dentry *t_options;
8876 	bool top_level = tr == &global_trace;
8877 	int i;
8878 
8879 	t_options = trace_options_init_dentry(tr);
8880 	if (!t_options)
8881 		return;
8882 
8883 	for (i = 0; trace_options[i]; i++) {
8884 		if (top_level ||
8885 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8886 			create_trace_option_core_file(tr, trace_options[i], i);
8887 	}
8888 }
8889 
8890 static ssize_t
8891 rb_simple_read(struct file *filp, char __user *ubuf,
8892 	       size_t cnt, loff_t *ppos)
8893 {
8894 	struct trace_array *tr = filp->private_data;
8895 	char buf[64];
8896 	int r;
8897 
8898 	r = tracer_tracing_is_on(tr);
8899 	r = sprintf(buf, "%d\n", r);
8900 
8901 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8902 }
8903 
8904 static ssize_t
8905 rb_simple_write(struct file *filp, const char __user *ubuf,
8906 		size_t cnt, loff_t *ppos)
8907 {
8908 	struct trace_array *tr = filp->private_data;
8909 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8910 	unsigned long val;
8911 	int ret;
8912 
8913 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8914 	if (ret)
8915 		return ret;
8916 
8917 	if (buffer) {
8918 		mutex_lock(&trace_types_lock);
8919 		if (!!val == tracer_tracing_is_on(tr)) {
8920 			val = 0; /* do nothing */
8921 		} else if (val) {
8922 			tracer_tracing_on(tr);
8923 			if (tr->current_trace->start)
8924 				tr->current_trace->start(tr);
8925 		} else {
8926 			tracer_tracing_off(tr);
8927 			if (tr->current_trace->stop)
8928 				tr->current_trace->stop(tr);
8929 		}
8930 		mutex_unlock(&trace_types_lock);
8931 	}
8932 
8933 	(*ppos)++;
8934 
8935 	return cnt;
8936 }
8937 
8938 static const struct file_operations rb_simple_fops = {
8939 	.open		= tracing_open_generic_tr,
8940 	.read		= rb_simple_read,
8941 	.write		= rb_simple_write,
8942 	.release	= tracing_release_generic_tr,
8943 	.llseek		= default_llseek,
8944 };
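
/*
 * Illustrative usage: the "tracing_on" file created with rb_simple_fops
 * is toggled from user space, e.g.:
 *
 *	echo 0 > tracing_on	stop recording into the ring buffer
 *	echo 1 > tracing_on	resume recording
 *
 * Reading the file reports the current state as "0" or "1", and the
 * tracer's optional start()/stop() callbacks are invoked on changes.
 */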
8945 
8946 static ssize_t
8947 buffer_percent_read(struct file *filp, char __user *ubuf,
8948 		    size_t cnt, loff_t *ppos)
8949 {
8950 	struct trace_array *tr = filp->private_data;
8951 	char buf[64];
8952 	int r;
8953 
8954 	r = tr->buffer_percent;
8955 	r = sprintf(buf, "%d\n", r);
8956 
8957 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8958 }
8959 
8960 static ssize_t
8961 buffer_percent_write(struct file *filp, const char __user *ubuf,
8962 		     size_t cnt, loff_t *ppos)
8963 {
8964 	struct trace_array *tr = filp->private_data;
8965 	unsigned long val;
8966 	int ret;
8967 
8968 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8969 	if (ret)
8970 		return ret;
8971 
8972 	if (val > 100)
8973 		return -EINVAL;
8974 
8975 	if (!val)
8976 		val = 1;
8977 
8978 	tr->buffer_percent = val;
8979 
8980 	(*ppos)++;
8981 
8982 	return cnt;
8983 }
8984 
8985 static const struct file_operations buffer_percent_fops = {
8986 	.open		= tracing_open_generic_tr,
8987 	.read		= buffer_percent_read,
8988 	.write		= buffer_percent_write,
8989 	.release	= tracing_release_generic_tr,
8990 	.llseek		= default_llseek,
8991 };
8992 
8993 static struct dentry *trace_instance_dir;
8994 
8995 static void
8996 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8997 
8998 static int
8999 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9000 {
9001 	enum ring_buffer_flags rb_flags;
9002 
9003 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9004 
9005 	buf->tr = tr;
9006 
9007 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9008 	if (!buf->buffer)
9009 		return -ENOMEM;
9010 
9011 	buf->data = alloc_percpu(struct trace_array_cpu);
9012 	if (!buf->data) {
9013 		ring_buffer_free(buf->buffer);
9014 		buf->buffer = NULL;
9015 		return -ENOMEM;
9016 	}
9017 
9018 	/* Allocate the first page for all buffers */
9019 	set_buffer_entries(&tr->array_buffer,
9020 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9021 
9022 	return 0;
9023 }
9024 
9025 static int allocate_trace_buffers(struct trace_array *tr, int size)
9026 {
9027 	int ret;
9028 
9029 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9030 	if (ret)
9031 		return ret;
9032 
9033 #ifdef CONFIG_TRACER_MAX_TRACE
9034 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9035 				    allocate_snapshot ? size : 1);
9036 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9037 		ring_buffer_free(tr->array_buffer.buffer);
9038 		tr->array_buffer.buffer = NULL;
9039 		free_percpu(tr->array_buffer.data);
9040 		tr->array_buffer.data = NULL;
9041 		return -ENOMEM;
9042 	}
9043 	tr->allocated_snapshot = allocate_snapshot;
9044 
9045 	/*
9046 	 * Only the top level trace array gets its snapshot allocated
9047 	 * from the kernel command line.
9048 	 */
9049 	allocate_snapshot = false;
9050 #endif
9051 
9052 	return 0;
9053 }
9054 
9055 static void free_trace_buffer(struct array_buffer *buf)
9056 {
9057 	if (buf->buffer) {
9058 		ring_buffer_free(buf->buffer);
9059 		buf->buffer = NULL;
9060 		free_percpu(buf->data);
9061 		buf->data = NULL;
9062 	}
9063 }
9064 
9065 static void free_trace_buffers(struct trace_array *tr)
9066 {
9067 	if (!tr)
9068 		return;
9069 
9070 	free_trace_buffer(&tr->array_buffer);
9071 
9072 #ifdef CONFIG_TRACER_MAX_TRACE
9073 	free_trace_buffer(&tr->max_buffer);
9074 #endif
9075 }
9076 
9077 static void init_trace_flags_index(struct trace_array *tr)
9078 {
9079 	int i;
9080 
9081 	/* Used by the trace options files */
9082 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9083 		tr->trace_flags_index[i] = i;
9084 }
9085 
9086 static void __update_tracer_options(struct trace_array *tr)
9087 {
9088 	struct tracer *t;
9089 
9090 	for (t = trace_types; t; t = t->next)
9091 		add_tracer_options(tr, t);
9092 }
9093 
9094 static void update_tracer_options(struct trace_array *tr)
9095 {
9096 	mutex_lock(&trace_types_lock);
9097 	__update_tracer_options(tr);
9098 	mutex_unlock(&trace_types_lock);
9099 }
9100 
9101 /* Must have trace_types_lock held */
9102 struct trace_array *trace_array_find(const char *instance)
9103 {
9104 	struct trace_array *tr, *found = NULL;
9105 
9106 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9107 		if (tr->name && strcmp(tr->name, instance) == 0) {
9108 			found = tr;
9109 			break;
9110 		}
9111 	}
9112 
9113 	return found;
9114 }
9115 
9116 struct trace_array *trace_array_find_get(const char *instance)
9117 {
9118 	struct trace_array *tr;
9119 
9120 	mutex_lock(&trace_types_lock);
9121 	tr = trace_array_find(instance);
9122 	if (tr)
9123 		tr->ref++;
9124 	mutex_unlock(&trace_types_lock);
9125 
9126 	return tr;
9127 }
9128 
9129 static int trace_array_create_dir(struct trace_array *tr)
9130 {
9131 	int ret;
9132 
9133 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9134 	if (!tr->dir)
9135 		return -EINVAL;
9136 
9137 	ret = event_trace_add_tracer(tr->dir, tr);
9138 	if (ret)
9139 		tracefs_remove(tr->dir);
9140 
9141 	init_tracer_tracefs(tr, tr->dir);
9142 	__update_tracer_options(tr);
9143 
9144 	return ret;
9145 }
9146 
9147 static struct trace_array *trace_array_create(const char *name)
9148 {
9149 	struct trace_array *tr;
9150 	int ret;
9151 
9152 	ret = -ENOMEM;
9153 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9154 	if (!tr)
9155 		return ERR_PTR(ret);
9156 
9157 	tr->name = kstrdup(name, GFP_KERNEL);
9158 	if (!tr->name)
9159 		goto out_free_tr;
9160 
9161 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9162 		goto out_free_tr;
9163 
9164 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9165 
9166 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9167 
9168 	raw_spin_lock_init(&tr->start_lock);
9169 
9170 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9171 
9172 	tr->current_trace = &nop_trace;
9173 
9174 	INIT_LIST_HEAD(&tr->systems);
9175 	INIT_LIST_HEAD(&tr->events);
9176 	INIT_LIST_HEAD(&tr->hist_vars);
9177 	INIT_LIST_HEAD(&tr->err_log);
9178 
9179 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9180 		goto out_free_tr;
9181 
9182 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9183 		goto out_free_tr;
9184 
9185 	ftrace_init_trace_array(tr);
9186 
9187 	init_trace_flags_index(tr);
9188 
9189 	if (trace_instance_dir) {
9190 		ret = trace_array_create_dir(tr);
9191 		if (ret)
9192 			goto out_free_tr;
9193 	} else
9194 		__trace_early_add_events(tr);
9195 
9196 	list_add(&tr->list, &ftrace_trace_arrays);
9197 
9198 	tr->ref++;
9199 
9200 	return tr;
9201 
9202  out_free_tr:
9203 	ftrace_free_ftrace_ops(tr);
9204 	free_trace_buffers(tr);
9205 	free_cpumask_var(tr->tracing_cpumask);
9206 	kfree(tr->name);
9207 	kfree(tr);
9208 
9209 	return ERR_PTR(ret);
9210 }
9211 
9212 static int instance_mkdir(const char *name)
9213 {
9214 	struct trace_array *tr;
9215 	int ret;
9216 
9217 	mutex_lock(&event_mutex);
9218 	mutex_lock(&trace_types_lock);
9219 
9220 	ret = -EEXIST;
9221 	if (trace_array_find(name))
9222 		goto out_unlock;
9223 
9224 	tr = trace_array_create(name);
9225 
9226 	ret = PTR_ERR_OR_ZERO(tr);
9227 
9228 out_unlock:
9229 	mutex_unlock(&trace_types_lock);
9230 	mutex_unlock(&event_mutex);
9231 	return ret;
9232 }
9233 
9234 /**
9235  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9236  * @name: The name of the trace array to be looked up/created.
9237  *
9238  * Returns a pointer to the trace array with the given name, creating
9239  * it if necessary; NULL if it cannot be created.
9240  *
9241  * NOTE: This function increments the reference counter associated with the
9242  * trace array returned. This makes sure it cannot be freed while in use.
9243  * Use trace_array_put() once the trace array is no longer needed.
9244  * If the trace_array is to be freed, trace_array_destroy() needs to
9245  * be called after the trace_array_put(), or simply let user space delete
9246  * it from the tracefs instances directory. But until the
9247  * trace_array_put() is called, user space can not delete it.
9248  *
9249  */
9250 struct trace_array *trace_array_get_by_name(const char *name)
9251 {
9252 	struct trace_array *tr;
9253 
9254 	mutex_lock(&event_mutex);
9255 	mutex_lock(&trace_types_lock);
9256 
9257 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9258 		if (tr->name && strcmp(tr->name, name) == 0)
9259 			goto out_unlock;
9260 	}
9261 
9262 	tr = trace_array_create(name);
9263 
9264 	if (IS_ERR(tr))
9265 		tr = NULL;
9266 out_unlock:
9267 	if (tr)
9268 		tr->ref++;
9269 
9270 	mutex_unlock(&trace_types_lock);
9271 	mutex_unlock(&event_mutex);
9272 	return tr;
9273 }
9274 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
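
/*
 * Example sketch (hypothetical instance name, error handling trimmed):
 * a kernel module creating its own instance would typically pair the
 * calls like this:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use the instance ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */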
9275 
9276 static int __remove_instance(struct trace_array *tr)
9277 {
9278 	int i;
9279 
9280 	/* Reference counter for a newly created trace array = 1. */
9281 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9282 		return -EBUSY;
9283 
9284 	list_del(&tr->list);
9285 
9286 	/* Disable all the flags that were enabled coming in */
9287 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9288 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9289 			set_tracer_flag(tr, 1 << i, 0);
9290 	}
9291 
9292 	tracing_set_nop(tr);
9293 	clear_ftrace_function_probes(tr);
9294 	event_trace_del_tracer(tr);
9295 	ftrace_clear_pids(tr);
9296 	ftrace_destroy_function_files(tr);
9297 	tracefs_remove(tr->dir);
9298 	free_percpu(tr->last_func_repeats);
9299 	free_trace_buffers(tr);
9300 
9301 	for (i = 0; i < tr->nr_topts; i++) {
9302 		kfree(tr->topts[i].topts);
9303 	}
9304 	kfree(tr->topts);
9305 
9306 	free_cpumask_var(tr->tracing_cpumask);
9307 	kfree(tr->name);
9308 	kfree(tr);
9309 
9310 	return 0;
9311 }
9312 
9313 int trace_array_destroy(struct trace_array *this_tr)
9314 {
9315 	struct trace_array *tr;
9316 	int ret;
9317 
9318 	if (!this_tr)
9319 		return -EINVAL;
9320 
9321 	mutex_lock(&event_mutex);
9322 	mutex_lock(&trace_types_lock);
9323 
9324 	ret = -ENODEV;
9325 
9326 	/* Make sure the trace array exists before destroying it. */
9327 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9328 		if (tr == this_tr) {
9329 			ret = __remove_instance(tr);
9330 			break;
9331 		}
9332 	}
9333 
9334 	mutex_unlock(&trace_types_lock);
9335 	mutex_unlock(&event_mutex);
9336 
9337 	return ret;
9338 }
9339 EXPORT_SYMBOL_GPL(trace_array_destroy);
9340 
9341 static int instance_rmdir(const char *name)
9342 {
9343 	struct trace_array *tr;
9344 	int ret;
9345 
9346 	mutex_lock(&event_mutex);
9347 	mutex_lock(&trace_types_lock);
9348 
9349 	ret = -ENODEV;
9350 	tr = trace_array_find(name);
9351 	if (tr)
9352 		ret = __remove_instance(tr);
9353 
9354 	mutex_unlock(&trace_types_lock);
9355 	mutex_unlock(&event_mutex);
9356 
9357 	return ret;
9358 }
9359 
9360 static __init void create_trace_instances(struct dentry *d_tracer)
9361 {
9362 	struct trace_array *tr;
9363 
9364 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9365 							 instance_mkdir,
9366 							 instance_rmdir);
9367 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9368 		return;
9369 
9370 	mutex_lock(&event_mutex);
9371 	mutex_lock(&trace_types_lock);
9372 
9373 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9374 		if (!tr->name)
9375 			continue;
9376 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9377 			     "Failed to create instance directory\n"))
9378 			break;
9379 	}
9380 
9381 	mutex_unlock(&trace_types_lock);
9382 	mutex_unlock(&event_mutex);
9383 }
9384 
9385 static void
9386 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9387 {
9388 	struct trace_event_file *file;
9389 	int cpu;
9390 
9391 	trace_create_file("available_tracers", 0444, d_tracer,
9392 			tr, &show_traces_fops);
9393 
9394 	trace_create_file("current_tracer", 0644, d_tracer,
9395 			tr, &set_tracer_fops);
9396 
9397 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9398 			  tr, &tracing_cpumask_fops);
9399 
9400 	trace_create_file("trace_options", 0644, d_tracer,
9401 			  tr, &tracing_iter_fops);
9402 
9403 	trace_create_file("trace", 0644, d_tracer,
9404 			  tr, &tracing_fops);
9405 
9406 	trace_create_file("trace_pipe", 0444, d_tracer,
9407 			  tr, &tracing_pipe_fops);
9408 
9409 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9410 			  tr, &tracing_entries_fops);
9411 
9412 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9413 			  tr, &tracing_total_entries_fops);
9414 
9415 	trace_create_file("free_buffer", 0200, d_tracer,
9416 			  tr, &tracing_free_buffer_fops);
9417 
9418 	trace_create_file("trace_marker", 0220, d_tracer,
9419 			  tr, &tracing_mark_fops);
9420 
9421 	file = __find_event_file(tr, "ftrace", "print");
9422 	if (file && file->dir)
9423 		trace_create_file("trigger", 0644, file->dir, file,
9424 				  &event_trigger_fops);
9425 	tr->trace_marker_file = file;
9426 
9427 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9428 			  tr, &tracing_mark_raw_fops);
9429 
9430 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9431 			  &trace_clock_fops);
9432 
9433 	trace_create_file("tracing_on", 0644, d_tracer,
9434 			  tr, &rb_simple_fops);
9435 
9436 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9437 			  &trace_time_stamp_mode_fops);
9438 
9439 	tr->buffer_percent = 50;
9440 
9441 	trace_create_file("buffer_percent", 0444, d_tracer,
9442 			tr, &buffer_percent_fops);
9443 
9444 	create_trace_options_dir(tr);
9445 
9446 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9447 	trace_create_maxlat_file(tr, d_tracer);
9448 #endif
9449 
9450 	if (ftrace_create_function_files(tr, d_tracer))
9451 		MEM_FAIL(1, "Could not allocate function filter files");
9452 
9453 #ifdef CONFIG_TRACER_SNAPSHOT
9454 	trace_create_file("snapshot", 0644, d_tracer,
9455 			  tr, &snapshot_fops);
9456 #endif
9457 
9458 	trace_create_file("error_log", 0644, d_tracer,
9459 			  tr, &tracing_err_log_fops);
9460 
9461 	for_each_tracing_cpu(cpu)
9462 		tracing_init_tracefs_percpu(tr, cpu);
9463 
9464 	ftrace_init_tracefs(tr, d_tracer);
9465 }
9466 
9467 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9468 {
9469 	struct vfsmount *mnt;
9470 	struct file_system_type *type;
9471 
9472 	/*
9473 	 * To maintain backward compatibility for tools that mount
9474 	 * debugfs to get to the tracing facility, tracefs is automatically
9475 	 * mounted to the debugfs/tracing directory.
9476 	 */
9477 	type = get_fs_type("tracefs");
9478 	if (!type)
9479 		return NULL;
9480 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9481 	put_filesystem(type);
9482 	if (IS_ERR(mnt))
9483 		return NULL;
9484 	mntget(mnt);
9485 
9486 	return mnt;
9487 }
9488 
9489 /**
9490  * tracing_init_dentry - initialize top level trace array
9491  *
9492  * This is called when creating files or directories in the tracing
9493  * directory. It is called via fs_initcall() by any of the boot up code
9494  * and returns 0 once the top level tracing directory is available.
9495  */
9496 int tracing_init_dentry(void)
9497 {
9498 	struct trace_array *tr = &global_trace;
9499 
9500 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9501 		pr_warn("Tracing disabled due to lockdown\n");
9502 		return -EPERM;
9503 	}
9504 
9505 	/* The top level trace array uses NULL as parent */
9506 	if (tr->dir)
9507 		return 0;
9508 
9509 	if (WARN_ON(!tracefs_initialized()))
9510 		return -ENODEV;
9511 
9512 	/*
9513 	 * As there may still be users that expect the tracing
9514 	 * files to exist in debugfs/tracing, we must automount
9515 	 * the tracefs file system there, so older tools still
9516 	 * work with the newer kernel.
9517 	 */
9518 	tr->dir = debugfs_create_automount("tracing", NULL,
9519 					   trace_automount, NULL);
9520 
9521 	return 0;
9522 }
9523 
9524 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9525 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9526 
9527 static struct workqueue_struct *eval_map_wq __initdata;
9528 static struct work_struct eval_map_work __initdata;
9529 
9530 static void __init eval_map_work_func(struct work_struct *work)
9531 {
9532 	int len;
9533 
9534 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9535 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9536 }
9537 
9538 static int __init trace_eval_init(void)
9539 {
9540 	INIT_WORK(&eval_map_work, eval_map_work_func);
9541 
9542 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9543 	if (!eval_map_wq) {
9544 		pr_err("Unable to allocate eval_map_wq\n");
9545 		/* Fall back to doing the work synchronously */
9546 		eval_map_work_func(&eval_map_work);
9547 		return -ENOMEM;
9548 	}
9549 
9550 	queue_work(eval_map_wq, &eval_map_work);
9551 	return 0;
9552 }
9553 
9554 static int __init trace_eval_sync(void)
9555 {
9556 	/* Make sure the eval map updates are finished */
9557 	if (eval_map_wq)
9558 		destroy_workqueue(eval_map_wq);
9559 	return 0;
9560 }
9561 
9562 late_initcall_sync(trace_eval_sync);
9563 
9564 
9565 #ifdef CONFIG_MODULES
9566 static void trace_module_add_evals(struct module *mod)
9567 {
9568 	if (!mod->num_trace_evals)
9569 		return;
9570 
9571 	/*
9572 	 * Modules with bad taint do not have events created, so do
9573 	 * not bother with their eval maps (enums) either.
9574 	 */
9575 	if (trace_module_has_bad_taint(mod))
9576 		return;
9577 
9578 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9579 }
9580 
9581 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9582 static void trace_module_remove_evals(struct module *mod)
9583 {
9584 	union trace_eval_map_item *map;
9585 	union trace_eval_map_item **last = &trace_eval_maps;
9586 
9587 	if (!mod->num_trace_evals)
9588 		return;
9589 
9590 	mutex_lock(&trace_eval_mutex);
9591 
9592 	map = trace_eval_maps;
9593 
9594 	while (map) {
9595 		if (map->head.mod == mod)
9596 			break;
9597 		map = trace_eval_jmp_to_tail(map);
9598 		last = &map->tail.next;
9599 		map = map->tail.next;
9600 	}
9601 	if (!map)
9602 		goto out;
9603 
9604 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9605 	kfree(map);
9606  out:
9607 	mutex_unlock(&trace_eval_mutex);
9608 }
9609 #else
9610 static inline void trace_module_remove_evals(struct module *mod) { }
9611 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9612 
9613 static int trace_module_notify(struct notifier_block *self,
9614 			       unsigned long val, void *data)
9615 {
9616 	struct module *mod = data;
9617 
9618 	switch (val) {
9619 	case MODULE_STATE_COMING:
9620 		trace_module_add_evals(mod);
9621 		break;
9622 	case MODULE_STATE_GOING:
9623 		trace_module_remove_evals(mod);
9624 		break;
9625 	}
9626 
9627 	return NOTIFY_OK;
9628 }
9629 
9630 static struct notifier_block trace_module_nb = {
9631 	.notifier_call = trace_module_notify,
9632 	.priority = 0,
9633 };
9634 #endif /* CONFIG_MODULES */
9635 
9636 static __init int tracer_init_tracefs(void)
9637 {
9638 	int ret;
9639 
9640 	trace_access_lock_init();
9641 
9642 	ret = tracing_init_dentry();
9643 	if (ret)
9644 		return 0;
9645 
9646 	event_trace_init();
9647 
9648 	init_tracer_tracefs(&global_trace, NULL);
9649 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9650 
9651 	trace_create_file("tracing_thresh", 0644, NULL,
9652 			&global_trace, &tracing_thresh_fops);
9653 
9654 	trace_create_file("README", 0444, NULL,
9655 			NULL, &tracing_readme_fops);
9656 
9657 	trace_create_file("saved_cmdlines", 0444, NULL,
9658 			NULL, &tracing_saved_cmdlines_fops);
9659 
9660 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9661 			  NULL, &tracing_saved_cmdlines_size_fops);
9662 
9663 	trace_create_file("saved_tgids", 0444, NULL,
9664 			NULL, &tracing_saved_tgids_fops);
9665 
9666 	trace_eval_init();
9667 
9668 	trace_create_eval_file(NULL);
9669 
9670 #ifdef CONFIG_MODULES
9671 	register_module_notifier(&trace_module_nb);
9672 #endif
9673 
9674 #ifdef CONFIG_DYNAMIC_FTRACE
9675 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9676 			NULL, &tracing_dyn_info_fops);
9677 #endif
9678 
9679 	create_trace_instances(NULL);
9680 
9681 	update_tracer_options(&global_trace);
9682 
9683 	return 0;
9684 }
9685 
9686 fs_initcall(tracer_init_tracefs);
9687 
9688 static int trace_panic_handler(struct notifier_block *this,
9689 			       unsigned long event, void *unused)
9690 {
9691 	if (ftrace_dump_on_oops)
9692 		ftrace_dump(ftrace_dump_on_oops);
9693 	return NOTIFY_OK;
9694 }
9695 
9696 static struct notifier_block trace_panic_notifier = {
9697 	.notifier_call  = trace_panic_handler,
9698 	.next           = NULL,
9699 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9700 };
9701 
9702 static int trace_die_handler(struct notifier_block *self,
9703 			     unsigned long val,
9704 			     void *data)
9705 {
9706 	switch (val) {
9707 	case DIE_OOPS:
9708 		if (ftrace_dump_on_oops)
9709 			ftrace_dump(ftrace_dump_on_oops);
9710 		break;
9711 	default:
9712 		break;
9713 	}
9714 	return NOTIFY_OK;
9715 }
9716 
9717 static struct notifier_block trace_die_notifier = {
9718 	.notifier_call = trace_die_handler,
9719 	.priority = 200
9720 };
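/*
 * Illustrative note (the knobs named here are defined outside this file):
 * the panic and die handlers above only dump when ftrace_dump_on_oops is
 * set, e.g. via the "ftrace_dump_on_oops" kernel command line option or
 * the corresponding sysctl:
 *
 *   sysctl kernel.ftrace_dump_on_oops=1
 */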
9721 
9722 /*
9723  * printk is set to a max of 1024; we really don't need it that big.
9724  * Nothing should be printing 1000 characters anyway.
9725  */
9726 #define TRACE_MAX_PRINT		1000
9727 
9728 /*
9729  * Define here KERN_TRACE so that we have one place to modify
9730  * it if we decide to change what log level the ftrace dump
9731  * should be at.
9732  */
9733 #define KERN_TRACE		KERN_EMERG
9734 
9735 void
9736 trace_printk_seq(struct trace_seq *s)
9737 {
9738 	/* Probably should print a warning here. */
9739 	if (s->seq.len >= TRACE_MAX_PRINT)
9740 		s->seq.len = TRACE_MAX_PRINT;
9741 
9742 	/*
9743 	 * More paranoid code. Although the buffer size is set to
9744 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9745 	 * an extra layer of protection.
9746 	 */
9747 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9748 		s->seq.len = s->seq.size - 1;
9749 
9750 	/* should already be NUL terminated, but we are paranoid. */
9751 	s->buffer[s->seq.len] = 0;
9752 
9753 	printk(KERN_TRACE "%s", s->buffer);
9754 
9755 	trace_seq_init(s);
9756 }
9757 
9758 void trace_init_global_iter(struct trace_iterator *iter)
9759 {
9760 	iter->tr = &global_trace;
9761 	iter->trace = iter->tr->current_trace;
9762 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9763 	iter->array_buffer = &global_trace.array_buffer;
9764 
9765 	if (iter->trace && iter->trace->open)
9766 		iter->trace->open(iter);
9767 
9768 	/* Annotate start of buffers if we had overruns */
9769 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9770 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9771 
9772 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9773 	if (trace_clocks[iter->tr->clock_id].in_ns)
9774 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9775 }
9776 
9777 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9778 {
9779 	/* use static because iter can be a bit big for the stack */
9780 	static struct trace_iterator iter;
9781 	static atomic_t dump_running;
9782 	struct trace_array *tr = &global_trace;
9783 	unsigned int old_userobj;
9784 	unsigned long flags;
9785 	int cnt = 0, cpu;
9786 
9787 	/* Only allow one dump user at a time. */
9788 	if (atomic_inc_return(&dump_running) != 1) {
9789 		atomic_dec(&dump_running);
9790 		return;
9791 	}
9792 
9793 	/*
9794 	 * Always turn off tracing when we dump.
9795 	 * We don't need to show trace output of what happens
9796 	 * between multiple crashes.
9797 	 *
9798 	 * If the user does a sysrq-z, then they can re-enable
9799 	 * tracing with echo 1 > tracing_on.
9800 	 */
9801 	tracing_off();
9802 
9803 	local_irq_save(flags);
9804 	printk_nmi_direct_enter();
9805 
9806 	/* Simulate the iterator */
9807 	trace_init_global_iter(&iter);
9808 	/* Cannot use kmalloc for iter.temp and iter.fmt; use the static buffers */
9809 	iter.temp = static_temp_buf;
9810 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9811 	iter.fmt = static_fmt_buf;
9812 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9813 
9814 	for_each_tracing_cpu(cpu) {
9815 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9816 	}
9817 
9818 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9819 
9820 	/* don't look at user memory in panic mode */
9821 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9822 
9823 	switch (oops_dump_mode) {
9824 	case DUMP_ALL:
9825 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9826 		break;
9827 	case DUMP_ORIG:
9828 		iter.cpu_file = raw_smp_processor_id();
9829 		break;
9830 	case DUMP_NONE:
9831 		goto out_enable;
9832 	default:
9833 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9834 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9835 	}
9836 
9837 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9838 
9839 	/* Did function tracer already get disabled? */
9840 	if (ftrace_is_dead()) {
9841 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9842 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9843 	}
9844 
9845 	/*
9846 	 * We need to stop all tracing on all CPUs to read
9847 	 * the next buffer. This is a bit expensive, but it is
9848 	 * not done often. We fill in all that we can read,
9849 	 * and then release the locks again.
9850 	 */
9851 
9852 	while (!trace_empty(&iter)) {
9853 
9854 		if (!cnt)
9855 			printk(KERN_TRACE "---------------------------------\n");
9856 
9857 		cnt++;
9858 
9859 		trace_iterator_reset(&iter);
9860 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9861 
9862 		if (trace_find_next_entry_inc(&iter) != NULL) {
9863 			int ret;
9864 
9865 			ret = print_trace_line(&iter);
9866 			if (ret != TRACE_TYPE_NO_CONSUME)
9867 				trace_consume(&iter);
9868 		}
9869 		touch_nmi_watchdog();
9870 
9871 		trace_printk_seq(&iter.seq);
9872 	}
9873 
9874 	if (!cnt)
9875 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9876 	else
9877 		printk(KERN_TRACE "---------------------------------\n");
9878 
9879  out_enable:
9880 	tr->trace_flags |= old_userobj;
9881 
9882 	for_each_tracing_cpu(cpu) {
9883 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9884 	}
9885 	atomic_dec(&dump_running);
9886 	printk_nmi_direct_exit();
9887 	local_irq_restore(flags);
9888 }
9889 EXPORT_SYMBOL_GPL(ftrace_dump);
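/*
 * Descriptive note: besides the panic/die notifiers above, ftrace_dump()
 * can also be triggered manually through the magic SysRq 'z' key (when
 * SysRq support is enabled), which dumps the buffers of all CPUs.
 */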
9890 
9891 #define WRITE_BUFSIZE  4096
9892 
9893 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9894 				size_t count, loff_t *ppos,
9895 				int (*createfn)(const char *))
9896 {
9897 	char *kbuf, *buf, *tmp;
9898 	int ret = 0;
9899 	size_t done = 0;
9900 	size_t size;
9901 
9902 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9903 	if (!kbuf)
9904 		return -ENOMEM;
9905 
9906 	while (done < count) {
9907 		size = count - done;
9908 
9909 		if (size >= WRITE_BUFSIZE)
9910 			size = WRITE_BUFSIZE - 1;
9911 
9912 		if (copy_from_user(kbuf, buffer + done, size)) {
9913 			ret = -EFAULT;
9914 			goto out;
9915 		}
9916 		kbuf[size] = '\0';
9917 		buf = kbuf;
9918 		do {
9919 			tmp = strchr(buf, '\n');
9920 			if (tmp) {
9921 				*tmp = '\0';
9922 				size = tmp - buf + 1;
9923 			} else {
9924 				size = strlen(buf);
9925 				if (done + size < count) {
9926 					if (buf != kbuf)
9927 						break;
9928 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9929 					pr_warn("Line length is too long: Should be less than %d\n",
9930 						WRITE_BUFSIZE - 2);
9931 					ret = -EINVAL;
9932 					goto out;
9933 				}
9934 			}
9935 			done += size;
9936 
9937 			/* Remove comments */
9938 			tmp = strchr(buf, '#');
9939 
9940 			if (tmp)
9941 				*tmp = '\0';
9942 
9943 			ret = createfn(buf);
9944 			if (ret)
9945 				goto out;
9946 			buf += size;
9947 
9948 		} while (done < count);
9949 	}
9950 	ret = done;
9951 
9952 out:
9953 	kfree(kbuf);
9954 
9955 	return ret;
9956 }
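/*
 * Illustrative note: trace_parse_run_command() backs the write() path of
 * command-style tracefs files (e.g. the dynamic event interfaces), calling
 * createfn once per newline-terminated command with '#' comments stripped.
 * A typical caller-side use looks roughly like:
 *
 *   echo 'p:myprobe do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *
 * (the kprobe_events example assumes CONFIG_KPROBE_EVENTS and a tracefs
 * mount at /sys/kernel/tracing).
 */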
9957 
9958 __init static int tracer_alloc_buffers(void)
9959 {
9960 	int ring_buf_size;
9961 	int ret = -ENOMEM;
9962 
9963 
9964 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9965 		pr_warn("Tracing disabled due to lockdown\n");
9966 		return -EPERM;
9967 	}
9968 
9969 	/*
9970 	 * Make sure we don't accidentally add more trace options
9971 	 * than we have bits for.
9972 	 */
9973 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9974 
9975 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9976 		goto out;
9977 
9978 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9979 		goto out_free_buffer_mask;
9980 
9981 	/* Only allocate trace_printk buffers if a trace_printk exists */
9982 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9983 		/* Must be called before global_trace.buffer is allocated */
9984 		trace_printk_init_buffers();
9985 
9986 	/* To save memory, keep the ring buffer size to its minimum */
9987 	if (ring_buffer_expanded)
9988 		ring_buf_size = trace_buf_size;
9989 	else
9990 		ring_buf_size = 1;
9991 
9992 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9993 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9994 
9995 	raw_spin_lock_init(&global_trace.start_lock);
9996 
9997 	/*
9998 	 * The prepare callback allocates some memory for the ring buffer. We
9999 	 * don't free the buffer if the CPU goes down. If we were to free
10000 	 * the buffer, then the user would lose any trace that was in the
10001 	 * buffer. The memory will be removed once the "instance" is removed.
10002 	 */
10003 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10004 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10005 				      NULL);
10006 	if (ret < 0)
10007 		goto out_free_cpumask;
10008 	/* Used for event triggers */
10009 	ret = -ENOMEM;
10010 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10011 	if (!temp_buffer)
10012 		goto out_rm_hp_state;
10013 
10014 	if (trace_create_savedcmd() < 0)
10015 		goto out_free_temp_buffer;
10016 
10017 	/* TODO: make the number of buffers hot pluggable with CPUs */
10018 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10019 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10020 		goto out_free_savedcmd;
10021 	}
10022 
10023 	if (global_trace.buffer_disabled)
10024 		tracing_off();
10025 
10026 	if (trace_boot_clock) {
10027 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10028 		if (ret < 0)
10029 			pr_warn("Trace clock %s not defined, going back to default\n",
10030 				trace_boot_clock);
10031 	}
10032 
10033 	/*
10034 	 * register_tracer() might reference current_trace, so it
10035 	 * needs to be set before we register anything. This is
10036 	 * just a bootstrap of current_trace anyway.
10037 	 */
10038 	global_trace.current_trace = &nop_trace;
10039 
10040 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10041 
10042 	ftrace_init_global_array_ops(&global_trace);
10043 
10044 	init_trace_flags_index(&global_trace);
10045 
10046 	register_tracer(&nop_trace);
10047 
10048 	/* Function tracing may start here (via kernel command line) */
10049 	init_function_trace();
10050 
10051 	/* All seems OK, enable tracing */
10052 	tracing_disabled = 0;
10053 
10054 	atomic_notifier_chain_register(&panic_notifier_list,
10055 				       &trace_panic_notifier);
10056 
10057 	register_die_notifier(&trace_die_notifier);
10058 
10059 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10060 
10061 	INIT_LIST_HEAD(&global_trace.systems);
10062 	INIT_LIST_HEAD(&global_trace.events);
10063 	INIT_LIST_HEAD(&global_trace.hist_vars);
10064 	INIT_LIST_HEAD(&global_trace.err_log);
10065 	list_add(&global_trace.list, &ftrace_trace_arrays);
10066 
10067 	apply_trace_boot_options();
10068 
10069 	register_snapshot_cmd();
10070 
10071 	test_can_verify();
10072 
10073 	return 0;
10074 
10075 out_free_savedcmd:
10076 	free_saved_cmdlines_buffer(savedcmd);
10077 out_free_temp_buffer:
10078 	ring_buffer_free(temp_buffer);
10079 out_rm_hp_state:
10080 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10081 out_free_cpumask:
10082 	free_cpumask_var(global_trace.tracing_cpumask);
10083 out_free_buffer_mask:
10084 	free_cpumask_var(tracing_buffer_mask);
10085 out:
10086 	return ret;
10087 }
10088 
10089 void __init early_trace_init(void)
10090 {
10091 	if (tracepoint_printk) {
10092 		tracepoint_print_iter =
10093 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10094 		if (MEM_FAIL(!tracepoint_print_iter,
10095 			     "Failed to allocate trace iterator\n"))
10096 			tracepoint_printk = 0;
10097 		else
10098 			static_key_enable(&tracepoint_printk_key.key);
10099 	}
10100 	tracer_alloc_buffers();
10101 }
10102 
10103 void __init trace_init(void)
10104 {
10105 	trace_event_init();
10106 }
10107 
10108 __init static void clear_boot_tracer(void)
10109 {
10110 	/*
10111 	 * The default bootup tracer name is stored in an init section.
10112 	 * This function is called from a late initcall. If the boot
10113 	 * tracer was never found (registered), clear the pointer out to
10114 	 * prevent later registrations from accessing the init memory that
10115 	 * is about to be freed.
10116 	 */
10117 	if (!default_bootup_tracer)
10118 		return;
10119 
10120 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10121 	       default_bootup_tracer);
10122 	default_bootup_tracer = NULL;
10123 }
10124 
10125 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10126 __init static void tracing_set_default_clock(void)
10127 {
10128 	/* sched_clock_stable() is determined in late_initcall */
10129 	if (!trace_boot_clock && !sched_clock_stable()) {
10130 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10131 			pr_warn("Can not set tracing clock due to lockdown\n");
10132 			return;
10133 		}
10134 
10135 		printk(KERN_WARNING
10136 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10137 		       "If you want to keep using the local clock, then add:\n"
10138 		       "  \"trace_clock=local\"\n"
10139 		       "on the kernel command line\n");
10140 		tracing_set_clock(&global_trace, "global");
10141 	}
10142 }
10143 #else
10144 static inline void tracing_set_default_clock(void) { }
10145 #endif
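/*
 * Illustrative note (path assumes a tracefs mount at /sys/kernel/tracing):
 * the clock chosen here can be inspected or overridden at run time via the
 * "trace_clock" file, e.g.:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 */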
10146 
10147 __init static int late_trace_init(void)
10148 {
10149 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10150 		static_key_disable(&tracepoint_printk_key.key);
10151 		tracepoint_printk = 0;
10152 	}
10153 
10154 	tracing_set_default_clock();
10155 	clear_boot_tracer();
10156 	return 0;
10157 }
10158 
10159 late_initcall_sync(late_trace_init);
10160