xref: /openbmc/linux/kernel/trace/trace.c (revision 44c2cd80)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
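	 * "end" points to NULL as it must be different than "mod" or
	 * "eval_string".
	 * NOTE(review): this comment used to say "end" is first, but
	 * "next" is the first member below -- confirm the intended overlay
	 * with the other members of union trace_eval_map_item.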
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210 
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
/*
 * Enable the static keys matching @export's flags, then link @export
 * into @list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	/* Keys are bumped before the export becomes visible on the list */
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
373 
/*
 * Unlink @export from @list and drop the static keys matching its flags.
 *
 * The keys are only dropped when @export was actually found on the list.
 * Each key is incremented once per registered export by
 * add_ftrace_export(), so decrementing for an export that was never
 * linked would unbalance the counts for the remaining exports.
 *
 * Returns the result of rm_trace_export(): 0 on success, -1 when
 * @export is not on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Prints the pid (that is @v minus one) followed by a
 * newline and returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* Undo the +1 applied by trace_pid_start()/trace_pid_next() */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
676 
/* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always recreate a new array. The write is an all or nothing
698 	 * operation. Always create a new array when adding new pids by
699 	 * the user. If the operation fails, then the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
 * The default is purposely kept low: 16384 entries of 88 bytes each
 * (see TRACE_BUF_SIZE_DEFAULT). If a dump on oops happens, it will be
 * much appreciated to not have to wait for all that output. In any
 * case, this is configurable at both boot time and run time.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * serialize the access of the ring buffer
838  *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (as returned by ring_buffer_peek(), etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-cpu
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
/*
 * !CONFIG_SMP variant: access_lock is set up by DEFINE_MUTEX(), so
 * there is nothing to initialize at run time.
 */
static inline void trace_access_lock_init(void)
{
	/* Nothing to do */
}
915 
916 #endif
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
/*
 * !CONFIG_STACKTRACE stub: does nothing.
 */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
	/* Stack traces are not available in this configuration */
}
/*
 * !CONFIG_STACKTRACE stub: does nothing.
 *
 * @trace_ctx is an unsigned int here too, so that the stub has the same
 * signature as the CONFIG_STACKTRACE prototype above and matches what
 * the callers in this file pass in.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff), that just want to
973 	 * know if the ring buffer has been disabled, but it can handle
974 	 * races of where it gets disabled but we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, snapshot can not be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Same as tracing_snapshot(), except that the snapshot is conditional:
 * it only happens if the cond_snapshot.update() implementation that
 * receives @cond_data returns true. In other words, the trace array's
 * cond_snapshot update() operation uses @cond_data to decide whether the
 * snapshot should be taken and, if so, presumably saves it along with
 * the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	/* All of the checking is done by the common helper. */
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer. instead, resize it because
1239 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * if that has not happened yet. Use this only where it is safe to
 * sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * No snapshot is taken when the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set: only warns, once. */
void tracing_snapshot(void)
{
	WARN_ONCE(1,
		  "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set: only warns, once. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1,
		  "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set. */
void tracing_snapshot_alloc(void)
{
	/* Calling the tracing_snapshot() stub gives the warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/* Stub used when CONFIG_TRACER_SNAPSHOT is not set: always returns 0. */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races of where it gets disabled but we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
/* Convert @nsecs to microseconds, discarding any remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1522 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/*
 * Names of the trace options. These must match the bit positions in
 * trace_iterator_flags. The array is terminated by a NULL entry.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752 
/*
 * Without latency notifications the max latency file is simply created.
 * The arguments are parenthesized so that any expression may be passed.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", 0644, (d_tracer),	\
			  &(tr)->max_latency, &tracing_max_lat_fops)
1756 
1757 #endif
1758 
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768 	struct array_buffer *trace_buf = &tr->array_buffer;
1769 	struct array_buffer *max_buf = &tr->max_buffer;
1770 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772 
1773 	max_buf->cpu = cpu;
1774 	max_buf->time_start = data->preempt_timestamp;
1775 
1776 	max_data->saved_latency = tr->max_latency;
1777 	max_data->critical_start = data->critical_start;
1778 	max_data->critical_end = data->critical_end;
1779 
1780 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781 	max_data->pid = tsk->pid;
1782 	/*
1783 	 * If tsk == current, then use current_uid(), as that does not use
1784 	 * RCU. The irq tracer can be called out of RCU scope.
1785 	 */
1786 	if (tsk == current)
1787 		max_data->uid = current_uid();
1788 	else
1789 		max_data->uid = task_uid(tsk);
1790 
1791 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792 	max_data->policy = tsk->policy;
1793 	max_data->rt_priority = tsk->rt_priority;
1794 
1795 	/* record this tasks comm */
1796 	tracing_record_cmdline(tsk);
1797 	latency_fsnotify(tr);
1798 }
1799 
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812 	      void *cond_data)
1813 {
1814 	if (tr->stop_count)
1815 		return;
1816 
1817 	WARN_ON_ONCE(!irqs_disabled());
1818 
1819 	if (!tr->allocated_snapshot) {
1820 		/* Only the nop tracer should hit this when disabling */
1821 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822 		return;
1823 	}
1824 
1825 	arch_spin_lock(&tr->max_lock);
1826 
1827 	/* Inherit the recordable setting from array_buffer */
1828 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829 		ring_buffer_record_on(tr->max_buffer.buffer);
1830 	else
1831 		ring_buffer_record_off(tr->max_buffer.buffer);
1832 
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835 		goto out_unlock;
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841  out_unlock:
1842 	arch_spin_unlock(&tr->max_lock);
1843 }
1844 
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856 	int ret;
1857 
1858 	if (tr->stop_count)
1859 		return;
1860 
1861 	WARN_ON_ONCE(!irqs_disabled());
1862 	if (!tr->allocated_snapshot) {
1863 		/* Only the nop tracer should hit this when disabling */
1864 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865 		return;
1866 	}
1867 
1868 	arch_spin_lock(&tr->max_lock);
1869 
1870 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871 
1872 	if (ret == -EBUSY) {
1873 		/*
1874 		 * We failed to swap the buffer due to a commit taking
1875 		 * place on this CPU. We fail to record, but we reset
1876 		 * the max trace buffer (no one writes directly to it)
1877 		 * and flag that it failed.
1878 		 */
1879 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880 			"Failed to swap buffers due to commit in progress\n");
1881 	}
1882 
1883 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884 
1885 	__update_max_tr(tr, tsk, cpu);
1886 	arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889 
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892 	/* Iterators are static, they should be filled or empty */
1893 	if (trace_buffer_iter(iter, iter->cpu_file))
1894 		return 0;
1895 
1896 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897 				full);
1898 }
1899 
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set by init_trace_selftests(); until then selftests are postponed */
static bool selftests_can_run;

/* One postponed selftest: a list node plus the tracer it belongs to */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

/* Selftests saved by save_selftest(), run by init_trace_selftests() */
static LIST_HEAD(postponed_selftests);
1909 
1910 static int save_selftest(struct tracer *type)
1911 {
1912 	struct trace_selftests *selftest;
1913 
1914 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915 	if (!selftest)
1916 		return -ENOMEM;
1917 
1918 	selftest->type = type;
1919 	list_add(&selftest->list, &postponed_selftests);
1920 	return 0;
1921 }
1922 
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925 	struct trace_array *tr = &global_trace;
1926 	struct tracer *saved_tracer = tr->current_trace;
1927 	int ret;
1928 
1929 	if (!type->selftest || tracing_selftest_disabled)
1930 		return 0;
1931 
1932 	/*
1933 	 * If a tracer registers early in boot up (before scheduling is
1934 	 * initialized and such), then do not run its selftests yet.
1935 	 * Instead, run it a little later in the boot process.
1936 	 */
1937 	if (!selftests_can_run)
1938 		return save_selftest(type);
1939 
1940 	if (!tracing_is_on()) {
1941 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942 			type->name);
1943 		return 0;
1944 	}
1945 
1946 	/*
1947 	 * Run a selftest on this tracer.
1948 	 * Here we reset the trace buffer, and set the current
1949 	 * tracer to be this tracer. The tracer can then run some
1950 	 * internal tracing to verify that everything is in order.
1951 	 * If we fail, we do not register this tracer.
1952 	 */
1953 	tracing_reset_online_cpus(&tr->array_buffer);
1954 
1955 	tr->current_trace = type;
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		/* If we expanded the buffers, make sure the max is expanded too */
1960 		if (ring_buffer_expanded)
1961 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962 					   RING_BUFFER_ALL_CPUS);
1963 		tr->allocated_snapshot = true;
1964 	}
1965 #endif
1966 
1967 	/* the test is responsible for initializing and enabling */
1968 	pr_info("Testing tracer %s: ", type->name);
1969 	ret = type->selftest(type, tr);
1970 	/* the test is responsible for resetting too */
1971 	tr->current_trace = saved_tracer;
1972 	if (ret) {
1973 		printk(KERN_CONT "FAILED!\n");
1974 		/* Add the warning after printing 'FAILED' */
1975 		WARN_ON(1);
1976 		return -1;
1977 	}
1978 	/* Only reset on passing, to avoid touching corrupted buffers */
1979 	tracing_reset_online_cpus(&tr->array_buffer);
1980 
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982 	if (type->use_max_tr) {
1983 		tr->allocated_snapshot = false;
1984 
1985 		/* Shrink the max buffer again */
1986 		if (ring_buffer_expanded)
1987 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1988 					   RING_BUFFER_ALL_CPUS);
1989 	}
1990 #endif
1991 
1992 	printk(KERN_CONT "PASSED\n");
1993 	return 0;
1994 }
1995 
1996 static __init int init_trace_selftests(void)
1997 {
1998 	struct trace_selftests *p, *n;
1999 	struct tracer *t, **last;
2000 	int ret;
2001 
2002 	selftests_can_run = true;
2003 
2004 	mutex_lock(&trace_types_lock);
2005 
2006 	if (list_empty(&postponed_selftests))
2007 		goto out;
2008 
2009 	pr_info("Running postponed tracer tests:\n");
2010 
2011 	tracing_selftest_running = true;
2012 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013 		/* This loop can take minutes when sanitizers are enabled, so
2014 		 * lets make sure we allow RCU processing.
2015 		 */
2016 		cond_resched();
2017 		ret = run_tracer_selftest(p->type);
2018 		/* If the test fails, then warn and remove from available_tracers */
2019 		if (ret < 0) {
2020 			WARN(1, "tracer: %s failed selftest, disabling\n",
2021 			     p->type->name);
2022 			last = &trace_types;
2023 			for (t = trace_types; t; t = t->next) {
2024 				if (t == p->type) {
2025 					*last = t->next;
2026 					break;
2027 				}
2028 				last = &t->next;
2029 			}
2030 		}
2031 		list_del(&p->list);
2032 		kfree(p);
2033 	}
2034 	tracing_selftest_running = false;
2035 
2036  out:
2037 	mutex_unlock(&trace_types_lock);
2038 
2039 	return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest to run. */
static inline int run_tracer_selftest(struct tracer *type)
{
	const int passed = 0;

	return passed;
}
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048 
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050 
2051 static void __init apply_trace_boot_options(void);
2052 
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061 	struct tracer *t;
2062 	int ret = 0;
2063 
2064 	if (!type->name) {
2065 		pr_info("Tracer must have a name\n");
2066 		return -1;
2067 	}
2068 
2069 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071 		return -1;
2072 	}
2073 
2074 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075 		pr_warn("Can not register tracer %s due to lockdown\n",
2076 			   type->name);
2077 		return -EPERM;
2078 	}
2079 
2080 	mutex_lock(&trace_types_lock);
2081 
2082 	tracing_selftest_running = true;
2083 
2084 	for (t = trace_types; t; t = t->next) {
2085 		if (strcmp(type->name, t->name) == 0) {
2086 			/* already found */
2087 			pr_info("Tracer %s already registered\n",
2088 				type->name);
2089 			ret = -1;
2090 			goto out;
2091 		}
2092 	}
2093 
2094 	if (!type->set_flag)
2095 		type->set_flag = &dummy_set_flag;
2096 	if (!type->flags) {
2097 		/*allocate a dummy tracer_flags*/
2098 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099 		if (!type->flags) {
2100 			ret = -ENOMEM;
2101 			goto out;
2102 		}
2103 		type->flags->val = 0;
2104 		type->flags->opts = dummy_tracer_opt;
2105 	} else
2106 		if (!type->flags->opts)
2107 			type->flags->opts = dummy_tracer_opt;
2108 
2109 	/* store the tracer for __set_tracer_option */
2110 	type->flags->trace = type;
2111 
2112 	ret = run_tracer_selftest(type);
2113 	if (ret < 0)
2114 		goto out;
2115 
2116 	type->next = trace_types;
2117 	trace_types = type;
2118 	add_tracer_options(&global_trace, type);
2119 
2120  out:
2121 	tracing_selftest_running = false;
2122 	mutex_unlock(&trace_types_lock);
2123 
2124 	if (ret || !default_bootup_tracer)
2125 		goto out_unlock;
2126 
2127 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128 		goto out_unlock;
2129 
2130 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131 	/* Do we want this tracer to start on bootup? */
2132 	tracing_set_tracer(&global_trace, type->name);
2133 	default_bootup_tracer = NULL;
2134 
2135 	apply_trace_boot_options();
2136 
2137 	/* disable other selftests, since this will break it. */
2138 	disable_tracing_selftest("running a tracer");
2139 
2140  out_unlock:
2141 	return ret;
2142 }
2143 
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146 	struct trace_buffer *buffer = buf->buffer;
2147 
2148 	if (!buffer)
2149 		return;
2150 
2151 	ring_buffer_record_disable(buffer);
2152 
2153 	/* Make sure all commits have finished */
2154 	synchronize_rcu();
2155 	ring_buffer_reset_cpu(buffer, cpu);
2156 
2157 	ring_buffer_record_enable(buffer);
2158 }
2159 
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 
2172 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173 
2174 	ring_buffer_reset_online_cpus(buffer);
2175 
2176 	ring_buffer_record_enable(buffer);
2177 }
2178 
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182 	struct trace_array *tr;
2183 
2184 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185 		if (!tr->clear_trace)
2186 			continue;
2187 		tr->clear_trace = false;
2188 		tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190 		tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192 	}
2193 }
2194 
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200 
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203 
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209 	unsigned *map_cmdline_to_pid;
2210 	unsigned cmdline_num;
2211 	int cmdline_idx;
2212 	char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215 
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220 
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225 
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227 				    struct saved_cmdlines_buffer *s)
2228 {
2229 	s->map_cmdline_to_pid = kmalloc_array(val,
2230 					      sizeof(*s->map_cmdline_to_pid),
2231 					      GFP_KERNEL);
2232 	if (!s->map_cmdline_to_pid)
2233 		return -ENOMEM;
2234 
2235 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236 	if (!s->saved_cmdlines) {
2237 		kfree(s->map_cmdline_to_pid);
2238 		return -ENOMEM;
2239 	}
2240 
2241 	s->cmdline_idx = 0;
2242 	s->cmdline_num = val;
2243 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244 	       sizeof(s->map_pid_to_cmdline));
2245 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246 	       val * sizeof(*s->map_cmdline_to_pid));
2247 
2248 	return 0;
2249 }
2250 
2251 static int trace_create_savedcmd(void)
2252 {
2253 	int ret;
2254 
2255 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256 	if (!savedcmd)
2257 		return -ENOMEM;
2258 
2259 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260 	if (ret < 0) {
2261 		kfree(savedcmd);
2262 		savedcmd = NULL;
2263 		return -ENOMEM;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 int is_tracing_stopped(void)
2270 {
2271 	return global_trace.stop_count;
2272 }
2273 
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282 	struct trace_buffer *buffer;
2283 	unsigned long flags;
2284 
2285 	if (tracing_disabled)
2286 		return;
2287 
2288 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289 	if (--global_trace.stop_count) {
2290 		if (global_trace.stop_count < 0) {
2291 			/* Someone screwed up their debugging */
2292 			WARN_ON_ONCE(1);
2293 			global_trace.stop_count = 0;
2294 		}
2295 		goto out;
2296 	}
2297 
2298 	/* Prevent the buffers from switching */
2299 	arch_spin_lock(&global_trace.max_lock);
2300 
2301 	buffer = global_trace.array_buffer.buffer;
2302 	if (buffer)
2303 		ring_buffer_record_enable(buffer);
2304 
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306 	buffer = global_trace.max_buffer.buffer;
2307 	if (buffer)
2308 		ring_buffer_record_enable(buffer);
2309 #endif
2310 
2311 	arch_spin_unlock(&global_trace.max_lock);
2312 
2313  out:
2314 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316 
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319 	struct trace_buffer *buffer;
2320 	unsigned long flags;
2321 
2322 	if (tracing_disabled)
2323 		return;
2324 
2325 	/* If global, we need to also start the max tracer */
2326 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327 		return tracing_start();
2328 
2329 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2330 
2331 	if (--tr->stop_count) {
2332 		if (tr->stop_count < 0) {
2333 			/* Someone screwed up their debugging */
2334 			WARN_ON_ONCE(1);
2335 			tr->stop_count = 0;
2336 		}
2337 		goto out;
2338 	}
2339 
2340 	buffer = tr->array_buffer.buffer;
2341 	if (buffer)
2342 		ring_buffer_record_enable(buffer);
2343 
2344  out:
2345 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347 
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356 	struct trace_buffer *buffer;
2357 	unsigned long flags;
2358 
2359 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360 	if (global_trace.stop_count++)
2361 		goto out;
2362 
2363 	/* Prevent the buffers from switching */
2364 	arch_spin_lock(&global_trace.max_lock);
2365 
2366 	buffer = global_trace.array_buffer.buffer;
2367 	if (buffer)
2368 		ring_buffer_record_disable(buffer);
2369 
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371 	buffer = global_trace.max_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 #endif
2375 
2376 	arch_spin_unlock(&global_trace.max_lock);
2377 
2378  out:
2379 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381 
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384 	struct trace_buffer *buffer;
2385 	unsigned long flags;
2386 
2387 	/* If global, we need to also stop the max tracer */
2388 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389 		return tracing_stop();
2390 
2391 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2392 	if (tr->stop_count++)
2393 		goto out;
2394 
2395 	buffer = tr->array_buffer.buffer;
2396 	if (buffer)
2397 		ring_buffer_record_disable(buffer);
2398 
2399  out:
2400 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 */
2419 	if (!arch_spin_trylock(&trace_cmdline_lock))
2420 		return 0;
2421 
2422 	idx = savedcmd->map_pid_to_cmdline[tpid];
2423 	if (idx == NO_CMDLINE_MAP) {
2424 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425 
2426 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2427 		savedcmd->cmdline_idx = idx;
2428 	}
2429 
2430 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431 	set_cmdline(idx, tsk->comm);
2432 
2433 	arch_spin_unlock(&trace_cmdline_lock);
2434 
2435 	return 1;
2436 }
2437 
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440 	unsigned map;
2441 	int tpid;
2442 
2443 	if (!pid) {
2444 		strcpy(comm, "<idle>");
2445 		return;
2446 	}
2447 
2448 	if (WARN_ON_ONCE(pid < 0)) {
2449 		strcpy(comm, "<XXX>");
2450 		return;
2451 	}
2452 
2453 	tpid = pid & (PID_MAX_DEFAULT - 1);
2454 	map = savedcmd->map_pid_to_cmdline[tpid];
2455 	if (map != NO_CMDLINE_MAP) {
2456 		tpid = savedcmd->map_cmdline_to_pid[map];
2457 		if (tpid == pid) {
2458 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459 			return;
2460 		}
2461 	}
2462 	strcpy(comm, "<...>");
2463 }
2464 
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467 	preempt_disable();
2468 	arch_spin_lock(&trace_cmdline_lock);
2469 
2470 	__trace_find_cmdline(pid, comm);
2471 
2472 	arch_spin_unlock(&trace_cmdline_lock);
2473 	preempt_enable();
2474 }
2475 
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478 	/*
2479 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480 	 * if we observe a non-NULL tgid_map then we also observe the correct
2481 	 * tgid_map_max.
2482 	 */
2483 	int *map = smp_load_acquire(&tgid_map);
2484 
2485 	if (unlikely(!map || pid > tgid_map_max))
2486 		return NULL;
2487 
2488 	return &map[pid];
2489 }
2490 
/* Return the tgid recorded for @pid, or 0 if there is no entry for it. */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
2497 
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500 	int *ptr;
2501 
2502 	/* treat recording of idle task as a success */
2503 	if (!tsk->pid)
2504 		return 1;
2505 
2506 	ptr = trace_find_tgid_ptr(tsk->pid);
2507 	if (!ptr)
2508 		return 0;
2509 
2510 	*ptr = tsk->tgid;
2511 	return 1;
2512 }
2513 
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517 		return true;
2518 	if (!__this_cpu_read(trace_taskinfo_save))
2519 		return true;
2520 	return false;
2521 }
2522 
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532 	bool done;
2533 
2534 	if (tracing_record_taskinfo_skip(flags))
2535 		return;
2536 
2537 	/*
2538 	 * Record as much task information as possible. If some fail, continue
2539 	 * to try to record the others.
2540 	 */
2541 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543 
2544 	/* If recording any information failed, retry again soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560 					  struct task_struct *next, int flags)
2561 {
2562 	bool done;
2563 
2564 	if (tracing_record_taskinfo_skip(flags))
2565 		return;
2566 
2567 	/*
2568 	 * Record as much task information as possible. If some fail, continue
2569 	 * to try to record the others.
2570 	 */
2571 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575 
2576 	/* If recording any information failed, retry again soon. */
2577 	if (!done)
2578 		return;
2579 
2580 	__this_cpu_write(trace_taskinfo_save, false);
2581 }
2582 
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588 
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593 
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601 	return trace_seq_has_overflowed(s) ?
2602 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605 
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608 	unsigned int trace_flags = irqs_status;
2609 	unsigned int pc;
2610 
2611 	pc = preempt_count();
2612 
2613 	if (pc & NMI_MASK)
2614 		trace_flags |= TRACE_FLAG_NMI;
2615 	if (pc & HARDIRQ_MASK)
2616 		trace_flags |= TRACE_FLAG_HARDIRQ;
2617 	if (in_serving_softirq())
2618 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2619 
2620 	if (tif_need_resched())
2621 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622 	if (test_preempt_need_resched())
2623 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624 	return (trace_flags << 16) | (pc & 0xff);
2625 }
2626 
/* Out-of-line wrapper around __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);

	return event;
}
2635 
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639 
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656 	struct ring_buffer_event *event;
2657 	struct page *page;
2658 	int cpu;
2659 
2660 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661 
2662 	if (trace_buffered_event_ref++)
2663 		return;
2664 
2665 	for_each_tracing_cpu(cpu) {
2666 		page = alloc_pages_node(cpu_to_node(cpu),
2667 					GFP_KERNEL | __GFP_NORETRY, 0);
2668 		if (!page)
2669 			goto failed;
2670 
2671 		event = page_address(page);
2672 		memset(event, 0, sizeof(*event));
2673 
2674 		per_cpu(trace_buffered_event, cpu) = event;
2675 
2676 		preempt_disable();
2677 		if (cpu == smp_processor_id() &&
2678 		    __this_cpu_read(trace_buffered_event) !=
2679 		    per_cpu(trace_buffered_event, cpu))
2680 			WARN_ON_ONCE(1);
2681 		preempt_enable();
2682 	}
2683 
2684 	return;
2685  failed:
2686 	trace_buffered_event_disable();
2687 }
2688 
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691 	/* Probably not needed, but do it anyway */
2692 	smp_rmb();
2693 	this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695 
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698 	this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700 
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711 	int cpu;
2712 
2713 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714 
2715 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716 		return;
2717 
2718 	if (--trace_buffered_event_ref)
2719 		return;
2720 
2721 	preempt_disable();
2722 	/* For each CPU, set the buffer as used. */
2723 	smp_call_function_many(tracing_buffer_mask,
2724 			       disable_trace_buffered_event, NULL, 1);
2725 	preempt_enable();
2726 
2727 	/* Wait for all current users to finish */
2728 	synchronize_rcu();
2729 
2730 	for_each_tracing_cpu(cpu) {
2731 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732 		per_cpu(trace_buffered_event, cpu) = NULL;
2733 	}
2734 	/*
2735 	 * Make sure trace_buffered_event is NULL before clearing
2736 	 * trace_buffered_event_cnt.
2737 	 */
2738 	smp_wmb();
2739 
2740 	preempt_disable();
2741 	/* Do the work on each cpu */
2742 	smp_call_function_many(tracing_buffer_mask,
2743 			       enable_trace_buffered_event, NULL, 1);
2744 	preempt_enable();
2745 }
2746 
2747 static struct trace_buffer *temp_buffer;
2748 
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751 			  struct trace_event_file *trace_file,
2752 			  int type, unsigned long len,
2753 			  unsigned int trace_ctx)
2754 {
2755 	struct ring_buffer_event *entry;
2756 	struct trace_array *tr = trace_file->tr;
2757 	int val;
2758 
2759 	*current_rb = tr->array_buffer.buffer;
2760 
2761 	if (!tr->no_filter_buffering_ref &&
2762 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763 	    (entry = this_cpu_read(trace_buffered_event))) {
2764 		/*
2765 		 * Filtering is on, so try to use the per cpu buffer first.
2766 		 * This buffer will simulate a ring_buffer_event,
2767 		 * where the type_len is zero and the array[0] will
2768 		 * hold the full length.
2769 		 * (see include/linux/ring-buffer.h for details on
2770 		 *  how the ring_buffer_event is structured).
2771 		 *
2772 		 * Using a temp buffer during filtering and copying it
2773 		 * on a matched filter is quicker than writing directly
2774 		 * into the ring buffer and then discarding it when
2775 		 * it doesn't match. That is because the discard
2776 		 * requires several atomic operations to get right.
2777 		 * Copying on match and doing nothing on a failed match
2778 		 * is still quicker than no copy on match, but having
2779 		 * to discard out of the ring buffer on a failed match.
2780 		 */
2781 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782 
2783 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2784 
2785 		/*
2786 		 * Preemption is disabled, but interrupts and NMIs
2787 		 * can still come in now. If that happens after
2788 		 * the above increment, then it will have to go
2789 		 * back to the old method of allocating the event
2790 		 * on the ring buffer, and if the filter fails, it
2791 		 * will have to call ring_buffer_discard_commit()
2792 		 * to remove it.
2793 		 *
2794 		 * Need to also check the unlikely case that the
2795 		 * length is bigger than the temp buffer size.
2796 		 * If that happens, then the reserve is pretty much
2797 		 * guaranteed to fail, as the ring buffer currently
2798 		 * only allows events less than a page. But that may
2799 		 * change in the future, so let the ring buffer reserve
2800 		 * handle the failure in that case.
2801 		 */
2802 		if (val == 1 && likely(len <= max_len)) {
2803 			trace_event_setup(entry, type, trace_ctx);
2804 			entry->array[0] = len;
2805 			return entry;
2806 		}
2807 		this_cpu_dec(trace_buffered_event_cnt);
2808 	}
2809 
2810 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811 					    trace_ctx);
2812 	/*
2813 	 * If tracing is off, but we have triggers enabled
2814 	 * we still need to look at the event data. Use the temp_buffer
2815 	 * to store the trace event for the trigger to use. It's recursive
2816 	 * safe and will not be recorded anywhere.
2817 	 */
2818 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819 		*current_rb = temp_buffer;
2820 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821 						    trace_ctx);
2822 	}
2823 	return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826 
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829 
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832 	struct trace_event_call *event_call;
2833 	struct trace_event_file *file;
2834 	struct trace_event *event;
2835 	unsigned long flags;
2836 	struct trace_iterator *iter = tracepoint_print_iter;
2837 
2838 	/* We should never get here if iter is NULL */
2839 	if (WARN_ON_ONCE(!iter))
2840 		return;
2841 
2842 	event_call = fbuffer->trace_file->event_call;
2843 	if (!event_call || !event_call->event.funcs ||
2844 	    !event_call->event.funcs->trace)
2845 		return;
2846 
2847 	file = fbuffer->trace_file;
2848 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850 	     !filter_match_preds(file->filter, fbuffer->entry)))
2851 		return;
2852 
2853 	event = &fbuffer->trace_file->event_call->event;
2854 
2855 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856 	trace_seq_init(&iter->seq);
2857 	iter->ent = fbuffer->entry;
2858 	event_call->event.funcs->trace(iter, 0, event);
2859 	trace_seq_putc(&iter->seq, 0);
2860 	printk("%s", iter->seq.buffer);
2861 
2862 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864 
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866 			     void *buffer, size_t *lenp,
2867 			     loff_t *ppos)
2868 {
2869 	int save_tracepoint_printk;
2870 	int ret;
2871 
2872 	mutex_lock(&tracepoint_printk_mutex);
2873 	save_tracepoint_printk = tracepoint_printk;
2874 
2875 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876 
2877 	/*
2878 	 * This will force exiting early, as tracepoint_printk
2879 	 * is always zero when tracepoint_printk_iter is not allocated
2880 	 */
2881 	if (!tracepoint_print_iter)
2882 		tracepoint_printk = 0;
2883 
2884 	if (save_tracepoint_printk == tracepoint_printk)
2885 		goto out;
2886 
2887 	if (tracepoint_printk)
2888 		static_key_enable(&tracepoint_printk_key.key);
2889 	else
2890 		static_key_disable(&tracepoint_printk_key.key);
2891 
2892  out:
2893 	mutex_unlock(&tracepoint_printk_mutex);
2894 
2895 	return ret;
2896 }
2897 
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900 	if (static_key_false(&tracepoint_printk_key.key))
2901 		output_printk(fbuffer);
2902 
2903 	if (static_branch_unlikely(&trace_event_exports_enabled))
2904 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2905 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2906 				    fbuffer->event, fbuffer->entry,
2907 				    fbuffer->trace_ctx, fbuffer->regs);
2908 }
2909 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2910 
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack traces
 * for @tr (each helper checks the trace array's flags itself).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	const int skip = regs ? 0 : STACK_SKIP;

	__buffer_unlock_commit(buffer, event);

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2937 
/*
 * Like trace_buffer_unlock_commit_regs(), but only commits @event and
 * never records a stack trace.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	/* Plain commit; nothing else to do. */
	__buffer_unlock_commit(buffer, event);
}
2947 
2948 void
2949 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2950 	       parent_ip, unsigned int trace_ctx)
2951 {
2952 	struct trace_event_call *call = &event_function;
2953 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2954 	struct ring_buffer_event *event;
2955 	struct ftrace_entry *entry;
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2958 					    trace_ctx);
2959 	if (!event)
2960 		return;
2961 	entry	= ring_buffer_event_data(event);
2962 	entry->ip			= ip;
2963 	entry->parent_ip		= parent_ip;
2964 
2965 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2966 		if (static_branch_unlikely(&trace_function_exports_enabled))
2967 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2968 		__buffer_unlock_commit(buffer, event);
2969 	}
2970 }
2971 
2972 #ifdef CONFIG_STACKTRACE
2973 
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of return addresses that fit in one scratch stack. */
#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)

/* Scratch area that one stack trace is saved into before it is copied out. */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level. */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU scratch stacks used by __ftrace_trace_stack(). */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU count of scratch stacks in use; it selects the next free one. */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2990 
2991 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2992 				 unsigned int trace_ctx,
2993 				 int skip, struct pt_regs *regs)
2994 {
2995 	struct trace_event_call *call = &event_kernel_stack;
2996 	struct ring_buffer_event *event;
2997 	unsigned int size, nr_entries;
2998 	struct ftrace_stack *fstack;
2999 	struct stack_entry *entry;
3000 	int stackidx;
3001 
3002 	/*
3003 	 * Add one, for this function and the call to save_stack_trace()
3004 	 * If regs is set, then these functions will not be in the way.
3005 	 */
3006 #ifndef CONFIG_UNWINDER_ORC
3007 	if (!regs)
3008 		skip++;
3009 #endif
3010 
3011 	preempt_disable_notrace();
3012 
3013 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3014 
3015 	/* This should never happen. If it does, yell once and skip */
3016 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3017 		goto out;
3018 
3019 	/*
3020 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3021 	 * interrupt will either see the value pre increment or post
3022 	 * increment. If the interrupt happens pre increment it will have
3023 	 * restored the counter when it returns.  We just need a barrier to
3024 	 * keep gcc from moving things around.
3025 	 */
3026 	barrier();
3027 
3028 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3029 	size = ARRAY_SIZE(fstack->calls);
3030 
3031 	if (regs) {
3032 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3033 						   size, skip);
3034 	} else {
3035 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3036 	}
3037 
3038 	size = nr_entries * sizeof(unsigned long);
3039 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3040 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3041 				    trace_ctx);
3042 	if (!event)
3043 		goto out;
3044 	entry = ring_buffer_event_data(event);
3045 
3046 	memcpy(&entry->caller, fstack->calls, size);
3047 	entry->size = nr_entries;
3048 
3049 	if (!call_filter_check_discard(call, entry, buffer, event))
3050 		__buffer_unlock_commit(buffer, event);
3051 
3052  out:
3053 	/* Again, don't let gcc optimize things here */
3054 	barrier();
3055 	__this_cpu_dec(ftrace_stack_reserve);
3056 	preempt_enable_notrace();
3057 
3058 }
3059 
3060 static inline void ftrace_trace_stack(struct trace_array *tr,
3061 				      struct trace_buffer *buffer,
3062 				      unsigned int trace_ctx,
3063 				      int skip, struct pt_regs *regs)
3064 {
3065 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3066 		return;
3067 
3068 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3069 }
3070 
3071 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3072 		   int skip)
3073 {
3074 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3075 
3076 	if (rcu_is_watching()) {
3077 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3078 		return;
3079 	}
3080 
3081 	/*
3082 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3083 	 * but if the above rcu_is_watching() failed, then the NMI
3084 	 * triggered someplace critical, and rcu_irq_enter() should
3085 	 * not be called from NMI.
3086 	 */
3087 	if (unlikely(in_nmi()))
3088 		return;
3089 
3090 	rcu_irq_enter_irqson();
3091 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3092 	rcu_irq_exit_irqson();
3093 }
3094 
3095 /**
3096  * trace_dump_stack - record a stack back trace in the trace buffer
3097  * @skip: Number of functions to skip (helper handlers)
3098  */
3099 void trace_dump_stack(int skip)
3100 {
3101 	if (tracing_disabled || tracing_selftest_running)
3102 		return;
3103 
3104 #ifndef CONFIG_UNWINDER_ORC
3105 	/* Skip 1 to skip this function. */
3106 	skip++;
3107 #endif
3108 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3109 			     tracing_gen_ctx(), skip, NULL);
3110 }
3111 EXPORT_SYMBOL_GPL(trace_dump_stack);
3112 
3113 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3114 static DEFINE_PER_CPU(int, user_stack_count);
3115 
3116 static void
3117 ftrace_trace_userstack(struct trace_array *tr,
3118 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3119 {
3120 	struct trace_event_call *call = &event_user_stack;
3121 	struct ring_buffer_event *event;
3122 	struct userstack_entry *entry;
3123 
3124 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3125 		return;
3126 
3127 	/*
3128 	 * NMIs can not handle page faults, even with fix ups.
3129 	 * The save user stack can (and often does) fault.
3130 	 */
3131 	if (unlikely(in_nmi()))
3132 		return;
3133 
3134 	/*
3135 	 * prevent recursion, since the user stack tracing may
3136 	 * trigger other kernel events.
3137 	 */
3138 	preempt_disable();
3139 	if (__this_cpu_read(user_stack_count))
3140 		goto out;
3141 
3142 	__this_cpu_inc(user_stack_count);
3143 
3144 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3145 					    sizeof(*entry), trace_ctx);
3146 	if (!event)
3147 		goto out_drop_count;
3148 	entry	= ring_buffer_event_data(event);
3149 
3150 	entry->tgid		= current->tgid;
3151 	memset(&entry->caller, 0, sizeof(entry->caller));
3152 
3153 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3154 	if (!call_filter_check_discard(call, entry, buffer, event))
3155 		__buffer_unlock_commit(buffer, event);
3156 
3157  out_drop_count:
3158 	__this_cpu_dec(user_stack_count);
3159  out:
3160 	preempt_enable();
3161 }
3162 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Stub for builds without CONFIG_USER_STACKTRACE_SUPPORT: records nothing. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3168 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3169 
3170 #endif /* CONFIG_STACKTRACE */
3171 
3172 static inline void
3173 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3174 			  unsigned long long delta)
3175 {
3176 	entry->bottom_delta_ts = delta & U32_MAX;
3177 	entry->top_delta_ts = (delta >> 32);
3178 }
3179 
3180 void trace_last_func_repeats(struct trace_array *tr,
3181 			     struct trace_func_repeats *last_info,
3182 			     unsigned int trace_ctx)
3183 {
3184 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3185 	struct func_repeats_entry *entry;
3186 	struct ring_buffer_event *event;
3187 	u64 delta;
3188 
3189 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3190 					    sizeof(*entry), trace_ctx);
3191 	if (!event)
3192 		return;
3193 
3194 	delta = ring_buffer_event_time_stamp(buffer, event) -
3195 		last_info->ts_last_call;
3196 
3197 	entry = ring_buffer_event_data(event);
3198 	entry->ip = last_info->ip;
3199 	entry->parent_ip = last_info->parent_ip;
3200 	entry->count = last_info->count;
3201 	func_repeats_set_delta_ts(entry, delta);
3202 
3203 	__buffer_unlock_commit(buffer, event);
3204 }
3205 
3206 /* created for use with alloc_percpu */
3207 struct trace_buffer_struct {
3208 	int nesting;
3209 	char buffer[4][TRACE_BUF_SIZE];
3210 };
3211 
3212 static struct trace_buffer_struct *trace_percpu_buffer;
3213 
3214 /*
3215  * This allows for lockless recording.  If we're nested too deeply, then
3216  * this returns NULL.
3217  */
3218 static char *get_trace_buf(void)
3219 {
3220 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3221 
3222 	if (!buffer || buffer->nesting >= 4)
3223 		return NULL;
3224 
3225 	buffer->nesting++;
3226 
3227 	/* Interrupts must see nesting incremented before we use the buffer */
3228 	barrier();
3229 	return &buffer->buffer[buffer->nesting - 1][0];
3230 }
3231 
3232 static void put_trace_buf(void)
3233 {
3234 	/* Don't let the decrement of nesting leak before this */
3235 	barrier();
3236 	this_cpu_dec(trace_percpu_buffer->nesting);
3237 }
3238 
3239 static int alloc_percpu_trace_buffer(void)
3240 {
3241 	struct trace_buffer_struct *buffers;
3242 
3243 	if (trace_percpu_buffer)
3244 		return 0;
3245 
3246 	buffers = alloc_percpu(struct trace_buffer_struct);
3247 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3248 		return -ENOMEM;
3249 
3250 	trace_percpu_buffer = buffers;
3251 	return 0;
3252 }
3253 
3254 static int buffers_allocated;
3255 
3256 void trace_printk_init_buffers(void)
3257 {
3258 	if (buffers_allocated)
3259 		return;
3260 
3261 	if (alloc_percpu_trace_buffer())
3262 		return;
3263 
3264 	/* trace_printk() is for debug use only. Don't use it in production. */
3265 
3266 	pr_warn("\n");
3267 	pr_warn("**********************************************************\n");
3268 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3269 	pr_warn("**                                                      **\n");
3270 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3271 	pr_warn("**                                                      **\n");
3272 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3273 	pr_warn("** unsafe for production use.                           **\n");
3274 	pr_warn("**                                                      **\n");
3275 	pr_warn("** If you see this message and you are not debugging    **\n");
3276 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3277 	pr_warn("**                                                      **\n");
3278 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3279 	pr_warn("**********************************************************\n");
3280 
3281 	/* Expand the buffers to set size */
3282 	tracing_update_buffers();
3283 
3284 	buffers_allocated = 1;
3285 
3286 	/*
3287 	 * trace_printk_init_buffers() can be called by modules.
3288 	 * If that happens, then we need to start cmdline recording
3289 	 * directly here. If the global_trace.buffer is already
3290 	 * allocated here, then this was called by module code.
3291 	 */
3292 	if (global_trace.array_buffer.buffer)
3293 		tracing_start_cmdline_record();
3294 }
3295 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3296 
3297 void trace_printk_start_comm(void)
3298 {
3299 	/* Start tracing comms if trace printk is set */
3300 	if (!buffers_allocated)
3301 		return;
3302 	tracing_start_cmdline_record();
3303 }
3304 
3305 static void trace_printk_start_stop_comm(int enabled)
3306 {
3307 	if (!buffers_allocated)
3308 		return;
3309 
3310 	if (enabled)
3311 		tracing_start_cmdline_record();
3312 	else
3313 		tracing_stop_cmdline_record();
3314 }
3315 
3316 /**
3317  * trace_vbprintk - write binary msg to tracing buffer
3318  * @ip:    The address of the caller
3319  * @fmt:   The string format to write to the buffer
3320  * @args:  Arguments for @fmt
3321  */
3322 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3323 {
3324 	struct trace_event_call *call = &event_bprint;
3325 	struct ring_buffer_event *event;
3326 	struct trace_buffer *buffer;
3327 	struct trace_array *tr = &global_trace;
3328 	struct bprint_entry *entry;
3329 	unsigned int trace_ctx;
3330 	char *tbuffer;
3331 	int len = 0, size;
3332 
3333 	if (unlikely(tracing_selftest_running || tracing_disabled))
3334 		return 0;
3335 
3336 	/* Don't pollute graph traces with trace_vprintk internals */
3337 	pause_graph_tracing();
3338 
3339 	trace_ctx = tracing_gen_ctx();
3340 	preempt_disable_notrace();
3341 
3342 	tbuffer = get_trace_buf();
3343 	if (!tbuffer) {
3344 		len = 0;
3345 		goto out_nobuffer;
3346 	}
3347 
3348 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3349 
3350 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3351 		goto out_put;
3352 
3353 	size = sizeof(*entry) + sizeof(u32) * len;
3354 	buffer = tr->array_buffer.buffer;
3355 	ring_buffer_nest_start(buffer);
3356 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3357 					    trace_ctx);
3358 	if (!event)
3359 		goto out;
3360 	entry = ring_buffer_event_data(event);
3361 	entry->ip			= ip;
3362 	entry->fmt			= fmt;
3363 
3364 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3365 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3366 		__buffer_unlock_commit(buffer, event);
3367 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3368 	}
3369 
3370 out:
3371 	ring_buffer_nest_end(buffer);
3372 out_put:
3373 	put_trace_buf();
3374 
3375 out_nobuffer:
3376 	preempt_enable_notrace();
3377 	unpause_graph_tracing();
3378 
3379 	return len;
3380 }
3381 EXPORT_SYMBOL_GPL(trace_vbprintk);
3382 
3383 __printf(3, 0)
3384 static int
3385 __trace_array_vprintk(struct trace_buffer *buffer,
3386 		      unsigned long ip, const char *fmt, va_list args)
3387 {
3388 	struct trace_event_call *call = &event_print;
3389 	struct ring_buffer_event *event;
3390 	int len = 0, size;
3391 	struct print_entry *entry;
3392 	unsigned int trace_ctx;
3393 	char *tbuffer;
3394 
3395 	if (tracing_disabled || tracing_selftest_running)
3396 		return 0;
3397 
3398 	/* Don't pollute graph traces with trace_vprintk internals */
3399 	pause_graph_tracing();
3400 
3401 	trace_ctx = tracing_gen_ctx();
3402 	preempt_disable_notrace();
3403 
3404 
3405 	tbuffer = get_trace_buf();
3406 	if (!tbuffer) {
3407 		len = 0;
3408 		goto out_nobuffer;
3409 	}
3410 
3411 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412 
3413 	size = sizeof(*entry) + len + 1;
3414 	ring_buffer_nest_start(buffer);
3415 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416 					    trace_ctx);
3417 	if (!event)
3418 		goto out;
3419 	entry = ring_buffer_event_data(event);
3420 	entry->ip = ip;
3421 
3422 	memcpy(&entry->buf, tbuffer, len + 1);
3423 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3424 		__buffer_unlock_commit(buffer, event);
3425 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3426 	}
3427 
3428 out:
3429 	ring_buffer_nest_end(buffer);
3430 	put_trace_buf();
3431 
3432 out_nobuffer:
3433 	preempt_enable_notrace();
3434 	unpause_graph_tracing();
3435 
3436 	return len;
3437 }
3438 
3439 __printf(3, 0)
3440 int trace_array_vprintk(struct trace_array *tr,
3441 			unsigned long ip, const char *fmt, va_list args)
3442 {
3443 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3444 }
3445 
3446 /**
3447  * trace_array_printk - Print a message to a specific instance
3448  * @tr: The instance trace_array descriptor
3449  * @ip: The instruction pointer that this is called from.
3450  * @fmt: The format to print (printf format)
3451  *
3452  * If a subsystem sets up its own instance, they have the right to
3453  * printk strings into their tracing instance buffer using this
3454  * function. Note, this function will not write into the top level
3455  * buffer (use trace_printk() for that), as writing into the top level
3456  * buffer should only have events that can be individually disabled.
3457  * trace_printk() is only used for debugging a kernel, and should not
3458  * be ever incorporated in normal use.
3459  *
3460  * trace_array_printk() can be used, as it will not add noise to the
3461  * top level tracing buffer.
3462  *
3463  * Note, trace_array_init_printk() must be called on @tr before this
3464  * can be used.
3465  */
3466 __printf(3, 0)
3467 int trace_array_printk(struct trace_array *tr,
3468 		       unsigned long ip, const char *fmt, ...)
3469 {
3470 	int ret;
3471 	va_list ap;
3472 
3473 	if (!tr)
3474 		return -ENOENT;
3475 
3476 	/* This is only allowed for created instances */
3477 	if (tr == &global_trace)
3478 		return 0;
3479 
3480 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3481 		return 0;
3482 
3483 	va_start(ap, fmt);
3484 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3485 	va_end(ap);
3486 	return ret;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_array_printk);
3489 
3490 /**
3491  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3492  * @tr: The trace array to initialize the buffers for
3493  *
3494  * As trace_array_printk() only writes into instances, they are OK to
3495  * have in the kernel (unlike trace_printk()). This needs to be called
3496  * before trace_array_printk() can be used on a trace_array.
3497  */
3498 int trace_array_init_printk(struct trace_array *tr)
3499 {
3500 	if (!tr)
3501 		return -ENOENT;
3502 
3503 	/* This is only allowed for created instances */
3504 	if (tr == &global_trace)
3505 		return -EINVAL;
3506 
3507 	return alloc_percpu_trace_buffer();
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3510 
3511 __printf(3, 4)
3512 int trace_array_printk_buf(struct trace_buffer *buffer,
3513 			   unsigned long ip, const char *fmt, ...)
3514 {
3515 	int ret;
3516 	va_list ap;
3517 
3518 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3519 		return 0;
3520 
3521 	va_start(ap, fmt);
3522 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3523 	va_end(ap);
3524 	return ret;
3525 }
3526 
3527 __printf(2, 0)
3528 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3529 {
3530 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3531 }
3532 EXPORT_SYMBOL_GPL(trace_vprintk);
3533 
3534 static void trace_iterator_increment(struct trace_iterator *iter)
3535 {
3536 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3537 
3538 	iter->idx++;
3539 	if (buf_iter)
3540 		ring_buffer_iter_advance(buf_iter);
3541 }
3542 
3543 static struct trace_entry *
3544 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3545 		unsigned long *lost_events)
3546 {
3547 	struct ring_buffer_event *event;
3548 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3549 
3550 	if (buf_iter) {
3551 		event = ring_buffer_iter_peek(buf_iter, ts);
3552 		if (lost_events)
3553 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3554 				(unsigned long)-1 : 0;
3555 	} else {
3556 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3557 					 lost_events);
3558 	}
3559 
3560 	if (event) {
3561 		iter->ent_size = ring_buffer_event_length(event);
3562 		return ring_buffer_event_data(event);
3563 	}
3564 	iter->ent_size = 0;
3565 	return NULL;
3566 }
3567 
3568 static struct trace_entry *
3569 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3570 		  unsigned long *missing_events, u64 *ent_ts)
3571 {
3572 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3573 	struct trace_entry *ent, *next = NULL;
3574 	unsigned long lost_events = 0, next_lost = 0;
3575 	int cpu_file = iter->cpu_file;
3576 	u64 next_ts = 0, ts;
3577 	int next_cpu = -1;
3578 	int next_size = 0;
3579 	int cpu;
3580 
3581 	/*
3582 	 * If we are in a per_cpu trace file, don't bother by iterating over
3583 	 * all cpu and peek directly.
3584 	 */
3585 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3586 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3587 			return NULL;
3588 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3589 		if (ent_cpu)
3590 			*ent_cpu = cpu_file;
3591 
3592 		return ent;
3593 	}
3594 
3595 	for_each_tracing_cpu(cpu) {
3596 
3597 		if (ring_buffer_empty_cpu(buffer, cpu))
3598 			continue;
3599 
3600 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3601 
3602 		/*
3603 		 * Pick the entry with the smallest timestamp:
3604 		 */
3605 		if (ent && (!next || ts < next_ts)) {
3606 			next = ent;
3607 			next_cpu = cpu;
3608 			next_ts = ts;
3609 			next_lost = lost_events;
3610 			next_size = iter->ent_size;
3611 		}
3612 	}
3613 
3614 	iter->ent_size = next_size;
3615 
3616 	if (ent_cpu)
3617 		*ent_cpu = next_cpu;
3618 
3619 	if (ent_ts)
3620 		*ent_ts = next_ts;
3621 
3622 	if (missing_events)
3623 		*missing_events = next_lost;
3624 
3625 	return next;
3626 }
3627 
3628 #define STATIC_FMT_BUF_SIZE	128
3629 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3630 
3631 static char *trace_iter_expand_format(struct trace_iterator *iter)
3632 {
3633 	char *tmp;
3634 
3635 	/*
3636 	 * iter->tr is NULL when used with tp_printk, which makes
3637 	 * this get called where it is not safe to call krealloc().
3638 	 */
3639 	if (!iter->tr || iter->fmt == static_fmt_buf)
3640 		return NULL;
3641 
3642 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3643 		       GFP_KERNEL);
3644 	if (tmp) {
3645 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3646 		iter->fmt = tmp;
3647 	}
3648 
3649 	return tmp;
3650 }
3651 
3652 /* Returns true if the string is safe to dereference from an event */
3653 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3654 {
3655 	unsigned long addr = (unsigned long)str;
3656 	struct trace_event *trace_event;
3657 	struct trace_event_call *event;
3658 
3659 	/* OK if part of the event data */
3660 	if ((addr >= (unsigned long)iter->ent) &&
3661 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3662 		return true;
3663 
3664 	/* OK if part of the temp seq buffer */
3665 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3666 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3667 		return true;
3668 
3669 	/* Core rodata can not be freed */
3670 	if (is_kernel_rodata(addr))
3671 		return true;
3672 
3673 	if (trace_is_tracepoint_string(str))
3674 		return true;
3675 
3676 	/*
3677 	 * Now this could be a module event, referencing core module
3678 	 * data, which is OK.
3679 	 */
3680 	if (!iter->ent)
3681 		return false;
3682 
3683 	trace_event = ftrace_find_event(iter->ent->type);
3684 	if (!trace_event)
3685 		return false;
3686 
3687 	event = container_of(trace_event, struct trace_event_call, event);
3688 	if (!event->mod)
3689 		return false;
3690 
3691 	/* Would rather have rodata, but this will suffice */
3692 	if (within_module_core(addr, event->mod))
3693 		return true;
3694 
3695 	return false;
3696 }
3697 
3698 static const char *show_buffer(struct trace_seq *s)
3699 {
3700 	struct seq_buf *seq = &s->seq;
3701 
3702 	seq_buf_terminate(seq);
3703 
3704 	return seq->buffer;
3705 }
3706 
/*
 * While this key is set, trace_check_vprintf() skips the string checks
 * and prints the event directly. test_can_verify() increments it when its
 * va_list probe returns 0.
 */
static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3708 
/*
 * Probe how va_list is handed to vsnprintf() on this architecture.
 *
 * Returns the variadic argument that va_arg() yields after vsnprintf() has
 * processed a single "%d" from the same va_list. @fmt itself is not used
 * for formatting.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list args;
	int next;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list that is
	 * passed to it, which happens where the va_list is passed by
	 * reference. Some architectures (like x86_32) pass it by value, so
	 * vsnprintf() does not modify the caller's va_list, and the verifier
	 * would then need to understand every conversion vsnprintf() can
	 * handle. If it is passed by value, the verifier is disabled.
	 */
	va_start(args, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", args);
	next = va_arg(args, int);
	va_end(args);

	return next;
}
3731 
3732 static void test_can_verify(void)
3733 {
3734 	if (!test_can_verify_check("%d %d", 0, 1)) {
3735 		pr_info("trace event string verifier disabled\n");
3736 		static_branch_inc(&trace_no_verify);
3737 	}
3738 }
3739 
3740 /**
3741  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3742  * @iter: The iterator that holds the seq buffer and the event being printed
3743  * @fmt: The format used to print the event
3744  * @ap: The va_list holding the data to print from @fmt.
3745  *
3746  * This writes the data into the @iter->seq buffer using the data from
3747  * @fmt and @ap. If the format has a %s, then the source of the string
3748  * is examined to make sure it is safe to print, otherwise it will
3749  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3750  * pointer.
3751  */
3752 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3753 			 va_list ap)
3754 {
3755 	const char *p = fmt;
3756 	const char *str;
3757 	int i, j;
3758 
3759 	if (WARN_ON_ONCE(!fmt))
3760 		return;
3761 
3762 	if (static_branch_unlikely(&trace_no_verify))
3763 		goto print;
3764 
3765 	/* Don't bother checking when doing a ftrace_dump() */
3766 	if (iter->fmt == static_fmt_buf)
3767 		goto print;
3768 
3769 	while (*p) {
3770 		bool star = false;
3771 		int len = 0;
3772 
3773 		j = 0;
3774 
3775 		/* We only care about %s and variants */
3776 		for (i = 0; p[i]; i++) {
3777 			if (i + 1 >= iter->fmt_size) {
3778 				/*
3779 				 * If we can't expand the copy buffer,
3780 				 * just print it.
3781 				 */
3782 				if (!trace_iter_expand_format(iter))
3783 					goto print;
3784 			}
3785 
3786 			if (p[i] == '\\' && p[i+1]) {
3787 				i++;
3788 				continue;
3789 			}
3790 			if (p[i] == '%') {
3791 				/* Need to test cases like %08.*s */
3792 				for (j = 1; p[i+j]; j++) {
3793 					if (isdigit(p[i+j]) ||
3794 					    p[i+j] == '.')
3795 						continue;
3796 					if (p[i+j] == '*') {
3797 						star = true;
3798 						continue;
3799 					}
3800 					break;
3801 				}
3802 				if (p[i+j] == 's')
3803 					break;
3804 				star = false;
3805 			}
3806 			j = 0;
3807 		}
3808 		/* If no %s found then just print normally */
3809 		if (!p[i])
3810 			break;
3811 
3812 		/* Copy up to the %s, and print that */
3813 		strncpy(iter->fmt, p, i);
3814 		iter->fmt[i] = '\0';
3815 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3816 
3817 		if (star)
3818 			len = va_arg(ap, int);
3819 
3820 		/* The ap now points to the string data of the %s */
3821 		str = va_arg(ap, const char *);
3822 
3823 		/*
3824 		 * If you hit this warning, it is likely that the
3825 		 * trace event in question used %s on a string that
3826 		 * was saved at the time of the event, but may not be
3827 		 * around when the trace is read. Use __string(),
3828 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3829 		 * instead. See samples/trace_events/trace-events-sample.h
3830 		 * for reference.
3831 		 */
3832 		if (WARN_ONCE(!trace_safe_str(iter, str),
3833 			      "fmt: '%s' current_buffer: '%s'",
3834 			      fmt, show_buffer(&iter->seq))) {
3835 			int ret;
3836 
3837 			/* Try to safely read the string */
3838 			if (star) {
3839 				if (len + 1 > iter->fmt_size)
3840 					len = iter->fmt_size - 1;
3841 				if (len < 0)
3842 					len = 0;
3843 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3844 				iter->fmt[len] = 0;
3845 				star = false;
3846 			} else {
3847 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3848 								  iter->fmt_size);
3849 			}
3850 			if (ret < 0)
3851 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3852 			else
3853 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3854 						 str, iter->fmt);
3855 			str = "[UNSAFE-MEMORY]";
3856 			strcpy(iter->fmt, "%s");
3857 		} else {
3858 			strncpy(iter->fmt, p + i, j + 1);
3859 			iter->fmt[j+1] = '\0';
3860 		}
3861 		if (star)
3862 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3863 		else
3864 			trace_seq_printf(&iter->seq, iter->fmt, str);
3865 
3866 		p += i + j + 1;
3867 	}
3868  print:
3869 	if (*p)
3870 		trace_seq_vprintf(&iter->seq, p, ap);
3871 }
3872 
3873 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3874 {
3875 	const char *p, *new_fmt;
3876 	char *q;
3877 
3878 	if (WARN_ON_ONCE(!fmt))
3879 		return fmt;
3880 
3881 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3882 		return fmt;
3883 
3884 	p = fmt;
3885 	new_fmt = q = iter->fmt;
3886 	while (*p) {
3887 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3888 			if (!trace_iter_expand_format(iter))
3889 				return fmt;
3890 
3891 			q += iter->fmt - new_fmt;
3892 			new_fmt = iter->fmt;
3893 		}
3894 
3895 		*q++ = *p++;
3896 
3897 		/* Replace %p with %px */
3898 		if (p[-1] == '%') {
3899 			if (p[0] == '%') {
3900 				*q++ = *p++;
3901 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3902 				*q++ = *p++;
3903 				*q++ = 'x';
3904 			}
3905 		}
3906 	}
3907 	*q = '\0';
3908 
3909 	return new_fmt;
3910 }
3911 
3912 #define STATIC_TEMP_BUF_SIZE	128
3913 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3914 
3915 /* Find the next real entry, without updating the iterator itself */
3916 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3917 					  int *ent_cpu, u64 *ent_ts)
3918 {
3919 	/* __find_next_entry will reset ent_size */
3920 	int ent_size = iter->ent_size;
3921 	struct trace_entry *entry;
3922 
3923 	/*
3924 	 * If called from ftrace_dump(), then the iter->temp buffer
3925 	 * will be the static_temp_buf and not created from kmalloc.
3926 	 * If the entry size is greater than the buffer, we can
3927 	 * not save it. Just return NULL in that case. This is only
3928 	 * used to add markers when two consecutive events' time
3929 	 * stamps have a large delta. See trace_print_lat_context()
3930 	 */
3931 	if (iter->temp == static_temp_buf &&
3932 	    STATIC_TEMP_BUF_SIZE < ent_size)
3933 		return NULL;
3934 
3935 	/*
3936 	 * The __find_next_entry() may call peek_next_entry(), which may
3937 	 * call ring_buffer_peek() that may make the contents of iter->ent
3938 	 * undefined. Need to copy iter->ent now.
3939 	 */
3940 	if (iter->ent && iter->ent != iter->temp) {
3941 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3942 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3943 			void *temp;
3944 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3945 			if (!temp)
3946 				return NULL;
3947 			kfree(iter->temp);
3948 			iter->temp = temp;
3949 			iter->temp_size = iter->ent_size;
3950 		}
3951 		memcpy(iter->temp, iter->ent, iter->ent_size);
3952 		iter->ent = iter->temp;
3953 	}
3954 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3955 	/* Put back the original ent_size */
3956 	iter->ent_size = ent_size;
3957 
3958 	return entry;
3959 }
3960 
3961 /* Find the next real entry, and increment the iterator to the next entry */
3962 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3963 {
3964 	iter->ent = __find_next_entry(iter, &iter->cpu,
3965 				      &iter->lost_events, &iter->ts);
3966 
3967 	if (iter->ent)
3968 		trace_iterator_increment(iter);
3969 
3970 	return iter->ent ? iter : NULL;
3971 }
3972 
3973 static void trace_consume(struct trace_iterator *iter)
3974 {
3975 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3976 			    &iter->lost_events);
3977 }
3978 
3979 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3980 {
3981 	struct trace_iterator *iter = m->private;
3982 	int i = (int)*pos;
3983 	void *ent;
3984 
3985 	WARN_ON_ONCE(iter->leftover);
3986 
3987 	(*pos)++;
3988 
3989 	/* can't go backwards */
3990 	if (iter->idx > i)
3991 		return NULL;
3992 
3993 	if (iter->idx < 0)
3994 		ent = trace_find_next_entry_inc(iter);
3995 	else
3996 		ent = iter;
3997 
3998 	while (ent && iter->idx < i)
3999 		ent = trace_find_next_entry_inc(iter);
4000 
4001 	iter->pos = *pos;
4002 
4003 	return ent;
4004 }
4005 
4006 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4007 {
4008 	struct ring_buffer_iter *buf_iter;
4009 	unsigned long entries = 0;
4010 	u64 ts;
4011 
4012 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4013 
4014 	buf_iter = trace_buffer_iter(iter, cpu);
4015 	if (!buf_iter)
4016 		return;
4017 
4018 	ring_buffer_iter_reset(buf_iter);
4019 
4020 	/*
4021 	 * We could have the case with the max latency tracers
4022 	 * that a reset never took place on a cpu. This is evident
4023 	 * by the timestamp being before the start of the buffer.
4024 	 */
4025 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4026 		if (ts >= iter->array_buffer->time_start)
4027 			break;
4028 		entries++;
4029 		ring_buffer_iter_advance(buf_iter);
4030 	}
4031 
4032 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4033 }
4034 
4035 /*
4036  * The current tracer is copied to avoid a global locking
4037  * all around.
4038  */
4039 static void *s_start(struct seq_file *m, loff_t *pos)
4040 {
4041 	struct trace_iterator *iter = m->private;
4042 	struct trace_array *tr = iter->tr;
4043 	int cpu_file = iter->cpu_file;
4044 	void *p = NULL;
4045 	loff_t l = 0;
4046 	int cpu;
4047 
4048 	/*
4049 	 * copy the tracer to avoid using a global lock all around.
4050 	 * iter->trace is a copy of current_trace, the pointer to the
4051 	 * name may be used instead of a strcmp(), as iter->trace->name
4052 	 * will point to the same string as current_trace->name.
4053 	 */
4054 	mutex_lock(&trace_types_lock);
4055 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4056 		*iter->trace = *tr->current_trace;
4057 	mutex_unlock(&trace_types_lock);
4058 
4059 #ifdef CONFIG_TRACER_MAX_TRACE
4060 	if (iter->snapshot && iter->trace->use_max_tr)
4061 		return ERR_PTR(-EBUSY);
4062 #endif
4063 
4064 	if (*pos != iter->pos) {
4065 		iter->ent = NULL;
4066 		iter->cpu = 0;
4067 		iter->idx = -1;
4068 
4069 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4070 			for_each_tracing_cpu(cpu)
4071 				tracing_iter_reset(iter, cpu);
4072 		} else
4073 			tracing_iter_reset(iter, cpu_file);
4074 
4075 		iter->leftover = 0;
4076 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4077 			;
4078 
4079 	} else {
4080 		/*
4081 		 * If we overflowed the seq_file before, then we want
4082 		 * to just reuse the trace_seq buffer again.
4083 		 */
4084 		if (iter->leftover)
4085 			p = iter;
4086 		else {
4087 			l = *pos - 1;
4088 			p = s_next(m, p, &l);
4089 		}
4090 	}
4091 
4092 	trace_event_read_lock();
4093 	trace_access_lock(cpu_file);
4094 	return p;
4095 }
4096 
4097 static void s_stop(struct seq_file *m, void *p)
4098 {
4099 	struct trace_iterator *iter = m->private;
4100 
4101 #ifdef CONFIG_TRACER_MAX_TRACE
4102 	if (iter->snapshot && iter->trace->use_max_tr)
4103 		return;
4104 #endif
4105 
4106 	trace_access_unlock(iter->cpu_file);
4107 	trace_event_read_unlock();
4108 }
4109 
4110 static void
4111 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4112 		      unsigned long *entries, int cpu)
4113 {
4114 	unsigned long count;
4115 
4116 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4117 	/*
4118 	 * If this buffer has skipped entries, then we hold all
4119 	 * entries for the trace and we need to ignore the
4120 	 * ones before the time stamp.
4121 	 */
4122 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4123 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4124 		/* total is the same as the entries */
4125 		*total = count;
4126 	} else
4127 		*total = count +
4128 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4129 	*entries = count;
4130 }
4131 
4132 static void
4133 get_total_entries(struct array_buffer *buf,
4134 		  unsigned long *total, unsigned long *entries)
4135 {
4136 	unsigned long t, e;
4137 	int cpu;
4138 
4139 	*total = 0;
4140 	*entries = 0;
4141 
4142 	for_each_tracing_cpu(cpu) {
4143 		get_total_entries_cpu(buf, &t, &e, cpu);
4144 		*total += t;
4145 		*entries += e;
4146 	}
4147 }
4148 
4149 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4150 {
4151 	unsigned long total, entries;
4152 
4153 	if (!tr)
4154 		tr = &global_trace;
4155 
4156 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4157 
4158 	return entries;
4159 }
4160 
4161 unsigned long trace_total_entries(struct trace_array *tr)
4162 {
4163 	unsigned long total, entries;
4164 
4165 	if (!tr)
4166 		tr = &global_trace;
4167 
4168 	get_total_entries(&tr->array_buffer, &total, &entries);
4169 
4170 	return entries;
4171 }
4172 
/* Emit the column legend used by the latency trace format */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off        \n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| /     delay            \n"
		"#  cmd     pid     ||||| time  |   caller      \n"
		"#     \\   /        |||||  \\    |   /         \n";

	seq_puts(m, legend);
}
4184 
/* Emit the entries-in-buffer / entries-written summary line for @buf */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4195 
4196 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4197 				   unsigned int flags)
4198 {
4199 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4200 
4201 	print_event_info(buf, m);
4202 
4203 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4204 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4205 }
4206 
4207 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4208 				       unsigned int flags)
4209 {
4210 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211 	const char *space = "            ";
4212 	int prec = tgid ? 12 : 2;
4213 
4214 	print_event_info(buf, m);
4215 
4216 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4217 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4218 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4219 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4220 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4221 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4223 }
4224 
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229 	struct array_buffer *buf = iter->array_buffer;
4230 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231 	struct tracer *type = iter->trace;
4232 	unsigned long entries;
4233 	unsigned long total;
4234 	const char *name = "preemption";
4235 
4236 	name = type->name;
4237 
4238 	get_total_entries(buf, &total, &entries);
4239 
4240 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241 		   name, UTS_RELEASE);
4242 	seq_puts(m, "# -----------------------------------"
4243 		 "---------------------------------\n");
4244 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246 		   nsecs_to_usecs(data->saved_latency),
4247 		   entries,
4248 		   total,
4249 		   buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251 		   "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253 		   "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255 		   "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257 		   "preempt_rt",
4258 #else
4259 		   "unknown",
4260 #endif
4261 		   /* These are reserved for later use */
4262 		   0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266 	seq_puts(m, ")\n");
4267 #endif
4268 	seq_puts(m, "#    -----------------\n");
4269 	seq_printf(m, "#    | task: %.16s-%d "
4270 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271 		   data->comm, data->pid,
4272 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273 		   data->policy, data->rt_priority);
4274 	seq_puts(m, "#    -----------------\n");
4275 
4276 	if (data->critical_start) {
4277 		seq_puts(m, "#  => started at: ");
4278 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279 		trace_print_seq(m, &iter->seq);
4280 		seq_puts(m, "\n#  => ended at:   ");
4281 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282 		trace_print_seq(m, &iter->seq);
4283 		seq_puts(m, "\n#\n");
4284 	}
4285 
4286 	seq_puts(m, "#\n");
4287 }
4288 
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291 	struct trace_seq *s = &iter->seq;
4292 	struct trace_array *tr = iter->tr;
4293 
4294 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295 		return;
4296 
4297 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298 		return;
4299 
4300 	if (cpumask_available(iter->started) &&
4301 	    cpumask_test_cpu(iter->cpu, iter->started))
4302 		return;
4303 
4304 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305 		return;
4306 
4307 	if (cpumask_available(iter->started))
4308 		cpumask_set_cpu(iter->cpu, iter->started);
4309 
4310 	/* Don't print started cpu buffer for the first entry of the trace */
4311 	if (iter->idx > 1)
4312 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313 				iter->cpu);
4314 }
4315 
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318 	struct trace_array *tr = iter->tr;
4319 	struct trace_seq *s = &iter->seq;
4320 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321 	struct trace_entry *entry;
4322 	struct trace_event *event;
4323 
4324 	entry = iter->ent;
4325 
4326 	test_cpu_buff_start(iter);
4327 
4328 	event = ftrace_find_event(entry->type);
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332 			trace_print_lat_context(iter);
4333 		else
4334 			trace_print_context(iter);
4335 	}
4336 
4337 	if (trace_seq_has_overflowed(s))
4338 		return TRACE_TYPE_PARTIAL_LINE;
4339 
4340 	if (event)
4341 		return event->funcs->trace(iter, sym_flags, event);
4342 
4343 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344 
4345 	return trace_handle_return(s);
4346 }
4347 
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350 	struct trace_array *tr = iter->tr;
4351 	struct trace_seq *s = &iter->seq;
4352 	struct trace_entry *entry;
4353 	struct trace_event *event;
4354 
4355 	entry = iter->ent;
4356 
4357 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358 		trace_seq_printf(s, "%d %d %llu ",
4359 				 entry->pid, iter->cpu, iter->ts);
4360 
4361 	if (trace_seq_has_overflowed(s))
4362 		return TRACE_TYPE_PARTIAL_LINE;
4363 
4364 	event = ftrace_find_event(entry->type);
4365 	if (event)
4366 		return event->funcs->raw(iter, 0, event);
4367 
4368 	trace_seq_printf(s, "%d ?\n", entry->type);
4369 
4370 	return trace_handle_return(s);
4371 }
4372 
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375 	struct trace_array *tr = iter->tr;
4376 	struct trace_seq *s = &iter->seq;
4377 	unsigned char newline = '\n';
4378 	struct trace_entry *entry;
4379 	struct trace_event *event;
4380 
4381 	entry = iter->ent;
4382 
4383 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4385 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4387 		if (trace_seq_has_overflowed(s))
4388 			return TRACE_TYPE_PARTIAL_LINE;
4389 	}
4390 
4391 	event = ftrace_find_event(entry->type);
4392 	if (event) {
4393 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394 		if (ret != TRACE_TYPE_HANDLED)
4395 			return ret;
4396 	}
4397 
4398 	SEQ_PUT_FIELD(s, newline);
4399 
4400 	return trace_handle_return(s);
4401 }
4402 
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405 	struct trace_array *tr = iter->tr;
4406 	struct trace_seq *s = &iter->seq;
4407 	struct trace_entry *entry;
4408 	struct trace_event *event;
4409 
4410 	entry = iter->ent;
4411 
4412 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413 		SEQ_PUT_FIELD(s, entry->pid);
4414 		SEQ_PUT_FIELD(s, iter->cpu);
4415 		SEQ_PUT_FIELD(s, iter->ts);
4416 		if (trace_seq_has_overflowed(s))
4417 			return TRACE_TYPE_PARTIAL_LINE;
4418 	}
4419 
4420 	event = ftrace_find_event(entry->type);
4421 	return event ? event->funcs->binary(iter, 0, event) :
4422 		TRACE_TYPE_HANDLED;
4423 }
4424 
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427 	struct ring_buffer_iter *buf_iter;
4428 	int cpu;
4429 
4430 	/* If we are looking at one CPU buffer, only check that one */
4431 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432 		cpu = iter->cpu_file;
4433 		buf_iter = trace_buffer_iter(iter, cpu);
4434 		if (buf_iter) {
4435 			if (!ring_buffer_iter_empty(buf_iter))
4436 				return 0;
4437 		} else {
4438 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439 				return 0;
4440 		}
4441 		return 1;
4442 	}
4443 
4444 	for_each_tracing_cpu(cpu) {
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 	}
4454 
4455 	return 1;
4456 }
4457 
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461 	struct trace_array *tr = iter->tr;
4462 	unsigned long trace_flags = tr->trace_flags;
4463 	enum print_line_t ret;
4464 
4465 	if (iter->lost_events) {
4466 		if (iter->lost_events == (unsigned long)-1)
4467 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468 					 iter->cpu);
4469 		else
4470 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471 					 iter->cpu, iter->lost_events);
4472 		if (trace_seq_has_overflowed(&iter->seq))
4473 			return TRACE_TYPE_PARTIAL_LINE;
4474 	}
4475 
4476 	if (iter->trace && iter->trace->print_line) {
4477 		ret = iter->trace->print_line(iter);
4478 		if (ret != TRACE_TYPE_UNHANDLED)
4479 			return ret;
4480 	}
4481 
4482 	if (iter->ent->type == TRACE_BPUTS &&
4483 			trace_flags & TRACE_ITER_PRINTK &&
4484 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485 		return trace_print_bputs_msg_only(iter);
4486 
4487 	if (iter->ent->type == TRACE_BPRINT &&
4488 			trace_flags & TRACE_ITER_PRINTK &&
4489 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490 		return trace_print_bprintk_msg_only(iter);
4491 
4492 	if (iter->ent->type == TRACE_PRINT &&
4493 			trace_flags & TRACE_ITER_PRINTK &&
4494 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495 		return trace_print_printk_msg_only(iter);
4496 
4497 	if (trace_flags & TRACE_ITER_BIN)
4498 		return print_bin_fmt(iter);
4499 
4500 	if (trace_flags & TRACE_ITER_HEX)
4501 		return print_hex_fmt(iter);
4502 
4503 	if (trace_flags & TRACE_ITER_RAW)
4504 		return print_raw_fmt(iter);
4505 
4506 	return print_trace_fmt(iter);
4507 }
4508 
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511 	struct trace_iterator *iter = m->private;
4512 	struct trace_array *tr = iter->tr;
4513 
4514 	/* print nothing if the buffers are empty */
4515 	if (trace_empty(iter))
4516 		return;
4517 
4518 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519 		print_trace_header(m, iter);
4520 
4521 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522 		print_lat_help_header(m);
4523 }
4524 
4525 void trace_default_header(struct seq_file *m)
4526 {
4527 	struct trace_iterator *iter = m->private;
4528 	struct trace_array *tr = iter->tr;
4529 	unsigned long trace_flags = tr->trace_flags;
4530 
4531 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532 		return;
4533 
4534 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535 		/* print nothing if the buffers are empty */
4536 		if (trace_empty(iter))
4537 			return;
4538 		print_trace_header(m, iter);
4539 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4540 			print_lat_help_header(m);
4541 	} else {
4542 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4544 				print_func_help_header_irq(iter->array_buffer,
4545 							   m, trace_flags);
4546 			else
4547 				print_func_help_header(iter->array_buffer, m,
4548 						       trace_flags);
4549 		}
4550 	}
4551 }
4552 
/* Warn in the trace output when ftrace_is_dead() reports a failure. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4560 
4561 #ifdef CONFIG_TRACER_MAX_TRACE
/* Describe the values that may be written to the main "snapshot" file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4571 
/*
 * Describe the values that may be written to a per-CPU "snapshot" file.
 * The text for "echo 1" depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4586 
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589 	if (iter->tr->allocated_snapshot)
4590 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591 	else
4592 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593 
4594 	seq_puts(m, "# Snapshot commands:\n");
4595 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596 		show_snapshot_main_help(m);
4597 	else
4598 		show_snapshot_percpu_help(m);
4599 }
4600 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4603 #endif
4604 
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607 	struct trace_iterator *iter = v;
4608 	int ret;
4609 
4610 	if (iter->ent == NULL) {
4611 		if (iter->tr) {
4612 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613 			seq_puts(m, "#\n");
4614 			test_ftrace_alive(m);
4615 		}
4616 		if (iter->snapshot && trace_empty(iter))
4617 			print_snapshot_help(m, iter);
4618 		else if (iter->trace && iter->trace->print_header)
4619 			iter->trace->print_header(m);
4620 		else
4621 			trace_default_header(m);
4622 
4623 	} else if (iter->leftover) {
4624 		/*
4625 		 * If we filled the seq_file buffer earlier, we
4626 		 * want to just show it now.
4627 		 */
4628 		ret = trace_print_seq(m, &iter->seq);
4629 
4630 		/* ret should this time be zero, but you never know */
4631 		iter->leftover = ret;
4632 
4633 	} else {
4634 		print_trace_line(iter);
4635 		ret = trace_print_seq(m, &iter->seq);
4636 		/*
4637 		 * If we overflow the seq_file buffer, then it will
4638 		 * ask us for this data again at start up.
4639 		 * Use that instead.
4640 		 *  ret is 0 if seq_file write succeeded.
4641 		 *        -1 otherwise.
4642 		 */
4643 		iter->leftover = ret;
4644 	}
4645 
4646 	return 0;
4647 }
4648 
4649 /*
4650  * Should be used after trace_array_get(), trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4656 		return (long)inode->i_cdev - 1;
4657 	return RING_BUFFER_ALL_CPUS;
4658 }
4659 
4660 static const struct seq_operations tracer_seq_ops = {
4661 	.start		= s_start,
4662 	.next		= s_next,
4663 	.stop		= s_stop,
4664 	.show		= s_show,
4665 };
4666 
/*
 * Set up a trace_iterator for reading a trace file through seq_file.
 *
 * @inode:    inode of the file; i_private holds the trace_array
 * @file:     file being opened; receives the seq_file private data
 * @snapshot: true when the "snapshot" file is being opened
 *
 * The iterator is allocated as seq_file private data, gets a private
 * copy of the current tracer, and has ring buffer iterators prepared for
 * either every tracing CPU or the single CPU the file refers to.
 *
 * Returns the iterator, ERR_PTR(-ENODEV) when tracing is disabled, or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU id */
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify the given format
	 * string to replace %p with %px so that it shows the real address
	 * instead of a hashed value. However, that is only needed for
	 * event tracing; other tracers may not need it. Defer the
	 * allocation until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
	mutex_lock(&trace_types_lock);
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
	if (!iter->trace)
		goto fail;

	*iter->trace = *tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->max_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
		tracing_stop_tr(tr);

	/*
	 * Prepare every iterator first, synchronize once, and only then
	 * start reading and reset each per-CPU position.
	 */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->array_buffer->buffer,
							 cpu, GFP_KERNEL);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->array_buffer->buffer,
						 cpu, GFP_KERNEL);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

	/* Reached with trace_types_lock held */
 fail:
	mutex_unlock(&trace_types_lock);
	kfree(iter->trace);
	kfree(iter->temp);
	kfree(iter->buffer_iter);
	/* Reached without the lock; releases the seq_file private data */
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4789 
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792 	int ret;
4793 
4794 	ret = tracing_check_open_get_tr(NULL);
4795 	if (ret)
4796 		return ret;
4797 
4798 	filp->private_data = inode->i_private;
4799 	return 0;
4800 }
4801 
4802 bool tracing_is_disabled(void)
4803 {
4804 	return (tracing_disabled) ? true: false;
4805 }
4806 
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813 	struct trace_array *tr = inode->i_private;
4814 	int ret;
4815 
4816 	ret = tracing_check_open_get_tr(tr);
4817 	if (ret)
4818 		return ret;
4819 
4820 	filp->private_data = inode->i_private;
4821 
4822 	return 0;
4823 }
4824 
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827 	struct trace_array *tr = inode->i_private;
4828 	struct seq_file *m = file->private_data;
4829 	struct trace_iterator *iter;
4830 	int cpu;
4831 
4832 	if (!(file->f_mode & FMODE_READ)) {
4833 		trace_array_put(tr);
4834 		return 0;
4835 	}
4836 
4837 	/* Writes do not use seq_file */
4838 	iter = m->private;
4839 	mutex_lock(&trace_types_lock);
4840 
4841 	for_each_tracing_cpu(cpu) {
4842 		if (iter->buffer_iter[cpu])
4843 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844 	}
4845 
4846 	if (iter->trace && iter->trace->close)
4847 		iter->trace->close(iter);
4848 
4849 	if (!iter->snapshot && tr->stop_count)
4850 		/* reenable tracing if it was previously enabled */
4851 		tracing_start_tr(tr);
4852 
4853 	__trace_array_put(tr);
4854 
4855 	mutex_unlock(&trace_types_lock);
4856 
4857 	mutex_destroy(&iter->mutex);
4858 	free_cpumask_var(iter->started);
4859 	kfree(iter->fmt);
4860 	kfree(iter->temp);
4861 	kfree(iter->trace);
4862 	kfree(iter->buffer_iter);
4863 	seq_release_private(inode, file);
4864 
4865 	return 0;
4866 }
4867 
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870 	struct trace_array *tr = inode->i_private;
4871 
4872 	trace_array_put(tr);
4873 	return 0;
4874 }
4875 
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878 	struct trace_array *tr = inode->i_private;
4879 
4880 	trace_array_put(tr);
4881 
4882 	return single_release(inode, file);
4883 }
4884 
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887 	struct trace_array *tr = inode->i_private;
4888 	struct trace_iterator *iter;
4889 	int ret;
4890 
4891 	ret = tracing_check_open_get_tr(tr);
4892 	if (ret)
4893 		return ret;
4894 
4895 	/* If this file was open for write, then erase contents */
4896 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897 		int cpu = tracing_get_cpu(inode);
4898 		struct array_buffer *trace_buf = &tr->array_buffer;
4899 
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901 		if (tr->current_trace->print_max)
4902 			trace_buf = &tr->max_buffer;
4903 #endif
4904 
4905 		if (cpu == RING_BUFFER_ALL_CPUS)
4906 			tracing_reset_online_cpus(trace_buf);
4907 		else
4908 			tracing_reset_cpu(trace_buf, cpu);
4909 	}
4910 
4911 	if (file->f_mode & FMODE_READ) {
4912 		iter = __tracing_open(inode, file, false);
4913 		if (IS_ERR(iter))
4914 			ret = PTR_ERR(iter);
4915 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917 	}
4918 
4919 	if (ret < 0)
4920 		trace_array_put(tr);
4921 
4922 	return ret;
4923 }
4924 
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935 
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940 	while (t && !trace_ok_for_array(t, tr))
4941 		t = t->next;
4942 
4943 	return t;
4944 }
4945 
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949 	struct trace_array *tr = m->private;
4950 	struct tracer *t = v;
4951 
4952 	(*pos)++;
4953 
4954 	if (t)
4955 		t = get_tracer_for_array(tr, t->next);
4956 
4957 	return t;
4958 }
4959 
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962 	struct trace_array *tr = m->private;
4963 	struct tracer *t;
4964 	loff_t l = 0;
4965 
4966 	mutex_lock(&trace_types_lock);
4967 
4968 	t = get_tracer_for_array(tr, trace_types);
4969 	for (; t && l < *pos; t = t_next(m, t, &l))
4970 			;
4971 
4972 	return t;
4973 }
4974 
/* seq_file ->stop() for the tracer list: drop trace_types_lock. */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4979 
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982 	struct tracer *t = v;
4983 
4984 	if (!t)
4985 		return 0;
4986 
4987 	seq_puts(m, t->name);
4988 	if (t->next)
4989 		seq_putc(m, ' ');
4990 	else
4991 		seq_putc(m, '\n');
4992 
4993 	return 0;
4994 }
4995 
4996 static const struct seq_operations show_traces_seq_ops = {
4997 	.start		= t_start,
4998 	.next		= t_next,
4999 	.stop		= t_stop,
5000 	.show		= t_show,
5001 };
5002 
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005 	struct trace_array *tr = inode->i_private;
5006 	struct seq_file *m;
5007 	int ret;
5008 
5009 	ret = tracing_check_open_get_tr(tr);
5010 	if (ret)
5011 		return ret;
5012 
5013 	ret = seq_open(file, &show_traces_seq_ops);
5014 	if (ret) {
5015 		trace_array_put(tr);
5016 		return ret;
5017 	}
5018 
5019 	m = file->private_data;
5020 	m->private = tr;
5021 
5022 	return 0;
5023 }
5024 
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027 	struct trace_array *tr = inode->i_private;
5028 
5029 	trace_array_put(tr);
5030 	return seq_release(inode, file);
5031 }
5032 
/*
 * Write handler that accepts and discards everything: the user buffer
 * is never read and the full @count is reported as written.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
5039 
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042 	int ret;
5043 
5044 	if (file->f_mode & FMODE_READ)
5045 		ret = seq_lseek(file, offset, whence);
5046 	else
5047 		file->f_pos = ret = 0;
5048 
5049 	return ret;
5050 }
5051 
5052 static const struct file_operations tracing_fops = {
5053 	.open		= tracing_open,
5054 	.read		= seq_read,
5055 	.write		= tracing_write_stub,
5056 	.llseek		= tracing_lseek,
5057 	.release	= tracing_release,
5058 };
5059 
5060 static const struct file_operations show_traces_fops = {
5061 	.open		= show_traces_open,
5062 	.read		= seq_read,
5063 	.llseek		= seq_lseek,
5064 	.release	= show_traces_release,
5065 };
5066 
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069 		     size_t count, loff_t *ppos)
5070 {
5071 	struct trace_array *tr = file_inode(filp)->i_private;
5072 	char *mask_str;
5073 	int len;
5074 
5075 	len = snprintf(NULL, 0, "%*pb\n",
5076 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077 	mask_str = kmalloc(len, GFP_KERNEL);
5078 	if (!mask_str)
5079 		return -ENOMEM;
5080 
5081 	len = snprintf(mask_str, len, "%*pb\n",
5082 		       cpumask_pr_args(tr->tracing_cpumask));
5083 	if (len >= count) {
5084 		count = -EINVAL;
5085 		goto out_err;
5086 	}
5087 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088 
5089 out_err:
5090 	kfree(mask_str);
5091 
5092 	return count;
5093 }
5094 
5095 int tracing_set_cpumask(struct trace_array *tr,
5096 			cpumask_var_t tracing_cpumask_new)
5097 {
5098 	int cpu;
5099 
5100 	if (!tr)
5101 		return -EINVAL;
5102 
5103 	local_irq_disable();
5104 	arch_spin_lock(&tr->max_lock);
5105 	for_each_tracing_cpu(cpu) {
5106 		/*
5107 		 * Increase/decrease the disabled counter if we are
5108 		 * about to flip a bit in the cpumask:
5109 		 */
5110 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114 		}
5115 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119 		}
5120 	}
5121 	arch_spin_unlock(&tr->max_lock);
5122 	local_irq_enable();
5123 
5124 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125 
5126 	return 0;
5127 }
5128 
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131 		      size_t count, loff_t *ppos)
5132 {
5133 	struct trace_array *tr = file_inode(filp)->i_private;
5134 	cpumask_var_t tracing_cpumask_new;
5135 	int err;
5136 
5137 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138 		return -ENOMEM;
5139 
5140 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141 	if (err)
5142 		goto err_free;
5143 
5144 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145 	if (err)
5146 		goto err_free;
5147 
5148 	free_cpumask_var(tracing_cpumask_new);
5149 
5150 	return count;
5151 
5152 err_free:
5153 	free_cpumask_var(tracing_cpumask_new);
5154 
5155 	return err;
5156 }
5157 
5158 static const struct file_operations tracing_cpumask_fops = {
5159 	.open		= tracing_open_generic_tr,
5160 	.read		= tracing_cpumask_read,
5161 	.write		= tracing_cpumask_write,
5162 	.release	= tracing_release_generic_tr,
5163 	.llseek		= generic_file_llseek,
5164 };
5165 
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168 	struct tracer_opt *trace_opts;
5169 	struct trace_array *tr = m->private;
5170 	u32 tracer_flags;
5171 	int i;
5172 
5173 	mutex_lock(&trace_types_lock);
5174 	tracer_flags = tr->current_trace->flags->val;
5175 	trace_opts = tr->current_trace->flags->opts;
5176 
5177 	for (i = 0; trace_options[i]; i++) {
5178 		if (tr->trace_flags & (1 << i))
5179 			seq_printf(m, "%s\n", trace_options[i]);
5180 		else
5181 			seq_printf(m, "no%s\n", trace_options[i]);
5182 	}
5183 
5184 	for (i = 0; trace_opts[i].name; i++) {
5185 		if (tracer_flags & trace_opts[i].bit)
5186 			seq_printf(m, "%s\n", trace_opts[i].name);
5187 		else
5188 			seq_printf(m, "no%s\n", trace_opts[i].name);
5189 	}
5190 	mutex_unlock(&trace_types_lock);
5191 
5192 	return 0;
5193 }
5194 
5195 static int __set_tracer_option(struct trace_array *tr,
5196 			       struct tracer_flags *tracer_flags,
5197 			       struct tracer_opt *opts, int neg)
5198 {
5199 	struct tracer *trace = tracer_flags->trace;
5200 	int ret;
5201 
5202 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203 	if (ret)
5204 		return ret;
5205 
5206 	if (neg)
5207 		tracer_flags->val &= ~opts->bit;
5208 	else
5209 		tracer_flags->val |= opts->bit;
5210 	return 0;
5211 }
5212 
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216 	struct tracer *trace = tr->current_trace;
5217 	struct tracer_flags *tracer_flags = trace->flags;
5218 	struct tracer_opt *opts = NULL;
5219 	int i;
5220 
5221 	for (i = 0; tracer_flags->opts[i].name; i++) {
5222 		opts = &tracer_flags->opts[i];
5223 
5224 		if (strcmp(cmp, opts->name) == 0)
5225 			return __set_tracer_option(tr, trace->flags, opts, neg);
5226 	}
5227 
5228 	return -EINVAL;
5229 }
5230 
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235 		return -1;
5236 
5237 	return 0;
5238 }
5239 
/*
 * Set or clear one generic trace option on a trace array.
 *
 * @tr:      trace array whose trace_flags are updated
 * @mask:    the flag to change; the code compares it for equality
 *           against individual TRACE_ITER_* values
 * @enabled: non-zero to set the flag, zero to clear it
 *
 * The current tracer may veto the change through ->flag_changed().
 * After the flag is updated, the side effect tied to that particular
 * flag (cmd/tgid recording, fork following, ring buffer overwrite mode,
 * trace_printk control) is applied.
 *
 * Returns 0 on success (including when the flag already had the
 * requested state), -EINVAL when the tracer rejects the change, or
 * -ENOMEM when the tgid map cannot be allocated.
 */
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
	int *map;

	/* These two flags must be changed with event_mutex held */
	if ((mask == TRACE_ITER_RECORD_TGID) ||
	    (mask == TRACE_ITER_RECORD_CMD))
		lockdep_assert_held(&event_mutex);

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);

	if (mask == TRACE_ITER_RECORD_TGID) {
		/* The tgid map is allocated on first use, sized from pid_max */
		if (!tgid_map) {
			tgid_map_max = pid_max;
			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
				       GFP_KERNEL);

			/*
			 * Pairs with smp_load_acquire() in
			 * trace_find_tgid_ptr() to ensure that if it observes
			 * the tgid_map we just allocated then it also observes
			 * the corresponding tgid_map_max value.
			 */
			smp_store_release(&tgid_map, map);
		}
		/* Allocation failed: take the flag back and report it */
		if (!tgid_map) {
			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
	}

	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_FUNC_FORK)
		ftrace_pid_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_OVERWRITE) {
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
#endif
	}

	if (mask == TRACE_ITER_PRINTK) {
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
	}

	return 0;
}
5307 
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310 	char *cmp;
5311 	int neg = 0;
5312 	int ret;
5313 	size_t orig_len = strlen(option);
5314 	int len;
5315 
5316 	cmp = strstrip(option);
5317 
5318 	len = str_has_prefix(cmp, "no");
5319 	if (len)
5320 		neg = 1;
5321 
5322 	cmp += len;
5323 
5324 	mutex_lock(&event_mutex);
5325 	mutex_lock(&trace_types_lock);
5326 
5327 	ret = match_string(trace_options, -1, cmp);
5328 	/* If no option could be set, test the specific tracer options */
5329 	if (ret < 0)
5330 		ret = set_tracer_option(tr, cmp, neg);
5331 	else
5332 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5333 
5334 	mutex_unlock(&trace_types_lock);
5335 	mutex_unlock(&event_mutex);
5336 
5337 	/*
5338 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5339 	 * turn it back into a space.
5340 	 */
5341 	if (orig_len > strlen(option))
5342 		option[strlen(option)] = ' ';
5343 
5344 	return ret;
5345 }
5346 
5347 static void __init apply_trace_boot_options(void)
5348 {
5349 	char *buf = trace_boot_options_buf;
5350 	char *option;
5351 
5352 	while (true) {
5353 		option = strsep(&buf, ",");
5354 
5355 		if (!option)
5356 			break;
5357 
5358 		if (*option)
5359 			trace_set_options(&global_trace, option);
5360 
5361 		/* Put back the comma to allow this to be called again */
5362 		if (buf)
5363 			*(buf - 1) = ',';
5364 	}
5365 }
5366 
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369 			size_t cnt, loff_t *ppos)
5370 {
5371 	struct seq_file *m = filp->private_data;
5372 	struct trace_array *tr = m->private;
5373 	char buf[64];
5374 	int ret;
5375 
5376 	if (cnt >= sizeof(buf))
5377 		return -EINVAL;
5378 
5379 	if (copy_from_user(buf, ubuf, cnt))
5380 		return -EFAULT;
5381 
5382 	buf[cnt] = 0;
5383 
5384 	ret = trace_set_options(tr, buf);
5385 	if (ret < 0)
5386 		return ret;
5387 
5388 	*ppos += cnt;
5389 
5390 	return cnt;
5391 }
5392 
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395 	struct trace_array *tr = inode->i_private;
5396 	int ret;
5397 
5398 	ret = tracing_check_open_get_tr(tr);
5399 	if (ret)
5400 		return ret;
5401 
5402 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403 	if (ret < 0)
5404 		trace_array_put(tr);
5405 
5406 	return ret;
5407 }
5408 
5409 static const struct file_operations tracing_iter_fops = {
5410 	.open		= tracing_trace_options_open,
5411 	.read		= seq_read,
5412 	.llseek		= seq_lseek,
5413 	.release	= tracing_single_release_tr,
5414 	.write		= tracing_trace_options_write,
5415 };
5416 
/*
 * Help text handed out verbatim by tracing_readme_read().
 *
 * The text is assembled at compile time; sections describing optional
 * features are only present when the matching CONFIG_* symbol is defined.
 * Keep feature keywords unchanged when editing: only the wording of the
 * examples and explanations should be touched.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t-change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
#endif
#endif
;
5673 
5674 static ssize_t
5675 tracing_readme_read(struct file *filp, char __user *ubuf,
5676 		       size_t cnt, loff_t *ppos)
5677 {
5678 	return simple_read_from_buffer(ubuf, cnt, ppos,
5679 					readme_msg, strlen(readme_msg));
5680 }
5681 
5682 static const struct file_operations tracing_readme_fops = {
5683 	.open		= tracing_open_generic,
5684 	.read		= tracing_readme_read,
5685 	.llseek		= generic_file_llseek,
5686 };
5687 
5688 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5689 {
5690 	int pid = ++(*pos);
5691 
5692 	return trace_find_tgid_ptr(pid);
5693 }
5694 
5695 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5696 {
5697 	int pid = *pos;
5698 
5699 	return trace_find_tgid_ptr(pid);
5700 }
5701 
/* seq_file ->stop: saved_tgids_start() acquires nothing, so nothing to undo */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
	/* intentionally empty */
}
5705 
5706 static int saved_tgids_show(struct seq_file *m, void *v)
5707 {
5708 	int *entry = (int *)v;
5709 	int pid = entry - tgid_map;
5710 	int tgid = *entry;
5711 
5712 	if (tgid == 0)
5713 		return SEQ_SKIP;
5714 
5715 	seq_printf(m, "%d %d\n", pid, tgid);
5716 	return 0;
5717 }
5718 
5719 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5720 	.start		= saved_tgids_start,
5721 	.stop		= saved_tgids_stop,
5722 	.next		= saved_tgids_next,
5723 	.show		= saved_tgids_show,
5724 };
5725 
5726 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5727 {
5728 	int ret;
5729 
5730 	ret = tracing_check_open_get_tr(NULL);
5731 	if (ret)
5732 		return ret;
5733 
5734 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5735 }
5736 
5737 
5738 static const struct file_operations tracing_saved_tgids_fops = {
5739 	.open		= tracing_saved_tgids_open,
5740 	.read		= seq_read,
5741 	.llseek		= seq_lseek,
5742 	.release	= seq_release,
5743 };
5744 
5745 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5746 {
5747 	unsigned int *ptr = v;
5748 
5749 	if (*pos || m->count)
5750 		ptr++;
5751 
5752 	(*pos)++;
5753 
5754 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5755 	     ptr++) {
5756 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5757 			continue;
5758 
5759 		return ptr;
5760 	}
5761 
5762 	return NULL;
5763 }
5764 
5765 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5766 {
5767 	void *v;
5768 	loff_t l = 0;
5769 
5770 	preempt_disable();
5771 	arch_spin_lock(&trace_cmdline_lock);
5772 
5773 	v = &savedcmd->map_cmdline_to_pid[0];
5774 	while (l <= *pos) {
5775 		v = saved_cmdlines_next(m, v, &l);
5776 		if (!v)
5777 			return NULL;
5778 	}
5779 
5780 	return v;
5781 }
5782 
5783 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5784 {
5785 	arch_spin_unlock(&trace_cmdline_lock);
5786 	preempt_enable();
5787 }
5788 
5789 static int saved_cmdlines_show(struct seq_file *m, void *v)
5790 {
5791 	char buf[TASK_COMM_LEN];
5792 	unsigned int *pid = v;
5793 
5794 	__trace_find_cmdline(*pid, buf);
5795 	seq_printf(m, "%d %s\n", *pid, buf);
5796 	return 0;
5797 }
5798 
5799 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5800 	.start		= saved_cmdlines_start,
5801 	.next		= saved_cmdlines_next,
5802 	.stop		= saved_cmdlines_stop,
5803 	.show		= saved_cmdlines_show,
5804 };
5805 
5806 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5807 {
5808 	int ret;
5809 
5810 	ret = tracing_check_open_get_tr(NULL);
5811 	if (ret)
5812 		return ret;
5813 
5814 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5815 }
5816 
5817 static const struct file_operations tracing_saved_cmdlines_fops = {
5818 	.open		= tracing_saved_cmdlines_open,
5819 	.read		= seq_read,
5820 	.llseek		= seq_lseek,
5821 	.release	= seq_release,
5822 };
5823 
5824 static ssize_t
5825 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5826 				 size_t cnt, loff_t *ppos)
5827 {
5828 	char buf[64];
5829 	int r;
5830 
5831 	arch_spin_lock(&trace_cmdline_lock);
5832 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5833 	arch_spin_unlock(&trace_cmdline_lock);
5834 
5835 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5836 }
5837 
5838 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5839 {
5840 	kfree(s->saved_cmdlines);
5841 	kfree(s->map_cmdline_to_pid);
5842 	kfree(s);
5843 }
5844 
5845 static int tracing_resize_saved_cmdlines(unsigned int val)
5846 {
5847 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5848 
5849 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5850 	if (!s)
5851 		return -ENOMEM;
5852 
5853 	if (allocate_cmdlines_buffer(val, s) < 0) {
5854 		kfree(s);
5855 		return -ENOMEM;
5856 	}
5857 
5858 	arch_spin_lock(&trace_cmdline_lock);
5859 	savedcmd_temp = savedcmd;
5860 	savedcmd = s;
5861 	arch_spin_unlock(&trace_cmdline_lock);
5862 	free_saved_cmdlines_buffer(savedcmd_temp);
5863 
5864 	return 0;
5865 }
5866 
5867 static ssize_t
5868 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5869 				  size_t cnt, loff_t *ppos)
5870 {
5871 	unsigned long val;
5872 	int ret;
5873 
5874 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5875 	if (ret)
5876 		return ret;
5877 
5878 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5879 	if (!val || val > PID_MAX_DEFAULT)
5880 		return -EINVAL;
5881 
5882 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5883 	if (ret < 0)
5884 		return ret;
5885 
5886 	*ppos += cnt;
5887 
5888 	return cnt;
5889 }
5890 
5891 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5892 	.open		= tracing_open_generic,
5893 	.read		= tracing_saved_cmdlines_size_read,
5894 	.write		= tracing_saved_cmdlines_size_write,
5895 };
5896 
5897 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5898 static union trace_eval_map_item *
5899 update_eval_map(union trace_eval_map_item *ptr)
5900 {
5901 	if (!ptr->map.eval_string) {
5902 		if (ptr->tail.next) {
5903 			ptr = ptr->tail.next;
5904 			/* Set ptr to the next real item (skip head) */
5905 			ptr++;
5906 		} else
5907 			return NULL;
5908 	}
5909 	return ptr;
5910 }
5911 
5912 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5913 {
5914 	union trace_eval_map_item *ptr = v;
5915 
5916 	/*
5917 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5918 	 * This really should never happen.
5919 	 */
5920 	(*pos)++;
5921 	ptr = update_eval_map(ptr);
5922 	if (WARN_ON_ONCE(!ptr))
5923 		return NULL;
5924 
5925 	ptr++;
5926 	ptr = update_eval_map(ptr);
5927 
5928 	return ptr;
5929 }
5930 
5931 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5932 {
5933 	union trace_eval_map_item *v;
5934 	loff_t l = 0;
5935 
5936 	mutex_lock(&trace_eval_mutex);
5937 
5938 	v = trace_eval_maps;
5939 	if (v)
5940 		v++;
5941 
5942 	while (v && l < *pos) {
5943 		v = eval_map_next(m, v, &l);
5944 	}
5945 
5946 	return v;
5947 }
5948 
5949 static void eval_map_stop(struct seq_file *m, void *v)
5950 {
5951 	mutex_unlock(&trace_eval_mutex);
5952 }
5953 
5954 static int eval_map_show(struct seq_file *m, void *v)
5955 {
5956 	union trace_eval_map_item *ptr = v;
5957 
5958 	seq_printf(m, "%s %ld (%s)\n",
5959 		   ptr->map.eval_string, ptr->map.eval_value,
5960 		   ptr->map.system);
5961 
5962 	return 0;
5963 }
5964 
5965 static const struct seq_operations tracing_eval_map_seq_ops = {
5966 	.start		= eval_map_start,
5967 	.next		= eval_map_next,
5968 	.stop		= eval_map_stop,
5969 	.show		= eval_map_show,
5970 };
5971 
5972 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5973 {
5974 	int ret;
5975 
5976 	ret = tracing_check_open_get_tr(NULL);
5977 	if (ret)
5978 		return ret;
5979 
5980 	return seq_open(filp, &tracing_eval_map_seq_ops);
5981 }
5982 
5983 static const struct file_operations tracing_eval_map_fops = {
5984 	.open		= tracing_eval_map_open,
5985 	.read		= seq_read,
5986 	.llseek		= seq_lseek,
5987 	.release	= seq_release,
5988 };
5989 
5990 static inline union trace_eval_map_item *
5991 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5992 {
5993 	/* Return tail of array given the head */
5994 	return ptr + ptr->head.length + 1;
5995 }
5996 
5997 static void
5998 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5999 			   int len)
6000 {
6001 	struct trace_eval_map **stop;
6002 	struct trace_eval_map **map;
6003 	union trace_eval_map_item *map_array;
6004 	union trace_eval_map_item *ptr;
6005 
6006 	stop = start + len;
6007 
6008 	/*
6009 	 * The trace_eval_maps contains the map plus a head and tail item,
6010 	 * where the head holds the module and length of array, and the
6011 	 * tail holds a pointer to the next list.
6012 	 */
6013 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6014 	if (!map_array) {
6015 		pr_warn("Unable to allocate trace eval mapping\n");
6016 		return;
6017 	}
6018 
6019 	mutex_lock(&trace_eval_mutex);
6020 
6021 	if (!trace_eval_maps)
6022 		trace_eval_maps = map_array;
6023 	else {
6024 		ptr = trace_eval_maps;
6025 		for (;;) {
6026 			ptr = trace_eval_jmp_to_tail(ptr);
6027 			if (!ptr->tail.next)
6028 				break;
6029 			ptr = ptr->tail.next;
6030 
6031 		}
6032 		ptr->tail.next = map_array;
6033 	}
6034 	map_array->head.mod = mod;
6035 	map_array->head.length = len;
6036 	map_array++;
6037 
6038 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6039 		map_array->map = **map;
6040 		map_array++;
6041 	}
6042 	memset(map_array, 0, sizeof(*map_array));
6043 
6044 	mutex_unlock(&trace_eval_mutex);
6045 }
6046 
6047 static void trace_create_eval_file(struct dentry *d_tracer)
6048 {
6049 	trace_create_file("eval_map", 0444, d_tracer,
6050 			  NULL, &tracing_eval_map_fops);
6051 }
6052 
6053 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stand-ins used when CONFIG_TRACE_EVAL_MAP_FILE is not set */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}

static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len)
{
}
6057 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6058 
/*
 * Register @len eval maps starting at @start: pass them to
 * trace_event_eval_update() and then record them through
 * trace_insert_eval_map_file().  Nothing is done for a non-positive @len.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len > 0) {
		trace_event_eval_update(start, len);
		trace_insert_eval_map_file(mod, start, len);
	}
}
6073 
6074 static ssize_t
6075 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6076 		       size_t cnt, loff_t *ppos)
6077 {
6078 	struct trace_array *tr = filp->private_data;
6079 	char buf[MAX_TRACER_SIZE+2];
6080 	int r;
6081 
6082 	mutex_lock(&trace_types_lock);
6083 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6084 	mutex_unlock(&trace_types_lock);
6085 
6086 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6087 }
6088 
6089 int tracer_init(struct tracer *t, struct trace_array *tr)
6090 {
6091 	tracing_reset_online_cpus(&tr->array_buffer);
6092 	return t->init(tr);
6093 }
6094 
6095 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6096 {
6097 	int cpu;
6098 
6099 	for_each_tracing_cpu(cpu)
6100 		per_cpu_ptr(buf->data, cpu)->entries = val;
6101 }
6102 
6103 #ifdef CONFIG_TRACER_MAX_TRACE
6104 /* resize @tr's buffer to the size of @size_tr's entries */
6105 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6106 					struct array_buffer *size_buf, int cpu_id)
6107 {
6108 	int cpu, ret = 0;
6109 
6110 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6111 		for_each_tracing_cpu(cpu) {
6112 			ret = ring_buffer_resize(trace_buf->buffer,
6113 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6114 			if (ret < 0)
6115 				break;
6116 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6117 				per_cpu_ptr(size_buf->data, cpu)->entries;
6118 		}
6119 	} else {
6120 		ret = ring_buffer_resize(trace_buf->buffer,
6121 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6122 		if (ret == 0)
6123 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6124 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6125 	}
6126 
6127 	return ret;
6128 }
6129 #endif /* CONFIG_TRACER_MAX_TRACE */
6130 
6131 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6132 					unsigned long size, int cpu)
6133 {
6134 	int ret;
6135 
6136 	/*
6137 	 * If kernel or user changes the size of the ring buffer
6138 	 * we use the size that was given, and we can forget about
6139 	 * expanding it later.
6140 	 */
6141 	ring_buffer_expanded = true;
6142 
6143 	/* May be called before buffers are initialized */
6144 	if (!tr->array_buffer.buffer)
6145 		return 0;
6146 
6147 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6148 	if (ret < 0)
6149 		return ret;
6150 
6151 #ifdef CONFIG_TRACER_MAX_TRACE
6152 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6153 	    !tr->current_trace->use_max_tr)
6154 		goto out;
6155 
6156 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6157 	if (ret < 0) {
6158 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6159 						     &tr->array_buffer, cpu);
6160 		if (r < 0) {
6161 			/*
6162 			 * AARGH! We are left with different
6163 			 * size max buffer!!!!
6164 			 * The max buffer is our "snapshot" buffer.
6165 			 * When a tracer needs a snapshot (one of the
6166 			 * latency tracers), it swaps the max buffer
6167 			 * with the saved snap shot. We succeeded to
6168 			 * update the size of the main buffer, but failed to
6169 			 * update the size of the max buffer. But when we tried
6170 			 * to reset the main buffer to the original size, we
6171 			 * failed there too. This is very unlikely to
6172 			 * happen, but if it does, warn and kill all
6173 			 * tracing.
6174 			 */
6175 			WARN_ON(1);
6176 			tracing_disabled = 1;
6177 		}
6178 		return ret;
6179 	}
6180 
6181 	if (cpu == RING_BUFFER_ALL_CPUS)
6182 		set_buffer_entries(&tr->max_buffer, size);
6183 	else
6184 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6185 
6186  out:
6187 #endif /* CONFIG_TRACER_MAX_TRACE */
6188 
6189 	if (cpu == RING_BUFFER_ALL_CPUS)
6190 		set_buffer_entries(&tr->array_buffer, size);
6191 	else
6192 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6193 
6194 	return ret;
6195 }
6196 
6197 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6198 				  unsigned long size, int cpu_id)
6199 {
6200 	int ret;
6201 
6202 	mutex_lock(&trace_types_lock);
6203 
6204 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6205 		/* make sure, this cpu is enabled in the mask */
6206 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6207 			ret = -EINVAL;
6208 			goto out;
6209 		}
6210 	}
6211 
6212 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6213 	if (ret < 0)
6214 		ret = -ENOMEM;
6215 
6216 out:
6217 	mutex_unlock(&trace_types_lock);
6218 
6219 	return ret;
6220 }
6221 
6222 
6223 /**
6224  * tracing_update_buffers - used by tracing facility to expand ring buffers
6225  *
6226  * To save on memory when the tracing is never used on a system with it
6227  * configured in. The ring buffers are set to a minimum size. But once
6228  * a user starts to use the tracing facility, then they need to grow
6229  * to their default size.
6230  *
6231  * This function is to be called when a tracer is about to be used.
6232  */
6233 int tracing_update_buffers(void)
6234 {
6235 	int ret = 0;
6236 
6237 	mutex_lock(&trace_types_lock);
6238 	if (!ring_buffer_expanded)
6239 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6240 						RING_BUFFER_ALL_CPUS);
6241 	mutex_unlock(&trace_types_lock);
6242 
6243 	return ret;
6244 }
6245 
6246 struct trace_option_dentry;
6247 
6248 static void
6249 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6250 
6251 /*
6252  * Used to clear out the tracer before deletion of an instance.
6253  * Must have trace_types_lock held.
6254  */
6255 static void tracing_set_nop(struct trace_array *tr)
6256 {
6257 	if (tr->current_trace == &nop_trace)
6258 		return;
6259 
6260 	tr->current_trace->enabled--;
6261 
6262 	if (tr->current_trace->reset)
6263 		tr->current_trace->reset(tr);
6264 
6265 	tr->current_trace = &nop_trace;
6266 }
6267 
6268 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6269 {
6270 	/* Only enable if the directory has been created already. */
6271 	if (!tr->dir)
6272 		return;
6273 
6274 	create_trace_option_files(tr, t);
6275 }
6276 
6277 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6278 {
6279 	struct tracer *t;
6280 #ifdef CONFIG_TRACER_MAX_TRACE
6281 	bool had_max_tr;
6282 #endif
6283 	int ret = 0;
6284 
6285 	mutex_lock(&trace_types_lock);
6286 
6287 	if (!ring_buffer_expanded) {
6288 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6289 						RING_BUFFER_ALL_CPUS);
6290 		if (ret < 0)
6291 			goto out;
6292 		ret = 0;
6293 	}
6294 
6295 	for (t = trace_types; t; t = t->next) {
6296 		if (strcmp(t->name, buf) == 0)
6297 			break;
6298 	}
6299 	if (!t) {
6300 		ret = -EINVAL;
6301 		goto out;
6302 	}
6303 	if (t == tr->current_trace)
6304 		goto out;
6305 
6306 #ifdef CONFIG_TRACER_SNAPSHOT
6307 	if (t->use_max_tr) {
6308 		arch_spin_lock(&tr->max_lock);
6309 		if (tr->cond_snapshot)
6310 			ret = -EBUSY;
6311 		arch_spin_unlock(&tr->max_lock);
6312 		if (ret)
6313 			goto out;
6314 	}
6315 #endif
6316 	/* Some tracers won't work on kernel command line */
6317 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6318 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6319 			t->name);
6320 		goto out;
6321 	}
6322 
6323 	/* Some tracers are only allowed for the top level buffer */
6324 	if (!trace_ok_for_array(t, tr)) {
6325 		ret = -EINVAL;
6326 		goto out;
6327 	}
6328 
6329 	/* If trace pipe files are being read, we can't change the tracer */
6330 	if (tr->trace_ref) {
6331 		ret = -EBUSY;
6332 		goto out;
6333 	}
6334 
6335 	trace_branch_disable();
6336 
6337 	tr->current_trace->enabled--;
6338 
6339 	if (tr->current_trace->reset)
6340 		tr->current_trace->reset(tr);
6341 
6342 	/* Current trace needs to be nop_trace before synchronize_rcu */
6343 	tr->current_trace = &nop_trace;
6344 
6345 #ifdef CONFIG_TRACER_MAX_TRACE
6346 	had_max_tr = tr->allocated_snapshot;
6347 
6348 	if (had_max_tr && !t->use_max_tr) {
6349 		/*
6350 		 * We need to make sure that the update_max_tr sees that
6351 		 * current_trace changed to nop_trace to keep it from
6352 		 * swapping the buffers after we resize it.
6353 		 * The update_max_tr is called from interrupts disabled
6354 		 * so a synchronized_sched() is sufficient.
6355 		 */
6356 		synchronize_rcu();
6357 		free_snapshot(tr);
6358 	}
6359 #endif
6360 
6361 #ifdef CONFIG_TRACER_MAX_TRACE
6362 	if (t->use_max_tr && !had_max_tr) {
6363 		ret = tracing_alloc_snapshot_instance(tr);
6364 		if (ret < 0)
6365 			goto out;
6366 	}
6367 #endif
6368 
6369 	if (t->init) {
6370 		ret = tracer_init(t, tr);
6371 		if (ret)
6372 			goto out;
6373 	}
6374 
6375 	tr->current_trace = t;
6376 	tr->current_trace->enabled++;
6377 	trace_branch_enable(tr);
6378  out:
6379 	mutex_unlock(&trace_types_lock);
6380 
6381 	return ret;
6382 }
6383 
6384 static ssize_t
6385 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6386 			size_t cnt, loff_t *ppos)
6387 {
6388 	struct trace_array *tr = filp->private_data;
6389 	char buf[MAX_TRACER_SIZE+1];
6390 	int i;
6391 	size_t ret;
6392 	int err;
6393 
6394 	ret = cnt;
6395 
6396 	if (cnt > MAX_TRACER_SIZE)
6397 		cnt = MAX_TRACER_SIZE;
6398 
6399 	if (copy_from_user(buf, ubuf, cnt))
6400 		return -EFAULT;
6401 
6402 	buf[cnt] = 0;
6403 
6404 	/* strip ending whitespace. */
6405 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6406 		buf[i] = 0;
6407 
6408 	err = tracing_set_tracer(tr, buf);
6409 	if (err)
6410 		return err;
6411 
6412 	*ppos += ret;
6413 
6414 	return ret;
6415 }
6416 
6417 static ssize_t
6418 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6419 		   size_t cnt, loff_t *ppos)
6420 {
6421 	char buf[64];
6422 	int r;
6423 
6424 	r = snprintf(buf, sizeof(buf), "%ld\n",
6425 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6426 	if (r > sizeof(buf))
6427 		r = sizeof(buf);
6428 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6429 }
6430 
6431 static ssize_t
6432 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6433 		    size_t cnt, loff_t *ppos)
6434 {
6435 	unsigned long val;
6436 	int ret;
6437 
6438 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6439 	if (ret)
6440 		return ret;
6441 
6442 	*ptr = val * 1000;
6443 
6444 	return cnt;
6445 }
6446 
6447 static ssize_t
6448 tracing_thresh_read(struct file *filp, char __user *ubuf,
6449 		    size_t cnt, loff_t *ppos)
6450 {
6451 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6452 }
6453 
6454 static ssize_t
6455 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6456 		     size_t cnt, loff_t *ppos)
6457 {
6458 	struct trace_array *tr = filp->private_data;
6459 	int ret;
6460 
6461 	mutex_lock(&trace_types_lock);
6462 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6463 	if (ret < 0)
6464 		goto out;
6465 
6466 	if (tr->current_trace->update_thresh) {
6467 		ret = tr->current_trace->update_thresh(tr);
6468 		if (ret < 0)
6469 			goto out;
6470 	}
6471 
6472 	ret = cnt;
6473 out:
6474 	mutex_unlock(&trace_types_lock);
6475 
6476 	return ret;
6477 }
6478 
6479 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6480 
6481 static ssize_t
6482 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6483 		     size_t cnt, loff_t *ppos)
6484 {
6485 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6486 }
6487 
6488 static ssize_t
6489 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6490 		      size_t cnt, loff_t *ppos)
6491 {
6492 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6493 }
6494 
6495 #endif
6496 
6497 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6498 {
6499 	struct trace_array *tr = inode->i_private;
6500 	struct trace_iterator *iter;
6501 	int ret;
6502 
6503 	ret = tracing_check_open_get_tr(tr);
6504 	if (ret)
6505 		return ret;
6506 
6507 	mutex_lock(&trace_types_lock);
6508 
6509 	/* create a buffer to store the information to pass to userspace */
6510 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6511 	if (!iter) {
6512 		ret = -ENOMEM;
6513 		__trace_array_put(tr);
6514 		goto out;
6515 	}
6516 
6517 	trace_seq_init(&iter->seq);
6518 	iter->trace = tr->current_trace;
6519 
6520 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6521 		ret = -ENOMEM;
6522 		goto fail;
6523 	}
6524 
6525 	/* trace pipe does not show start of buffer */
6526 	cpumask_setall(iter->started);
6527 
6528 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6529 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6530 
6531 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6532 	if (trace_clocks[tr->clock_id].in_ns)
6533 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6534 
6535 	iter->tr = tr;
6536 	iter->array_buffer = &tr->array_buffer;
6537 	iter->cpu_file = tracing_get_cpu(inode);
6538 	mutex_init(&iter->mutex);
6539 	filp->private_data = iter;
6540 
6541 	if (iter->trace->pipe_open)
6542 		iter->trace->pipe_open(iter);
6543 
6544 	nonseekable_open(inode, filp);
6545 
6546 	tr->trace_ref++;
6547 out:
6548 	mutex_unlock(&trace_types_lock);
6549 	return ret;
6550 
6551 fail:
6552 	kfree(iter);
6553 	__trace_array_put(tr);
6554 	mutex_unlock(&trace_types_lock);
6555 	return ret;
6556 }
6557 
6558 static int tracing_release_pipe(struct inode *inode, struct file *file)
6559 {
6560 	struct trace_iterator *iter = file->private_data;
6561 	struct trace_array *tr = inode->i_private;
6562 
6563 	mutex_lock(&trace_types_lock);
6564 
6565 	tr->trace_ref--;
6566 
6567 	if (iter->trace->pipe_close)
6568 		iter->trace->pipe_close(iter);
6569 
6570 	mutex_unlock(&trace_types_lock);
6571 
6572 	free_cpumask_var(iter->started);
6573 	mutex_destroy(&iter->mutex);
6574 	kfree(iter);
6575 
6576 	trace_array_put(tr);
6577 
6578 	return 0;
6579 }
6580 
6581 static __poll_t
6582 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6583 {
6584 	struct trace_array *tr = iter->tr;
6585 
6586 	/* Iterators are static, they should be filled or empty */
6587 	if (trace_buffer_iter(iter, iter->cpu_file))
6588 		return EPOLLIN | EPOLLRDNORM;
6589 
6590 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6591 		/*
6592 		 * Always select as readable when in blocking mode
6593 		 */
6594 		return EPOLLIN | EPOLLRDNORM;
6595 	else
6596 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6597 					     filp, poll_table);
6598 }
6599 
6600 static __poll_t
6601 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6602 {
6603 	struct trace_iterator *iter = filp->private_data;
6604 
6605 	return trace_poll(iter, filp, poll_table);
6606 }
6607 
6608 /* Must be called with iter->mutex held. */
6609 static int tracing_wait_pipe(struct file *filp)
6610 {
6611 	struct trace_iterator *iter = filp->private_data;
6612 	int ret;
6613 
6614 	while (trace_empty(iter)) {
6615 
6616 		if ((filp->f_flags & O_NONBLOCK)) {
6617 			return -EAGAIN;
6618 		}
6619 
6620 		/*
6621 		 * We block until we read something and tracing is disabled.
6622 		 * We still block if tracing is disabled, but we have never
6623 		 * read anything. This allows a user to cat this file, and
6624 		 * then enable tracing. But after we have read something,
6625 		 * we give an EOF when tracing is again disabled.
6626 		 *
6627 		 * iter->pos will be 0 if we haven't read anything.
6628 		 */
6629 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6630 			break;
6631 
6632 		mutex_unlock(&iter->mutex);
6633 
6634 		ret = wait_on_pipe(iter, 0);
6635 
6636 		mutex_lock(&iter->mutex);
6637 
6638 		if (ret)
6639 			return ret;
6640 	}
6641 
6642 	return 1;
6643 }
6644 
/*
 * Consumer reader.
 *
 * Read handler of the trace pipe. Data left in iter->seq from a previous
 * call is handed out first. Otherwise the tracer's own read callback (if
 * any) gets a chance, and after that the function waits for entries,
 * formats them into iter->seq (consuming them from the ring buffer unless
 * the line printer says not to) and copies the result to user space.
 *
 * Returns what trace_seq_to_user() or the tracer's read callback returned,
 * 0 when tracing finished with an empty buffer, or the non-positive result
 * of tracing_wait_pipe().
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	/* Nothing was left over: start again with an empty sequence. */
	trace_seq_init(&iter->seq);

	/* A tracer-specific reader takes precedence if it produced a result. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	/* Never try to hand out a full page or more in one go. */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	memset(&iter->seq, 0,
	       sizeof(struct trace_iterator) -
	       offsetof(struct trace_iterator, seq));
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);
	iter->pos = -1;

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		/* Remember the length so a partial line can be dropped again. */
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		/* Enough has been formatted to satisfy the request. */
		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything was read out: reset the sequence for the next call. */
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
6743 
6744 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6745 				     unsigned int idx)
6746 {
6747 	__free_page(spd->pages[idx]);
6748 }
6749 
6750 static size_t
6751 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6752 {
6753 	size_t count;
6754 	int save_len;
6755 	int ret;
6756 
6757 	/* Seq buffer is page-sized, exactly what we need. */
6758 	for (;;) {
6759 		save_len = iter->seq.seq.len;
6760 		ret = print_trace_line(iter);
6761 
6762 		if (trace_seq_has_overflowed(&iter->seq)) {
6763 			iter->seq.seq.len = save_len;
6764 			break;
6765 		}
6766 
6767 		/*
6768 		 * This should not be hit, because it should only
6769 		 * be set if the iter->seq overflowed. But check it
6770 		 * anyway to be safe.
6771 		 */
6772 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6773 			iter->seq.seq.len = save_len;
6774 			break;
6775 		}
6776 
6777 		count = trace_seq_used(&iter->seq) - save_len;
6778 		if (rem < count) {
6779 			rem = 0;
6780 			iter->seq.seq.len = save_len;
6781 			break;
6782 		}
6783 
6784 		if (ret != TRACE_TYPE_NO_CONSUME)
6785 			trace_consume(iter);
6786 		rem -= count;
6787 		if (!trace_find_next_entry_inc(iter))	{
6788 			rem = 0;
6789 			iter->ent = NULL;
6790 			break;
6791 		}
6792 	}
6793 
6794 	return rem;
6795 }
6796 
/*
 * Splice handler of the trace pipe.
 *
 * Waits for trace data like tracing_read_pipe(), then fills freshly
 * allocated pages with formatted trace lines (one trace_seq worth per
 * page) and hands them to @pipe through splice_to_pipe().
 *
 * Returns the result of splice_to_pipe() when at least one page was
 * filled, 0 when no page could be filled, -ENOMEM if the splice descriptor
 * cannot be grown, -EFAULT if no entry is found after waiting, the
 * non-zero result of the tracer's splice_read callback, or the
 * non-positive result of tracing_wait_pipe().
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* A tracer-specific splice takes precedence if it produced a result. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	/* Make sure the iterator points at an entry before formatting. */
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* This page is not handed out; pages before it still are. */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	/* i is the number of pages that were successfully filled. */
	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
6882 
6883 static ssize_t
6884 tracing_entries_read(struct file *filp, char __user *ubuf,
6885 		     size_t cnt, loff_t *ppos)
6886 {
6887 	struct inode *inode = file_inode(filp);
6888 	struct trace_array *tr = inode->i_private;
6889 	int cpu = tracing_get_cpu(inode);
6890 	char buf[64];
6891 	int r = 0;
6892 	ssize_t ret;
6893 
6894 	mutex_lock(&trace_types_lock);
6895 
6896 	if (cpu == RING_BUFFER_ALL_CPUS) {
6897 		int cpu, buf_size_same;
6898 		unsigned long size;
6899 
6900 		size = 0;
6901 		buf_size_same = 1;
6902 		/* check if all cpu sizes are same */
6903 		for_each_tracing_cpu(cpu) {
6904 			/* fill in the size from first enabled cpu */
6905 			if (size == 0)
6906 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6907 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6908 				buf_size_same = 0;
6909 				break;
6910 			}
6911 		}
6912 
6913 		if (buf_size_same) {
6914 			if (!ring_buffer_expanded)
6915 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6916 					    size >> 10,
6917 					    trace_buf_size >> 10);
6918 			else
6919 				r = sprintf(buf, "%lu\n", size >> 10);
6920 		} else
6921 			r = sprintf(buf, "X\n");
6922 	} else
6923 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6924 
6925 	mutex_unlock(&trace_types_lock);
6926 
6927 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6928 	return ret;
6929 }
6930 
6931 static ssize_t
6932 tracing_entries_write(struct file *filp, const char __user *ubuf,
6933 		      size_t cnt, loff_t *ppos)
6934 {
6935 	struct inode *inode = file_inode(filp);
6936 	struct trace_array *tr = inode->i_private;
6937 	unsigned long val;
6938 	int ret;
6939 
6940 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6941 	if (ret)
6942 		return ret;
6943 
6944 	/* must have at least 1 entry */
6945 	if (!val)
6946 		return -EINVAL;
6947 
6948 	/* value is in KB */
6949 	val <<= 10;
6950 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6951 	if (ret < 0)
6952 		return ret;
6953 
6954 	*ppos += cnt;
6955 
6956 	return cnt;
6957 }
6958 
6959 static ssize_t
6960 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6961 				size_t cnt, loff_t *ppos)
6962 {
6963 	struct trace_array *tr = filp->private_data;
6964 	char buf[64];
6965 	int r, cpu;
6966 	unsigned long size = 0, expanded_size = 0;
6967 
6968 	mutex_lock(&trace_types_lock);
6969 	for_each_tracing_cpu(cpu) {
6970 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6971 		if (!ring_buffer_expanded)
6972 			expanded_size += trace_buf_size >> 10;
6973 	}
6974 	if (ring_buffer_expanded)
6975 		r = sprintf(buf, "%lu\n", size);
6976 	else
6977 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6978 	mutex_unlock(&trace_types_lock);
6979 
6980 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6981 }
6982 
6983 static ssize_t
6984 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6985 			  size_t cnt, loff_t *ppos)
6986 {
6987 	/*
6988 	 * There is no need to read what the user has written, this function
6989 	 * is just to make sure that there is no error when "echo" is used
6990 	 */
6991 
6992 	*ppos += cnt;
6993 
6994 	return cnt;
6995 }
6996 
6997 static int
6998 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6999 {
7000 	struct trace_array *tr = inode->i_private;
7001 
7002 	/* disable tracing ? */
7003 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7004 		tracer_tracing_off(tr);
7005 	/* resize the ring buffer to 0 */
7006 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7007 
7008 	trace_array_put(tr);
7009 
7010 	return 0;
7011 }
7012 
7013 static ssize_t
7014 tracing_mark_write(struct file *filp, const char __user *ubuf,
7015 					size_t cnt, loff_t *fpos)
7016 {
7017 	struct trace_array *tr = filp->private_data;
7018 	struct ring_buffer_event *event;
7019 	enum event_trigger_type tt = ETT_NONE;
7020 	struct trace_buffer *buffer;
7021 	struct print_entry *entry;
7022 	ssize_t written;
7023 	int size;
7024 	int len;
7025 
7026 /* Used in tracing_mark_raw_write() as well */
7027 #define FAULTED_STR "<faulted>"
7028 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7029 
7030 	if (tracing_disabled)
7031 		return -EINVAL;
7032 
7033 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7034 		return -EINVAL;
7035 
7036 	if (cnt > TRACE_BUF_SIZE)
7037 		cnt = TRACE_BUF_SIZE;
7038 
7039 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7040 
7041 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7042 
7043 	/* If less than "<faulted>", then make sure we can still add that */
7044 	if (cnt < FAULTED_SIZE)
7045 		size += FAULTED_SIZE - cnt;
7046 
7047 	buffer = tr->array_buffer.buffer;
7048 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7049 					    tracing_gen_ctx());
7050 	if (unlikely(!event))
7051 		/* Ring buffer disabled, return as if not open for write */
7052 		return -EBADF;
7053 
7054 	entry = ring_buffer_event_data(event);
7055 	entry->ip = _THIS_IP_;
7056 
7057 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7058 	if (len) {
7059 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7060 		cnt = FAULTED_SIZE;
7061 		written = -EFAULT;
7062 	} else
7063 		written = cnt;
7064 
7065 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7066 		/* do not add \n before testing triggers, but add \0 */
7067 		entry->buf[cnt] = '\0';
7068 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7069 	}
7070 
7071 	if (entry->buf[cnt - 1] != '\n') {
7072 		entry->buf[cnt] = '\n';
7073 		entry->buf[cnt + 1] = '\0';
7074 	} else
7075 		entry->buf[cnt] = '\0';
7076 
7077 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7078 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7079 	__buffer_unlock_commit(buffer, event);
7080 
7081 	if (tt)
7082 		event_triggers_post_call(tr->trace_marker_file, tt);
7083 
7084 	if (written > 0)
7085 		*fpos += written;
7086 
7087 	return written;
7088 }
7089 
7090 /* Limit it for now to 3K (including tag) */
7091 #define RAW_DATA_MAX_SIZE (1024*3)
7092 
7093 static ssize_t
7094 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7095 					size_t cnt, loff_t *fpos)
7096 {
7097 	struct trace_array *tr = filp->private_data;
7098 	struct ring_buffer_event *event;
7099 	struct trace_buffer *buffer;
7100 	struct raw_data_entry *entry;
7101 	ssize_t written;
7102 	int size;
7103 	int len;
7104 
7105 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7106 
7107 	if (tracing_disabled)
7108 		return -EINVAL;
7109 
7110 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7111 		return -EINVAL;
7112 
7113 	/* The marker must at least have a tag id */
7114 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7115 		return -EINVAL;
7116 
7117 	if (cnt > TRACE_BUF_SIZE)
7118 		cnt = TRACE_BUF_SIZE;
7119 
7120 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7121 
7122 	size = sizeof(*entry) + cnt;
7123 	if (cnt < FAULT_SIZE_ID)
7124 		size += FAULT_SIZE_ID - cnt;
7125 
7126 	buffer = tr->array_buffer.buffer;
7127 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7128 					    tracing_gen_ctx());
7129 	if (!event)
7130 		/* Ring buffer disabled, return as if not open for write */
7131 		return -EBADF;
7132 
7133 	entry = ring_buffer_event_data(event);
7134 
7135 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7136 	if (len) {
7137 		entry->id = -1;
7138 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7139 		written = -EFAULT;
7140 	} else
7141 		written = cnt;
7142 
7143 	__buffer_unlock_commit(buffer, event);
7144 
7145 	if (written > 0)
7146 		*fpos += written;
7147 
7148 	return written;
7149 }
7150 
7151 static int tracing_clock_show(struct seq_file *m, void *v)
7152 {
7153 	struct trace_array *tr = m->private;
7154 	int i;
7155 
7156 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7157 		seq_printf(m,
7158 			"%s%s%s%s", i ? " " : "",
7159 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7160 			i == tr->clock_id ? "]" : "");
7161 	seq_putc(m, '\n');
7162 
7163 	return 0;
7164 }
7165 
7166 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7167 {
7168 	int i;
7169 
7170 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7171 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7172 			break;
7173 	}
7174 	if (i == ARRAY_SIZE(trace_clocks))
7175 		return -EINVAL;
7176 
7177 	mutex_lock(&trace_types_lock);
7178 
7179 	tr->clock_id = i;
7180 
7181 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7182 
7183 	/*
7184 	 * New clock may not be consistent with the previous clock.
7185 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7186 	 */
7187 	tracing_reset_online_cpus(&tr->array_buffer);
7188 
7189 #ifdef CONFIG_TRACER_MAX_TRACE
7190 	if (tr->max_buffer.buffer)
7191 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7192 	tracing_reset_online_cpus(&tr->max_buffer);
7193 #endif
7194 
7195 	mutex_unlock(&trace_types_lock);
7196 
7197 	return 0;
7198 }
7199 
7200 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7201 				   size_t cnt, loff_t *fpos)
7202 {
7203 	struct seq_file *m = filp->private_data;
7204 	struct trace_array *tr = m->private;
7205 	char buf[64];
7206 	const char *clockstr;
7207 	int ret;
7208 
7209 	if (cnt >= sizeof(buf))
7210 		return -EINVAL;
7211 
7212 	if (copy_from_user(buf, ubuf, cnt))
7213 		return -EFAULT;
7214 
7215 	buf[cnt] = 0;
7216 
7217 	clockstr = strstrip(buf);
7218 
7219 	ret = tracing_set_clock(tr, clockstr);
7220 	if (ret)
7221 		return ret;
7222 
7223 	*fpos += cnt;
7224 
7225 	return cnt;
7226 }
7227 
7228 static int tracing_clock_open(struct inode *inode, struct file *file)
7229 {
7230 	struct trace_array *tr = inode->i_private;
7231 	int ret;
7232 
7233 	ret = tracing_check_open_get_tr(tr);
7234 	if (ret)
7235 		return ret;
7236 
7237 	ret = single_open(file, tracing_clock_show, inode->i_private);
7238 	if (ret < 0)
7239 		trace_array_put(tr);
7240 
7241 	return ret;
7242 }
7243 
7244 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7245 {
7246 	struct trace_array *tr = m->private;
7247 
7248 	mutex_lock(&trace_types_lock);
7249 
7250 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7251 		seq_puts(m, "delta [absolute]\n");
7252 	else
7253 		seq_puts(m, "[delta] absolute\n");
7254 
7255 	mutex_unlock(&trace_types_lock);
7256 
7257 	return 0;
7258 }
7259 
7260 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7261 {
7262 	struct trace_array *tr = inode->i_private;
7263 	int ret;
7264 
7265 	ret = tracing_check_open_get_tr(tr);
7266 	if (ret)
7267 		return ret;
7268 
7269 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7270 	if (ret < 0)
7271 		trace_array_put(tr);
7272 
7273 	return ret;
7274 }
7275 
7276 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7277 {
7278 	if (rbe == this_cpu_read(trace_buffered_event))
7279 		return ring_buffer_time_stamp(buffer);
7280 
7281 	return ring_buffer_event_time_stamp(buffer, rbe);
7282 }
7283 
7284 /*
7285  * Set or disable using the per CPU trace_buffer_event when possible.
7286  */
7287 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7288 {
7289 	int ret = 0;
7290 
7291 	mutex_lock(&trace_types_lock);
7292 
7293 	if (set && tr->no_filter_buffering_ref++)
7294 		goto out;
7295 
7296 	if (!set) {
7297 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7298 			ret = -EINVAL;
7299 			goto out;
7300 		}
7301 
7302 		--tr->no_filter_buffering_ref;
7303 	}
7304  out:
7305 	mutex_unlock(&trace_types_lock);
7306 
7307 	return ret;
7308 }
7309 
/*
 * Per-open state that embeds a trace_iterator; snapshot_raw_open() obtains
 * it from filp->private_data after tracing_buffers_open() succeeded.
 *
 * NOTE(review): the roles of the fields other than @iter are inferred from
 * their names only; confirm against the tracing_buffers_*() functions.
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;		/* presumably a spare buffer page - TODO confirm */
	unsigned int		spare_cpu;	/* presumably the CPU @spare belongs to - TODO confirm */
	unsigned int		read;		/* presumably a read offset - TODO confirm */
};
7316 
7317 #ifdef CONFIG_TRACER_SNAPSHOT
7318 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7319 {
7320 	struct trace_array *tr = inode->i_private;
7321 	struct trace_iterator *iter;
7322 	struct seq_file *m;
7323 	int ret;
7324 
7325 	ret = tracing_check_open_get_tr(tr);
7326 	if (ret)
7327 		return ret;
7328 
7329 	if (file->f_mode & FMODE_READ) {
7330 		iter = __tracing_open(inode, file, true);
7331 		if (IS_ERR(iter))
7332 			ret = PTR_ERR(iter);
7333 	} else {
7334 		/* Writes still need the seq_file to hold the private data */
7335 		ret = -ENOMEM;
7336 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7337 		if (!m)
7338 			goto out;
7339 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7340 		if (!iter) {
7341 			kfree(m);
7342 			goto out;
7343 		}
7344 		ret = 0;
7345 
7346 		iter->tr = tr;
7347 		iter->array_buffer = &tr->max_buffer;
7348 		iter->cpu_file = tracing_get_cpu(inode);
7349 		m->private = iter;
7350 		file->private_data = m;
7351 	}
7352 out:
7353 	if (ret < 0)
7354 		trace_array_put(tr);
7355 
7356 	return ret;
7357 }
7358 
7359 static ssize_t
7360 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7361 		       loff_t *ppos)
7362 {
7363 	struct seq_file *m = filp->private_data;
7364 	struct trace_iterator *iter = m->private;
7365 	struct trace_array *tr = iter->tr;
7366 	unsigned long val;
7367 	int ret;
7368 
7369 	ret = tracing_update_buffers();
7370 	if (ret < 0)
7371 		return ret;
7372 
7373 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7374 	if (ret)
7375 		return ret;
7376 
7377 	mutex_lock(&trace_types_lock);
7378 
7379 	if (tr->current_trace->use_max_tr) {
7380 		ret = -EBUSY;
7381 		goto out;
7382 	}
7383 
7384 	arch_spin_lock(&tr->max_lock);
7385 	if (tr->cond_snapshot)
7386 		ret = -EBUSY;
7387 	arch_spin_unlock(&tr->max_lock);
7388 	if (ret)
7389 		goto out;
7390 
7391 	switch (val) {
7392 	case 0:
7393 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7394 			ret = -EINVAL;
7395 			break;
7396 		}
7397 		if (tr->allocated_snapshot)
7398 			free_snapshot(tr);
7399 		break;
7400 	case 1:
7401 /* Only allow per-cpu swap if the ring buffer supports it */
7402 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7403 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7404 			ret = -EINVAL;
7405 			break;
7406 		}
7407 #endif
7408 		if (tr->allocated_snapshot)
7409 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7410 					&tr->array_buffer, iter->cpu_file);
7411 		else
7412 			ret = tracing_alloc_snapshot_instance(tr);
7413 		if (ret < 0)
7414 			break;
7415 		local_irq_disable();
7416 		/* Now, we're going to swap */
7417 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7418 			update_max_tr(tr, current, smp_processor_id(), NULL);
7419 		else
7420 			update_max_tr_single(tr, current, iter->cpu_file);
7421 		local_irq_enable();
7422 		break;
7423 	default:
7424 		if (tr->allocated_snapshot) {
7425 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7426 				tracing_reset_online_cpus(&tr->max_buffer);
7427 			else
7428 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7429 		}
7430 		break;
7431 	}
7432 
7433 	if (ret >= 0) {
7434 		*ppos += cnt;
7435 		ret = cnt;
7436 	}
7437 out:
7438 	mutex_unlock(&trace_types_lock);
7439 	return ret;
7440 }
7441 
7442 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7443 {
7444 	struct seq_file *m = file->private_data;
7445 	int ret;
7446 
7447 	ret = tracing_release(inode, file);
7448 
7449 	if (file->f_mode & FMODE_READ)
7450 		return ret;
7451 
7452 	/* If write only, the seq_file is just a stub */
7453 	if (m)
7454 		kfree(m->private);
7455 	kfree(m);
7456 
7457 	return 0;
7458 }
7459 
7460 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7461 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7462 				    size_t count, loff_t *ppos);
7463 static int tracing_buffers_release(struct inode *inode, struct file *file);
7464 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7465 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7466 
7467 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7468 {
7469 	struct ftrace_buffer_info *info;
7470 	int ret;
7471 
7472 	/* The following checks for tracefs lockdown */
7473 	ret = tracing_buffers_open(inode, filp);
7474 	if (ret < 0)
7475 		return ret;
7476 
7477 	info = filp->private_data;
7478 
7479 	if (info->iter.trace->use_max_tr) {
7480 		tracing_buffers_release(inode, filp);
7481 		return -EBUSY;
7482 	}
7483 
7484 	info->iter.snapshot = true;
7485 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7486 
7487 	return ret;
7488 }
7489 
7490 #endif /* CONFIG_TRACER_SNAPSHOT */
7491 
7492 
7493 static const struct file_operations tracing_thresh_fops = {
7494 	.open		= tracing_open_generic,
7495 	.read		= tracing_thresh_read,
7496 	.write		= tracing_thresh_write,
7497 	.llseek		= generic_file_llseek,
7498 };
7499 
7500 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7501 static const struct file_operations tracing_max_lat_fops = {
7502 	.open		= tracing_open_generic,
7503 	.read		= tracing_max_lat_read,
7504 	.write		= tracing_max_lat_write,
7505 	.llseek		= generic_file_llseek,
7506 };
7507 #endif
7508 
7509 static const struct file_operations set_tracer_fops = {
7510 	.open		= tracing_open_generic,
7511 	.read		= tracing_set_trace_read,
7512 	.write		= tracing_set_trace_write,
7513 	.llseek		= generic_file_llseek,
7514 };
7515 
7516 static const struct file_operations tracing_pipe_fops = {
7517 	.open		= tracing_open_pipe,
7518 	.poll		= tracing_poll_pipe,
7519 	.read		= tracing_read_pipe,
7520 	.splice_read	= tracing_splice_read_pipe,
7521 	.release	= tracing_release_pipe,
7522 	.llseek		= no_llseek,
7523 };
7524 
7525 static const struct file_operations tracing_entries_fops = {
7526 	.open		= tracing_open_generic_tr,
7527 	.read		= tracing_entries_read,
7528 	.write		= tracing_entries_write,
7529 	.llseek		= generic_file_llseek,
7530 	.release	= tracing_release_generic_tr,
7531 };
7532 
7533 static const struct file_operations tracing_total_entries_fops = {
7534 	.open		= tracing_open_generic_tr,
7535 	.read		= tracing_total_entries_read,
7536 	.llseek		= generic_file_llseek,
7537 	.release	= tracing_release_generic_tr,
7538 };
7539 
7540 static const struct file_operations tracing_free_buffer_fops = {
7541 	.open		= tracing_open_generic_tr,
7542 	.write		= tracing_free_buffer_write,
7543 	.release	= tracing_free_buffer_release,
7544 };
7545 
7546 static const struct file_operations tracing_mark_fops = {
7547 	.open		= tracing_open_generic_tr,
7548 	.write		= tracing_mark_write,
7549 	.llseek		= generic_file_llseek,
7550 	.release	= tracing_release_generic_tr,
7551 };
7552 
7553 static const struct file_operations tracing_mark_raw_fops = {
7554 	.open		= tracing_open_generic_tr,
7555 	.write		= tracing_mark_raw_write,
7556 	.llseek		= generic_file_llseek,
7557 	.release	= tracing_release_generic_tr,
7558 };
7559 
7560 static const struct file_operations trace_clock_fops = {
7561 	.open		= tracing_clock_open,
7562 	.read		= seq_read,
7563 	.llseek		= seq_lseek,
7564 	.release	= tracing_single_release_tr,
7565 	.write		= tracing_clock_write,
7566 };
7567 
7568 static const struct file_operations trace_time_stamp_mode_fops = {
7569 	.open		= tracing_time_stamp_mode_open,
7570 	.read		= seq_read,
7571 	.llseek		= seq_lseek,
7572 	.release	= tracing_single_release_tr,
7573 };
7574 
#ifdef CONFIG_TRACER_SNAPSHOT
/* Handlers for the "snapshot" files: seq_file reads, control via writes */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.release	= tracing_snapshot_release,
	.llseek		= tracing_lseek,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
};

/* Handlers for the "snapshot_raw" files: raw page reads of the snapshot */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.release	= tracing_buffers_release,
	.llseek		= no_llseek,
	.read		= tracing_buffers_read,
	.splice_read	= tracing_buffers_splice_read,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7593 
7594 /*
7595  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7596  * @filp: The active open file structure
7597  * @ubuf: The userspace provided buffer to read value into
7598  * @cnt: The maximum number of bytes to read
7599  * @ppos: The current "file" position
7600  *
7601  * This function implements the write interface for a struct trace_min_max_param.
7602  * The filp->private_data must point to a trace_min_max_param structure that
7603  * defines where to write the value, the min and the max acceptable values,
7604  * and a lock to protect the write.
7605  */
7606 static ssize_t
7607 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7608 {
7609 	struct trace_min_max_param *param = filp->private_data;
7610 	u64 val;
7611 	int err;
7612 
7613 	if (!param)
7614 		return -EFAULT;
7615 
7616 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7617 	if (err)
7618 		return err;
7619 
7620 	if (param->lock)
7621 		mutex_lock(param->lock);
7622 
7623 	if (param->min && val < *param->min)
7624 		err = -EINVAL;
7625 
7626 	if (param->max && val > *param->max)
7627 		err = -EINVAL;
7628 
7629 	if (!err)
7630 		*param->val = val;
7631 
7632 	if (param->lock)
7633 		mutex_unlock(param->lock);
7634 
7635 	if (err)
7636 		return err;
7637 
7638 	return cnt;
7639 }
7640 
7641 /*
7642  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7643  * @filp: The active open file structure
7644  * @ubuf: The userspace provided buffer to read value into
7645  * @cnt: The maximum number of bytes to read
7646  * @ppos: The current "file" position
7647  *
7648  * This function implements the read interface for a struct trace_min_max_param.
7649  * The filp->private_data must point to a trace_min_max_param struct with valid
7650  * data.
7651  */
7652 static ssize_t
7653 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7654 {
7655 	struct trace_min_max_param *param = filp->private_data;
7656 	char buf[U64_STR_SIZE];
7657 	int len;
7658 	u64 val;
7659 
7660 	if (!param)
7661 		return -EFAULT;
7662 
7663 	val = *param->val;
7664 
7665 	if (cnt > sizeof(buf))
7666 		cnt = sizeof(buf);
7667 
7668 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7669 
7670 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7671 }
7672 
7673 const struct file_operations trace_min_max_fops = {
7674 	.open		= tracing_open_generic,
7675 	.read		= trace_min_max_read,
7676 	.write		= trace_min_max_write,
7677 };
7678 
/* Upper bound on the number of entries kept per trace array error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the location string stored with each entry */
#define TRACING_LOG_LOC_MAX	128

/* Prefix printed before the offending command; also sets the caret indent */
#define CMD_PREFIX "  Command: "

/* Per-error metadata; the error text itself is referenced, not copied */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
	u64		ts;	/* local_clock() value taken when logged */
};

/* One entry on a trace array's err_log list */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
};

/* Taken around every access to an err_log list in this file */
static DEFINE_MUTEX(tracing_err_log_lock);
7699 
7700 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7701 {
7702 	struct tracing_log_err *err;
7703 
7704 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7705 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7706 		if (!err)
7707 			err = ERR_PTR(-ENOMEM);
7708 		tr->n_err_log_entries++;
7709 
7710 		return err;
7711 	}
7712 
7713 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7714 	list_del(&err->list);
7715 
7716 	return err;
7717 }
7718 
/**
 * err_pos - locate a substring inside a command for caret placement
 * @cmd: The tracing command that caused the error
 * @str: The string the caret should point at within @cmd
 *
 * Looks up the first occurrence of @str in @cmd; the result is suitable
 * as the position argument of tracing_log_err().  An empty @cmd triggers
 * a warning.
 *
 * Returns the offset of the first match within @cmd, or 0 when @str does
 * not occur (or @cmd is empty).
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
7744 
7745 /**
7746  * tracing_log_err - write an error to the tracing error log
7747  * @tr: The associated trace array for the error (NULL for top level array)
7748  * @loc: A string describing where the error occurred
7749  * @cmd: The tracing command that caused the error
7750  * @errs: The array of loc-specific static error strings
7751  * @type: The index into errs[], which produces the specific static err string
7752  * @pos: The position the caret should be placed in the cmd
7753  *
7754  * Writes an error into tracing/error_log of the form:
7755  *
7756  * <loc>: error: <text>
7757  *   Command: <cmd>
7758  *              ^
7759  *
7760  * tracing/error_log is a small log file containing the last
7761  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7762  * unless there has been a tracing error, and the error log can be
7763  * cleared and have its memory freed by writing the empty string in
7764  * truncation mode to it i.e. echo > tracing/error_log.
7765  *
7766  * NOTE: the @errs array along with the @type param are used to
7767  * produce a static error string - this string is not copied and saved
7768  * when the error is logged - only a pointer to it is saved.  See
7769  * existing callers for examples of how static strings are typically
7770  * defined for use with tracing_log_err().
7771  */
7772 void tracing_log_err(struct trace_array *tr,
7773 		     const char *loc, const char *cmd,
7774 		     const char **errs, u8 type, u8 pos)
7775 {
7776 	struct tracing_log_err *err;
7777 
7778 	if (!tr)
7779 		tr = &global_trace;
7780 
7781 	mutex_lock(&tracing_err_log_lock);
7782 	err = get_tracing_log_err(tr);
7783 	if (PTR_ERR(err) == -ENOMEM) {
7784 		mutex_unlock(&tracing_err_log_lock);
7785 		return;
7786 	}
7787 
7788 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7789 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7790 
7791 	err->info.errs = errs;
7792 	err->info.type = type;
7793 	err->info.pos = pos;
7794 	err->info.ts = local_clock();
7795 
7796 	list_add_tail(&err->list, &tr->err_log);
7797 	mutex_unlock(&tracing_err_log_lock);
7798 }
7799 
7800 static void clear_tracing_err_log(struct trace_array *tr)
7801 {
7802 	struct tracing_log_err *err, *next;
7803 
7804 	mutex_lock(&tracing_err_log_lock);
7805 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7806 		list_del(&err->list);
7807 		kfree(err);
7808 	}
7809 
7810 	tr->n_err_log_entries = 0;
7811 	mutex_unlock(&tracing_err_log_lock);
7812 }
7813 
7814 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7815 {
7816 	struct trace_array *tr = m->private;
7817 
7818 	mutex_lock(&tracing_err_log_lock);
7819 
7820 	return seq_list_start(&tr->err_log, *pos);
7821 }
7822 
7823 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7824 {
7825 	struct trace_array *tr = m->private;
7826 
7827 	return seq_list_next(v, &tr->err_log, pos);
7828 }
7829 
7830 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7831 {
7832 	mutex_unlock(&tracing_err_log_lock);
7833 }
7834 
7835 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7836 {
7837 	u8 i;
7838 
7839 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7840 		seq_putc(m, ' ');
7841 	for (i = 0; i < pos; i++)
7842 		seq_putc(m, ' ');
7843 	seq_puts(m, "^\n");
7844 }
7845 
7846 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7847 {
7848 	struct tracing_log_err *err = v;
7849 
7850 	if (err) {
7851 		const char *err_text = err->info.errs[err->info.type];
7852 		u64 sec = err->info.ts;
7853 		u32 nsec;
7854 
7855 		nsec = do_div(sec, NSEC_PER_SEC);
7856 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7857 			   err->loc, err_text);
7858 		seq_printf(m, "%s", err->cmd);
7859 		tracing_err_log_show_pos(m, err->info.pos);
7860 	}
7861 
7862 	return 0;
7863 }
7864 
7865 static const struct seq_operations tracing_err_log_seq_ops = {
7866 	.start  = tracing_err_log_seq_start,
7867 	.next   = tracing_err_log_seq_next,
7868 	.stop   = tracing_err_log_seq_stop,
7869 	.show   = tracing_err_log_seq_show
7870 };
7871 
7872 static int tracing_err_log_open(struct inode *inode, struct file *file)
7873 {
7874 	struct trace_array *tr = inode->i_private;
7875 	int ret = 0;
7876 
7877 	ret = tracing_check_open_get_tr(tr);
7878 	if (ret)
7879 		return ret;
7880 
7881 	/* If this file was opened for write, then erase contents */
7882 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7883 		clear_tracing_err_log(tr);
7884 
7885 	if (file->f_mode & FMODE_READ) {
7886 		ret = seq_open(file, &tracing_err_log_seq_ops);
7887 		if (!ret) {
7888 			struct seq_file *m = file->private_data;
7889 			m->private = tr;
7890 		} else {
7891 			trace_array_put(tr);
7892 		}
7893 	}
7894 	return ret;
7895 }
7896 
7897 static ssize_t tracing_err_log_write(struct file *file,
7898 				     const char __user *buffer,
7899 				     size_t count, loff_t *ppos)
7900 {
7901 	return count;
7902 }
7903 
7904 static int tracing_err_log_release(struct inode *inode, struct file *file)
7905 {
7906 	struct trace_array *tr = inode->i_private;
7907 
7908 	trace_array_put(tr);
7909 
7910 	if (file->f_mode & FMODE_READ)
7911 		seq_release(inode, file);
7912 
7913 	return 0;
7914 }
7915 
7916 static const struct file_operations tracing_err_log_fops = {
7917 	.open           = tracing_err_log_open,
7918 	.write		= tracing_err_log_write,
7919 	.read           = seq_read,
7920 	.llseek         = seq_lseek,
7921 	.release        = tracing_err_log_release,
7922 };
7923 
7924 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7925 {
7926 	struct trace_array *tr = inode->i_private;
7927 	struct ftrace_buffer_info *info;
7928 	int ret;
7929 
7930 	ret = tracing_check_open_get_tr(tr);
7931 	if (ret)
7932 		return ret;
7933 
7934 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7935 	if (!info) {
7936 		trace_array_put(tr);
7937 		return -ENOMEM;
7938 	}
7939 
7940 	mutex_lock(&trace_types_lock);
7941 
7942 	info->iter.tr		= tr;
7943 	info->iter.cpu_file	= tracing_get_cpu(inode);
7944 	info->iter.trace	= tr->current_trace;
7945 	info->iter.array_buffer = &tr->array_buffer;
7946 	info->spare		= NULL;
7947 	/* Force reading ring buffer for first read */
7948 	info->read		= (unsigned int)-1;
7949 
7950 	filp->private_data = info;
7951 
7952 	tr->trace_ref++;
7953 
7954 	mutex_unlock(&trace_types_lock);
7955 
7956 	ret = nonseekable_open(inode, filp);
7957 	if (ret < 0)
7958 		trace_array_put(tr);
7959 
7960 	return ret;
7961 }
7962 
7963 static __poll_t
7964 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7965 {
7966 	struct ftrace_buffer_info *info = filp->private_data;
7967 	struct trace_iterator *iter = &info->iter;
7968 
7969 	return trace_poll(iter, filp, poll_table);
7970 }
7971 
7972 static ssize_t
7973 tracing_buffers_read(struct file *filp, char __user *ubuf,
7974 		     size_t count, loff_t *ppos)
7975 {
7976 	struct ftrace_buffer_info *info = filp->private_data;
7977 	struct trace_iterator *iter = &info->iter;
7978 	ssize_t ret = 0;
7979 	ssize_t size;
7980 
7981 	if (!count)
7982 		return 0;
7983 
7984 #ifdef CONFIG_TRACER_MAX_TRACE
7985 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7986 		return -EBUSY;
7987 #endif
7988 
7989 	if (!info->spare) {
7990 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7991 							  iter->cpu_file);
7992 		if (IS_ERR(info->spare)) {
7993 			ret = PTR_ERR(info->spare);
7994 			info->spare = NULL;
7995 		} else {
7996 			info->spare_cpu = iter->cpu_file;
7997 		}
7998 	}
7999 	if (!info->spare)
8000 		return ret;
8001 
8002 	/* Do we have previous read data to read? */
8003 	if (info->read < PAGE_SIZE)
8004 		goto read;
8005 
8006  again:
8007 	trace_access_lock(iter->cpu_file);
8008 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8009 				    &info->spare,
8010 				    count,
8011 				    iter->cpu_file, 0);
8012 	trace_access_unlock(iter->cpu_file);
8013 
8014 	if (ret < 0) {
8015 		if (trace_empty(iter)) {
8016 			if ((filp->f_flags & O_NONBLOCK))
8017 				return -EAGAIN;
8018 
8019 			ret = wait_on_pipe(iter, 0);
8020 			if (ret)
8021 				return ret;
8022 
8023 			goto again;
8024 		}
8025 		return 0;
8026 	}
8027 
8028 	info->read = 0;
8029  read:
8030 	size = PAGE_SIZE - info->read;
8031 	if (size > count)
8032 		size = count;
8033 
8034 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8035 	if (ret == size)
8036 		return -EFAULT;
8037 
8038 	size -= ret;
8039 
8040 	*ppos += size;
8041 	info->read += size;
8042 
8043 	return size;
8044 }
8045 
8046 static int tracing_buffers_release(struct inode *inode, struct file *file)
8047 {
8048 	struct ftrace_buffer_info *info = file->private_data;
8049 	struct trace_iterator *iter = &info->iter;
8050 
8051 	mutex_lock(&trace_types_lock);
8052 
8053 	iter->tr->trace_ref--;
8054 
8055 	__trace_array_put(iter->tr);
8056 
8057 	if (info->spare)
8058 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8059 					   info->spare_cpu, info->spare);
8060 	kvfree(info);
8061 
8062 	mutex_unlock(&trace_types_lock);
8063 
8064 	return 0;
8065 }
8066 
/*
 * Reference-counted handle on one ring-buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page came from */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu argument used when freeing the page */
	refcount_t		refcount;	/* page is freed when this hits zero */
};
8073 
8074 static void buffer_ref_release(struct buffer_ref *ref)
8075 {
8076 	if (!refcount_dec_and_test(&ref->refcount))
8077 		return;
8078 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8079 	kfree(ref);
8080 }
8081 
8082 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8083 				    struct pipe_buffer *buf)
8084 {
8085 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8086 
8087 	buffer_ref_release(ref);
8088 	buf->private = 0;
8089 }
8090 
8091 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8092 				struct pipe_buffer *buf)
8093 {
8094 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8095 
8096 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8097 		return false;
8098 
8099 	refcount_inc(&ref->refcount);
8100 	return true;
8101 }
8102 
8103 /* Pipe buffer operations for a buffer. */
8104 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8105 	.release		= buffer_pipe_buf_release,
8106 	.get			= buffer_pipe_buf_get,
8107 };
8108 
8109 /*
8110  * Callback from splice_to_pipe(), if we need to release some pages
8111  * at the end of the spd in case we error'ed out in filling the pipe.
8112  */
8113 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8114 {
8115 	struct buffer_ref *ref =
8116 		(struct buffer_ref *)spd->partial[i].private;
8117 
8118 	buffer_ref_release(ref);
8119 	spd->partial[i].private = 0;
8120 }
8121 
/*
 * tracing_buffers_splice_read - splice whole ring-buffer pages into a pipe
 * @file:  open file; ->private_data is the ftrace_buffer_info
 * @ppos:  file position; must be page aligned, advanced per page read
 * @pipe:  destination pipe
 * @len:   requested length; rounded down to a page multiple, must be at
 *         least one page
 * @flags: splice flags (SPLICE_F_NONBLOCK is honoured)
 *
 * Each page read from the ring buffer is wrapped in a refcounted
 * buffer_ref and handed to the pipe.  If no page could be read, the call
 * either fails with -EAGAIN (non-blocking) or waits and retries.
 *
 * Returns the result of splice_to_pipe(), or a negative errno.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Only whole, page-aligned transfers are supported */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One iteration per page, while room, length and entries remain */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing read: give the page back and stop (ret is left untouched) */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		/* The ref travels with the pipe buffer; see buffer_pipe_buf_ops */
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the pages filled in; it matches the increments done above */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
		if (ret)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8230 
8231 static const struct file_operations tracing_buffers_fops = {
8232 	.open		= tracing_buffers_open,
8233 	.read		= tracing_buffers_read,
8234 	.poll		= tracing_buffers_poll,
8235 	.release	= tracing_buffers_release,
8236 	.splice_read	= tracing_buffers_splice_read,
8237 	.llseek		= no_llseek,
8238 };
8239 
8240 static ssize_t
8241 tracing_stats_read(struct file *filp, char __user *ubuf,
8242 		   size_t count, loff_t *ppos)
8243 {
8244 	struct inode *inode = file_inode(filp);
8245 	struct trace_array *tr = inode->i_private;
8246 	struct array_buffer *trace_buf = &tr->array_buffer;
8247 	int cpu = tracing_get_cpu(inode);
8248 	struct trace_seq *s;
8249 	unsigned long cnt;
8250 	unsigned long long t;
8251 	unsigned long usec_rem;
8252 
8253 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8254 	if (!s)
8255 		return -ENOMEM;
8256 
8257 	trace_seq_init(s);
8258 
8259 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8260 	trace_seq_printf(s, "entries: %ld\n", cnt);
8261 
8262 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8263 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8264 
8265 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8266 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8267 
8268 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8269 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8270 
8271 	if (trace_clocks[tr->clock_id].in_ns) {
8272 		/* local or global for trace_clock */
8273 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8274 		usec_rem = do_div(t, USEC_PER_SEC);
8275 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8276 								t, usec_rem);
8277 
8278 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8279 		usec_rem = do_div(t, USEC_PER_SEC);
8280 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8281 	} else {
8282 		/* counter or tsc mode for trace_clock */
8283 		trace_seq_printf(s, "oldest event ts: %llu\n",
8284 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8285 
8286 		trace_seq_printf(s, "now ts: %llu\n",
8287 				ring_buffer_time_stamp(trace_buf->buffer));
8288 	}
8289 
8290 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8291 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8292 
8293 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8294 	trace_seq_printf(s, "read events: %ld\n", cnt);
8295 
8296 	count = simple_read_from_buffer(ubuf, count, ppos,
8297 					s->buffer, trace_seq_used(s));
8298 
8299 	kfree(s);
8300 
8301 	return count;
8302 }
8303 
8304 static const struct file_operations tracing_stats_fops = {
8305 	.open		= tracing_open_generic_tr,
8306 	.read		= tracing_stats_read,
8307 	.llseek		= generic_file_llseek,
8308 	.release	= tracing_release_generic_tr,
8309 };
8310 
8311 #ifdef CONFIG_DYNAMIC_FTRACE
8312 
8313 static ssize_t
8314 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8315 		  size_t cnt, loff_t *ppos)
8316 {
8317 	ssize_t ret;
8318 	char *buf;
8319 	int r;
8320 
8321 	/* 256 should be plenty to hold the amount needed */
8322 	buf = kmalloc(256, GFP_KERNEL);
8323 	if (!buf)
8324 		return -ENOMEM;
8325 
8326 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8327 		      ftrace_update_tot_cnt,
8328 		      ftrace_number_of_pages,
8329 		      ftrace_number_of_groups);
8330 
8331 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8332 	kfree(buf);
8333 	return ret;
8334 }
8335 
8336 static const struct file_operations tracing_dyn_info_fops = {
8337 	.open		= tracing_open_generic,
8338 	.read		= tracing_read_dyn_info,
8339 	.llseek		= generic_file_llseek,
8340 };
8341 #endif /* CONFIG_DYNAMIC_FTRACE */
8342 
8343 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe callback without a count: snapshot @tr on every hit. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8351 
8352 static void
8353 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8354 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8355 		      void *data)
8356 {
8357 	struct ftrace_func_mapper *mapper = data;
8358 	long *count = NULL;
8359 
8360 	if (mapper)
8361 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8362 
8363 	if (count) {
8364 
8365 		if (*count <= 0)
8366 			return;
8367 
8368 		(*count)--;
8369 	}
8370 
8371 	tracing_snapshot_instance(tr);
8372 }
8373 
8374 static int
8375 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8376 		      struct ftrace_probe_ops *ops, void *data)
8377 {
8378 	struct ftrace_func_mapper *mapper = data;
8379 	long *count = NULL;
8380 
8381 	seq_printf(m, "%ps:", (void *)ip);
8382 
8383 	seq_puts(m, "snapshot");
8384 
8385 	if (mapper)
8386 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8387 
8388 	if (count)
8389 		seq_printf(m, ":count=%ld\n", *count);
8390 	else
8391 		seq_puts(m, ":unlimited\n");
8392 
8393 	return 0;
8394 }
8395 
8396 static int
8397 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8398 		     unsigned long ip, void *init_data, void **data)
8399 {
8400 	struct ftrace_func_mapper *mapper = *data;
8401 
8402 	if (!mapper) {
8403 		mapper = allocate_ftrace_func_mapper();
8404 		if (!mapper)
8405 			return -ENOMEM;
8406 		*data = mapper;
8407 	}
8408 
8409 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8410 }
8411 
8412 static void
8413 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8414 		     unsigned long ip, void *data)
8415 {
8416 	struct ftrace_func_mapper *mapper = data;
8417 
8418 	if (!ip) {
8419 		if (!mapper)
8420 			return;
8421 		free_ftrace_func_mapper(mapper, NULL);
8422 		return;
8423 	}
8424 
8425 	ftrace_func_mapper_remove_ip(mapper, ip);
8426 }
8427 
8428 static struct ftrace_probe_ops snapshot_probe_ops = {
8429 	.func			= ftrace_snapshot,
8430 	.print			= ftrace_snapshot_print,
8431 };
8432 
8433 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8434 	.func			= ftrace_count_snapshot,
8435 	.print			= ftrace_snapshot_print,
8436 	.init			= ftrace_snapshot_init,
8437 	.free			= ftrace_snapshot_free,
8438 };
8439 
8440 static int
8441 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8442 			       char *glob, char *cmd, char *param, int enable)
8443 {
8444 	struct ftrace_probe_ops *ops;
8445 	void *count = (void *)-1;
8446 	char *number;
8447 	int ret;
8448 
8449 	if (!tr)
8450 		return -ENODEV;
8451 
8452 	/* hash funcs only work with set_ftrace_filter */
8453 	if (!enable)
8454 		return -EINVAL;
8455 
8456 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8457 
8458 	if (glob[0] == '!')
8459 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8460 
8461 	if (!param)
8462 		goto out_reg;
8463 
8464 	number = strsep(&param, ":");
8465 
8466 	if (!strlen(number))
8467 		goto out_reg;
8468 
8469 	/*
8470 	 * We use the callback data field (which is a pointer)
8471 	 * as our counter.
8472 	 */
8473 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8474 	if (ret)
8475 		return ret;
8476 
8477  out_reg:
8478 	ret = tracing_alloc_snapshot_instance(tr);
8479 	if (ret < 0)
8480 		goto out;
8481 
8482 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8483 
8484  out:
8485 	return ret < 0 ? ret : 0;
8486 }
8487 
8488 static struct ftrace_func_command ftrace_snapshot_cmd = {
8489 	.name			= "snapshot",
8490 	.func			= ftrace_trace_snapshot_callback,
8491 };
8492 
8493 static __init int register_snapshot_cmd(void)
8494 {
8495 	return register_ftrace_command(&ftrace_snapshot_cmd);
8496 }
8497 #else
8498 static inline __init int register_snapshot_cmd(void) { return 0; }
8499 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8500 
8501 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8502 {
8503 	if (WARN_ON(!tr->dir))
8504 		return ERR_PTR(-ENODEV);
8505 
8506 	/* Top directory uses NULL as the parent */
8507 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8508 		return NULL;
8509 
8510 	/* All sub buffers have a descriptor */
8511 	return tr->dir;
8512 }
8513 
8514 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8515 {
8516 	struct dentry *d_tracer;
8517 
8518 	if (tr->percpu_dir)
8519 		return tr->percpu_dir;
8520 
8521 	d_tracer = tracing_get_dentry(tr);
8522 	if (IS_ERR(d_tracer))
8523 		return NULL;
8524 
8525 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8526 
8527 	MEM_FAIL(!tr->percpu_dir,
8528 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8529 
8530 	return tr->percpu_dir;
8531 }
8532 
8533 static struct dentry *
8534 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8535 		      void *data, long cpu, const struct file_operations *fops)
8536 {
8537 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8538 
8539 	if (ret) /* See tracing_get_cpu() */
8540 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8541 	return ret;
8542 }
8543 
8544 static void
8545 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8546 {
8547 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8548 	struct dentry *d_cpu;
8549 	char cpu_dir[30]; /* 30 characters should be more than enough */
8550 
8551 	if (!d_percpu)
8552 		return;
8553 
8554 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8555 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8556 	if (!d_cpu) {
8557 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8558 		return;
8559 	}
8560 
8561 	/* per cpu trace_pipe */
8562 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8563 				tr, cpu, &tracing_pipe_fops);
8564 
8565 	/* per cpu trace */
8566 	trace_create_cpu_file("trace", 0644, d_cpu,
8567 				tr, cpu, &tracing_fops);
8568 
8569 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8570 				tr, cpu, &tracing_buffers_fops);
8571 
8572 	trace_create_cpu_file("stats", 0444, d_cpu,
8573 				tr, cpu, &tracing_stats_fops);
8574 
8575 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8576 				tr, cpu, &tracing_entries_fops);
8577 
8578 #ifdef CONFIG_TRACER_SNAPSHOT
8579 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8580 				tr, cpu, &snapshot_fops);
8581 
8582 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8583 				tr, cpu, &snapshot_raw_fops);
8584 #endif
8585 }
8586 
8587 #ifdef CONFIG_FTRACE_SELFTEST
8588 /* Let selftest have access to static functions in this file */
8589 #include "trace_selftest.c"
8590 #endif
8591 
8592 static ssize_t
8593 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8594 			loff_t *ppos)
8595 {
8596 	struct trace_option_dentry *topt = filp->private_data;
8597 	char *buf;
8598 
8599 	if (topt->flags->val & topt->opt->bit)
8600 		buf = "1\n";
8601 	else
8602 		buf = "0\n";
8603 
8604 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8605 }
8606 
8607 static ssize_t
8608 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8609 			 loff_t *ppos)
8610 {
8611 	struct trace_option_dentry *topt = filp->private_data;
8612 	unsigned long val;
8613 	int ret;
8614 
8615 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8616 	if (ret)
8617 		return ret;
8618 
8619 	if (val != 0 && val != 1)
8620 		return -EINVAL;
8621 
8622 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8623 		mutex_lock(&trace_types_lock);
8624 		ret = __set_tracer_option(topt->tr, topt->flags,
8625 					  topt->opt, !val);
8626 		mutex_unlock(&trace_types_lock);
8627 		if (ret)
8628 			return ret;
8629 	}
8630 
8631 	*ppos += cnt;
8632 
8633 	return cnt;
8634 }
8635 
8636 
8637 static const struct file_operations trace_options_fops = {
8638 	.open = tracing_open_generic,
8639 	.read = trace_options_read,
8640 	.write = trace_options_write,
8641 	.llseek	= generic_file_llseek,
8642 };
8643 
8644 /*
8645  * In order to pass in both the trace_array descriptor as well as the index
8646  * to the flag that the trace option file represents, the trace_array
8647  * has a character array of trace_flags_index[], which holds the index
8648  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8649  * The address of this character array is passed to the flag option file
8650  * read/write callbacks.
8651  *
8652  * In order to extract both the index and the trace_array descriptor,
8653  * get_tr_index() uses the following algorithm.
8654  *
8655  *   idx = *ptr;
8656  *
8657  * As the pointer itself contains the address of the index (remember
8658  * index[1] == 1).
8659  *
8660  * Then to get the trace_array descriptor, by subtracting that index
8661  * from the ptr, we get to the start of the index itself.
8662  *
8663  *   ptr - idx == &index[0]
8664  *
8665  * Then a simple container_of() from that pointer gets us to the
8666  * trace_array descriptor.
8667  */
8668 static void get_tr_index(void *data, struct trace_array **ptr,
8669 			 unsigned int *pindex)
8670 {
8671 	*pindex = *(unsigned char *)data;
8672 
8673 	*ptr = container_of(data - *pindex, struct trace_array,
8674 			    trace_flags_index);
8675 }
8676 
8677 static ssize_t
8678 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8679 			loff_t *ppos)
8680 {
8681 	void *tr_index = filp->private_data;
8682 	struct trace_array *tr;
8683 	unsigned int index;
8684 	char *buf;
8685 
8686 	get_tr_index(tr_index, &tr, &index);
8687 
8688 	if (tr->trace_flags & (1 << index))
8689 		buf = "1\n";
8690 	else
8691 		buf = "0\n";
8692 
8693 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8694 }
8695 
8696 static ssize_t
8697 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8698 			 loff_t *ppos)
8699 {
8700 	void *tr_index = filp->private_data;
8701 	struct trace_array *tr;
8702 	unsigned int index;
8703 	unsigned long val;
8704 	int ret;
8705 
8706 	get_tr_index(tr_index, &tr, &index);
8707 
8708 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8709 	if (ret)
8710 		return ret;
8711 
8712 	if (val != 0 && val != 1)
8713 		return -EINVAL;
8714 
8715 	mutex_lock(&event_mutex);
8716 	mutex_lock(&trace_types_lock);
8717 	ret = set_tracer_flag(tr, 1 << index, val);
8718 	mutex_unlock(&trace_types_lock);
8719 	mutex_unlock(&event_mutex);
8720 
8721 	if (ret < 0)
8722 		return ret;
8723 
8724 	*ppos += cnt;
8725 
8726 	return cnt;
8727 }
8728 
8729 static const struct file_operations trace_options_core_fops = {
8730 	.open = tracing_open_generic,
8731 	.read = trace_options_core_read,
8732 	.write = trace_options_core_write,
8733 	.llseek = generic_file_llseek,
8734 };
8735 
8736 struct dentry *trace_create_file(const char *name,
8737 				 umode_t mode,
8738 				 struct dentry *parent,
8739 				 void *data,
8740 				 const struct file_operations *fops)
8741 {
8742 	struct dentry *ret;
8743 
8744 	ret = tracefs_create_file(name, mode, parent, data, fops);
8745 	if (!ret)
8746 		pr_warn("Could not create tracefs '%s' entry\n", name);
8747 
8748 	return ret;
8749 }
8750 
8751 
8752 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8753 {
8754 	struct dentry *d_tracer;
8755 
8756 	if (tr->options)
8757 		return tr->options;
8758 
8759 	d_tracer = tracing_get_dentry(tr);
8760 	if (IS_ERR(d_tracer))
8761 		return NULL;
8762 
8763 	tr->options = tracefs_create_dir("options", d_tracer);
8764 	if (!tr->options) {
8765 		pr_warn("Could not create tracefs directory 'options'\n");
8766 		return NULL;
8767 	}
8768 
8769 	return tr->options;
8770 }
8771 
8772 static void
8773 create_trace_option_file(struct trace_array *tr,
8774 			 struct trace_option_dentry *topt,
8775 			 struct tracer_flags *flags,
8776 			 struct tracer_opt *opt)
8777 {
8778 	struct dentry *t_options;
8779 
8780 	t_options = trace_options_init_dentry(tr);
8781 	if (!t_options)
8782 		return;
8783 
8784 	topt->flags = flags;
8785 	topt->opt = opt;
8786 	topt->tr = tr;
8787 
8788 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8789 				    &trace_options_fops);
8790 
8791 }
8792 
8793 static void
8794 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8795 {
8796 	struct trace_option_dentry *topts;
8797 	struct trace_options *tr_topts;
8798 	struct tracer_flags *flags;
8799 	struct tracer_opt *opts;
8800 	int cnt;
8801 	int i;
8802 
8803 	if (!tracer)
8804 		return;
8805 
8806 	flags = tracer->flags;
8807 
8808 	if (!flags || !flags->opts)
8809 		return;
8810 
8811 	/*
8812 	 * If this is an instance, only create flags for tracers
8813 	 * the instance may have.
8814 	 */
8815 	if (!trace_ok_for_array(tracer, tr))
8816 		return;
8817 
8818 	for (i = 0; i < tr->nr_topts; i++) {
8819 		/* Make sure there's no duplicate flags. */
8820 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8821 			return;
8822 	}
8823 
8824 	opts = flags->opts;
8825 
8826 	for (cnt = 0; opts[cnt].name; cnt++)
8827 		;
8828 
8829 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8830 	if (!topts)
8831 		return;
8832 
8833 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8834 			    GFP_KERNEL);
8835 	if (!tr_topts) {
8836 		kfree(topts);
8837 		return;
8838 	}
8839 
8840 	tr->topts = tr_topts;
8841 	tr->topts[tr->nr_topts].tracer = tracer;
8842 	tr->topts[tr->nr_topts].topts = topts;
8843 	tr->nr_topts++;
8844 
8845 	for (cnt = 0; opts[cnt].name; cnt++) {
8846 		create_trace_option_file(tr, &topts[cnt], flags,
8847 					 &opts[cnt]);
8848 		MEM_FAIL(topts[cnt].entry == NULL,
8849 			  "Failed to create trace option: %s",
8850 			  opts[cnt].name);
8851 	}
8852 }
8853 
8854 static struct dentry *
8855 create_trace_option_core_file(struct trace_array *tr,
8856 			      const char *option, long index)
8857 {
8858 	struct dentry *t_options;
8859 
8860 	t_options = trace_options_init_dentry(tr);
8861 	if (!t_options)
8862 		return NULL;
8863 
8864 	return trace_create_file(option, 0644, t_options,
8865 				 (void *)&tr->trace_flags_index[index],
8866 				 &trace_options_core_fops);
8867 }
8868 
8869 static void create_trace_options_dir(struct trace_array *tr)
8870 {
8871 	struct dentry *t_options;
8872 	bool top_level = tr == &global_trace;
8873 	int i;
8874 
8875 	t_options = trace_options_init_dentry(tr);
8876 	if (!t_options)
8877 		return;
8878 
8879 	for (i = 0; trace_options[i]; i++) {
8880 		if (top_level ||
8881 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8882 			create_trace_option_core_file(tr, trace_options[i], i);
8883 	}
8884 }
8885 
8886 static ssize_t
8887 rb_simple_read(struct file *filp, char __user *ubuf,
8888 	       size_t cnt, loff_t *ppos)
8889 {
8890 	struct trace_array *tr = filp->private_data;
8891 	char buf[64];
8892 	int r;
8893 
8894 	r = tracer_tracing_is_on(tr);
8895 	r = sprintf(buf, "%d\n", r);
8896 
8897 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8898 }
8899 
8900 static ssize_t
8901 rb_simple_write(struct file *filp, const char __user *ubuf,
8902 		size_t cnt, loff_t *ppos)
8903 {
8904 	struct trace_array *tr = filp->private_data;
8905 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8906 	unsigned long val;
8907 	int ret;
8908 
8909 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8910 	if (ret)
8911 		return ret;
8912 
8913 	if (buffer) {
8914 		mutex_lock(&trace_types_lock);
8915 		if (!!val == tracer_tracing_is_on(tr)) {
8916 			val = 0; /* do nothing */
8917 		} else if (val) {
8918 			tracer_tracing_on(tr);
8919 			if (tr->current_trace->start)
8920 				tr->current_trace->start(tr);
8921 		} else {
8922 			tracer_tracing_off(tr);
8923 			if (tr->current_trace->stop)
8924 				tr->current_trace->stop(tr);
8925 		}
8926 		mutex_unlock(&trace_types_lock);
8927 	}
8928 
8929 	(*ppos)++;
8930 
8931 	return cnt;
8932 }
8933 
8934 static const struct file_operations rb_simple_fops = {
8935 	.open		= tracing_open_generic_tr,
8936 	.read		= rb_simple_read,
8937 	.write		= rb_simple_write,
8938 	.release	= tracing_release_generic_tr,
8939 	.llseek		= default_llseek,
8940 };
8941 
8942 static ssize_t
8943 buffer_percent_read(struct file *filp, char __user *ubuf,
8944 		    size_t cnt, loff_t *ppos)
8945 {
8946 	struct trace_array *tr = filp->private_data;
8947 	char buf[64];
8948 	int r;
8949 
8950 	r = tr->buffer_percent;
8951 	r = sprintf(buf, "%d\n", r);
8952 
8953 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8954 }
8955 
8956 static ssize_t
8957 buffer_percent_write(struct file *filp, const char __user *ubuf,
8958 		     size_t cnt, loff_t *ppos)
8959 {
8960 	struct trace_array *tr = filp->private_data;
8961 	unsigned long val;
8962 	int ret;
8963 
8964 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8965 	if (ret)
8966 		return ret;
8967 
8968 	if (val > 100)
8969 		return -EINVAL;
8970 
8971 	if (!val)
8972 		val = 1;
8973 
8974 	tr->buffer_percent = val;
8975 
8976 	(*ppos)++;
8977 
8978 	return cnt;
8979 }
8980 
8981 static const struct file_operations buffer_percent_fops = {
8982 	.open		= tracing_open_generic_tr,
8983 	.read		= buffer_percent_read,
8984 	.write		= buffer_percent_write,
8985 	.release	= tracing_release_generic_tr,
8986 	.llseek		= default_llseek,
8987 };
8988 
/*
 * Dentry of the "instances" directory; set by create_trace_instances().
 * While it is NULL, trace_array_create() does not create a tracefs
 * directory for new arrays.
 */
static struct dentry *trace_instance_dir;

/* Forward declaration: used by trace_array_create_dir() before its definition */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8993 
8994 static int
8995 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8996 {
8997 	enum ring_buffer_flags rb_flags;
8998 
8999 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9000 
9001 	buf->tr = tr;
9002 
9003 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9004 	if (!buf->buffer)
9005 		return -ENOMEM;
9006 
9007 	buf->data = alloc_percpu(struct trace_array_cpu);
9008 	if (!buf->data) {
9009 		ring_buffer_free(buf->buffer);
9010 		buf->buffer = NULL;
9011 		return -ENOMEM;
9012 	}
9013 
9014 	/* Allocate the first page for all buffers */
9015 	set_buffer_entries(&tr->array_buffer,
9016 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9017 
9018 	return 0;
9019 }
9020 
9021 static int allocate_trace_buffers(struct trace_array *tr, int size)
9022 {
9023 	int ret;
9024 
9025 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9026 	if (ret)
9027 		return ret;
9028 
9029 #ifdef CONFIG_TRACER_MAX_TRACE
9030 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9031 				    allocate_snapshot ? size : 1);
9032 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9033 		ring_buffer_free(tr->array_buffer.buffer);
9034 		tr->array_buffer.buffer = NULL;
9035 		free_percpu(tr->array_buffer.data);
9036 		tr->array_buffer.data = NULL;
9037 		return -ENOMEM;
9038 	}
9039 	tr->allocated_snapshot = allocate_snapshot;
9040 
9041 	/*
9042 	 * Only the top level trace array gets its snapshot allocated
9043 	 * from the kernel command line.
9044 	 */
9045 	allocate_snapshot = false;
9046 #endif
9047 
9048 	return 0;
9049 }
9050 
9051 static void free_trace_buffer(struct array_buffer *buf)
9052 {
9053 	if (buf->buffer) {
9054 		ring_buffer_free(buf->buffer);
9055 		buf->buffer = NULL;
9056 		free_percpu(buf->data);
9057 		buf->data = NULL;
9058 	}
9059 }
9060 
9061 static void free_trace_buffers(struct trace_array *tr)
9062 {
9063 	if (!tr)
9064 		return;
9065 
9066 	free_trace_buffer(&tr->array_buffer);
9067 
9068 #ifdef CONFIG_TRACER_MAX_TRACE
9069 	free_trace_buffer(&tr->max_buffer);
9070 #endif
9071 }
9072 
9073 static void init_trace_flags_index(struct trace_array *tr)
9074 {
9075 	int i;
9076 
9077 	/* Used by the trace options files */
9078 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9079 		tr->trace_flags_index[i] = i;
9080 }
9081 
9082 static void __update_tracer_options(struct trace_array *tr)
9083 {
9084 	struct tracer *t;
9085 
9086 	for (t = trace_types; t; t = t->next)
9087 		add_tracer_options(tr, t);
9088 }
9089 
9090 static void update_tracer_options(struct trace_array *tr)
9091 {
9092 	mutex_lock(&trace_types_lock);
9093 	__update_tracer_options(tr);
9094 	mutex_unlock(&trace_types_lock);
9095 }
9096 
9097 /* Must have trace_types_lock held */
9098 struct trace_array *trace_array_find(const char *instance)
9099 {
9100 	struct trace_array *tr, *found = NULL;
9101 
9102 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9103 		if (tr->name && strcmp(tr->name, instance) == 0) {
9104 			found = tr;
9105 			break;
9106 		}
9107 	}
9108 
9109 	return found;
9110 }
9111 
9112 struct trace_array *trace_array_find_get(const char *instance)
9113 {
9114 	struct trace_array *tr;
9115 
9116 	mutex_lock(&trace_types_lock);
9117 	tr = trace_array_find(instance);
9118 	if (tr)
9119 		tr->ref++;
9120 	mutex_unlock(&trace_types_lock);
9121 
9122 	return tr;
9123 }
9124 
9125 static int trace_array_create_dir(struct trace_array *tr)
9126 {
9127 	int ret;
9128 
9129 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9130 	if (!tr->dir)
9131 		return -EINVAL;
9132 
9133 	ret = event_trace_add_tracer(tr->dir, tr);
9134 	if (ret)
9135 		tracefs_remove(tr->dir);
9136 
9137 	init_tracer_tracefs(tr, tr->dir);
9138 	__update_tracer_options(tr);
9139 
9140 	return ret;
9141 }
9142 
9143 static struct trace_array *trace_array_create(const char *name)
9144 {
9145 	struct trace_array *tr;
9146 	int ret;
9147 
9148 	ret = -ENOMEM;
9149 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9150 	if (!tr)
9151 		return ERR_PTR(ret);
9152 
9153 	tr->name = kstrdup(name, GFP_KERNEL);
9154 	if (!tr->name)
9155 		goto out_free_tr;
9156 
9157 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9158 		goto out_free_tr;
9159 
9160 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9161 
9162 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9163 
9164 	raw_spin_lock_init(&tr->start_lock);
9165 
9166 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9167 
9168 	tr->current_trace = &nop_trace;
9169 
9170 	INIT_LIST_HEAD(&tr->systems);
9171 	INIT_LIST_HEAD(&tr->events);
9172 	INIT_LIST_HEAD(&tr->hist_vars);
9173 	INIT_LIST_HEAD(&tr->err_log);
9174 
9175 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9176 		goto out_free_tr;
9177 
9178 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9179 		goto out_free_tr;
9180 
9181 	ftrace_init_trace_array(tr);
9182 
9183 	init_trace_flags_index(tr);
9184 
9185 	if (trace_instance_dir) {
9186 		ret = trace_array_create_dir(tr);
9187 		if (ret)
9188 			goto out_free_tr;
9189 	} else
9190 		__trace_early_add_events(tr);
9191 
9192 	list_add(&tr->list, &ftrace_trace_arrays);
9193 
9194 	tr->ref++;
9195 
9196 	return tr;
9197 
9198  out_free_tr:
9199 	ftrace_free_ftrace_ops(tr);
9200 	free_trace_buffers(tr);
9201 	free_cpumask_var(tr->tracing_cpumask);
9202 	kfree(tr->name);
9203 	kfree(tr);
9204 
9205 	return ERR_PTR(ret);
9206 }
9207 
9208 static int instance_mkdir(const char *name)
9209 {
9210 	struct trace_array *tr;
9211 	int ret;
9212 
9213 	mutex_lock(&event_mutex);
9214 	mutex_lock(&trace_types_lock);
9215 
9216 	ret = -EEXIST;
9217 	if (trace_array_find(name))
9218 		goto out_unlock;
9219 
9220 	tr = trace_array_create(name);
9221 
9222 	ret = PTR_ERR_OR_ZERO(tr);
9223 
9224 out_unlock:
9225 	mutex_unlock(&trace_types_lock);
9226 	mutex_unlock(&event_mutex);
9227 	return ret;
9228 }
9229 
9230 /**
9231  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9232  * @name: The name of the trace array to be looked up/created.
9233  *
9234  * Returns pointer to trace array with given name.
9235  * NULL, if it cannot be created.
9236  *
9237  * NOTE: This function increments the reference counter associated with the
9238  * trace array returned. This makes sure it cannot be freed while in use.
9239  * Use trace_array_put() once the trace array is no longer needed.
9240  * If the trace_array is to be freed, trace_array_destroy() needs to
9241  * be called after the trace_array_put(), or simply let user space delete
9242  * it from the tracefs instances directory. But until the
9243  * trace_array_put() is called, user space can not delete it.
9244  *
9245  */
9246 struct trace_array *trace_array_get_by_name(const char *name)
9247 {
9248 	struct trace_array *tr;
9249 
9250 	mutex_lock(&event_mutex);
9251 	mutex_lock(&trace_types_lock);
9252 
9253 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9254 		if (tr->name && strcmp(tr->name, name) == 0)
9255 			goto out_unlock;
9256 	}
9257 
9258 	tr = trace_array_create(name);
9259 
9260 	if (IS_ERR(tr))
9261 		tr = NULL;
9262 out_unlock:
9263 	if (tr)
9264 		tr->ref++;
9265 
9266 	mutex_unlock(&trace_types_lock);
9267 	mutex_unlock(&event_mutex);
9268 	return tr;
9269 }
9270 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9271 
9272 static int __remove_instance(struct trace_array *tr)
9273 {
9274 	int i;
9275 
9276 	/* Reference counter for a newly created trace array = 1. */
9277 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9278 		return -EBUSY;
9279 
9280 	list_del(&tr->list);
9281 
9282 	/* Disable all the flags that were enabled coming in */
9283 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9284 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9285 			set_tracer_flag(tr, 1 << i, 0);
9286 	}
9287 
9288 	tracing_set_nop(tr);
9289 	clear_ftrace_function_probes(tr);
9290 	event_trace_del_tracer(tr);
9291 	ftrace_clear_pids(tr);
9292 	ftrace_destroy_function_files(tr);
9293 	tracefs_remove(tr->dir);
9294 	free_percpu(tr->last_func_repeats);
9295 	free_trace_buffers(tr);
9296 
9297 	for (i = 0; i < tr->nr_topts; i++) {
9298 		kfree(tr->topts[i].topts);
9299 	}
9300 	kfree(tr->topts);
9301 
9302 	free_cpumask_var(tr->tracing_cpumask);
9303 	kfree(tr->name);
9304 	kfree(tr);
9305 
9306 	return 0;
9307 }
9308 
9309 int trace_array_destroy(struct trace_array *this_tr)
9310 {
9311 	struct trace_array *tr;
9312 	int ret;
9313 
9314 	if (!this_tr)
9315 		return -EINVAL;
9316 
9317 	mutex_lock(&event_mutex);
9318 	mutex_lock(&trace_types_lock);
9319 
9320 	ret = -ENODEV;
9321 
9322 	/* Making sure trace array exists before destroying it. */
9323 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9324 		if (tr == this_tr) {
9325 			ret = __remove_instance(tr);
9326 			break;
9327 		}
9328 	}
9329 
9330 	mutex_unlock(&trace_types_lock);
9331 	mutex_unlock(&event_mutex);
9332 
9333 	return ret;
9334 }
9335 EXPORT_SYMBOL_GPL(trace_array_destroy);
9336 
9337 static int instance_rmdir(const char *name)
9338 {
9339 	struct trace_array *tr;
9340 	int ret;
9341 
9342 	mutex_lock(&event_mutex);
9343 	mutex_lock(&trace_types_lock);
9344 
9345 	ret = -ENODEV;
9346 	tr = trace_array_find(name);
9347 	if (tr)
9348 		ret = __remove_instance(tr);
9349 
9350 	mutex_unlock(&trace_types_lock);
9351 	mutex_unlock(&event_mutex);
9352 
9353 	return ret;
9354 }
9355 
9356 static __init void create_trace_instances(struct dentry *d_tracer)
9357 {
9358 	struct trace_array *tr;
9359 
9360 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9361 							 instance_mkdir,
9362 							 instance_rmdir);
9363 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9364 		return;
9365 
9366 	mutex_lock(&event_mutex);
9367 	mutex_lock(&trace_types_lock);
9368 
9369 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9370 		if (!tr->name)
9371 			continue;
9372 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9373 			     "Failed to create instance directory\n"))
9374 			break;
9375 	}
9376 
9377 	mutex_unlock(&trace_types_lock);
9378 	mutex_unlock(&event_mutex);
9379 }
9380 
9381 static void
9382 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9383 {
9384 	struct trace_event_file *file;
9385 	int cpu;
9386 
9387 	trace_create_file("available_tracers", 0444, d_tracer,
9388 			tr, &show_traces_fops);
9389 
9390 	trace_create_file("current_tracer", 0644, d_tracer,
9391 			tr, &set_tracer_fops);
9392 
9393 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9394 			  tr, &tracing_cpumask_fops);
9395 
9396 	trace_create_file("trace_options", 0644, d_tracer,
9397 			  tr, &tracing_iter_fops);
9398 
9399 	trace_create_file("trace", 0644, d_tracer,
9400 			  tr, &tracing_fops);
9401 
9402 	trace_create_file("trace_pipe", 0444, d_tracer,
9403 			  tr, &tracing_pipe_fops);
9404 
9405 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9406 			  tr, &tracing_entries_fops);
9407 
9408 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9409 			  tr, &tracing_total_entries_fops);
9410 
9411 	trace_create_file("free_buffer", 0200, d_tracer,
9412 			  tr, &tracing_free_buffer_fops);
9413 
9414 	trace_create_file("trace_marker", 0220, d_tracer,
9415 			  tr, &tracing_mark_fops);
9416 
9417 	file = __find_event_file(tr, "ftrace", "print");
9418 	if (file && file->dir)
9419 		trace_create_file("trigger", 0644, file->dir, file,
9420 				  &event_trigger_fops);
9421 	tr->trace_marker_file = file;
9422 
9423 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9424 			  tr, &tracing_mark_raw_fops);
9425 
9426 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9427 			  &trace_clock_fops);
9428 
9429 	trace_create_file("tracing_on", 0644, d_tracer,
9430 			  tr, &rb_simple_fops);
9431 
9432 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9433 			  &trace_time_stamp_mode_fops);
9434 
9435 	tr->buffer_percent = 50;
9436 
9437 	trace_create_file("buffer_percent", 0444, d_tracer,
9438 			tr, &buffer_percent_fops);
9439 
9440 	create_trace_options_dir(tr);
9441 
9442 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9443 	trace_create_maxlat_file(tr, d_tracer);
9444 #endif
9445 
9446 	if (ftrace_create_function_files(tr, d_tracer))
9447 		MEM_FAIL(1, "Could not allocate function filter files");
9448 
9449 #ifdef CONFIG_TRACER_SNAPSHOT
9450 	trace_create_file("snapshot", 0644, d_tracer,
9451 			  tr, &snapshot_fops);
9452 #endif
9453 
9454 	trace_create_file("error_log", 0644, d_tracer,
9455 			  tr, &tracing_err_log_fops);
9456 
9457 	for_each_tracing_cpu(cpu)
9458 		tracing_init_tracefs_percpu(tr, cpu);
9459 
9460 	ftrace_init_tracefs(tr, d_tracer);
9461 }
9462 
9463 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9464 {
9465 	struct vfsmount *mnt;
9466 	struct file_system_type *type;
9467 
9468 	/*
9469 	 * To maintain backward compatibility for tools that mount
9470 	 * debugfs to get to the tracing facility, tracefs is automatically
9471 	 * mounted to the debugfs/tracing directory.
9472 	 */
9473 	type = get_fs_type("tracefs");
9474 	if (!type)
9475 		return NULL;
9476 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9477 	put_filesystem(type);
9478 	if (IS_ERR(mnt))
9479 		return NULL;
9480 	mntget(mnt);
9481 
9482 	return mnt;
9483 }
9484 
9485 /**
9486  * tracing_init_dentry - initialize top level trace array
9487  *
9488  * This is called when creating files or directories in the tracing
9489  * directory. It is called via fs_initcall() by any of the boot up code
9490  * and expects to return the dentry of the top level tracing directory.
9491  */
9492 int tracing_init_dentry(void)
9493 {
9494 	struct trace_array *tr = &global_trace;
9495 
9496 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9497 		pr_warn("Tracing disabled due to lockdown\n");
9498 		return -EPERM;
9499 	}
9500 
9501 	/* The top level trace array uses  NULL as parent */
9502 	if (tr->dir)
9503 		return 0;
9504 
9505 	if (WARN_ON(!tracefs_initialized()))
9506 		return -ENODEV;
9507 
9508 	/*
9509 	 * As there may still be users that expect the tracing
9510 	 * files to exist in debugfs/tracing, we must automount
9511 	 * the tracefs file system there, so older tools still
9512 	 * work with the newer kernel.
9513 	 */
9514 	tr->dir = debugfs_create_automount("tracing", NULL,
9515 					   trace_automount, NULL);
9516 
9517 	return 0;
9518 }
9519 
/*
 * Bounds of the built-in eval map pointer array; the number of entries
 * is __stop_ftrace_eval_maps - __start_ftrace_eval_maps.
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Workqueue and work item used at boot to insert the built-in eval maps */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
9525 
9526 static void __init eval_map_work_func(struct work_struct *work)
9527 {
9528 	int len;
9529 
9530 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9531 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9532 }
9533 
9534 static int __init trace_eval_init(void)
9535 {
9536 	INIT_WORK(&eval_map_work, eval_map_work_func);
9537 
9538 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9539 	if (!eval_map_wq) {
9540 		pr_err("Unable to allocate eval_map_wq\n");
9541 		/* Do work here */
9542 		eval_map_work_func(&eval_map_work);
9543 		return -ENOMEM;
9544 	}
9545 
9546 	queue_work(eval_map_wq, &eval_map_work);
9547 	return 0;
9548 }
9549 
9550 static int __init trace_eval_sync(void)
9551 {
9552 	/* Make sure the eval map updates are finished */
9553 	if (eval_map_wq)
9554 		destroy_workqueue(eval_map_wq);
9555 	return 0;
9556 }
9557 
9558 late_initcall_sync(trace_eval_sync);
9559 
9560 
9561 #ifdef CONFIG_MODULES
9562 static void trace_module_add_evals(struct module *mod)
9563 {
9564 	if (!mod->num_trace_evals)
9565 		return;
9566 
9567 	/*
9568 	 * Modules with bad taint do not have events created, do
9569 	 * not bother with enums either.
9570 	 */
9571 	if (trace_module_has_bad_taint(mod))
9572 		return;
9573 
9574 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9575 }
9576 
9577 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9578 static void trace_module_remove_evals(struct module *mod)
9579 {
9580 	union trace_eval_map_item *map;
9581 	union trace_eval_map_item **last = &trace_eval_maps;
9582 
9583 	if (!mod->num_trace_evals)
9584 		return;
9585 
9586 	mutex_lock(&trace_eval_mutex);
9587 
9588 	map = trace_eval_maps;
9589 
9590 	while (map) {
9591 		if (map->head.mod == mod)
9592 			break;
9593 		map = trace_eval_jmp_to_tail(map);
9594 		last = &map->tail.next;
9595 		map = map->tail.next;
9596 	}
9597 	if (!map)
9598 		goto out;
9599 
9600 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9601 	kfree(map);
9602  out:
9603 	mutex_unlock(&trace_eval_mutex);
9604 }
9605 #else
/* Stub used without CONFIG_TRACE_EVAL_MAP_FILE: nothing to remove. */
static inline void trace_module_remove_evals(struct module *mod)
{
}
9607 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9608 
9609 static int trace_module_notify(struct notifier_block *self,
9610 			       unsigned long val, void *data)
9611 {
9612 	struct module *mod = data;
9613 
9614 	switch (val) {
9615 	case MODULE_STATE_COMING:
9616 		trace_module_add_evals(mod);
9617 		break;
9618 	case MODULE_STATE_GOING:
9619 		trace_module_remove_evals(mod);
9620 		break;
9621 	}
9622 
9623 	return NOTIFY_OK;
9624 }
9625 
9626 static struct notifier_block trace_module_nb = {
9627 	.notifier_call = trace_module_notify,
9628 	.priority = 0,
9629 };
9630 #endif /* CONFIG_MODULES */
9631 
9632 static __init int tracer_init_tracefs(void)
9633 {
9634 	int ret;
9635 
9636 	trace_access_lock_init();
9637 
9638 	ret = tracing_init_dentry();
9639 	if (ret)
9640 		return 0;
9641 
9642 	event_trace_init();
9643 
9644 	init_tracer_tracefs(&global_trace, NULL);
9645 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9646 
9647 	trace_create_file("tracing_thresh", 0644, NULL,
9648 			&global_trace, &tracing_thresh_fops);
9649 
9650 	trace_create_file("README", 0444, NULL,
9651 			NULL, &tracing_readme_fops);
9652 
9653 	trace_create_file("saved_cmdlines", 0444, NULL,
9654 			NULL, &tracing_saved_cmdlines_fops);
9655 
9656 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9657 			  NULL, &tracing_saved_cmdlines_size_fops);
9658 
9659 	trace_create_file("saved_tgids", 0444, NULL,
9660 			NULL, &tracing_saved_tgids_fops);
9661 
9662 	trace_eval_init();
9663 
9664 	trace_create_eval_file(NULL);
9665 
9666 #ifdef CONFIG_MODULES
9667 	register_module_notifier(&trace_module_nb);
9668 #endif
9669 
9670 #ifdef CONFIG_DYNAMIC_FTRACE
9671 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9672 			NULL, &tracing_dyn_info_fops);
9673 #endif
9674 
9675 	create_trace_instances(NULL);
9676 
9677 	update_tracer_options(&global_trace);
9678 
9679 	return 0;
9680 }
9681 
9682 fs_initcall(tracer_init_tracefs);
9683 
9684 static int trace_panic_handler(struct notifier_block *this,
9685 			       unsigned long event, void *unused)
9686 {
9687 	if (ftrace_dump_on_oops)
9688 		ftrace_dump(ftrace_dump_on_oops);
9689 	return NOTIFY_OK;
9690 }
9691 
9692 static struct notifier_block trace_panic_notifier = {
9693 	.notifier_call  = trace_panic_handler,
9694 	.next           = NULL,
9695 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9696 };
9697 
9698 static int trace_die_handler(struct notifier_block *self,
9699 			     unsigned long val,
9700 			     void *data)
9701 {
9702 	switch (val) {
9703 	case DIE_OOPS:
9704 		if (ftrace_dump_on_oops)
9705 			ftrace_dump(ftrace_dump_on_oops);
9706 		break;
9707 	default:
9708 		break;
9709 	}
9710 	return NOTIFY_OK;
9711 }
9712 
9713 static struct notifier_block trace_die_notifier = {
9714 	.notifier_call = trace_die_handler,
9715 	.priority = 200
9716 };
9717 
9718 /*
9719  * printk is set to max of 1024, we really don't need it that big.
9720  * Nothing should be printing 1000 characters anyway.
9721  */
9722 #define TRACE_MAX_PRINT		1000
9723 
9724 /*
9725  * Define here KERN_TRACE so that we have one place to modify
9726  * it if we decide to change what log level the ftrace dump
9727  * should be at.
9728  */
9729 #define KERN_TRACE		KERN_EMERG
9730 
9731 void
9732 trace_printk_seq(struct trace_seq *s)
9733 {
9734 	/* Probably should print a warning here. */
9735 	if (s->seq.len >= TRACE_MAX_PRINT)
9736 		s->seq.len = TRACE_MAX_PRINT;
9737 
9738 	/*
9739 	 * More paranoid code. Although the buffer size is set to
9740 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9741 	 * an extra layer of protection.
9742 	 */
9743 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9744 		s->seq.len = s->seq.size - 1;
9745 
9746 	/* should be zero ended, but we are paranoid. */
9747 	s->buffer[s->seq.len] = 0;
9748 
9749 	printk(KERN_TRACE "%s", s->buffer);
9750 
9751 	trace_seq_init(s);
9752 }
9753 
9754 void trace_init_global_iter(struct trace_iterator *iter)
9755 {
9756 	iter->tr = &global_trace;
9757 	iter->trace = iter->tr->current_trace;
9758 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9759 	iter->array_buffer = &global_trace.array_buffer;
9760 
9761 	if (iter->trace && iter->trace->open)
9762 		iter->trace->open(iter);
9763 
9764 	/* Annotate start of buffers if we had overruns */
9765 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9766 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9767 
9768 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9769 	if (trace_clocks[iter->tr->clock_id].in_ns)
9770 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9771 }
9772 
9773 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9774 {
9775 	/* use static because iter can be a bit big for the stack */
9776 	static struct trace_iterator iter;
9777 	static atomic_t dump_running;
9778 	struct trace_array *tr = &global_trace;
9779 	unsigned int old_userobj;
9780 	unsigned long flags;
9781 	int cnt = 0, cpu;
9782 
9783 	/* Only allow one dump user at a time. */
9784 	if (atomic_inc_return(&dump_running) != 1) {
9785 		atomic_dec(&dump_running);
9786 		return;
9787 	}
9788 
9789 	/*
9790 	 * Always turn off tracing when we dump.
9791 	 * We don't need to show trace output of what happens
9792 	 * between multiple crashes.
9793 	 *
9794 	 * If the user does a sysrq-z, then they can re-enable
9795 	 * tracing with echo 1 > tracing_on.
9796 	 */
9797 	tracing_off();
9798 
9799 	local_irq_save(flags);
9800 	printk_nmi_direct_enter();
9801 
9802 	/* Simulate the iterator */
9803 	trace_init_global_iter(&iter);
9804 	/* Can not use kmalloc for iter.temp and iter.fmt */
9805 	iter.temp = static_temp_buf;
9806 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9807 	iter.fmt = static_fmt_buf;
9808 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9809 
9810 	for_each_tracing_cpu(cpu) {
9811 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9812 	}
9813 
9814 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9815 
9816 	/* don't look at user memory in panic mode */
9817 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9818 
9819 	switch (oops_dump_mode) {
9820 	case DUMP_ALL:
9821 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9822 		break;
9823 	case DUMP_ORIG:
9824 		iter.cpu_file = raw_smp_processor_id();
9825 		break;
9826 	case DUMP_NONE:
9827 		goto out_enable;
9828 	default:
9829 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9830 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9831 	}
9832 
9833 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9834 
9835 	/* Did function tracer already get disabled? */
9836 	if (ftrace_is_dead()) {
9837 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9838 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9839 	}
9840 
9841 	/*
9842 	 * We need to stop all tracing on all CPUS to read
9843 	 * the next buffer. This is a bit expensive, but is
9844 	 * not done often. We fill all what we can read,
9845 	 * and then release the locks again.
9846 	 */
9847 
9848 	while (!trace_empty(&iter)) {
9849 
9850 		if (!cnt)
9851 			printk(KERN_TRACE "---------------------------------\n");
9852 
9853 		cnt++;
9854 
9855 		trace_iterator_reset(&iter);
9856 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9857 
9858 		if (trace_find_next_entry_inc(&iter) != NULL) {
9859 			int ret;
9860 
9861 			ret = print_trace_line(&iter);
9862 			if (ret != TRACE_TYPE_NO_CONSUME)
9863 				trace_consume(&iter);
9864 		}
9865 		touch_nmi_watchdog();
9866 
9867 		trace_printk_seq(&iter.seq);
9868 	}
9869 
9870 	if (!cnt)
9871 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9872 	else
9873 		printk(KERN_TRACE "---------------------------------\n");
9874 
9875  out_enable:
9876 	tr->trace_flags |= old_userobj;
9877 
9878 	for_each_tracing_cpu(cpu) {
9879 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9880 	}
9881 	atomic_dec(&dump_running);
9882 	printk_nmi_direct_exit();
9883 	local_irq_restore(flags);
9884 }
9885 EXPORT_SYMBOL_GPL(ftrace_dump);
9886 
9887 #define WRITE_BUFSIZE  4096
9888 
9889 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9890 				size_t count, loff_t *ppos,
9891 				int (*createfn)(const char *))
9892 {
9893 	char *kbuf, *buf, *tmp;
9894 	int ret = 0;
9895 	size_t done = 0;
9896 	size_t size;
9897 
9898 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9899 	if (!kbuf)
9900 		return -ENOMEM;
9901 
9902 	while (done < count) {
9903 		size = count - done;
9904 
9905 		if (size >= WRITE_BUFSIZE)
9906 			size = WRITE_BUFSIZE - 1;
9907 
9908 		if (copy_from_user(kbuf, buffer + done, size)) {
9909 			ret = -EFAULT;
9910 			goto out;
9911 		}
9912 		kbuf[size] = '\0';
9913 		buf = kbuf;
9914 		do {
9915 			tmp = strchr(buf, '\n');
9916 			if (tmp) {
9917 				*tmp = '\0';
9918 				size = tmp - buf + 1;
9919 			} else {
9920 				size = strlen(buf);
9921 				if (done + size < count) {
9922 					if (buf != kbuf)
9923 						break;
9924 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9925 					pr_warn("Line length is too long: Should be less than %d\n",
9926 						WRITE_BUFSIZE - 2);
9927 					ret = -EINVAL;
9928 					goto out;
9929 				}
9930 			}
9931 			done += size;
9932 
9933 			/* Remove comments */
9934 			tmp = strchr(buf, '#');
9935 
9936 			if (tmp)
9937 				*tmp = '\0';
9938 
9939 			ret = createfn(buf);
9940 			if (ret)
9941 				goto out;
9942 			buf += size;
9943 
9944 		} while (done < count);
9945 	}
9946 	ret = done;
9947 
9948 out:
9949 	kfree(kbuf);
9950 
9951 	return ret;
9952 }
9953 
9954 __init static int tracer_alloc_buffers(void)
9955 {
9956 	int ring_buf_size;
9957 	int ret = -ENOMEM;
9958 
9959 
9960 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9961 		pr_warn("Tracing disabled due to lockdown\n");
9962 		return -EPERM;
9963 	}
9964 
9965 	/*
9966 	 * Make sure we don't accidentally add more trace options
9967 	 * than we have bits for.
9968 	 */
9969 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9970 
9971 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9972 		goto out;
9973 
9974 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9975 		goto out_free_buffer_mask;
9976 
9977 	/* Only allocate trace_printk buffers if a trace_printk exists */
9978 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9979 		/* Must be called before global_trace.buffer is allocated */
9980 		trace_printk_init_buffers();
9981 
9982 	/* To save memory, keep the ring buffer size to its minimum */
9983 	if (ring_buffer_expanded)
9984 		ring_buf_size = trace_buf_size;
9985 	else
9986 		ring_buf_size = 1;
9987 
9988 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9989 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9990 
9991 	raw_spin_lock_init(&global_trace.start_lock);
9992 
9993 	/*
9994 	 * The prepare callbacks allocates some memory for the ring buffer. We
9995 	 * don't free the buffer if the CPU goes down. If we were to free
9996 	 * the buffer, then the user would lose any trace that was in the
9997 	 * buffer. The memory will be removed once the "instance" is removed.
9998 	 */
9999 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10000 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10001 				      NULL);
10002 	if (ret < 0)
10003 		goto out_free_cpumask;
10004 	/* Used for event triggers */
10005 	ret = -ENOMEM;
10006 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10007 	if (!temp_buffer)
10008 		goto out_rm_hp_state;
10009 
10010 	if (trace_create_savedcmd() < 0)
10011 		goto out_free_temp_buffer;
10012 
10013 	/* TODO: make the number of buffers hot pluggable with CPUS */
10014 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10015 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10016 		goto out_free_savedcmd;
10017 	}
10018 
10019 	if (global_trace.buffer_disabled)
10020 		tracing_off();
10021 
10022 	if (trace_boot_clock) {
10023 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10024 		if (ret < 0)
10025 			pr_warn("Trace clock %s not defined, going back to default\n",
10026 				trace_boot_clock);
10027 	}
10028 
10029 	/*
10030 	 * register_tracer() might reference current_trace, so it
10031 	 * needs to be set before we register anything. This is
10032 	 * just a bootstrap of current_trace anyway.
10033 	 */
10034 	global_trace.current_trace = &nop_trace;
10035 
10036 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10037 
10038 	ftrace_init_global_array_ops(&global_trace);
10039 
10040 	init_trace_flags_index(&global_trace);
10041 
10042 	register_tracer(&nop_trace);
10043 
10044 	/* Function tracing may start here (via kernel command line) */
10045 	init_function_trace();
10046 
10047 	/* All seems OK, enable tracing */
10048 	tracing_disabled = 0;
10049 
10050 	atomic_notifier_chain_register(&panic_notifier_list,
10051 				       &trace_panic_notifier);
10052 
10053 	register_die_notifier(&trace_die_notifier);
10054 
10055 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10056 
10057 	INIT_LIST_HEAD(&global_trace.systems);
10058 	INIT_LIST_HEAD(&global_trace.events);
10059 	INIT_LIST_HEAD(&global_trace.hist_vars);
10060 	INIT_LIST_HEAD(&global_trace.err_log);
10061 	list_add(&global_trace.list, &ftrace_trace_arrays);
10062 
10063 	apply_trace_boot_options();
10064 
10065 	register_snapshot_cmd();
10066 
10067 	test_can_verify();
10068 
10069 	return 0;
10070 
10071 out_free_savedcmd:
10072 	free_saved_cmdlines_buffer(savedcmd);
10073 out_free_temp_buffer:
10074 	ring_buffer_free(temp_buffer);
10075 out_rm_hp_state:
10076 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10077 out_free_cpumask:
10078 	free_cpumask_var(global_trace.tracing_cpumask);
10079 out_free_buffer_mask:
10080 	free_cpumask_var(tracing_buffer_mask);
10081 out:
10082 	return ret;
10083 }
10084 
10085 void __init early_trace_init(void)
10086 {
10087 	if (tracepoint_printk) {
10088 		tracepoint_print_iter =
10089 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10090 		if (MEM_FAIL(!tracepoint_print_iter,
10091 			     "Failed to allocate trace iterator\n"))
10092 			tracepoint_printk = 0;
10093 		else
10094 			static_key_enable(&tracepoint_printk_key.key);
10095 	}
10096 	tracer_alloc_buffers();
10097 }
10098 
10099 void __init trace_init(void)
10100 {
10101 	trace_event_init();
10102 }
10103 
10104 __init static void clear_boot_tracer(void)
10105 {
10106 	/*
10107 	 * The default tracer at boot buffer is an init section.
10108 	 * This function is called in lateinit. If we did not
10109 	 * find the boot tracer, then clear it out, to prevent
10110 	 * later registration from accessing the buffer that is
10111 	 * about to be freed.
10112 	 */
10113 	if (!default_bootup_tracer)
10114 		return;
10115 
10116 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10117 	       default_bootup_tracer);
10118 	default_bootup_tracer = NULL;
10119 }
10120 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * tracing_set_default_clock - fall back to the "global" trace clock
 *
 * If no trace clock was requested at boot and the scheduler clock turned
 * out to be unstable, switch the global trace array to the "global"
 * clock and tell the user how to keep the local one. Nothing is changed
 * when tracefs is locked down.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
/* Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK there is nothing to adjust. */
static inline void tracing_set_default_clock(void) { }
#endif
10142 
10143 __init static int late_trace_init(void)
10144 {
10145 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10146 		static_key_disable(&tracepoint_printk_key.key);
10147 		tracepoint_printk = 0;
10148 	}
10149 
10150 	tracing_set_default_clock();
10151 	clear_boot_tracer();
10152 	return 0;
10153 }
10154 
10155 late_initcall_sync(late_trace_init);
10156