xref: /openbmc/linux/kernel/trace/trace.c (revision e368cd72)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1, and is set to zero when the tracer is
114  * successfully initialized. That is the only place that ever
115  * clears it.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
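/*
 * To make the layout above concrete, a saved array holding two eval maps
 * from a module would conceptually look like this (a sketch of the
 * described layout, not literal code; names are illustrative):
 *
 *	item[0].head = { .mod = owning module (or NULL), .length = 2 };
 *	item[1].map  = first saved trace_eval_map
 *	item[2].map  = second saved trace_eval_map
 *	item[3].tail = { .next = pointer to the next saved array, or NULL };
 */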
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208 		return 1;
209 	}
210 
211 	return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
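/*
 * Worked example of the rounding above (illustrative values):
 * ns2usecs(1499) = (1499 + 500) / 1000 = 1, while
 * ns2usecs(1500) = (1500 + 500) / 1000 = 2, i.e. round to nearest.
 */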
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are adding export to the list, but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer inserted into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
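/*
 * Minimal usage sketch for the export API above. The callback and the
 * struct instance below are hypothetical; only .write and .flags need to
 * be filled in by the caller:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... push the raw entry to an out-of-band channel ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */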
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * When adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork, and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which happens when a task
580  * exits.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (actual pid + 1, so that zero can be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
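/*
 * The three helpers above are intended to back a seq_file interface.
 * A sketch of the wiring (the function names and the pid_list lookup are
 * hypothetical; the real users are the ftrace/event pid files):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start = p_start,
 *		.next  = p_next,
 *		.stop  = p_stop,
 *		.show  = trace_pid_show,
 *	};
 */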
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always create a new array. The write is an all-or-nothing
698 	 * operation: a new array is built whenever the user adds new
699 	 * pids, and if the operation fails, the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled", so it can be used in fast paths such
799  * as by the irqsoff tracer. But it may be inaccurate due to races. If
800  * you need to know the accurate state, use tracing_is_on(), which is a
801  * little slower but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low value of 16384 so that,
820  * if a dump on oops happens, there is not too much output to wait
821  * for. In any case, it is configurable at both boot time and
822  * run time.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a linked list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * Serialize access to the ring buffer.
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow another process to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be
847  *      rewritten by the event producer.
848  *   B) the page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multiple processes to access different CPU ring
852  * buffers concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
912 static inline void trace_access_lock_init(void)
913 {
914 }
915 
916 #endif
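/*
 * A reader is expected to bracket its buffer access with the primitives
 * above, e.g. (sketch):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events for @cpu ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a CPU number takes the access
 * lock exclusively for whole-buffer operations.
 */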
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929 					unsigned int trace_ctx,
930 					int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934 				      struct trace_buffer *buffer,
935 				      unsigned long trace_ctx,
936 				      int skip, struct pt_regs *regs)
937 {
938 }
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff) that just want to
973 	 * know if the ring buffer has been disabled but can handle
974 	 * races where it gets disabled while we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
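/*
 * Callers normally do not use __trace_puts() directly; the trace_puts()
 * macro supplies the caller address and the string size, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */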
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, the snapshot cannot be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot either with
1151  * tracing_snapshot_alloc(), or manually with:
1152  * echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
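/*
 * Typical use (sketch; the condition is hypothetical): allocate the
 * snapshot buffer once from a context that may sleep, then snapshot
 * whenever something interesting is detected:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (detected_the_problem)
 *		tracing_snapshot();
 */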
1164 
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:		The tracing instance to snapshot
1168  * @cond_data:	The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 	tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already taken.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer; instead, we resize it because
1239 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want to preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
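/*
 * Sketch of a conditional-snapshot user (all names below are hypothetical;
 * a snapshot is taken only when the update callback returns true):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *cond = cond_data;
 *
 *		return cond->threshold_hit;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */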
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff) that just want to
1433 	 * know if the ring buffer has been disabled but can handle
1434 	 * races where it gets disabled while we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
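/*
 * A common pattern (sketch; the condition is hypothetical) is to keep
 * tracing running and freeze the buffer at the point of failure, so the
 * ring buffer retains the events leading up to it:
 *
 *	if (detected_the_problem)
 *		tracing_off();
 *
 * tracing_on() re-enables recording afterwards.
 */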
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
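/*
 * The parser above is driven in a loop by writers of list-style files,
 * along the lines of trace_pid_write() earlier in this file (sketch;
 * SIZE and the value handling are placeholders):
 *
 *	trace_parser_get_init(&parser, SIZE);
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... kstrtoul(parser.buffer, 0, &val) and consume val ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */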
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752 
1753 #define trace_create_maxlat_file(tr, d_tracer)				\
1754 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1755 			  &tr->max_latency, &tracing_max_lat_fops)
1756 
1757 #endif
1758 
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768 	struct array_buffer *trace_buf = &tr->array_buffer;
1769 	struct array_buffer *max_buf = &tr->max_buffer;
1770 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772 
1773 	max_buf->cpu = cpu;
1774 	max_buf->time_start = data->preempt_timestamp;
1775 
1776 	max_data->saved_latency = tr->max_latency;
1777 	max_data->critical_start = data->critical_start;
1778 	max_data->critical_end = data->critical_end;
1779 
1780 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781 	max_data->pid = tsk->pid;
1782 	/*
1783 	 * If tsk == current, then use current_uid(), as that does not use
1784 	 * RCU. The irq tracer can be called out of RCU scope.
1785 	 */
1786 	if (tsk == current)
1787 		max_data->uid = current_uid();
1788 	else
1789 		max_data->uid = task_uid(tsk);
1790 
1791 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792 	max_data->policy = tsk->policy;
1793 	max_data->rt_priority = tsk->rt_priority;
1794 
1795 	/* record this task's comm */
1796 	tracing_record_cmdline(tsk);
1797 	latency_fsnotify(tr);
1798 }
1799 
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812 	      void *cond_data)
1813 {
1814 	if (tr->stop_count)
1815 		return;
1816 
1817 	WARN_ON_ONCE(!irqs_disabled());
1818 
1819 	if (!tr->allocated_snapshot) {
1820 		/* Only the nop tracer should hit this when disabling */
1821 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822 		return;
1823 	}
1824 
1825 	arch_spin_lock(&tr->max_lock);
1826 
1827 	/* Inherit the recordable setting from array_buffer */
1828 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829 		ring_buffer_record_on(tr->max_buffer.buffer);
1830 	else
1831 		ring_buffer_record_off(tr->max_buffer.buffer);
1832 
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835 		goto out_unlock;
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841  out_unlock:
1842 	arch_spin_unlock(&tr->max_lock);
1843 }
1844 
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856 	int ret;
1857 
1858 	if (tr->stop_count)
1859 		return;
1860 
1861 	WARN_ON_ONCE(!irqs_disabled());
1862 	if (!tr->allocated_snapshot) {
1863 		/* Only the nop tracer should hit this when disabling */
1864 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865 		return;
1866 	}
1867 
1868 	arch_spin_lock(&tr->max_lock);
1869 
1870 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871 
1872 	if (ret == -EBUSY) {
1873 		/*
1874 		 * We failed to swap the buffer due to a commit taking
1875 		 * place on this CPU. We fail to record, but we write a
1876 		 * note into the max trace buffer (no one else writes
1877 		 * directly to it) to flag that the swap failed.
1878 		 */
1879 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880 			"Failed to swap buffers due to commit in progress\n");
1881 	}
1882 
1883 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884 
1885 	__update_max_tr(tr, tsk, cpu);
1886 	arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889 
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892 	/* Iterators are static, they should be filled or empty */
1893 	if (trace_buffer_iter(iter, iter->cpu_file))
1894 		return 0;
1895 
1896 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897 				full);
1898 }
1899 
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902 
1903 struct trace_selftests {
1904 	struct list_head		list;
1905 	struct tracer			*type;
1906 };
1907 
1908 static LIST_HEAD(postponed_selftests);
1909 
1910 static int save_selftest(struct tracer *type)
1911 {
1912 	struct trace_selftests *selftest;
1913 
1914 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915 	if (!selftest)
1916 		return -ENOMEM;
1917 
1918 	selftest->type = type;
1919 	list_add(&selftest->list, &postponed_selftests);
1920 	return 0;
1921 }
1922 
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925 	struct trace_array *tr = &global_trace;
1926 	struct tracer *saved_tracer = tr->current_trace;
1927 	int ret;
1928 
1929 	if (!type->selftest || tracing_selftest_disabled)
1930 		return 0;
1931 
1932 	/*
1933 	 * If a tracer registers early in boot up (before scheduling is
1934 	 * initialized and such), then do not run its selftests yet.
1935 	 * Instead, run it a little later in the boot process.
1936 	 */
1937 	if (!selftests_can_run)
1938 		return save_selftest(type);
1939 
1940 	if (!tracing_is_on()) {
1941 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942 			type->name);
1943 		return 0;
1944 	}
1945 
1946 	/*
1947 	 * Run a selftest on this tracer.
1948 	 * Here we reset the trace buffer, and set the current
1949 	 * tracer to be this tracer. The tracer can then run some
1950 	 * internal tracing to verify that everything is in order.
1951 	 * If we fail, we do not register this tracer.
1952 	 */
1953 	tracing_reset_online_cpus(&tr->array_buffer);
1954 
1955 	tr->current_trace = type;
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		/* If we expanded the buffers, make sure the max is expanded too */
1960 		if (ring_buffer_expanded)
1961 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962 					   RING_BUFFER_ALL_CPUS);
1963 		tr->allocated_snapshot = true;
1964 	}
1965 #endif
1966 
1967 	/* the test is responsible for initializing and enabling */
1968 	pr_info("Testing tracer %s: ", type->name);
1969 	ret = type->selftest(type, tr);
1970 	/* the test is responsible for resetting too */
1971 	tr->current_trace = saved_tracer;
1972 	if (ret) {
1973 		printk(KERN_CONT "FAILED!\n");
1974 		/* Add the warning after printing 'FAILED' */
1975 		WARN_ON(1);
1976 		return -1;
1977 	}
1978 	/* Only reset on passing, to avoid touching corrupted buffers */
1979 	tracing_reset_online_cpus(&tr->array_buffer);
1980 
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982 	if (type->use_max_tr) {
1983 		tr->allocated_snapshot = false;
1984 
1985 		/* Shrink the max buffer again */
1986 		if (ring_buffer_expanded)
1987 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1988 					   RING_BUFFER_ALL_CPUS);
1989 	}
1990 #endif
1991 
1992 	printk(KERN_CONT "PASSED\n");
1993 	return 0;
1994 }
1995 
1996 static __init int init_trace_selftests(void)
1997 {
1998 	struct trace_selftests *p, *n;
1999 	struct tracer *t, **last;
2000 	int ret;
2001 
2002 	selftests_can_run = true;
2003 
2004 	mutex_lock(&trace_types_lock);
2005 
2006 	if (list_empty(&postponed_selftests))
2007 		goto out;
2008 
2009 	pr_info("Running postponed tracer tests:\n");
2010 
2011 	tracing_selftest_running = true;
2012 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013 		/* This loop can take minutes when sanitizers are enabled, so
2014 		 * let's make sure we allow RCU processing.
2015 		 */
2016 		cond_resched();
2017 		ret = run_tracer_selftest(p->type);
2018 		/* If the test fails, then warn and remove from available_tracers */
2019 		if (ret < 0) {
2020 			WARN(1, "tracer: %s failed selftest, disabling\n",
2021 			     p->type->name);
2022 			last = &trace_types;
2023 			for (t = trace_types; t; t = t->next) {
2024 				if (t == p->type) {
2025 					*last = t->next;
2026 					break;
2027 				}
2028 				last = &t->next;
2029 			}
2030 		}
2031 		list_del(&p->list);
2032 		kfree(p);
2033 	}
2034 	tracing_selftest_running = false;
2035 
2036  out:
2037 	mutex_unlock(&trace_types_lock);
2038 
2039 	return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045 	return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048 
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050 
2051 static void __init apply_trace_boot_options(void);
2052 
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061 	struct tracer *t;
2062 	int ret = 0;
2063 
2064 	if (!type->name) {
2065 		pr_info("Tracer must have a name\n");
2066 		return -1;
2067 	}
2068 
2069 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071 		return -1;
2072 	}
2073 
2074 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075 		pr_warn("Can not register tracer %s due to lockdown\n",
2076 			   type->name);
2077 		return -EPERM;
2078 	}
2079 
2080 	mutex_lock(&trace_types_lock);
2081 
2082 	tracing_selftest_running = true;
2083 
2084 	for (t = trace_types; t; t = t->next) {
2085 		if (strcmp(type->name, t->name) == 0) {
2086 			/* already found */
2087 			pr_info("Tracer %s already registered\n",
2088 				type->name);
2089 			ret = -1;
2090 			goto out;
2091 		}
2092 	}
2093 
2094 	if (!type->set_flag)
2095 		type->set_flag = &dummy_set_flag;
2096 	if (!type->flags) {
2097 		/* allocate a dummy tracer_flags */
2098 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099 		if (!type->flags) {
2100 			ret = -ENOMEM;
2101 			goto out;
2102 		}
2103 		type->flags->val = 0;
2104 		type->flags->opts = dummy_tracer_opt;
2105 	} else
2106 		if (!type->flags->opts)
2107 			type->flags->opts = dummy_tracer_opt;
2108 
2109 	/* store the tracer for __set_tracer_option */
2110 	type->flags->trace = type;
2111 
2112 	ret = run_tracer_selftest(type);
2113 	if (ret < 0)
2114 		goto out;
2115 
2116 	type->next = trace_types;
2117 	trace_types = type;
2118 	add_tracer_options(&global_trace, type);
2119 
2120  out:
2121 	tracing_selftest_running = false;
2122 	mutex_unlock(&trace_types_lock);
2123 
2124 	if (ret || !default_bootup_tracer)
2125 		goto out_unlock;
2126 
2127 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128 		goto out_unlock;
2129 
2130 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131 	/* Do we want this tracer to start on bootup? */
2132 	tracing_set_tracer(&global_trace, type->name);
2133 	default_bootup_tracer = NULL;
2134 
2135 	apply_trace_boot_options();
2136 
2137 	/* disable other selftests, since running this tracer would break them. */
2138 	disable_tracing_selftest("running a tracer");
2139 
2140  out_unlock:
2141 	return ret;
2142 }
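
/*
 * Illustrative sketch (not part of this file's logic): built-in tracers
 * typically register themselves from an initcall. The "example" names
 * below are hypothetical; see kernel/trace/trace_nop.c for a real,
 * minimal tracer.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */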
2143 
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146 	struct trace_buffer *buffer = buf->buffer;
2147 
2148 	if (!buffer)
2149 		return;
2150 
2151 	ring_buffer_record_disable(buffer);
2152 
2153 	/* Make sure all commits have finished */
2154 	synchronize_rcu();
2155 	ring_buffer_reset_cpu(buffer, cpu);
2156 
2157 	ring_buffer_record_enable(buffer);
2158 }
2159 
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 
2172 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173 
2174 	ring_buffer_reset_online_cpus(buffer);
2175 
2176 	ring_buffer_record_enable(buffer);
2177 }
2178 
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182 	struct trace_array *tr;
2183 
2184 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185 		if (!tr->clear_trace)
2186 			continue;
2187 		tr->clear_trace = false;
2188 		tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190 		tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192 	}
2193 }
2194 
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200 
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203 
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209 	unsigned *map_cmdline_to_pid;
2210 	unsigned cmdline_num;
2211 	int cmdline_idx;
2212 	char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215 
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220 
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225 
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227 				    struct saved_cmdlines_buffer *s)
2228 {
2229 	s->map_cmdline_to_pid = kmalloc_array(val,
2230 					      sizeof(*s->map_cmdline_to_pid),
2231 					      GFP_KERNEL);
2232 	if (!s->map_cmdline_to_pid)
2233 		return -ENOMEM;
2234 
2235 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236 	if (!s->saved_cmdlines) {
2237 		kfree(s->map_cmdline_to_pid);
2238 		return -ENOMEM;
2239 	}
2240 
2241 	s->cmdline_idx = 0;
2242 	s->cmdline_num = val;
2243 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244 	       sizeof(s->map_pid_to_cmdline));
2245 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246 	       val * sizeof(*s->map_cmdline_to_pid));
2247 
2248 	return 0;
2249 }
2250 
2251 static int trace_create_savedcmd(void)
2252 {
2253 	int ret;
2254 
2255 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256 	if (!savedcmd)
2257 		return -ENOMEM;
2258 
2259 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260 	if (ret < 0) {
2261 		kfree(savedcmd);
2262 		savedcmd = NULL;
2263 		return -ENOMEM;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 int is_tracing_stopped(void)
2270 {
2271 	return global_trace.stop_count;
2272 }
2273 
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282 	struct trace_buffer *buffer;
2283 	unsigned long flags;
2284 
2285 	if (tracing_disabled)
2286 		return;
2287 
2288 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289 	if (--global_trace.stop_count) {
2290 		if (global_trace.stop_count < 0) {
2291 			/* Someone screwed up their debugging */
2292 			WARN_ON_ONCE(1);
2293 			global_trace.stop_count = 0;
2294 		}
2295 		goto out;
2296 	}
2297 
2298 	/* Prevent the buffers from switching */
2299 	arch_spin_lock(&global_trace.max_lock);
2300 
2301 	buffer = global_trace.array_buffer.buffer;
2302 	if (buffer)
2303 		ring_buffer_record_enable(buffer);
2304 
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306 	buffer = global_trace.max_buffer.buffer;
2307 	if (buffer)
2308 		ring_buffer_record_enable(buffer);
2309 #endif
2310 
2311 	arch_spin_unlock(&global_trace.max_lock);
2312 
2313  out:
2314 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316 
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319 	struct trace_buffer *buffer;
2320 	unsigned long flags;
2321 
2322 	if (tracing_disabled)
2323 		return;
2324 
2325 	/* If global, we need to also start the max tracer */
2326 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327 		return tracing_start();
2328 
2329 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2330 
2331 	if (--tr->stop_count) {
2332 		if (tr->stop_count < 0) {
2333 			/* Someone screwed up their debugging */
2334 			WARN_ON_ONCE(1);
2335 			tr->stop_count = 0;
2336 		}
2337 		goto out;
2338 	}
2339 
2340 	buffer = tr->array_buffer.buffer;
2341 	if (buffer)
2342 		ring_buffer_record_enable(buffer);
2343 
2344  out:
2345 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347 
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Lightweight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356 	struct trace_buffer *buffer;
2357 	unsigned long flags;
2358 
2359 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360 	if (global_trace.stop_count++)
2361 		goto out;
2362 
2363 	/* Prevent the buffers from switching */
2364 	arch_spin_lock(&global_trace.max_lock);
2365 
2366 	buffer = global_trace.array_buffer.buffer;
2367 	if (buffer)
2368 		ring_buffer_record_disable(buffer);
2369 
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371 	buffer = global_trace.max_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 #endif
2375 
2376 	arch_spin_unlock(&global_trace.max_lock);
2377 
2378  out:
2379 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
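
/*
 * Usage sketch (hypothetical caller): tracing_stop() and tracing_start()
 * nest via stop_count, so a debugging-style critical section is simply:
 *
 *	tracing_stop();
 *	... inspect or dump the buffers without new events racing in ...
 *	tracing_start();
 */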
2381 
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384 	struct trace_buffer *buffer;
2385 	unsigned long flags;
2386 
2387 	/* If global, we need to also stop the max tracer */
2388 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389 		return tracing_stop();
2390 
2391 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2392 	if (tr->stop_count++)
2393 		goto out;
2394 
2395 	buffer = tr->array_buffer.buffer;
2396 	if (buffer)
2397 		ring_buffer_record_disable(buffer);
2398 
2399  out:
2400 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 */
2419 	if (!arch_spin_trylock(&trace_cmdline_lock))
2420 		return 0;
2421 
2422 	idx = savedcmd->map_pid_to_cmdline[tpid];
2423 	if (idx == NO_CMDLINE_MAP) {
2424 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425 
2426 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2427 		savedcmd->cmdline_idx = idx;
2428 	}
2429 
2430 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431 	set_cmdline(idx, tsk->comm);
2432 
2433 	arch_spin_unlock(&trace_cmdline_lock);
2434 
2435 	return 1;
2436 }
2437 
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440 	unsigned map;
2441 	int tpid;
2442 
2443 	if (!pid) {
2444 		strcpy(comm, "<idle>");
2445 		return;
2446 	}
2447 
2448 	if (WARN_ON_ONCE(pid < 0)) {
2449 		strcpy(comm, "<XXX>");
2450 		return;
2451 	}
2452 
2453 	tpid = pid & (PID_MAX_DEFAULT - 1);
2454 	map = savedcmd->map_pid_to_cmdline[tpid];
2455 	if (map != NO_CMDLINE_MAP) {
2456 		tpid = savedcmd->map_cmdline_to_pid[map];
2457 		if (tpid == pid) {
2458 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459 			return;
2460 		}
2461 	}
2462 	strcpy(comm, "<...>");
2463 }
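
/*
 * Worked example of the aliasing check above (assuming PID_MAX_DEFAULT is
 * 0x8000): pids 0x1234 and 0x9234 both hash to index 0x1234, so the pid
 * recorded in map_cmdline_to_pid[] for that cmdline slot is compared with
 * the full pid, and "<...>" is returned on a mismatch rather than another
 * task's stale comm.
 */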
2464 
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467 	preempt_disable();
2468 	arch_spin_lock(&trace_cmdline_lock);
2469 
2470 	__trace_find_cmdline(pid, comm);
2471 
2472 	arch_spin_unlock(&trace_cmdline_lock);
2473 	preempt_enable();
2474 }
2475 
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478 	/*
2479 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480 	 * if we observe a non-NULL tgid_map then we also observe the correct
2481 	 * tgid_map_max.
2482 	 */
2483 	int *map = smp_load_acquire(&tgid_map);
2484 
2485 	if (unlikely(!map || pid > tgid_map_max))
2486 		return NULL;
2487 
2488 	return &map[pid];
2489 }
2490 
2491 int trace_find_tgid(int pid)
2492 {
2493 	int *ptr = trace_find_tgid_ptr(pid);
2494 
2495 	return ptr ? *ptr : 0;
2496 }
2497 
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500 	int *ptr;
2501 
2502 	/* treat recording of idle task as a success */
2503 	if (!tsk->pid)
2504 		return 1;
2505 
2506 	ptr = trace_find_tgid_ptr(tsk->pid);
2507 	if (!ptr)
2508 		return 0;
2509 
2510 	*ptr = tsk->tgid;
2511 	return 1;
2512 }
2513 
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517 		return true;
2518 	if (!__this_cpu_read(trace_taskinfo_save))
2519 		return true;
2520 	return false;
2521 }
2522 
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532 	bool done;
2533 
2534 	if (tracing_record_taskinfo_skip(flags))
2535 		return;
2536 
2537 	/*
2538 	 * Record as much task information as possible. If some fail, continue
2539 	 * to try to record the others.
2540 	 */
2541 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543 
2544 	/* If recording any information failed, retry again soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560 					  struct task_struct *next, int flags)
2561 {
2562 	bool done;
2563 
2564 	if (tracing_record_taskinfo_skip(flags))
2565 		return;
2566 
2567 	/*
2568 	 * Record as much task information as possible. If some fail, continue
2569 	 * to try to record the others.
2570 	 */
2571 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575 
2576 	/* If recording any information failed, retry again soon. */
2577 	if (!done)
2578 		return;
2579 
2580 	__this_cpu_write(trace_taskinfo_save, false);
2581 }
2582 
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588 
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593 
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601 	return trace_seq_has_overflowed(s) ?
2602 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
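
/*
 * Sketch of the intended call pattern (hypothetical output callback): a
 * trace_event print handler typically ends with
 *
 *	trace_seq_printf(s, ...);
 *	return trace_handle_return(s);
 *
 * so that an overflowed trace_seq is reported as TRACE_TYPE_PARTIAL_LINE.
 */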
2605 
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608 	unsigned int trace_flags = irqs_status;
2609 	unsigned int pc;
2610 
2611 	pc = preempt_count();
2612 
2613 	if (pc & NMI_MASK)
2614 		trace_flags |= TRACE_FLAG_NMI;
2615 	if (pc & HARDIRQ_MASK)
2616 		trace_flags |= TRACE_FLAG_HARDIRQ;
2617 	if (in_serving_softirq())
2618 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2619 
2620 	if (tif_need_resched())
2621 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622 	if (test_preempt_need_resched())
2623 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624 	return (trace_flags << 16) | (pc & 0xff);
2625 }
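
/*
 * The packed value returned above is split apart again when an entry is
 * initialized; roughly (a sketch of tracing_generic_entry_update()):
 *
 *	entry->preempt_count = trace_ctx & 0xff;
 *	entry->flags	     = trace_ctx >> 16;
 */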
2626 
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629 			  int type,
2630 			  unsigned long len,
2631 			  unsigned int trace_ctx)
2632 {
2633 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635 
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639 
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. Discarding an event from the ring
2646  * buffer is not as fast as committing one, and is much slower than
2647  * copying the event and committing it on a match.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656 	struct ring_buffer_event *event;
2657 	struct page *page;
2658 	int cpu;
2659 
2660 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661 
2662 	if (trace_buffered_event_ref++)
2663 		return;
2664 
2665 	for_each_tracing_cpu(cpu) {
2666 		page = alloc_pages_node(cpu_to_node(cpu),
2667 					GFP_KERNEL | __GFP_NORETRY, 0);
2668 		if (!page)
2669 			goto failed;
2670 
2671 		event = page_address(page);
2672 		memset(event, 0, sizeof(*event));
2673 
2674 		per_cpu(trace_buffered_event, cpu) = event;
2675 
2676 		preempt_disable();
2677 		if (cpu == smp_processor_id() &&
2678 		    __this_cpu_read(trace_buffered_event) !=
2679 		    per_cpu(trace_buffered_event, cpu))
2680 			WARN_ON_ONCE(1);
2681 		preempt_enable();
2682 	}
2683 
2684 	return;
2685  failed:
2686 	trace_buffered_event_disable();
2687 }
2688 
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691 	/* Probably not needed, but do it anyway */
2692 	smp_rmb();
2693 	this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695 
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698 	this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700 
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711 	int cpu;
2712 
2713 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714 
2715 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716 		return;
2717 
2718 	if (--trace_buffered_event_ref)
2719 		return;
2720 
2721 	preempt_disable();
2722 	/* For each CPU, set the buffer as used. */
2723 	smp_call_function_many(tracing_buffer_mask,
2724 			       disable_trace_buffered_event, NULL, 1);
2725 	preempt_enable();
2726 
2727 	/* Wait for all current users to finish */
2728 	synchronize_rcu();
2729 
2730 	for_each_tracing_cpu(cpu) {
2731 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732 		per_cpu(trace_buffered_event, cpu) = NULL;
2733 	}
2734 	/*
2735 	 * Make sure trace_buffered_event is NULL before clearing
2736 	 * trace_buffered_event_cnt.
2737 	 */
2738 	smp_wmb();
2739 
2740 	preempt_disable();
2741 	/* Do the work on each cpu */
2742 	smp_call_function_many(tracing_buffer_mask,
2743 			       enable_trace_buffered_event, NULL, 1);
2744 	preempt_enable();
2745 }
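
/*
 * Note on pairing (illustration only): each successful call to
 * trace_buffered_event_enable() is expected to be balanced by a later
 * trace_buffered_event_disable(), both made under event_mutex (e.g. when
 * a filter is attached and later removed). Only the final disable frees
 * the per-CPU pages.
 */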
2746 
2747 static struct trace_buffer *temp_buffer;
2748 
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751 			  struct trace_event_file *trace_file,
2752 			  int type, unsigned long len,
2753 			  unsigned int trace_ctx)
2754 {
2755 	struct ring_buffer_event *entry;
2756 	struct trace_array *tr = trace_file->tr;
2757 	int val;
2758 
2759 	*current_rb = tr->array_buffer.buffer;
2760 
2761 	if (!tr->no_filter_buffering_ref &&
2762 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763 	    (entry = this_cpu_read(trace_buffered_event))) {
2764 		/*
2765 		 * Filtering is on, so try to use the per cpu buffer first.
2766 		 * This buffer will simulate a ring_buffer_event,
2767 		 * where the type_len is zero and the array[0] will
2768 		 * hold the full length.
2769 		 * (see include/linux/ring_buffer.h for details on
2770 		 *  how the ring_buffer_event is structured).
2771 		 *
2772 		 * Using a temp buffer during filtering and copying it
2773 		 * on a matched filter is quicker than writing directly
2774 		 * into the ring buffer and then discarding it when
2775 		 * it doesn't match. That is because the discard
2776 		 * requires several atomic operations to get right.
2777 		 * Copying on a match and doing nothing on a failed match
2778 		 * is still quicker than skipping the copy but having to
2779 		 * discard from the ring buffer on a failed match.
2780 		 */
2781 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782 
2783 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2784 
2785 		/*
2786 		 * Preemption is disabled, but interrupts and NMIs
2787 		 * can still come in now. If that happens after
2788 		 * the above increment, then it will have to go
2789 		 * back to the old method of allocating the event
2790 		 * on the ring buffer, and if the filter fails, it
2791 		 * will have to call ring_buffer_discard_commit()
2792 		 * to remove it.
2793 		 *
2794 		 * Need to also check the unlikely case that the
2795 		 * length is bigger than the temp buffer size.
2796 		 * If that happens, then the reserve is pretty much
2797 		 * guaranteed to fail, as the ring buffer currently
2798 		 * only allows events less than a page. But that may
2799 		 * change in the future, so let the ring buffer reserve
2800 		 * handle the failure in that case.
2801 		 */
2802 		if (val == 1 && likely(len <= max_len)) {
2803 			trace_event_setup(entry, type, trace_ctx);
2804 			entry->array[0] = len;
2805 			return entry;
2806 		}
2807 		this_cpu_dec(trace_buffered_event_cnt);
2808 	}
2809 
2810 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811 					    trace_ctx);
2812 	/*
2813 	 * If tracing is off, but we have triggers enabled
2814 	 * we still need to look at the event data. Use the temp_buffer
2815 	 * to store the trace event for the trigger to use. It's recursion
2816 	 * safe and will not be recorded anywhere.
2817 	 */
2818 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819 		*current_rb = temp_buffer;
2820 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821 						    trace_ctx);
2822 	}
2823 	return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
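
/*
 * Rough layout of the simulated event used above (an assumption drawn
 * from the generic ring_buffer_event layout in include/linux/ring_buffer.h):
 *
 *	event->type_len = 0;          length does not fit in type_len, so
 *	event->array[0] = len;        array[0] carries the full length and
 *	data = &event->array[1];      the payload starts right after it
 */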
2826 
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829 
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832 	struct trace_event_call *event_call;
2833 	struct trace_event_file *file;
2834 	struct trace_event *event;
2835 	unsigned long flags;
2836 	struct trace_iterator *iter = tracepoint_print_iter;
2837 
2838 	/* We should never get here if iter is NULL */
2839 	if (WARN_ON_ONCE(!iter))
2840 		return;
2841 
2842 	event_call = fbuffer->trace_file->event_call;
2843 	if (!event_call || !event_call->event.funcs ||
2844 	    !event_call->event.funcs->trace)
2845 		return;
2846 
2847 	file = fbuffer->trace_file;
2848 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850 	     !filter_match_preds(file->filter, fbuffer->entry)))
2851 		return;
2852 
2853 	event = &fbuffer->trace_file->event_call->event;
2854 
2855 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856 	trace_seq_init(&iter->seq);
2857 	iter->ent = fbuffer->entry;
2858 	event_call->event.funcs->trace(iter, 0, event);
2859 	trace_seq_putc(&iter->seq, 0);
2860 	printk("%s", iter->seq.buffer);
2861 
2862 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864 
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866 			     void *buffer, size_t *lenp,
2867 			     loff_t *ppos)
2868 {
2869 	int save_tracepoint_printk;
2870 	int ret;
2871 
2872 	mutex_lock(&tracepoint_printk_mutex);
2873 	save_tracepoint_printk = tracepoint_printk;
2874 
2875 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876 
2877 	/*
2878 	 * This will force exiting early, as tracepoint_printk
2879 	 * is always zero when tracepoint_print_iter is not allocated.
2880 	 */
2881 	if (!tracepoint_print_iter)
2882 		tracepoint_printk = 0;
2883 
2884 	if (save_tracepoint_printk == tracepoint_printk)
2885 		goto out;
2886 
2887 	if (tracepoint_printk)
2888 		static_key_enable(&tracepoint_printk_key.key);
2889 	else
2890 		static_key_disable(&tracepoint_printk_key.key);
2891 
2892  out:
2893 	mutex_unlock(&tracepoint_printk_mutex);
2894 
2895 	return ret;
2896 }
2897 
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900 	enum event_trigger_type tt = ETT_NONE;
2901 	struct trace_event_file *file = fbuffer->trace_file;
2902 
2903 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904 			fbuffer->entry, &tt))
2905 		goto discard;
2906 
2907 	if (static_key_false(&tracepoint_printk_key.key))
2908 		output_printk(fbuffer);
2909 
2910 	if (static_branch_unlikely(&trace_event_exports_enabled))
2911 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912 
2913 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915 
2916 discard:
2917 	if (tt)
2918 		event_triggers_post_call(file, tt);
2919 
2920 }
2921 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922 
2923 /*
2924  * Skip 3:
2925  *
2926  *   trace_buffer_unlock_commit_regs()
2927  *   trace_event_buffer_commit()
2928  *   trace_event_raw_event_xxx()
2929  */
2930 # define STACK_SKIP 3
2931 
2932 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933 				     struct trace_buffer *buffer,
2934 				     struct ring_buffer_event *event,
2935 				     unsigned int trace_ctx,
2936 				     struct pt_regs *regs)
2937 {
2938 	__buffer_unlock_commit(buffer, event);
2939 
2940 	/*
2941 	 * If regs is not set, then skip the necessary functions.
2942 	 * Note, we can still get here via blktrace, wakeup tracer
2943 	 * and mmiotrace, but that's ok if they lose a function or
2944 	 * two. They are not that meaningful.
2945 	 */
2946 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2948 }
2949 
2950 /*
2951  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952  */
2953 void
2954 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955 				   struct ring_buffer_event *event)
2956 {
2957 	__buffer_unlock_commit(buffer, event);
2958 }
2959 
2960 void
2961 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962 	       parent_ip, unsigned int trace_ctx)
2963 {
2964 	struct trace_event_call *call = &event_function;
2965 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2966 	struct ring_buffer_event *event;
2967 	struct ftrace_entry *entry;
2968 
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970 					    trace_ctx);
2971 	if (!event)
2972 		return;
2973 	entry	= ring_buffer_event_data(event);
2974 	entry->ip			= ip;
2975 	entry->parent_ip		= parent_ip;
2976 
2977 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2978 		if (static_branch_unlikely(&trace_function_exports_enabled))
2979 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980 		__buffer_unlock_commit(buffer, event);
2981 	}
2982 }
2983 
2984 #ifdef CONFIG_STACKTRACE
2985 
2986 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987 #define FTRACE_KSTACK_NESTING	4
2988 
2989 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990 
2991 struct ftrace_stack {
2992 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2993 };
2994 
2995 
2996 struct ftrace_stacks {
2997 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2998 };
2999 
3000 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002 
3003 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004 				 unsigned int trace_ctx,
3005 				 int skip, struct pt_regs *regs)
3006 {
3007 	struct trace_event_call *call = &event_kernel_stack;
3008 	struct ring_buffer_event *event;
3009 	unsigned int size, nr_entries;
3010 	struct ftrace_stack *fstack;
3011 	struct stack_entry *entry;
3012 	int stackidx;
3013 
3014 	/*
3015 	 * Add one, for this function and the call to stack_trace_save().
3016 	 * If regs is set, then these functions will not be in the way.
3017 	 */
3018 #ifndef CONFIG_UNWINDER_ORC
3019 	if (!regs)
3020 		skip++;
3021 #endif
3022 
3023 	preempt_disable_notrace();
3024 
3025 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026 
3027 	/* This should never happen. If it does, yell once and skip */
3028 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029 		goto out;
3030 
3031 	/*
3032 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033 	 * interrupt will either see the value pre increment or post
3034 	 * increment. If the interrupt happens pre increment it will have
3035 	 * restored the counter when it returns.  We just need a barrier to
3036 	 * keep gcc from moving things around.
3037 	 */
3038 	barrier();
3039 
3040 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041 	size = ARRAY_SIZE(fstack->calls);
3042 
3043 	if (regs) {
3044 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045 						   size, skip);
3046 	} else {
3047 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3048 	}
3049 
3050 	size = nr_entries * sizeof(unsigned long);
3051 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3053 				    trace_ctx);
3054 	if (!event)
3055 		goto out;
3056 	entry = ring_buffer_event_data(event);
3057 
3058 	memcpy(&entry->caller, fstack->calls, size);
3059 	entry->size = nr_entries;
3060 
3061 	if (!call_filter_check_discard(call, entry, buffer, event))
3062 		__buffer_unlock_commit(buffer, event);
3063 
3064  out:
3065 	/* Again, don't let gcc optimize things here */
3066 	barrier();
3067 	__this_cpu_dec(ftrace_stack_reserve);
3068 	preempt_enable_notrace();
3069 
3070 }
3071 
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073 				      struct trace_buffer *buffer,
3074 				      unsigned int trace_ctx,
3075 				      int skip, struct pt_regs *regs)
3076 {
3077 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078 		return;
3079 
3080 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081 }
3082 
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084 		   int skip)
3085 {
3086 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3087 
3088 	if (rcu_is_watching()) {
3089 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090 		return;
3091 	}
3092 
3093 	/*
3094 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095 	 * but if the above rcu_is_watching() failed, then the NMI
3096 	 * triggered someplace critical, and rcu_irq_enter() should
3097 	 * not be called from NMI.
3098 	 */
3099 	if (unlikely(in_nmi()))
3100 		return;
3101 
3102 	rcu_irq_enter_irqson();
3103 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104 	rcu_irq_exit_irqson();
3105 }
3106 
3107 /**
3108  * trace_dump_stack - record a stack back trace in the trace buffer
3109  * @skip: Number of functions to skip (helper handlers)
3110  */
3111 void trace_dump_stack(int skip)
3112 {
3113 	if (tracing_disabled || tracing_selftest_running)
3114 		return;
3115 
3116 #ifndef CONFIG_UNWINDER_ORC
3117 	/* Skip 1 to skip this function. */
3118 	skip++;
3119 #endif
3120 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3121 			     tracing_gen_ctx(), skip, NULL);
3122 }
3123 EXPORT_SYMBOL_GPL(trace_dump_stack);
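
/*
 * Typical debugging use (hypothetical call site):
 *
 *	trace_dump_stack(0);
 *
 * records the caller's kernel stack into the top-level trace buffer; a
 * non-zero @skip drops that many helper frames from the top of the trace.
 */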
3124 
3125 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126 static DEFINE_PER_CPU(int, user_stack_count);
3127 
3128 static void
3129 ftrace_trace_userstack(struct trace_array *tr,
3130 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3131 {
3132 	struct trace_event_call *call = &event_user_stack;
3133 	struct ring_buffer_event *event;
3134 	struct userstack_entry *entry;
3135 
3136 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137 		return;
3138 
3139 	/*
3140 	 * NMIs cannot handle page faults, even with fixups.
3141 	 * Saving the user stack can (and often does) fault.
3142 	 */
3143 	if (unlikely(in_nmi()))
3144 		return;
3145 
3146 	/*
3147 	 * prevent recursion, since the user stack tracing may
3148 	 * trigger other kernel events.
3149 	 */
3150 	preempt_disable();
3151 	if (__this_cpu_read(user_stack_count))
3152 		goto out;
3153 
3154 	__this_cpu_inc(user_stack_count);
3155 
3156 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157 					    sizeof(*entry), trace_ctx);
3158 	if (!event)
3159 		goto out_drop_count;
3160 	entry	= ring_buffer_event_data(event);
3161 
3162 	entry->tgid		= current->tgid;
3163 	memset(&entry->caller, 0, sizeof(entry->caller));
3164 
3165 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166 	if (!call_filter_check_discard(call, entry, buffer, event))
3167 		__buffer_unlock_commit(buffer, event);
3168 
3169  out_drop_count:
3170 	__this_cpu_dec(user_stack_count);
3171  out:
3172 	preempt_enable();
3173 }
3174 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175 static void ftrace_trace_userstack(struct trace_array *tr,
3176 				   struct trace_buffer *buffer,
3177 				   unsigned int trace_ctx)
3178 {
3179 }
3180 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181 
3182 #endif /* CONFIG_STACKTRACE */
3183 
3184 static inline void
3185 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186 			  unsigned long long delta)
3187 {
3188 	entry->bottom_delta_ts = delta & U32_MAX;
3189 	entry->top_delta_ts = (delta >> 32);
3190 }
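
/*
 * The 64-bit delta is split above so the entry stays compact; a reader
 * reassembles it as, roughly:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */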
3191 
3192 void trace_last_func_repeats(struct trace_array *tr,
3193 			     struct trace_func_repeats *last_info,
3194 			     unsigned int trace_ctx)
3195 {
3196 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3197 	struct func_repeats_entry *entry;
3198 	struct ring_buffer_event *event;
3199 	u64 delta;
3200 
3201 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202 					    sizeof(*entry), trace_ctx);
3203 	if (!event)
3204 		return;
3205 
3206 	delta = ring_buffer_event_time_stamp(buffer, event) -
3207 		last_info->ts_last_call;
3208 
3209 	entry = ring_buffer_event_data(event);
3210 	entry->ip = last_info->ip;
3211 	entry->parent_ip = last_info->parent_ip;
3212 	entry->count = last_info->count;
3213 	func_repeats_set_delta_ts(entry, delta);
3214 
3215 	__buffer_unlock_commit(buffer, event);
3216 }
3217 
3218 /* created for use with alloc_percpu */
3219 struct trace_buffer_struct {
3220 	int nesting;
3221 	char buffer[4][TRACE_BUF_SIZE];
3222 };
3223 
3224 static struct trace_buffer_struct *trace_percpu_buffer;
3225 
3226 /*
3227  * This allows for lockless recording.  If we're nested too deeply, then
3228  * this returns NULL.
3229  */
3230 static char *get_trace_buf(void)
3231 {
3232 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233 
3234 	if (!buffer || buffer->nesting >= 4)
3235 		return NULL;
3236 
3237 	buffer->nesting++;
3238 
3239 	/* Interrupts must see nesting incremented before we use the buffer */
3240 	barrier();
3241 	return &buffer->buffer[buffer->nesting - 1][0];
3242 }
3243 
3244 static void put_trace_buf(void)
3245 {
3246 	/* Don't let the decrement of nesting leak before this */
3247 	barrier();
3248 	this_cpu_dec(trace_percpu_buffer->nesting);
3249 }
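
/*
 * Usage pattern for the two helpers above (this is what trace_vbprintk()
 * and __trace_array_vprintk() below do); a NULL return means we are
 * nested more than four levels deep and the print is dropped:
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out_nobuffer;
 *	...
 *	put_trace_buf();
 */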
3250 
3251 static int alloc_percpu_trace_buffer(void)
3252 {
3253 	struct trace_buffer_struct *buffers;
3254 
3255 	if (trace_percpu_buffer)
3256 		return 0;
3257 
3258 	buffers = alloc_percpu(struct trace_buffer_struct);
3259 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260 		return -ENOMEM;
3261 
3262 	trace_percpu_buffer = buffers;
3263 	return 0;
3264 }
3265 
3266 static int buffers_allocated;
3267 
3268 void trace_printk_init_buffers(void)
3269 {
3270 	if (buffers_allocated)
3271 		return;
3272 
3273 	if (alloc_percpu_trace_buffer())
3274 		return;
3275 
3276 	/* trace_printk() is for debug use only. Don't use it in production. */
3277 
3278 	pr_warn("\n");
3279 	pr_warn("**********************************************************\n");
3280 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281 	pr_warn("**                                                      **\n");
3282 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283 	pr_warn("**                                                      **\n");
3284 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285 	pr_warn("** unsafe for production use.                           **\n");
3286 	pr_warn("**                                                      **\n");
3287 	pr_warn("** If you see this message and you are not debugging    **\n");
3288 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289 	pr_warn("**                                                      **\n");
3290 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291 	pr_warn("**********************************************************\n");
3292 
3293 	/* Expand the buffers to set size */
3294 	tracing_update_buffers();
3295 
3296 	buffers_allocated = 1;
3297 
3298 	/*
3299 	 * trace_printk_init_buffers() can be called by modules.
3300 	 * If that happens, then we need to start cmdline recording
3301 	 * directly here. If the global_trace.buffer is already
3302 	 * allocated here, then this was called by module code.
3303 	 */
3304 	if (global_trace.array_buffer.buffer)
3305 		tracing_start_cmdline_record();
3306 }
3307 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3308 
3309 void trace_printk_start_comm(void)
3310 {
3311 	/* Start tracing comms if trace printk is set */
3312 	if (!buffers_allocated)
3313 		return;
3314 	tracing_start_cmdline_record();
3315 }
3316 
3317 static void trace_printk_start_stop_comm(int enabled)
3318 {
3319 	if (!buffers_allocated)
3320 		return;
3321 
3322 	if (enabled)
3323 		tracing_start_cmdline_record();
3324 	else
3325 		tracing_stop_cmdline_record();
3326 }
3327 
3328 /**
3329  * trace_vbprintk - write binary msg to tracing buffer
3330  * @ip:    The address of the caller
3331  * @fmt:   The string format to write to the buffer
3332  * @args:  Arguments for @fmt
3333  */
3334 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335 {
3336 	struct trace_event_call *call = &event_bprint;
3337 	struct ring_buffer_event *event;
3338 	struct trace_buffer *buffer;
3339 	struct trace_array *tr = &global_trace;
3340 	struct bprint_entry *entry;
3341 	unsigned int trace_ctx;
3342 	char *tbuffer;
3343 	int len = 0, size;
3344 
3345 	if (unlikely(tracing_selftest_running || tracing_disabled))
3346 		return 0;
3347 
3348 	/* Don't pollute graph traces with trace_vprintk internals */
3349 	pause_graph_tracing();
3350 
3351 	trace_ctx = tracing_gen_ctx();
3352 	preempt_disable_notrace();
3353 
3354 	tbuffer = get_trace_buf();
3355 	if (!tbuffer) {
3356 		len = 0;
3357 		goto out_nobuffer;
3358 	}
3359 
3360 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361 
3362 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363 		goto out_put;
3364 
3365 	size = sizeof(*entry) + sizeof(u32) * len;
3366 	buffer = tr->array_buffer.buffer;
3367 	ring_buffer_nest_start(buffer);
3368 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369 					    trace_ctx);
3370 	if (!event)
3371 		goto out;
3372 	entry = ring_buffer_event_data(event);
3373 	entry->ip			= ip;
3374 	entry->fmt			= fmt;
3375 
3376 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3378 		__buffer_unlock_commit(buffer, event);
3379 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380 	}
3381 
3382 out:
3383 	ring_buffer_nest_end(buffer);
3384 out_put:
3385 	put_trace_buf();
3386 
3387 out_nobuffer:
3388 	preempt_enable_notrace();
3389 	unpause_graph_tracing();
3390 
3391 	return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
3395 __printf(3, 0)
3396 static int
3397 __trace_array_vprintk(struct trace_buffer *buffer,
3398 		      unsigned long ip, const char *fmt, va_list args)
3399 {
3400 	struct trace_event_call *call = &event_print;
3401 	struct ring_buffer_event *event;
3402 	int len = 0, size;
3403 	struct print_entry *entry;
3404 	unsigned int trace_ctx;
3405 	char *tbuffer;
3406 
3407 	if (tracing_disabled || tracing_selftest_running)
3408 		return 0;
3409 
3410 	/* Don't pollute graph traces with trace_vprintk internals */
3411 	pause_graph_tracing();
3412 
3413 	trace_ctx = tracing_gen_ctx();
3414 	preempt_disable_notrace();
3415 
3416 
3417 	tbuffer = get_trace_buf();
3418 	if (!tbuffer) {
3419 		len = 0;
3420 		goto out_nobuffer;
3421 	}
3422 
3423 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424 
3425 	size = sizeof(*entry) + len + 1;
3426 	ring_buffer_nest_start(buffer);
3427 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428 					    trace_ctx);
3429 	if (!event)
3430 		goto out;
3431 	entry = ring_buffer_event_data(event);
3432 	entry->ip = ip;
3433 
3434 	memcpy(&entry->buf, tbuffer, len + 1);
3435 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3436 		__buffer_unlock_commit(buffer, event);
3437 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438 	}
3439 
3440 out:
3441 	ring_buffer_nest_end(buffer);
3442 	put_trace_buf();
3443 
3444 out_nobuffer:
3445 	preempt_enable_notrace();
3446 	unpause_graph_tracing();
3447 
3448 	return len;
3449 }
3450 
3451 __printf(3, 0)
3452 int trace_array_vprintk(struct trace_array *tr,
3453 			unsigned long ip, const char *fmt, va_list args)
3454 {
3455 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456 }
3457 
3458 /**
3459  * trace_array_printk - Print a message to a specific instance
3460  * @tr: The instance trace_array descriptor
3461  * @ip: The instruction pointer that this is called from.
3462  * @fmt: The format to print (printf format)
3463  *
3464  * If a subsystem sets up its own instance, they have the right to
3465  * printk strings into their tracing instance buffer using this
3466  * function. Note, this function will not write into the top level
3467  * buffer (use trace_printk() for that), as writing into the top level
3468  * buffer should only have events that can be individually disabled.
3469  * trace_printk() is only used for debugging a kernel, and should not
3470  * be ever incorporated in normal use.
3471  *
3472  * trace_array_printk() can be used, as it will not add noise to the
3473  * top level tracing buffer.
3474  *
3475  * Note, trace_array_init_printk() must be called on @tr before this
3476  * can be used.
3477  */
3478 __printf(3, 0)
3479 int trace_array_printk(struct trace_array *tr,
3480 		       unsigned long ip, const char *fmt, ...)
3481 {
3482 	int ret;
3483 	va_list ap;
3484 
3485 	if (!tr)
3486 		return -ENOENT;
3487 
3488 	/* This is only allowed for created instances */
3489 	if (tr == &global_trace)
3490 		return 0;
3491 
3492 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493 		return 0;
3494 
3495 	va_start(ap, fmt);
3496 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3497 	va_end(ap);
3498 	return ret;
3499 }
3500 EXPORT_SYMBOL_GPL(trace_array_printk);
3501 
3502 /**
3503  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504  * @tr: The trace array to initialize the buffers for
3505  *
3506  * As trace_array_printk() only writes into instances, they are OK to
3507  * have in the kernel (unlike trace_printk()). This needs to be called
3508  * before trace_array_printk() can be used on a trace_array.
3509  */
3510 int trace_array_init_printk(struct trace_array *tr)
3511 {
3512 	if (!tr)
3513 		return -ENOENT;
3514 
3515 	/* This is only allowed for created instances */
3516 	if (tr == &global_trace)
3517 		return -EINVAL;
3518 
3519 	return alloc_percpu_trace_buffer();
3520 }
3521 EXPORT_SYMBOL_GPL(trace_array_init_printk);
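
/*
 * Usage sketch for the two functions above (hypothetical subsystem code;
 * "my_subsys" is a made-up instance name, and a real caller would also
 * drop its reference with trace_array_put() when done):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "answer=%d\n", 42);
 */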
3522 
3523 __printf(3, 4)
3524 int trace_array_printk_buf(struct trace_buffer *buffer,
3525 			   unsigned long ip, const char *fmt, ...)
3526 {
3527 	int ret;
3528 	va_list ap;
3529 
3530 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531 		return 0;
3532 
3533 	va_start(ap, fmt);
3534 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535 	va_end(ap);
3536 	return ret;
3537 }
3538 
3539 __printf(2, 0)
3540 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541 {
3542 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3543 }
3544 EXPORT_SYMBOL_GPL(trace_vprintk);
3545 
3546 static void trace_iterator_increment(struct trace_iterator *iter)
3547 {
3548 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549 
3550 	iter->idx++;
3551 	if (buf_iter)
3552 		ring_buffer_iter_advance(buf_iter);
3553 }
3554 
3555 static struct trace_entry *
3556 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557 		unsigned long *lost_events)
3558 {
3559 	struct ring_buffer_event *event;
3560 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561 
3562 	if (buf_iter) {
3563 		event = ring_buffer_iter_peek(buf_iter, ts);
3564 		if (lost_events)
3565 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566 				(unsigned long)-1 : 0;
3567 	} else {
3568 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569 					 lost_events);
3570 	}
3571 
3572 	if (event) {
3573 		iter->ent_size = ring_buffer_event_length(event);
3574 		return ring_buffer_event_data(event);
3575 	}
3576 	iter->ent_size = 0;
3577 	return NULL;
3578 }
3579 
3580 static struct trace_entry *
3581 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582 		  unsigned long *missing_events, u64 *ent_ts)
3583 {
3584 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3585 	struct trace_entry *ent, *next = NULL;
3586 	unsigned long lost_events = 0, next_lost = 0;
3587 	int cpu_file = iter->cpu_file;
3588 	u64 next_ts = 0, ts;
3589 	int next_cpu = -1;
3590 	int next_size = 0;
3591 	int cpu;
3592 
3593 	/*
3594 	 * If we are in a per_cpu trace file, don't bother iterating over
3595 	 * all CPUs; just peek at the requested one directly.
3596 	 */
3597 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3599 			return NULL;
3600 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601 		if (ent_cpu)
3602 			*ent_cpu = cpu_file;
3603 
3604 		return ent;
3605 	}
3606 
3607 	for_each_tracing_cpu(cpu) {
3608 
3609 		if (ring_buffer_empty_cpu(buffer, cpu))
3610 			continue;
3611 
3612 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613 
3614 		/*
3615 		 * Pick the entry with the smallest timestamp:
3616 		 */
3617 		if (ent && (!next || ts < next_ts)) {
3618 			next = ent;
3619 			next_cpu = cpu;
3620 			next_ts = ts;
3621 			next_lost = lost_events;
3622 			next_size = iter->ent_size;
3623 		}
3624 	}
3625 
3626 	iter->ent_size = next_size;
3627 
3628 	if (ent_cpu)
3629 		*ent_cpu = next_cpu;
3630 
3631 	if (ent_ts)
3632 		*ent_ts = next_ts;
3633 
3634 	if (missing_events)
3635 		*missing_events = next_lost;
3636 
3637 	return next;
3638 }
3639 
3640 #define STATIC_FMT_BUF_SIZE	128
3641 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642 
3643 static char *trace_iter_expand_format(struct trace_iterator *iter)
3644 {
3645 	char *tmp;
3646 
3647 	/*
3648 	 * iter->tr is NULL when used with tp_printk, which means this
3649 	 * can be called from a context where krealloc() is not safe.
3650 	 */
3651 	if (!iter->tr || iter->fmt == static_fmt_buf)
3652 		return NULL;
3653 
3654 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655 		       GFP_KERNEL);
3656 	if (tmp) {
3657 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658 		iter->fmt = tmp;
3659 	}
3660 
3661 	return tmp;
3662 }
3663 
3664 /* Returns true if the string is safe to dereference from an event */
3665 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666 {
3667 	unsigned long addr = (unsigned long)str;
3668 	struct trace_event *trace_event;
3669 	struct trace_event_call *event;
3670 
3671 	/* OK if part of the event data */
3672 	if ((addr >= (unsigned long)iter->ent) &&
3673 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3674 		return true;
3675 
3676 	/* OK if part of the temp seq buffer */
3677 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679 		return true;
3680 
3681 	/* Core rodata can not be freed */
3682 	if (is_kernel_rodata(addr))
3683 		return true;
3684 
3685 	if (trace_is_tracepoint_string(str))
3686 		return true;
3687 
3688 	/*
3689 	 * Now this could be a module event, referencing core module
3690 	 * data, which is OK.
3691 	 */
3692 	if (!iter->ent)
3693 		return false;
3694 
3695 	trace_event = ftrace_find_event(iter->ent->type);
3696 	if (!trace_event)
3697 		return false;
3698 
3699 	event = container_of(trace_event, struct trace_event_call, event);
3700 	if (!event->mod)
3701 		return false;
3702 
3703 	/* Would rather have rodata, but this will suffice */
3704 	if (within_module_core(addr, event->mod))
3705 		return true;
3706 
3707 	return false;
3708 }
3709 
3710 static const char *show_buffer(struct trace_seq *s)
3711 {
3712 	struct seq_buf *seq = &s->seq;
3713 
3714 	seq_buf_terminate(seq);
3715 
3716 	return seq->buffer;
3717 }
3718 
3719 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720 
3721 static int test_can_verify_check(const char *fmt, ...)
3722 {
3723 	char buf[16];
3724 	va_list ap;
3725 	int ret;
3726 
3727 	/*
3728 	 * The verifier depends on vsnprintf() modifying the va_list that is
3729 	 * passed to it, which only happens when the va_list is passed by
3730 	 * reference. Some architectures (like x86_32) pass it by value,
3731 	 * which means that vsnprintf() does not modify the va_list passed
3732 	 * to it, and the verifier would then need to understand all the
3733 	 * values that vsnprintf() can use. If the va_list is passed by
3734 	 * value, then the verifier is disabled.
3735 	 */
3736 	va_start(ap, fmt);
3737 	vsnprintf(buf, 16, "%d", ap);
3738 	ret = va_arg(ap, int);
3739 	va_end(ap);
3740 
3741 	return ret;
3742 }
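
/*
 * Worked example of the probe above: for test_can_verify_check("%d %d", 0, 1),
 * vsnprintf() consumes the leading 0 only when the architecture passes
 * va_list by reference, so the following va_arg() yields 1 and the
 * verifier stays enabled. When va_list is passed by value, the 0 is read
 * again, the function returns 0, and test_can_verify() below disables
 * the verifier.
 */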
3743 
3744 static void test_can_verify(void)
3745 {
3746 	if (!test_can_verify_check("%d %d", 0, 1)) {
3747 		pr_info("trace event string verifier disabled\n");
3748 		static_branch_inc(&trace_no_verify);
3749 	}
3750 }
3751 
3752 /**
3753  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754  * @iter: The iterator that holds the seq buffer and the event being printed
3755  * @fmt: The format used to print the event
3756  * @ap: The va_list holding the data to print from @fmt.
3757  *
3758  * This writes the data into the @iter->seq buffer using the data from
3759  * @fmt and @ap. If the format has a %s, then the source of the string
3760  * is examined to make sure it is safe to print, otherwise it will
3761  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3762  * pointer.
3763  */
3764 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765 			 va_list ap)
3766 {
3767 	const char *p = fmt;
3768 	const char *str;
3769 	int i, j;
3770 
3771 	if (WARN_ON_ONCE(!fmt))
3772 		return;
3773 
3774 	if (static_branch_unlikely(&trace_no_verify))
3775 		goto print;
3776 
3777 	/* Don't bother checking when doing a ftrace_dump() */
3778 	if (iter->fmt == static_fmt_buf)
3779 		goto print;
3780 
3781 	while (*p) {
3782 		bool star = false;
3783 		int len = 0;
3784 
3785 		j = 0;
3786 
3787 		/* We only care about %s and variants */
3788 		for (i = 0; p[i]; i++) {
3789 			if (i + 1 >= iter->fmt_size) {
3790 				/*
3791 				 * If we can't expand the copy buffer,
3792 				 * just print it.
3793 				 */
3794 				if (!trace_iter_expand_format(iter))
3795 					goto print;
3796 			}
3797 
3798 			if (p[i] == '\\' && p[i+1]) {
3799 				i++;
3800 				continue;
3801 			}
3802 			if (p[i] == '%') {
3803 				/* Need to test cases like %08.*s */
3804 				for (j = 1; p[i+j]; j++) {
3805 					if (isdigit(p[i+j]) ||
3806 					    p[i+j] == '.')
3807 						continue;
3808 					if (p[i+j] == '*') {
3809 						star = true;
3810 						continue;
3811 					}
3812 					break;
3813 				}
3814 				if (p[i+j] == 's')
3815 					break;
3816 				star = false;
3817 			}
3818 			j = 0;
3819 		}
3820 		/* If no %s found then just print normally */
3821 		if (!p[i])
3822 			break;
3823 
3824 		/* Copy up to the %s, and print that */
3825 		strncpy(iter->fmt, p, i);
3826 		iter->fmt[i] = '\0';
3827 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828 
3829 		if (star)
3830 			len = va_arg(ap, int);
3831 
3832 		/* The ap now points to the string data of the %s */
3833 		str = va_arg(ap, const char *);
3834 
3835 		/*
3836 		 * If you hit this warning, it is likely that the
3837 		 * trace event in question used %s on a string that
3838 		 * was saved at the time of the event, but may not be
3839 		 * around when the trace is read. Use __string(),
3840 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841 		 * instead. See samples/trace_events/trace-events-sample.h
3842 		 * for reference.
3843 		 */
3844 		if (WARN_ONCE(!trace_safe_str(iter, str),
3845 			      "fmt: '%s' current_buffer: '%s'",
3846 			      fmt, show_buffer(&iter->seq))) {
3847 			int ret;
3848 
3849 			/* Try to safely read the string */
3850 			if (star) {
3851 				if (len + 1 > iter->fmt_size)
3852 					len = iter->fmt_size - 1;
3853 				if (len < 0)
3854 					len = 0;
3855 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856 				iter->fmt[len] = 0;
3857 				star = false;
3858 			} else {
3859 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860 								  iter->fmt_size);
3861 			}
3862 			if (ret < 0)
3863 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3864 			else
3865 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866 						 str, iter->fmt);
3867 			str = "[UNSAFE-MEMORY]";
3868 			strcpy(iter->fmt, "%s");
3869 		} else {
3870 			strncpy(iter->fmt, p + i, j + 1);
3871 			iter->fmt[j+1] = '\0';
3872 		}
3873 		if (star)
3874 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875 		else
3876 			trace_seq_printf(&iter->seq, iter->fmt, str);
3877 
3878 		p += i + j + 1;
3879 	}
3880  print:
3881 	if (*p)
3882 		trace_seq_vprintf(&iter->seq, p, ap);
3883 }
3884 
3885 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886 {
3887 	const char *p, *new_fmt;
3888 	char *q;
3889 
3890 	if (WARN_ON_ONCE(!fmt))
3891 		return fmt;
3892 
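	/* With pointer hashing left enabled, the format is used as-is. */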
3893 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894 		return fmt;
3895 
3896 	p = fmt;
3897 	new_fmt = q = iter->fmt;
3898 	while (*p) {
3899 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900 			if (!trace_iter_expand_format(iter))
3901 				return fmt;
3902 
3903 			q += iter->fmt - new_fmt;
3904 			new_fmt = iter->fmt;
3905 		}
3906 
3907 		*q++ = *p++;
3908 
3909 		/* Replace %p with %px */
3910 		if (p[-1] == '%') {
3911 			if (p[0] == '%') {
3912 				*q++ = *p++;
3913 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3914 				*q++ = *p++;
3915 				*q++ = 'x';
3916 			}
3917 		}
3918 	}
3919 	*q = '\0';
3920 
3921 	return new_fmt;
3922 }
3923 
3924 #define STATIC_TEMP_BUF_SIZE	128
3925 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926 
3927 /* Find the next real entry, without updating the iterator itself */
3928 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929 					  int *ent_cpu, u64 *ent_ts)
3930 {
3931 	/* __find_next_entry will reset ent_size */
3932 	int ent_size = iter->ent_size;
3933 	struct trace_entry *entry;
3934 
3935 	/*
3936 	 * If called from ftrace_dump(), then the iter->temp buffer
3937 	 * will be the static_temp_buf and not created from kmalloc.
3938 	 * If the entry size is greater than the buffer, we cannot
3939 	 * save it. Just return NULL in that case. This is only
3940 	 * used to add markers when two consecutive events' time
3941 	 * stamps have a large delta. See trace_print_lat_context().
3942 	 */
3943 	if (iter->temp == static_temp_buf &&
3944 	    STATIC_TEMP_BUF_SIZE < ent_size)
3945 		return NULL;
3946 
3947 	/*
3948 	 * The __find_next_entry() may call peek_next_entry(), which may
3949 	 * call ring_buffer_peek() that may make the contents of iter->ent
3950 	 * undefined. Need to copy iter->ent now.
3951 	 */
3952 	if (iter->ent && iter->ent != iter->temp) {
3953 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955 			void *temp;
3956 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957 			if (!temp)
3958 				return NULL;
3959 			kfree(iter->temp);
3960 			iter->temp = temp;
3961 			iter->temp_size = iter->ent_size;
3962 		}
3963 		memcpy(iter->temp, iter->ent, iter->ent_size);
3964 		iter->ent = iter->temp;
3965 	}
3966 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967 	/* Put back the original ent_size */
3968 	iter->ent_size = ent_size;
3969 
3970 	return entry;
3971 }
3972 
3973 /* Find the next real entry, and increment the iterator to the next entry */
3974 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975 {
3976 	iter->ent = __find_next_entry(iter, &iter->cpu,
3977 				      &iter->lost_events, &iter->ts);
3978 
3979 	if (iter->ent)
3980 		trace_iterator_increment(iter);
3981 
3982 	return iter->ent ? iter : NULL;
3983 }
3984 
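/* Consume (remove) the next entry on iter->cpu from the ring buffer, recording its timestamp and any lost-event count. */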
3985 static void trace_consume(struct trace_iterator *iter)
3986 {
3987 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988 			    &iter->lost_events);
3989 }
3990 
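/* seq_file ->next: advance the iterator to the trace entry at position *pos. */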
3991 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992 {
3993 	struct trace_iterator *iter = m->private;
3994 	int i = (int)*pos;
3995 	void *ent;
3996 
3997 	WARN_ON_ONCE(iter->leftover);
3998 
3999 	(*pos)++;
4000 
4001 	/* can't go backwards */
4002 	if (iter->idx > i)
4003 		return NULL;
4004 
4005 	if (iter->idx < 0)
4006 		ent = trace_find_next_entry_inc(iter);
4007 	else
4008 		ent = iter;
4009 
4010 	while (ent && iter->idx < i)
4011 		ent = trace_find_next_entry_inc(iter);
4012 
4013 	iter->pos = *pos;
4014 
4015 	return ent;
4016 }
4017 
4018 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019 {
4020 	struct ring_buffer_iter *buf_iter;
4021 	unsigned long entries = 0;
4022 	u64 ts;
4023 
4024 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025 
4026 	buf_iter = trace_buffer_iter(iter, cpu);
4027 	if (!buf_iter)
4028 		return;
4029 
4030 	ring_buffer_iter_reset(buf_iter);
4031 
4032 	/*
4033 	 * With the max latency tracers, it is possible that a reset
4034 	 * never took place on a CPU. This is evident when the
4035 	 * timestamp is before the start of the buffer.
4036 	 */
4037 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038 		if (ts >= iter->array_buffer->time_start)
4039 			break;
4040 		entries++;
4041 		ring_buffer_iter_advance(buf_iter);
4042 	}
4043 
4044 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045 }
4046 
4047 /*
4048  * The current tracer is copied to avoid taking a global lock
4049  * all around.
4050  */
4051 static void *s_start(struct seq_file *m, loff_t *pos)
4052 {
4053 	struct trace_iterator *iter = m->private;
4054 	struct trace_array *tr = iter->tr;
4055 	int cpu_file = iter->cpu_file;
4056 	void *p = NULL;
4057 	loff_t l = 0;
4058 	int cpu;
4059 
4060 	/*
4061 	 * Copy the tracer to avoid using a global lock all around.
4062 	 * iter->trace is a copy of current_trace; the name pointer can
4063 	 * be compared instead of using strcmp(), as iter->trace->name
4064 	 * will point to the same string as current_trace->name.
4065 	 */
4066 	mutex_lock(&trace_types_lock);
4067 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068 		*iter->trace = *tr->current_trace;
4069 	mutex_unlock(&trace_types_lock);
4070 
4071 #ifdef CONFIG_TRACER_MAX_TRACE
4072 	if (iter->snapshot && iter->trace->use_max_tr)
4073 		return ERR_PTR(-EBUSY);
4074 #endif
4075 
4076 	if (*pos != iter->pos) {
4077 		iter->ent = NULL;
4078 		iter->cpu = 0;
4079 		iter->idx = -1;
4080 
4081 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082 			for_each_tracing_cpu(cpu)
4083 				tracing_iter_reset(iter, cpu);
4084 		} else
4085 			tracing_iter_reset(iter, cpu_file);
4086 
4087 		iter->leftover = 0;
4088 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089 			;
4090 
4091 	} else {
4092 		/*
4093 		 * If we overflowed the seq_file before, then we want
4094 		 * to just reuse the trace_seq buffer again.
4095 		 */
4096 		if (iter->leftover)
4097 			p = iter;
4098 		else {
4099 			l = *pos - 1;
4100 			p = s_next(m, p, &l);
4101 		}
4102 	}
4103 
4104 	trace_event_read_lock();
4105 	trace_access_lock(cpu_file);
4106 	return p;
4107 }
4108 
4109 static void s_stop(struct seq_file *m, void *p)
4110 {
4111 	struct trace_iterator *iter = m->private;
4112 
4113 #ifdef CONFIG_TRACER_MAX_TRACE
4114 	if (iter->snapshot && iter->trace->use_max_tr)
4115 		return;
4116 #endif
4117 
4118 	trace_access_unlock(iter->cpu_file);
4119 	trace_event_read_unlock();
4120 }
4121 
4122 static void
4123 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124 		      unsigned long *entries, int cpu)
4125 {
4126 	unsigned long count;
4127 
4128 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129 	/*
4130 	 * If this buffer has skipped entries, then we hold all of the
4131 	 * entries for the trace and only need to ignore the ones
4132 	 * recorded before the start time stamp.
4133 	 */
4134 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136 		/* total is the same as the entries */
4137 		*total = count;
4138 	} else
4139 		*total = count +
4140 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4141 	*entries = count;
4142 }
4143 
4144 static void
4145 get_total_entries(struct array_buffer *buf,
4146 		  unsigned long *total, unsigned long *entries)
4147 {
4148 	unsigned long t, e;
4149 	int cpu;
4150 
4151 	*total = 0;
4152 	*entries = 0;
4153 
4154 	for_each_tracing_cpu(cpu) {
4155 		get_total_entries_cpu(buf, &t, &e, cpu);
4156 		*total += t;
4157 		*entries += e;
4158 	}
4159 }
4160 
4161 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162 {
4163 	unsigned long total, entries;
4164 
4165 	if (!tr)
4166 		tr = &global_trace;
4167 
4168 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169 
4170 	return entries;
4171 }
4172 
4173 unsigned long trace_total_entries(struct trace_array *tr)
4174 {
4175 	unsigned long total, entries;
4176 
4177 	if (!tr)
4178 		tr = &global_trace;
4179 
4180 	get_total_entries(&tr->array_buffer, &total, &entries);
4181 
4182 	return entries;
4183 }
4184 
4185 static void print_lat_help_header(struct seq_file *m)
4186 {
4187 	seq_puts(m, "#                    _------=> CPU#            \n"
4188 		    "#                   / _-----=> irqs-off        \n"
4189 		    "#                  | / _----=> need-resched    \n"
4190 		    "#                  || / _---=> hardirq/softirq \n"
4191 		    "#                  ||| / _--=> preempt-depth   \n"
4192 		    "#                  |||| /     delay            \n"
4193 		    "#  cmd     pid     ||||| time  |   caller      \n"
4194 		    "#     \\   /        |||||  \\    |   /         \n");
4195 }
4196 
4197 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198 {
4199 	unsigned long total;
4200 	unsigned long entries;
4201 
4202 	get_total_entries(buf, &total, &entries);
4203 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204 		   entries, total, num_online_cpus());
4205 	seq_puts(m, "#\n");
4206 }
4207 
4208 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209 				   unsigned int flags)
4210 {
4211 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212 
4213 	print_event_info(buf, m);
4214 
4215 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217 }
4218 
4219 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220 				       unsigned int flags)
4221 {
4222 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223 	const char *space = "            ";
4224 	int prec = tgid ? 12 : 2;
4225 
4226 	print_event_info(buf, m);
4227 
4228 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235 }
4236 
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241 	struct array_buffer *buf = iter->array_buffer;
4242 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243 	struct tracer *type = iter->trace;
4244 	unsigned long entries;
4245 	unsigned long total;
4246 	const char *name = "preemption";
4247 
4248 	name = type->name;
4249 
4250 	get_total_entries(buf, &total, &entries);
4251 
4252 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253 		   name, UTS_RELEASE);
4254 	seq_puts(m, "# -----------------------------------"
4255 		 "---------------------------------\n");
4256 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258 		   nsecs_to_usecs(data->saved_latency),
4259 		   entries,
4260 		   total,
4261 		   buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263 		   "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265 		   "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267 		   "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269 		   "preempt_rt",
4270 #else
4271 		   "unknown",
4272 #endif
4273 		   /* These are reserved for later use */
4274 		   0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278 	seq_puts(m, ")\n");
4279 #endif
4280 	seq_puts(m, "#    -----------------\n");
4281 	seq_printf(m, "#    | task: %.16s-%d "
4282 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283 		   data->comm, data->pid,
4284 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285 		   data->policy, data->rt_priority);
4286 	seq_puts(m, "#    -----------------\n");
4287 
4288 	if (data->critical_start) {
4289 		seq_puts(m, "#  => started at: ");
4290 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291 		trace_print_seq(m, &iter->seq);
4292 		seq_puts(m, "\n#  => ended at:   ");
4293 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294 		trace_print_seq(m, &iter->seq);
4295 		seq_puts(m, "\n#\n");
4296 	}
4297 
4298 	seq_puts(m, "#\n");
4299 }
4300 
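/* With the 'annotate' option set, emit a one-time marker when a CPU's buffer first appears in the output. */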
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303 	struct trace_seq *s = &iter->seq;
4304 	struct trace_array *tr = iter->tr;
4305 
4306 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307 		return;
4308 
4309 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310 		return;
4311 
4312 	if (cpumask_available(iter->started) &&
4313 	    cpumask_test_cpu(iter->cpu, iter->started))
4314 		return;
4315 
4316 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317 		return;
4318 
4319 	if (cpumask_available(iter->started))
4320 		cpumask_set_cpu(iter->cpu, iter->started);
4321 
4322 	/* Don't print started cpu buffer for the first entry of the trace */
4323 	if (iter->idx > 1)
4324 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325 				iter->cpu);
4326 }
4327 
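/* Default human-readable output: print the context info, then hand off to the event's trace() callback. */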
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330 	struct trace_array *tr = iter->tr;
4331 	struct trace_seq *s = &iter->seq;
4332 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333 	struct trace_entry *entry;
4334 	struct trace_event *event;
4335 
4336 	entry = iter->ent;
4337 
4338 	test_cpu_buff_start(iter);
4339 
4340 	event = ftrace_find_event(entry->type);
4341 
4342 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344 			trace_print_lat_context(iter);
4345 		else
4346 			trace_print_context(iter);
4347 	}
4348 
4349 	if (trace_seq_has_overflowed(s))
4350 		return TRACE_TYPE_PARTIAL_LINE;
4351 
4352 	if (event)
4353 		return event->funcs->trace(iter, sym_flags, event);
4354 
4355 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356 
4357 	return trace_handle_return(s);
4358 }
4359 
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362 	struct trace_array *tr = iter->tr;
4363 	struct trace_seq *s = &iter->seq;
4364 	struct trace_entry *entry;
4365 	struct trace_event *event;
4366 
4367 	entry = iter->ent;
4368 
4369 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370 		trace_seq_printf(s, "%d %d %llu ",
4371 				 entry->pid, iter->cpu, iter->ts);
4372 
4373 	if (trace_seq_has_overflowed(s))
4374 		return TRACE_TYPE_PARTIAL_LINE;
4375 
4376 	event = ftrace_find_event(entry->type);
4377 	if (event)
4378 		return event->funcs->raw(iter, 0, event);
4379 
4380 	trace_seq_printf(s, "%d ?\n", entry->type);
4381 
4382 	return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387 	struct trace_array *tr = iter->tr;
4388 	struct trace_seq *s = &iter->seq;
4389 	unsigned char newline = '\n';
4390 	struct trace_entry *entry;
4391 	struct trace_event *event;
4392 
4393 	entry = iter->ent;
4394 
4395 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4397 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4399 		if (trace_seq_has_overflowed(s))
4400 			return TRACE_TYPE_PARTIAL_LINE;
4401 	}
4402 
4403 	event = ftrace_find_event(entry->type);
4404 	if (event) {
4405 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406 		if (ret != TRACE_TYPE_HANDLED)
4407 			return ret;
4408 	}
4409 
4410 	SEQ_PUT_FIELD(s, newline);
4411 
4412 	return trace_handle_return(s);
4413 }
4414 
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417 	struct trace_array *tr = iter->tr;
4418 	struct trace_seq *s = &iter->seq;
4419 	struct trace_entry *entry;
4420 	struct trace_event *event;
4421 
4422 	entry = iter->ent;
4423 
4424 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425 		SEQ_PUT_FIELD(s, entry->pid);
4426 		SEQ_PUT_FIELD(s, iter->cpu);
4427 		SEQ_PUT_FIELD(s, iter->ts);
4428 		if (trace_seq_has_overflowed(s))
4429 			return TRACE_TYPE_PARTIAL_LINE;
4430 	}
4431 
4432 	event = ftrace_find_event(entry->type);
4433 	return event ? event->funcs->binary(iter, 0, event) :
4434 		TRACE_TYPE_HANDLED;
4435 }
4436 
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439 	struct ring_buffer_iter *buf_iter;
4440 	int cpu;
4441 
4442 	/* If we are looking at one CPU buffer, only check that one */
4443 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444 		cpu = iter->cpu_file;
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 		return 1;
4454 	}
4455 
4456 	for_each_tracing_cpu(cpu) {
4457 		buf_iter = trace_buffer_iter(iter, cpu);
4458 		if (buf_iter) {
4459 			if (!ring_buffer_iter_empty(buf_iter))
4460 				return 0;
4461 		} else {
4462 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463 				return 0;
4464 		}
4465 	}
4466 
4467 	return 1;
4468 }
4469 
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 	enum print_line_t ret;
4476 
4477 	if (iter->lost_events) {
4478 		if (iter->lost_events == (unsigned long)-1)
4479 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480 					 iter->cpu);
4481 		else
4482 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483 					 iter->cpu, iter->lost_events);
4484 		if (trace_seq_has_overflowed(&iter->seq))
4485 			return TRACE_TYPE_PARTIAL_LINE;
4486 	}
4487 
4488 	if (iter->trace && iter->trace->print_line) {
4489 		ret = iter->trace->print_line(iter);
4490 		if (ret != TRACE_TYPE_UNHANDLED)
4491 			return ret;
4492 	}
4493 
4494 	if (iter->ent->type == TRACE_BPUTS &&
4495 			trace_flags & TRACE_ITER_PRINTK &&
4496 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497 		return trace_print_bputs_msg_only(iter);
4498 
4499 	if (iter->ent->type == TRACE_BPRINT &&
4500 			trace_flags & TRACE_ITER_PRINTK &&
4501 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502 		return trace_print_bprintk_msg_only(iter);
4503 
4504 	if (iter->ent->type == TRACE_PRINT &&
4505 			trace_flags & TRACE_ITER_PRINTK &&
4506 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507 		return trace_print_printk_msg_only(iter);
4508 
4509 	if (trace_flags & TRACE_ITER_BIN)
4510 		return print_bin_fmt(iter);
4511 
4512 	if (trace_flags & TRACE_ITER_HEX)
4513 		return print_hex_fmt(iter);
4514 
4515 	if (trace_flags & TRACE_ITER_RAW)
4516 		return print_raw_fmt(iter);
4517 
4518 	return print_trace_fmt(iter);
4519 }
4520 
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523 	struct trace_iterator *iter = m->private;
4524 	struct trace_array *tr = iter->tr;
4525 
4526 	/* print nothing if the buffers are empty */
4527 	if (trace_empty(iter))
4528 		return;
4529 
4530 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531 		print_trace_header(m, iter);
4532 
4533 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534 		print_lat_help_header(m);
4535 }
4536 
4537 void trace_default_header(struct seq_file *m)
4538 {
4539 	struct trace_iterator *iter = m->private;
4540 	struct trace_array *tr = iter->tr;
4541 	unsigned long trace_flags = tr->trace_flags;
4542 
4543 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544 		return;
4545 
4546 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547 		/* print nothing if the buffers are empty */
4548 		if (trace_empty(iter))
4549 			return;
4550 		print_trace_header(m, iter);
4551 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4552 			print_lat_help_header(m);
4553 	} else {
4554 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4556 				print_func_help_header_irq(iter->array_buffer,
4557 							   m, trace_flags);
4558 			else
4559 				print_func_help_header(iter->array_buffer, m,
4560 						       trace_flags);
4561 		}
4562 	}
4563 }
4564 
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567 	if (!ftrace_is_dead())
4568 		return;
4569 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572 
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578 		    "#                      Takes a snapshot of the main buffer.\n"
4579 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4581 		    "#                       is not a '0' or '1')\n");
4582 }
4583 
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592 		    "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4596 		    "#                       is not a '0' or '1')\n");
4597 }
4598 
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601 	if (iter->tr->allocated_snapshot)
4602 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603 	else
4604 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605 
4606 	seq_puts(m, "# Snapshot commands:\n");
4607 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608 		show_snapshot_main_help(m);
4609 	else
4610 		show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616 
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619 	struct trace_iterator *iter = v;
4620 	int ret;
4621 
4622 	if (iter->ent == NULL) {
4623 		if (iter->tr) {
4624 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625 			seq_puts(m, "#\n");
4626 			test_ftrace_alive(m);
4627 		}
4628 		if (iter->snapshot && trace_empty(iter))
4629 			print_snapshot_help(m, iter);
4630 		else if (iter->trace && iter->trace->print_header)
4631 			iter->trace->print_header(m);
4632 		else
4633 			trace_default_header(m);
4634 
4635 	} else if (iter->leftover) {
4636 		/*
4637 		 * If we filled the seq_file buffer earlier, we
4638 		 * want to just show it now.
4639 		 */
4640 		ret = trace_print_seq(m, &iter->seq);
4641 
4642 		/* ret should this time be zero, but you never know */
4643 		iter->leftover = ret;
4644 
4645 	} else {
4646 		print_trace_line(iter);
4647 		ret = trace_print_seq(m, &iter->seq);
4648 		/*
4649 		 * If we overflow the seq_file buffer, then it will
4650 		 * ask us for this data again at start up.
4651 		 * Use that instead.
4652 		 *  ret is 0 if seq_file write succeeded.
4653 		 *        -1 otherwise.
4654 		 */
4655 		iter->leftover = ret;
4656 	}
4657 
4658 	return 0;
4659 }
4660 
4661 /*
4662  * Should be used after trace_array_get(); trace_types_lock
4663  * ensures that i_cdev was already initialized.
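 * (trace_create_cpu_file() stores cpu + 1 in i_cdev, so a NULL i_cdev means all CPUs.)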
4664  */
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4668 		return (long)inode->i_cdev - 1;
4669 	return RING_BUFFER_ALL_CPUS;
4670 }
4671 
4672 static const struct seq_operations tracer_seq_ops = {
4673 	.start		= s_start,
4674 	.next		= s_next,
4675 	.stop		= s_stop,
4676 	.show		= s_show,
4677 };
4678 
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682 	struct trace_array *tr = inode->i_private;
4683 	struct trace_iterator *iter;
4684 	int cpu;
4685 
4686 	if (tracing_disabled)
4687 		return ERR_PTR(-ENODEV);
4688 
4689 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690 	if (!iter)
4691 		return ERR_PTR(-ENOMEM);
4692 
4693 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694 				    GFP_KERNEL);
4695 	if (!iter->buffer_iter)
4696 		goto release;
4697 
4698 	/*
4699 	 * trace_find_next_entry() may need to save off iter->ent.
4700 	 * It will place it into the iter->temp buffer. As most
4701 	 * events are less than 128 bytes, allocate a buffer of that size.
4702 	 * If one is greater, then trace_find_next_entry() will
4703 	 * allocate a new buffer to adjust for the bigger iter->ent.
4704 	 * It's not critical if it fails to get allocated here.
4705 	 */
4706 	iter->temp = kmalloc(128, GFP_KERNEL);
4707 	if (iter->temp)
4708 		iter->temp_size = 128;
4709 
4710 	/*
4711 	 * trace_event_printf() may need to modify the given format
4712 	 * string to replace %p with %px so that it shows the real address
4713 	 * instead of a hashed value. However, that is only needed for
4714 	 * event tracing; other tracers may not need it. Defer the
4715 	 * allocation until it is needed.
4716 	 */
4717 	iter->fmt = NULL;
4718 	iter->fmt_size = 0;
4719 
4720 	/*
4721 	 * We make a copy of the current tracer to avoid concurrent
4722 	 * changes on it while we are reading.
4723 	 */
4724 	mutex_lock(&trace_types_lock);
4725 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726 	if (!iter->trace)
4727 		goto fail;
4728 
4729 	*iter->trace = *tr->current_trace;
4730 
4731 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732 		goto fail;
4733 
4734 	iter->tr = tr;
4735 
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737 	/* Currently only the top directory has a snapshot */
4738 	if (tr->current_trace->print_max || snapshot)
4739 		iter->array_buffer = &tr->max_buffer;
4740 	else
4741 #endif
4742 		iter->array_buffer = &tr->array_buffer;
4743 	iter->snapshot = snapshot;
4744 	iter->pos = -1;
4745 	iter->cpu_file = tracing_get_cpu(inode);
4746 	mutex_init(&iter->mutex);
4747 
4748 	/* Notify the tracer early, before we stop tracing. */
4749 	if (iter->trace->open)
4750 		iter->trace->open(iter);
4751 
4752 	/* Annotate start of buffers if we had overruns */
4753 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4754 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755 
4756 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757 	if (trace_clocks[tr->clock_id].in_ns)
4758 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759 
4760 	/*
4761 	 * If pause-on-trace is enabled, then stop the trace while
4762 	 * dumping, unless this is the "snapshot" file.
4763 	 */
4764 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765 		tracing_stop_tr(tr);
4766 
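	/* Prepare the ring buffer iterators first, synchronize once, then start reading. */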
4767 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768 		for_each_tracing_cpu(cpu) {
4769 			iter->buffer_iter[cpu] =
4770 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4771 							 cpu, GFP_KERNEL);
4772 		}
4773 		ring_buffer_read_prepare_sync();
4774 		for_each_tracing_cpu(cpu) {
4775 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4776 			tracing_iter_reset(iter, cpu);
4777 		}
4778 	} else {
4779 		cpu = iter->cpu_file;
4780 		iter->buffer_iter[cpu] =
4781 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4782 						 cpu, GFP_KERNEL);
4783 		ring_buffer_read_prepare_sync();
4784 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4785 		tracing_iter_reset(iter, cpu);
4786 	}
4787 
4788 	mutex_unlock(&trace_types_lock);
4789 
4790 	return iter;
4791 
4792  fail:
4793 	mutex_unlock(&trace_types_lock);
4794 	kfree(iter->trace);
4795 	kfree(iter->temp);
4796 	kfree(iter->buffer_iter);
4797 release:
4798 	seq_release_private(inode, file);
4799 	return ERR_PTR(-ENOMEM);
4800 }
4801 
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804 	int ret;
4805 
4806 	ret = tracing_check_open_get_tr(NULL);
4807 	if (ret)
4808 		return ret;
4809 
4810 	filp->private_data = inode->i_private;
4811 	return 0;
4812 }
4813 
4814 bool tracing_is_disabled(void)
4815 {
4816 	return tracing_disabled;
4817 }
4818 
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825 	struct trace_array *tr = inode->i_private;
4826 	int ret;
4827 
4828 	ret = tracing_check_open_get_tr(tr);
4829 	if (ret)
4830 		return ret;
4831 
4832 	filp->private_data = inode->i_private;
4833 
4834 	return 0;
4835 }
4836 
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 	struct seq_file *m = file->private_data;
4841 	struct trace_iterator *iter;
4842 	int cpu;
4843 
4844 	if (!(file->f_mode & FMODE_READ)) {
4845 		trace_array_put(tr);
4846 		return 0;
4847 	}
4848 
4849 	/* Writes do not use seq_file */
4850 	iter = m->private;
4851 	mutex_lock(&trace_types_lock);
4852 
4853 	for_each_tracing_cpu(cpu) {
4854 		if (iter->buffer_iter[cpu])
4855 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856 	}
4857 
4858 	if (iter->trace && iter->trace->close)
4859 		iter->trace->close(iter);
4860 
4861 	if (!iter->snapshot && tr->stop_count)
4862 		/* reenable tracing if it was previously enabled */
4863 		tracing_start_tr(tr);
4864 
4865 	__trace_array_put(tr);
4866 
4867 	mutex_unlock(&trace_types_lock);
4868 
4869 	mutex_destroy(&iter->mutex);
4870 	free_cpumask_var(iter->started);
4871 	kfree(iter->fmt);
4872 	kfree(iter->temp);
4873 	kfree(iter->trace);
4874 	kfree(iter->buffer_iter);
4875 	seq_release_private(inode, file);
4876 
4877 	return 0;
4878 }
4879 
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882 	struct trace_array *tr = inode->i_private;
4883 
4884 	trace_array_put(tr);
4885 	return 0;
4886 }
4887 
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890 	struct trace_array *tr = inode->i_private;
4891 
4892 	trace_array_put(tr);
4893 
4894 	return single_release(inode, file);
4895 }
4896 
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899 	struct trace_array *tr = inode->i_private;
4900 	struct trace_iterator *iter;
4901 	int ret;
4902 
4903 	ret = tracing_check_open_get_tr(tr);
4904 	if (ret)
4905 		return ret;
4906 
4907 	/* If this file was opened for write, then erase its contents */
4908 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909 		int cpu = tracing_get_cpu(inode);
4910 		struct array_buffer *trace_buf = &tr->array_buffer;
4911 
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913 		if (tr->current_trace->print_max)
4914 			trace_buf = &tr->max_buffer;
4915 #endif
4916 
4917 		if (cpu == RING_BUFFER_ALL_CPUS)
4918 			tracing_reset_online_cpus(trace_buf);
4919 		else
4920 			tracing_reset_cpu(trace_buf, cpu);
4921 	}
4922 
4923 	if (file->f_mode & FMODE_READ) {
4924 		iter = __tracing_open(inode, file, false);
4925 		if (IS_ERR(iter))
4926 			ret = PTR_ERR(iter);
4927 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929 	}
4930 
4931 	if (ret < 0)
4932 		trace_array_put(tr);
4933 
4934 	return ret;
4935 }
4936 
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947 
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952 	while (t && !trace_ok_for_array(t, tr))
4953 		t = t->next;
4954 
4955 	return t;
4956 }
4957 
4958 static void *
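/* seq_file iterator over the tracers this trace array is allowed to use (available_tracers). */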
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	struct trace_array *tr = m->private;
4962 	struct tracer *t = v;
4963 
4964 	(*pos)++;
4965 
4966 	if (t)
4967 		t = get_tracer_for_array(tr, t->next);
4968 
4969 	return t;
4970 }
4971 
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	struct trace_array *tr = m->private;
4975 	struct tracer *t;
4976 	loff_t l = 0;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	t = get_tracer_for_array(tr, trace_types);
4981 	for (; t && l < *pos; t = t_next(m, t, &l))
4982 		;
4983 
4984 	return t;
4985 }
4986 
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989 	mutex_unlock(&trace_types_lock);
4990 }
4991 
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994 	struct tracer *t = v;
4995 
4996 	if (!t)
4997 		return 0;
4998 
4999 	seq_puts(m, t->name);
5000 	if (t->next)
5001 		seq_putc(m, ' ');
5002 	else
5003 		seq_putc(m, '\n');
5004 
5005 	return 0;
5006 }
5007 
5008 static const struct seq_operations show_traces_seq_ops = {
5009 	.start		= t_start,
5010 	.next		= t_next,
5011 	.stop		= t_stop,
5012 	.show		= t_show,
5013 };
5014 
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017 	struct trace_array *tr = inode->i_private;
5018 	struct seq_file *m;
5019 	int ret;
5020 
5021 	ret = tracing_check_open_get_tr(tr);
5022 	if (ret)
5023 		return ret;
5024 
5025 	ret = seq_open(file, &show_traces_seq_ops);
5026 	if (ret) {
5027 		trace_array_put(tr);
5028 		return ret;
5029 	}
5030 
5031 	m = file->private_data;
5032 	m->private = tr;
5033 
5034 	return 0;
5035 }
5036 
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 	return seq_release(inode, file);
5043 }
5044 
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047 		   size_t count, loff_t *ppos)
5048 {
5049 	return count;
5050 }
5051 
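/* Seeking is only meaningful when the file was opened for read; writers keep the position at 0. */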
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054 	int ret;
5055 
5056 	if (file->f_mode & FMODE_READ)
5057 		ret = seq_lseek(file, offset, whence);
5058 	else
5059 		file->f_pos = ret = 0;
5060 
5061 	return ret;
5062 }
5063 
5064 static const struct file_operations tracing_fops = {
5065 	.open		= tracing_open,
5066 	.read		= seq_read,
5067 	.write		= tracing_write_stub,
5068 	.llseek		= tracing_lseek,
5069 	.release	= tracing_release,
5070 };
5071 
5072 static const struct file_operations show_traces_fops = {
5073 	.open		= show_traces_open,
5074 	.read		= seq_read,
5075 	.llseek		= seq_lseek,
5076 	.release	= show_traces_release,
5077 };
5078 
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081 		     size_t count, loff_t *ppos)
5082 {
5083 	struct trace_array *tr = file_inode(filp)->i_private;
5084 	char *mask_str;
5085 	int len;
5086 
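	/* snprintf(NULL, 0, ...) returns the length needed to format the cpumask. */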
5087 	len = snprintf(NULL, 0, "%*pb\n",
5088 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089 	mask_str = kmalloc(len, GFP_KERNEL);
5090 	if (!mask_str)
5091 		return -ENOMEM;
5092 
5093 	len = snprintf(mask_str, len, "%*pb\n",
5094 		       cpumask_pr_args(tr->tracing_cpumask));
5095 	if (len >= count) {
5096 		count = -EINVAL;
5097 		goto out_err;
5098 	}
5099 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100 
5101 out_err:
5102 	kfree(mask_str);
5103 
5104 	return count;
5105 }
5106 
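/*
 * Apply a new tracing cpumask: stop recording on CPUs being cleared
 * and resume recording on CPUs being set. The update is done under
 * tr->max_lock with interrupts disabled.
 */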
5107 int tracing_set_cpumask(struct trace_array *tr,
5108 			cpumask_var_t tracing_cpumask_new)
5109 {
5110 	int cpu;
5111 
5112 	if (!tr)
5113 		return -EINVAL;
5114 
5115 	local_irq_disable();
5116 	arch_spin_lock(&tr->max_lock);
5117 	for_each_tracing_cpu(cpu) {
5118 		/*
5119 		 * Increase/decrease the disabled counter if we are
5120 		 * about to flip a bit in the cpumask:
5121 		 */
5122 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126 		}
5127 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131 		}
5132 	}
5133 	arch_spin_unlock(&tr->max_lock);
5134 	local_irq_enable();
5135 
5136 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137 
5138 	return 0;
5139 }
5140 
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143 		      size_t count, loff_t *ppos)
5144 {
5145 	struct trace_array *tr = file_inode(filp)->i_private;
5146 	cpumask_var_t tracing_cpumask_new;
5147 	int err;
5148 
5149 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150 		return -ENOMEM;
5151 
5152 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153 	if (err)
5154 		goto err_free;
5155 
5156 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157 	if (err)
5158 		goto err_free;
5159 
5160 	free_cpumask_var(tracing_cpumask_new);
5161 
5162 	return count;
5163 
5164 err_free:
5165 	free_cpumask_var(tracing_cpumask_new);
5166 
5167 	return err;
5168 }
5169 
5170 static const struct file_operations tracing_cpumask_fops = {
5171 	.open		= tracing_open_generic_tr,
5172 	.read		= tracing_cpumask_read,
5173 	.write		= tracing_cpumask_write,
5174 	.release	= tracing_release_generic_tr,
5175 	.llseek		= generic_file_llseek,
5176 };
5177 
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180 	struct tracer_opt *trace_opts;
5181 	struct trace_array *tr = m->private;
5182 	u32 tracer_flags;
5183 	int i;
5184 
5185 	mutex_lock(&trace_types_lock);
5186 	tracer_flags = tr->current_trace->flags->val;
5187 	trace_opts = tr->current_trace->flags->opts;
5188 
5189 	for (i = 0; trace_options[i]; i++) {
5190 		if (tr->trace_flags & (1 << i))
5191 			seq_printf(m, "%s\n", trace_options[i]);
5192 		else
5193 			seq_printf(m, "no%s\n", trace_options[i]);
5194 	}
5195 
5196 	for (i = 0; trace_opts[i].name; i++) {
5197 		if (tracer_flags & trace_opts[i].bit)
5198 			seq_printf(m, "%s\n", trace_opts[i].name);
5199 		else
5200 			seq_printf(m, "no%s\n", trace_opts[i].name);
5201 	}
5202 	mutex_unlock(&trace_types_lock);
5203 
5204 	return 0;
5205 }
5206 
5207 static int __set_tracer_option(struct trace_array *tr,
5208 			       struct tracer_flags *tracer_flags,
5209 			       struct tracer_opt *opts, int neg)
5210 {
5211 	struct tracer *trace = tracer_flags->trace;
5212 	int ret;
5213 
5214 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215 	if (ret)
5216 		return ret;
5217 
5218 	if (neg)
5219 		tracer_flags->val &= ~opts->bit;
5220 	else
5221 		tracer_flags->val |= opts->bit;
5222 	return 0;
5223 }
5224 
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228 	struct tracer *trace = tr->current_trace;
5229 	struct tracer_flags *tracer_flags = trace->flags;
5230 	struct tracer_opt *opts = NULL;
5231 	int i;
5232 
5233 	for (i = 0; tracer_flags->opts[i].name; i++) {
5234 		opts = &tracer_flags->opts[i];
5235 
5236 		if (strcmp(cmp, opts->name) == 0)
5237 			return __set_tracer_option(tr, trace->flags, opts, neg);
5238 	}
5239 
5240 	return -EINVAL;
5241 }
5242 
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247 		return -1;
5248 
5249 	return 0;
5250 }
5251 
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254 	int *map;
5255 
5256 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5257 	    (mask == TRACE_ITER_RECORD_CMD))
5258 		lockdep_assert_held(&event_mutex);
5259 
5260 	/* Do nothing if the flag already matches the requested state */
5261 	if (!!(tr->trace_flags & mask) == !!enabled)
5262 		return 0;
5263 
5264 	/* Give the tracer a chance to approve the change */
5265 	if (tr->current_trace->flag_changed)
5266 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267 			return -EINVAL;
5268 
5269 	if (enabled)
5270 		tr->trace_flags |= mask;
5271 	else
5272 		tr->trace_flags &= ~mask;
5273 
5274 	if (mask == TRACE_ITER_RECORD_CMD)
5275 		trace_event_enable_cmd_record(enabled);
5276 
5277 	if (mask == TRACE_ITER_RECORD_TGID) {
5278 		if (!tgid_map) {
5279 			tgid_map_max = pid_max;
5280 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281 				       GFP_KERNEL);
5282 
5283 			/*
5284 			 * Pairs with smp_load_acquire() in
5285 			 * trace_find_tgid_ptr() to ensure that if it observes
5286 			 * the tgid_map we just allocated then it also observes
5287 			 * the corresponding tgid_map_max value.
5288 			 */
5289 			smp_store_release(&tgid_map, map);
5290 		}
5291 		if (!tgid_map) {
5292 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293 			return -ENOMEM;
5294 		}
5295 
5296 		trace_event_enable_tgid_record(enabled);
5297 	}
5298 
5299 	if (mask == TRACE_ITER_EVENT_FORK)
5300 		trace_event_follow_fork(tr, enabled);
5301 
5302 	if (mask == TRACE_ITER_FUNC_FORK)
5303 		ftrace_pid_follow_fork(tr, enabled);
5304 
5305 	if (mask == TRACE_ITER_OVERWRITE) {
5306 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310 	}
5311 
5312 	if (mask == TRACE_ITER_PRINTK) {
5313 		trace_printk_start_stop_comm(enabled);
5314 		trace_printk_control(enabled);
5315 	}
5316 
5317 	return 0;
5318 }
5319 
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322 	char *cmp;
5323 	int neg = 0;
5324 	int ret;
5325 	size_t orig_len = strlen(option);
5326 	int len;
5327 
5328 	cmp = strstrip(option);
5329 
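	/* A leading "no" negates the option (e.g. "noprint-parent" clears print-parent). */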
5330 	len = str_has_prefix(cmp, "no");
5331 	if (len)
5332 		neg = 1;
5333 
5334 	cmp += len;
5335 
5336 	mutex_lock(&event_mutex);
5337 	mutex_lock(&trace_types_lock);
5338 
5339 	ret = match_string(trace_options, -1, cmp);
5340 	/* If it is not a general trace option, test the tracer-specific options */
5341 	if (ret < 0)
5342 		ret = set_tracer_option(tr, cmp, neg);
5343 	else
5344 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5345 
5346 	mutex_unlock(&trace_types_lock);
5347 	mutex_unlock(&event_mutex);
5348 
5349 	/*
5350 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5351 	 * turn it back into a space.
5352 	 */
5353 	if (orig_len > strlen(option))
5354 		option[strlen(option)] = ' ';
5355 
5356 	return ret;
5357 }
5358 
5359 static void __init apply_trace_boot_options(void)
5360 {
5361 	char *buf = trace_boot_options_buf;
5362 	char *option;
5363 
5364 	while (true) {
5365 		option = strsep(&buf, ",");
5366 
5367 		if (!option)
5368 			break;
5369 
5370 		if (*option)
5371 			trace_set_options(&global_trace, option);
5372 
5373 		/* Put back the comma to allow this to be called again */
5374 		if (buf)
5375 			*(buf - 1) = ',';
5376 	}
5377 }
5378 
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381 			size_t cnt, loff_t *ppos)
5382 {
5383 	struct seq_file *m = filp->private_data;
5384 	struct trace_array *tr = m->private;
5385 	char buf[64];
5386 	int ret;
5387 
5388 	if (cnt >= sizeof(buf))
5389 		return -EINVAL;
5390 
5391 	if (copy_from_user(buf, ubuf, cnt))
5392 		return -EFAULT;
5393 
5394 	buf[cnt] = 0;
5395 
5396 	ret = trace_set_options(tr, buf);
5397 	if (ret < 0)
5398 		return ret;
5399 
5400 	*ppos += cnt;
5401 
5402 	return cnt;
5403 }
5404 
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407 	struct trace_array *tr = inode->i_private;
5408 	int ret;
5409 
5410 	ret = tracing_check_open_get_tr(tr);
5411 	if (ret)
5412 		return ret;
5413 
5414 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415 	if (ret < 0)
5416 		trace_array_put(tr);
5417 
5418 	return ret;
5419 }
5420 
5421 static const struct file_operations tracing_iter_fops = {
5422 	.open		= tracing_trace_options_open,
5423 	.read		= seq_read,
5424 	.llseek		= seq_lseek,
5425 	.release	= tracing_single_release_tr,
5426 	.write		= tracing_trace_options_write,
5427 };
5428 
5429 static const char readme_msg[] =
5430 	"tracing mini-HOWTO:\n\n"
5431 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5432 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433 	" Important files:\n"
5434 	"  trace\t\t\t- The static contents of the buffer\n"
5435 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437 	"  current_tracer\t- function and latency tracers\n"
5438 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5439 	"  error_log\t- error log for failed commands (that support it)\n"
5440 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442 	"  trace_clock\t\t- change the clock used to order events\n"
5443 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5444 	"      global:   Synced across CPUs but slows tracing down.\n"
5445 	"     counter:   Not a clock, but just an increment\n"
5446 	"      uptime:   Jiffy counter from time of boot\n"
5447 	"        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449 	"     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5452 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5453 	"    absolute:   Absolute (standalone) timestamp\n"
5454 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5455 	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5456 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5457 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458 	"\t\t\t  Remove sub-buffer with rmdir\n"
5459 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5460 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5461 	"\t\t\t  option name\n"
5462 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5465 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466 	"\t\t\t  functions\n"
5467 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5468 	"\t     modules: Can select a group via module\n"
5469 	"\t      Format: :mod:<module-name>\n"
5470 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471 	"\t    triggers: a command to perform when function is hit\n"
5472 	"\t      Format: <function>:<trigger>[:count]\n"
5473 	"\t     trigger: traceon, traceoff\n"
5474 	"\t\t      enable_event:<system>:<event>\n"
5475 	"\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477 	"\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480 	"\t\t      snapshot\n"
5481 #endif
5482 	"\t\t      dump\n"
5483 	"\t\t      cpudump\n"
5484 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486 	"\t     The first one will disable tracing every time do_fault is hit\n"
5487 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5489 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5490 	"\t       the counter will not decrement. It only decrements when the\n"
5491 	"\t       trigger did work\n"
5492 	"\t     To remove trigger without count:\n"
5493 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494 	"\t     To remove trigger with a count:\n"
5495 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498 	"\t    modules: Can select a group via module command :mod:\n"
5499 	"\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503 	"\t\t    (function)\n"
5504 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505 	"\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5515 	"\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5519 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5520 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5521 	"\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524 	"\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540 	"\t  accepts: event-definitions (one definition per line)\n"
5541 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546 	"\t           -:[<group>/]<event>\n"
5547 #ifdef CONFIG_KPROBE_EVENTS
5548 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5549 	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5550 #endif
5551 #ifdef CONFIG_UPROBE_EVENTS
5552 	"   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5553 #endif
5554 	"\t     args: <name>=fetcharg[:type]\n"
5555 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5556 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5557 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5558 #else
5559 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5560 #endif
5561 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5562 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5563 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5564 	"\t           <type>\\[<array-size>\\]\n"
5565 #ifdef CONFIG_HIST_TRIGGERS
5566 	"\t    field: <stype> <name>;\n"
5567 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5568 	"\t           [unsigned] char/int/long\n"
5569 #endif
5570 #endif
5571 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5572 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5573 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5574 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5575 	"\t\t\t  events\n"
5576 	"      filter\t\t- If set, only events passing filter are traced\n"
5577 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5578 	"\t\t\t  <event>:\n"
5579 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5580 	"      filter\t\t- If set, only events passing filter are traced\n"
5581 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5582 	"\t    Format: <trigger>[:count][if <filter>]\n"
5583 	"\t   trigger: traceon, traceoff\n"
5584 	"\t            enable_event:<system>:<event>\n"
5585 	"\t            disable_event:<system>:<event>\n"
5586 #ifdef CONFIG_HIST_TRIGGERS
5587 	"\t            enable_hist:<system>:<event>\n"
5588 	"\t            disable_hist:<system>:<event>\n"
5589 #endif
5590 #ifdef CONFIG_STACKTRACE
5591 	"\t\t    stacktrace\n"
5592 #endif
5593 #ifdef CONFIG_TRACER_SNAPSHOT
5594 	"\t\t    snapshot\n"
5595 #endif
5596 #ifdef CONFIG_HIST_TRIGGERS
5597 	"\t\t    hist (see below)\n"
5598 #endif
5599 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5600 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5601 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5602 	"\t                  events/block/block_unplug/trigger\n"
5603 	"\t   The first disables tracing every time block_unplug is hit.\n"
5604 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5605 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5606 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5607 	"\t   Like function triggers, the counter is only decremented if it\n"
5608 	"\t    enabled or disabled tracing.\n"
5609 	"\t   To remove a trigger without a count:\n"
5610 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5611 	"\t   To remove a trigger with a count:\n"
5612 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5613 	"\t   Filters can be ignored when removing a trigger.\n"
5614 #ifdef CONFIG_HIST_TRIGGERS
5615 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5616 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5617 	"\t            [:values=<field1[,field2,...]>]\n"
5618 	"\t            [:sort=<field1[,field2,...]>]\n"
5619 	"\t            [:size=#entries]\n"
5620 	"\t            [:pause][:continue][:clear]\n"
5621 	"\t            [:name=histname1]\n"
5622 	"\t            [:<handler>.<action>]\n"
5623 	"\t            [if <filter>]\n\n"
5624 	"\t    Note, special fields can be used as well:\n"
5625 	"\t            common_timestamp - to record current timestamp\n"
5626 	"\t            common_cpu - to record the CPU the event happened on\n"
5627 	"\n"
5628 	"\t    When a matching event is hit, an entry is added to a hash\n"
5629 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5630 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5631 	"\t    correspond to fields in the event's format description.  Keys\n"
5632 	"\t    can be any field, or the special string 'stacktrace'.\n"
5633 	"\t    Compound keys consisting of up to two fields can be specified\n"
5634 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5635 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5636 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5637 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5638 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5639 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5640 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5641 	"\t    its histogram data will be shared with other triggers of the\n"
5642 	"\t    same name, and trigger hits will update this common data.\n\n"
5643 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5644 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5645 	"\t    triggers attached to an event, there will be a table for each\n"
5646 	"\t    trigger in the output.  The table displayed for a named\n"
5647 	"\t    trigger will be the same as any other instance having the\n"
5648 	"\t    same name.  The default format used to display a given field\n"
5649 	"\t    can be modified by appending any of the following modifiers\n"
5650 	"\t    to the field name, as applicable:\n\n"
5651 	"\t            .hex        display a number as a hex value\n"
5652 	"\t            .sym        display an address as a symbol\n"
5653 	"\t            .sym-offset display an address as a symbol and offset\n"
5654 	"\t            .execname   display a common_pid as a program name\n"
5655 	"\t            .syscall    display a syscall id as a syscall name\n"
5656 	"\t            .log2       display log2 value rather than raw number\n"
5657 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5658 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5659 	"\t    trigger or to start a hist trigger but not log any events\n"
5660 	"\t    until told to do so.  'continue' can be used to start or\n"
5661 	"\t    restart a paused hist trigger.\n\n"
5662 	"\t    The 'clear' parameter will clear the contents of a running\n"
5663 	"\t    hist trigger and leave its current paused/active state\n"
5664 	"\t    unchanged.\n\n"
5665 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5666 	"\t    have one event conditionally start and stop another event's\n"
5667 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5668 	"\t    the enable_event and disable_event triggers.\n\n"
5669 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5670 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5671 	"\t        <handler>.<action>\n\n"
5672 	"\t    The available handlers are:\n\n"
5673 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5674 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5675 	"\t        onchange(var)            - invoke action if var changes\n\n"
5676 	"\t    The available actions are:\n\n"
5677 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5678 	"\t        save(field,...)                      - save current event fields\n"
5679 #ifdef CONFIG_TRACER_SNAPSHOT
5680 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5681 #endif
5682 #ifdef CONFIG_SYNTH_EVENTS
5683 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5684 	"\t  Write into this file to define/undefine new synthetic events.\n"
5685 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5686 #endif
5687 #endif
5688 ;
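
/*
 * Illustrative example (not part of the help text above): a minimal
 * user-space sketch exercising the hist trigger syntax documented in
 * readme_msg. It assumes tracefs is mounted at /sys/kernel/tracing and
 * that the kmem:kmalloc event is available with a 'bytes_req' field.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_kmalloc_hist(void)
 *	{
 *		const char cmd[] = "hist:keys=common_pid.execname:"
 *				   "values=bytes_req:sort=bytes_req.descending";
 *		int fd = open("/sys/kernel/tracing/events/kmem/kmalloc/trigger",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, cmd, strlen(cmd)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 *
 * Reading events/kmem/kmalloc/hist afterwards dumps the resulting hash
 * table, as described above.
 */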
5689 
5690 static ssize_t
5691 tracing_readme_read(struct file *filp, char __user *ubuf,
5692 		       size_t cnt, loff_t *ppos)
5693 {
5694 	return simple_read_from_buffer(ubuf, cnt, ppos,
5695 					readme_msg, strlen(readme_msg));
5696 }
5697 
5698 static const struct file_operations tracing_readme_fops = {
5699 	.open		= tracing_open_generic,
5700 	.read		= tracing_readme_read,
5701 	.llseek		= generic_file_llseek,
5702 };
5703 
5704 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5705 {
5706 	int pid = ++(*pos);
5707 
5708 	return trace_find_tgid_ptr(pid);
5709 }
5710 
5711 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5712 {
5713 	int pid = *pos;
5714 
5715 	return trace_find_tgid_ptr(pid);
5716 }
5717 
5718 static void saved_tgids_stop(struct seq_file *m, void *v)
5719 {
5720 }
5721 
5722 static int saved_tgids_show(struct seq_file *m, void *v)
5723 {
5724 	int *entry = (int *)v;
5725 	int pid = entry - tgid_map;
5726 	int tgid = *entry;
5727 
5728 	if (tgid == 0)
5729 		return SEQ_SKIP;
5730 
5731 	seq_printf(m, "%d %d\n", pid, tgid);
5732 	return 0;
5733 }
5734 
5735 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5736 	.start		= saved_tgids_start,
5737 	.stop		= saved_tgids_stop,
5738 	.next		= saved_tgids_next,
5739 	.show		= saved_tgids_show,
5740 };
5741 
5742 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5743 {
5744 	int ret;
5745 
5746 	ret = tracing_check_open_get_tr(NULL);
5747 	if (ret)
5748 		return ret;
5749 
5750 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5751 }
5752 
5753 
5754 static const struct file_operations tracing_saved_tgids_fops = {
5755 	.open		= tracing_saved_tgids_open,
5756 	.read		= seq_read,
5757 	.llseek		= seq_lseek,
5758 	.release	= seq_release,
5759 };
5760 
5761 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5762 {
5763 	unsigned int *ptr = v;
5764 
5765 	if (*pos || m->count)
5766 		ptr++;
5767 
5768 	(*pos)++;
5769 
5770 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5771 	     ptr++) {
5772 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5773 			continue;
5774 
5775 		return ptr;
5776 	}
5777 
5778 	return NULL;
5779 }
5780 
5781 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5782 {
5783 	void *v;
5784 	loff_t l = 0;
5785 
5786 	preempt_disable();
5787 	arch_spin_lock(&trace_cmdline_lock);
5788 
5789 	v = &savedcmd->map_cmdline_to_pid[0];
5790 	while (l <= *pos) {
5791 		v = saved_cmdlines_next(m, v, &l);
5792 		if (!v)
5793 			return NULL;
5794 	}
5795 
5796 	return v;
5797 }
5798 
5799 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5800 {
5801 	arch_spin_unlock(&trace_cmdline_lock);
5802 	preempt_enable();
5803 }
5804 
5805 static int saved_cmdlines_show(struct seq_file *m, void *v)
5806 {
5807 	char buf[TASK_COMM_LEN];
5808 	unsigned int *pid = v;
5809 
5810 	__trace_find_cmdline(*pid, buf);
5811 	seq_printf(m, "%d %s\n", *pid, buf);
5812 	return 0;
5813 }
5814 
5815 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5816 	.start		= saved_cmdlines_start,
5817 	.next		= saved_cmdlines_next,
5818 	.stop		= saved_cmdlines_stop,
5819 	.show		= saved_cmdlines_show,
5820 };
5821 
5822 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5823 {
5824 	int ret;
5825 
5826 	ret = tracing_check_open_get_tr(NULL);
5827 	if (ret)
5828 		return ret;
5829 
5830 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5831 }
5832 
5833 static const struct file_operations tracing_saved_cmdlines_fops = {
5834 	.open		= tracing_saved_cmdlines_open,
5835 	.read		= seq_read,
5836 	.llseek		= seq_lseek,
5837 	.release	= seq_release,
5838 };
5839 
5840 static ssize_t
5841 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5842 				 size_t cnt, loff_t *ppos)
5843 {
5844 	char buf[64];
5845 	int r;
5846 
5847 	arch_spin_lock(&trace_cmdline_lock);
5848 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5849 	arch_spin_unlock(&trace_cmdline_lock);
5850 
5851 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5852 }
5853 
5854 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5855 {
5856 	kfree(s->saved_cmdlines);
5857 	kfree(s->map_cmdline_to_pid);
5858 	kfree(s);
5859 }
5860 
5861 static int tracing_resize_saved_cmdlines(unsigned int val)
5862 {
5863 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5864 
5865 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5866 	if (!s)
5867 		return -ENOMEM;
5868 
5869 	if (allocate_cmdlines_buffer(val, s) < 0) {
5870 		kfree(s);
5871 		return -ENOMEM;
5872 	}
5873 
5874 	arch_spin_lock(&trace_cmdline_lock);
5875 	savedcmd_temp = savedcmd;
5876 	savedcmd = s;
5877 	arch_spin_unlock(&trace_cmdline_lock);
5878 	free_saved_cmdlines_buffer(savedcmd_temp);
5879 
5880 	return 0;
5881 }
5882 
5883 static ssize_t
5884 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5885 				  size_t cnt, loff_t *ppos)
5886 {
5887 	unsigned long val;
5888 	int ret;
5889 
5890 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5891 	if (ret)
5892 		return ret;
5893 
5894 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5895 	if (!val || val > PID_MAX_DEFAULT)
5896 		return -EINVAL;
5897 
5898 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5899 	if (ret < 0)
5900 		return ret;
5901 
5902 	*ppos += cnt;
5903 
5904 	return cnt;
5905 }
5906 
5907 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5908 	.open		= tracing_open_generic,
5909 	.read		= tracing_saved_cmdlines_size_read,
5910 	.write		= tracing_saved_cmdlines_size_write,
5911 };
5912 
5913 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5914 static union trace_eval_map_item *
5915 update_eval_map(union trace_eval_map_item *ptr)
5916 {
5917 	if (!ptr->map.eval_string) {
5918 		if (ptr->tail.next) {
5919 			ptr = ptr->tail.next;
5920 			/* Set ptr to the next real item (skip head) */
5921 			ptr++;
5922 		} else
5923 			return NULL;
5924 	}
5925 	return ptr;
5926 }
5927 
5928 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5929 {
5930 	union trace_eval_map_item *ptr = v;
5931 
5932 	/*
5933 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5934 	 * This really should never happen.
5935 	 */
5936 	(*pos)++;
5937 	ptr = update_eval_map(ptr);
5938 	if (WARN_ON_ONCE(!ptr))
5939 		return NULL;
5940 
5941 	ptr++;
5942 	ptr = update_eval_map(ptr);
5943 
5944 	return ptr;
5945 }
5946 
5947 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5948 {
5949 	union trace_eval_map_item *v;
5950 	loff_t l = 0;
5951 
5952 	mutex_lock(&trace_eval_mutex);
5953 
5954 	v = trace_eval_maps;
5955 	if (v)
5956 		v++;
5957 
5958 	while (v && l < *pos) {
5959 		v = eval_map_next(m, v, &l);
5960 	}
5961 
5962 	return v;
5963 }
5964 
5965 static void eval_map_stop(struct seq_file *m, void *v)
5966 {
5967 	mutex_unlock(&trace_eval_mutex);
5968 }
5969 
5970 static int eval_map_show(struct seq_file *m, void *v)
5971 {
5972 	union trace_eval_map_item *ptr = v;
5973 
5974 	seq_printf(m, "%s %ld (%s)\n",
5975 		   ptr->map.eval_string, ptr->map.eval_value,
5976 		   ptr->map.system);
5977 
5978 	return 0;
5979 }
5980 
5981 static const struct seq_operations tracing_eval_map_seq_ops = {
5982 	.start		= eval_map_start,
5983 	.next		= eval_map_next,
5984 	.stop		= eval_map_stop,
5985 	.show		= eval_map_show,
5986 };
5987 
5988 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5989 {
5990 	int ret;
5991 
5992 	ret = tracing_check_open_get_tr(NULL);
5993 	if (ret)
5994 		return ret;
5995 
5996 	return seq_open(filp, &tracing_eval_map_seq_ops);
5997 }
5998 
5999 static const struct file_operations tracing_eval_map_fops = {
6000 	.open		= tracing_eval_map_open,
6001 	.read		= seq_read,
6002 	.llseek		= seq_lseek,
6003 	.release	= seq_release,
6004 };
6005 
6006 static inline union trace_eval_map_item *
6007 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6008 {
6009 	/* Return tail of array given the head */
6010 	return ptr + ptr->head.length + 1;
6011 }
6012 
6013 static void
6014 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6015 			   int len)
6016 {
6017 	struct trace_eval_map **stop;
6018 	struct trace_eval_map **map;
6019 	union trace_eval_map_item *map_array;
6020 	union trace_eval_map_item *ptr;
6021 
6022 	stop = start + len;
6023 
6024 	/*
6025 	 * The trace_eval_maps list contains the map plus a head and a tail item:
6026 	 * the head holds the module and the length of the array, and the
6027 	 * tail holds a pointer to the next list.
6028 	 */
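	/*
	 * Illustrative layout of one such allocation (len == N), matching
	 * the description above:
	 *
	 *   [ head          ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
	 *     .mod, .length    .map     .map          .map       .next
	 *                                                (NULL, or the next
	 *                                                 module's array)
	 */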
6029 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6030 	if (!map_array) {
6031 		pr_warn("Unable to allocate trace eval mapping\n");
6032 		return;
6033 	}
6034 
6035 	mutex_lock(&trace_eval_mutex);
6036 
6037 	if (!trace_eval_maps)
6038 		trace_eval_maps = map_array;
6039 	else {
6040 		ptr = trace_eval_maps;
6041 		for (;;) {
6042 			ptr = trace_eval_jmp_to_tail(ptr);
6043 			if (!ptr->tail.next)
6044 				break;
6045 			ptr = ptr->tail.next;
6046 
6047 		}
6048 		ptr->tail.next = map_array;
6049 	}
6050 	map_array->head.mod = mod;
6051 	map_array->head.length = len;
6052 	map_array++;
6053 
6054 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6055 		map_array->map = **map;
6056 		map_array++;
6057 	}
6058 	memset(map_array, 0, sizeof(*map_array));
6059 
6060 	mutex_unlock(&trace_eval_mutex);
6061 }
6062 
6063 static void trace_create_eval_file(struct dentry *d_tracer)
6064 {
6065 	trace_create_file("eval_map", 0444, d_tracer,
6066 			  NULL, &tracing_eval_map_fops);
6067 }
6068 
6069 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6070 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6071 static inline void trace_insert_eval_map_file(struct module *mod,
6072 			      struct trace_eval_map **start, int len) { }
6073 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6074 
6075 static void trace_insert_eval_map(struct module *mod,
6076 				  struct trace_eval_map **start, int len)
6077 {
6078 	struct trace_eval_map **map;
6079 
6080 	if (len <= 0)
6081 		return;
6082 
6083 	map = start;
6084 
6085 	trace_event_eval_update(map, len);
6086 
6087 	trace_insert_eval_map_file(mod, start, len);
6088 }
6089 
6090 static ssize_t
6091 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6092 		       size_t cnt, loff_t *ppos)
6093 {
6094 	struct trace_array *tr = filp->private_data;
6095 	char buf[MAX_TRACER_SIZE+2];
6096 	int r;
6097 
6098 	mutex_lock(&trace_types_lock);
6099 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6100 	mutex_unlock(&trace_types_lock);
6101 
6102 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6103 }
6104 
6105 int tracer_init(struct tracer *t, struct trace_array *tr)
6106 {
6107 	tracing_reset_online_cpus(&tr->array_buffer);
6108 	return t->init(tr);
6109 }
6110 
6111 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6112 {
6113 	int cpu;
6114 
6115 	for_each_tracing_cpu(cpu)
6116 		per_cpu_ptr(buf->data, cpu)->entries = val;
6117 }
6118 
6119 #ifdef CONFIG_TRACER_MAX_TRACE
6120 /* resize @trace_buf's per-CPU entry counts to match @size_buf's entries */
6121 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6122 					struct array_buffer *size_buf, int cpu_id)
6123 {
6124 	int cpu, ret = 0;
6125 
6126 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6127 		for_each_tracing_cpu(cpu) {
6128 			ret = ring_buffer_resize(trace_buf->buffer,
6129 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6130 			if (ret < 0)
6131 				break;
6132 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6133 				per_cpu_ptr(size_buf->data, cpu)->entries;
6134 		}
6135 	} else {
6136 		ret = ring_buffer_resize(trace_buf->buffer,
6137 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6138 		if (ret == 0)
6139 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6140 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6141 	}
6142 
6143 	return ret;
6144 }
6145 #endif /* CONFIG_TRACER_MAX_TRACE */
6146 
6147 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6148 					unsigned long size, int cpu)
6149 {
6150 	int ret;
6151 
6152 	/*
6153 	 * If kernel or user changes the size of the ring buffer
6154 	 * we use the size that was given, and we can forget about
6155 	 * expanding it later.
6156 	 */
6157 	ring_buffer_expanded = true;
6158 
6159 	/* May be called before buffers are initialized */
6160 	if (!tr->array_buffer.buffer)
6161 		return 0;
6162 
6163 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6164 	if (ret < 0)
6165 		return ret;
6166 
6167 #ifdef CONFIG_TRACER_MAX_TRACE
6168 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6169 	    !tr->current_trace->use_max_tr)
6170 		goto out;
6171 
6172 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6173 	if (ret < 0) {
6174 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6175 						     &tr->array_buffer, cpu);
6176 		if (r < 0) {
6177 			/*
6178 			 * AARGH! We are left with a max buffer of a
6179 			 * different size than the main buffer!
6180 			 * The max buffer is our "snapshot" buffer.
6181 			 * When a tracer needs a snapshot (one of the
6182 			 * latency tracers), it swaps the max buffer
6183 			 * with the saved snapshot. We succeeded in
6184 			 * updating the size of the main buffer, but failed
6185 			 * to update the size of the max buffer. But when we tried
6186 			 * to reset the main buffer to the original size, we
6187 			 * failed there too. This is very unlikely to
6188 			 * happen, but if it does, warn and kill all
6189 			 * tracing.
6190 			 */
6191 			WARN_ON(1);
6192 			tracing_disabled = 1;
6193 		}
6194 		return ret;
6195 	}
6196 
6197 	if (cpu == RING_BUFFER_ALL_CPUS)
6198 		set_buffer_entries(&tr->max_buffer, size);
6199 	else
6200 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6201 
6202  out:
6203 #endif /* CONFIG_TRACER_MAX_TRACE */
6204 
6205 	if (cpu == RING_BUFFER_ALL_CPUS)
6206 		set_buffer_entries(&tr->array_buffer, size);
6207 	else
6208 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6209 
6210 	return ret;
6211 }
6212 
6213 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6214 				  unsigned long size, int cpu_id)
6215 {
6216 	int ret;
6217 
6218 	mutex_lock(&trace_types_lock);
6219 
6220 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6221 		/* make sure this CPU is enabled in the mask */
6222 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6223 			ret = -EINVAL;
6224 			goto out;
6225 		}
6226 	}
6227 
6228 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6229 	if (ret < 0)
6230 		ret = -ENOMEM;
6231 
6232 out:
6233 	mutex_unlock(&trace_types_lock);
6234 
6235 	return ret;
6236 }
6237 
6238 
6239 /**
6240  * tracing_update_buffers - used by tracing facility to expand ring buffers
6241  *
6242  * To save memory when tracing is configured in but never used, the
6243  * ring buffers are initialized to a minimum size. But once a user
6244  * starts to use the tracing facility, they need to grow to their
6245  * default size.
6246  *
6247  * This function is to be called when a tracer is about to be used.
6248  */
6249 int tracing_update_buffers(void)
6250 {
6251 	int ret = 0;
6252 
6253 	mutex_lock(&trace_types_lock);
6254 	if (!ring_buffer_expanded)
6255 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6256 						RING_BUFFER_ALL_CPUS);
6257 	mutex_unlock(&trace_types_lock);
6258 
6259 	return ret;
6260 }
6261 
6262 struct trace_option_dentry;
6263 
6264 static void
6265 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6266 
6267 /*
6268  * Used to clear out the tracer before deletion of an instance.
6269  * Must have trace_types_lock held.
6270  */
6271 static void tracing_set_nop(struct trace_array *tr)
6272 {
6273 	if (tr->current_trace == &nop_trace)
6274 		return;
6275 
6276 	tr->current_trace->enabled--;
6277 
6278 	if (tr->current_trace->reset)
6279 		tr->current_trace->reset(tr);
6280 
6281 	tr->current_trace = &nop_trace;
6282 }
6283 
6284 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6285 {
6286 	/* Only enable if the directory has been created already. */
6287 	if (!tr->dir)
6288 		return;
6289 
6290 	create_trace_option_files(tr, t);
6291 }
6292 
6293 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6294 {
6295 	struct tracer *t;
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297 	bool had_max_tr;
6298 #endif
6299 	int ret = 0;
6300 
6301 	mutex_lock(&trace_types_lock);
6302 
6303 	if (!ring_buffer_expanded) {
6304 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6305 						RING_BUFFER_ALL_CPUS);
6306 		if (ret < 0)
6307 			goto out;
6308 		ret = 0;
6309 	}
6310 
6311 	for (t = trace_types; t; t = t->next) {
6312 		if (strcmp(t->name, buf) == 0)
6313 			break;
6314 	}
6315 	if (!t) {
6316 		ret = -EINVAL;
6317 		goto out;
6318 	}
6319 	if (t == tr->current_trace)
6320 		goto out;
6321 
6322 #ifdef CONFIG_TRACER_SNAPSHOT
6323 	if (t->use_max_tr) {
6324 		arch_spin_lock(&tr->max_lock);
6325 		if (tr->cond_snapshot)
6326 			ret = -EBUSY;
6327 		arch_spin_unlock(&tr->max_lock);
6328 		if (ret)
6329 			goto out;
6330 	}
6331 #endif
6332 	/* Some tracers won't work on kernel command line */
6333 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6334 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6335 			t->name);
6336 		goto out;
6337 	}
6338 
6339 	/* Some tracers are only allowed for the top level buffer */
6340 	if (!trace_ok_for_array(t, tr)) {
6341 		ret = -EINVAL;
6342 		goto out;
6343 	}
6344 
6345 	/* If trace pipe files are being read, we can't change the tracer */
6346 	if (tr->trace_ref) {
6347 		ret = -EBUSY;
6348 		goto out;
6349 	}
6350 
6351 	trace_branch_disable();
6352 
6353 	tr->current_trace->enabled--;
6354 
6355 	if (tr->current_trace->reset)
6356 		tr->current_trace->reset(tr);
6357 
6358 	/* Current trace needs to be nop_trace before synchronize_rcu */
6359 	tr->current_trace = &nop_trace;
6360 
6361 #ifdef CONFIG_TRACER_MAX_TRACE
6362 	had_max_tr = tr->allocated_snapshot;
6363 
6364 	if (had_max_tr && !t->use_max_tr) {
6365 		/*
6366 		 * We need to make sure that the update_max_tr sees that
6367 		 * current_trace changed to nop_trace to keep it from
6368 		 * swapping the buffers after we resize it.
6369 		 * update_max_tr() is called with interrupts disabled,
6370 		 * so a synchronize_rcu() is sufficient.
6371 		 */
6372 		synchronize_rcu();
6373 		free_snapshot(tr);
6374 	}
6375 #endif
6376 
6377 #ifdef CONFIG_TRACER_MAX_TRACE
6378 	if (t->use_max_tr && !had_max_tr) {
6379 		ret = tracing_alloc_snapshot_instance(tr);
6380 		if (ret < 0)
6381 			goto out;
6382 	}
6383 #endif
6384 
6385 	if (t->init) {
6386 		ret = tracer_init(t, tr);
6387 		if (ret)
6388 			goto out;
6389 	}
6390 
6391 	tr->current_trace = t;
6392 	tr->current_trace->enabled++;
6393 	trace_branch_enable(tr);
6394  out:
6395 	mutex_unlock(&trace_types_lock);
6396 
6397 	return ret;
6398 }
6399 
6400 static ssize_t
6401 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6402 			size_t cnt, loff_t *ppos)
6403 {
6404 	struct trace_array *tr = filp->private_data;
6405 	char buf[MAX_TRACER_SIZE+1];
6406 	int i;
6407 	size_t ret;
6408 	int err;
6409 
6410 	ret = cnt;
6411 
6412 	if (cnt > MAX_TRACER_SIZE)
6413 		cnt = MAX_TRACER_SIZE;
6414 
6415 	if (copy_from_user(buf, ubuf, cnt))
6416 		return -EFAULT;
6417 
6418 	buf[cnt] = 0;
6419 
6420 	/* strip ending whitespace. */
6421 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6422 		buf[i] = 0;
6423 
6424 	err = tracing_set_tracer(tr, buf);
6425 	if (err)
6426 		return err;
6427 
6428 	*ppos += ret;
6429 
6430 	return ret;
6431 }
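
/*
 * Illustrative only: a minimal user-space sketch of switching tracers
 * through this interface, assuming the usual tracefs layout in which
 * set_tracer_fops backs the "current_tracer" file, and that the
 * "function" tracer is built in (CONFIG_FUNCTION_TRACER).
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int use_function_tracer(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		// A trailing newline is fine; it is stripped as whitespace above.
 *		if (write(fd, "function\n", 9) != 9) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */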
6432 
6433 static ssize_t
6434 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6435 		   size_t cnt, loff_t *ppos)
6436 {
6437 	char buf[64];
6438 	int r;
6439 
6440 	r = snprintf(buf, sizeof(buf), "%ld\n",
6441 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6442 	if (r > sizeof(buf))
6443 		r = sizeof(buf);
6444 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6445 }
6446 
6447 static ssize_t
6448 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6449 		    size_t cnt, loff_t *ppos)
6450 {
6451 	unsigned long val;
6452 	int ret;
6453 
6454 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6455 	if (ret)
6456 		return ret;
6457 
6458 	*ptr = val * 1000;
6459 
6460 	return cnt;
6461 }
6462 
6463 static ssize_t
6464 tracing_thresh_read(struct file *filp, char __user *ubuf,
6465 		    size_t cnt, loff_t *ppos)
6466 {
6467 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6468 }
6469 
6470 static ssize_t
6471 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6472 		     size_t cnt, loff_t *ppos)
6473 {
6474 	struct trace_array *tr = filp->private_data;
6475 	int ret;
6476 
6477 	mutex_lock(&trace_types_lock);
6478 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6479 	if (ret < 0)
6480 		goto out;
6481 
6482 	if (tr->current_trace->update_thresh) {
6483 		ret = tr->current_trace->update_thresh(tr);
6484 		if (ret < 0)
6485 			goto out;
6486 	}
6487 
6488 	ret = cnt;
6489 out:
6490 	mutex_unlock(&trace_types_lock);
6491 
6492 	return ret;
6493 }
6494 
6495 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6496 
6497 static ssize_t
6498 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6499 		     size_t cnt, loff_t *ppos)
6500 {
6501 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6502 }
6503 
6504 static ssize_t
6505 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6506 		      size_t cnt, loff_t *ppos)
6507 {
6508 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6509 }
6510 
6511 #endif
6512 
6513 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6514 {
6515 	struct trace_array *tr = inode->i_private;
6516 	struct trace_iterator *iter;
6517 	int ret;
6518 
6519 	ret = tracing_check_open_get_tr(tr);
6520 	if (ret)
6521 		return ret;
6522 
6523 	mutex_lock(&trace_types_lock);
6524 
6525 	/* create a buffer to store the information to pass to userspace */
6526 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6527 	if (!iter) {
6528 		ret = -ENOMEM;
6529 		__trace_array_put(tr);
6530 		goto out;
6531 	}
6532 
6533 	trace_seq_init(&iter->seq);
6534 	iter->trace = tr->current_trace;
6535 
6536 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6537 		ret = -ENOMEM;
6538 		goto fail;
6539 	}
6540 
6541 	/* trace pipe does not show start of buffer */
6542 	cpumask_setall(iter->started);
6543 
6544 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6545 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6546 
6547 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6548 	if (trace_clocks[tr->clock_id].in_ns)
6549 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6550 
6551 	iter->tr = tr;
6552 	iter->array_buffer = &tr->array_buffer;
6553 	iter->cpu_file = tracing_get_cpu(inode);
6554 	mutex_init(&iter->mutex);
6555 	filp->private_data = iter;
6556 
6557 	if (iter->trace->pipe_open)
6558 		iter->trace->pipe_open(iter);
6559 
6560 	nonseekable_open(inode, filp);
6561 
6562 	tr->trace_ref++;
6563 out:
6564 	mutex_unlock(&trace_types_lock);
6565 	return ret;
6566 
6567 fail:
6568 	kfree(iter);
6569 	__trace_array_put(tr);
6570 	mutex_unlock(&trace_types_lock);
6571 	return ret;
6572 }
6573 
6574 static int tracing_release_pipe(struct inode *inode, struct file *file)
6575 {
6576 	struct trace_iterator *iter = file->private_data;
6577 	struct trace_array *tr = inode->i_private;
6578 
6579 	mutex_lock(&trace_types_lock);
6580 
6581 	tr->trace_ref--;
6582 
6583 	if (iter->trace->pipe_close)
6584 		iter->trace->pipe_close(iter);
6585 
6586 	mutex_unlock(&trace_types_lock);
6587 
6588 	free_cpumask_var(iter->started);
6589 	mutex_destroy(&iter->mutex);
6590 	kfree(iter);
6591 
6592 	trace_array_put(tr);
6593 
6594 	return 0;
6595 }
6596 
6597 static __poll_t
6598 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6599 {
6600 	struct trace_array *tr = iter->tr;
6601 
6602 	/* Iterators are static, they should be filled or empty */
6603 	if (trace_buffer_iter(iter, iter->cpu_file))
6604 		return EPOLLIN | EPOLLRDNORM;
6605 
6606 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6607 		/*
6608 		 * Always select as readable when in blocking mode
6609 		 */
6610 		return EPOLLIN | EPOLLRDNORM;
6611 	else
6612 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6613 					     filp, poll_table);
6614 }
6615 
6616 static __poll_t
6617 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6618 {
6619 	struct trace_iterator *iter = filp->private_data;
6620 
6621 	return trace_poll(iter, filp, poll_table);
6622 }
6623 
6624 /* Must be called with iter->mutex held. */
6625 static int tracing_wait_pipe(struct file *filp)
6626 {
6627 	struct trace_iterator *iter = filp->private_data;
6628 	int ret;
6629 
6630 	while (trace_empty(iter)) {
6631 
6632 		if ((filp->f_flags & O_NONBLOCK)) {
6633 			return -EAGAIN;
6634 		}
6635 
6636 		/*
6637 		 * We block until we read something, or until tracing is disabled
6638 		 * after we have read something. That is, we still block while
6639 		 * tracing is disabled if we have never read anything; this lets a
6640 		 * user cat this file and then enable tracing. But once something
6641 		 * has been read, we give an EOF when tracing is disabled again.
6642 		 *
6643 		 * iter->pos will be 0 if we haven't read anything.
6644 		 */
6645 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6646 			break;
6647 
6648 		mutex_unlock(&iter->mutex);
6649 
6650 		ret = wait_on_pipe(iter, 0);
6651 
6652 		mutex_lock(&iter->mutex);
6653 
6654 		if (ret)
6655 			return ret;
6656 	}
6657 
6658 	return 1;
6659 }
6660 
6661 /*
6662  * Consumer reader.
6663  */
6664 static ssize_t
6665 tracing_read_pipe(struct file *filp, char __user *ubuf,
6666 		  size_t cnt, loff_t *ppos)
6667 {
6668 	struct trace_iterator *iter = filp->private_data;
6669 	ssize_t sret;
6670 
6671 	/*
6672 	 * Avoid more than one consumer on a single file descriptor.
6673 	 * This is just a matter of trace coherency: the ring buffer itself
6674 	 * is protected.
6675 	 */
6676 	mutex_lock(&iter->mutex);
6677 
6678 	/* return any leftover data */
6679 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6680 	if (sret != -EBUSY)
6681 		goto out;
6682 
6683 	trace_seq_init(&iter->seq);
6684 
6685 	if (iter->trace->read) {
6686 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6687 		if (sret)
6688 			goto out;
6689 	}
6690 
6691 waitagain:
6692 	sret = tracing_wait_pipe(filp);
6693 	if (sret <= 0)
6694 		goto out;
6695 
6696 	/* stop when tracing is finished */
6697 	if (trace_empty(iter)) {
6698 		sret = 0;
6699 		goto out;
6700 	}
6701 
6702 	if (cnt >= PAGE_SIZE)
6703 		cnt = PAGE_SIZE - 1;
6704 
6705 	/* reset all but tr, trace, and overruns */
6706 	memset(&iter->seq, 0,
6707 	       sizeof(struct trace_iterator) -
6708 	       offsetof(struct trace_iterator, seq));
6709 	cpumask_clear(iter->started);
6710 	trace_seq_init(&iter->seq);
6711 	iter->pos = -1;
6712 
6713 	trace_event_read_lock();
6714 	trace_access_lock(iter->cpu_file);
6715 	while (trace_find_next_entry_inc(iter) != NULL) {
6716 		enum print_line_t ret;
6717 		int save_len = iter->seq.seq.len;
6718 
6719 		ret = print_trace_line(iter);
6720 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721 			/* don't print partial lines */
6722 			iter->seq.seq.len = save_len;
6723 			break;
6724 		}
6725 		if (ret != TRACE_TYPE_NO_CONSUME)
6726 			trace_consume(iter);
6727 
6728 		if (trace_seq_used(&iter->seq) >= cnt)
6729 			break;
6730 
6731 		/*
6732 		 * Setting the full flag means we reached the trace_seq buffer
6733 		 * size and should have left via the partial-line condition above.
6734 		 * If we get here, one of the trace_seq_*() functions was misused.
6735 		 */
6736 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6737 			  iter->ent->type);
6738 	}
6739 	trace_access_unlock(iter->cpu_file);
6740 	trace_event_read_unlock();
6741 
6742 	/* Now copy what we have to the user */
6743 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6744 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6745 		trace_seq_init(&iter->seq);
6746 
6747 	/*
6748 	 * If there was nothing to send to user, in spite of consuming trace
6749 	 * entries, go back to wait for more entries.
6750 	 */
6751 	if (sret == -EBUSY)
6752 		goto waitagain;
6753 
6754 out:
6755 	mutex_unlock(&iter->mutex);
6756 
6757 	return sret;
6758 }
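
/*
 * Illustrative only: a minimal user-space consumer of this interface,
 * assuming tracefs is mounted at /sys/kernel/tracing. As described in
 * tracing_wait_pipe(), a blocking read on trace_pipe waits for data and
 * consumes what it returns; opening with O_NONBLOCK makes an empty
 * buffer return -EAGAIN instead.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static void dump_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *	}
 */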
6759 
6760 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6761 				     unsigned int idx)
6762 {
6763 	__free_page(spd->pages[idx]);
6764 }
6765 
6766 static size_t
6767 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6768 {
6769 	size_t count;
6770 	int save_len;
6771 	int ret;
6772 
6773 	/* Seq buffer is page-sized, exactly what we need. */
6774 	for (;;) {
6775 		save_len = iter->seq.seq.len;
6776 		ret = print_trace_line(iter);
6777 
6778 		if (trace_seq_has_overflowed(&iter->seq)) {
6779 			iter->seq.seq.len = save_len;
6780 			break;
6781 		}
6782 
6783 		/*
6784 		 * This should not be hit, because a partial line should only
6785 		 * be returned if the iter->seq overflowed. But check it
6786 		 * anyway to be safe.
6787 		 */
6788 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789 			iter->seq.seq.len = save_len;
6790 			break;
6791 		}
6792 
6793 		count = trace_seq_used(&iter->seq) - save_len;
6794 		if (rem < count) {
6795 			rem = 0;
6796 			iter->seq.seq.len = save_len;
6797 			break;
6798 		}
6799 
6800 		if (ret != TRACE_TYPE_NO_CONSUME)
6801 			trace_consume(iter);
6802 		rem -= count;
6803 		if (!trace_find_next_entry_inc(iter))	{
6804 			rem = 0;
6805 			iter->ent = NULL;
6806 			break;
6807 		}
6808 	}
6809 
6810 	return rem;
6811 }
6812 
6813 static ssize_t tracing_splice_read_pipe(struct file *filp,
6814 					loff_t *ppos,
6815 					struct pipe_inode_info *pipe,
6816 					size_t len,
6817 					unsigned int flags)
6818 {
6819 	struct page *pages_def[PIPE_DEF_BUFFERS];
6820 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6821 	struct trace_iterator *iter = filp->private_data;
6822 	struct splice_pipe_desc spd = {
6823 		.pages		= pages_def,
6824 		.partial	= partial_def,
6825 		.nr_pages	= 0, /* This gets updated below. */
6826 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6827 		.ops		= &default_pipe_buf_ops,
6828 		.spd_release	= tracing_spd_release_pipe,
6829 	};
6830 	ssize_t ret;
6831 	size_t rem;
6832 	unsigned int i;
6833 
6834 	if (splice_grow_spd(pipe, &spd))
6835 		return -ENOMEM;
6836 
6837 	mutex_lock(&iter->mutex);
6838 
6839 	if (iter->trace->splice_read) {
6840 		ret = iter->trace->splice_read(iter, filp,
6841 					       ppos, pipe, len, flags);
6842 		if (ret)
6843 			goto out_err;
6844 	}
6845 
6846 	ret = tracing_wait_pipe(filp);
6847 	if (ret <= 0)
6848 		goto out_err;
6849 
6850 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6851 		ret = -EFAULT;
6852 		goto out_err;
6853 	}
6854 
6855 	trace_event_read_lock();
6856 	trace_access_lock(iter->cpu_file);
6857 
6858 	/* Fill as many pages as possible. */
6859 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6860 		spd.pages[i] = alloc_page(GFP_KERNEL);
6861 		if (!spd.pages[i])
6862 			break;
6863 
6864 		rem = tracing_fill_pipe_page(rem, iter);
6865 
6866 		/* Copy the data into the page, so we can start over. */
6867 		ret = trace_seq_to_buffer(&iter->seq,
6868 					  page_address(spd.pages[i]),
6869 					  trace_seq_used(&iter->seq));
6870 		if (ret < 0) {
6871 			__free_page(spd.pages[i]);
6872 			break;
6873 		}
6874 		spd.partial[i].offset = 0;
6875 		spd.partial[i].len = trace_seq_used(&iter->seq);
6876 
6877 		trace_seq_init(&iter->seq);
6878 	}
6879 
6880 	trace_access_unlock(iter->cpu_file);
6881 	trace_event_read_unlock();
6882 	mutex_unlock(&iter->mutex);
6883 
6884 	spd.nr_pages = i;
6885 
6886 	if (i)
6887 		ret = splice_to_pipe(pipe, &spd);
6888 	else
6889 		ret = 0;
6890 out:
6891 	splice_shrink_spd(&spd);
6892 	return ret;
6893 
6894 out_err:
6895 	mutex_unlock(&iter->mutex);
6896 	goto out;
6897 }
6898 
6899 static ssize_t
6900 tracing_entries_read(struct file *filp, char __user *ubuf,
6901 		     size_t cnt, loff_t *ppos)
6902 {
6903 	struct inode *inode = file_inode(filp);
6904 	struct trace_array *tr = inode->i_private;
6905 	int cpu = tracing_get_cpu(inode);
6906 	char buf[64];
6907 	int r = 0;
6908 	ssize_t ret;
6909 
6910 	mutex_lock(&trace_types_lock);
6911 
6912 	if (cpu == RING_BUFFER_ALL_CPUS) {
6913 		int cpu, buf_size_same;
6914 		unsigned long size;
6915 
6916 		size = 0;
6917 		buf_size_same = 1;
6918 		/* check if all cpu sizes are same */
6919 		for_each_tracing_cpu(cpu) {
6920 			/* fill in the size from first enabled cpu */
6921 			if (size == 0)
6922 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6923 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6924 				buf_size_same = 0;
6925 				break;
6926 			}
6927 		}
6928 
6929 		if (buf_size_same) {
6930 			if (!ring_buffer_expanded)
6931 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6932 					    size >> 10,
6933 					    trace_buf_size >> 10);
6934 			else
6935 				r = sprintf(buf, "%lu\n", size >> 10);
6936 		} else
6937 			r = sprintf(buf, "X\n");
6938 	} else
6939 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6940 
6941 	mutex_unlock(&trace_types_lock);
6942 
6943 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6944 	return ret;
6945 }
6946 
6947 static ssize_t
6948 tracing_entries_write(struct file *filp, const char __user *ubuf,
6949 		      size_t cnt, loff_t *ppos)
6950 {
6951 	struct inode *inode = file_inode(filp);
6952 	struct trace_array *tr = inode->i_private;
6953 	unsigned long val;
6954 	int ret;
6955 
6956 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6957 	if (ret)
6958 		return ret;
6959 
6960 	/* must have at least 1 entry */
6961 	if (!val)
6962 		return -EINVAL;
6963 
6964 	/* value is in KB */
6965 	val <<= 10;
6966 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6967 	if (ret < 0)
6968 		return ret;
6969 
6970 	*ppos += cnt;
6971 
6972 	return cnt;
6973 }
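
/*
 * Illustrative only: resizing the ring buffer from user space, assuming
 * the usual tracefs layout in which tracing_entries_fops backs the
 * "buffer_size_kb" file. As noted above, the value written is in KB and
 * applies per CPU.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *kb)
 *	{
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, kb, strlen(kb)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 *
 * e.g. set_buffer_size_kb("4096") requests 4096 KB per CPU.
 */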
6974 
6975 static ssize_t
6976 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6977 				size_t cnt, loff_t *ppos)
6978 {
6979 	struct trace_array *tr = filp->private_data;
6980 	char buf[64];
6981 	int r, cpu;
6982 	unsigned long size = 0, expanded_size = 0;
6983 
6984 	mutex_lock(&trace_types_lock);
6985 	for_each_tracing_cpu(cpu) {
6986 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6987 		if (!ring_buffer_expanded)
6988 			expanded_size += trace_buf_size >> 10;
6989 	}
6990 	if (ring_buffer_expanded)
6991 		r = sprintf(buf, "%lu\n", size);
6992 	else
6993 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6994 	mutex_unlock(&trace_types_lock);
6995 
6996 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6997 }
6998 
6999 static ssize_t
7000 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7001 			  size_t cnt, loff_t *ppos)
7002 {
7003 	/*
7004 	 * There is no need to read what the user has written; this function
7005 	 * exists only so that writing to this file (e.g. with "echo") succeeds.
7006 	 */
7007 
7008 	*ppos += cnt;
7009 
7010 	return cnt;
7011 }
7012 
7013 static int
7014 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7015 {
7016 	struct trace_array *tr = inode->i_private;
7017 
7018 	/* disable tracing ? */
7019 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7020 		tracer_tracing_off(tr);
7021 	/* resize the ring buffer to 0 */
7022 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7023 
7024 	trace_array_put(tr);
7025 
7026 	return 0;
7027 }
7028 
7029 static ssize_t
7030 tracing_mark_write(struct file *filp, const char __user *ubuf,
7031 					size_t cnt, loff_t *fpos)
7032 {
7033 	struct trace_array *tr = filp->private_data;
7034 	struct ring_buffer_event *event;
7035 	enum event_trigger_type tt = ETT_NONE;
7036 	struct trace_buffer *buffer;
7037 	struct print_entry *entry;
7038 	ssize_t written;
7039 	int size;
7040 	int len;
7041 
7042 /* Used in tracing_mark_raw_write() as well */
7043 #define FAULTED_STR "<faulted>"
7044 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7045 
7046 	if (tracing_disabled)
7047 		return -EINVAL;
7048 
7049 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7050 		return -EINVAL;
7051 
7052 	if (cnt > TRACE_BUF_SIZE)
7053 		cnt = TRACE_BUF_SIZE;
7054 
7055 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7056 
7057 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7058 
7059 	/* If less than "<faulted>", then make sure we can still add that */
7060 	if (cnt < FAULTED_SIZE)
7061 		size += FAULTED_SIZE - cnt;
7062 
7063 	buffer = tr->array_buffer.buffer;
7064 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7065 					    tracing_gen_ctx());
7066 	if (unlikely(!event))
7067 		/* Ring buffer disabled, return as if not open for write */
7068 		return -EBADF;
7069 
7070 	entry = ring_buffer_event_data(event);
7071 	entry->ip = _THIS_IP_;
7072 
7073 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7074 	if (len) {
7075 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7076 		cnt = FAULTED_SIZE;
7077 		written = -EFAULT;
7078 	} else
7079 		written = cnt;
7080 
7081 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7082 		/* do not add \n before testing triggers, but add \0 */
7083 		entry->buf[cnt] = '\0';
7084 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7085 	}
7086 
7087 	if (entry->buf[cnt - 1] != '\n') {
7088 		entry->buf[cnt] = '\n';
7089 		entry->buf[cnt + 1] = '\0';
7090 	} else
7091 		entry->buf[cnt] = '\0';
7092 
7093 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7094 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7095 	__buffer_unlock_commit(buffer, event);
7096 
7097 	if (tt)
7098 		event_triggers_post_call(tr->trace_marker_file, tt);
7099 
7100 	if (written > 0)
7101 		*fpos += written;
7102 
7103 	return written;
7104 }
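
/*
 * Illustrative only: a minimal user-space sketch of the trace_marker
 * interface implemented by tracing_mark_write(), assuming tracefs is
 * mounted at /sys/kernel/tracing. Each write() becomes one TRACE_PRINT
 * entry in the ring buffer.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static ssize_t mark_trace(const char *msg)
 *	{
 *		ssize_t ret = -1;
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			ret = write(fd, msg, strlen(msg));
 *			close(fd);
 *		}
 *		return ret;
 *	}
 */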
7105 
7106 /* Limit it for now to 3K (including tag) */
7107 #define RAW_DATA_MAX_SIZE (1024*3)
7108 
7109 static ssize_t
7110 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7111 					size_t cnt, loff_t *fpos)
7112 {
7113 	struct trace_array *tr = filp->private_data;
7114 	struct ring_buffer_event *event;
7115 	struct trace_buffer *buffer;
7116 	struct raw_data_entry *entry;
7117 	ssize_t written;
7118 	int size;
7119 	int len;
7120 
7121 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7122 
7123 	if (tracing_disabled)
7124 		return -EINVAL;
7125 
7126 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7127 		return -EINVAL;
7128 
7129 	/* The marker must at least have a tag id */
7130 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7131 		return -EINVAL;
7132 
7133 	if (cnt > TRACE_BUF_SIZE)
7134 		cnt = TRACE_BUF_SIZE;
7135 
7136 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7137 
7138 	size = sizeof(*entry) + cnt;
7139 	if (cnt < FAULT_SIZE_ID)
7140 		size += FAULT_SIZE_ID - cnt;
7141 
7142 	buffer = tr->array_buffer.buffer;
7143 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7144 					    tracing_gen_ctx());
7145 	if (!event)
7146 		/* Ring buffer disabled, return as if not open for write */
7147 		return -EBADF;
7148 
7149 	entry = ring_buffer_event_data(event);
7150 
7151 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7152 	if (len) {
7153 		entry->id = -1;
7154 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7155 		written = -EFAULT;
7156 	} else
7157 		written = cnt;
7158 
7159 	__buffer_unlock_commit(buffer, event);
7160 
7161 	if (written > 0)
7162 		*fpos += written;
7163 
7164 	return written;
7165 }
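
/*
 * Illustrative only: the raw variant expects the payload to start with an
 * integer tag id, as checked above. A minimal user-space sketch, assuming
 * the usual tracefs layout in which this handler backs "trace_marker_raw":
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int mark_raw(unsigned int id, const void *data, size_t len)
 *	{
 *		char buf[64];
 *		int fd, ret = -1;
 *
 *		if (len > sizeof(buf) - sizeof(id))
 *			return -1;
 *		memcpy(buf, &id, sizeof(id));
 *		memcpy(buf + sizeof(id), data, len);
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd >= 0) {
 *			if (write(fd, buf, sizeof(id) + len) > 0)
 *				ret = 0;
 *			close(fd);
 *		}
 *		return ret;
 *	}
 */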
7166 
7167 static int tracing_clock_show(struct seq_file *m, void *v)
7168 {
7169 	struct trace_array *tr = m->private;
7170 	int i;
7171 
7172 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7173 		seq_printf(m,
7174 			"%s%s%s%s", i ? " " : "",
7175 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7176 			i == tr->clock_id ? "]" : "");
7177 	seq_putc(m, '\n');
7178 
7179 	return 0;
7180 }
7181 
7182 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7183 {
7184 	int i;
7185 
7186 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7187 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7188 			break;
7189 	}
7190 	if (i == ARRAY_SIZE(trace_clocks))
7191 		return -EINVAL;
7192 
7193 	mutex_lock(&trace_types_lock);
7194 
7195 	tr->clock_id = i;
7196 
7197 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7198 
7199 	/*
7200 	 * New clock may not be consistent with the previous clock.
7201 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7202 	 */
7203 	tracing_reset_online_cpus(&tr->array_buffer);
7204 
7205 #ifdef CONFIG_TRACER_MAX_TRACE
7206 	if (tr->max_buffer.buffer)
7207 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7208 	tracing_reset_online_cpus(&tr->max_buffer);
7209 #endif
7210 
7211 	mutex_unlock(&trace_types_lock);
7212 
7213 	return 0;
7214 }
7215 
7216 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7217 				   size_t cnt, loff_t *fpos)
7218 {
7219 	struct seq_file *m = filp->private_data;
7220 	struct trace_array *tr = m->private;
7221 	char buf[64];
7222 	const char *clockstr;
7223 	int ret;
7224 
7225 	if (cnt >= sizeof(buf))
7226 		return -EINVAL;
7227 
7228 	if (copy_from_user(buf, ubuf, cnt))
7229 		return -EFAULT;
7230 
7231 	buf[cnt] = 0;
7232 
7233 	clockstr = strstrip(buf);
7234 
7235 	ret = tracing_set_clock(tr, clockstr);
7236 	if (ret)
7237 		return ret;
7238 
7239 	*fpos += cnt;
7240 
7241 	return cnt;
7242 }
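
/*
 * Illustrative only: selecting a different trace clock from user space,
 * assuming the usual tracefs layout in which tracing_clock_write() backs
 * the "trace_clock" file. As the comment in tracing_set_clock() notes,
 * changing the clock resets the ring buffer.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_trace_clock(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *		int ret = -1;
 *
 *		if (fd >= 0) {
 *			if (write(fd, name, strlen(name)) > 0)
 *				ret = 0;
 *			close(fd);
 *		}
 *		return ret;
 *	}
 *
 * e.g. set_trace_clock("mono") switches to the monotonic clock, one of
 * the entries listed when reading the same file.
 */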
7243 
7244 static int tracing_clock_open(struct inode *inode, struct file *file)
7245 {
7246 	struct trace_array *tr = inode->i_private;
7247 	int ret;
7248 
7249 	ret = tracing_check_open_get_tr(tr);
7250 	if (ret)
7251 		return ret;
7252 
7253 	ret = single_open(file, tracing_clock_show, inode->i_private);
7254 	if (ret < 0)
7255 		trace_array_put(tr);
7256 
7257 	return ret;
7258 }
7259 
7260 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7261 {
7262 	struct trace_array *tr = m->private;
7263 
7264 	mutex_lock(&trace_types_lock);
7265 
7266 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7267 		seq_puts(m, "delta [absolute]\n");
7268 	else
7269 		seq_puts(m, "[delta] absolute\n");
7270 
7271 	mutex_unlock(&trace_types_lock);
7272 
7273 	return 0;
7274 }
7275 
7276 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7277 {
7278 	struct trace_array *tr = inode->i_private;
7279 	int ret;
7280 
7281 	ret = tracing_check_open_get_tr(tr);
7282 	if (ret)
7283 		return ret;
7284 
7285 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7286 	if (ret < 0)
7287 		trace_array_put(tr);
7288 
7289 	return ret;
7290 }
7291 
7292 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7293 {
7294 	if (rbe == this_cpu_read(trace_buffered_event))
7295 		return ring_buffer_time_stamp(buffer);
7296 
7297 	return ring_buffer_event_time_stamp(buffer, rbe);
7298 }
7299 
7300 /*
7301  * Enable or disable using the per-CPU trace_buffered_event when possible.
7302  */
7303 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7304 {
7305 	int ret = 0;
7306 
7307 	mutex_lock(&trace_types_lock);
7308 
7309 	if (set && tr->no_filter_buffering_ref++)
7310 		goto out;
7311 
7312 	if (!set) {
7313 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7314 			ret = -EINVAL;
7315 			goto out;
7316 		}
7317 
7318 		--tr->no_filter_buffering_ref;
7319 	}
7320  out:
7321 	mutex_unlock(&trace_types_lock);
7322 
7323 	return ret;
7324 }
7325 
7326 struct ftrace_buffer_info {
7327 	struct trace_iterator	iter;
7328 	void			*spare;
7329 	unsigned int		spare_cpu;
7330 	unsigned int		read;
7331 };
7332 
7333 #ifdef CONFIG_TRACER_SNAPSHOT
7334 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7335 {
7336 	struct trace_array *tr = inode->i_private;
7337 	struct trace_iterator *iter;
7338 	struct seq_file *m;
7339 	int ret;
7340 
7341 	ret = tracing_check_open_get_tr(tr);
7342 	if (ret)
7343 		return ret;
7344 
7345 	if (file->f_mode & FMODE_READ) {
7346 		iter = __tracing_open(inode, file, true);
7347 		if (IS_ERR(iter))
7348 			ret = PTR_ERR(iter);
7349 	} else {
7350 		/* Writes still need the seq_file to hold the private data */
7351 		ret = -ENOMEM;
7352 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7353 		if (!m)
7354 			goto out;
7355 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7356 		if (!iter) {
7357 			kfree(m);
7358 			goto out;
7359 		}
7360 		ret = 0;
7361 
7362 		iter->tr = tr;
7363 		iter->array_buffer = &tr->max_buffer;
7364 		iter->cpu_file = tracing_get_cpu(inode);
7365 		m->private = iter;
7366 		file->private_data = m;
7367 	}
7368 out:
7369 	if (ret < 0)
7370 		trace_array_put(tr);
7371 
7372 	return ret;
7373 }
7374 
7375 static ssize_t
7376 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7377 		       loff_t *ppos)
7378 {
7379 	struct seq_file *m = filp->private_data;
7380 	struct trace_iterator *iter = m->private;
7381 	struct trace_array *tr = iter->tr;
7382 	unsigned long val;
7383 	int ret;
7384 
7385 	ret = tracing_update_buffers();
7386 	if (ret < 0)
7387 		return ret;
7388 
7389 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7390 	if (ret)
7391 		return ret;
7392 
7393 	mutex_lock(&trace_types_lock);
7394 
7395 	if (tr->current_trace->use_max_tr) {
7396 		ret = -EBUSY;
7397 		goto out;
7398 	}
7399 
7400 	arch_spin_lock(&tr->max_lock);
7401 	if (tr->cond_snapshot)
7402 		ret = -EBUSY;
7403 	arch_spin_unlock(&tr->max_lock);
7404 	if (ret)
7405 		goto out;
7406 
7407 	switch (val) {
7408 	case 0:
7409 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7410 			ret = -EINVAL;
7411 			break;
7412 		}
7413 		if (tr->allocated_snapshot)
7414 			free_snapshot(tr);
7415 		break;
7416 	case 1:
7417 /* Only allow per-cpu swap if the ring buffer supports it */
7418 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7419 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7420 			ret = -EINVAL;
7421 			break;
7422 		}
7423 #endif
7424 		if (tr->allocated_snapshot)
7425 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7426 					&tr->array_buffer, iter->cpu_file);
7427 		else
7428 			ret = tracing_alloc_snapshot_instance(tr);
7429 		if (ret < 0)
7430 			break;
7431 		local_irq_disable();
7432 		/* Now, we're going to swap */
7433 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7434 			update_max_tr(tr, current, smp_processor_id(), NULL);
7435 		else
7436 			update_max_tr_single(tr, current, iter->cpu_file);
7437 		local_irq_enable();
7438 		break;
7439 	default:
7440 		if (tr->allocated_snapshot) {
7441 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7442 				tracing_reset_online_cpus(&tr->max_buffer);
7443 			else
7444 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7445 		}
7446 		break;
7447 	}
7448 
7449 	if (ret >= 0) {
7450 		*ppos += cnt;
7451 		ret = cnt;
7452 	}
7453 out:
7454 	mutex_unlock(&trace_types_lock);
7455 	return ret;
7456 }
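
/*
 * Illustrative only: the values handled by the switch above map onto the
 * usual "snapshot" tracefs file (writing 0 frees the snapshot buffer, 1
 * takes a snapshot, any other number clears it). A minimal user-space
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *		int ret = -1;
 *
 *		if (fd >= 0) {
 *			if (write(fd, "1", 1) == 1)
 *				ret = 0;
 *			close(fd);
 *		}
 *		return ret;
 *	}
 */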
7457 
7458 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7459 {
7460 	struct seq_file *m = file->private_data;
7461 	int ret;
7462 
7463 	ret = tracing_release(inode, file);
7464 
7465 	if (file->f_mode & FMODE_READ)
7466 		return ret;
7467 
7468 	/* If write only, the seq_file is just a stub */
7469 	if (m)
7470 		kfree(m->private);
7471 	kfree(m);
7472 
7473 	return 0;
7474 }
7475 
7476 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7477 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7478 				    size_t count, loff_t *ppos);
7479 static int tracing_buffers_release(struct inode *inode, struct file *file);
7480 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7481 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7482 
7483 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7484 {
7485 	struct ftrace_buffer_info *info;
7486 	int ret;
7487 
7488 	/* The following checks for tracefs lockdown */
7489 	ret = tracing_buffers_open(inode, filp);
7490 	if (ret < 0)
7491 		return ret;
7492 
7493 	info = filp->private_data;
7494 
7495 	if (info->iter.trace->use_max_tr) {
7496 		tracing_buffers_release(inode, filp);
7497 		return -EBUSY;
7498 	}
7499 
7500 	info->iter.snapshot = true;
7501 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7502 
7503 	return ret;
7504 }
7505 
7506 #endif /* CONFIG_TRACER_SNAPSHOT */
7507 
7508 
7509 static const struct file_operations tracing_thresh_fops = {
7510 	.open		= tracing_open_generic,
7511 	.read		= tracing_thresh_read,
7512 	.write		= tracing_thresh_write,
7513 	.llseek		= generic_file_llseek,
7514 };
7515 
7516 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7517 static const struct file_operations tracing_max_lat_fops = {
7518 	.open		= tracing_open_generic,
7519 	.read		= tracing_max_lat_read,
7520 	.write		= tracing_max_lat_write,
7521 	.llseek		= generic_file_llseek,
7522 };
7523 #endif
7524 
7525 static const struct file_operations set_tracer_fops = {
7526 	.open		= tracing_open_generic,
7527 	.read		= tracing_set_trace_read,
7528 	.write		= tracing_set_trace_write,
7529 	.llseek		= generic_file_llseek,
7530 };
7531 
7532 static const struct file_operations tracing_pipe_fops = {
7533 	.open		= tracing_open_pipe,
7534 	.poll		= tracing_poll_pipe,
7535 	.read		= tracing_read_pipe,
7536 	.splice_read	= tracing_splice_read_pipe,
7537 	.release	= tracing_release_pipe,
7538 	.llseek		= no_llseek,
7539 };
7540 
7541 static const struct file_operations tracing_entries_fops = {
7542 	.open		= tracing_open_generic_tr,
7543 	.read		= tracing_entries_read,
7544 	.write		= tracing_entries_write,
7545 	.llseek		= generic_file_llseek,
7546 	.release	= tracing_release_generic_tr,
7547 };
7548 
7549 static const struct file_operations tracing_total_entries_fops = {
7550 	.open		= tracing_open_generic_tr,
7551 	.read		= tracing_total_entries_read,
7552 	.llseek		= generic_file_llseek,
7553 	.release	= tracing_release_generic_tr,
7554 };
7555 
7556 static const struct file_operations tracing_free_buffer_fops = {
7557 	.open		= tracing_open_generic_tr,
7558 	.write		= tracing_free_buffer_write,
7559 	.release	= tracing_free_buffer_release,
7560 };
7561 
7562 static const struct file_operations tracing_mark_fops = {
7563 	.open		= tracing_open_generic_tr,
7564 	.write		= tracing_mark_write,
7565 	.llseek		= generic_file_llseek,
7566 	.release	= tracing_release_generic_tr,
7567 };
7568 
7569 static const struct file_operations tracing_mark_raw_fops = {
7570 	.open		= tracing_open_generic_tr,
7571 	.write		= tracing_mark_raw_write,
7572 	.llseek		= generic_file_llseek,
7573 	.release	= tracing_release_generic_tr,
7574 };
7575 
7576 static const struct file_operations trace_clock_fops = {
7577 	.open		= tracing_clock_open,
7578 	.read		= seq_read,
7579 	.llseek		= seq_lseek,
7580 	.release	= tracing_single_release_tr,
7581 	.write		= tracing_clock_write,
7582 };
7583 
7584 static const struct file_operations trace_time_stamp_mode_fops = {
7585 	.open		= tracing_time_stamp_mode_open,
7586 	.read		= seq_read,
7587 	.llseek		= seq_lseek,
7588 	.release	= tracing_single_release_tr,
7589 };
7590 
7591 #ifdef CONFIG_TRACER_SNAPSHOT
7592 static const struct file_operations snapshot_fops = {
7593 	.open		= tracing_snapshot_open,
7594 	.read		= seq_read,
7595 	.write		= tracing_snapshot_write,
7596 	.llseek		= tracing_lseek,
7597 	.release	= tracing_snapshot_release,
7598 };
7599 
7600 static const struct file_operations snapshot_raw_fops = {
7601 	.open		= snapshot_raw_open,
7602 	.read		= tracing_buffers_read,
7603 	.release	= tracing_buffers_release,
7604 	.splice_read	= tracing_buffers_splice_read,
7605 	.llseek		= no_llseek,
7606 };
7607 
7608 #endif /* CONFIG_TRACER_SNAPSHOT */
7609 
7610 /*
7611  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7612  * @filp: The active open file structure
7613  * @ubuf: The userspace provided buffer holding the value to be written
7614  * @cnt: The number of bytes to read from the user buffer
7615  * @ppos: The current "file" position
7616  *
7617  * This function implements the write interface for a struct trace_min_max_param.
7618  * The filp->private_data must point to a trace_min_max_param structure that
7619  * defines where to write the value, the min and the max acceptable values,
7620  * and a lock to protect the write.
7621  */
7622 static ssize_t
7623 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7624 {
7625 	struct trace_min_max_param *param = filp->private_data;
7626 	u64 val;
7627 	int err;
7628 
7629 	if (!param)
7630 		return -EFAULT;
7631 
7632 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7633 	if (err)
7634 		return err;
7635 
7636 	if (param->lock)
7637 		mutex_lock(param->lock);
7638 
7639 	if (param->min && val < *param->min)
7640 		err = -EINVAL;
7641 
7642 	if (param->max && val > *param->max)
7643 		err = -EINVAL;
7644 
7645 	if (!err)
7646 		*param->val = val;
7647 
7648 	if (param->lock)
7649 		mutex_unlock(param->lock);
7650 
7651 	if (err)
7652 		return err;
7653 
7654 	return cnt;
7655 }
7656 
7657 /*
7658  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7659  * @filp: The active open file structure
7660  * @ubuf: The userspace provided buffer to read value into
7661  * @cnt: The maximum number of bytes to read
7662  * @ppos: The current "file" position
7663  *
7664  * This function implements the read interface for a struct trace_min_max_param.
7665  * The filp->private_data must point to a trace_min_max_param struct with valid
7666  * data.
7667  */
7668 static ssize_t
7669 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7670 {
7671 	struct trace_min_max_param *param = filp->private_data;
7672 	char buf[U64_STR_SIZE];
7673 	int len;
7674 	u64 val;
7675 
7676 	if (!param)
7677 		return -EFAULT;
7678 
7679 	val = *param->val;
7680 
7681 	if (cnt > sizeof(buf))
7682 		cnt = sizeof(buf);
7683 
7684 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7685 
7686 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7687 }
7688 
7689 const struct file_operations trace_min_max_fops = {
7690 	.open		= tracing_open_generic,
7691 	.read		= trace_min_max_read,
7692 	.write		= trace_min_max_write,
7693 };
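
/*
 * Illustrative sketch (not part of this file): how a tracer might hook a
 * u64 tunable up to trace_min_max_fops.  The names sample_period,
 * sample_period_min/max, sample_lock and the parent dentry are hypothetical.
 *
 *	static u64 sample_period = 100;
 *	static u64 sample_period_min = 1;
 *	static u64 sample_period_max = 1000000;
 *	static DEFINE_MUTEX(sample_lock);
 *
 *	static struct trace_min_max_param sample_param = {
 *		.lock	= &sample_lock,
 *		.val	= &sample_period,
 *		.min	= &sample_period_min,
 *		.max	= &sample_period_max,
 *	};
 *
 *	trace_create_file("sample_period", 0644, parent, &sample_param,
 *			  &trace_min_max_fops);
 *
 * Reads of the file then return "100\n", and writes outside [1, 1000000]
 * fail with -EINVAL, with updates serialized by sample_lock.
 */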
7694 
7695 #define TRACING_LOG_ERRS_MAX	8
7696 #define TRACING_LOG_LOC_MAX	128
7697 
7698 #define CMD_PREFIX "  Command: "
7699 
7700 struct err_info {
7701 	const char	**errs;	/* ptr to loc-specific array of err strings */
7702 	u8		type;	/* index into errs -> specific err string */
7703 	u8		pos;	/* caret position in cmd[]; MAX_FILTER_STR_VAL = 256 fits in a u8 */
7704 	u64		ts;
7705 };
7706 
7707 struct tracing_log_err {
7708 	struct list_head	list;
7709 	struct err_info		info;
7710 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7711 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7712 };
7713 
7714 static DEFINE_MUTEX(tracing_err_log_lock);
7715 
7716 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7717 {
7718 	struct tracing_log_err *err;
7719 
7720 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7721 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7722 		if (!err)
7723 			return ERR_PTR(-ENOMEM);
7724 		tr->n_err_log_entries++;
7725 
7726 		return err;
7727 	}
7728 
7729 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7730 	list_del(&err->list);
7731 
7732 	return err;
7733 }
7734 
7735 /**
7736  * err_pos - find the position of a string within a command for error careting
7737  * @cmd: The tracing command that caused the error
7738  * @str: The string to position the caret at within @cmd
7739  *
7740  * Finds the position of the first occurrence of @str within @cmd.  The
7741  * return value can be passed to tracing_log_err() for caret placement
7742  * within @cmd.
7743  *
7744  * Returns the index within @cmd of the first occurrence of @str or 0
7745  * if @str was not found.
7746  */
7747 unsigned int err_pos(char *cmd, const char *str)
7748 {
7749 	char *found;
7750 
7751 	if (WARN_ON(!strlen(cmd)))
7752 		return 0;
7753 
7754 	found = strstr(cmd, str);
7755 	if (found)
7756 		return found - cmd;
7757 
7758 	return 0;
7759 }
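
/*
 * Worked example (illustrative only, hypothetical command): for
 * cmd = "hist:keys=bogus", err_pos(cmd, "bogus") returns 10, the offset of
 * "bogus" within cmd.  Passed as @pos to tracing_log_err(), it makes the
 * caret line up under the "bogus" token of the "  Command: " line in
 * tracing/error_log.
 */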
7760 
7761 /**
7762  * tracing_log_err - write an error to the tracing error log
7763  * @tr: The associated trace array for the error (NULL for top level array)
7764  * @loc: A string describing where the error occurred
7765  * @cmd: The tracing command that caused the error
7766  * @errs: The array of loc-specific static error strings
7767  * @type: The index into errs[], which produces the specific static err string
7768  * @pos: The position the caret should be placed in the cmd
7769  *
7770  * Writes an error into tracing/error_log of the form:
7771  *
7772  * <loc>: error: <text>
7773  *   Command: <cmd>
7774  *              ^
7775  *
7776  * tracing/error_log is a small log file containing the last
7777  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7778  * unless there has been a tracing error, and the error log can be
7779  * cleared and have its memory freed by writing the empty string in
7780  * truncation mode to it, i.e. echo > tracing/error_log.
7781  *
7782  * NOTE: the @errs array along with the @type param are used to
7783  * produce a static error string - this string is not copied and saved
7784  * when the error is logged - only a pointer to it is saved.  See
7785  * existing callers for examples of how static strings are typically
7786  * defined for use with tracing_log_err().
7787  */
7788 void tracing_log_err(struct trace_array *tr,
7789 		     const char *loc, const char *cmd,
7790 		     const char **errs, u8 type, u8 pos)
7791 {
7792 	struct tracing_log_err *err;
7793 
7794 	if (!tr)
7795 		tr = &global_trace;
7796 
7797 	mutex_lock(&tracing_err_log_lock);
7798 	err = get_tracing_log_err(tr);
7799 	if (PTR_ERR(err) == -ENOMEM) {
7800 		mutex_unlock(&tracing_err_log_lock);
7801 		return;
7802 	}
7803 
7804 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7805 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7806 
7807 	err->info.errs = errs;
7808 	err->info.type = type;
7809 	err->info.pos = pos;
7810 	err->info.ts = local_clock();
7811 
7812 	list_add_tail(&err->list, &tr->err_log);
7813 	mutex_unlock(&tracing_err_log_lock);
7814 }
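
/*
 * Illustrative sketch (not part of this file): a typical caller keeps a
 * static array of error strings and logs against it.  The hist_errs array,
 * the "hist" location string and the command contents are hypothetical.
 *
 *	static const char *hist_errs[] = { "Bad key", "Missing field" };
 *
 *	tracing_log_err(tr, "hist", cmd, hist_errs, 0, err_pos(cmd, "bogus"));
 *
 * would show up in tracing/error_log roughly as:
 *
 *	[  123.456789] hist: error: Bad key
 *	  Command: hist:keys=bogus
 *	                     ^
 */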
7815 
7816 static void clear_tracing_err_log(struct trace_array *tr)
7817 {
7818 	struct tracing_log_err *err, *next;
7819 
7820 	mutex_lock(&tracing_err_log_lock);
7821 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7822 		list_del(&err->list);
7823 		kfree(err);
7824 	}
7825 
7826 	tr->n_err_log_entries = 0;
7827 	mutex_unlock(&tracing_err_log_lock);
7828 }
7829 
7830 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7831 {
7832 	struct trace_array *tr = m->private;
7833 
7834 	mutex_lock(&tracing_err_log_lock);
7835 
7836 	return seq_list_start(&tr->err_log, *pos);
7837 }
7838 
7839 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7840 {
7841 	struct trace_array *tr = m->private;
7842 
7843 	return seq_list_next(v, &tr->err_log, pos);
7844 }
7845 
7846 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7847 {
7848 	mutex_unlock(&tracing_err_log_lock);
7849 }
7850 
7851 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7852 {
7853 	u8 i;
7854 
7855 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7856 		seq_putc(m, ' ');
7857 	for (i = 0; i < pos; i++)
7858 		seq_putc(m, ' ');
7859 	seq_puts(m, "^\n");
7860 }
7861 
7862 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7863 {
7864 	struct tracing_log_err *err = v;
7865 
7866 	if (err) {
7867 		const char *err_text = err->info.errs[err->info.type];
7868 		u64 sec = err->info.ts;
7869 		u32 nsec;
7870 
7871 		nsec = do_div(sec, NSEC_PER_SEC);
7872 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7873 			   err->loc, err_text);
7874 		seq_printf(m, "%s", err->cmd);
7875 		tracing_err_log_show_pos(m, err->info.pos);
7876 	}
7877 
7878 	return 0;
7879 }
7880 
7881 static const struct seq_operations tracing_err_log_seq_ops = {
7882 	.start  = tracing_err_log_seq_start,
7883 	.next   = tracing_err_log_seq_next,
7884 	.stop   = tracing_err_log_seq_stop,
7885 	.show   = tracing_err_log_seq_show
7886 };
7887 
7888 static int tracing_err_log_open(struct inode *inode, struct file *file)
7889 {
7890 	struct trace_array *tr = inode->i_private;
7891 	int ret = 0;
7892 
7893 	ret = tracing_check_open_get_tr(tr);
7894 	if (ret)
7895 		return ret;
7896 
7897 	/* If this file was opened for write, then erase contents */
7898 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7899 		clear_tracing_err_log(tr);
7900 
7901 	if (file->f_mode & FMODE_READ) {
7902 		ret = seq_open(file, &tracing_err_log_seq_ops);
7903 		if (!ret) {
7904 			struct seq_file *m = file->private_data;
7905 			m->private = tr;
7906 		} else {
7907 			trace_array_put(tr);
7908 		}
7909 	}
7910 	return ret;
7911 }
7912 
7913 static ssize_t tracing_err_log_write(struct file *file,
7914 				     const char __user *buffer,
7915 				     size_t count, loff_t *ppos)
7916 {
7917 	return count;
7918 }
7919 
7920 static int tracing_err_log_release(struct inode *inode, struct file *file)
7921 {
7922 	struct trace_array *tr = inode->i_private;
7923 
7924 	trace_array_put(tr);
7925 
7926 	if (file->f_mode & FMODE_READ)
7927 		seq_release(inode, file);
7928 
7929 	return 0;
7930 }
7931 
7932 static const struct file_operations tracing_err_log_fops = {
7933 	.open           = tracing_err_log_open,
7934 	.write		= tracing_err_log_write,
7935 	.read           = seq_read,
7936 	.llseek         = seq_lseek,
7937 	.release        = tracing_err_log_release,
7938 };
7939 
7940 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7941 {
7942 	struct trace_array *tr = inode->i_private;
7943 	struct ftrace_buffer_info *info;
7944 	int ret;
7945 
7946 	ret = tracing_check_open_get_tr(tr);
7947 	if (ret)
7948 		return ret;
7949 
7950 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7951 	if (!info) {
7952 		trace_array_put(tr);
7953 		return -ENOMEM;
7954 	}
7955 
7956 	mutex_lock(&trace_types_lock);
7957 
7958 	info->iter.tr		= tr;
7959 	info->iter.cpu_file	= tracing_get_cpu(inode);
7960 	info->iter.trace	= tr->current_trace;
7961 	info->iter.array_buffer = &tr->array_buffer;
7962 	info->spare		= NULL;
7963 	/* Force reading ring buffer for first read */
7964 	info->read		= (unsigned int)-1;
7965 
7966 	filp->private_data = info;
7967 
7968 	tr->trace_ref++;
7969 
7970 	mutex_unlock(&trace_types_lock);
7971 
7972 	ret = nonseekable_open(inode, filp);
7973 	if (ret < 0)
7974 		trace_array_put(tr);
7975 
7976 	return ret;
7977 }
7978 
7979 static __poll_t
7980 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7981 {
7982 	struct ftrace_buffer_info *info = filp->private_data;
7983 	struct trace_iterator *iter = &info->iter;
7984 
7985 	return trace_poll(iter, filp, poll_table);
7986 }
7987 
7988 static ssize_t
7989 tracing_buffers_read(struct file *filp, char __user *ubuf,
7990 		     size_t count, loff_t *ppos)
7991 {
7992 	struct ftrace_buffer_info *info = filp->private_data;
7993 	struct trace_iterator *iter = &info->iter;
7994 	ssize_t ret = 0;
7995 	ssize_t size;
7996 
7997 	if (!count)
7998 		return 0;
7999 
8000 #ifdef CONFIG_TRACER_MAX_TRACE
8001 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8002 		return -EBUSY;
8003 #endif
8004 
8005 	if (!info->spare) {
8006 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8007 							  iter->cpu_file);
8008 		if (IS_ERR(info->spare)) {
8009 			ret = PTR_ERR(info->spare);
8010 			info->spare = NULL;
8011 		} else {
8012 			info->spare_cpu = iter->cpu_file;
8013 		}
8014 	}
8015 	if (!info->spare)
8016 		return ret;
8017 
8018 	/* Do we have previous read data to read? */
8019 	if (info->read < PAGE_SIZE)
8020 		goto read;
8021 
8022  again:
8023 	trace_access_lock(iter->cpu_file);
8024 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8025 				    &info->spare,
8026 				    count,
8027 				    iter->cpu_file, 0);
8028 	trace_access_unlock(iter->cpu_file);
8029 
8030 	if (ret < 0) {
8031 		if (trace_empty(iter)) {
8032 			if ((filp->f_flags & O_NONBLOCK))
8033 				return -EAGAIN;
8034 
8035 			ret = wait_on_pipe(iter, 0);
8036 			if (ret)
8037 				return ret;
8038 
8039 			goto again;
8040 		}
8041 		return 0;
8042 	}
8043 
8044 	info->read = 0;
8045  read:
8046 	size = PAGE_SIZE - info->read;
8047 	if (size > count)
8048 		size = count;
8049 
8050 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8051 	if (ret == size)
8052 		return -EFAULT;
8053 
8054 	size -= ret;
8055 
8056 	*ppos += size;
8057 	info->read += size;
8058 
8059 	return size;
8060 }
8061 
8062 static int tracing_buffers_release(struct inode *inode, struct file *file)
8063 {
8064 	struct ftrace_buffer_info *info = file->private_data;
8065 	struct trace_iterator *iter = &info->iter;
8066 
8067 	mutex_lock(&trace_types_lock);
8068 
8069 	iter->tr->trace_ref--;
8070 
8071 	__trace_array_put(iter->tr);
8072 
8073 	if (info->spare)
8074 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8075 					   info->spare_cpu, info->spare);
8076 	kvfree(info);
8077 
8078 	mutex_unlock(&trace_types_lock);
8079 
8080 	return 0;
8081 }
8082 
8083 struct buffer_ref {
8084 	struct trace_buffer	*buffer;
8085 	void			*page;
8086 	int			cpu;
8087 	refcount_t		refcount;
8088 };
8089 
8090 static void buffer_ref_release(struct buffer_ref *ref)
8091 {
8092 	if (!refcount_dec_and_test(&ref->refcount))
8093 		return;
8094 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8095 	kfree(ref);
8096 }
8097 
8098 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8099 				    struct pipe_buffer *buf)
8100 {
8101 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8102 
8103 	buffer_ref_release(ref);
8104 	buf->private = 0;
8105 }
8106 
8107 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8108 				struct pipe_buffer *buf)
8109 {
8110 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8111 
8112 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8113 		return false;
8114 
8115 	refcount_inc(&ref->refcount);
8116 	return true;
8117 }
8118 
8119 /* Pipe buffer operations for a buffer. */
8120 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8121 	.release		= buffer_pipe_buf_release,
8122 	.get			= buffer_pipe_buf_get,
8123 };
8124 
8125 /*
8126  * Callback from splice_to_pipe(), if we need to release some pages
8127  * at the end of the spd in case we errored out while filling the pipe.
8128  */
8129 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8130 {
8131 	struct buffer_ref *ref =
8132 		(struct buffer_ref *)spd->partial[i].private;
8133 
8134 	buffer_ref_release(ref);
8135 	spd->partial[i].private = 0;
8136 }
8137 
8138 static ssize_t
8139 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8140 			    struct pipe_inode_info *pipe, size_t len,
8141 			    unsigned int flags)
8142 {
8143 	struct ftrace_buffer_info *info = file->private_data;
8144 	struct trace_iterator *iter = &info->iter;
8145 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8146 	struct page *pages_def[PIPE_DEF_BUFFERS];
8147 	struct splice_pipe_desc spd = {
8148 		.pages		= pages_def,
8149 		.partial	= partial_def,
8150 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8151 		.ops		= &buffer_pipe_buf_ops,
8152 		.spd_release	= buffer_spd_release,
8153 	};
8154 	struct buffer_ref *ref;
8155 	int entries, i;
8156 	ssize_t ret = 0;
8157 
8158 #ifdef CONFIG_TRACER_MAX_TRACE
8159 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8160 		return -EBUSY;
8161 #endif
8162 
8163 	if (*ppos & (PAGE_SIZE - 1))
8164 		return -EINVAL;
8165 
8166 	if (len & (PAGE_SIZE - 1)) {
8167 		if (len < PAGE_SIZE)
8168 			return -EINVAL;
8169 		len &= PAGE_MASK;
8170 	}
8171 
8172 	if (splice_grow_spd(pipe, &spd))
8173 		return -ENOMEM;
8174 
8175  again:
8176 	trace_access_lock(iter->cpu_file);
8177 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8178 
8179 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8180 		struct page *page;
8181 		int r;
8182 
8183 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8184 		if (!ref) {
8185 			ret = -ENOMEM;
8186 			break;
8187 		}
8188 
8189 		refcount_set(&ref->refcount, 1);
8190 		ref->buffer = iter->array_buffer->buffer;
8191 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8192 		if (IS_ERR(ref->page)) {
8193 			ret = PTR_ERR(ref->page);
8194 			ref->page = NULL;
8195 			kfree(ref);
8196 			break;
8197 		}
8198 		ref->cpu = iter->cpu_file;
8199 
8200 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8201 					  len, iter->cpu_file, 1);
8202 		if (r < 0) {
8203 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8204 						   ref->page);
8205 			kfree(ref);
8206 			break;
8207 		}
8208 
8209 		page = virt_to_page(ref->page);
8210 
8211 		spd.pages[i] = page;
8212 		spd.partial[i].len = PAGE_SIZE;
8213 		spd.partial[i].offset = 0;
8214 		spd.partial[i].private = (unsigned long)ref;
8215 		spd.nr_pages++;
8216 		*ppos += PAGE_SIZE;
8217 
8218 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8219 	}
8220 
8221 	trace_access_unlock(iter->cpu_file);
8222 	spd.nr_pages = i;
8223 
8224 	/* did we read anything? */
8225 	if (!spd.nr_pages) {
8226 		if (ret)
8227 			goto out;
8228 
8229 		ret = -EAGAIN;
8230 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8231 			goto out;
8232 
8233 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8234 		if (ret)
8235 			goto out;
8236 
8237 		goto again;
8238 	}
8239 
8240 	ret = splice_to_pipe(pipe, &spd);
8241 out:
8242 	splice_shrink_spd(&spd);
8243 
8244 	return ret;
8245 }
8246 
8247 static const struct file_operations tracing_buffers_fops = {
8248 	.open		= tracing_buffers_open,
8249 	.read		= tracing_buffers_read,
8250 	.poll		= tracing_buffers_poll,
8251 	.release	= tracing_buffers_release,
8252 	.splice_read	= tracing_buffers_splice_read,
8253 	.llseek		= no_llseek,
8254 };
8255 
8256 static ssize_t
8257 tracing_stats_read(struct file *filp, char __user *ubuf,
8258 		   size_t count, loff_t *ppos)
8259 {
8260 	struct inode *inode = file_inode(filp);
8261 	struct trace_array *tr = inode->i_private;
8262 	struct array_buffer *trace_buf = &tr->array_buffer;
8263 	int cpu = tracing_get_cpu(inode);
8264 	struct trace_seq *s;
8265 	unsigned long cnt;
8266 	unsigned long long t;
8267 	unsigned long usec_rem;
8268 
8269 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8270 	if (!s)
8271 		return -ENOMEM;
8272 
8273 	trace_seq_init(s);
8274 
8275 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8276 	trace_seq_printf(s, "entries: %ld\n", cnt);
8277 
8278 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8279 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8280 
8281 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8282 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8283 
8284 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8285 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8286 
8287 	if (trace_clocks[tr->clock_id].in_ns) {
8288 		/* local or global for trace_clock */
8289 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8290 		usec_rem = do_div(t, USEC_PER_SEC);
8291 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8292 								t, usec_rem);
8293 
8294 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8295 		usec_rem = do_div(t, USEC_PER_SEC);
8296 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8297 	} else {
8298 		/* counter or tsc mode for trace_clock */
8299 		trace_seq_printf(s, "oldest event ts: %llu\n",
8300 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8301 
8302 		trace_seq_printf(s, "now ts: %llu\n",
8303 				ring_buffer_time_stamp(trace_buf->buffer));
8304 	}
8305 
8306 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8307 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8308 
8309 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8310 	trace_seq_printf(s, "read events: %ld\n", cnt);
8311 
8312 	count = simple_read_from_buffer(ubuf, count, ppos,
8313 					s->buffer, trace_seq_used(s));
8314 
8315 	kfree(s);
8316 
8317 	return count;
8318 }
8319 
8320 static const struct file_operations tracing_stats_fops = {
8321 	.open		= tracing_open_generic_tr,
8322 	.read		= tracing_stats_read,
8323 	.llseek		= generic_file_llseek,
8324 	.release	= tracing_release_generic_tr,
8325 };
8326 
8327 #ifdef CONFIG_DYNAMIC_FTRACE
8328 
8329 static ssize_t
8330 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8331 		  size_t cnt, loff_t *ppos)
8332 {
8333 	ssize_t ret;
8334 	char *buf;
8335 	int r;
8336 
8337 	/* 256 should be plenty to hold the amount needed */
8338 	buf = kmalloc(256, GFP_KERNEL);
8339 	if (!buf)
8340 		return -ENOMEM;
8341 
8342 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8343 		      ftrace_update_tot_cnt,
8344 		      ftrace_number_of_pages,
8345 		      ftrace_number_of_groups);
8346 
8347 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8348 	kfree(buf);
8349 	return ret;
8350 }
8351 
8352 static const struct file_operations tracing_dyn_info_fops = {
8353 	.open		= tracing_open_generic,
8354 	.read		= tracing_read_dyn_info,
8355 	.llseek		= generic_file_llseek,
8356 };
8357 #endif /* CONFIG_DYNAMIC_FTRACE */
8358 
8359 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8360 static void
8361 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8362 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8363 		void *data)
8364 {
8365 	tracing_snapshot_instance(tr);
8366 }
8367 
8368 static void
8369 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8370 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8371 		      void *data)
8372 {
8373 	struct ftrace_func_mapper *mapper = data;
8374 	long *count = NULL;
8375 
8376 	if (mapper)
8377 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8378 
8379 	if (count) {
8380 
8381 		if (*count <= 0)
8382 			return;
8383 
8384 		(*count)--;
8385 	}
8386 
8387 	tracing_snapshot_instance(tr);
8388 }
8389 
8390 static int
8391 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8392 		      struct ftrace_probe_ops *ops, void *data)
8393 {
8394 	struct ftrace_func_mapper *mapper = data;
8395 	long *count = NULL;
8396 
8397 	seq_printf(m, "%ps:", (void *)ip);
8398 
8399 	seq_puts(m, "snapshot");
8400 
8401 	if (mapper)
8402 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8403 
8404 	if (count)
8405 		seq_printf(m, ":count=%ld\n", *count);
8406 	else
8407 		seq_puts(m, ":unlimited\n");
8408 
8409 	return 0;
8410 }
8411 
8412 static int
8413 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8414 		     unsigned long ip, void *init_data, void **data)
8415 {
8416 	struct ftrace_func_mapper *mapper = *data;
8417 
8418 	if (!mapper) {
8419 		mapper = allocate_ftrace_func_mapper();
8420 		if (!mapper)
8421 			return -ENOMEM;
8422 		*data = mapper;
8423 	}
8424 
8425 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8426 }
8427 
8428 static void
8429 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8430 		     unsigned long ip, void *data)
8431 {
8432 	struct ftrace_func_mapper *mapper = data;
8433 
8434 	if (!ip) {
8435 		if (!mapper)
8436 			return;
8437 		free_ftrace_func_mapper(mapper, NULL);
8438 		return;
8439 	}
8440 
8441 	ftrace_func_mapper_remove_ip(mapper, ip);
8442 }
8443 
8444 static struct ftrace_probe_ops snapshot_probe_ops = {
8445 	.func			= ftrace_snapshot,
8446 	.print			= ftrace_snapshot_print,
8447 };
8448 
8449 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8450 	.func			= ftrace_count_snapshot,
8451 	.print			= ftrace_snapshot_print,
8452 	.init			= ftrace_snapshot_init,
8453 	.free			= ftrace_snapshot_free,
8454 };
8455 
8456 static int
8457 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8458 			       char *glob, char *cmd, char *param, int enable)
8459 {
8460 	struct ftrace_probe_ops *ops;
8461 	void *count = (void *)-1;
8462 	char *number;
8463 	int ret;
8464 
8465 	if (!tr)
8466 		return -ENODEV;
8467 
8468 	/* hash funcs only work with set_ftrace_filter */
8469 	if (!enable)
8470 		return -EINVAL;
8471 
8472 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8473 
8474 	if (glob[0] == '!')
8475 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8476 
8477 	if (!param)
8478 		goto out_reg;
8479 
8480 	number = strsep(&param, ":");
8481 
8482 	if (!strlen(number))
8483 		goto out_reg;
8484 
8485 	/*
8486 	 * We use the callback data field (which is a pointer)
8487 	 * as our counter.
8488 	 */
8489 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8490 	if (ret)
8491 		return ret;
8492 
8493  out_reg:
8494 	ret = tracing_alloc_snapshot_instance(tr);
8495 	if (ret < 0)
8496 		goto out;
8497 
8498 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8499 
8500  out:
8501 	return ret < 0 ? ret : 0;
8502 }
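
/*
 * Illustrative usage from user space (the traced function name is
 * hypothetical):
 *
 *	echo 'native_sched_clock:snapshot' > set_ftrace_filter
 *	echo 'native_sched_clock:snapshot:3' > set_ftrace_filter
 *
 * The first form takes a snapshot on every hit of the function; the second
 * uses the probe's data pointer as a countdown, so only the first 3 hits
 * trigger a snapshot.  Writing the same line prefixed with '!' removes the
 * probe again.
 */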
8503 
8504 static struct ftrace_func_command ftrace_snapshot_cmd = {
8505 	.name			= "snapshot",
8506 	.func			= ftrace_trace_snapshot_callback,
8507 };
8508 
8509 static __init int register_snapshot_cmd(void)
8510 {
8511 	return register_ftrace_command(&ftrace_snapshot_cmd);
8512 }
8513 #else
8514 static inline __init int register_snapshot_cmd(void) { return 0; }
8515 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8516 
8517 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8518 {
8519 	if (WARN_ON(!tr->dir))
8520 		return ERR_PTR(-ENODEV);
8521 
8522 	/* Top directory uses NULL as the parent */
8523 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8524 		return NULL;
8525 
8526 	/* All sub buffers have a descriptor */
8527 	return tr->dir;
8528 }
8529 
8530 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8531 {
8532 	struct dentry *d_tracer;
8533 
8534 	if (tr->percpu_dir)
8535 		return tr->percpu_dir;
8536 
8537 	d_tracer = tracing_get_dentry(tr);
8538 	if (IS_ERR(d_tracer))
8539 		return NULL;
8540 
8541 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8542 
8543 	MEM_FAIL(!tr->percpu_dir,
8544 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8545 
8546 	return tr->percpu_dir;
8547 }
8548 
8549 static struct dentry *
8550 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8551 		      void *data, long cpu, const struct file_operations *fops)
8552 {
8553 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8554 
8555 	if (ret) /* See tracing_get_cpu() */
8556 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8557 	return ret;
8558 }
8559 
8560 static void
8561 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8562 {
8563 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8564 	struct dentry *d_cpu;
8565 	char cpu_dir[30]; /* 30 characters should be more than enough */
8566 
8567 	if (!d_percpu)
8568 		return;
8569 
8570 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8571 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8572 	if (!d_cpu) {
8573 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8574 		return;
8575 	}
8576 
8577 	/* per cpu trace_pipe */
8578 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8579 				tr, cpu, &tracing_pipe_fops);
8580 
8581 	/* per cpu trace */
8582 	trace_create_cpu_file("trace", 0644, d_cpu,
8583 				tr, cpu, &tracing_fops);
8584 
8585 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8586 				tr, cpu, &tracing_buffers_fops);
8587 
8588 	trace_create_cpu_file("stats", 0444, d_cpu,
8589 				tr, cpu, &tracing_stats_fops);
8590 
8591 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8592 				tr, cpu, &tracing_entries_fops);
8593 
8594 #ifdef CONFIG_TRACER_SNAPSHOT
8595 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8596 				tr, cpu, &snapshot_fops);
8597 
8598 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8599 				tr, cpu, &snapshot_raw_fops);
8600 #endif
8601 }
8602 
8603 #ifdef CONFIG_FTRACE_SELFTEST
8604 /* Let selftest have access to static functions in this file */
8605 #include "trace_selftest.c"
8606 #endif
8607 
8608 static ssize_t
8609 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8610 			loff_t *ppos)
8611 {
8612 	struct trace_option_dentry *topt = filp->private_data;
8613 	char *buf;
8614 
8615 	if (topt->flags->val & topt->opt->bit)
8616 		buf = "1\n";
8617 	else
8618 		buf = "0\n";
8619 
8620 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8621 }
8622 
8623 static ssize_t
8624 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8625 			 loff_t *ppos)
8626 {
8627 	struct trace_option_dentry *topt = filp->private_data;
8628 	unsigned long val;
8629 	int ret;
8630 
8631 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8632 	if (ret)
8633 		return ret;
8634 
8635 	if (val != 0 && val != 1)
8636 		return -EINVAL;
8637 
8638 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8639 		mutex_lock(&trace_types_lock);
8640 		ret = __set_tracer_option(topt->tr, topt->flags,
8641 					  topt->opt, !val);
8642 		mutex_unlock(&trace_types_lock);
8643 		if (ret)
8644 			return ret;
8645 	}
8646 
8647 	*ppos += cnt;
8648 
8649 	return cnt;
8650 }
8651 
8652 
8653 static const struct file_operations trace_options_fops = {
8654 	.open = tracing_open_generic,
8655 	.read = trace_options_read,
8656 	.write = trace_options_write,
8657 	.llseek	= generic_file_llseek,
8658 };
8659 
8660 /*
8661  * In order to pass in both the trace_array descriptor and the index
8662  * to the flag that the trace option file represents, the trace_array
8663  * has a character array of trace_flags_index[], which holds the index
8664  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8665  * The address of this character array is passed to the flag option file
8666  * read/write callbacks.
8667  *
8668  * In order to extract both the index and the trace_array descriptor,
8669  * get_tr_index() uses the following algorithm.
8670  *
8671  *   idx = *ptr;
8672  *
8673  * As the pointer itself contains the address of the index (remember
8674  * index[1] == 1).
8675  *
8676  * Then to get the trace_array descriptor, by subtracting that index
8677  * from the ptr, we get to the start of the index itself.
8678  *
8679  *   ptr - idx == &index[0]
8680  *
8681  * Then a simple container_of() from that pointer gets us to the
8682  * trace_array descriptor.
8683  */
8684 static void get_tr_index(void *data, struct trace_array **ptr,
8685 			 unsigned int *pindex)
8686 {
8687 	*pindex = *(unsigned char *)data;
8688 
8689 	*ptr = container_of(data - *pindex, struct trace_array,
8690 			    trace_flags_index);
8691 }
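
/*
 * Worked example (illustrative only): if @data points at
 * tr->trace_flags_index[5], then *data == 5, data - 5 is
 * &tr->trace_flags_index[0], and container_of() hands back the enclosing
 * trace_array, so the caller recovers both the flag bit (5) and its
 * trace_array from a single pointer.
 */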
8692 
8693 static ssize_t
8694 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8695 			loff_t *ppos)
8696 {
8697 	void *tr_index = filp->private_data;
8698 	struct trace_array *tr;
8699 	unsigned int index;
8700 	char *buf;
8701 
8702 	get_tr_index(tr_index, &tr, &index);
8703 
8704 	if (tr->trace_flags & (1 << index))
8705 		buf = "1\n";
8706 	else
8707 		buf = "0\n";
8708 
8709 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8710 }
8711 
8712 static ssize_t
8713 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8714 			 loff_t *ppos)
8715 {
8716 	void *tr_index = filp->private_data;
8717 	struct trace_array *tr;
8718 	unsigned int index;
8719 	unsigned long val;
8720 	int ret;
8721 
8722 	get_tr_index(tr_index, &tr, &index);
8723 
8724 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8725 	if (ret)
8726 		return ret;
8727 
8728 	if (val != 0 && val != 1)
8729 		return -EINVAL;
8730 
8731 	mutex_lock(&event_mutex);
8732 	mutex_lock(&trace_types_lock);
8733 	ret = set_tracer_flag(tr, 1 << index, val);
8734 	mutex_unlock(&trace_types_lock);
8735 	mutex_unlock(&event_mutex);
8736 
8737 	if (ret < 0)
8738 		return ret;
8739 
8740 	*ppos += cnt;
8741 
8742 	return cnt;
8743 }
8744 
8745 static const struct file_operations trace_options_core_fops = {
8746 	.open = tracing_open_generic,
8747 	.read = trace_options_core_read,
8748 	.write = trace_options_core_write,
8749 	.llseek = generic_file_llseek,
8750 };
8751 
8752 struct dentry *trace_create_file(const char *name,
8753 				 umode_t mode,
8754 				 struct dentry *parent,
8755 				 void *data,
8756 				 const struct file_operations *fops)
8757 {
8758 	struct dentry *ret;
8759 
8760 	ret = tracefs_create_file(name, mode, parent, data, fops);
8761 	if (!ret)
8762 		pr_warn("Could not create tracefs '%s' entry\n", name);
8763 
8764 	return ret;
8765 }
8766 
8767 
8768 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8769 {
8770 	struct dentry *d_tracer;
8771 
8772 	if (tr->options)
8773 		return tr->options;
8774 
8775 	d_tracer = tracing_get_dentry(tr);
8776 	if (IS_ERR(d_tracer))
8777 		return NULL;
8778 
8779 	tr->options = tracefs_create_dir("options", d_tracer);
8780 	if (!tr->options) {
8781 		pr_warn("Could not create tracefs directory 'options'\n");
8782 		return NULL;
8783 	}
8784 
8785 	return tr->options;
8786 }
8787 
8788 static void
8789 create_trace_option_file(struct trace_array *tr,
8790 			 struct trace_option_dentry *topt,
8791 			 struct tracer_flags *flags,
8792 			 struct tracer_opt *opt)
8793 {
8794 	struct dentry *t_options;
8795 
8796 	t_options = trace_options_init_dentry(tr);
8797 	if (!t_options)
8798 		return;
8799 
8800 	topt->flags = flags;
8801 	topt->opt = opt;
8802 	topt->tr = tr;
8803 
8804 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8805 				    &trace_options_fops);
8806 
8807 }
8808 
8809 static void
8810 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8811 {
8812 	struct trace_option_dentry *topts;
8813 	struct trace_options *tr_topts;
8814 	struct tracer_flags *flags;
8815 	struct tracer_opt *opts;
8816 	int cnt;
8817 	int i;
8818 
8819 	if (!tracer)
8820 		return;
8821 
8822 	flags = tracer->flags;
8823 
8824 	if (!flags || !flags->opts)
8825 		return;
8826 
8827 	/*
8828 	 * If this is an instance, only create flags for tracers
8829 	 * the instance may have.
8830 	 */
8831 	if (!trace_ok_for_array(tracer, tr))
8832 		return;
8833 
8834 	for (i = 0; i < tr->nr_topts; i++) {
8835 		/* Make sure there are no duplicate flags. */
8836 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8837 			return;
8838 	}
8839 
8840 	opts = flags->opts;
8841 
8842 	for (cnt = 0; opts[cnt].name; cnt++)
8843 		;
8844 
8845 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8846 	if (!topts)
8847 		return;
8848 
8849 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8850 			    GFP_KERNEL);
8851 	if (!tr_topts) {
8852 		kfree(topts);
8853 		return;
8854 	}
8855 
8856 	tr->topts = tr_topts;
8857 	tr->topts[tr->nr_topts].tracer = tracer;
8858 	tr->topts[tr->nr_topts].topts = topts;
8859 	tr->nr_topts++;
8860 
8861 	for (cnt = 0; opts[cnt].name; cnt++) {
8862 		create_trace_option_file(tr, &topts[cnt], flags,
8863 					 &opts[cnt]);
8864 		MEM_FAIL(topts[cnt].entry == NULL,
8865 			  "Failed to create trace option: %s",
8866 			  opts[cnt].name);
8867 	}
8868 }
8869 
8870 static struct dentry *
8871 create_trace_option_core_file(struct trace_array *tr,
8872 			      const char *option, long index)
8873 {
8874 	struct dentry *t_options;
8875 
8876 	t_options = trace_options_init_dentry(tr);
8877 	if (!t_options)
8878 		return NULL;
8879 
8880 	return trace_create_file(option, 0644, t_options,
8881 				 (void *)&tr->trace_flags_index[index],
8882 				 &trace_options_core_fops);
8883 }
8884 
8885 static void create_trace_options_dir(struct trace_array *tr)
8886 {
8887 	struct dentry *t_options;
8888 	bool top_level = tr == &global_trace;
8889 	int i;
8890 
8891 	t_options = trace_options_init_dentry(tr);
8892 	if (!t_options)
8893 		return;
8894 
8895 	for (i = 0; trace_options[i]; i++) {
8896 		if (top_level ||
8897 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8898 			create_trace_option_core_file(tr, trace_options[i], i);
8899 	}
8900 }
8901 
8902 static ssize_t
8903 rb_simple_read(struct file *filp, char __user *ubuf,
8904 	       size_t cnt, loff_t *ppos)
8905 {
8906 	struct trace_array *tr = filp->private_data;
8907 	char buf[64];
8908 	int r;
8909 
8910 	r = tracer_tracing_is_on(tr);
8911 	r = sprintf(buf, "%d\n", r);
8912 
8913 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8914 }
8915 
8916 static ssize_t
8917 rb_simple_write(struct file *filp, const char __user *ubuf,
8918 		size_t cnt, loff_t *ppos)
8919 {
8920 	struct trace_array *tr = filp->private_data;
8921 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8922 	unsigned long val;
8923 	int ret;
8924 
8925 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8926 	if (ret)
8927 		return ret;
8928 
8929 	if (buffer) {
8930 		mutex_lock(&trace_types_lock);
8931 		if (!!val == tracer_tracing_is_on(tr)) {
8932 			val = 0; /* do nothing */
8933 		} else if (val) {
8934 			tracer_tracing_on(tr);
8935 			if (tr->current_trace->start)
8936 				tr->current_trace->start(tr);
8937 		} else {
8938 			tracer_tracing_off(tr);
8939 			if (tr->current_trace->stop)
8940 				tr->current_trace->stop(tr);
8941 		}
8942 		mutex_unlock(&trace_types_lock);
8943 	}
8944 
8945 	(*ppos)++;
8946 
8947 	return cnt;
8948 }
8949 
8950 static const struct file_operations rb_simple_fops = {
8951 	.open		= tracing_open_generic_tr,
8952 	.read		= rb_simple_read,
8953 	.write		= rb_simple_write,
8954 	.release	= tracing_release_generic_tr,
8955 	.llseek		= default_llseek,
8956 };
8957 
8958 static ssize_t
8959 buffer_percent_read(struct file *filp, char __user *ubuf,
8960 		    size_t cnt, loff_t *ppos)
8961 {
8962 	struct trace_array *tr = filp->private_data;
8963 	char buf[64];
8964 	int r;
8965 
8966 	r = tr->buffer_percent;
8967 	r = sprintf(buf, "%d\n", r);
8968 
8969 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8970 }
8971 
8972 static ssize_t
8973 buffer_percent_write(struct file *filp, const char __user *ubuf,
8974 		     size_t cnt, loff_t *ppos)
8975 {
8976 	struct trace_array *tr = filp->private_data;
8977 	unsigned long val;
8978 	int ret;
8979 
8980 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8981 	if (ret)
8982 		return ret;
8983 
8984 	if (val > 100)
8985 		return -EINVAL;
8986 
8987 	if (!val)
8988 		val = 1;
8989 
8990 	tr->buffer_percent = val;
8991 
8992 	(*ppos)++;
8993 
8994 	return cnt;
8995 }
8996 
8997 static const struct file_operations buffer_percent_fops = {
8998 	.open		= tracing_open_generic_tr,
8999 	.read		= buffer_percent_read,
9000 	.write		= buffer_percent_write,
9001 	.release	= tracing_release_generic_tr,
9002 	.llseek		= default_llseek,
9003 };
9004 
9005 static struct dentry *trace_instance_dir;
9006 
9007 static void
9008 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9009 
9010 static int
9011 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9012 {
9013 	enum ring_buffer_flags rb_flags;
9014 
9015 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9016 
9017 	buf->tr = tr;
9018 
9019 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9020 	if (!buf->buffer)
9021 		return -ENOMEM;
9022 
9023 	buf->data = alloc_percpu(struct trace_array_cpu);
9024 	if (!buf->data) {
9025 		ring_buffer_free(buf->buffer);
9026 		buf->buffer = NULL;
9027 		return -ENOMEM;
9028 	}
9029 
9030 	/* Allocate the first page for all buffers */
9031 	set_buffer_entries(&tr->array_buffer,
9032 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9033 
9034 	return 0;
9035 }
9036 
9037 static int allocate_trace_buffers(struct trace_array *tr, int size)
9038 {
9039 	int ret;
9040 
9041 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9042 	if (ret)
9043 		return ret;
9044 
9045 #ifdef CONFIG_TRACER_MAX_TRACE
9046 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9047 				    allocate_snapshot ? size : 1);
9048 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9049 		ring_buffer_free(tr->array_buffer.buffer);
9050 		tr->array_buffer.buffer = NULL;
9051 		free_percpu(tr->array_buffer.data);
9052 		tr->array_buffer.data = NULL;
9053 		return -ENOMEM;
9054 	}
9055 	tr->allocated_snapshot = allocate_snapshot;
9056 
9057 	/*
9058 	 * Only the top level trace array gets its snapshot allocated
9059 	 * from the kernel command line.
9060 	 */
9061 	allocate_snapshot = false;
9062 #endif
9063 
9064 	return 0;
9065 }
9066 
9067 static void free_trace_buffer(struct array_buffer *buf)
9068 {
9069 	if (buf->buffer) {
9070 		ring_buffer_free(buf->buffer);
9071 		buf->buffer = NULL;
9072 		free_percpu(buf->data);
9073 		buf->data = NULL;
9074 	}
9075 }
9076 
9077 static void free_trace_buffers(struct trace_array *tr)
9078 {
9079 	if (!tr)
9080 		return;
9081 
9082 	free_trace_buffer(&tr->array_buffer);
9083 
9084 #ifdef CONFIG_TRACER_MAX_TRACE
9085 	free_trace_buffer(&tr->max_buffer);
9086 #endif
9087 }
9088 
9089 static void init_trace_flags_index(struct trace_array *tr)
9090 {
9091 	int i;
9092 
9093 	/* Used by the trace options files */
9094 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9095 		tr->trace_flags_index[i] = i;
9096 }
9097 
9098 static void __update_tracer_options(struct trace_array *tr)
9099 {
9100 	struct tracer *t;
9101 
9102 	for (t = trace_types; t; t = t->next)
9103 		add_tracer_options(tr, t);
9104 }
9105 
9106 static void update_tracer_options(struct trace_array *tr)
9107 {
9108 	mutex_lock(&trace_types_lock);
9109 	__update_tracer_options(tr);
9110 	mutex_unlock(&trace_types_lock);
9111 }
9112 
9113 /* Must have trace_types_lock held */
9114 struct trace_array *trace_array_find(const char *instance)
9115 {
9116 	struct trace_array *tr, *found = NULL;
9117 
9118 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9119 		if (tr->name && strcmp(tr->name, instance) == 0) {
9120 			found = tr;
9121 			break;
9122 		}
9123 	}
9124 
9125 	return found;
9126 }
9127 
9128 struct trace_array *trace_array_find_get(const char *instance)
9129 {
9130 	struct trace_array *tr;
9131 
9132 	mutex_lock(&trace_types_lock);
9133 	tr = trace_array_find(instance);
9134 	if (tr)
9135 		tr->ref++;
9136 	mutex_unlock(&trace_types_lock);
9137 
9138 	return tr;
9139 }
9140 
9141 static int trace_array_create_dir(struct trace_array *tr)
9142 {
9143 	int ret;
9144 
9145 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9146 	if (!tr->dir)
9147 		return -EINVAL;
9148 
9149 	ret = event_trace_add_tracer(tr->dir, tr);
9150 	if (ret) {
9151 		tracefs_remove(tr->dir);
9152 		return ret;
9153 	}
9154 
9155 	init_tracer_tracefs(tr, tr->dir);
9156 	__update_tracer_options(tr);
9157 
9158 	return ret;
9159 }
9160 
9161 static struct trace_array *trace_array_create(const char *name)
9162 {
9163 	struct trace_array *tr;
9164 	int ret;
9165 
9166 	ret = -ENOMEM;
9167 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9168 	if (!tr)
9169 		return ERR_PTR(ret);
9170 
9171 	tr->name = kstrdup(name, GFP_KERNEL);
9172 	if (!tr->name)
9173 		goto out_free_tr;
9174 
9175 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9176 		goto out_free_tr;
9177 
9178 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9179 
9180 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9181 
9182 	raw_spin_lock_init(&tr->start_lock);
9183 
9184 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9185 
9186 	tr->current_trace = &nop_trace;
9187 
9188 	INIT_LIST_HEAD(&tr->systems);
9189 	INIT_LIST_HEAD(&tr->events);
9190 	INIT_LIST_HEAD(&tr->hist_vars);
9191 	INIT_LIST_HEAD(&tr->err_log);
9192 
9193 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9194 		goto out_free_tr;
9195 
9196 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9197 		goto out_free_tr;
9198 
9199 	ftrace_init_trace_array(tr);
9200 
9201 	init_trace_flags_index(tr);
9202 
9203 	if (trace_instance_dir) {
9204 		ret = trace_array_create_dir(tr);
9205 		if (ret)
9206 			goto out_free_tr;
9207 	} else
9208 		__trace_early_add_events(tr);
9209 
9210 	list_add(&tr->list, &ftrace_trace_arrays);
9211 
9212 	tr->ref++;
9213 
9214 	return tr;
9215 
9216  out_free_tr:
9217 	ftrace_free_ftrace_ops(tr);
9218 	free_trace_buffers(tr);
9219 	free_cpumask_var(tr->tracing_cpumask);
9220 	kfree(tr->name);
9221 	kfree(tr);
9222 
9223 	return ERR_PTR(ret);
9224 }
9225 
9226 static int instance_mkdir(const char *name)
9227 {
9228 	struct trace_array *tr;
9229 	int ret;
9230 
9231 	mutex_lock(&event_mutex);
9232 	mutex_lock(&trace_types_lock);
9233 
9234 	ret = -EEXIST;
9235 	if (trace_array_find(name))
9236 		goto out_unlock;
9237 
9238 	tr = trace_array_create(name);
9239 
9240 	ret = PTR_ERR_OR_ZERO(tr);
9241 
9242 out_unlock:
9243 	mutex_unlock(&trace_types_lock);
9244 	mutex_unlock(&event_mutex);
9245 	return ret;
9246 }
9247 
9248 /**
9249  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9250  * @name: The name of the trace array to be looked up/created.
9251  *
9252  * Returns a pointer to the trace array with the given name, or NULL
9253  * if it cannot be created.
9254  *
9255  * NOTE: This function increments the reference counter associated with the
9256  * trace array returned. This makes sure it cannot be freed while in use.
9257  * Use trace_array_put() once the trace array is no longer needed.
9258  * If the trace_array is to be freed, trace_array_destroy() needs to
9259  * be called after the trace_array_put(), or simply let user space delete
9260  * it from the tracefs instances directory. But until the
9261  * trace_array_put() is called, user space cannot delete it.
9262  *
9263  */
9264 struct trace_array *trace_array_get_by_name(const char *name)
9265 {
9266 	struct trace_array *tr;
9267 
9268 	mutex_lock(&event_mutex);
9269 	mutex_lock(&trace_types_lock);
9270 
9271 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9272 		if (tr->name && strcmp(tr->name, name) == 0)
9273 			goto out_unlock;
9274 	}
9275 
9276 	tr = trace_array_create(name);
9277 
9278 	if (IS_ERR(tr))
9279 		tr = NULL;
9280 out_unlock:
9281 	if (tr)
9282 		tr->ref++;
9283 
9284 	mutex_unlock(&trace_types_lock);
9285 	mutex_unlock(&event_mutex);
9286 	return tr;
9287 }
9288 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
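
/*
 * Illustrative sketch (not part of this file): the usual in-kernel pattern
 * for a module that wants its own instance.  The instance name and error
 * handling below are hypothetical.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use tr, e.g. trace_array_printk(tr, _THIS_IP_, "hello\n") ...
 *
 *	trace_array_put(tr);
 *
 * and, only if the module also wants the instance removed (see the note
 * above), a final trace_array_destroy(tr).
 */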
9289 
9290 static int __remove_instance(struct trace_array *tr)
9291 {
9292 	int i;
9293 
9294 	/* Reference counter for a newly created trace array = 1. */
9295 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9296 		return -EBUSY;
9297 
9298 	list_del(&tr->list);
9299 
9300 	/* Disable all the flags that were enabled coming in */
9301 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9302 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9303 			set_tracer_flag(tr, 1 << i, 0);
9304 	}
9305 
9306 	tracing_set_nop(tr);
9307 	clear_ftrace_function_probes(tr);
9308 	event_trace_del_tracer(tr);
9309 	ftrace_clear_pids(tr);
9310 	ftrace_destroy_function_files(tr);
9311 	tracefs_remove(tr->dir);
9312 	free_percpu(tr->last_func_repeats);
9313 	free_trace_buffers(tr);
9314 
9315 	for (i = 0; i < tr->nr_topts; i++) {
9316 		kfree(tr->topts[i].topts);
9317 	}
9318 	kfree(tr->topts);
9319 
9320 	free_cpumask_var(tr->tracing_cpumask);
9321 	kfree(tr->name);
9322 	kfree(tr);
9323 
9324 	return 0;
9325 }
9326 
9327 int trace_array_destroy(struct trace_array *this_tr)
9328 {
9329 	struct trace_array *tr;
9330 	int ret;
9331 
9332 	if (!this_tr)
9333 		return -EINVAL;
9334 
9335 	mutex_lock(&event_mutex);
9336 	mutex_lock(&trace_types_lock);
9337 
9338 	ret = -ENODEV;
9339 
9340 	/* Make sure the trace array exists before destroying it. */
9341 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9342 		if (tr == this_tr) {
9343 			ret = __remove_instance(tr);
9344 			break;
9345 		}
9346 	}
9347 
9348 	mutex_unlock(&trace_types_lock);
9349 	mutex_unlock(&event_mutex);
9350 
9351 	return ret;
9352 }
9353 EXPORT_SYMBOL_GPL(trace_array_destroy);
9354 
9355 static int instance_rmdir(const char *name)
9356 {
9357 	struct trace_array *tr;
9358 	int ret;
9359 
9360 	mutex_lock(&event_mutex);
9361 	mutex_lock(&trace_types_lock);
9362 
9363 	ret = -ENODEV;
9364 	tr = trace_array_find(name);
9365 	if (tr)
9366 		ret = __remove_instance(tr);
9367 
9368 	mutex_unlock(&trace_types_lock);
9369 	mutex_unlock(&event_mutex);
9370 
9371 	return ret;
9372 }
9373 
9374 static __init void create_trace_instances(struct dentry *d_tracer)
9375 {
9376 	struct trace_array *tr;
9377 
9378 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9379 							 instance_mkdir,
9380 							 instance_rmdir);
9381 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9382 		return;
9383 
9384 	mutex_lock(&event_mutex);
9385 	mutex_lock(&trace_types_lock);
9386 
9387 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9388 		if (!tr->name)
9389 			continue;
9390 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9391 			     "Failed to create instance directory\n"))
9392 			break;
9393 	}
9394 
9395 	mutex_unlock(&trace_types_lock);
9396 	mutex_unlock(&event_mutex);
9397 }
9398 
9399 static void
9400 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9401 {
9402 	struct trace_event_file *file;
9403 	int cpu;
9404 
9405 	trace_create_file("available_tracers", 0444, d_tracer,
9406 			tr, &show_traces_fops);
9407 
9408 	trace_create_file("current_tracer", 0644, d_tracer,
9409 			tr, &set_tracer_fops);
9410 
9411 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9412 			  tr, &tracing_cpumask_fops);
9413 
9414 	trace_create_file("trace_options", 0644, d_tracer,
9415 			  tr, &tracing_iter_fops);
9416 
9417 	trace_create_file("trace", 0644, d_tracer,
9418 			  tr, &tracing_fops);
9419 
9420 	trace_create_file("trace_pipe", 0444, d_tracer,
9421 			  tr, &tracing_pipe_fops);
9422 
9423 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9424 			  tr, &tracing_entries_fops);
9425 
9426 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9427 			  tr, &tracing_total_entries_fops);
9428 
9429 	trace_create_file("free_buffer", 0200, d_tracer,
9430 			  tr, &tracing_free_buffer_fops);
9431 
9432 	trace_create_file("trace_marker", 0220, d_tracer,
9433 			  tr, &tracing_mark_fops);
9434 
9435 	file = __find_event_file(tr, "ftrace", "print");
9436 	if (file && file->dir)
9437 		trace_create_file("trigger", 0644, file->dir, file,
9438 				  &event_trigger_fops);
9439 	tr->trace_marker_file = file;
9440 
9441 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9442 			  tr, &tracing_mark_raw_fops);
9443 
9444 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9445 			  &trace_clock_fops);
9446 
9447 	trace_create_file("tracing_on", 0644, d_tracer,
9448 			  tr, &rb_simple_fops);
9449 
9450 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9451 			  &trace_time_stamp_mode_fops);
9452 
9453 	tr->buffer_percent = 50;
9454 
9455 	trace_create_file("buffer_percent", 0444, d_tracer,
9456 			tr, &buffer_percent_fops);
9457 
9458 	create_trace_options_dir(tr);
9459 
9460 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9461 	trace_create_maxlat_file(tr, d_tracer);
9462 #endif
9463 
9464 	if (ftrace_create_function_files(tr, d_tracer))
9465 		MEM_FAIL(1, "Could not allocate function filter files");
9466 
9467 #ifdef CONFIG_TRACER_SNAPSHOT
9468 	trace_create_file("snapshot", 0644, d_tracer,
9469 			  tr, &snapshot_fops);
9470 #endif
9471 
9472 	trace_create_file("error_log", 0644, d_tracer,
9473 			  tr, &tracing_err_log_fops);
9474 
9475 	for_each_tracing_cpu(cpu)
9476 		tracing_init_tracefs_percpu(tr, cpu);
9477 
9478 	ftrace_init_tracefs(tr, d_tracer);
9479 }
9480 
9481 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9482 {
9483 	struct vfsmount *mnt;
9484 	struct file_system_type *type;
9485 
9486 	/*
9487 	 * To maintain backward compatibility for tools that mount
9488 	 * debugfs to get to the tracing facility, tracefs is automatically
9489 	 * mounted to the debugfs/tracing directory.
9490 	 */
9491 	type = get_fs_type("tracefs");
9492 	if (!type)
9493 		return NULL;
9494 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9495 	put_filesystem(type);
9496 	if (IS_ERR(mnt))
9497 		return NULL;
9498 	mntget(mnt);
9499 
9500 	return mnt;
9501 }
9502 
9503 /**
9504  * tracing_init_dentry - initialize top level trace array
9505  *
9506  * This is called when creating files or directories in the tracing
9507  * directory. It is called via fs_initcall() by any of the boot up code
9508  * and returns 0 on success or a negative error code on failure.
9509  */
9510 int tracing_init_dentry(void)
9511 {
9512 	struct trace_array *tr = &global_trace;
9513 
9514 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9515 		pr_warn("Tracing disabled due to lockdown\n");
9516 		return -EPERM;
9517 	}
9518 
9519 	/* The top level trace array uses NULL as parent */
9520 	if (tr->dir)
9521 		return 0;
9522 
9523 	if (WARN_ON(!tracefs_initialized()))
9524 		return -ENODEV;
9525 
9526 	/*
9527 	 * As there may still be users that expect the tracing
9528 	 * files to exist in debugfs/tracing, we must automount
9529 	 * the tracefs file system there, so older tools still
9530 	 * work with the newer kernel.
9531 	 */
9532 	tr->dir = debugfs_create_automount("tracing", NULL,
9533 					   trace_automount, NULL);
9534 
9535 	return 0;
9536 }
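
/*
 * Practical effect (illustrative): tools that only do
 *
 *	mount -t debugfs nodev /sys/kernel/debug
 *
 * still find the files under /sys/kernel/debug/tracing, because that
 * directory is an automount point for tracefs; mounting tracefs directly
 * on /sys/kernel/tracing is the equivalent, preferred path.
 */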
9537 
9538 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9539 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9540 
9541 static struct workqueue_struct *eval_map_wq __initdata;
9542 static struct work_struct eval_map_work __initdata;
9543 
9544 static void __init eval_map_work_func(struct work_struct *work)
9545 {
9546 	int len;
9547 
9548 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9549 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9550 }
9551 
9552 static int __init trace_eval_init(void)
9553 {
9554 	INIT_WORK(&eval_map_work, eval_map_work_func);
9555 
9556 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9557 	if (!eval_map_wq) {
9558 		pr_err("Unable to allocate eval_map_wq\n");
9559 		/* Do work here */
9560 		eval_map_work_func(&eval_map_work);
9561 		return -ENOMEM;
9562 	}
9563 
9564 	queue_work(eval_map_wq, &eval_map_work);
9565 	return 0;
9566 }
9567 
9568 static int __init trace_eval_sync(void)
9569 {
9570 	/* Make sure the eval map updates are finished */
9571 	if (eval_map_wq)
9572 		destroy_workqueue(eval_map_wq);
9573 	return 0;
9574 }
9575 
9576 late_initcall_sync(trace_eval_sync);
9577 
9578 
9579 #ifdef CONFIG_MODULES
9580 static void trace_module_add_evals(struct module *mod)
9581 {
9582 	if (!mod->num_trace_evals)
9583 		return;
9584 
9585 	/*
9586 	 * Modules with bad taint do not have events created, so do
9587 	 * not bother with their eval maps (enums) either.
9588 	 */
9589 	if (trace_module_has_bad_taint(mod))
9590 		return;
9591 
9592 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9593 }
9594 
9595 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
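/*
 * On module unload, walk the global trace_eval_maps list, unlink the
 * block of maps added for this module and free it.  The list consists
 * of "head" and "tail" union items; trace_eval_jmp_to_tail() skips over
 * a module's map entries to reach its tail item.
 */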
9596 static void trace_module_remove_evals(struct module *mod)
9597 {
9598 	union trace_eval_map_item *map;
9599 	union trace_eval_map_item **last = &trace_eval_maps;
9600 
9601 	if (!mod->num_trace_evals)
9602 		return;
9603 
9604 	mutex_lock(&trace_eval_mutex);
9605 
9606 	map = trace_eval_maps;
9607 
9608 	while (map) {
9609 		if (map->head.mod == mod)
9610 			break;
9611 		map = trace_eval_jmp_to_tail(map);
9612 		last = &map->tail.next;
9613 		map = map->tail.next;
9614 	}
9615 	if (!map)
9616 		goto out;
9617 
9618 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9619 	kfree(map);
9620  out:
9621 	mutex_unlock(&trace_eval_mutex);
9622 }
9623 #else
9624 static inline void trace_module_remove_evals(struct module *mod) { }
9625 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9626 
9627 static int trace_module_notify(struct notifier_block *self,
9628 			       unsigned long val, void *data)
9629 {
9630 	struct module *mod = data;
9631 
9632 	switch (val) {
9633 	case MODULE_STATE_COMING:
9634 		trace_module_add_evals(mod);
9635 		break;
9636 	case MODULE_STATE_GOING:
9637 		trace_module_remove_evals(mod);
9638 		break;
9639 	}
9640 
9641 	return NOTIFY_OK;
9642 }
9643 
9644 static struct notifier_block trace_module_nb = {
9645 	.notifier_call = trace_module_notify,
9646 	.priority = 0,
9647 };
9648 #endif /* CONFIG_MODULES */
9649 
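/*
 * Called via fs_initcall() once tracefs is available: create the top
 * level tracefs files (README, saved_cmdlines, tracing_thresh, etc.)
 * for the global trace array and its instances directory.
 */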
9650 static __init int tracer_init_tracefs(void)
9651 {
9652 	int ret;
9653 
9654 	trace_access_lock_init();
9655 
9656 	ret = tracing_init_dentry();
9657 	if (ret)
9658 		return 0;
9659 
9660 	event_trace_init();
9661 
9662 	init_tracer_tracefs(&global_trace, NULL);
9663 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9664 
9665 	trace_create_file("tracing_thresh", 0644, NULL,
9666 			&global_trace, &tracing_thresh_fops);
9667 
9668 	trace_create_file("README", 0444, NULL,
9669 			NULL, &tracing_readme_fops);
9670 
9671 	trace_create_file("saved_cmdlines", 0444, NULL,
9672 			NULL, &tracing_saved_cmdlines_fops);
9673 
9674 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9675 			  NULL, &tracing_saved_cmdlines_size_fops);
9676 
9677 	trace_create_file("saved_tgids", 0444, NULL,
9678 			NULL, &tracing_saved_tgids_fops);
9679 
9680 	trace_eval_init();
9681 
9682 	trace_create_eval_file(NULL);
9683 
9684 #ifdef CONFIG_MODULES
9685 	register_module_notifier(&trace_module_nb);
9686 #endif
9687 
9688 #ifdef CONFIG_DYNAMIC_FTRACE
9689 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9690 			NULL, &tracing_dyn_info_fops);
9691 #endif
9692 
9693 	create_trace_instances(NULL);
9694 
9695 	update_tracer_options(&global_trace);
9696 
9697 	return 0;
9698 }
9699 
9700 fs_initcall(tracer_init_tracefs);
9701 
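/*
 * Panic and die notifiers: when ftrace_dump_on_oops is set, dump the
 * ftrace ring buffer to the console on a panic or oops.
 */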
9702 static int trace_panic_handler(struct notifier_block *this,
9703 			       unsigned long event, void *unused)
9704 {
9705 	if (ftrace_dump_on_oops)
9706 		ftrace_dump(ftrace_dump_on_oops);
9707 	return NOTIFY_OK;
9708 }
9709 
9710 static struct notifier_block trace_panic_notifier = {
9711 	.notifier_call  = trace_panic_handler,
9712 	.next           = NULL,
9713 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9714 };
9715 
9716 static int trace_die_handler(struct notifier_block *self,
9717 			     unsigned long val,
9718 			     void *data)
9719 {
9720 	switch (val) {
9721 	case DIE_OOPS:
9722 		if (ftrace_dump_on_oops)
9723 			ftrace_dump(ftrace_dump_on_oops);
9724 		break;
9725 	default:
9726 		break;
9727 	}
9728 	return NOTIFY_OK;
9729 }
9730 
9731 static struct notifier_block trace_die_notifier = {
9732 	.notifier_call = trace_die_handler,
9733 	.priority = 200
9734 };
9735 
9736 /*
9737  * printk() is capped at a maximum of 1024 characters; we really don't need
9738  * it that big here, as nothing should be printing 1000 characters anyway.
9739  */
9740 #define TRACE_MAX_PRINT		1000
9741 
9742 /*
9743  * Define here KERN_TRACE so that we have one place to modify
9744  * it if we decide to change what log level the ftrace dump
9745  * should be at.
9746  */
9747 #define KERN_TRACE		KERN_EMERG
9748 
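/* Print the contents of a trace_seq to the console and reset it. */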
9749 void
9750 trace_printk_seq(struct trace_seq *s)
9751 {
9752 	/* Probably should print a warning here. */
9753 	if (s->seq.len >= TRACE_MAX_PRINT)
9754 		s->seq.len = TRACE_MAX_PRINT;
9755 
9756 	/*
9757 	 * More paranoid code. Although the buffer size is set to
9758 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9759 	 * an extra layer of protection.
9760 	 */
9761 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9762 		s->seq.len = s->seq.size - 1;
9763 
9764 	/* Should already be NUL terminated, but we are paranoid. */
9765 	s->buffer[s->seq.len] = 0;
9766 
9767 	printk(KERN_TRACE "%s", s->buffer);
9768 
9769 	trace_seq_init(s);
9770 }
9771 
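/* Set up an iterator over the global trace array (used by ftrace_dump() below). */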
9772 void trace_init_global_iter(struct trace_iterator *iter)
9773 {
9774 	iter->tr = &global_trace;
9775 	iter->trace = iter->tr->current_trace;
9776 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9777 	iter->array_buffer = &global_trace.array_buffer;
9778 
9779 	if (iter->trace && iter->trace->open)
9780 		iter->trace->open(iter);
9781 
9782 	/* Annotate start of buffers if we had overruns */
9783 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9784 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9785 
9786 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9787 	if (trace_clocks[iter->tr->clock_id].in_ns)
9788 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9789 }
9790 
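/*
 * Dump the ring buffer contents to the console.  This may be called
 * from panic/oops context, so the iterator and its buffers are static
 * and tracing is turned off for the duration of the dump.
 */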
9791 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9792 {
9793 	/* use static because iter can be a bit big for the stack */
9794 	static struct trace_iterator iter;
9795 	static atomic_t dump_running;
9796 	struct trace_array *tr = &global_trace;
9797 	unsigned int old_userobj;
9798 	unsigned long flags;
9799 	int cnt = 0, cpu;
9800 
9801 	/* Only allow one dump user at a time. */
9802 	if (atomic_inc_return(&dump_running) != 1) {
9803 		atomic_dec(&dump_running);
9804 		return;
9805 	}
9806 
9807 	/*
9808 	 * Always turn off tracing when we dump.
9809 	 * We don't need to show trace output of what happens
9810 	 * between multiple crashes.
9811 	 *
9812 	 * If the user does a sysrq-z, then they can re-enable
9813 	 * tracing with echo 1 > tracing_on.
9814 	 */
9815 	tracing_off();
9816 
9817 	local_irq_save(flags);
9818 
9819 	/* Simulate the iterator */
9820 	trace_init_global_iter(&iter);
9821 	/* Can not use kmalloc for iter.temp and iter.fmt */
9822 	iter.temp = static_temp_buf;
9823 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9824 	iter.fmt = static_fmt_buf;
9825 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9826 
9827 	for_each_tracing_cpu(cpu) {
9828 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9829 	}
9830 
9831 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9832 
9833 	/* don't look at user memory in panic mode */
9834 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9835 
9836 	switch (oops_dump_mode) {
9837 	case DUMP_ALL:
9838 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9839 		break;
9840 	case DUMP_ORIG:
9841 		iter.cpu_file = raw_smp_processor_id();
9842 		break;
9843 	case DUMP_NONE:
9844 		goto out_enable;
9845 	default:
9846 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9847 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9848 	}
9849 
9850 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9851 
9852 	/* Did function tracer already get disabled? */
9853 	if (ftrace_is_dead()) {
9854 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9855 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9856 	}
9857 
9858 	/*
9859 	 * We need to stop all tracing on all CPUs to read
9860 	 * the next buffer. This is a bit expensive, but is
9861 	 * not done often. We print all that we can read,
9862 	 * and then release the locks again.
9863 	 */
9864 
9865 	while (!trace_empty(&iter)) {
9866 
9867 		if (!cnt)
9868 			printk(KERN_TRACE "---------------------------------\n");
9869 
9870 		cnt++;
9871 
9872 		trace_iterator_reset(&iter);
9873 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9874 
9875 		if (trace_find_next_entry_inc(&iter) != NULL) {
9876 			int ret;
9877 
9878 			ret = print_trace_line(&iter);
9879 			if (ret != TRACE_TYPE_NO_CONSUME)
9880 				trace_consume(&iter);
9881 		}
9882 		touch_nmi_watchdog();
9883 
9884 		trace_printk_seq(&iter.seq);
9885 	}
9886 
9887 	if (!cnt)
9888 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9889 	else
9890 		printk(KERN_TRACE "---------------------------------\n");
9891 
9892  out_enable:
9893 	tr->trace_flags |= old_userobj;
9894 
9895 	for_each_tracing_cpu(cpu) {
9896 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9897 	}
9898 	atomic_dec(&dump_running);
9899 	local_irq_restore(flags);
9900 }
9901 EXPORT_SYMBOL_GPL(ftrace_dump);
9902 
9903 #define WRITE_BUFSIZE  4096
9904 
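/*
 * Copy a command string from user space in chunks of up to WRITE_BUFSIZE,
 * split it on newlines, strip '#' comments and hand each resulting line
 * to createfn().  Returns the number of bytes consumed or a negative errno.
 */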
9905 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9906 				size_t count, loff_t *ppos,
9907 				int (*createfn)(const char *))
9908 {
9909 	char *kbuf, *buf, *tmp;
9910 	int ret = 0;
9911 	size_t done = 0;
9912 	size_t size;
9913 
9914 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9915 	if (!kbuf)
9916 		return -ENOMEM;
9917 
9918 	while (done < count) {
9919 		size = count - done;
9920 
9921 		if (size >= WRITE_BUFSIZE)
9922 			size = WRITE_BUFSIZE - 1;
9923 
9924 		if (copy_from_user(kbuf, buffer + done, size)) {
9925 			ret = -EFAULT;
9926 			goto out;
9927 		}
9928 		kbuf[size] = '\0';
9929 		buf = kbuf;
9930 		do {
9931 			tmp = strchr(buf, '\n');
9932 			if (tmp) {
9933 				*tmp = '\0';
9934 				size = tmp - buf + 1;
9935 			} else {
9936 				size = strlen(buf);
9937 				if (done + size < count) {
9938 					if (buf != kbuf)
9939 						break;
9940 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9941 					pr_warn("Line length is too long: Should be less than %d\n",
9942 						WRITE_BUFSIZE - 2);
9943 					ret = -EINVAL;
9944 					goto out;
9945 				}
9946 			}
9947 			done += size;
9948 
9949 			/* Remove comments */
9950 			tmp = strchr(buf, '#');
9951 
9952 			if (tmp)
9953 				*tmp = '\0';
9954 
9955 			ret = createfn(buf);
9956 			if (ret)
9957 				goto out;
9958 			buf += size;
9959 
9960 		} while (done < count);
9961 	}
9962 	ret = done;
9963 
9964 out:
9965 	kfree(kbuf);
9966 
9967 	return ret;
9968 }
9969 
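/*
 * Allocate the cpumasks, ring buffers and remaining state of the global
 * trace array.  Called from early_trace_init() during early boot.
 */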
9970 __init static int tracer_alloc_buffers(void)
9971 {
9972 	int ring_buf_size;
9973 	int ret = -ENOMEM;
9974 
9975 
9976 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9977 		pr_warn("Tracing disabled due to lockdown\n");
9978 		return -EPERM;
9979 	}
9980 
9981 	/*
9982 	 * Make sure we don't accidentally add more trace options
9983 	 * than we have bits for.
9984 	 */
9985 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9986 
9987 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9988 		goto out;
9989 
9990 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9991 		goto out_free_buffer_mask;
9992 
9993 	/* Only allocate trace_printk buffers if a trace_printk exists */
9994 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9995 		/* Must be called before global_trace.buffer is allocated */
9996 		trace_printk_init_buffers();
9997 
9998 	/* To save memory, keep the ring buffer size to its minimum */
9999 	if (ring_buffer_expanded)
10000 		ring_buf_size = trace_buf_size;
10001 	else
10002 		ring_buf_size = 1;
10003 
10004 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10005 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10006 
10007 	raw_spin_lock_init(&global_trace.start_lock);
10008 
10009 	/*
10010 	 * The prepare callback allocates some memory for the ring buffer. We
10011 	 * don't free the buffer if the CPU goes down. If we were to free
10012 	 * the buffer, then the user would lose any trace that was in the
10013 	 * buffer. The memory will be removed once the "instance" is removed.
10014 	 */
10015 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10016 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10017 				      NULL);
10018 	if (ret < 0)
10019 		goto out_free_cpumask;
10020 	/* Used for event triggers */
10021 	ret = -ENOMEM;
10022 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10023 	if (!temp_buffer)
10024 		goto out_rm_hp_state;
10025 
10026 	if (trace_create_savedcmd() < 0)
10027 		goto out_free_temp_buffer;
10028 
10029 	/* TODO: make the number of buffers hot pluggable with CPUS */
10030 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10031 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10032 		goto out_free_savedcmd;
10033 	}
10034 
10035 	if (global_trace.buffer_disabled)
10036 		tracing_off();
10037 
10038 	if (trace_boot_clock) {
10039 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10040 		if (ret < 0)
10041 			pr_warn("Trace clock %s not defined, going back to default\n",
10042 				trace_boot_clock);
10043 	}
10044 
10045 	/*
10046 	 * register_tracer() might reference current_trace, so it
10047 	 * needs to be set before we register anything. This is
10048 	 * just a bootstrap of current_trace anyway.
10049 	 */
10050 	global_trace.current_trace = &nop_trace;
10051 
10052 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10053 
10054 	ftrace_init_global_array_ops(&global_trace);
10055 
10056 	init_trace_flags_index(&global_trace);
10057 
10058 	register_tracer(&nop_trace);
10059 
10060 	/* Function tracing may start here (via kernel command line) */
10061 	init_function_trace();
10062 
10063 	/* All seems OK, enable tracing */
10064 	tracing_disabled = 0;
10065 
10066 	atomic_notifier_chain_register(&panic_notifier_list,
10067 				       &trace_panic_notifier);
10068 
10069 	register_die_notifier(&trace_die_notifier);
10070 
10071 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10072 
10073 	INIT_LIST_HEAD(&global_trace.systems);
10074 	INIT_LIST_HEAD(&global_trace.events);
10075 	INIT_LIST_HEAD(&global_trace.hist_vars);
10076 	INIT_LIST_HEAD(&global_trace.err_log);
10077 	list_add(&global_trace.list, &ftrace_trace_arrays);
10078 
10079 	apply_trace_boot_options();
10080 
10081 	register_snapshot_cmd();
10082 
10083 	test_can_verify();
10084 
10085 	return 0;
10086 
10087 out_free_savedcmd:
10088 	free_saved_cmdlines_buffer(savedcmd);
10089 out_free_temp_buffer:
10090 	ring_buffer_free(temp_buffer);
10091 out_rm_hp_state:
10092 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10093 out_free_cpumask:
10094 	free_cpumask_var(global_trace.tracing_cpumask);
10095 out_free_buffer_mask:
10096 	free_cpumask_var(tracing_buffer_mask);
10097 out:
10098 	return ret;
10099 }
10100 
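/*
 * Early boot entry point for tracing: set up the tracepoint_printk
 * iterator if it was enabled, then allocate the trace buffers.
 */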
10101 void __init early_trace_init(void)
10102 {
10103 	if (tracepoint_printk) {
10104 		tracepoint_print_iter =
10105 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10106 		if (MEM_FAIL(!tracepoint_print_iter,
10107 			     "Failed to allocate trace iterator\n"))
10108 			tracepoint_printk = 0;
10109 		else
10110 			static_key_enable(&tracepoint_printk_key.key);
10111 	}
10112 	tracer_alloc_buffers();
10113 }
10114 
10115 void __init trace_init(void)
10116 {
10117 	trace_event_init();
10118 }
10119 
10120 __init static void clear_boot_tracer(void)
10121 {
10122 	/*
10123 	 * The default boot-up tracer name lives in an init section.
10124 	 * This function is called from a late initcall. If we did not
10125 	 * find the boot tracer, then clear it out, to prevent
10126 	 * later registration from accessing the buffer that is
10127 	 * about to be freed.
10128 	 */
10129 	if (!default_bootup_tracer)
10130 		return;
10131 
10132 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10133 	       default_bootup_tracer);
10134 	default_bootup_tracer = NULL;
10135 }
10136 
10137 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10138 __init static void tracing_set_default_clock(void)
10139 {
10140 	/* sched_clock_stable() is determined in late_initcall */
10141 	if (!trace_boot_clock && !sched_clock_stable()) {
10142 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10143 			pr_warn("Can not set tracing clock due to lockdown\n");
10144 			return;
10145 		}
10146 
10147 		printk(KERN_WARNING
10148 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10149 		       "If you want to keep using the local clock, then add:\n"
10150 		       "  \"trace_clock=local\"\n"
10151 		       "on the kernel command line\n");
10152 		tracing_set_clock(&global_trace, "global");
10153 	}
10154 }
10155 #else
10156 static inline void tracing_set_default_clock(void) { }
10157 #endif
10158 
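/*
 * Final boot-time fixups: honor tracepoint_printk_stop_on_boot, pick the
 * default trace clock and warn if the requested boot-up tracer was never
 * registered.
 */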
10159 __init static int late_trace_init(void)
10160 {
10161 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10162 		static_key_disable(&tracepoint_printk_key.key);
10163 		tracepoint_printk = 0;
10164 	}
10165 
10166 	tracing_set_default_clock();
10167 	clear_boot_tracer();
10168 	return 0;
10169 }
10170 
10171 late_initcall_sync(late_trace_init);
10172