xref: /openbmc/linux/kernel/trace/trace.c (revision b9221f71)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210 
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always recreate a new array. The write is an all or nothing
698 	 * operation. Always create a new array when adding new pids by
699 	 * the user. If the operation fails, then the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If the dump on oops happens, it will be much appreciated
821  * to not have to wait for all that output. Anyway this can be
822  * boot time and run time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * serialize the access of the ring buffer
838  *
839  * ring buffer serializes readers, but it is low level protection.
840  * The validity of the events (which returns by ring_buffer_peek() ..etc)
841  * are not protected by ring buffer.
842  *
843  * The content of events may become garbage if we allow other process consumes
844  * these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not reader page) in ring buffer, and this page will be rewritten
847  *      by events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to system.
850  *
851  * These primitives allow multi process access to different cpu ring buffer
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multi read-only access are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
912 static inline void trace_access_lock_init(void)
913 {
914 }
915 
916 #endif
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929 					unsigned int trace_ctx,
930 					int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934 				      struct trace_buffer *buffer,
935 				      unsigned long trace_ctx,
936 				      int skip, struct pt_regs *regs)
937 {
938 }
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff), that just want to
973 	 * know if the ring buffer has been disabled, but it can handle
974 	 * races of where it gets disabled but we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, snapshot can not be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164 
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:		The tracing instance to snapshot
1168  * @cond_data:	The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 	tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer. instead, resize it because
1239 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races of where it gets disabled but we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
1748 	|| defined(CONFIG_OSNOISE_TRACER)
1749 
1750 #define trace_create_maxlat_file(tr, d_tracer)				\
1751 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1752 			  &tr->max_latency, &tracing_max_lat_fops)
1753 
1754 #else
1755 #define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
1756 #endif
1757 
1758 #ifdef CONFIG_TRACER_MAX_TRACE
1759 /*
1760  * Copy the new maximum trace into the separate maximum-trace
1761  * structure. (this way the maximum trace is permanently saved,
1762  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1763  */
1764 static void
1765 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766 {
1767 	struct array_buffer *trace_buf = &tr->array_buffer;
1768 	struct array_buffer *max_buf = &tr->max_buffer;
1769 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771 
1772 	max_buf->cpu = cpu;
1773 	max_buf->time_start = data->preempt_timestamp;
1774 
1775 	max_data->saved_latency = tr->max_latency;
1776 	max_data->critical_start = data->critical_start;
1777 	max_data->critical_end = data->critical_end;
1778 
1779 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780 	max_data->pid = tsk->pid;
1781 	/*
1782 	 * If tsk == current, then use current_uid(), as that does not use
1783 	 * RCU. The irq tracer can be called out of RCU scope.
1784 	 */
1785 	if (tsk == current)
1786 		max_data->uid = current_uid();
1787 	else
1788 		max_data->uid = task_uid(tsk);
1789 
1790 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791 	max_data->policy = tsk->policy;
1792 	max_data->rt_priority = tsk->rt_priority;
1793 
1794 	/* record this tasks comm */
1795 	tracing_record_cmdline(tsk);
1796 	latency_fsnotify(tr);
1797 }
1798 
1799 /**
1800  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1801  * @tr: tracer
1802  * @tsk: the task with the latency
1803  * @cpu: The cpu that initiated the trace.
1804  * @cond_data: User data associated with a conditional snapshot
1805  *
1806  * Flip the buffers between the @tr and the max_tr and record information
1807  * about which task was the cause of this latency.
1808  */
1809 void
1810 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811 	      void *cond_data)
1812 {
1813 	if (tr->stop_count)
1814 		return;
1815 
1816 	WARN_ON_ONCE(!irqs_disabled());
1817 
1818 	if (!tr->allocated_snapshot) {
1819 		/* Only the nop tracer should hit this when disabling */
1820 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821 		return;
1822 	}
1823 
1824 	arch_spin_lock(&tr->max_lock);
1825 
1826 	/* Inherit the recordable setting from array_buffer */
1827 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828 		ring_buffer_record_on(tr->max_buffer.buffer);
1829 	else
1830 		ring_buffer_record_off(tr->max_buffer.buffer);
1831 
1832 #ifdef CONFIG_TRACER_SNAPSHOT
1833 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834 		goto out_unlock;
1835 #endif
1836 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837 
1838 	__update_max_tr(tr, tsk, cpu);
1839 
1840  out_unlock:
1841 	arch_spin_unlock(&tr->max_lock);
1842 }
1843 
1844 /**
1845  * update_max_tr_single - only copy one trace over, and reset the rest
1846  * @tr: tracer
1847  * @tsk: task with the latency
1848  * @cpu: the cpu of the buffer to copy.
1849  *
1850  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851  */
1852 void
1853 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854 {
1855 	int ret;
1856 
1857 	if (tr->stop_count)
1858 		return;
1859 
1860 	WARN_ON_ONCE(!irqs_disabled());
1861 	if (!tr->allocated_snapshot) {
1862 		/* Only the nop tracer should hit this when disabling */
1863 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864 		return;
1865 	}
1866 
1867 	arch_spin_lock(&tr->max_lock);
1868 
1869 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870 
1871 	if (ret == -EBUSY) {
1872 		/*
1873 		 * We failed to swap the buffer due to a commit taking
1874 		 * place on this CPU. We fail to record, but we reset
1875 		 * the max trace buffer (no one writes directly to it)
1876 		 * and flag that it failed.
1877 		 */
1878 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879 			"Failed to swap buffers due to commit in progress\n");
1880 	}
1881 
1882 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883 
1884 	__update_max_tr(tr, tsk, cpu);
1885 	arch_spin_unlock(&tr->max_lock);
1886 }
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888 
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891 	/* Iterators are static, they should be filled or empty */
1892 	if (trace_buffer_iter(iter, iter->cpu_file))
1893 		return 0;
1894 
1895 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896 				full);
1897 }
1898 
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901 
1902 struct trace_selftests {
1903 	struct list_head		list;
1904 	struct tracer			*type;
1905 };
1906 
1907 static LIST_HEAD(postponed_selftests);
1908 
1909 static int save_selftest(struct tracer *type)
1910 {
1911 	struct trace_selftests *selftest;
1912 
1913 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914 	if (!selftest)
1915 		return -ENOMEM;
1916 
1917 	selftest->type = type;
1918 	list_add(&selftest->list, &postponed_selftests);
1919 	return 0;
1920 }
1921 
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924 	struct trace_array *tr = &global_trace;
1925 	struct tracer *saved_tracer = tr->current_trace;
1926 	int ret;
1927 
1928 	if (!type->selftest || tracing_selftest_disabled)
1929 		return 0;
1930 
1931 	/*
1932 	 * If a tracer registers early in boot up (before scheduling is
1933 	 * initialized and such), then do not run its selftests yet.
1934 	 * Instead, run it a little later in the boot process.
1935 	 */
1936 	if (!selftests_can_run)
1937 		return save_selftest(type);
1938 
1939 	if (!tracing_is_on()) {
1940 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941 			type->name);
1942 		return 0;
1943 	}
1944 
1945 	/*
1946 	 * Run a selftest on this tracer.
1947 	 * Here we reset the trace buffer, and set the current
1948 	 * tracer to be this tracer. The tracer can then run some
1949 	 * internal tracing to verify that everything is in order.
1950 	 * If we fail, we do not register this tracer.
1951 	 */
1952 	tracing_reset_online_cpus(&tr->array_buffer);
1953 
1954 	tr->current_trace = type;
1955 
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957 	if (type->use_max_tr) {
1958 		/* If we expanded the buffers, make sure the max is expanded too */
1959 		if (ring_buffer_expanded)
1960 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961 					   RING_BUFFER_ALL_CPUS);
1962 		tr->allocated_snapshot = true;
1963 	}
1964 #endif
1965 
1966 	/* the test is responsible for initializing and enabling */
1967 	pr_info("Testing tracer %s: ", type->name);
1968 	ret = type->selftest(type, tr);
1969 	/* the test is responsible for resetting too */
1970 	tr->current_trace = saved_tracer;
1971 	if (ret) {
1972 		printk(KERN_CONT "FAILED!\n");
1973 		/* Add the warning after printing 'FAILED' */
1974 		WARN_ON(1);
1975 		return -1;
1976 	}
1977 	/* Only reset on passing, to avoid touching corrupted buffers */
1978 	tracing_reset_online_cpus(&tr->array_buffer);
1979 
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981 	if (type->use_max_tr) {
1982 		tr->allocated_snapshot = false;
1983 
1984 		/* Shrink the max buffer again */
1985 		if (ring_buffer_expanded)
1986 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1987 					   RING_BUFFER_ALL_CPUS);
1988 	}
1989 #endif
1990 
1991 	printk(KERN_CONT "PASSED\n");
1992 	return 0;
1993 }
1994 
1995 static __init int init_trace_selftests(void)
1996 {
1997 	struct trace_selftests *p, *n;
1998 	struct tracer *t, **last;
1999 	int ret;
2000 
2001 	selftests_can_run = true;
2002 
2003 	mutex_lock(&trace_types_lock);
2004 
2005 	if (list_empty(&postponed_selftests))
2006 		goto out;
2007 
2008 	pr_info("Running postponed tracer tests:\n");
2009 
2010 	tracing_selftest_running = true;
2011 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012 		/* This loop can take minutes when sanitizers are enabled, so
2013 		 * lets make sure we allow RCU processing.
2014 		 */
2015 		cond_resched();
2016 		ret = run_tracer_selftest(p->type);
2017 		/* If the test fails, then warn and remove from available_tracers */
2018 		if (ret < 0) {
2019 			WARN(1, "tracer: %s failed selftest, disabling\n",
2020 			     p->type->name);
2021 			last = &trace_types;
2022 			for (t = trace_types; t; t = t->next) {
2023 				if (t == p->type) {
2024 					*last = t->next;
2025 					break;
2026 				}
2027 				last = &t->next;
2028 			}
2029 		}
2030 		list_del(&p->list);
2031 		kfree(p);
2032 	}
2033 	tracing_selftest_running = false;
2034 
2035  out:
2036 	mutex_unlock(&trace_types_lock);
2037 
2038 	return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044 	return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047 
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049 
2050 static void __init apply_trace_boot_options(void);
2051 
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060 	struct tracer *t;
2061 	int ret = 0;
2062 
2063 	if (!type->name) {
2064 		pr_info("Tracer must have a name\n");
2065 		return -1;
2066 	}
2067 
2068 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070 		return -1;
2071 	}
2072 
2073 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074 		pr_warn("Can not register tracer %s due to lockdown\n",
2075 			   type->name);
2076 		return -EPERM;
2077 	}
2078 
2079 	mutex_lock(&trace_types_lock);
2080 
2081 	tracing_selftest_running = true;
2082 
2083 	for (t = trace_types; t; t = t->next) {
2084 		if (strcmp(type->name, t->name) == 0) {
2085 			/* already found */
2086 			pr_info("Tracer %s already registered\n",
2087 				type->name);
2088 			ret = -1;
2089 			goto out;
2090 		}
2091 	}
2092 
2093 	if (!type->set_flag)
2094 		type->set_flag = &dummy_set_flag;
2095 	if (!type->flags) {
2096 		/*allocate a dummy tracer_flags*/
2097 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098 		if (!type->flags) {
2099 			ret = -ENOMEM;
2100 			goto out;
2101 		}
2102 		type->flags->val = 0;
2103 		type->flags->opts = dummy_tracer_opt;
2104 	} else
2105 		if (!type->flags->opts)
2106 			type->flags->opts = dummy_tracer_opt;
2107 
2108 	/* store the tracer for __set_tracer_option */
2109 	type->flags->trace = type;
2110 
2111 	ret = run_tracer_selftest(type);
2112 	if (ret < 0)
2113 		goto out;
2114 
2115 	type->next = trace_types;
2116 	trace_types = type;
2117 	add_tracer_options(&global_trace, type);
2118 
2119  out:
2120 	tracing_selftest_running = false;
2121 	mutex_unlock(&trace_types_lock);
2122 
2123 	if (ret || !default_bootup_tracer)
2124 		goto out_unlock;
2125 
2126 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127 		goto out_unlock;
2128 
2129 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130 	/* Do we want this tracer to start on bootup? */
2131 	tracing_set_tracer(&global_trace, type->name);
2132 	default_bootup_tracer = NULL;
2133 
2134 	apply_trace_boot_options();
2135 
2136 	/* disable other selftests, since this will break it. */
2137 	disable_tracing_selftest("running a tracer");
2138 
2139  out_unlock:
2140 	return ret;
2141 }
2142 
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145 	struct trace_buffer *buffer = buf->buffer;
2146 
2147 	if (!buffer)
2148 		return;
2149 
2150 	ring_buffer_record_disable(buffer);
2151 
2152 	/* Make sure all commits have finished */
2153 	synchronize_rcu();
2154 	ring_buffer_reset_cpu(buffer, cpu);
2155 
2156 	ring_buffer_record_enable(buffer);
2157 }
2158 
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161 	struct trace_buffer *buffer = buf->buffer;
2162 
2163 	if (!buffer)
2164 		return;
2165 
2166 	ring_buffer_record_disable(buffer);
2167 
2168 	/* Make sure all commits have finished */
2169 	synchronize_rcu();
2170 
2171 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172 
2173 	ring_buffer_reset_online_cpus(buffer);
2174 
2175 	ring_buffer_record_enable(buffer);
2176 }
2177 
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus(void)
2180 {
2181 	struct trace_array *tr;
2182 
2183 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184 		if (!tr->clear_trace)
2185 			continue;
2186 		tr->clear_trace = false;
2187 		tracing_reset_online_cpus(&tr->array_buffer);
2188 #ifdef CONFIG_TRACER_MAX_TRACE
2189 		tracing_reset_online_cpus(&tr->max_buffer);
2190 #endif
2191 	}
2192 }
2193 
2194 /*
2195  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2196  * is the tgid last observed corresponding to pid=i.
2197  */
2198 static int *tgid_map;
2199 
2200 /* The maximum valid index into tgid_map. */
2201 static size_t tgid_map_max;
2202 
2203 #define SAVED_CMDLINES_DEFAULT 128
2204 #define NO_CMDLINE_MAP UINT_MAX
2205 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2206 struct saved_cmdlines_buffer {
2207 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2208 	unsigned *map_cmdline_to_pid;
2209 	unsigned cmdline_num;
2210 	int cmdline_idx;
2211 	char *saved_cmdlines;
2212 };
2213 static struct saved_cmdlines_buffer *savedcmd;
2214 
2215 static inline char *get_saved_cmdlines(int idx)
2216 {
2217 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2218 }
2219 
2220 static inline void set_cmdline(int idx, const char *cmdline)
2221 {
2222 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2223 }
2224 
2225 static int allocate_cmdlines_buffer(unsigned int val,
2226 				    struct saved_cmdlines_buffer *s)
2227 {
2228 	s->map_cmdline_to_pid = kmalloc_array(val,
2229 					      sizeof(*s->map_cmdline_to_pid),
2230 					      GFP_KERNEL);
2231 	if (!s->map_cmdline_to_pid)
2232 		return -ENOMEM;
2233 
2234 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2235 	if (!s->saved_cmdlines) {
2236 		kfree(s->map_cmdline_to_pid);
2237 		return -ENOMEM;
2238 	}
2239 
2240 	s->cmdline_idx = 0;
2241 	s->cmdline_num = val;
2242 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2243 	       sizeof(s->map_pid_to_cmdline));
2244 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2245 	       val * sizeof(*s->map_cmdline_to_pid));
2246 
2247 	return 0;
2248 }
2249 
2250 static int trace_create_savedcmd(void)
2251 {
2252 	int ret;
2253 
2254 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2255 	if (!savedcmd)
2256 		return -ENOMEM;
2257 
2258 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2259 	if (ret < 0) {
2260 		kfree(savedcmd);
2261 		savedcmd = NULL;
2262 		return -ENOMEM;
2263 	}
2264 
2265 	return 0;
2266 }
2267 
2268 int is_tracing_stopped(void)
2269 {
2270 	return global_trace.stop_count;
2271 }
2272 
2273 /**
2274  * tracing_start - quick start of the tracer
2275  *
2276  * If tracing is enabled but was stopped by tracing_stop,
2277  * this will start the tracer back up.
2278  */
2279 void tracing_start(void)
2280 {
2281 	struct trace_buffer *buffer;
2282 	unsigned long flags;
2283 
2284 	if (tracing_disabled)
2285 		return;
2286 
2287 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2288 	if (--global_trace.stop_count) {
2289 		if (global_trace.stop_count < 0) {
2290 			/* Someone screwed up their debugging */
2291 			WARN_ON_ONCE(1);
2292 			global_trace.stop_count = 0;
2293 		}
2294 		goto out;
2295 	}
2296 
2297 	/* Prevent the buffers from switching */
2298 	arch_spin_lock(&global_trace.max_lock);
2299 
2300 	buffer = global_trace.array_buffer.buffer;
2301 	if (buffer)
2302 		ring_buffer_record_enable(buffer);
2303 
2304 #ifdef CONFIG_TRACER_MAX_TRACE
2305 	buffer = global_trace.max_buffer.buffer;
2306 	if (buffer)
2307 		ring_buffer_record_enable(buffer);
2308 #endif
2309 
2310 	arch_spin_unlock(&global_trace.max_lock);
2311 
2312  out:
2313 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2314 }
2315 
2316 static void tracing_start_tr(struct trace_array *tr)
2317 {
2318 	struct trace_buffer *buffer;
2319 	unsigned long flags;
2320 
2321 	if (tracing_disabled)
2322 		return;
2323 
2324 	/* If global, we need to also start the max tracer */
2325 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2326 		return tracing_start();
2327 
2328 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2329 
2330 	if (--tr->stop_count) {
2331 		if (tr->stop_count < 0) {
2332 			/* Someone screwed up their debugging */
2333 			WARN_ON_ONCE(1);
2334 			tr->stop_count = 0;
2335 		}
2336 		goto out;
2337 	}
2338 
2339 	buffer = tr->array_buffer.buffer;
2340 	if (buffer)
2341 		ring_buffer_record_enable(buffer);
2342 
2343  out:
2344 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2345 }
2346 
2347 /**
2348  * tracing_stop - quick stop of the tracer
2349  *
2350  * Light weight way to stop tracing. Use in conjunction with
2351  * tracing_start.
2352  */
2353 void tracing_stop(void)
2354 {
2355 	struct trace_buffer *buffer;
2356 	unsigned long flags;
2357 
2358 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2359 	if (global_trace.stop_count++)
2360 		goto out;
2361 
2362 	/* Prevent the buffers from switching */
2363 	arch_spin_lock(&global_trace.max_lock);
2364 
2365 	buffer = global_trace.array_buffer.buffer;
2366 	if (buffer)
2367 		ring_buffer_record_disable(buffer);
2368 
2369 #ifdef CONFIG_TRACER_MAX_TRACE
2370 	buffer = global_trace.max_buffer.buffer;
2371 	if (buffer)
2372 		ring_buffer_record_disable(buffer);
2373 #endif
2374 
2375 	arch_spin_unlock(&global_trace.max_lock);
2376 
2377  out:
2378 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2379 }
2380 
2381 static void tracing_stop_tr(struct trace_array *tr)
2382 {
2383 	struct trace_buffer *buffer;
2384 	unsigned long flags;
2385 
2386 	/* If global, we need to also stop the max tracer */
2387 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2388 		return tracing_stop();
2389 
2390 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2391 	if (tr->stop_count++)
2392 		goto out;
2393 
2394 	buffer = tr->array_buffer.buffer;
2395 	if (buffer)
2396 		ring_buffer_record_disable(buffer);
2397 
2398  out:
2399 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2400 }
2401 
2402 static int trace_save_cmdline(struct task_struct *tsk)
2403 {
2404 	unsigned tpid, idx;
2405 
2406 	/* treat recording of idle task as a success */
2407 	if (!tsk->pid)
2408 		return 1;
2409 
2410 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2411 
2412 	/*
2413 	 * It's not the end of the world if we don't get
2414 	 * the lock, but we also don't want to spin
2415 	 * nor do we want to disable interrupts,
2416 	 * so if we miss here, then better luck next time.
2417 	 */
2418 	if (!arch_spin_trylock(&trace_cmdline_lock))
2419 		return 0;
2420 
2421 	idx = savedcmd->map_pid_to_cmdline[tpid];
2422 	if (idx == NO_CMDLINE_MAP) {
2423 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2424 
2425 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2426 		savedcmd->cmdline_idx = idx;
2427 	}
2428 
2429 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2430 	set_cmdline(idx, tsk->comm);
2431 
2432 	arch_spin_unlock(&trace_cmdline_lock);
2433 
2434 	return 1;
2435 }
2436 
2437 static void __trace_find_cmdline(int pid, char comm[])
2438 {
2439 	unsigned map;
2440 	int tpid;
2441 
2442 	if (!pid) {
2443 		strcpy(comm, "<idle>");
2444 		return;
2445 	}
2446 
2447 	if (WARN_ON_ONCE(pid < 0)) {
2448 		strcpy(comm, "<XXX>");
2449 		return;
2450 	}
2451 
2452 	tpid = pid & (PID_MAX_DEFAULT - 1);
2453 	map = savedcmd->map_pid_to_cmdline[tpid];
2454 	if (map != NO_CMDLINE_MAP) {
2455 		tpid = savedcmd->map_cmdline_to_pid[map];
2456 		if (tpid == pid) {
2457 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2458 			return;
2459 		}
2460 	}
2461 	strcpy(comm, "<...>");
2462 }
2463 
2464 void trace_find_cmdline(int pid, char comm[])
2465 {
2466 	preempt_disable();
2467 	arch_spin_lock(&trace_cmdline_lock);
2468 
2469 	__trace_find_cmdline(pid, comm);
2470 
2471 	arch_spin_unlock(&trace_cmdline_lock);
2472 	preempt_enable();
2473 }
2474 
2475 static int *trace_find_tgid_ptr(int pid)
2476 {
2477 	/*
2478 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2479 	 * if we observe a non-NULL tgid_map then we also observe the correct
2480 	 * tgid_map_max.
2481 	 */
2482 	int *map = smp_load_acquire(&tgid_map);
2483 
2484 	if (unlikely(!map || pid > tgid_map_max))
2485 		return NULL;
2486 
2487 	return &map[pid];
2488 }
2489 
2490 int trace_find_tgid(int pid)
2491 {
2492 	int *ptr = trace_find_tgid_ptr(pid);
2493 
2494 	return ptr ? *ptr : 0;
2495 }
2496 
2497 static int trace_save_tgid(struct task_struct *tsk)
2498 {
2499 	int *ptr;
2500 
2501 	/* treat recording of idle task as a success */
2502 	if (!tsk->pid)
2503 		return 1;
2504 
2505 	ptr = trace_find_tgid_ptr(tsk->pid);
2506 	if (!ptr)
2507 		return 0;
2508 
2509 	*ptr = tsk->tgid;
2510 	return 1;
2511 }
2512 
2513 static bool tracing_record_taskinfo_skip(int flags)
2514 {
2515 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2516 		return true;
2517 	if (!__this_cpu_read(trace_taskinfo_save))
2518 		return true;
2519 	return false;
2520 }
2521 
2522 /**
2523  * tracing_record_taskinfo - record the task info of a task
2524  *
2525  * @task:  task to record
2526  * @flags: TRACE_RECORD_CMDLINE for recording comm
2527  *         TRACE_RECORD_TGID for recording tgid
2528  */
2529 void tracing_record_taskinfo(struct task_struct *task, int flags)
2530 {
2531 	bool done;
2532 
2533 	if (tracing_record_taskinfo_skip(flags))
2534 		return;
2535 
2536 	/*
2537 	 * Record as much task information as possible. If some fail, continue
2538 	 * to try to record the others.
2539 	 */
2540 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2541 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2542 
2543 	/* If recording any information failed, retry again soon. */
2544 	if (!done)
2545 		return;
2546 
2547 	__this_cpu_write(trace_taskinfo_save, false);
2548 }
2549 
2550 /**
2551  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2552  *
2553  * @prev: previous task during sched_switch
2554  * @next: next task during sched_switch
2555  * @flags: TRACE_RECORD_CMDLINE for recording comm
2556  *         TRACE_RECORD_TGID for recording tgid
2557  */
2558 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2559 					  struct task_struct *next, int flags)
2560 {
2561 	bool done;
2562 
2563 	if (tracing_record_taskinfo_skip(flags))
2564 		return;
2565 
2566 	/*
2567 	 * Record as much task information as possible. If some fail, continue
2568 	 * to try to record the others.
2569 	 */
2570 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2571 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2572 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2574 
2575 	/* If recording any information failed, retry again soon. */
2576 	if (!done)
2577 		return;
2578 
2579 	__this_cpu_write(trace_taskinfo_save, false);
2580 }
2581 
2582 /* Helpers to record a specific task information */
2583 void tracing_record_cmdline(struct task_struct *task)
2584 {
2585 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2586 }
2587 
2588 void tracing_record_tgid(struct task_struct *task)
2589 {
2590 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2591 }
2592 
2593 /*
2594  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2595  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2596  * simplifies those functions and keeps them in sync.
2597  */
2598 enum print_line_t trace_handle_return(struct trace_seq *s)
2599 {
2600 	return trace_seq_has_overflowed(s) ?
2601 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2602 }
2603 EXPORT_SYMBOL_GPL(trace_handle_return);
2604 
2605 static unsigned short migration_disable_value(void)
2606 {
2607 #if defined(CONFIG_SMP)
2608 	return current->migration_disabled;
2609 #else
2610 	return 0;
2611 #endif
2612 }
2613 
2614 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2615 {
2616 	unsigned int trace_flags = irqs_status;
2617 	unsigned int pc;
2618 
2619 	pc = preempt_count();
2620 
2621 	if (pc & NMI_MASK)
2622 		trace_flags |= TRACE_FLAG_NMI;
2623 	if (pc & HARDIRQ_MASK)
2624 		trace_flags |= TRACE_FLAG_HARDIRQ;
2625 	if (in_serving_softirq())
2626 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2627 
2628 	if (tif_need_resched())
2629 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630 	if (test_preempt_need_resched())
2631 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
2635 
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638 			  int type,
2639 			  unsigned long len,
2640 			  unsigned int trace_ctx)
2641 {
2642 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644 
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648 
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to use a temporary
2653  * buffer to write the event data into if there's a likely chance
2654  * that it will not be committed. The discard of the ring buffer
2655  * is not as fast as committing, and is much slower than copying
2656  * a commit.
2657  *
2658  * When an event is to be filtered, allocate per cpu buffers to
2659  * write the event data into, and if the event is filtered and discarded
2660  * it is simply dropped, otherwise, the entire data is to be committed
2661  * in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665 	struct ring_buffer_event *event;
2666 	struct page *page;
2667 	int cpu;
2668 
2669 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670 
2671 	if (trace_buffered_event_ref++)
2672 		return;
2673 
2674 	for_each_tracing_cpu(cpu) {
2675 		page = alloc_pages_node(cpu_to_node(cpu),
2676 					GFP_KERNEL | __GFP_NORETRY, 0);
2677 		if (!page)
2678 			goto failed;
2679 
2680 		event = page_address(page);
2681 		memset(event, 0, sizeof(*event));
2682 
2683 		per_cpu(trace_buffered_event, cpu) = event;
2684 
2685 		preempt_disable();
2686 		if (cpu == smp_processor_id() &&
2687 		    __this_cpu_read(trace_buffered_event) !=
2688 		    per_cpu(trace_buffered_event, cpu))
2689 			WARN_ON_ONCE(1);
2690 		preempt_enable();
2691 	}
2692 
2693 	return;
2694  failed:
2695 	trace_buffered_event_disable();
2696 }
2697 
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700 	/* Probably not needed, but do it anyway */
2701 	smp_rmb();
2702 	this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704 
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707 	this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709 
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720 	int cpu;
2721 
2722 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723 
2724 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725 		return;
2726 
2727 	if (--trace_buffered_event_ref)
2728 		return;
2729 
2730 	preempt_disable();
2731 	/* For each CPU, set the buffer as used. */
2732 	smp_call_function_many(tracing_buffer_mask,
2733 			       disable_trace_buffered_event, NULL, 1);
2734 	preempt_enable();
2735 
2736 	/* Wait for all current users to finish */
2737 	synchronize_rcu();
2738 
2739 	for_each_tracing_cpu(cpu) {
2740 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741 		per_cpu(trace_buffered_event, cpu) = NULL;
2742 	}
2743 	/*
2744 	 * Make sure trace_buffered_event is NULL before clearing
2745 	 * trace_buffered_event_cnt.
2746 	 */
2747 	smp_wmb();
2748 
2749 	preempt_disable();
2750 	/* Do the work on each cpu */
2751 	smp_call_function_many(tracing_buffer_mask,
2752 			       enable_trace_buffered_event, NULL, 1);
2753 	preempt_enable();
2754 }
2755 
2756 static struct trace_buffer *temp_buffer;
2757 
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760 			  struct trace_event_file *trace_file,
2761 			  int type, unsigned long len,
2762 			  unsigned int trace_ctx)
2763 {
2764 	struct ring_buffer_event *entry;
2765 	struct trace_array *tr = trace_file->tr;
2766 	int val;
2767 
2768 	*current_rb = tr->array_buffer.buffer;
2769 
2770 	if (!tr->no_filter_buffering_ref &&
2771 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2772 	    (entry = this_cpu_read(trace_buffered_event))) {
2773 		/*
2774 		 * Filtering is on, so try to use the per cpu buffer first.
2775 		 * This buffer will simulate a ring_buffer_event,
2776 		 * where the type_len is zero and the array[0] will
2777 		 * hold the full length.
2778 		 * (see include/linux/ring-buffer.h for details on
2779 		 *  how the ring_buffer_event is structured).
2780 		 *
2781 		 * Using a temp buffer during filtering and copying it
2782 		 * on a matched filter is quicker than writing directly
2783 		 * into the ring buffer and then discarding it when
2784 		 * it doesn't match. That is because the discard
2785 		 * requires several atomic operations to get right.
2786 		 * Copying on match and doing nothing on a failed match
2787 		 * is still quicker than no copy on match, but having
2788 		 * to discard out of the ring buffer on a failed match.
2789 		 */
2790 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2791 
2792 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2793 
2794 		/*
2795 		 * Preemption is disabled, but interrupts and NMIs
2796 		 * can still come in now. If that happens after
2797 		 * the above increment, then it will have to go
2798 		 * back to the old method of allocating the event
2799 		 * on the ring buffer, and if the filter fails, it
2800 		 * will have to call ring_buffer_discard_commit()
2801 		 * to remove it.
2802 		 *
2803 		 * Need to also check the unlikely case that the
2804 		 * length is bigger than the temp buffer size.
2805 		 * If that happens, then the reserve is pretty much
2806 		 * guaranteed to fail, as the ring buffer currently
2807 		 * only allows events less than a page. But that may
2808 		 * change in the future, so let the ring buffer reserve
2809 		 * handle the failure in that case.
2810 		 */
2811 		if (val == 1 && likely(len <= max_len)) {
2812 			trace_event_setup(entry, type, trace_ctx);
2813 			entry->array[0] = len;
2814 			return entry;
2815 		}
2816 		this_cpu_dec(trace_buffered_event_cnt);
2817 	}
2818 
2819 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820 					    trace_ctx);
2821 	/*
2822 	 * If tracing is off, but we have triggers enabled
2823 	 * we still need to look at the event data. Use the temp_buffer
2824 	 * to store the trace event for the trigger to use. It's recursive
2825 	 * safe and will not be recorded anywhere.
2826 	 */
2827 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2828 		*current_rb = temp_buffer;
2829 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2830 						    trace_ctx);
2831 	}
2832 	return entry;
2833 }
2834 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2835 
2836 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2837 static DEFINE_MUTEX(tracepoint_printk_mutex);
2838 
2839 static void output_printk(struct trace_event_buffer *fbuffer)
2840 {
2841 	struct trace_event_call *event_call;
2842 	struct trace_event_file *file;
2843 	struct trace_event *event;
2844 	unsigned long flags;
2845 	struct trace_iterator *iter = tracepoint_print_iter;
2846 
2847 	/* We should never get here if iter is NULL */
2848 	if (WARN_ON_ONCE(!iter))
2849 		return;
2850 
2851 	event_call = fbuffer->trace_file->event_call;
2852 	if (!event_call || !event_call->event.funcs ||
2853 	    !event_call->event.funcs->trace)
2854 		return;
2855 
2856 	file = fbuffer->trace_file;
2857 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2858 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2859 	     !filter_match_preds(file->filter, fbuffer->entry)))
2860 		return;
2861 
2862 	event = &fbuffer->trace_file->event_call->event;
2863 
2864 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2865 	trace_seq_init(&iter->seq);
2866 	iter->ent = fbuffer->entry;
2867 	event_call->event.funcs->trace(iter, 0, event);
2868 	trace_seq_putc(&iter->seq, 0);
2869 	printk("%s", iter->seq.buffer);
2870 
2871 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2872 }
2873 
2874 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2875 			     void *buffer, size_t *lenp,
2876 			     loff_t *ppos)
2877 {
2878 	int save_tracepoint_printk;
2879 	int ret;
2880 
2881 	mutex_lock(&tracepoint_printk_mutex);
2882 	save_tracepoint_printk = tracepoint_printk;
2883 
2884 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2885 
2886 	/*
2887 	 * This will force exiting early, as tracepoint_printk
2888 	 * is always zero when tracepoint_printk_iter is not allocated
2889 	 */
2890 	if (!tracepoint_print_iter)
2891 		tracepoint_printk = 0;
2892 
2893 	if (save_tracepoint_printk == tracepoint_printk)
2894 		goto out;
2895 
2896 	if (tracepoint_printk)
2897 		static_key_enable(&tracepoint_printk_key.key);
2898 	else
2899 		static_key_disable(&tracepoint_printk_key.key);
2900 
2901  out:
2902 	mutex_unlock(&tracepoint_printk_mutex);
2903 
2904 	return ret;
2905 }
2906 
2907 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2908 {
2909 	enum event_trigger_type tt = ETT_NONE;
2910 	struct trace_event_file *file = fbuffer->trace_file;
2911 
2912 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2913 			fbuffer->entry, &tt))
2914 		goto discard;
2915 
2916 	if (static_key_false(&tracepoint_printk_key.key))
2917 		output_printk(fbuffer);
2918 
2919 	if (static_branch_unlikely(&trace_event_exports_enabled))
2920 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2921 
2922 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2923 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2924 
2925 discard:
2926 	if (tt)
2927 		event_triggers_post_call(file, tt);
2928 
2929 }
2930 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2931 
2932 /*
2933  * Skip 3:
2934  *
2935  *   trace_buffer_unlock_commit_regs()
2936  *   trace_event_buffer_commit()
2937  *   trace_event_raw_event_xxx()
2938  */
2939 # define STACK_SKIP 3
2940 
2941 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2942 				     struct trace_buffer *buffer,
2943 				     struct ring_buffer_event *event,
2944 				     unsigned int trace_ctx,
2945 				     struct pt_regs *regs)
2946 {
2947 	__buffer_unlock_commit(buffer, event);
2948 
2949 	/*
2950 	 * If regs is not set, then skip the necessary functions.
2951 	 * Note, we can still get here via blktrace, wakeup tracer
2952 	 * and mmiotrace, but that's ok if they lose a function or
2953 	 * two. They are not that meaningful.
2954 	 */
2955 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2956 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2957 }
2958 
2959 /*
2960  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2961  */
2962 void
2963 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2964 				   struct ring_buffer_event *event)
2965 {
2966 	__buffer_unlock_commit(buffer, event);
2967 }
2968 
2969 void
2970 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2971 	       parent_ip, unsigned int trace_ctx)
2972 {
2973 	struct trace_event_call *call = &event_function;
2974 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2975 	struct ring_buffer_event *event;
2976 	struct ftrace_entry *entry;
2977 
2978 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2979 					    trace_ctx);
2980 	if (!event)
2981 		return;
2982 	entry	= ring_buffer_event_data(event);
2983 	entry->ip			= ip;
2984 	entry->parent_ip		= parent_ip;
2985 
2986 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2987 		if (static_branch_unlikely(&trace_function_exports_enabled))
2988 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2989 		__buffer_unlock_commit(buffer, event);
2990 	}
2991 }
2992 
2993 #ifdef CONFIG_STACKTRACE
2994 
2995 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2996 #define FTRACE_KSTACK_NESTING	4
2997 
2998 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2999 
3000 struct ftrace_stack {
3001 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3002 };
3003 
3004 
3005 struct ftrace_stacks {
3006 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3007 };
3008 
3009 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3010 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3011 
3012 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3013 				 unsigned int trace_ctx,
3014 				 int skip, struct pt_regs *regs)
3015 {
3016 	struct trace_event_call *call = &event_kernel_stack;
3017 	struct ring_buffer_event *event;
3018 	unsigned int size, nr_entries;
3019 	struct ftrace_stack *fstack;
3020 	struct stack_entry *entry;
3021 	int stackidx;
3022 
3023 	/*
3024 	 * Add one, for this function and the call to save_stack_trace()
3025 	 * If regs is set, then these functions will not be in the way.
3026 	 */
3027 #ifndef CONFIG_UNWINDER_ORC
3028 	if (!regs)
3029 		skip++;
3030 #endif
3031 
3032 	preempt_disable_notrace();
3033 
3034 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3035 
3036 	/* This should never happen. If it does, yell once and skip */
3037 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3038 		goto out;
3039 
3040 	/*
3041 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3042 	 * interrupt will either see the value pre increment or post
3043 	 * increment. If the interrupt happens pre increment it will have
3044 	 * restored the counter when it returns.  We just need a barrier to
3045 	 * keep gcc from moving things around.
3046 	 */
3047 	barrier();
3048 
3049 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3050 	size = ARRAY_SIZE(fstack->calls);
3051 
3052 	if (regs) {
3053 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3054 						   size, skip);
3055 	} else {
3056 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3057 	}
3058 
3059 	size = nr_entries * sizeof(unsigned long);
3060 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3061 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3062 				    trace_ctx);
3063 	if (!event)
3064 		goto out;
3065 	entry = ring_buffer_event_data(event);
3066 
3067 	memcpy(&entry->caller, fstack->calls, size);
3068 	entry->size = nr_entries;
3069 
3070 	if (!call_filter_check_discard(call, entry, buffer, event))
3071 		__buffer_unlock_commit(buffer, event);
3072 
3073  out:
3074 	/* Again, don't let gcc optimize things here */
3075 	barrier();
3076 	__this_cpu_dec(ftrace_stack_reserve);
3077 	preempt_enable_notrace();
3078 
3079 }
3080 
3081 static inline void ftrace_trace_stack(struct trace_array *tr,
3082 				      struct trace_buffer *buffer,
3083 				      unsigned int trace_ctx,
3084 				      int skip, struct pt_regs *regs)
3085 {
3086 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3087 		return;
3088 
3089 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3090 }
3091 
3092 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3093 		   int skip)
3094 {
3095 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3096 
3097 	if (rcu_is_watching()) {
3098 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3099 		return;
3100 	}
3101 
3102 	/*
3103 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3104 	 * but if the above rcu_is_watching() failed, then the NMI
3105 	 * triggered someplace critical, and rcu_irq_enter() should
3106 	 * not be called from NMI.
3107 	 */
3108 	if (unlikely(in_nmi()))
3109 		return;
3110 
3111 	rcu_irq_enter_irqson();
3112 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3113 	rcu_irq_exit_irqson();
3114 }
3115 
3116 /**
3117  * trace_dump_stack - record a stack back trace in the trace buffer
3118  * @skip: Number of functions to skip (helper handlers)
3119  */
3120 void trace_dump_stack(int skip)
3121 {
3122 	if (tracing_disabled || tracing_selftest_running)
3123 		return;
3124 
3125 #ifndef CONFIG_UNWINDER_ORC
3126 	/* Skip 1 to skip this function. */
3127 	skip++;
3128 #endif
3129 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3130 			     tracing_gen_ctx(), skip, NULL);
3131 }
3132 EXPORT_SYMBOL_GPL(trace_dump_stack);
3133 
3134 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3135 static DEFINE_PER_CPU(int, user_stack_count);
3136 
3137 static void
3138 ftrace_trace_userstack(struct trace_array *tr,
3139 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3140 {
3141 	struct trace_event_call *call = &event_user_stack;
3142 	struct ring_buffer_event *event;
3143 	struct userstack_entry *entry;
3144 
3145 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3146 		return;
3147 
3148 	/*
3149 	 * NMIs can not handle page faults, even with fix ups.
3150 	 * The save user stack can (and often does) fault.
3151 	 */
3152 	if (unlikely(in_nmi()))
3153 		return;
3154 
3155 	/*
3156 	 * prevent recursion, since the user stack tracing may
3157 	 * trigger other kernel events.
3158 	 */
3159 	preempt_disable();
3160 	if (__this_cpu_read(user_stack_count))
3161 		goto out;
3162 
3163 	__this_cpu_inc(user_stack_count);
3164 
3165 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3166 					    sizeof(*entry), trace_ctx);
3167 	if (!event)
3168 		goto out_drop_count;
3169 	entry	= ring_buffer_event_data(event);
3170 
3171 	entry->tgid		= current->tgid;
3172 	memset(&entry->caller, 0, sizeof(entry->caller));
3173 
3174 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3175 	if (!call_filter_check_discard(call, entry, buffer, event))
3176 		__buffer_unlock_commit(buffer, event);
3177 
3178  out_drop_count:
3179 	__this_cpu_dec(user_stack_count);
3180  out:
3181 	preempt_enable();
3182 }
3183 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3184 static void ftrace_trace_userstack(struct trace_array *tr,
3185 				   struct trace_buffer *buffer,
3186 				   unsigned int trace_ctx)
3187 {
3188 }
3189 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3190 
3191 #endif /* CONFIG_STACKTRACE */
3192 
3193 static inline void
3194 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3195 			  unsigned long long delta)
3196 {
3197 	entry->bottom_delta_ts = delta & U32_MAX;
3198 	entry->top_delta_ts = (delta >> 32);
3199 }
3200 
3201 void trace_last_func_repeats(struct trace_array *tr,
3202 			     struct trace_func_repeats *last_info,
3203 			     unsigned int trace_ctx)
3204 {
3205 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3206 	struct func_repeats_entry *entry;
3207 	struct ring_buffer_event *event;
3208 	u64 delta;
3209 
3210 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3211 					    sizeof(*entry), trace_ctx);
3212 	if (!event)
3213 		return;
3214 
3215 	delta = ring_buffer_event_time_stamp(buffer, event) -
3216 		last_info->ts_last_call;
3217 
3218 	entry = ring_buffer_event_data(event);
3219 	entry->ip = last_info->ip;
3220 	entry->parent_ip = last_info->parent_ip;
3221 	entry->count = last_info->count;
3222 	func_repeats_set_delta_ts(entry, delta);
3223 
3224 	__buffer_unlock_commit(buffer, event);
3225 }
3226 
3227 /* created for use with alloc_percpu */
3228 struct trace_buffer_struct {
3229 	int nesting;
3230 	char buffer[4][TRACE_BUF_SIZE];
3231 };
3232 
3233 static struct trace_buffer_struct *trace_percpu_buffer;
3234 
3235 /*
3236  * This allows for lockless recording.  If we're nested too deeply, then
3237  * this returns NULL.
3238  */
3239 static char *get_trace_buf(void)
3240 {
3241 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3242 
3243 	if (!buffer || buffer->nesting >= 4)
3244 		return NULL;
3245 
3246 	buffer->nesting++;
3247 
3248 	/* Interrupts must see nesting incremented before we use the buffer */
3249 	barrier();
3250 	return &buffer->buffer[buffer->nesting - 1][0];
3251 }
3252 
3253 static void put_trace_buf(void)
3254 {
3255 	/* Don't let the decrement of nesting leak before this */
3256 	barrier();
3257 	this_cpu_dec(trace_percpu_buffer->nesting);
3258 }
3259 
3260 static int alloc_percpu_trace_buffer(void)
3261 {
3262 	struct trace_buffer_struct *buffers;
3263 
3264 	if (trace_percpu_buffer)
3265 		return 0;
3266 
3267 	buffers = alloc_percpu(struct trace_buffer_struct);
3268 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3269 		return -ENOMEM;
3270 
3271 	trace_percpu_buffer = buffers;
3272 	return 0;
3273 }
3274 
3275 static int buffers_allocated;
3276 
3277 void trace_printk_init_buffers(void)
3278 {
3279 	if (buffers_allocated)
3280 		return;
3281 
3282 	if (alloc_percpu_trace_buffer())
3283 		return;
3284 
3285 	/* trace_printk() is for debug use only. Don't use it in production. */
3286 
3287 	pr_warn("\n");
3288 	pr_warn("**********************************************************\n");
3289 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290 	pr_warn("**                                                      **\n");
3291 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3292 	pr_warn("**                                                      **\n");
3293 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3294 	pr_warn("** unsafe for production use.                           **\n");
3295 	pr_warn("**                                                      **\n");
3296 	pr_warn("** If you see this message and you are not debugging    **\n");
3297 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3298 	pr_warn("**                                                      **\n");
3299 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3300 	pr_warn("**********************************************************\n");
3301 
3302 	/* Expand the buffers to set size */
3303 	tracing_update_buffers();
3304 
3305 	buffers_allocated = 1;
3306 
3307 	/*
3308 	 * trace_printk_init_buffers() can be called by modules.
3309 	 * If that happens, then we need to start cmdline recording
3310 	 * directly here. If the global_trace.buffer is already
3311 	 * allocated here, then this was called by module code.
3312 	 */
3313 	if (global_trace.array_buffer.buffer)
3314 		tracing_start_cmdline_record();
3315 }
3316 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3317 
3318 void trace_printk_start_comm(void)
3319 {
3320 	/* Start tracing comms if trace printk is set */
3321 	if (!buffers_allocated)
3322 		return;
3323 	tracing_start_cmdline_record();
3324 }
3325 
3326 static void trace_printk_start_stop_comm(int enabled)
3327 {
3328 	if (!buffers_allocated)
3329 		return;
3330 
3331 	if (enabled)
3332 		tracing_start_cmdline_record();
3333 	else
3334 		tracing_stop_cmdline_record();
3335 }
3336 
3337 /**
3338  * trace_vbprintk - write binary msg to tracing buffer
3339  * @ip:    The address of the caller
3340  * @fmt:   The string format to write to the buffer
3341  * @args:  Arguments for @fmt
3342  */
3343 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3344 {
3345 	struct trace_event_call *call = &event_bprint;
3346 	struct ring_buffer_event *event;
3347 	struct trace_buffer *buffer;
3348 	struct trace_array *tr = &global_trace;
3349 	struct bprint_entry *entry;
3350 	unsigned int trace_ctx;
3351 	char *tbuffer;
3352 	int len = 0, size;
3353 
3354 	if (unlikely(tracing_selftest_running || tracing_disabled))
3355 		return 0;
3356 
3357 	/* Don't pollute graph traces with trace_vprintk internals */
3358 	pause_graph_tracing();
3359 
3360 	trace_ctx = tracing_gen_ctx();
3361 	preempt_disable_notrace();
3362 
3363 	tbuffer = get_trace_buf();
3364 	if (!tbuffer) {
3365 		len = 0;
3366 		goto out_nobuffer;
3367 	}
3368 
3369 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3370 
3371 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3372 		goto out_put;
3373 
3374 	size = sizeof(*entry) + sizeof(u32) * len;
3375 	buffer = tr->array_buffer.buffer;
3376 	ring_buffer_nest_start(buffer);
3377 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3378 					    trace_ctx);
3379 	if (!event)
3380 		goto out;
3381 	entry = ring_buffer_event_data(event);
3382 	entry->ip			= ip;
3383 	entry->fmt			= fmt;
3384 
3385 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3386 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3387 		__buffer_unlock_commit(buffer, event);
3388 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3389 	}
3390 
3391 out:
3392 	ring_buffer_nest_end(buffer);
3393 out_put:
3394 	put_trace_buf();
3395 
3396 out_nobuffer:
3397 	preempt_enable_notrace();
3398 	unpause_graph_tracing();
3399 
3400 	return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
3403 
3404 __printf(3, 0)
3405 static int
3406 __trace_array_vprintk(struct trace_buffer *buffer,
3407 		      unsigned long ip, const char *fmt, va_list args)
3408 {
3409 	struct trace_event_call *call = &event_print;
3410 	struct ring_buffer_event *event;
3411 	int len = 0, size;
3412 	struct print_entry *entry;
3413 	unsigned int trace_ctx;
3414 	char *tbuffer;
3415 
3416 	if (tracing_disabled || tracing_selftest_running)
3417 		return 0;
3418 
3419 	/* Don't pollute graph traces with trace_vprintk internals */
3420 	pause_graph_tracing();
3421 
3422 	trace_ctx = tracing_gen_ctx();
3423 	preempt_disable_notrace();
3424 
3425 
3426 	tbuffer = get_trace_buf();
3427 	if (!tbuffer) {
3428 		len = 0;
3429 		goto out_nobuffer;
3430 	}
3431 
3432 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3433 
3434 	size = sizeof(*entry) + len + 1;
3435 	ring_buffer_nest_start(buffer);
3436 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3437 					    trace_ctx);
3438 	if (!event)
3439 		goto out;
3440 	entry = ring_buffer_event_data(event);
3441 	entry->ip = ip;
3442 
3443 	memcpy(&entry->buf, tbuffer, len + 1);
3444 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3445 		__buffer_unlock_commit(buffer, event);
3446 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3447 	}
3448 
3449 out:
3450 	ring_buffer_nest_end(buffer);
3451 	put_trace_buf();
3452 
3453 out_nobuffer:
3454 	preempt_enable_notrace();
3455 	unpause_graph_tracing();
3456 
3457 	return len;
3458 }
3459 
3460 __printf(3, 0)
3461 int trace_array_vprintk(struct trace_array *tr,
3462 			unsigned long ip, const char *fmt, va_list args)
3463 {
3464 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3465 }
3466 
3467 /**
3468  * trace_array_printk - Print a message to a specific instance
3469  * @tr: The instance trace_array descriptor
3470  * @ip: The instruction pointer that this is called from.
3471  * @fmt: The format to print (printf format)
3472  *
3473  * If a subsystem sets up its own instance, they have the right to
3474  * printk strings into their tracing instance buffer using this
3475  * function. Note, this function will not write into the top level
3476  * buffer (use trace_printk() for that), as writing into the top level
3477  * buffer should only have events that can be individually disabled.
3478  * trace_printk() is only used for debugging a kernel, and should not
3479  * be ever incorporated in normal use.
3480  *
3481  * trace_array_printk() can be used, as it will not add noise to the
3482  * top level tracing buffer.
3483  *
3484  * Note, trace_array_init_printk() must be called on @tr before this
3485  * can be used.
3486  */
3487 __printf(3, 0)
3488 int trace_array_printk(struct trace_array *tr,
3489 		       unsigned long ip, const char *fmt, ...)
3490 {
3491 	int ret;
3492 	va_list ap;
3493 
3494 	if (!tr)
3495 		return -ENOENT;
3496 
3497 	/* This is only allowed for created instances */
3498 	if (tr == &global_trace)
3499 		return 0;
3500 
3501 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3502 		return 0;
3503 
3504 	va_start(ap, fmt);
3505 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3506 	va_end(ap);
3507 	return ret;
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_printk);
3510 
3511 /**
3512  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3513  * @tr: The trace array to initialize the buffers for
3514  *
3515  * As trace_array_printk() only writes into instances, they are OK to
3516  * have in the kernel (unlike trace_printk()). This needs to be called
3517  * before trace_array_printk() can be used on a trace_array.
3518  */
3519 int trace_array_init_printk(struct trace_array *tr)
3520 {
3521 	if (!tr)
3522 		return -ENOENT;
3523 
3524 	/* This is only allowed for created instances */
3525 	if (tr == &global_trace)
3526 		return -EINVAL;
3527 
3528 	return alloc_percpu_trace_buffer();
3529 }
3530 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3531 
3532 __printf(3, 4)
3533 int trace_array_printk_buf(struct trace_buffer *buffer,
3534 			   unsigned long ip, const char *fmt, ...)
3535 {
3536 	int ret;
3537 	va_list ap;
3538 
3539 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3540 		return 0;
3541 
3542 	va_start(ap, fmt);
3543 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544 	va_end(ap);
3545 	return ret;
3546 }
3547 
3548 __printf(2, 0)
3549 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3550 {
3551 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3552 }
3553 EXPORT_SYMBOL_GPL(trace_vprintk);
3554 
3555 static void trace_iterator_increment(struct trace_iterator *iter)
3556 {
3557 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3558 
3559 	iter->idx++;
3560 	if (buf_iter)
3561 		ring_buffer_iter_advance(buf_iter);
3562 }
3563 
3564 static struct trace_entry *
3565 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3566 		unsigned long *lost_events)
3567 {
3568 	struct ring_buffer_event *event;
3569 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3570 
3571 	if (buf_iter) {
3572 		event = ring_buffer_iter_peek(buf_iter, ts);
3573 		if (lost_events)
3574 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3575 				(unsigned long)-1 : 0;
3576 	} else {
3577 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3578 					 lost_events);
3579 	}
3580 
3581 	if (event) {
3582 		iter->ent_size = ring_buffer_event_length(event);
3583 		return ring_buffer_event_data(event);
3584 	}
3585 	iter->ent_size = 0;
3586 	return NULL;
3587 }
3588 
3589 static struct trace_entry *
3590 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3591 		  unsigned long *missing_events, u64 *ent_ts)
3592 {
3593 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3594 	struct trace_entry *ent, *next = NULL;
3595 	unsigned long lost_events = 0, next_lost = 0;
3596 	int cpu_file = iter->cpu_file;
3597 	u64 next_ts = 0, ts;
3598 	int next_cpu = -1;
3599 	int next_size = 0;
3600 	int cpu;
3601 
3602 	/*
3603 	 * If we are in a per_cpu trace file, don't bother by iterating over
3604 	 * all cpu and peek directly.
3605 	 */
3606 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3607 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3608 			return NULL;
3609 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3610 		if (ent_cpu)
3611 			*ent_cpu = cpu_file;
3612 
3613 		return ent;
3614 	}
3615 
3616 	for_each_tracing_cpu(cpu) {
3617 
3618 		if (ring_buffer_empty_cpu(buffer, cpu))
3619 			continue;
3620 
3621 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3622 
3623 		/*
3624 		 * Pick the entry with the smallest timestamp:
3625 		 */
3626 		if (ent && (!next || ts < next_ts)) {
3627 			next = ent;
3628 			next_cpu = cpu;
3629 			next_ts = ts;
3630 			next_lost = lost_events;
3631 			next_size = iter->ent_size;
3632 		}
3633 	}
3634 
3635 	iter->ent_size = next_size;
3636 
3637 	if (ent_cpu)
3638 		*ent_cpu = next_cpu;
3639 
3640 	if (ent_ts)
3641 		*ent_ts = next_ts;
3642 
3643 	if (missing_events)
3644 		*missing_events = next_lost;
3645 
3646 	return next;
3647 }
3648 
3649 #define STATIC_FMT_BUF_SIZE	128
3650 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3651 
3652 static char *trace_iter_expand_format(struct trace_iterator *iter)
3653 {
3654 	char *tmp;
3655 
3656 	/*
3657 	 * iter->tr is NULL when used with tp_printk, which makes
3658 	 * this get called where it is not safe to call krealloc().
3659 	 */
3660 	if (!iter->tr || iter->fmt == static_fmt_buf)
3661 		return NULL;
3662 
3663 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3664 		       GFP_KERNEL);
3665 	if (tmp) {
3666 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3667 		iter->fmt = tmp;
3668 	}
3669 
3670 	return tmp;
3671 }
3672 
3673 /* Returns true if the string is safe to dereference from an event */
3674 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3675 {
3676 	unsigned long addr = (unsigned long)str;
3677 	struct trace_event *trace_event;
3678 	struct trace_event_call *event;
3679 
3680 	/* OK if part of the event data */
3681 	if ((addr >= (unsigned long)iter->ent) &&
3682 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3683 		return true;
3684 
3685 	/* OK if part of the temp seq buffer */
3686 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3687 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3688 		return true;
3689 
3690 	/* Core rodata can not be freed */
3691 	if (is_kernel_rodata(addr))
3692 		return true;
3693 
3694 	if (trace_is_tracepoint_string(str))
3695 		return true;
3696 
3697 	/*
3698 	 * Now this could be a module event, referencing core module
3699 	 * data, which is OK.
3700 	 */
3701 	if (!iter->ent)
3702 		return false;
3703 
3704 	trace_event = ftrace_find_event(iter->ent->type);
3705 	if (!trace_event)
3706 		return false;
3707 
3708 	event = container_of(trace_event, struct trace_event_call, event);
3709 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3710 		return false;
3711 
3712 	/* Would rather have rodata, but this will suffice */
3713 	if (within_module_core(addr, event->module))
3714 		return true;
3715 
3716 	return false;
3717 }
3718 
3719 static const char *show_buffer(struct trace_seq *s)
3720 {
3721 	struct seq_buf *seq = &s->seq;
3722 
3723 	seq_buf_terminate(seq);
3724 
3725 	return seq->buffer;
3726 }
3727 
3728 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3729 
3730 static int test_can_verify_check(const char *fmt, ...)
3731 {
3732 	char buf[16];
3733 	va_list ap;
3734 	int ret;
3735 
3736 	/*
3737 	 * The verifier is dependent on vsnprintf() modifies the va_list
3738 	 * passed to it, where it is sent as a reference. Some architectures
3739 	 * (like x86_32) passes it by value, which means that vsnprintf()
3740 	 * does not modify the va_list passed to it, and the verifier
3741 	 * would then need to be able to understand all the values that
3742 	 * vsnprintf can use. If it is passed by value, then the verifier
3743 	 * is disabled.
3744 	 */
3745 	va_start(ap, fmt);
3746 	vsnprintf(buf, 16, "%d", ap);
3747 	ret = va_arg(ap, int);
3748 	va_end(ap);
3749 
3750 	return ret;
3751 }
3752 
3753 static void test_can_verify(void)
3754 {
3755 	if (!test_can_verify_check("%d %d", 0, 1)) {
3756 		pr_info("trace event string verifier disabled\n");
3757 		static_branch_inc(&trace_no_verify);
3758 	}
3759 }
3760 
3761 /**
3762  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3763  * @iter: The iterator that holds the seq buffer and the event being printed
3764  * @fmt: The format used to print the event
3765  * @ap: The va_list holding the data to print from @fmt.
3766  *
3767  * This writes the data into the @iter->seq buffer using the data from
3768  * @fmt and @ap. If the format has a %s, then the source of the string
3769  * is examined to make sure it is safe to print, otherwise it will
3770  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3771  * pointer.
3772  */
3773 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3774 			 va_list ap)
3775 {
3776 	const char *p = fmt;
3777 	const char *str;
3778 	int i, j;
3779 
3780 	if (WARN_ON_ONCE(!fmt))
3781 		return;
3782 
3783 	if (static_branch_unlikely(&trace_no_verify))
3784 		goto print;
3785 
3786 	/* Don't bother checking when doing a ftrace_dump() */
3787 	if (iter->fmt == static_fmt_buf)
3788 		goto print;
3789 
3790 	while (*p) {
3791 		bool star = false;
3792 		int len = 0;
3793 
3794 		j = 0;
3795 
3796 		/* We only care about %s and variants */
3797 		for (i = 0; p[i]; i++) {
3798 			if (i + 1 >= iter->fmt_size) {
3799 				/*
3800 				 * If we can't expand the copy buffer,
3801 				 * just print it.
3802 				 */
3803 				if (!trace_iter_expand_format(iter))
3804 					goto print;
3805 			}
3806 
3807 			if (p[i] == '\\' && p[i+1]) {
3808 				i++;
3809 				continue;
3810 			}
3811 			if (p[i] == '%') {
3812 				/* Need to test cases like %08.*s */
3813 				for (j = 1; p[i+j]; j++) {
3814 					if (isdigit(p[i+j]) ||
3815 					    p[i+j] == '.')
3816 						continue;
3817 					if (p[i+j] == '*') {
3818 						star = true;
3819 						continue;
3820 					}
3821 					break;
3822 				}
3823 				if (p[i+j] == 's')
3824 					break;
3825 				star = false;
3826 			}
3827 			j = 0;
3828 		}
3829 		/* If no %s found then just print normally */
3830 		if (!p[i])
3831 			break;
3832 
3833 		/* Copy up to the %s, and print that */
3834 		strncpy(iter->fmt, p, i);
3835 		iter->fmt[i] = '\0';
3836 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3837 
3838 		if (star)
3839 			len = va_arg(ap, int);
3840 
3841 		/* The ap now points to the string data of the %s */
3842 		str = va_arg(ap, const char *);
3843 
3844 		/*
3845 		 * If you hit this warning, it is likely that the
3846 		 * trace event in question used %s on a string that
3847 		 * was saved at the time of the event, but may not be
3848 		 * around when the trace is read. Use __string(),
3849 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3850 		 * instead. See samples/trace_events/trace-events-sample.h
3851 		 * for reference.
3852 		 */
3853 		if (WARN_ONCE(!trace_safe_str(iter, str),
3854 			      "fmt: '%s' current_buffer: '%s'",
3855 			      fmt, show_buffer(&iter->seq))) {
3856 			int ret;
3857 
3858 			/* Try to safely read the string */
3859 			if (star) {
3860 				if (len + 1 > iter->fmt_size)
3861 					len = iter->fmt_size - 1;
3862 				if (len < 0)
3863 					len = 0;
3864 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3865 				iter->fmt[len] = 0;
3866 				star = false;
3867 			} else {
3868 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3869 								  iter->fmt_size);
3870 			}
3871 			if (ret < 0)
3872 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3873 			else
3874 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3875 						 str, iter->fmt);
3876 			str = "[UNSAFE-MEMORY]";
3877 			strcpy(iter->fmt, "%s");
3878 		} else {
3879 			strncpy(iter->fmt, p + i, j + 1);
3880 			iter->fmt[j+1] = '\0';
3881 		}
3882 		if (star)
3883 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3884 		else
3885 			trace_seq_printf(&iter->seq, iter->fmt, str);
3886 
3887 		p += i + j + 1;
3888 	}
3889  print:
3890 	if (*p)
3891 		trace_seq_vprintf(&iter->seq, p, ap);
3892 }
3893 
3894 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3895 {
3896 	const char *p, *new_fmt;
3897 	char *q;
3898 
3899 	if (WARN_ON_ONCE(!fmt))
3900 		return fmt;
3901 
3902 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3903 		return fmt;
3904 
3905 	p = fmt;
3906 	new_fmt = q = iter->fmt;
3907 	while (*p) {
3908 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3909 			if (!trace_iter_expand_format(iter))
3910 				return fmt;
3911 
3912 			q += iter->fmt - new_fmt;
3913 			new_fmt = iter->fmt;
3914 		}
3915 
3916 		*q++ = *p++;
3917 
3918 		/* Replace %p with %px */
3919 		if (p[-1] == '%') {
3920 			if (p[0] == '%') {
3921 				*q++ = *p++;
3922 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3923 				*q++ = *p++;
3924 				*q++ = 'x';
3925 			}
3926 		}
3927 	}
3928 	*q = '\0';
3929 
3930 	return new_fmt;
3931 }
3932 
3933 #define STATIC_TEMP_BUF_SIZE	128
3934 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3935 
3936 /* Find the next real entry, without updating the iterator itself */
3937 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3938 					  int *ent_cpu, u64 *ent_ts)
3939 {
3940 	/* __find_next_entry will reset ent_size */
3941 	int ent_size = iter->ent_size;
3942 	struct trace_entry *entry;
3943 
3944 	/*
3945 	 * If called from ftrace_dump(), then the iter->temp buffer
3946 	 * will be the static_temp_buf and not created from kmalloc.
3947 	 * If the entry size is greater than the buffer, we can
3948 	 * not save it. Just return NULL in that case. This is only
3949 	 * used to add markers when two consecutive events' time
3950 	 * stamps have a large delta. See trace_print_lat_context()
3951 	 */
3952 	if (iter->temp == static_temp_buf &&
3953 	    STATIC_TEMP_BUF_SIZE < ent_size)
3954 		return NULL;
3955 
3956 	/*
3957 	 * The __find_next_entry() may call peek_next_entry(), which may
3958 	 * call ring_buffer_peek() that may make the contents of iter->ent
3959 	 * undefined. Need to copy iter->ent now.
3960 	 */
3961 	if (iter->ent && iter->ent != iter->temp) {
3962 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3963 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3964 			void *temp;
3965 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3966 			if (!temp)
3967 				return NULL;
3968 			kfree(iter->temp);
3969 			iter->temp = temp;
3970 			iter->temp_size = iter->ent_size;
3971 		}
3972 		memcpy(iter->temp, iter->ent, iter->ent_size);
3973 		iter->ent = iter->temp;
3974 	}
3975 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3976 	/* Put back the original ent_size */
3977 	iter->ent_size = ent_size;
3978 
3979 	return entry;
3980 }
3981 
3982 /* Find the next real entry, and increment the iterator to the next entry */
3983 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3984 {
3985 	iter->ent = __find_next_entry(iter, &iter->cpu,
3986 				      &iter->lost_events, &iter->ts);
3987 
3988 	if (iter->ent)
3989 		trace_iterator_increment(iter);
3990 
3991 	return iter->ent ? iter : NULL;
3992 }
3993 
3994 static void trace_consume(struct trace_iterator *iter)
3995 {
3996 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3997 			    &iter->lost_events);
3998 }
3999 
4000 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4001 {
4002 	struct trace_iterator *iter = m->private;
4003 	int i = (int)*pos;
4004 	void *ent;
4005 
4006 	WARN_ON_ONCE(iter->leftover);
4007 
4008 	(*pos)++;
4009 
4010 	/* can't go backwards */
4011 	if (iter->idx > i)
4012 		return NULL;
4013 
4014 	if (iter->idx < 0)
4015 		ent = trace_find_next_entry_inc(iter);
4016 	else
4017 		ent = iter;
4018 
4019 	while (ent && iter->idx < i)
4020 		ent = trace_find_next_entry_inc(iter);
4021 
4022 	iter->pos = *pos;
4023 
4024 	return ent;
4025 }
4026 
4027 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4028 {
4029 	struct ring_buffer_iter *buf_iter;
4030 	unsigned long entries = 0;
4031 	u64 ts;
4032 
4033 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4034 
4035 	buf_iter = trace_buffer_iter(iter, cpu);
4036 	if (!buf_iter)
4037 		return;
4038 
4039 	ring_buffer_iter_reset(buf_iter);
4040 
4041 	/*
4042 	 * We could have the case with the max latency tracers
4043 	 * that a reset never took place on a cpu. This is evident
4044 	 * by the timestamp being before the start of the buffer.
4045 	 */
4046 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4047 		if (ts >= iter->array_buffer->time_start)
4048 			break;
4049 		entries++;
4050 		ring_buffer_iter_advance(buf_iter);
4051 	}
4052 
4053 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4054 }
4055 
4056 /*
4057  * The current tracer is copied to avoid a global locking
4058  * all around.
4059  */
4060 static void *s_start(struct seq_file *m, loff_t *pos)
4061 {
4062 	struct trace_iterator *iter = m->private;
4063 	struct trace_array *tr = iter->tr;
4064 	int cpu_file = iter->cpu_file;
4065 	void *p = NULL;
4066 	loff_t l = 0;
4067 	int cpu;
4068 
4069 	/*
4070 	 * copy the tracer to avoid using a global lock all around.
4071 	 * iter->trace is a copy of current_trace, the pointer to the
4072 	 * name may be used instead of a strcmp(), as iter->trace->name
4073 	 * will point to the same string as current_trace->name.
4074 	 */
4075 	mutex_lock(&trace_types_lock);
4076 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4077 		*iter->trace = *tr->current_trace;
4078 	mutex_unlock(&trace_types_lock);
4079 
4080 #ifdef CONFIG_TRACER_MAX_TRACE
4081 	if (iter->snapshot && iter->trace->use_max_tr)
4082 		return ERR_PTR(-EBUSY);
4083 #endif
4084 
4085 	if (*pos != iter->pos) {
4086 		iter->ent = NULL;
4087 		iter->cpu = 0;
4088 		iter->idx = -1;
4089 
4090 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4091 			for_each_tracing_cpu(cpu)
4092 				tracing_iter_reset(iter, cpu);
4093 		} else
4094 			tracing_iter_reset(iter, cpu_file);
4095 
4096 		iter->leftover = 0;
4097 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4098 			;
4099 
4100 	} else {
4101 		/*
4102 		 * If we overflowed the seq_file before, then we want
4103 		 * to just reuse the trace_seq buffer again.
4104 		 */
4105 		if (iter->leftover)
4106 			p = iter;
4107 		else {
4108 			l = *pos - 1;
4109 			p = s_next(m, p, &l);
4110 		}
4111 	}
4112 
4113 	trace_event_read_lock();
4114 	trace_access_lock(cpu_file);
4115 	return p;
4116 }
4117 
4118 static void s_stop(struct seq_file *m, void *p)
4119 {
4120 	struct trace_iterator *iter = m->private;
4121 
4122 #ifdef CONFIG_TRACER_MAX_TRACE
4123 	if (iter->snapshot && iter->trace->use_max_tr)
4124 		return;
4125 #endif
4126 
4127 	trace_access_unlock(iter->cpu_file);
4128 	trace_event_read_unlock();
4129 }
4130 
4131 static void
4132 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4133 		      unsigned long *entries, int cpu)
4134 {
4135 	unsigned long count;
4136 
4137 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4138 	/*
4139 	 * If this buffer has skipped entries, then we hold all
4140 	 * entries for the trace and we need to ignore the
4141 	 * ones before the time stamp.
4142 	 */
4143 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4144 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4145 		/* total is the same as the entries */
4146 		*total = count;
4147 	} else
4148 		*total = count +
4149 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4150 	*entries = count;
4151 }
4152 
4153 static void
4154 get_total_entries(struct array_buffer *buf,
4155 		  unsigned long *total, unsigned long *entries)
4156 {
4157 	unsigned long t, e;
4158 	int cpu;
4159 
4160 	*total = 0;
4161 	*entries = 0;
4162 
4163 	for_each_tracing_cpu(cpu) {
4164 		get_total_entries_cpu(buf, &t, &e, cpu);
4165 		*total += t;
4166 		*entries += e;
4167 	}
4168 }
4169 
4170 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4171 {
4172 	unsigned long total, entries;
4173 
4174 	if (!tr)
4175 		tr = &global_trace;
4176 
4177 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4178 
4179 	return entries;
4180 }
4181 
4182 unsigned long trace_total_entries(struct trace_array *tr)
4183 {
4184 	unsigned long total, entries;
4185 
4186 	if (!tr)
4187 		tr = &global_trace;
4188 
4189 	get_total_entries(&tr->array_buffer, &total, &entries);
4190 
4191 	return entries;
4192 }
4193 
4194 static void print_lat_help_header(struct seq_file *m)
4195 {
4196 	seq_puts(m, "#                    _------=> CPU#            \n"
4197 		    "#                   / _-----=> irqs-off        \n"
4198 		    "#                  | / _----=> need-resched    \n"
4199 		    "#                  || / _---=> hardirq/softirq \n"
4200 		    "#                  ||| / _--=> preempt-depth   \n"
4201 		    "#                  |||| / _-=> migrate-disable \n"
4202 		    "#                  ||||| /     delay           \n"
4203 		    "#  cmd     pid     |||||| time  |   caller     \n"
4204 		    "#     \\   /        ||||||  \\    |    /       \n");
4205 }
4206 
4207 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4208 {
4209 	unsigned long total;
4210 	unsigned long entries;
4211 
4212 	get_total_entries(buf, &total, &entries);
4213 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4214 		   entries, total, num_online_cpus());
4215 	seq_puts(m, "#\n");
4216 }
4217 
4218 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4219 				   unsigned int flags)
4220 {
4221 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222 
4223 	print_event_info(buf, m);
4224 
4225 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4226 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4227 }
4228 
4229 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4230 				       unsigned int flags)
4231 {
4232 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4233 	const char *space = "            ";
4234 	int prec = tgid ? 12 : 2;
4235 
4236 	print_event_info(buf, m);
4237 
4238 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4239 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4240 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4241 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4242 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4243 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4244 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4245 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4246 }
4247 
4248 void
4249 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4250 {
4251 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4252 	struct array_buffer *buf = iter->array_buffer;
4253 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4254 	struct tracer *type = iter->trace;
4255 	unsigned long entries;
4256 	unsigned long total;
4257 	const char *name = "preemption";
4258 
4259 	name = type->name;
4260 
4261 	get_total_entries(buf, &total, &entries);
4262 
4263 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4264 		   name, UTS_RELEASE);
4265 	seq_puts(m, "# -----------------------------------"
4266 		 "---------------------------------\n");
4267 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4268 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4269 		   nsecs_to_usecs(data->saved_latency),
4270 		   entries,
4271 		   total,
4272 		   buf->cpu,
4273 #if defined(CONFIG_PREEMPT_NONE)
4274 		   "server",
4275 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4276 		   "desktop",
4277 #elif defined(CONFIG_PREEMPT)
4278 		   "preempt",
4279 #elif defined(CONFIG_PREEMPT_RT)
4280 		   "preempt_rt",
4281 #else
4282 		   "unknown",
4283 #endif
4284 		   /* These are reserved for later use */
4285 		   0, 0, 0, 0);
4286 #ifdef CONFIG_SMP
4287 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4288 #else
4289 	seq_puts(m, ")\n");
4290 #endif
4291 	seq_puts(m, "#    -----------------\n");
4292 	seq_printf(m, "#    | task: %.16s-%d "
4293 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4294 		   data->comm, data->pid,
4295 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4296 		   data->policy, data->rt_priority);
4297 	seq_puts(m, "#    -----------------\n");
4298 
4299 	if (data->critical_start) {
4300 		seq_puts(m, "#  => started at: ");
4301 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4302 		trace_print_seq(m, &iter->seq);
4303 		seq_puts(m, "\n#  => ended at:   ");
4304 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4305 		trace_print_seq(m, &iter->seq);
4306 		seq_puts(m, "\n#\n");
4307 	}
4308 
4309 	seq_puts(m, "#\n");
4310 }
4311 
4312 static void test_cpu_buff_start(struct trace_iterator *iter)
4313 {
4314 	struct trace_seq *s = &iter->seq;
4315 	struct trace_array *tr = iter->tr;
4316 
4317 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4318 		return;
4319 
4320 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4321 		return;
4322 
4323 	if (cpumask_available(iter->started) &&
4324 	    cpumask_test_cpu(iter->cpu, iter->started))
4325 		return;
4326 
4327 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4328 		return;
4329 
4330 	if (cpumask_available(iter->started))
4331 		cpumask_set_cpu(iter->cpu, iter->started);
4332 
4333 	/* Don't print started cpu buffer for the first entry of the trace */
4334 	if (iter->idx > 1)
4335 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4336 				iter->cpu);
4337 }
4338 
4339 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4340 {
4341 	struct trace_array *tr = iter->tr;
4342 	struct trace_seq *s = &iter->seq;
4343 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4344 	struct trace_entry *entry;
4345 	struct trace_event *event;
4346 
4347 	entry = iter->ent;
4348 
4349 	test_cpu_buff_start(iter);
4350 
4351 	event = ftrace_find_event(entry->type);
4352 
4353 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4354 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4355 			trace_print_lat_context(iter);
4356 		else
4357 			trace_print_context(iter);
4358 	}
4359 
4360 	if (trace_seq_has_overflowed(s))
4361 		return TRACE_TYPE_PARTIAL_LINE;
4362 
4363 	if (event)
4364 		return event->funcs->trace(iter, sym_flags, event);
4365 
4366 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4367 
4368 	return trace_handle_return(s);
4369 }
4370 
4371 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4372 {
4373 	struct trace_array *tr = iter->tr;
4374 	struct trace_seq *s = &iter->seq;
4375 	struct trace_entry *entry;
4376 	struct trace_event *event;
4377 
4378 	entry = iter->ent;
4379 
4380 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4381 		trace_seq_printf(s, "%d %d %llu ",
4382 				 entry->pid, iter->cpu, iter->ts);
4383 
4384 	if (trace_seq_has_overflowed(s))
4385 		return TRACE_TYPE_PARTIAL_LINE;
4386 
4387 	event = ftrace_find_event(entry->type);
4388 	if (event)
4389 		return event->funcs->raw(iter, 0, event);
4390 
4391 	trace_seq_printf(s, "%d ?\n", entry->type);
4392 
4393 	return trace_handle_return(s);
4394 }
4395 
4396 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4397 {
4398 	struct trace_array *tr = iter->tr;
4399 	struct trace_seq *s = &iter->seq;
4400 	unsigned char newline = '\n';
4401 	struct trace_entry *entry;
4402 	struct trace_event *event;
4403 
4404 	entry = iter->ent;
4405 
4406 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4407 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4408 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4409 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4410 		if (trace_seq_has_overflowed(s))
4411 			return TRACE_TYPE_PARTIAL_LINE;
4412 	}
4413 
4414 	event = ftrace_find_event(entry->type);
4415 	if (event) {
4416 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4417 		if (ret != TRACE_TYPE_HANDLED)
4418 			return ret;
4419 	}
4420 
4421 	SEQ_PUT_FIELD(s, newline);
4422 
4423 	return trace_handle_return(s);
4424 }
4425 
4426 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4427 {
4428 	struct trace_array *tr = iter->tr;
4429 	struct trace_seq *s = &iter->seq;
4430 	struct trace_entry *entry;
4431 	struct trace_event *event;
4432 
4433 	entry = iter->ent;
4434 
4435 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436 		SEQ_PUT_FIELD(s, entry->pid);
4437 		SEQ_PUT_FIELD(s, iter->cpu);
4438 		SEQ_PUT_FIELD(s, iter->ts);
4439 		if (trace_seq_has_overflowed(s))
4440 			return TRACE_TYPE_PARTIAL_LINE;
4441 	}
4442 
4443 	event = ftrace_find_event(entry->type);
4444 	return event ? event->funcs->binary(iter, 0, event) :
4445 		TRACE_TYPE_HANDLED;
4446 }
4447 
4448 int trace_empty(struct trace_iterator *iter)
4449 {
4450 	struct ring_buffer_iter *buf_iter;
4451 	int cpu;
4452 
4453 	/* If we are looking at one CPU buffer, only check that one */
4454 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4455 		cpu = iter->cpu_file;
4456 		buf_iter = trace_buffer_iter(iter, cpu);
4457 		if (buf_iter) {
4458 			if (!ring_buffer_iter_empty(buf_iter))
4459 				return 0;
4460 		} else {
4461 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4462 				return 0;
4463 		}
4464 		return 1;
4465 	}
4466 
4467 	for_each_tracing_cpu(cpu) {
4468 		buf_iter = trace_buffer_iter(iter, cpu);
4469 		if (buf_iter) {
4470 			if (!ring_buffer_iter_empty(buf_iter))
4471 				return 0;
4472 		} else {
4473 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4474 				return 0;
4475 		}
4476 	}
4477 
4478 	return 1;
4479 }
4480 
4481 /*  Called with trace_event_read_lock() held. */
4482 enum print_line_t print_trace_line(struct trace_iterator *iter)
4483 {
4484 	struct trace_array *tr = iter->tr;
4485 	unsigned long trace_flags = tr->trace_flags;
4486 	enum print_line_t ret;
4487 
4488 	if (iter->lost_events) {
4489 		if (iter->lost_events == (unsigned long)-1)
4490 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4491 					 iter->cpu);
4492 		else
4493 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4494 					 iter->cpu, iter->lost_events);
4495 		if (trace_seq_has_overflowed(&iter->seq))
4496 			return TRACE_TYPE_PARTIAL_LINE;
4497 	}
4498 
4499 	if (iter->trace && iter->trace->print_line) {
4500 		ret = iter->trace->print_line(iter);
4501 		if (ret != TRACE_TYPE_UNHANDLED)
4502 			return ret;
4503 	}
4504 
4505 	if (iter->ent->type == TRACE_BPUTS &&
4506 			trace_flags & TRACE_ITER_PRINTK &&
4507 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508 		return trace_print_bputs_msg_only(iter);
4509 
4510 	if (iter->ent->type == TRACE_BPRINT &&
4511 			trace_flags & TRACE_ITER_PRINTK &&
4512 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513 		return trace_print_bprintk_msg_only(iter);
4514 
4515 	if (iter->ent->type == TRACE_PRINT &&
4516 			trace_flags & TRACE_ITER_PRINTK &&
4517 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4518 		return trace_print_printk_msg_only(iter);
4519 
4520 	if (trace_flags & TRACE_ITER_BIN)
4521 		return print_bin_fmt(iter);
4522 
4523 	if (trace_flags & TRACE_ITER_HEX)
4524 		return print_hex_fmt(iter);
4525 
4526 	if (trace_flags & TRACE_ITER_RAW)
4527 		return print_raw_fmt(iter);
4528 
4529 	return print_trace_fmt(iter);
4530 }
4531 
4532 void trace_latency_header(struct seq_file *m)
4533 {
4534 	struct trace_iterator *iter = m->private;
4535 	struct trace_array *tr = iter->tr;
4536 
4537 	/* print nothing if the buffers are empty */
4538 	if (trace_empty(iter))
4539 		return;
4540 
4541 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4542 		print_trace_header(m, iter);
4543 
4544 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4545 		print_lat_help_header(m);
4546 }
4547 
4548 void trace_default_header(struct seq_file *m)
4549 {
4550 	struct trace_iterator *iter = m->private;
4551 	struct trace_array *tr = iter->tr;
4552 	unsigned long trace_flags = tr->trace_flags;
4553 
4554 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4555 		return;
4556 
4557 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4558 		/* print nothing if the buffers are empty */
4559 		if (trace_empty(iter))
4560 			return;
4561 		print_trace_header(m, iter);
4562 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4563 			print_lat_help_header(m);
4564 	} else {
4565 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4566 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4567 				print_func_help_header_irq(iter->array_buffer,
4568 							   m, trace_flags);
4569 			else
4570 				print_func_help_header(iter->array_buffer, m,
4571 						       trace_flags);
4572 		}
4573 	}
4574 }
4575 
4576 static void test_ftrace_alive(struct seq_file *m)
4577 {
4578 	if (!ftrace_is_dead())
4579 		return;
4580 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4581 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4582 }
4583 
4584 #ifdef CONFIG_TRACER_MAX_TRACE
4585 static void show_snapshot_main_help(struct seq_file *m)
4586 {
4587 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4588 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589 		    "#                      Takes a snapshot of the main buffer.\n"
4590 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4591 		    "#                      (Doesn't have to be '2' works with any number that\n"
4592 		    "#                       is not a '0' or '1')\n");
4593 }
4594 
4595 static void show_snapshot_percpu_help(struct seq_file *m)
4596 {
4597 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4598 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4599 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4600 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4601 #else
4602 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4603 		    "#                     Must use main snapshot file to allocate.\n");
4604 #endif
4605 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4606 		    "#                      (Doesn't have to be '2' works with any number that\n"
4607 		    "#                       is not a '0' or '1')\n");
4608 }
4609 
4610 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4611 {
4612 	if (iter->tr->allocated_snapshot)
4613 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4614 	else
4615 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4616 
4617 	seq_puts(m, "# Snapshot commands:\n");
4618 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4619 		show_snapshot_main_help(m);
4620 	else
4621 		show_snapshot_percpu_help(m);
4622 }
4623 #else
4624 /* Should never be called */
4625 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4626 #endif
4627 
4628 static int s_show(struct seq_file *m, void *v)
4629 {
4630 	struct trace_iterator *iter = v;
4631 	int ret;
4632 
4633 	if (iter->ent == NULL) {
4634 		if (iter->tr) {
4635 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4636 			seq_puts(m, "#\n");
4637 			test_ftrace_alive(m);
4638 		}
4639 		if (iter->snapshot && trace_empty(iter))
4640 			print_snapshot_help(m, iter);
4641 		else if (iter->trace && iter->trace->print_header)
4642 			iter->trace->print_header(m);
4643 		else
4644 			trace_default_header(m);
4645 
4646 	} else if (iter->leftover) {
4647 		/*
4648 		 * If we filled the seq_file buffer earlier, we
4649 		 * want to just show it now.
4650 		 */
4651 		ret = trace_print_seq(m, &iter->seq);
4652 
4653 		/* ret should this time be zero, but you never know */
4654 		iter->leftover = ret;
4655 
4656 	} else {
4657 		print_trace_line(iter);
4658 		ret = trace_print_seq(m, &iter->seq);
4659 		/*
4660 		 * If we overflow the seq_file buffer, then it will
4661 		 * ask us for this data again at start up.
4662 		 * Use that instead.
4663 		 *  ret is 0 if seq_file write succeeded.
4664 		 *        -1 otherwise.
4665 		 */
4666 		iter->leftover = ret;
4667 	}
4668 
4669 	return 0;
4670 }
4671 
4672 /*
4673  * Should be used after trace_array_get(), trace_types_lock
4674  * ensures that i_cdev was already initialized.
4675  */
4676 static inline int tracing_get_cpu(struct inode *inode)
4677 {
4678 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4679 		return (long)inode->i_cdev - 1;
4680 	return RING_BUFFER_ALL_CPUS;
4681 }
4682 
4683 static const struct seq_operations tracer_seq_ops = {
4684 	.start		= s_start,
4685 	.next		= s_next,
4686 	.stop		= s_stop,
4687 	.show		= s_show,
4688 };
4689 
4690 static struct trace_iterator *
4691 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4692 {
4693 	struct trace_array *tr = inode->i_private;
4694 	struct trace_iterator *iter;
4695 	int cpu;
4696 
4697 	if (tracing_disabled)
4698 		return ERR_PTR(-ENODEV);
4699 
4700 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4701 	if (!iter)
4702 		return ERR_PTR(-ENOMEM);
4703 
4704 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4705 				    GFP_KERNEL);
4706 	if (!iter->buffer_iter)
4707 		goto release;
4708 
4709 	/*
4710 	 * trace_find_next_entry() may need to save off iter->ent.
4711 	 * It will place it into the iter->temp buffer. As most
4712 	 * events are less than 128, allocate a buffer of that size.
4713 	 * If one is greater, then trace_find_next_entry() will
4714 	 * allocate a new buffer to adjust for the bigger iter->ent.
4715 	 * It's not critical if it fails to get allocated here.
4716 	 */
4717 	iter->temp = kmalloc(128, GFP_KERNEL);
4718 	if (iter->temp)
4719 		iter->temp_size = 128;
4720 
4721 	/*
4722 	 * trace_event_printf() may need to modify given format
4723 	 * string to replace %p with %px so that it shows real address
4724 	 * instead of hash value. However, that is only for the event
4725 	 * tracing, other tracer may not need. Defer the allocation
4726 	 * until it is needed.
4727 	 */
4728 	iter->fmt = NULL;
4729 	iter->fmt_size = 0;
4730 
4731 	/*
4732 	 * We make a copy of the current tracer to avoid concurrent
4733 	 * changes on it while we are reading.
4734 	 */
4735 	mutex_lock(&trace_types_lock);
4736 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4737 	if (!iter->trace)
4738 		goto fail;
4739 
4740 	*iter->trace = *tr->current_trace;
4741 
4742 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4743 		goto fail;
4744 
4745 	iter->tr = tr;
4746 
4747 #ifdef CONFIG_TRACER_MAX_TRACE
4748 	/* Currently only the top directory has a snapshot */
4749 	if (tr->current_trace->print_max || snapshot)
4750 		iter->array_buffer = &tr->max_buffer;
4751 	else
4752 #endif
4753 		iter->array_buffer = &tr->array_buffer;
4754 	iter->snapshot = snapshot;
4755 	iter->pos = -1;
4756 	iter->cpu_file = tracing_get_cpu(inode);
4757 	mutex_init(&iter->mutex);
4758 
4759 	/* Notify the tracer early; before we stop tracing. */
4760 	if (iter->trace->open)
4761 		iter->trace->open(iter);
4762 
4763 	/* Annotate start of buffers if we had overruns */
4764 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4765 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4766 
4767 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4768 	if (trace_clocks[tr->clock_id].in_ns)
4769 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4770 
4771 	/*
4772 	 * If pause-on-trace is enabled, then stop the trace while
4773 	 * dumping, unless this is the "snapshot" file
4774 	 */
4775 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4776 		tracing_stop_tr(tr);
4777 
4778 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4779 		for_each_tracing_cpu(cpu) {
4780 			iter->buffer_iter[cpu] =
4781 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4782 							 cpu, GFP_KERNEL);
4783 		}
4784 		ring_buffer_read_prepare_sync();
4785 		for_each_tracing_cpu(cpu) {
4786 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4787 			tracing_iter_reset(iter, cpu);
4788 		}
4789 	} else {
4790 		cpu = iter->cpu_file;
4791 		iter->buffer_iter[cpu] =
4792 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4793 						 cpu, GFP_KERNEL);
4794 		ring_buffer_read_prepare_sync();
4795 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4796 		tracing_iter_reset(iter, cpu);
4797 	}
4798 
4799 	mutex_unlock(&trace_types_lock);
4800 
4801 	return iter;
4802 
4803  fail:
4804 	mutex_unlock(&trace_types_lock);
4805 	kfree(iter->trace);
4806 	kfree(iter->temp);
4807 	kfree(iter->buffer_iter);
4808 release:
4809 	seq_release_private(inode, file);
4810 	return ERR_PTR(-ENOMEM);
4811 }
4812 
4813 int tracing_open_generic(struct inode *inode, struct file *filp)
4814 {
4815 	int ret;
4816 
4817 	ret = tracing_check_open_get_tr(NULL);
4818 	if (ret)
4819 		return ret;
4820 
4821 	filp->private_data = inode->i_private;
4822 	return 0;
4823 }
4824 
4825 bool tracing_is_disabled(void)
4826 {
4827 	return (tracing_disabled) ? true: false;
4828 }
4829 
4830 /*
4831  * Open and update trace_array ref count.
4832  * Must have the current trace_array passed to it.
4833  */
4834 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4835 {
4836 	struct trace_array *tr = inode->i_private;
4837 	int ret;
4838 
4839 	ret = tracing_check_open_get_tr(tr);
4840 	if (ret)
4841 		return ret;
4842 
4843 	filp->private_data = inode->i_private;
4844 
4845 	return 0;
4846 }
4847 
4848 static int tracing_release(struct inode *inode, struct file *file)
4849 {
4850 	struct trace_array *tr = inode->i_private;
4851 	struct seq_file *m = file->private_data;
4852 	struct trace_iterator *iter;
4853 	int cpu;
4854 
4855 	if (!(file->f_mode & FMODE_READ)) {
4856 		trace_array_put(tr);
4857 		return 0;
4858 	}
4859 
4860 	/* Writes do not use seq_file */
4861 	iter = m->private;
4862 	mutex_lock(&trace_types_lock);
4863 
4864 	for_each_tracing_cpu(cpu) {
4865 		if (iter->buffer_iter[cpu])
4866 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4867 	}
4868 
4869 	if (iter->trace && iter->trace->close)
4870 		iter->trace->close(iter);
4871 
4872 	if (!iter->snapshot && tr->stop_count)
4873 		/* reenable tracing if it was previously enabled */
4874 		tracing_start_tr(tr);
4875 
4876 	__trace_array_put(tr);
4877 
4878 	mutex_unlock(&trace_types_lock);
4879 
4880 	mutex_destroy(&iter->mutex);
4881 	free_cpumask_var(iter->started);
4882 	kfree(iter->fmt);
4883 	kfree(iter->temp);
4884 	kfree(iter->trace);
4885 	kfree(iter->buffer_iter);
4886 	seq_release_private(inode, file);
4887 
4888 	return 0;
4889 }
4890 
4891 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4892 {
4893 	struct trace_array *tr = inode->i_private;
4894 
4895 	trace_array_put(tr);
4896 	return 0;
4897 }
4898 
4899 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4900 {
4901 	struct trace_array *tr = inode->i_private;
4902 
4903 	trace_array_put(tr);
4904 
4905 	return single_release(inode, file);
4906 }
4907 
4908 static int tracing_open(struct inode *inode, struct file *file)
4909 {
4910 	struct trace_array *tr = inode->i_private;
4911 	struct trace_iterator *iter;
4912 	int ret;
4913 
4914 	ret = tracing_check_open_get_tr(tr);
4915 	if (ret)
4916 		return ret;
4917 
4918 	/* If this file was open for write, then erase contents */
4919 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4920 		int cpu = tracing_get_cpu(inode);
4921 		struct array_buffer *trace_buf = &tr->array_buffer;
4922 
4923 #ifdef CONFIG_TRACER_MAX_TRACE
4924 		if (tr->current_trace->print_max)
4925 			trace_buf = &tr->max_buffer;
4926 #endif
4927 
4928 		if (cpu == RING_BUFFER_ALL_CPUS)
4929 			tracing_reset_online_cpus(trace_buf);
4930 		else
4931 			tracing_reset_cpu(trace_buf, cpu);
4932 	}
4933 
4934 	if (file->f_mode & FMODE_READ) {
4935 		iter = __tracing_open(inode, file, false);
4936 		if (IS_ERR(iter))
4937 			ret = PTR_ERR(iter);
4938 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4939 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4940 	}
4941 
4942 	if (ret < 0)
4943 		trace_array_put(tr);
4944 
4945 	return ret;
4946 }
4947 
4948 /*
4949  * Some tracers are not suitable for instance buffers.
4950  * A tracer is always available for the global array (toplevel)
4951  * or if it explicitly states that it is.
4952  */
4953 static bool
4954 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4955 {
4956 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4957 }
4958 
4959 /* Find the next tracer that this trace array may use */
4960 static struct tracer *
4961 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4962 {
4963 	while (t && !trace_ok_for_array(t, tr))
4964 		t = t->next;
4965 
4966 	return t;
4967 }
4968 
4969 static void *
4970 t_next(struct seq_file *m, void *v, loff_t *pos)
4971 {
4972 	struct trace_array *tr = m->private;
4973 	struct tracer *t = v;
4974 
4975 	(*pos)++;
4976 
4977 	if (t)
4978 		t = get_tracer_for_array(tr, t->next);
4979 
4980 	return t;
4981 }
4982 
4983 static void *t_start(struct seq_file *m, loff_t *pos)
4984 {
4985 	struct trace_array *tr = m->private;
4986 	struct tracer *t;
4987 	loff_t l = 0;
4988 
4989 	mutex_lock(&trace_types_lock);
4990 
4991 	t = get_tracer_for_array(tr, trace_types);
4992 	for (; t && l < *pos; t = t_next(m, t, &l))
4993 			;
4994 
4995 	return t;
4996 }
4997 
4998 static void t_stop(struct seq_file *m, void *p)
4999 {
5000 	mutex_unlock(&trace_types_lock);
5001 }
5002 
5003 static int t_show(struct seq_file *m, void *v)
5004 {
5005 	struct tracer *t = v;
5006 
5007 	if (!t)
5008 		return 0;
5009 
5010 	seq_puts(m, t->name);
5011 	if (t->next)
5012 		seq_putc(m, ' ');
5013 	else
5014 		seq_putc(m, '\n');
5015 
5016 	return 0;
5017 }
5018 
5019 static const struct seq_operations show_traces_seq_ops = {
5020 	.start		= t_start,
5021 	.next		= t_next,
5022 	.stop		= t_stop,
5023 	.show		= t_show,
5024 };
5025 
5026 static int show_traces_open(struct inode *inode, struct file *file)
5027 {
5028 	struct trace_array *tr = inode->i_private;
5029 	struct seq_file *m;
5030 	int ret;
5031 
5032 	ret = tracing_check_open_get_tr(tr);
5033 	if (ret)
5034 		return ret;
5035 
5036 	ret = seq_open(file, &show_traces_seq_ops);
5037 	if (ret) {
5038 		trace_array_put(tr);
5039 		return ret;
5040 	}
5041 
5042 	m = file->private_data;
5043 	m->private = tr;
5044 
5045 	return 0;
5046 }
5047 
5048 static int show_traces_release(struct inode *inode, struct file *file)
5049 {
5050 	struct trace_array *tr = inode->i_private;
5051 
5052 	trace_array_put(tr);
5053 	return seq_release(inode, file);
5054 }
5055 
5056 static ssize_t
5057 tracing_write_stub(struct file *filp, const char __user *ubuf,
5058 		   size_t count, loff_t *ppos)
5059 {
5060 	return count;
5061 }
5062 
5063 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5064 {
5065 	int ret;
5066 
5067 	if (file->f_mode & FMODE_READ)
5068 		ret = seq_lseek(file, offset, whence);
5069 	else
5070 		file->f_pos = ret = 0;
5071 
5072 	return ret;
5073 }
5074 
5075 static const struct file_operations tracing_fops = {
5076 	.open		= tracing_open,
5077 	.read		= seq_read,
5078 	.write		= tracing_write_stub,
5079 	.llseek		= tracing_lseek,
5080 	.release	= tracing_release,
5081 };
5082 
5083 static const struct file_operations show_traces_fops = {
5084 	.open		= show_traces_open,
5085 	.read		= seq_read,
5086 	.llseek		= seq_lseek,
5087 	.release	= show_traces_release,
5088 };
5089 
5090 static ssize_t
5091 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5092 		     size_t count, loff_t *ppos)
5093 {
5094 	struct trace_array *tr = file_inode(filp)->i_private;
5095 	char *mask_str;
5096 	int len;
5097 
5098 	len = snprintf(NULL, 0, "%*pb\n",
5099 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5100 	mask_str = kmalloc(len, GFP_KERNEL);
5101 	if (!mask_str)
5102 		return -ENOMEM;
5103 
5104 	len = snprintf(mask_str, len, "%*pb\n",
5105 		       cpumask_pr_args(tr->tracing_cpumask));
5106 	if (len >= count) {
5107 		count = -EINVAL;
5108 		goto out_err;
5109 	}
5110 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5111 
5112 out_err:
5113 	kfree(mask_str);
5114 
5115 	return count;
5116 }
5117 
5118 int tracing_set_cpumask(struct trace_array *tr,
5119 			cpumask_var_t tracing_cpumask_new)
5120 {
5121 	int cpu;
5122 
5123 	if (!tr)
5124 		return -EINVAL;
5125 
5126 	local_irq_disable();
5127 	arch_spin_lock(&tr->max_lock);
5128 	for_each_tracing_cpu(cpu) {
5129 		/*
5130 		 * Increase/decrease the disabled counter if we are
5131 		 * about to flip a bit in the cpumask:
5132 		 */
5133 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5134 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5135 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5136 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5137 		}
5138 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5141 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5142 		}
5143 	}
5144 	arch_spin_unlock(&tr->max_lock);
5145 	local_irq_enable();
5146 
5147 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5148 
5149 	return 0;
5150 }
5151 
5152 static ssize_t
5153 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5154 		      size_t count, loff_t *ppos)
5155 {
5156 	struct trace_array *tr = file_inode(filp)->i_private;
5157 	cpumask_var_t tracing_cpumask_new;
5158 	int err;
5159 
5160 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5161 		return -ENOMEM;
5162 
5163 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5164 	if (err)
5165 		goto err_free;
5166 
5167 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5168 	if (err)
5169 		goto err_free;
5170 
5171 	free_cpumask_var(tracing_cpumask_new);
5172 
5173 	return count;
5174 
5175 err_free:
5176 	free_cpumask_var(tracing_cpumask_new);
5177 
5178 	return err;
5179 }
5180 
5181 static const struct file_operations tracing_cpumask_fops = {
5182 	.open		= tracing_open_generic_tr,
5183 	.read		= tracing_cpumask_read,
5184 	.write		= tracing_cpumask_write,
5185 	.release	= tracing_release_generic_tr,
5186 	.llseek		= generic_file_llseek,
5187 };
5188 
5189 static int tracing_trace_options_show(struct seq_file *m, void *v)
5190 {
5191 	struct tracer_opt *trace_opts;
5192 	struct trace_array *tr = m->private;
5193 	u32 tracer_flags;
5194 	int i;
5195 
5196 	mutex_lock(&trace_types_lock);
5197 	tracer_flags = tr->current_trace->flags->val;
5198 	trace_opts = tr->current_trace->flags->opts;
5199 
5200 	for (i = 0; trace_options[i]; i++) {
5201 		if (tr->trace_flags & (1 << i))
5202 			seq_printf(m, "%s\n", trace_options[i]);
5203 		else
5204 			seq_printf(m, "no%s\n", trace_options[i]);
5205 	}
5206 
5207 	for (i = 0; trace_opts[i].name; i++) {
5208 		if (tracer_flags & trace_opts[i].bit)
5209 			seq_printf(m, "%s\n", trace_opts[i].name);
5210 		else
5211 			seq_printf(m, "no%s\n", trace_opts[i].name);
5212 	}
5213 	mutex_unlock(&trace_types_lock);
5214 
5215 	return 0;
5216 }
5217 
5218 static int __set_tracer_option(struct trace_array *tr,
5219 			       struct tracer_flags *tracer_flags,
5220 			       struct tracer_opt *opts, int neg)
5221 {
5222 	struct tracer *trace = tracer_flags->trace;
5223 	int ret;
5224 
5225 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5226 	if (ret)
5227 		return ret;
5228 
5229 	if (neg)
5230 		tracer_flags->val &= ~opts->bit;
5231 	else
5232 		tracer_flags->val |= opts->bit;
5233 	return 0;
5234 }
5235 
5236 /* Try to assign a tracer specific option */
5237 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5238 {
5239 	struct tracer *trace = tr->current_trace;
5240 	struct tracer_flags *tracer_flags = trace->flags;
5241 	struct tracer_opt *opts = NULL;
5242 	int i;
5243 
5244 	for (i = 0; tracer_flags->opts[i].name; i++) {
5245 		opts = &tracer_flags->opts[i];
5246 
5247 		if (strcmp(cmp, opts->name) == 0)
5248 			return __set_tracer_option(tr, trace->flags, opts, neg);
5249 	}
5250 
5251 	return -EINVAL;
5252 }
5253 
5254 /* Some tracers require overwrite to stay enabled */
5255 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5256 {
5257 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5258 		return -1;
5259 
5260 	return 0;
5261 }
5262 
5263 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5264 {
5265 	int *map;
5266 
5267 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5268 	    (mask == TRACE_ITER_RECORD_CMD))
5269 		lockdep_assert_held(&event_mutex);
5270 
5271 	/* do nothing if flag is already set */
5272 	if (!!(tr->trace_flags & mask) == !!enabled)
5273 		return 0;
5274 
5275 	/* Give the tracer a chance to approve the change */
5276 	if (tr->current_trace->flag_changed)
5277 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5278 			return -EINVAL;
5279 
5280 	if (enabled)
5281 		tr->trace_flags |= mask;
5282 	else
5283 		tr->trace_flags &= ~mask;
5284 
5285 	if (mask == TRACE_ITER_RECORD_CMD)
5286 		trace_event_enable_cmd_record(enabled);
5287 
5288 	if (mask == TRACE_ITER_RECORD_TGID) {
5289 		if (!tgid_map) {
5290 			tgid_map_max = pid_max;
5291 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5292 				       GFP_KERNEL);
5293 
5294 			/*
5295 			 * Pairs with smp_load_acquire() in
5296 			 * trace_find_tgid_ptr() to ensure that if it observes
5297 			 * the tgid_map we just allocated then it also observes
5298 			 * the corresponding tgid_map_max value.
5299 			 */
5300 			smp_store_release(&tgid_map, map);
5301 		}
5302 		if (!tgid_map) {
5303 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5304 			return -ENOMEM;
5305 		}
5306 
5307 		trace_event_enable_tgid_record(enabled);
5308 	}
5309 
5310 	if (mask == TRACE_ITER_EVENT_FORK)
5311 		trace_event_follow_fork(tr, enabled);
5312 
5313 	if (mask == TRACE_ITER_FUNC_FORK)
5314 		ftrace_pid_follow_fork(tr, enabled);
5315 
5316 	if (mask == TRACE_ITER_OVERWRITE) {
5317 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5318 #ifdef CONFIG_TRACER_MAX_TRACE
5319 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5320 #endif
5321 	}
5322 
5323 	if (mask == TRACE_ITER_PRINTK) {
5324 		trace_printk_start_stop_comm(enabled);
5325 		trace_printk_control(enabled);
5326 	}
5327 
5328 	return 0;
5329 }
5330 
5331 int trace_set_options(struct trace_array *tr, char *option)
5332 {
5333 	char *cmp;
5334 	int neg = 0;
5335 	int ret;
5336 	size_t orig_len = strlen(option);
5337 	int len;
5338 
5339 	cmp = strstrip(option);
5340 
5341 	len = str_has_prefix(cmp, "no");
5342 	if (len)
5343 		neg = 1;
5344 
5345 	cmp += len;
5346 
5347 	mutex_lock(&event_mutex);
5348 	mutex_lock(&trace_types_lock);
5349 
5350 	ret = match_string(trace_options, -1, cmp);
5351 	/* If no option could be set, test the specific tracer options */
5352 	if (ret < 0)
5353 		ret = set_tracer_option(tr, cmp, neg);
5354 	else
5355 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5356 
5357 	mutex_unlock(&trace_types_lock);
5358 	mutex_unlock(&event_mutex);
5359 
5360 	/*
5361 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5362 	 * turn it back into a space.
5363 	 */
5364 	if (orig_len > strlen(option))
5365 		option[strlen(option)] = ' ';
5366 
5367 	return ret;
5368 }
5369 
5370 static void __init apply_trace_boot_options(void)
5371 {
5372 	char *buf = trace_boot_options_buf;
5373 	char *option;
5374 
5375 	while (true) {
5376 		option = strsep(&buf, ",");
5377 
5378 		if (!option)
5379 			break;
5380 
5381 		if (*option)
5382 			trace_set_options(&global_trace, option);
5383 
5384 		/* Put back the comma to allow this to be called again */
5385 		if (buf)
5386 			*(buf - 1) = ',';
5387 	}
5388 }
5389 
5390 static ssize_t
5391 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5392 			size_t cnt, loff_t *ppos)
5393 {
5394 	struct seq_file *m = filp->private_data;
5395 	struct trace_array *tr = m->private;
5396 	char buf[64];
5397 	int ret;
5398 
5399 	if (cnt >= sizeof(buf))
5400 		return -EINVAL;
5401 
5402 	if (copy_from_user(buf, ubuf, cnt))
5403 		return -EFAULT;
5404 
5405 	buf[cnt] = 0;
5406 
5407 	ret = trace_set_options(tr, buf);
5408 	if (ret < 0)
5409 		return ret;
5410 
5411 	*ppos += cnt;
5412 
5413 	return cnt;
5414 }
5415 
5416 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5417 {
5418 	struct trace_array *tr = inode->i_private;
5419 	int ret;
5420 
5421 	ret = tracing_check_open_get_tr(tr);
5422 	if (ret)
5423 		return ret;
5424 
5425 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5426 	if (ret < 0)
5427 		trace_array_put(tr);
5428 
5429 	return ret;
5430 }
5431 
5432 static const struct file_operations tracing_iter_fops = {
5433 	.open		= tracing_trace_options_open,
5434 	.read		= seq_read,
5435 	.llseek		= seq_lseek,
5436 	.release	= tracing_single_release_tr,
5437 	.write		= tracing_trace_options_write,
5438 };
5439 
5440 static const char readme_msg[] =
5441 	"tracing mini-HOWTO:\n\n"
5442 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5443 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5444 	" Important files:\n"
5445 	"  trace\t\t\t- The static contents of the buffer\n"
5446 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5447 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5448 	"  current_tracer\t- function and latency tracers\n"
5449 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5450 	"  error_log\t- error log for failed commands (that support it)\n"
5451 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5452 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5453 	"  trace_clock\t\t-change the clock used to order events\n"
5454 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5455 	"      global:   Synced across CPUs but slows tracing down.\n"
5456 	"     counter:   Not a clock, but just an increment\n"
5457 	"      uptime:   Jiffy counter from time of boot\n"
5458 	"        perf:   Same clock that perf events use\n"
5459 #ifdef CONFIG_X86_64
5460 	"     x86-tsc:   TSC cycle counter\n"
5461 #endif
5462 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5463 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5464 	"    absolute:   Absolute (standalone) timestamp\n"
5465 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5466 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5467 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5468 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5469 	"\t\t\t  Remove sub-buffer with rmdir\n"
5470 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5471 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5472 	"\t\t\t  option name\n"
5473 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5474 #ifdef CONFIG_DYNAMIC_FTRACE
5475 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5476 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5477 	"\t\t\t  functions\n"
5478 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5479 	"\t     modules: Can select a group via module\n"
5480 	"\t      Format: :mod:<module-name>\n"
5481 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5482 	"\t    triggers: a command to perform when function is hit\n"
5483 	"\t      Format: <function>:<trigger>[:count]\n"
5484 	"\t     trigger: traceon, traceoff\n"
5485 	"\t\t      enable_event:<system>:<event>\n"
5486 	"\t\t      disable_event:<system>:<event>\n"
5487 #ifdef CONFIG_STACKTRACE
5488 	"\t\t      stacktrace\n"
5489 #endif
5490 #ifdef CONFIG_TRACER_SNAPSHOT
5491 	"\t\t      snapshot\n"
5492 #endif
5493 	"\t\t      dump\n"
5494 	"\t\t      cpudump\n"
5495 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5496 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5497 	"\t     The first one will disable tracing every time do_fault is hit\n"
5498 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5499 	"\t       The first time do trap is hit and it disables tracing, the\n"
5500 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5501 	"\t       the counter will not decrement. It only decrements when the\n"
5502 	"\t       trigger did work\n"
5503 	"\t     To remove trigger without count:\n"
5504 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5505 	"\t     To remove trigger with a count:\n"
5506 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5507 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5508 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5509 	"\t    modules: Can select a group via module command :mod:\n"
5510 	"\t    Does not accept triggers\n"
5511 #endif /* CONFIG_DYNAMIC_FTRACE */
5512 #ifdef CONFIG_FUNCTION_TRACER
5513 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5514 	"\t\t    (function)\n"
5515 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5516 	"\t\t    (function)\n"
5517 #endif
5518 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5519 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5520 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5521 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5522 #endif
5523 #ifdef CONFIG_TRACER_SNAPSHOT
5524 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5525 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5526 	"\t\t\t  information\n"
5527 #endif
5528 #ifdef CONFIG_STACK_TRACER
5529 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5530 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5531 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5532 	"\t\t\t  new trace)\n"
5533 #ifdef CONFIG_DYNAMIC_FTRACE
5534 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5535 	"\t\t\t  traces\n"
5536 #endif
5537 #endif /* CONFIG_STACK_TRACER */
5538 #ifdef CONFIG_DYNAMIC_EVENTS
5539 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5540 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5541 #endif
5542 #ifdef CONFIG_KPROBE_EVENTS
5543 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5544 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5545 #endif
5546 #ifdef CONFIG_UPROBE_EVENTS
5547 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5548 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5549 #endif
5550 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5551 	"\t  accepts: event-definitions (one definition per line)\n"
5552 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5553 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5556 #endif
5557 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5558 	"\t           -:[<group>/]<event>\n"
5559 #ifdef CONFIG_KPROBE_EVENTS
5560 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5561   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5562 #endif
5563 #ifdef CONFIG_UPROBE_EVENTS
5564   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5565 #endif
5566 	"\t     args: <name>=fetcharg[:type]\n"
5567 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5568 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5569 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5570 #else
5571 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5572 #endif
5573 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5574 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5575 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5576 	"\t           <type>\\[<array-size>\\]\n"
5577 #ifdef CONFIG_HIST_TRIGGERS
5578 	"\t    field: <stype> <name>;\n"
5579 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5580 	"\t           [unsigned] char/int/long\n"
5581 #endif
5582 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5583 	"\t            of the <attached-group>/<attached-event>.\n"
5584 #endif
5585 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5586 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5587 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5588 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5589 	"\t\t\t  events\n"
5590 	"      filter\t\t- If set, only events passing filter are traced\n"
5591 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5592 	"\t\t\t  <event>:\n"
5593 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5594 	"      filter\t\t- If set, only events passing filter are traced\n"
5595 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5596 	"\t    Format: <trigger>[:count][if <filter>]\n"
5597 	"\t   trigger: traceon, traceoff\n"
5598 	"\t            enable_event:<system>:<event>\n"
5599 	"\t            disable_event:<system>:<event>\n"
5600 #ifdef CONFIG_HIST_TRIGGERS
5601 	"\t            enable_hist:<system>:<event>\n"
5602 	"\t            disable_hist:<system>:<event>\n"
5603 #endif
5604 #ifdef CONFIG_STACKTRACE
5605 	"\t\t    stacktrace\n"
5606 #endif
5607 #ifdef CONFIG_TRACER_SNAPSHOT
5608 	"\t\t    snapshot\n"
5609 #endif
5610 #ifdef CONFIG_HIST_TRIGGERS
5611 	"\t\t    hist (see below)\n"
5612 #endif
5613 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5614 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5615 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5616 	"\t                  events/block/block_unplug/trigger\n"
5617 	"\t   The first disables tracing every time block_unplug is hit.\n"
5618 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5619 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5620 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5621 	"\t   Like function triggers, the counter is only decremented if it\n"
5622 	"\t    enabled or disabled tracing.\n"
5623 	"\t   To remove a trigger without a count:\n"
5624 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5625 	"\t   To remove a trigger with a count:\n"
5626 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5627 	"\t   Filters can be ignored when removing a trigger.\n"
5628 #ifdef CONFIG_HIST_TRIGGERS
5629 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5630 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5631 	"\t            [:values=<field1[,field2,...]>]\n"
5632 	"\t            [:sort=<field1[,field2,...]>]\n"
5633 	"\t            [:size=#entries]\n"
5634 	"\t            [:pause][:continue][:clear]\n"
5635 	"\t            [:name=histname1]\n"
5636 	"\t            [:<handler>.<action>]\n"
5637 	"\t            [if <filter>]\n\n"
5638 	"\t    Note, special fields can be used as well:\n"
5639 	"\t            common_timestamp - to record current timestamp\n"
5640 	"\t            common_cpu - to record the CPU the event happened on\n"
5641 	"\n"
5642 	"\t    When a matching event is hit, an entry is added to a hash\n"
5643 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5644 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5645 	"\t    correspond to fields in the event's format description.  Keys\n"
5646 	"\t    can be any field, or the special string 'stacktrace'.\n"
5647 	"\t    Compound keys consisting of up to two fields can be specified\n"
5648 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5649 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5650 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5651 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5652 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5653 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5654 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5655 	"\t    its histogram data will be shared with other triggers of the\n"
5656 	"\t    same name, and trigger hits will update this common data.\n\n"
5657 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5658 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5659 	"\t    triggers attached to an event, there will be a table for each\n"
5660 	"\t    trigger in the output.  The table displayed for a named\n"
5661 	"\t    trigger will be the same as any other instance having the\n"
5662 	"\t    same name.  The default format used to display a given field\n"
5663 	"\t    can be modified by appending any of the following modifiers\n"
5664 	"\t    to the field name, as applicable:\n\n"
5665 	"\t            .hex        display a number as a hex value\n"
5666 	"\t            .sym        display an address as a symbol\n"
5667 	"\t            .sym-offset display an address as a symbol and offset\n"
5668 	"\t            .execname   display a common_pid as a program name\n"
5669 	"\t            .syscall    display a syscall id as a syscall name\n"
5670 	"\t            .log2       display log2 value rather than raw number\n"
5671 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5672 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5673 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5674 	"\t    trigger or to start a hist trigger but not log any events\n"
5675 	"\t    until told to do so.  'continue' can be used to start or\n"
5676 	"\t    restart a paused hist trigger.\n\n"
5677 	"\t    The 'clear' parameter will clear the contents of a running\n"
5678 	"\t    hist trigger and leave its current paused/active state\n"
5679 	"\t    unchanged.\n\n"
5680 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5681 	"\t    have one event conditionally start and stop another event's\n"
5682 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5683 	"\t    the enable_event and disable_event triggers.\n\n"
5684 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5685 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5686 	"\t        <handler>.<action>\n\n"
5687 	"\t    The available handlers are:\n\n"
5688 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5689 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5690 	"\t        onchange(var)            - invoke action if var changes\n\n"
5691 	"\t    The available actions are:\n\n"
5692 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5693 	"\t        save(field,...)                      - save current event fields\n"
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5696 #endif
5697 #ifdef CONFIG_SYNTH_EVENTS
5698 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5699 	"\t  Write into this file to define/undefine new synthetic events.\n"
5700 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5701 #endif
5702 #endif
5703 ;
5704 
5705 static ssize_t
5706 tracing_readme_read(struct file *filp, char __user *ubuf,
5707 		       size_t cnt, loff_t *ppos)
5708 {
5709 	return simple_read_from_buffer(ubuf, cnt, ppos,
5710 					readme_msg, strlen(readme_msg));
5711 }
5712 
5713 static const struct file_operations tracing_readme_fops = {
5714 	.open		= tracing_open_generic,
5715 	.read		= tracing_readme_read,
5716 	.llseek		= generic_file_llseek,
5717 };
5718 
5719 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5720 {
5721 	int pid = ++(*pos);
5722 
5723 	return trace_find_tgid_ptr(pid);
5724 }
5725 
5726 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5727 {
5728 	int pid = *pos;
5729 
5730 	return trace_find_tgid_ptr(pid);
5731 }
5732 
5733 static void saved_tgids_stop(struct seq_file *m, void *v)
5734 {
5735 }
5736 
5737 static int saved_tgids_show(struct seq_file *m, void *v)
5738 {
5739 	int *entry = (int *)v;
5740 	int pid = entry - tgid_map;
5741 	int tgid = *entry;
5742 
5743 	if (tgid == 0)
5744 		return SEQ_SKIP;
5745 
5746 	seq_printf(m, "%d %d\n", pid, tgid);
5747 	return 0;
5748 }
5749 
5750 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5751 	.start		= saved_tgids_start,
5752 	.stop		= saved_tgids_stop,
5753 	.next		= saved_tgids_next,
5754 	.show		= saved_tgids_show,
5755 };
5756 
5757 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5758 {
5759 	int ret;
5760 
5761 	ret = tracing_check_open_get_tr(NULL);
5762 	if (ret)
5763 		return ret;
5764 
5765 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5766 }
5767 
5768 
5769 static const struct file_operations tracing_saved_tgids_fops = {
5770 	.open		= tracing_saved_tgids_open,
5771 	.read		= seq_read,
5772 	.llseek		= seq_lseek,
5773 	.release	= seq_release,
5774 };
5775 
5776 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5777 {
5778 	unsigned int *ptr = v;
5779 
5780 	if (*pos || m->count)
5781 		ptr++;
5782 
5783 	(*pos)++;
5784 
5785 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5786 	     ptr++) {
5787 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5788 			continue;
5789 
5790 		return ptr;
5791 	}
5792 
5793 	return NULL;
5794 }
5795 
5796 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5797 {
5798 	void *v;
5799 	loff_t l = 0;
5800 
5801 	preempt_disable();
5802 	arch_spin_lock(&trace_cmdline_lock);
5803 
5804 	v = &savedcmd->map_cmdline_to_pid[0];
5805 	while (l <= *pos) {
5806 		v = saved_cmdlines_next(m, v, &l);
5807 		if (!v)
5808 			return NULL;
5809 	}
5810 
5811 	return v;
5812 }
5813 
5814 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5815 {
5816 	arch_spin_unlock(&trace_cmdline_lock);
5817 	preempt_enable();
5818 }
5819 
5820 static int saved_cmdlines_show(struct seq_file *m, void *v)
5821 {
5822 	char buf[TASK_COMM_LEN];
5823 	unsigned int *pid = v;
5824 
5825 	__trace_find_cmdline(*pid, buf);
5826 	seq_printf(m, "%d %s\n", *pid, buf);
5827 	return 0;
5828 }
5829 
5830 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5831 	.start		= saved_cmdlines_start,
5832 	.next		= saved_cmdlines_next,
5833 	.stop		= saved_cmdlines_stop,
5834 	.show		= saved_cmdlines_show,
5835 };
5836 
5837 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5838 {
5839 	int ret;
5840 
5841 	ret = tracing_check_open_get_tr(NULL);
5842 	if (ret)
5843 		return ret;
5844 
5845 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5846 }
5847 
5848 static const struct file_operations tracing_saved_cmdlines_fops = {
5849 	.open		= tracing_saved_cmdlines_open,
5850 	.read		= seq_read,
5851 	.llseek		= seq_lseek,
5852 	.release	= seq_release,
5853 };
5854 
5855 static ssize_t
5856 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5857 				 size_t cnt, loff_t *ppos)
5858 {
5859 	char buf[64];
5860 	int r;
5861 
5862 	arch_spin_lock(&trace_cmdline_lock);
5863 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5864 	arch_spin_unlock(&trace_cmdline_lock);
5865 
5866 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5867 }
5868 
5869 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5870 {
5871 	kfree(s->saved_cmdlines);
5872 	kfree(s->map_cmdline_to_pid);
5873 	kfree(s);
5874 }
5875 
5876 static int tracing_resize_saved_cmdlines(unsigned int val)
5877 {
5878 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5879 
5880 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5881 	if (!s)
5882 		return -ENOMEM;
5883 
5884 	if (allocate_cmdlines_buffer(val, s) < 0) {
5885 		kfree(s);
5886 		return -ENOMEM;
5887 	}
5888 
5889 	arch_spin_lock(&trace_cmdline_lock);
5890 	savedcmd_temp = savedcmd;
5891 	savedcmd = s;
5892 	arch_spin_unlock(&trace_cmdline_lock);
5893 	free_saved_cmdlines_buffer(savedcmd_temp);
5894 
5895 	return 0;
5896 }
5897 
5898 static ssize_t
5899 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5900 				  size_t cnt, loff_t *ppos)
5901 {
5902 	unsigned long val;
5903 	int ret;
5904 
5905 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5906 	if (ret)
5907 		return ret;
5908 
5909 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5910 	if (!val || val > PID_MAX_DEFAULT)
5911 		return -EINVAL;
5912 
5913 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5914 	if (ret < 0)
5915 		return ret;
5916 
5917 	*ppos += cnt;
5918 
5919 	return cnt;
5920 }
5921 
5922 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5923 	.open		= tracing_open_generic,
5924 	.read		= tracing_saved_cmdlines_size_read,
5925 	.write		= tracing_saved_cmdlines_size_write,
5926 };
5927 
5928 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5929 static union trace_eval_map_item *
5930 update_eval_map(union trace_eval_map_item *ptr)
5931 {
5932 	if (!ptr->map.eval_string) {
5933 		if (ptr->tail.next) {
5934 			ptr = ptr->tail.next;
5935 			/* Set ptr to the next real item (skip head) */
5936 			ptr++;
5937 		} else
5938 			return NULL;
5939 	}
5940 	return ptr;
5941 }
5942 
5943 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5944 {
5945 	union trace_eval_map_item *ptr = v;
5946 
5947 	/*
5948 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5949 	 * This really should never happen.
5950 	 */
5951 	(*pos)++;
5952 	ptr = update_eval_map(ptr);
5953 	if (WARN_ON_ONCE(!ptr))
5954 		return NULL;
5955 
5956 	ptr++;
5957 	ptr = update_eval_map(ptr);
5958 
5959 	return ptr;
5960 }
5961 
5962 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5963 {
5964 	union trace_eval_map_item *v;
5965 	loff_t l = 0;
5966 
5967 	mutex_lock(&trace_eval_mutex);
5968 
5969 	v = trace_eval_maps;
5970 	if (v)
5971 		v++;
5972 
5973 	while (v && l < *pos) {
5974 		v = eval_map_next(m, v, &l);
5975 	}
5976 
5977 	return v;
5978 }
5979 
5980 static void eval_map_stop(struct seq_file *m, void *v)
5981 {
5982 	mutex_unlock(&trace_eval_mutex);
5983 }
5984 
5985 static int eval_map_show(struct seq_file *m, void *v)
5986 {
5987 	union trace_eval_map_item *ptr = v;
5988 
5989 	seq_printf(m, "%s %ld (%s)\n",
5990 		   ptr->map.eval_string, ptr->map.eval_value,
5991 		   ptr->map.system);
5992 
5993 	return 0;
5994 }
5995 
5996 static const struct seq_operations tracing_eval_map_seq_ops = {
5997 	.start		= eval_map_start,
5998 	.next		= eval_map_next,
5999 	.stop		= eval_map_stop,
6000 	.show		= eval_map_show,
6001 };
6002 
6003 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6004 {
6005 	int ret;
6006 
6007 	ret = tracing_check_open_get_tr(NULL);
6008 	if (ret)
6009 		return ret;
6010 
6011 	return seq_open(filp, &tracing_eval_map_seq_ops);
6012 }
6013 
6014 static const struct file_operations tracing_eval_map_fops = {
6015 	.open		= tracing_eval_map_open,
6016 	.read		= seq_read,
6017 	.llseek		= seq_lseek,
6018 	.release	= seq_release,
6019 };
6020 
6021 static inline union trace_eval_map_item *
6022 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6023 {
6024 	/* Return tail of array given the head */
6025 	return ptr + ptr->head.length + 1;
6026 }
6027 
6028 static void
6029 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6030 			   int len)
6031 {
6032 	struct trace_eval_map **stop;
6033 	struct trace_eval_map **map;
6034 	union trace_eval_map_item *map_array;
6035 	union trace_eval_map_item *ptr;
6036 
6037 	stop = start + len;
6038 
6039 	/*
6040 	 * The trace_eval_maps contains the map plus a head and tail item,
6041 	 * where the head holds the module and length of array, and the
6042 	 * tail holds a pointer to the next list.
6043 	 */
6044 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6045 	if (!map_array) {
6046 		pr_warn("Unable to allocate trace eval mapping\n");
6047 		return;
6048 	}
6049 
6050 	mutex_lock(&trace_eval_mutex);
6051 
6052 	if (!trace_eval_maps)
6053 		trace_eval_maps = map_array;
6054 	else {
6055 		ptr = trace_eval_maps;
6056 		for (;;) {
6057 			ptr = trace_eval_jmp_to_tail(ptr);
6058 			if (!ptr->tail.next)
6059 				break;
6060 			ptr = ptr->tail.next;
6061 
6062 		}
6063 		ptr->tail.next = map_array;
6064 	}
6065 	map_array->head.mod = mod;
6066 	map_array->head.length = len;
6067 	map_array++;
6068 
6069 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6070 		map_array->map = **map;
6071 		map_array++;
6072 	}
6073 	memset(map_array, 0, sizeof(*map_array));
6074 
6075 	mutex_unlock(&trace_eval_mutex);
6076 }
6077 
6078 static void trace_create_eval_file(struct dentry *d_tracer)
6079 {
6080 	trace_create_file("eval_map", 0444, d_tracer,
6081 			  NULL, &tracing_eval_map_fops);
6082 }
6083 
6084 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6085 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6086 static inline void trace_insert_eval_map_file(struct module *mod,
6087 			      struct trace_eval_map **start, int len) { }
6088 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6089 
6090 static void trace_insert_eval_map(struct module *mod,
6091 				  struct trace_eval_map **start, int len)
6092 {
6093 	struct trace_eval_map **map;
6094 
6095 	if (len <= 0)
6096 		return;
6097 
6098 	map = start;
6099 
6100 	trace_event_eval_update(map, len);
6101 
6102 	trace_insert_eval_map_file(mod, start, len);
6103 }
6104 
6105 static ssize_t
6106 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6107 		       size_t cnt, loff_t *ppos)
6108 {
6109 	struct trace_array *tr = filp->private_data;
6110 	char buf[MAX_TRACER_SIZE+2];
6111 	int r;
6112 
6113 	mutex_lock(&trace_types_lock);
6114 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6115 	mutex_unlock(&trace_types_lock);
6116 
6117 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118 }
6119 
6120 int tracer_init(struct tracer *t, struct trace_array *tr)
6121 {
6122 	tracing_reset_online_cpus(&tr->array_buffer);
6123 	return t->init(tr);
6124 }
6125 
6126 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6127 {
6128 	int cpu;
6129 
6130 	for_each_tracing_cpu(cpu)
6131 		per_cpu_ptr(buf->data, cpu)->entries = val;
6132 }
6133 
6134 #ifdef CONFIG_TRACER_MAX_TRACE
6135 /* resize @tr's buffer to the size of @size_tr's entries */
6136 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6137 					struct array_buffer *size_buf, int cpu_id)
6138 {
6139 	int cpu, ret = 0;
6140 
6141 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6142 		for_each_tracing_cpu(cpu) {
6143 			ret = ring_buffer_resize(trace_buf->buffer,
6144 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6145 			if (ret < 0)
6146 				break;
6147 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6148 				per_cpu_ptr(size_buf->data, cpu)->entries;
6149 		}
6150 	} else {
6151 		ret = ring_buffer_resize(trace_buf->buffer,
6152 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6153 		if (ret == 0)
6154 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6155 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6156 	}
6157 
6158 	return ret;
6159 }
6160 #endif /* CONFIG_TRACER_MAX_TRACE */
6161 
6162 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6163 					unsigned long size, int cpu)
6164 {
6165 	int ret;
6166 
6167 	/*
6168 	 * If kernel or user changes the size of the ring buffer
6169 	 * we use the size that was given, and we can forget about
6170 	 * expanding it later.
6171 	 */
6172 	ring_buffer_expanded = true;
6173 
6174 	/* May be called before buffers are initialized */
6175 	if (!tr->array_buffer.buffer)
6176 		return 0;
6177 
6178 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6179 	if (ret < 0)
6180 		return ret;
6181 
6182 #ifdef CONFIG_TRACER_MAX_TRACE
6183 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6184 	    !tr->current_trace->use_max_tr)
6185 		goto out;
6186 
6187 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6188 	if (ret < 0) {
6189 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6190 						     &tr->array_buffer, cpu);
6191 		if (r < 0) {
6192 			/*
6193 			 * AARGH! We are left with different
6194 			 * size max buffer!!!!
6195 			 * The max buffer is our "snapshot" buffer.
6196 			 * When a tracer needs a snapshot (one of the
6197 			 * latency tracers), it swaps the max buffer
6198 			 * with the saved snap shot. We succeeded to
6199 			 * update the size of the main buffer, but failed to
6200 			 * update the size of the max buffer. But when we tried
6201 			 * to reset the main buffer to the original size, we
6202 			 * failed there too. This is very unlikely to
6203 			 * happen, but if it does, warn and kill all
6204 			 * tracing.
6205 			 */
6206 			WARN_ON(1);
6207 			tracing_disabled = 1;
6208 		}
6209 		return ret;
6210 	}
6211 
6212 	if (cpu == RING_BUFFER_ALL_CPUS)
6213 		set_buffer_entries(&tr->max_buffer, size);
6214 	else
6215 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6216 
6217  out:
6218 #endif /* CONFIG_TRACER_MAX_TRACE */
6219 
6220 	if (cpu == RING_BUFFER_ALL_CPUS)
6221 		set_buffer_entries(&tr->array_buffer, size);
6222 	else
6223 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6224 
6225 	return ret;
6226 }
6227 
6228 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6229 				  unsigned long size, int cpu_id)
6230 {
6231 	int ret;
6232 
6233 	mutex_lock(&trace_types_lock);
6234 
6235 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6236 		/* make sure, this cpu is enabled in the mask */
6237 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6238 			ret = -EINVAL;
6239 			goto out;
6240 		}
6241 	}
6242 
6243 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6244 	if (ret < 0)
6245 		ret = -ENOMEM;
6246 
6247 out:
6248 	mutex_unlock(&trace_types_lock);
6249 
6250 	return ret;
6251 }
6252 
6253 
6254 /**
6255  * tracing_update_buffers - used by tracing facility to expand ring buffers
6256  *
6257  * To save on memory when the tracing is never used on a system with it
6258  * configured in. The ring buffers are set to a minimum size. But once
6259  * a user starts to use the tracing facility, then they need to grow
6260  * to their default size.
6261  *
6262  * This function is to be called when a tracer is about to be used.
6263  */
6264 int tracing_update_buffers(void)
6265 {
6266 	int ret = 0;
6267 
6268 	mutex_lock(&trace_types_lock);
6269 	if (!ring_buffer_expanded)
6270 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6271 						RING_BUFFER_ALL_CPUS);
6272 	mutex_unlock(&trace_types_lock);
6273 
6274 	return ret;
6275 }
6276 
6277 struct trace_option_dentry;
6278 
6279 static void
6280 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6281 
6282 /*
6283  * Used to clear out the tracer before deletion of an instance.
6284  * Must have trace_types_lock held.
6285  */
6286 static void tracing_set_nop(struct trace_array *tr)
6287 {
6288 	if (tr->current_trace == &nop_trace)
6289 		return;
6290 
6291 	tr->current_trace->enabled--;
6292 
6293 	if (tr->current_trace->reset)
6294 		tr->current_trace->reset(tr);
6295 
6296 	tr->current_trace = &nop_trace;
6297 }
6298 
6299 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6300 {
6301 	/* Only enable if the directory has been created already. */
6302 	if (!tr->dir)
6303 		return;
6304 
6305 	create_trace_option_files(tr, t);
6306 }
6307 
6308 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6309 {
6310 	struct tracer *t;
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312 	bool had_max_tr;
6313 #endif
6314 	int ret = 0;
6315 
6316 	mutex_lock(&trace_types_lock);
6317 
6318 	if (!ring_buffer_expanded) {
6319 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6320 						RING_BUFFER_ALL_CPUS);
6321 		if (ret < 0)
6322 			goto out;
6323 		ret = 0;
6324 	}
6325 
6326 	for (t = trace_types; t; t = t->next) {
6327 		if (strcmp(t->name, buf) == 0)
6328 			break;
6329 	}
6330 	if (!t) {
6331 		ret = -EINVAL;
6332 		goto out;
6333 	}
6334 	if (t == tr->current_trace)
6335 		goto out;
6336 
6337 #ifdef CONFIG_TRACER_SNAPSHOT
6338 	if (t->use_max_tr) {
6339 		arch_spin_lock(&tr->max_lock);
6340 		if (tr->cond_snapshot)
6341 			ret = -EBUSY;
6342 		arch_spin_unlock(&tr->max_lock);
6343 		if (ret)
6344 			goto out;
6345 	}
6346 #endif
6347 	/* Some tracers won't work on kernel command line */
6348 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6349 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6350 			t->name);
6351 		goto out;
6352 	}
6353 
6354 	/* Some tracers are only allowed for the top level buffer */
6355 	if (!trace_ok_for_array(t, tr)) {
6356 		ret = -EINVAL;
6357 		goto out;
6358 	}
6359 
6360 	/* If trace pipe files are being read, we can't change the tracer */
6361 	if (tr->trace_ref) {
6362 		ret = -EBUSY;
6363 		goto out;
6364 	}
6365 
6366 	trace_branch_disable();
6367 
6368 	tr->current_trace->enabled--;
6369 
6370 	if (tr->current_trace->reset)
6371 		tr->current_trace->reset(tr);
6372 
6373 	/* Current trace needs to be nop_trace before synchronize_rcu */
6374 	tr->current_trace = &nop_trace;
6375 
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377 	had_max_tr = tr->allocated_snapshot;
6378 
6379 	if (had_max_tr && !t->use_max_tr) {
6380 		/*
6381 		 * We need to make sure that the update_max_tr sees that
6382 		 * current_trace changed to nop_trace to keep it from
6383 		 * swapping the buffers after we resize it.
6384 		 * The update_max_tr is called from interrupts disabled
6385 		 * so a synchronized_sched() is sufficient.
6386 		 */
6387 		synchronize_rcu();
6388 		free_snapshot(tr);
6389 	}
6390 #endif
6391 
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393 	if (t->use_max_tr && !had_max_tr) {
6394 		ret = tracing_alloc_snapshot_instance(tr);
6395 		if (ret < 0)
6396 			goto out;
6397 	}
6398 #endif
6399 
6400 	if (t->init) {
6401 		ret = tracer_init(t, tr);
6402 		if (ret)
6403 			goto out;
6404 	}
6405 
6406 	tr->current_trace = t;
6407 	tr->current_trace->enabled++;
6408 	trace_branch_enable(tr);
6409  out:
6410 	mutex_unlock(&trace_types_lock);
6411 
6412 	return ret;
6413 }
6414 
6415 static ssize_t
6416 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6417 			size_t cnt, loff_t *ppos)
6418 {
6419 	struct trace_array *tr = filp->private_data;
6420 	char buf[MAX_TRACER_SIZE+1];
6421 	int i;
6422 	size_t ret;
6423 	int err;
6424 
6425 	ret = cnt;
6426 
6427 	if (cnt > MAX_TRACER_SIZE)
6428 		cnt = MAX_TRACER_SIZE;
6429 
6430 	if (copy_from_user(buf, ubuf, cnt))
6431 		return -EFAULT;
6432 
6433 	buf[cnt] = 0;
6434 
6435 	/* strip ending whitespace. */
6436 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6437 		buf[i] = 0;
6438 
6439 	err = tracing_set_tracer(tr, buf);
6440 	if (err)
6441 		return err;
6442 
6443 	*ppos += ret;
6444 
6445 	return ret;
6446 }
6447 
6448 static ssize_t
6449 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6450 		   size_t cnt, loff_t *ppos)
6451 {
6452 	char buf[64];
6453 	int r;
6454 
6455 	r = snprintf(buf, sizeof(buf), "%ld\n",
6456 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6457 	if (r > sizeof(buf))
6458 		r = sizeof(buf);
6459 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6460 }
6461 
6462 static ssize_t
6463 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6464 		    size_t cnt, loff_t *ppos)
6465 {
6466 	unsigned long val;
6467 	int ret;
6468 
6469 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6470 	if (ret)
6471 		return ret;
6472 
6473 	*ptr = val * 1000;
6474 
6475 	return cnt;
6476 }
6477 
6478 static ssize_t
6479 tracing_thresh_read(struct file *filp, char __user *ubuf,
6480 		    size_t cnt, loff_t *ppos)
6481 {
6482 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6483 }
6484 
6485 static ssize_t
6486 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6487 		     size_t cnt, loff_t *ppos)
6488 {
6489 	struct trace_array *tr = filp->private_data;
6490 	int ret;
6491 
6492 	mutex_lock(&trace_types_lock);
6493 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6494 	if (ret < 0)
6495 		goto out;
6496 
6497 	if (tr->current_trace->update_thresh) {
6498 		ret = tr->current_trace->update_thresh(tr);
6499 		if (ret < 0)
6500 			goto out;
6501 	}
6502 
6503 	ret = cnt;
6504 out:
6505 	mutex_unlock(&trace_types_lock);
6506 
6507 	return ret;
6508 }
6509 
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511 
6512 static ssize_t
6513 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6514 		     size_t cnt, loff_t *ppos)
6515 {
6516 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6517 }
6518 
6519 static ssize_t
6520 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521 		      size_t cnt, loff_t *ppos)
6522 {
6523 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6524 }
6525 
6526 #endif
6527 
6528 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6529 {
6530 	struct trace_array *tr = inode->i_private;
6531 	struct trace_iterator *iter;
6532 	int ret;
6533 
6534 	ret = tracing_check_open_get_tr(tr);
6535 	if (ret)
6536 		return ret;
6537 
6538 	mutex_lock(&trace_types_lock);
6539 
6540 	/* create a buffer to store the information to pass to userspace */
6541 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6542 	if (!iter) {
6543 		ret = -ENOMEM;
6544 		__trace_array_put(tr);
6545 		goto out;
6546 	}
6547 
6548 	trace_seq_init(&iter->seq);
6549 	iter->trace = tr->current_trace;
6550 
6551 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6552 		ret = -ENOMEM;
6553 		goto fail;
6554 	}
6555 
6556 	/* trace pipe does not show start of buffer */
6557 	cpumask_setall(iter->started);
6558 
6559 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6560 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6561 
6562 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6563 	if (trace_clocks[tr->clock_id].in_ns)
6564 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6565 
6566 	iter->tr = tr;
6567 	iter->array_buffer = &tr->array_buffer;
6568 	iter->cpu_file = tracing_get_cpu(inode);
6569 	mutex_init(&iter->mutex);
6570 	filp->private_data = iter;
6571 
6572 	if (iter->trace->pipe_open)
6573 		iter->trace->pipe_open(iter);
6574 
6575 	nonseekable_open(inode, filp);
6576 
6577 	tr->trace_ref++;
6578 out:
6579 	mutex_unlock(&trace_types_lock);
6580 	return ret;
6581 
6582 fail:
6583 	kfree(iter);
6584 	__trace_array_put(tr);
6585 	mutex_unlock(&trace_types_lock);
6586 	return ret;
6587 }
6588 
6589 static int tracing_release_pipe(struct inode *inode, struct file *file)
6590 {
6591 	struct trace_iterator *iter = file->private_data;
6592 	struct trace_array *tr = inode->i_private;
6593 
6594 	mutex_lock(&trace_types_lock);
6595 
6596 	tr->trace_ref--;
6597 
6598 	if (iter->trace->pipe_close)
6599 		iter->trace->pipe_close(iter);
6600 
6601 	mutex_unlock(&trace_types_lock);
6602 
6603 	free_cpumask_var(iter->started);
6604 	mutex_destroy(&iter->mutex);
6605 	kfree(iter);
6606 
6607 	trace_array_put(tr);
6608 
6609 	return 0;
6610 }
6611 
6612 static __poll_t
6613 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6614 {
6615 	struct trace_array *tr = iter->tr;
6616 
6617 	/* Iterators are static, they should be filled or empty */
6618 	if (trace_buffer_iter(iter, iter->cpu_file))
6619 		return EPOLLIN | EPOLLRDNORM;
6620 
6621 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6622 		/*
6623 		 * Always select as readable when in blocking mode
6624 		 */
6625 		return EPOLLIN | EPOLLRDNORM;
6626 	else
6627 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6628 					     filp, poll_table);
6629 }
6630 
6631 static __poll_t
6632 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6633 {
6634 	struct trace_iterator *iter = filp->private_data;
6635 
6636 	return trace_poll(iter, filp, poll_table);
6637 }
6638 
6639 /* Must be called with iter->mutex held. */
6640 static int tracing_wait_pipe(struct file *filp)
6641 {
6642 	struct trace_iterator *iter = filp->private_data;
6643 	int ret;
6644 
6645 	while (trace_empty(iter)) {
6646 
6647 		if ((filp->f_flags & O_NONBLOCK)) {
6648 			return -EAGAIN;
6649 		}
6650 
6651 		/*
6652 		 * We block until we read something and tracing is disabled.
6653 		 * We still block if tracing is disabled, but we have never
6654 		 * read anything. This allows a user to cat this file, and
6655 		 * then enable tracing. But after we have read something,
6656 		 * we give an EOF when tracing is again disabled.
6657 		 *
6658 		 * iter->pos will be 0 if we haven't read anything.
6659 		 */
6660 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6661 			break;
6662 
6663 		mutex_unlock(&iter->mutex);
6664 
6665 		ret = wait_on_pipe(iter, 0);
6666 
6667 		mutex_lock(&iter->mutex);
6668 
6669 		if (ret)
6670 			return ret;
6671 	}
6672 
6673 	return 1;
6674 }
6675 
6676 /*
6677  * Consumer reader.
6678  */
6679 static ssize_t
6680 tracing_read_pipe(struct file *filp, char __user *ubuf,
6681 		  size_t cnt, loff_t *ppos)
6682 {
6683 	struct trace_iterator *iter = filp->private_data;
6684 	ssize_t sret;
6685 
6686 	/*
6687 	 * Avoid more than one consumer on a single file descriptor
6688 	 * This is just a matter of traces coherency, the ring buffer itself
6689 	 * is protected.
6690 	 */
6691 	mutex_lock(&iter->mutex);
6692 
6693 	/* return any leftover data */
6694 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6695 	if (sret != -EBUSY)
6696 		goto out;
6697 
6698 	trace_seq_init(&iter->seq);
6699 
6700 	if (iter->trace->read) {
6701 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6702 		if (sret)
6703 			goto out;
6704 	}
6705 
6706 waitagain:
6707 	sret = tracing_wait_pipe(filp);
6708 	if (sret <= 0)
6709 		goto out;
6710 
6711 	/* stop when tracing is finished */
6712 	if (trace_empty(iter)) {
6713 		sret = 0;
6714 		goto out;
6715 	}
6716 
6717 	if (cnt >= PAGE_SIZE)
6718 		cnt = PAGE_SIZE - 1;
6719 
6720 	/* reset all but tr, trace, and overruns */
6721 	memset(&iter->seq, 0,
6722 	       sizeof(struct trace_iterator) -
6723 	       offsetof(struct trace_iterator, seq));
6724 	cpumask_clear(iter->started);
6725 	trace_seq_init(&iter->seq);
6726 	iter->pos = -1;
6727 
6728 	trace_event_read_lock();
6729 	trace_access_lock(iter->cpu_file);
6730 	while (trace_find_next_entry_inc(iter) != NULL) {
6731 		enum print_line_t ret;
6732 		int save_len = iter->seq.seq.len;
6733 
6734 		ret = print_trace_line(iter);
6735 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6736 			/* don't print partial lines */
6737 			iter->seq.seq.len = save_len;
6738 			break;
6739 		}
6740 		if (ret != TRACE_TYPE_NO_CONSUME)
6741 			trace_consume(iter);
6742 
6743 		if (trace_seq_used(&iter->seq) >= cnt)
6744 			break;
6745 
6746 		/*
6747 		 * Setting the full flag means we reached the trace_seq buffer
6748 		 * size and we should leave by partial output condition above.
6749 		 * One of the trace_seq_* functions is not used properly.
6750 		 */
6751 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6752 			  iter->ent->type);
6753 	}
6754 	trace_access_unlock(iter->cpu_file);
6755 	trace_event_read_unlock();
6756 
6757 	/* Now copy what we have to the user */
6758 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6759 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6760 		trace_seq_init(&iter->seq);
6761 
6762 	/*
6763 	 * If there was nothing to send to user, in spite of consuming trace
6764 	 * entries, go back to wait for more entries.
6765 	 */
6766 	if (sret == -EBUSY)
6767 		goto waitagain;
6768 
6769 out:
6770 	mutex_unlock(&iter->mutex);
6771 
6772 	return sret;
6773 }
6774 
6775 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6776 				     unsigned int idx)
6777 {
6778 	__free_page(spd->pages[idx]);
6779 }
6780 
6781 static size_t
6782 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6783 {
6784 	size_t count;
6785 	int save_len;
6786 	int ret;
6787 
6788 	/* Seq buffer is page-sized, exactly what we need. */
6789 	for (;;) {
6790 		save_len = iter->seq.seq.len;
6791 		ret = print_trace_line(iter);
6792 
6793 		if (trace_seq_has_overflowed(&iter->seq)) {
6794 			iter->seq.seq.len = save_len;
6795 			break;
6796 		}
6797 
6798 		/*
6799 		 * This should not be hit, because it should only
6800 		 * be set if the iter->seq overflowed. But check it
6801 		 * anyway to be safe.
6802 		 */
6803 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6804 			iter->seq.seq.len = save_len;
6805 			break;
6806 		}
6807 
6808 		count = trace_seq_used(&iter->seq) - save_len;
6809 		if (rem < count) {
6810 			rem = 0;
6811 			iter->seq.seq.len = save_len;
6812 			break;
6813 		}
6814 
6815 		if (ret != TRACE_TYPE_NO_CONSUME)
6816 			trace_consume(iter);
6817 		rem -= count;
6818 		if (!trace_find_next_entry_inc(iter))	{
6819 			rem = 0;
6820 			iter->ent = NULL;
6821 			break;
6822 		}
6823 	}
6824 
6825 	return rem;
6826 }
6827 
6828 static ssize_t tracing_splice_read_pipe(struct file *filp,
6829 					loff_t *ppos,
6830 					struct pipe_inode_info *pipe,
6831 					size_t len,
6832 					unsigned int flags)
6833 {
6834 	struct page *pages_def[PIPE_DEF_BUFFERS];
6835 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6836 	struct trace_iterator *iter = filp->private_data;
6837 	struct splice_pipe_desc spd = {
6838 		.pages		= pages_def,
6839 		.partial	= partial_def,
6840 		.nr_pages	= 0, /* This gets updated below. */
6841 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6842 		.ops		= &default_pipe_buf_ops,
6843 		.spd_release	= tracing_spd_release_pipe,
6844 	};
6845 	ssize_t ret;
6846 	size_t rem;
6847 	unsigned int i;
6848 
6849 	if (splice_grow_spd(pipe, &spd))
6850 		return -ENOMEM;
6851 
6852 	mutex_lock(&iter->mutex);
6853 
6854 	if (iter->trace->splice_read) {
6855 		ret = iter->trace->splice_read(iter, filp,
6856 					       ppos, pipe, len, flags);
6857 		if (ret)
6858 			goto out_err;
6859 	}
6860 
6861 	ret = tracing_wait_pipe(filp);
6862 	if (ret <= 0)
6863 		goto out_err;
6864 
6865 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6866 		ret = -EFAULT;
6867 		goto out_err;
6868 	}
6869 
6870 	trace_event_read_lock();
6871 	trace_access_lock(iter->cpu_file);
6872 
6873 	/* Fill as many pages as possible. */
6874 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6875 		spd.pages[i] = alloc_page(GFP_KERNEL);
6876 		if (!spd.pages[i])
6877 			break;
6878 
6879 		rem = tracing_fill_pipe_page(rem, iter);
6880 
6881 		/* Copy the data into the page, so we can start over. */
6882 		ret = trace_seq_to_buffer(&iter->seq,
6883 					  page_address(spd.pages[i]),
6884 					  trace_seq_used(&iter->seq));
6885 		if (ret < 0) {
6886 			__free_page(spd.pages[i]);
6887 			break;
6888 		}
6889 		spd.partial[i].offset = 0;
6890 		spd.partial[i].len = trace_seq_used(&iter->seq);
6891 
6892 		trace_seq_init(&iter->seq);
6893 	}
6894 
6895 	trace_access_unlock(iter->cpu_file);
6896 	trace_event_read_unlock();
6897 	mutex_unlock(&iter->mutex);
6898 
6899 	spd.nr_pages = i;
6900 
6901 	if (i)
6902 		ret = splice_to_pipe(pipe, &spd);
6903 	else
6904 		ret = 0;
6905 out:
6906 	splice_shrink_spd(&spd);
6907 	return ret;
6908 
6909 out_err:
6910 	mutex_unlock(&iter->mutex);
6911 	goto out;
6912 }
6913 
6914 static ssize_t
6915 tracing_entries_read(struct file *filp, char __user *ubuf,
6916 		     size_t cnt, loff_t *ppos)
6917 {
6918 	struct inode *inode = file_inode(filp);
6919 	struct trace_array *tr = inode->i_private;
6920 	int cpu = tracing_get_cpu(inode);
6921 	char buf[64];
6922 	int r = 0;
6923 	ssize_t ret;
6924 
6925 	mutex_lock(&trace_types_lock);
6926 
6927 	if (cpu == RING_BUFFER_ALL_CPUS) {
6928 		int cpu, buf_size_same;
6929 		unsigned long size;
6930 
6931 		size = 0;
6932 		buf_size_same = 1;
6933 		/* check if all cpu sizes are same */
6934 		for_each_tracing_cpu(cpu) {
6935 			/* fill in the size from first enabled cpu */
6936 			if (size == 0)
6937 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6938 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6939 				buf_size_same = 0;
6940 				break;
6941 			}
6942 		}
6943 
6944 		if (buf_size_same) {
6945 			if (!ring_buffer_expanded)
6946 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6947 					    size >> 10,
6948 					    trace_buf_size >> 10);
6949 			else
6950 				r = sprintf(buf, "%lu\n", size >> 10);
6951 		} else
6952 			r = sprintf(buf, "X\n");
6953 	} else
6954 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6955 
6956 	mutex_unlock(&trace_types_lock);
6957 
6958 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6959 	return ret;
6960 }
6961 
6962 static ssize_t
6963 tracing_entries_write(struct file *filp, const char __user *ubuf,
6964 		      size_t cnt, loff_t *ppos)
6965 {
6966 	struct inode *inode = file_inode(filp);
6967 	struct trace_array *tr = inode->i_private;
6968 	unsigned long val;
6969 	int ret;
6970 
6971 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972 	if (ret)
6973 		return ret;
6974 
6975 	/* must have at least 1 entry */
6976 	if (!val)
6977 		return -EINVAL;
6978 
6979 	/* value is in KB */
6980 	val <<= 10;
6981 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6982 	if (ret < 0)
6983 		return ret;
6984 
6985 	*ppos += cnt;
6986 
6987 	return cnt;
6988 }
6989 
6990 static ssize_t
6991 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6992 				size_t cnt, loff_t *ppos)
6993 {
6994 	struct trace_array *tr = filp->private_data;
6995 	char buf[64];
6996 	int r, cpu;
6997 	unsigned long size = 0, expanded_size = 0;
6998 
6999 	mutex_lock(&trace_types_lock);
7000 	for_each_tracing_cpu(cpu) {
7001 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7002 		if (!ring_buffer_expanded)
7003 			expanded_size += trace_buf_size >> 10;
7004 	}
7005 	if (ring_buffer_expanded)
7006 		r = sprintf(buf, "%lu\n", size);
7007 	else
7008 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7009 	mutex_unlock(&trace_types_lock);
7010 
7011 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012 }
7013 
7014 static ssize_t
7015 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7016 			  size_t cnt, loff_t *ppos)
7017 {
7018 	/*
7019 	 * There is no need to read what the user has written, this function
7020 	 * is just to make sure that there is no error when "echo" is used
7021 	 */
7022 
7023 	*ppos += cnt;
7024 
7025 	return cnt;
7026 }
7027 
7028 static int
7029 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7030 {
7031 	struct trace_array *tr = inode->i_private;
7032 
7033 	/* disable tracing ? */
7034 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7035 		tracer_tracing_off(tr);
7036 	/* resize the ring buffer to 0 */
7037 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7038 
7039 	trace_array_put(tr);
7040 
7041 	return 0;
7042 }
7043 
7044 static ssize_t
7045 tracing_mark_write(struct file *filp, const char __user *ubuf,
7046 					size_t cnt, loff_t *fpos)
7047 {
7048 	struct trace_array *tr = filp->private_data;
7049 	struct ring_buffer_event *event;
7050 	enum event_trigger_type tt = ETT_NONE;
7051 	struct trace_buffer *buffer;
7052 	struct print_entry *entry;
7053 	ssize_t written;
7054 	int size;
7055 	int len;
7056 
7057 /* Used in tracing_mark_raw_write() as well */
7058 #define FAULTED_STR "<faulted>"
7059 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7060 
7061 	if (tracing_disabled)
7062 		return -EINVAL;
7063 
7064 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7065 		return -EINVAL;
7066 
7067 	if (cnt > TRACE_BUF_SIZE)
7068 		cnt = TRACE_BUF_SIZE;
7069 
7070 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7071 
7072 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7073 
7074 	/* If less than "<faulted>", then make sure we can still add that */
7075 	if (cnt < FAULTED_SIZE)
7076 		size += FAULTED_SIZE - cnt;
7077 
7078 	buffer = tr->array_buffer.buffer;
7079 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7080 					    tracing_gen_ctx());
7081 	if (unlikely(!event))
7082 		/* Ring buffer disabled, return as if not open for write */
7083 		return -EBADF;
7084 
7085 	entry = ring_buffer_event_data(event);
7086 	entry->ip = _THIS_IP_;
7087 
7088 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7089 	if (len) {
7090 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7091 		cnt = FAULTED_SIZE;
7092 		written = -EFAULT;
7093 	} else
7094 		written = cnt;
7095 
7096 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7097 		/* do not add \n before testing triggers, but add \0 */
7098 		entry->buf[cnt] = '\0';
7099 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7100 	}
7101 
7102 	if (entry->buf[cnt - 1] != '\n') {
7103 		entry->buf[cnt] = '\n';
7104 		entry->buf[cnt + 1] = '\0';
7105 	} else
7106 		entry->buf[cnt] = '\0';
7107 
7108 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7109 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7110 	__buffer_unlock_commit(buffer, event);
7111 
7112 	if (tt)
7113 		event_triggers_post_call(tr->trace_marker_file, tt);
7114 
7115 	if (written > 0)
7116 		*fpos += written;
7117 
7118 	return written;
7119 }
7120 
7121 /* Limit it for now to 3K (including tag) */
7122 #define RAW_DATA_MAX_SIZE (1024*3)
7123 
7124 static ssize_t
7125 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7126 					size_t cnt, loff_t *fpos)
7127 {
7128 	struct trace_array *tr = filp->private_data;
7129 	struct ring_buffer_event *event;
7130 	struct trace_buffer *buffer;
7131 	struct raw_data_entry *entry;
7132 	ssize_t written;
7133 	int size;
7134 	int len;
7135 
7136 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7137 
7138 	if (tracing_disabled)
7139 		return -EINVAL;
7140 
7141 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7142 		return -EINVAL;
7143 
7144 	/* The marker must at least have a tag id */
7145 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7146 		return -EINVAL;
7147 
7148 	if (cnt > TRACE_BUF_SIZE)
7149 		cnt = TRACE_BUF_SIZE;
7150 
7151 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7152 
7153 	size = sizeof(*entry) + cnt;
7154 	if (cnt < FAULT_SIZE_ID)
7155 		size += FAULT_SIZE_ID - cnt;
7156 
7157 	buffer = tr->array_buffer.buffer;
7158 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7159 					    tracing_gen_ctx());
7160 	if (!event)
7161 		/* Ring buffer disabled, return as if not open for write */
7162 		return -EBADF;
7163 
7164 	entry = ring_buffer_event_data(event);
7165 
7166 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7167 	if (len) {
7168 		entry->id = -1;
7169 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7170 		written = -EFAULT;
7171 	} else
7172 		written = cnt;
7173 
7174 	__buffer_unlock_commit(buffer, event);
7175 
7176 	if (written > 0)
7177 		*fpos += written;
7178 
7179 	return written;
7180 }
7181 
7182 static int tracing_clock_show(struct seq_file *m, void *v)
7183 {
7184 	struct trace_array *tr = m->private;
7185 	int i;
7186 
7187 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7188 		seq_printf(m,
7189 			"%s%s%s%s", i ? " " : "",
7190 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7191 			i == tr->clock_id ? "]" : "");
7192 	seq_putc(m, '\n');
7193 
7194 	return 0;
7195 }
7196 
7197 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7198 {
7199 	int i;
7200 
7201 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7202 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7203 			break;
7204 	}
7205 	if (i == ARRAY_SIZE(trace_clocks))
7206 		return -EINVAL;
7207 
7208 	mutex_lock(&trace_types_lock);
7209 
7210 	tr->clock_id = i;
7211 
7212 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7213 
7214 	/*
7215 	 * New clock may not be consistent with the previous clock.
7216 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7217 	 */
7218 	tracing_reset_online_cpus(&tr->array_buffer);
7219 
7220 #ifdef CONFIG_TRACER_MAX_TRACE
7221 	if (tr->max_buffer.buffer)
7222 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7223 	tracing_reset_online_cpus(&tr->max_buffer);
7224 #endif
7225 
7226 	mutex_unlock(&trace_types_lock);
7227 
7228 	return 0;
7229 }
7230 
7231 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7232 				   size_t cnt, loff_t *fpos)
7233 {
7234 	struct seq_file *m = filp->private_data;
7235 	struct trace_array *tr = m->private;
7236 	char buf[64];
7237 	const char *clockstr;
7238 	int ret;
7239 
7240 	if (cnt >= sizeof(buf))
7241 		return -EINVAL;
7242 
7243 	if (copy_from_user(buf, ubuf, cnt))
7244 		return -EFAULT;
7245 
7246 	buf[cnt] = 0;
7247 
7248 	clockstr = strstrip(buf);
7249 
7250 	ret = tracing_set_clock(tr, clockstr);
7251 	if (ret)
7252 		return ret;
7253 
7254 	*fpos += cnt;
7255 
7256 	return cnt;
7257 }
7258 
7259 static int tracing_clock_open(struct inode *inode, struct file *file)
7260 {
7261 	struct trace_array *tr = inode->i_private;
7262 	int ret;
7263 
7264 	ret = tracing_check_open_get_tr(tr);
7265 	if (ret)
7266 		return ret;
7267 
7268 	ret = single_open(file, tracing_clock_show, inode->i_private);
7269 	if (ret < 0)
7270 		trace_array_put(tr);
7271 
7272 	return ret;
7273 }
7274 
7275 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7276 {
7277 	struct trace_array *tr = m->private;
7278 
7279 	mutex_lock(&trace_types_lock);
7280 
7281 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7282 		seq_puts(m, "delta [absolute]\n");
7283 	else
7284 		seq_puts(m, "[delta] absolute\n");
7285 
7286 	mutex_unlock(&trace_types_lock);
7287 
7288 	return 0;
7289 }
7290 
7291 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7292 {
7293 	struct trace_array *tr = inode->i_private;
7294 	int ret;
7295 
7296 	ret = tracing_check_open_get_tr(tr);
7297 	if (ret)
7298 		return ret;
7299 
7300 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7301 	if (ret < 0)
7302 		trace_array_put(tr);
7303 
7304 	return ret;
7305 }
7306 
7307 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7308 {
7309 	if (rbe == this_cpu_read(trace_buffered_event))
7310 		return ring_buffer_time_stamp(buffer);
7311 
7312 	return ring_buffer_event_time_stamp(buffer, rbe);
7313 }
7314 
7315 /*
7316  * Set or disable using the per CPU trace_buffer_event when possible.
7317  */
7318 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7319 {
7320 	int ret = 0;
7321 
7322 	mutex_lock(&trace_types_lock);
7323 
7324 	if (set && tr->no_filter_buffering_ref++)
7325 		goto out;
7326 
7327 	if (!set) {
7328 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7329 			ret = -EINVAL;
7330 			goto out;
7331 		}
7332 
7333 		--tr->no_filter_buffering_ref;
7334 	}
7335  out:
7336 	mutex_unlock(&trace_types_lock);
7337 
7338 	return ret;
7339 }
7340 
7341 struct ftrace_buffer_info {
7342 	struct trace_iterator	iter;
7343 	void			*spare;
7344 	unsigned int		spare_cpu;
7345 	unsigned int		read;
7346 };
7347 
7348 #ifdef CONFIG_TRACER_SNAPSHOT
7349 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7350 {
7351 	struct trace_array *tr = inode->i_private;
7352 	struct trace_iterator *iter;
7353 	struct seq_file *m;
7354 	int ret;
7355 
7356 	ret = tracing_check_open_get_tr(tr);
7357 	if (ret)
7358 		return ret;
7359 
7360 	if (file->f_mode & FMODE_READ) {
7361 		iter = __tracing_open(inode, file, true);
7362 		if (IS_ERR(iter))
7363 			ret = PTR_ERR(iter);
7364 	} else {
7365 		/* Writes still need the seq_file to hold the private data */
7366 		ret = -ENOMEM;
7367 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7368 		if (!m)
7369 			goto out;
7370 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7371 		if (!iter) {
7372 			kfree(m);
7373 			goto out;
7374 		}
7375 		ret = 0;
7376 
7377 		iter->tr = tr;
7378 		iter->array_buffer = &tr->max_buffer;
7379 		iter->cpu_file = tracing_get_cpu(inode);
7380 		m->private = iter;
7381 		file->private_data = m;
7382 	}
7383 out:
7384 	if (ret < 0)
7385 		trace_array_put(tr);
7386 
7387 	return ret;
7388 }
7389 
7390 static ssize_t
7391 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7392 		       loff_t *ppos)
7393 {
7394 	struct seq_file *m = filp->private_data;
7395 	struct trace_iterator *iter = m->private;
7396 	struct trace_array *tr = iter->tr;
7397 	unsigned long val;
7398 	int ret;
7399 
7400 	ret = tracing_update_buffers();
7401 	if (ret < 0)
7402 		return ret;
7403 
7404 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7405 	if (ret)
7406 		return ret;
7407 
7408 	mutex_lock(&trace_types_lock);
7409 
7410 	if (tr->current_trace->use_max_tr) {
7411 		ret = -EBUSY;
7412 		goto out;
7413 	}
7414 
7415 	arch_spin_lock(&tr->max_lock);
7416 	if (tr->cond_snapshot)
7417 		ret = -EBUSY;
7418 	arch_spin_unlock(&tr->max_lock);
7419 	if (ret)
7420 		goto out;
7421 
7422 	switch (val) {
7423 	case 0:
7424 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7425 			ret = -EINVAL;
7426 			break;
7427 		}
7428 		if (tr->allocated_snapshot)
7429 			free_snapshot(tr);
7430 		break;
7431 	case 1:
7432 /* Only allow per-cpu swap if the ring buffer supports it */
7433 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7434 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7435 			ret = -EINVAL;
7436 			break;
7437 		}
7438 #endif
7439 		if (tr->allocated_snapshot)
7440 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7441 					&tr->array_buffer, iter->cpu_file);
7442 		else
7443 			ret = tracing_alloc_snapshot_instance(tr);
7444 		if (ret < 0)
7445 			break;
7446 		local_irq_disable();
7447 		/* Now, we're going to swap */
7448 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7449 			update_max_tr(tr, current, smp_processor_id(), NULL);
7450 		else
7451 			update_max_tr_single(tr, current, iter->cpu_file);
7452 		local_irq_enable();
7453 		break;
7454 	default:
7455 		if (tr->allocated_snapshot) {
7456 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7457 				tracing_reset_online_cpus(&tr->max_buffer);
7458 			else
7459 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7460 		}
7461 		break;
7462 	}
7463 
7464 	if (ret >= 0) {
7465 		*ppos += cnt;
7466 		ret = cnt;
7467 	}
7468 out:
7469 	mutex_unlock(&trace_types_lock);
7470 	return ret;
7471 }
7472 
7473 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7474 {
7475 	struct seq_file *m = file->private_data;
7476 	int ret;
7477 
7478 	ret = tracing_release(inode, file);
7479 
7480 	if (file->f_mode & FMODE_READ)
7481 		return ret;
7482 
7483 	/* If write only, the seq_file is just a stub */
7484 	if (m)
7485 		kfree(m->private);
7486 	kfree(m);
7487 
7488 	return 0;
7489 }
7490 
7491 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7492 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7493 				    size_t count, loff_t *ppos);
7494 static int tracing_buffers_release(struct inode *inode, struct file *file);
7495 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7496 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7497 
7498 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7499 {
7500 	struct ftrace_buffer_info *info;
7501 	int ret;
7502 
7503 	/* The following checks for tracefs lockdown */
7504 	ret = tracing_buffers_open(inode, filp);
7505 	if (ret < 0)
7506 		return ret;
7507 
7508 	info = filp->private_data;
7509 
7510 	if (info->iter.trace->use_max_tr) {
7511 		tracing_buffers_release(inode, filp);
7512 		return -EBUSY;
7513 	}
7514 
7515 	info->iter.snapshot = true;
7516 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7517 
7518 	return ret;
7519 }
7520 
7521 #endif /* CONFIG_TRACER_SNAPSHOT */
7522 
7523 
7524 static const struct file_operations tracing_thresh_fops = {
7525 	.open		= tracing_open_generic,
7526 	.read		= tracing_thresh_read,
7527 	.write		= tracing_thresh_write,
7528 	.llseek		= generic_file_llseek,
7529 };
7530 
7531 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7532 static const struct file_operations tracing_max_lat_fops = {
7533 	.open		= tracing_open_generic,
7534 	.read		= tracing_max_lat_read,
7535 	.write		= tracing_max_lat_write,
7536 	.llseek		= generic_file_llseek,
7537 };
7538 #endif
7539 
7540 static const struct file_operations set_tracer_fops = {
7541 	.open		= tracing_open_generic,
7542 	.read		= tracing_set_trace_read,
7543 	.write		= tracing_set_trace_write,
7544 	.llseek		= generic_file_llseek,
7545 };
7546 
7547 static const struct file_operations tracing_pipe_fops = {
7548 	.open		= tracing_open_pipe,
7549 	.poll		= tracing_poll_pipe,
7550 	.read		= tracing_read_pipe,
7551 	.splice_read	= tracing_splice_read_pipe,
7552 	.release	= tracing_release_pipe,
7553 	.llseek		= no_llseek,
7554 };
7555 
7556 static const struct file_operations tracing_entries_fops = {
7557 	.open		= tracing_open_generic_tr,
7558 	.read		= tracing_entries_read,
7559 	.write		= tracing_entries_write,
7560 	.llseek		= generic_file_llseek,
7561 	.release	= tracing_release_generic_tr,
7562 };
7563 
7564 static const struct file_operations tracing_total_entries_fops = {
7565 	.open		= tracing_open_generic_tr,
7566 	.read		= tracing_total_entries_read,
7567 	.llseek		= generic_file_llseek,
7568 	.release	= tracing_release_generic_tr,
7569 };
7570 
7571 static const struct file_operations tracing_free_buffer_fops = {
7572 	.open		= tracing_open_generic_tr,
7573 	.write		= tracing_free_buffer_write,
7574 	.release	= tracing_free_buffer_release,
7575 };
7576 
7577 static const struct file_operations tracing_mark_fops = {
7578 	.open		= tracing_open_generic_tr,
7579 	.write		= tracing_mark_write,
7580 	.llseek		= generic_file_llseek,
7581 	.release	= tracing_release_generic_tr,
7582 };
7583 
7584 static const struct file_operations tracing_mark_raw_fops = {
7585 	.open		= tracing_open_generic_tr,
7586 	.write		= tracing_mark_raw_write,
7587 	.llseek		= generic_file_llseek,
7588 	.release	= tracing_release_generic_tr,
7589 };
7590 
7591 static const struct file_operations trace_clock_fops = {
7592 	.open		= tracing_clock_open,
7593 	.read		= seq_read,
7594 	.llseek		= seq_lseek,
7595 	.release	= tracing_single_release_tr,
7596 	.write		= tracing_clock_write,
7597 };
7598 
7599 static const struct file_operations trace_time_stamp_mode_fops = {
7600 	.open		= tracing_time_stamp_mode_open,
7601 	.read		= seq_read,
7602 	.llseek		= seq_lseek,
7603 	.release	= tracing_single_release_tr,
7604 };
7605 
7606 #ifdef CONFIG_TRACER_SNAPSHOT
7607 static const struct file_operations snapshot_fops = {
7608 	.open		= tracing_snapshot_open,
7609 	.read		= seq_read,
7610 	.write		= tracing_snapshot_write,
7611 	.llseek		= tracing_lseek,
7612 	.release	= tracing_snapshot_release,
7613 };
7614 
7615 static const struct file_operations snapshot_raw_fops = {
7616 	.open		= snapshot_raw_open,
7617 	.read		= tracing_buffers_read,
7618 	.release	= tracing_buffers_release,
7619 	.splice_read	= tracing_buffers_splice_read,
7620 	.llseek		= no_llseek,
7621 };
7622 
7623 #endif /* CONFIG_TRACER_SNAPSHOT */
7624 
7625 /*
7626  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7627  * @filp: The active open file structure
7628  * @ubuf: The userspace provided buffer to read value into
7629  * @cnt: The maximum number of bytes to read
7630  * @ppos: The current "file" position
7631  *
7632  * This function implements the write interface for a struct trace_min_max_param.
7633  * The filp->private_data must point to a trace_min_max_param structure that
7634  * defines where to write the value, the min and the max acceptable values,
7635  * and a lock to protect the write.
7636  */
7637 static ssize_t
7638 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7639 {
7640 	struct trace_min_max_param *param = filp->private_data;
7641 	u64 val;
7642 	int err;
7643 
7644 	if (!param)
7645 		return -EFAULT;
7646 
7647 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7648 	if (err)
7649 		return err;
7650 
7651 	if (param->lock)
7652 		mutex_lock(param->lock);
7653 
7654 	if (param->min && val < *param->min)
7655 		err = -EINVAL;
7656 
7657 	if (param->max && val > *param->max)
7658 		err = -EINVAL;
7659 
7660 	if (!err)
7661 		*param->val = val;
7662 
7663 	if (param->lock)
7664 		mutex_unlock(param->lock);
7665 
7666 	if (err)
7667 		return err;
7668 
7669 	return cnt;
7670 }
7671 
7672 /*
7673  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7674  * @filp: The active open file structure
7675  * @ubuf: The userspace provided buffer to read value into
7676  * @cnt: The maximum number of bytes to read
7677  * @ppos: The current "file" position
7678  *
7679  * This function implements the read interface for a struct trace_min_max_param.
7680  * The filp->private_data must point to a trace_min_max_param struct with valid
7681  * data.
7682  */
7683 static ssize_t
7684 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7685 {
7686 	struct trace_min_max_param *param = filp->private_data;
7687 	char buf[U64_STR_SIZE];
7688 	int len;
7689 	u64 val;
7690 
7691 	if (!param)
7692 		return -EFAULT;
7693 
7694 	val = *param->val;
7695 
7696 	if (cnt > sizeof(buf))
7697 		cnt = sizeof(buf);
7698 
7699 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7700 
7701 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7702 }
7703 
7704 const struct file_operations trace_min_max_fops = {
7705 	.open		= tracing_open_generic,
7706 	.read		= trace_min_max_read,
7707 	.write		= trace_min_max_write,
7708 };
7709 
7710 #define TRACING_LOG_ERRS_MAX	8
7711 #define TRACING_LOG_LOC_MAX	128
7712 
7713 #define CMD_PREFIX "  Command: "
7714 
7715 struct err_info {
7716 	const char	**errs;	/* ptr to loc-specific array of err strings */
7717 	u8		type;	/* index into errs -> specific err string */
7718 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7719 	u64		ts;
7720 };
7721 
7722 struct tracing_log_err {
7723 	struct list_head	list;
7724 	struct err_info		info;
7725 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7726 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7727 };
7728 
7729 static DEFINE_MUTEX(tracing_err_log_lock);
7730 
7731 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7732 {
7733 	struct tracing_log_err *err;
7734 
7735 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7736 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7737 		if (!err)
7738 			err = ERR_PTR(-ENOMEM);
7739 		tr->n_err_log_entries++;
7740 
7741 		return err;
7742 	}
7743 
7744 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7745 	list_del(&err->list);
7746 
7747 	return err;
7748 }
7749 
7750 /**
7751  * err_pos - find the position of a string within a command for error careting
7752  * @cmd: The tracing command that caused the error
7753  * @str: The string to position the caret at within @cmd
7754  *
7755  * Finds the position of the first occurrence of @str within @cmd.  The
7756  * return value can be passed to tracing_log_err() for caret placement
7757  * within @cmd.
7758  *
7759  * Returns the index within @cmd of the first occurrence of @str or 0
7760  * if @str was not found.
7761  */
7762 unsigned int err_pos(char *cmd, const char *str)
7763 {
7764 	char *found;
7765 
7766 	if (WARN_ON(!strlen(cmd)))
7767 		return 0;
7768 
7769 	found = strstr(cmd, str);
7770 	if (found)
7771 		return found - cmd;
7772 
7773 	return 0;
7774 }
7775 
7776 /**
7777  * tracing_log_err - write an error to the tracing error log
7778  * @tr: The associated trace array for the error (NULL for top level array)
7779  * @loc: A string describing where the error occurred
7780  * @cmd: The tracing command that caused the error
7781  * @errs: The array of loc-specific static error strings
7782  * @type: The index into errs[], which produces the specific static err string
7783  * @pos: The position the caret should be placed in the cmd
7784  *
7785  * Writes an error into tracing/error_log of the form:
7786  *
7787  * <loc>: error: <text>
7788  *   Command: <cmd>
7789  *              ^
7790  *
7791  * tracing/error_log is a small log file containing the last
7792  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7793  * unless there has been a tracing error, and the error log can be
7794  * cleared and have its memory freed by writing the empty string in
7795  * truncation mode to it i.e. echo > tracing/error_log.
7796  *
7797  * NOTE: the @errs array along with the @type param are used to
7798  * produce a static error string - this string is not copied and saved
7799  * when the error is logged - only a pointer to it is saved.  See
7800  * existing callers for examples of how static strings are typically
7801  * defined for use with tracing_log_err().
7802  */
7803 void tracing_log_err(struct trace_array *tr,
7804 		     const char *loc, const char *cmd,
7805 		     const char **errs, u8 type, u8 pos)
7806 {
7807 	struct tracing_log_err *err;
7808 
7809 	if (!tr)
7810 		tr = &global_trace;
7811 
7812 	mutex_lock(&tracing_err_log_lock);
7813 	err = get_tracing_log_err(tr);
7814 	if (PTR_ERR(err) == -ENOMEM) {
7815 		mutex_unlock(&tracing_err_log_lock);
7816 		return;
7817 	}
7818 
7819 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7820 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7821 
7822 	err->info.errs = errs;
7823 	err->info.type = type;
7824 	err->info.pos = pos;
7825 	err->info.ts = local_clock();
7826 
7827 	list_add_tail(&err->list, &tr->err_log);
7828 	mutex_unlock(&tracing_err_log_lock);
7829 }
7830 
7831 static void clear_tracing_err_log(struct trace_array *tr)
7832 {
7833 	struct tracing_log_err *err, *next;
7834 
7835 	mutex_lock(&tracing_err_log_lock);
7836 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7837 		list_del(&err->list);
7838 		kfree(err);
7839 	}
7840 
7841 	tr->n_err_log_entries = 0;
7842 	mutex_unlock(&tracing_err_log_lock);
7843 }
7844 
7845 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7846 {
7847 	struct trace_array *tr = m->private;
7848 
7849 	mutex_lock(&tracing_err_log_lock);
7850 
7851 	return seq_list_start(&tr->err_log, *pos);
7852 }
7853 
7854 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7855 {
7856 	struct trace_array *tr = m->private;
7857 
7858 	return seq_list_next(v, &tr->err_log, pos);
7859 }
7860 
7861 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7862 {
7863 	mutex_unlock(&tracing_err_log_lock);
7864 }
7865 
7866 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7867 {
7868 	u8 i;
7869 
7870 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7871 		seq_putc(m, ' ');
7872 	for (i = 0; i < pos; i++)
7873 		seq_putc(m, ' ');
7874 	seq_puts(m, "^\n");
7875 }
7876 
7877 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7878 {
7879 	struct tracing_log_err *err = v;
7880 
7881 	if (err) {
7882 		const char *err_text = err->info.errs[err->info.type];
7883 		u64 sec = err->info.ts;
7884 		u32 nsec;
7885 
7886 		nsec = do_div(sec, NSEC_PER_SEC);
7887 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7888 			   err->loc, err_text);
7889 		seq_printf(m, "%s", err->cmd);
7890 		tracing_err_log_show_pos(m, err->info.pos);
7891 	}
7892 
7893 	return 0;
7894 }
7895 
7896 static const struct seq_operations tracing_err_log_seq_ops = {
7897 	.start  = tracing_err_log_seq_start,
7898 	.next   = tracing_err_log_seq_next,
7899 	.stop   = tracing_err_log_seq_stop,
7900 	.show   = tracing_err_log_seq_show
7901 };
7902 
7903 static int tracing_err_log_open(struct inode *inode, struct file *file)
7904 {
7905 	struct trace_array *tr = inode->i_private;
7906 	int ret = 0;
7907 
7908 	ret = tracing_check_open_get_tr(tr);
7909 	if (ret)
7910 		return ret;
7911 
7912 	/* If this file was opened for write, then erase contents */
7913 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7914 		clear_tracing_err_log(tr);
7915 
7916 	if (file->f_mode & FMODE_READ) {
7917 		ret = seq_open(file, &tracing_err_log_seq_ops);
7918 		if (!ret) {
7919 			struct seq_file *m = file->private_data;
7920 			m->private = tr;
7921 		} else {
7922 			trace_array_put(tr);
7923 		}
7924 	}
7925 	return ret;
7926 }
7927 
7928 static ssize_t tracing_err_log_write(struct file *file,
7929 				     const char __user *buffer,
7930 				     size_t count, loff_t *ppos)
7931 {
7932 	return count;
7933 }
7934 
7935 static int tracing_err_log_release(struct inode *inode, struct file *file)
7936 {
7937 	struct trace_array *tr = inode->i_private;
7938 
7939 	trace_array_put(tr);
7940 
7941 	if (file->f_mode & FMODE_READ)
7942 		seq_release(inode, file);
7943 
7944 	return 0;
7945 }
7946 
7947 static const struct file_operations tracing_err_log_fops = {
7948 	.open           = tracing_err_log_open,
7949 	.write		= tracing_err_log_write,
7950 	.read           = seq_read,
7951 	.llseek         = seq_lseek,
7952 	.release        = tracing_err_log_release,
7953 };
7954 
7955 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7956 {
7957 	struct trace_array *tr = inode->i_private;
7958 	struct ftrace_buffer_info *info;
7959 	int ret;
7960 
7961 	ret = tracing_check_open_get_tr(tr);
7962 	if (ret)
7963 		return ret;
7964 
7965 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7966 	if (!info) {
7967 		trace_array_put(tr);
7968 		return -ENOMEM;
7969 	}
7970 
7971 	mutex_lock(&trace_types_lock);
7972 
7973 	info->iter.tr		= tr;
7974 	info->iter.cpu_file	= tracing_get_cpu(inode);
7975 	info->iter.trace	= tr->current_trace;
7976 	info->iter.array_buffer = &tr->array_buffer;
7977 	info->spare		= NULL;
7978 	/* Force reading ring buffer for first read */
7979 	info->read		= (unsigned int)-1;
7980 
7981 	filp->private_data = info;
7982 
7983 	tr->trace_ref++;
7984 
7985 	mutex_unlock(&trace_types_lock);
7986 
7987 	ret = nonseekable_open(inode, filp);
7988 	if (ret < 0)
7989 		trace_array_put(tr);
7990 
7991 	return ret;
7992 }
7993 
7994 static __poll_t
7995 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7996 {
7997 	struct ftrace_buffer_info *info = filp->private_data;
7998 	struct trace_iterator *iter = &info->iter;
7999 
8000 	return trace_poll(iter, filp, poll_table);
8001 }
8002 
8003 static ssize_t
8004 tracing_buffers_read(struct file *filp, char __user *ubuf,
8005 		     size_t count, loff_t *ppos)
8006 {
8007 	struct ftrace_buffer_info *info = filp->private_data;
8008 	struct trace_iterator *iter = &info->iter;
8009 	ssize_t ret = 0;
8010 	ssize_t size;
8011 
8012 	if (!count)
8013 		return 0;
8014 
8015 #ifdef CONFIG_TRACER_MAX_TRACE
8016 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8017 		return -EBUSY;
8018 #endif
8019 
8020 	if (!info->spare) {
8021 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8022 							  iter->cpu_file);
8023 		if (IS_ERR(info->spare)) {
8024 			ret = PTR_ERR(info->spare);
8025 			info->spare = NULL;
8026 		} else {
8027 			info->spare_cpu = iter->cpu_file;
8028 		}
8029 	}
8030 	if (!info->spare)
8031 		return ret;
8032 
8033 	/* Do we have previous read data to read? */
8034 	if (info->read < PAGE_SIZE)
8035 		goto read;
8036 
8037  again:
8038 	trace_access_lock(iter->cpu_file);
8039 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8040 				    &info->spare,
8041 				    count,
8042 				    iter->cpu_file, 0);
8043 	trace_access_unlock(iter->cpu_file);
8044 
8045 	if (ret < 0) {
8046 		if (trace_empty(iter)) {
8047 			if ((filp->f_flags & O_NONBLOCK))
8048 				return -EAGAIN;
8049 
8050 			ret = wait_on_pipe(iter, 0);
8051 			if (ret)
8052 				return ret;
8053 
8054 			goto again;
8055 		}
8056 		return 0;
8057 	}
8058 
8059 	info->read = 0;
8060  read:
8061 	size = PAGE_SIZE - info->read;
8062 	if (size > count)
8063 		size = count;
8064 
8065 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8066 	if (ret == size)
8067 		return -EFAULT;
8068 
8069 	size -= ret;
8070 
8071 	*ppos += size;
8072 	info->read += size;
8073 
8074 	return size;
8075 }
8076 
8077 static int tracing_buffers_release(struct inode *inode, struct file *file)
8078 {
8079 	struct ftrace_buffer_info *info = file->private_data;
8080 	struct trace_iterator *iter = &info->iter;
8081 
8082 	mutex_lock(&trace_types_lock);
8083 
8084 	iter->tr->trace_ref--;
8085 
8086 	__trace_array_put(iter->tr);
8087 
8088 	if (info->spare)
8089 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8090 					   info->spare_cpu, info->spare);
8091 	kvfree(info);
8092 
8093 	mutex_unlock(&trace_types_lock);
8094 
8095 	return 0;
8096 }
8097 
8098 struct buffer_ref {
8099 	struct trace_buffer	*buffer;
8100 	void			*page;
8101 	int			cpu;
8102 	refcount_t		refcount;
8103 };
8104 
8105 static void buffer_ref_release(struct buffer_ref *ref)
8106 {
8107 	if (!refcount_dec_and_test(&ref->refcount))
8108 		return;
8109 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8110 	kfree(ref);
8111 }
8112 
8113 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8114 				    struct pipe_buffer *buf)
8115 {
8116 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8117 
8118 	buffer_ref_release(ref);
8119 	buf->private = 0;
8120 }
8121 
8122 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8123 				struct pipe_buffer *buf)
8124 {
8125 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8126 
8127 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8128 		return false;
8129 
8130 	refcount_inc(&ref->refcount);
8131 	return true;
8132 }
8133 
8134 /* Pipe buffer operations for a buffer. */
8135 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8136 	.release		= buffer_pipe_buf_release,
8137 	.get			= buffer_pipe_buf_get,
8138 };
8139 
8140 /*
8141  * Callback from splice_to_pipe(), if we need to release some pages
8142  * at the end of the spd in case we error'ed out in filling the pipe.
8143  */
8144 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8145 {
8146 	struct buffer_ref *ref =
8147 		(struct buffer_ref *)spd->partial[i].private;
8148 
8149 	buffer_ref_release(ref);
8150 	spd->partial[i].private = 0;
8151 }
8152 
8153 static ssize_t
8154 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8155 			    struct pipe_inode_info *pipe, size_t len,
8156 			    unsigned int flags)
8157 {
8158 	struct ftrace_buffer_info *info = file->private_data;
8159 	struct trace_iterator *iter = &info->iter;
8160 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8161 	struct page *pages_def[PIPE_DEF_BUFFERS];
8162 	struct splice_pipe_desc spd = {
8163 		.pages		= pages_def,
8164 		.partial	= partial_def,
8165 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8166 		.ops		= &buffer_pipe_buf_ops,
8167 		.spd_release	= buffer_spd_release,
8168 	};
8169 	struct buffer_ref *ref;
8170 	int entries, i;
8171 	ssize_t ret = 0;
8172 
8173 #ifdef CONFIG_TRACER_MAX_TRACE
8174 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8175 		return -EBUSY;
8176 #endif
8177 
8178 	if (*ppos & (PAGE_SIZE - 1))
8179 		return -EINVAL;
8180 
8181 	if (len & (PAGE_SIZE - 1)) {
8182 		if (len < PAGE_SIZE)
8183 			return -EINVAL;
8184 		len &= PAGE_MASK;
8185 	}
8186 
8187 	if (splice_grow_spd(pipe, &spd))
8188 		return -ENOMEM;
8189 
8190  again:
8191 	trace_access_lock(iter->cpu_file);
8192 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8193 
8194 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8195 		struct page *page;
8196 		int r;
8197 
8198 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8199 		if (!ref) {
8200 			ret = -ENOMEM;
8201 			break;
8202 		}
8203 
8204 		refcount_set(&ref->refcount, 1);
8205 		ref->buffer = iter->array_buffer->buffer;
8206 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8207 		if (IS_ERR(ref->page)) {
8208 			ret = PTR_ERR(ref->page);
8209 			ref->page = NULL;
8210 			kfree(ref);
8211 			break;
8212 		}
8213 		ref->cpu = iter->cpu_file;
8214 
8215 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8216 					  len, iter->cpu_file, 1);
8217 		if (r < 0) {
8218 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8219 						   ref->page);
8220 			kfree(ref);
8221 			break;
8222 		}
8223 
8224 		page = virt_to_page(ref->page);
8225 
8226 		spd.pages[i] = page;
8227 		spd.partial[i].len = PAGE_SIZE;
8228 		spd.partial[i].offset = 0;
8229 		spd.partial[i].private = (unsigned long)ref;
8230 		spd.nr_pages++;
8231 		*ppos += PAGE_SIZE;
8232 
8233 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8234 	}
8235 
8236 	trace_access_unlock(iter->cpu_file);
8237 	spd.nr_pages = i;
8238 
8239 	/* did we read anything? */
8240 	if (!spd.nr_pages) {
8241 		if (ret)
8242 			goto out;
8243 
8244 		ret = -EAGAIN;
8245 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8246 			goto out;
8247 
8248 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8249 		if (ret)
8250 			goto out;
8251 
8252 		goto again;
8253 	}
8254 
8255 	ret = splice_to_pipe(pipe, &spd);
8256 out:
8257 	splice_shrink_spd(&spd);
8258 
8259 	return ret;
8260 }
8261 
8262 static const struct file_operations tracing_buffers_fops = {
8263 	.open		= tracing_buffers_open,
8264 	.read		= tracing_buffers_read,
8265 	.poll		= tracing_buffers_poll,
8266 	.release	= tracing_buffers_release,
8267 	.splice_read	= tracing_buffers_splice_read,
8268 	.llseek		= no_llseek,
8269 };
8270 
8271 static ssize_t
8272 tracing_stats_read(struct file *filp, char __user *ubuf,
8273 		   size_t count, loff_t *ppos)
8274 {
8275 	struct inode *inode = file_inode(filp);
8276 	struct trace_array *tr = inode->i_private;
8277 	struct array_buffer *trace_buf = &tr->array_buffer;
8278 	int cpu = tracing_get_cpu(inode);
8279 	struct trace_seq *s;
8280 	unsigned long cnt;
8281 	unsigned long long t;
8282 	unsigned long usec_rem;
8283 
8284 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8285 	if (!s)
8286 		return -ENOMEM;
8287 
8288 	trace_seq_init(s);
8289 
8290 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8291 	trace_seq_printf(s, "entries: %ld\n", cnt);
8292 
8293 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8294 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8295 
8296 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8297 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8298 
8299 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8300 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8301 
8302 	if (trace_clocks[tr->clock_id].in_ns) {
8303 		/* local or global for trace_clock */
8304 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305 		usec_rem = do_div(t, USEC_PER_SEC);
8306 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8307 								t, usec_rem);
8308 
8309 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8310 		usec_rem = do_div(t, USEC_PER_SEC);
8311 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8312 	} else {
8313 		/* counter or tsc mode for trace_clock */
8314 		trace_seq_printf(s, "oldest event ts: %llu\n",
8315 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8316 
8317 		trace_seq_printf(s, "now ts: %llu\n",
8318 				ring_buffer_time_stamp(trace_buf->buffer));
8319 	}
8320 
8321 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8322 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8323 
8324 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8325 	trace_seq_printf(s, "read events: %ld\n", cnt);
8326 
8327 	count = simple_read_from_buffer(ubuf, count, ppos,
8328 					s->buffer, trace_seq_used(s));
8329 
8330 	kfree(s);
8331 
8332 	return count;
8333 }
8334 
8335 static const struct file_operations tracing_stats_fops = {
8336 	.open		= tracing_open_generic_tr,
8337 	.read		= tracing_stats_read,
8338 	.llseek		= generic_file_llseek,
8339 	.release	= tracing_release_generic_tr,
8340 };
8341 
8342 #ifdef CONFIG_DYNAMIC_FTRACE
8343 
8344 static ssize_t
8345 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8346 		  size_t cnt, loff_t *ppos)
8347 {
8348 	ssize_t ret;
8349 	char *buf;
8350 	int r;
8351 
8352 	/* 256 should be plenty to hold the amount needed */
8353 	buf = kmalloc(256, GFP_KERNEL);
8354 	if (!buf)
8355 		return -ENOMEM;
8356 
8357 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8358 		      ftrace_update_tot_cnt,
8359 		      ftrace_number_of_pages,
8360 		      ftrace_number_of_groups);
8361 
8362 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8363 	kfree(buf);
8364 	return ret;
8365 }
8366 
8367 static const struct file_operations tracing_dyn_info_fops = {
8368 	.open		= tracing_open_generic,
8369 	.read		= tracing_read_dyn_info,
8370 	.llseek		= generic_file_llseek,
8371 };
8372 #endif /* CONFIG_DYNAMIC_FTRACE */
8373 
8374 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8375 static void
8376 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8377 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8378 		void *data)
8379 {
8380 	tracing_snapshot_instance(tr);
8381 }
8382 
8383 static void
8384 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8385 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8386 		      void *data)
8387 {
8388 	struct ftrace_func_mapper *mapper = data;
8389 	long *count = NULL;
8390 
8391 	if (mapper)
8392 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8393 
8394 	if (count) {
8395 
8396 		if (*count <= 0)
8397 			return;
8398 
8399 		(*count)--;
8400 	}
8401 
8402 	tracing_snapshot_instance(tr);
8403 }
8404 
8405 static int
8406 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8407 		      struct ftrace_probe_ops *ops, void *data)
8408 {
8409 	struct ftrace_func_mapper *mapper = data;
8410 	long *count = NULL;
8411 
8412 	seq_printf(m, "%ps:", (void *)ip);
8413 
8414 	seq_puts(m, "snapshot");
8415 
8416 	if (mapper)
8417 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8418 
8419 	if (count)
8420 		seq_printf(m, ":count=%ld\n", *count);
8421 	else
8422 		seq_puts(m, ":unlimited\n");
8423 
8424 	return 0;
8425 }
8426 
8427 static int
8428 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8429 		     unsigned long ip, void *init_data, void **data)
8430 {
8431 	struct ftrace_func_mapper *mapper = *data;
8432 
8433 	if (!mapper) {
8434 		mapper = allocate_ftrace_func_mapper();
8435 		if (!mapper)
8436 			return -ENOMEM;
8437 		*data = mapper;
8438 	}
8439 
8440 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8441 }
8442 
8443 static void
8444 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8445 		     unsigned long ip, void *data)
8446 {
8447 	struct ftrace_func_mapper *mapper = data;
8448 
8449 	if (!ip) {
8450 		if (!mapper)
8451 			return;
8452 		free_ftrace_func_mapper(mapper, NULL);
8453 		return;
8454 	}
8455 
8456 	ftrace_func_mapper_remove_ip(mapper, ip);
8457 }
8458 
8459 static struct ftrace_probe_ops snapshot_probe_ops = {
8460 	.func			= ftrace_snapshot,
8461 	.print			= ftrace_snapshot_print,
8462 };
8463 
8464 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8465 	.func			= ftrace_count_snapshot,
8466 	.print			= ftrace_snapshot_print,
8467 	.init			= ftrace_snapshot_init,
8468 	.free			= ftrace_snapshot_free,
8469 };
8470 
8471 static int
8472 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8473 			       char *glob, char *cmd, char *param, int enable)
8474 {
8475 	struct ftrace_probe_ops *ops;
8476 	void *count = (void *)-1;
8477 	char *number;
8478 	int ret;
8479 
8480 	if (!tr)
8481 		return -ENODEV;
8482 
8483 	/* hash funcs only work with set_ftrace_filter */
8484 	if (!enable)
8485 		return -EINVAL;
8486 
8487 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8488 
8489 	if (glob[0] == '!')
8490 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8491 
8492 	if (!param)
8493 		goto out_reg;
8494 
8495 	number = strsep(&param, ":");
8496 
8497 	if (!strlen(number))
8498 		goto out_reg;
8499 
8500 	/*
8501 	 * We use the callback data field (which is a pointer)
8502 	 * as our counter.
8503 	 */
8504 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8505 	if (ret)
8506 		return ret;
8507 
8508  out_reg:
8509 	ret = tracing_alloc_snapshot_instance(tr);
8510 	if (ret < 0)
8511 		goto out;
8512 
8513 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8514 
8515  out:
8516 	return ret < 0 ? ret : 0;
8517 }
8518 
8519 static struct ftrace_func_command ftrace_snapshot_cmd = {
8520 	.name			= "snapshot",
8521 	.func			= ftrace_trace_snapshot_callback,
8522 };
8523 
8524 static __init int register_snapshot_cmd(void)
8525 {
8526 	return register_ftrace_command(&ftrace_snapshot_cmd);
8527 }
8528 #else
8529 static inline __init int register_snapshot_cmd(void) { return 0; }
8530 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8531 
8532 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8533 {
8534 	if (WARN_ON(!tr->dir))
8535 		return ERR_PTR(-ENODEV);
8536 
8537 	/* Top directory uses NULL as the parent */
8538 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8539 		return NULL;
8540 
8541 	/* All sub buffers have a descriptor */
8542 	return tr->dir;
8543 }
8544 
8545 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8546 {
8547 	struct dentry *d_tracer;
8548 
8549 	if (tr->percpu_dir)
8550 		return tr->percpu_dir;
8551 
8552 	d_tracer = tracing_get_dentry(tr);
8553 	if (IS_ERR(d_tracer))
8554 		return NULL;
8555 
8556 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8557 
8558 	MEM_FAIL(!tr->percpu_dir,
8559 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8560 
8561 	return tr->percpu_dir;
8562 }
8563 
8564 static struct dentry *
8565 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8566 		      void *data, long cpu, const struct file_operations *fops)
8567 {
8568 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8569 
8570 	if (ret) /* See tracing_get_cpu() */
8571 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8572 	return ret;
8573 }
8574 
8575 static void
8576 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8577 {
8578 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8579 	struct dentry *d_cpu;
8580 	char cpu_dir[30]; /* 30 characters should be more than enough */
8581 
8582 	if (!d_percpu)
8583 		return;
8584 
8585 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8586 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8587 	if (!d_cpu) {
8588 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8589 		return;
8590 	}
8591 
8592 	/* per cpu trace_pipe */
8593 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8594 				tr, cpu, &tracing_pipe_fops);
8595 
8596 	/* per cpu trace */
8597 	trace_create_cpu_file("trace", 0644, d_cpu,
8598 				tr, cpu, &tracing_fops);
8599 
8600 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8601 				tr, cpu, &tracing_buffers_fops);
8602 
8603 	trace_create_cpu_file("stats", 0444, d_cpu,
8604 				tr, cpu, &tracing_stats_fops);
8605 
8606 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8607 				tr, cpu, &tracing_entries_fops);
8608 
8609 #ifdef CONFIG_TRACER_SNAPSHOT
8610 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8611 				tr, cpu, &snapshot_fops);
8612 
8613 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8614 				tr, cpu, &snapshot_raw_fops);
8615 #endif
8616 }
8617 
8618 #ifdef CONFIG_FTRACE_SELFTEST
8619 /* Let selftest have access to static functions in this file */
8620 #include "trace_selftest.c"
8621 #endif
8622 
8623 static ssize_t
8624 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8625 			loff_t *ppos)
8626 {
8627 	struct trace_option_dentry *topt = filp->private_data;
8628 	char *buf;
8629 
8630 	if (topt->flags->val & topt->opt->bit)
8631 		buf = "1\n";
8632 	else
8633 		buf = "0\n";
8634 
8635 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8636 }
8637 
8638 static ssize_t
8639 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8640 			 loff_t *ppos)
8641 {
8642 	struct trace_option_dentry *topt = filp->private_data;
8643 	unsigned long val;
8644 	int ret;
8645 
8646 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8647 	if (ret)
8648 		return ret;
8649 
8650 	if (val != 0 && val != 1)
8651 		return -EINVAL;
8652 
8653 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8654 		mutex_lock(&trace_types_lock);
8655 		ret = __set_tracer_option(topt->tr, topt->flags,
8656 					  topt->opt, !val);
8657 		mutex_unlock(&trace_types_lock);
8658 		if (ret)
8659 			return ret;
8660 	}
8661 
8662 	*ppos += cnt;
8663 
8664 	return cnt;
8665 }
8666 
8667 
8668 static const struct file_operations trace_options_fops = {
8669 	.open = tracing_open_generic,
8670 	.read = trace_options_read,
8671 	.write = trace_options_write,
8672 	.llseek	= generic_file_llseek,
8673 };
8674 
8675 /*
8676  * In order to pass in both the trace_array descriptor as well as the index
8677  * to the flag that the trace option file represents, the trace_array
8678  * has a character array of trace_flags_index[], which holds the index
8679  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8680  * The address of this character array is passed to the flag option file
8681  * read/write callbacks.
8682  *
8683  * In order to extract both the index and the trace_array descriptor,
8684  * get_tr_index() uses the following algorithm.
8685  *
8686  *   idx = *ptr;
8687  *
8688  * As the pointer itself contains the address of the index (remember
8689  * index[1] == 1).
8690  *
8691  * Then to get the trace_array descriptor, by subtracting that index
8692  * from the ptr, we get to the start of the index itself.
8693  *
8694  *   ptr - idx == &index[0]
8695  *
8696  * Then a simple container_of() from that pointer gets us to the
8697  * trace_array descriptor.
8698  */
8699 static void get_tr_index(void *data, struct trace_array **ptr,
8700 			 unsigned int *pindex)
8701 {
8702 	*pindex = *(unsigned char *)data;
8703 
8704 	*ptr = container_of(data - *pindex, struct trace_array,
8705 			    trace_flags_index);
8706 }
8707 
8708 static ssize_t
8709 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8710 			loff_t *ppos)
8711 {
8712 	void *tr_index = filp->private_data;
8713 	struct trace_array *tr;
8714 	unsigned int index;
8715 	char *buf;
8716 
8717 	get_tr_index(tr_index, &tr, &index);
8718 
8719 	if (tr->trace_flags & (1 << index))
8720 		buf = "1\n";
8721 	else
8722 		buf = "0\n";
8723 
8724 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8725 }
8726 
8727 static ssize_t
8728 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8729 			 loff_t *ppos)
8730 {
8731 	void *tr_index = filp->private_data;
8732 	struct trace_array *tr;
8733 	unsigned int index;
8734 	unsigned long val;
8735 	int ret;
8736 
8737 	get_tr_index(tr_index, &tr, &index);
8738 
8739 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8740 	if (ret)
8741 		return ret;
8742 
8743 	if (val != 0 && val != 1)
8744 		return -EINVAL;
8745 
8746 	mutex_lock(&event_mutex);
8747 	mutex_lock(&trace_types_lock);
8748 	ret = set_tracer_flag(tr, 1 << index, val);
8749 	mutex_unlock(&trace_types_lock);
8750 	mutex_unlock(&event_mutex);
8751 
8752 	if (ret < 0)
8753 		return ret;
8754 
8755 	*ppos += cnt;
8756 
8757 	return cnt;
8758 }
8759 
8760 static const struct file_operations trace_options_core_fops = {
8761 	.open = tracing_open_generic,
8762 	.read = trace_options_core_read,
8763 	.write = trace_options_core_write,
8764 	.llseek = generic_file_llseek,
8765 };
8766 
8767 struct dentry *trace_create_file(const char *name,
8768 				 umode_t mode,
8769 				 struct dentry *parent,
8770 				 void *data,
8771 				 const struct file_operations *fops)
8772 {
8773 	struct dentry *ret;
8774 
8775 	ret = tracefs_create_file(name, mode, parent, data, fops);
8776 	if (!ret)
8777 		pr_warn("Could not create tracefs '%s' entry\n", name);
8778 
8779 	return ret;
8780 }
8781 
8782 
8783 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8784 {
8785 	struct dentry *d_tracer;
8786 
8787 	if (tr->options)
8788 		return tr->options;
8789 
8790 	d_tracer = tracing_get_dentry(tr);
8791 	if (IS_ERR(d_tracer))
8792 		return NULL;
8793 
8794 	tr->options = tracefs_create_dir("options", d_tracer);
8795 	if (!tr->options) {
8796 		pr_warn("Could not create tracefs directory 'options'\n");
8797 		return NULL;
8798 	}
8799 
8800 	return tr->options;
8801 }
8802 
8803 static void
8804 create_trace_option_file(struct trace_array *tr,
8805 			 struct trace_option_dentry *topt,
8806 			 struct tracer_flags *flags,
8807 			 struct tracer_opt *opt)
8808 {
8809 	struct dentry *t_options;
8810 
8811 	t_options = trace_options_init_dentry(tr);
8812 	if (!t_options)
8813 		return;
8814 
8815 	topt->flags = flags;
8816 	topt->opt = opt;
8817 	topt->tr = tr;
8818 
8819 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8820 				    &trace_options_fops);
8821 
8822 }
8823 
8824 static void
8825 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8826 {
8827 	struct trace_option_dentry *topts;
8828 	struct trace_options *tr_topts;
8829 	struct tracer_flags *flags;
8830 	struct tracer_opt *opts;
8831 	int cnt;
8832 	int i;
8833 
8834 	if (!tracer)
8835 		return;
8836 
8837 	flags = tracer->flags;
8838 
8839 	if (!flags || !flags->opts)
8840 		return;
8841 
8842 	/*
8843 	 * If this is an instance, only create flags for tracers
8844 	 * the instance may have.
8845 	 */
8846 	if (!trace_ok_for_array(tracer, tr))
8847 		return;
8848 
8849 	for (i = 0; i < tr->nr_topts; i++) {
8850 		/* Make sure there's no duplicate flags. */
8851 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8852 			return;
8853 	}
8854 
8855 	opts = flags->opts;
8856 
8857 	for (cnt = 0; opts[cnt].name; cnt++)
8858 		;
8859 
8860 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8861 	if (!topts)
8862 		return;
8863 
8864 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8865 			    GFP_KERNEL);
8866 	if (!tr_topts) {
8867 		kfree(topts);
8868 		return;
8869 	}
8870 
8871 	tr->topts = tr_topts;
8872 	tr->topts[tr->nr_topts].tracer = tracer;
8873 	tr->topts[tr->nr_topts].topts = topts;
8874 	tr->nr_topts++;
8875 
8876 	for (cnt = 0; opts[cnt].name; cnt++) {
8877 		create_trace_option_file(tr, &topts[cnt], flags,
8878 					 &opts[cnt]);
8879 		MEM_FAIL(topts[cnt].entry == NULL,
8880 			  "Failed to create trace option: %s",
8881 			  opts[cnt].name);
8882 	}
8883 }
8884 
8885 static struct dentry *
8886 create_trace_option_core_file(struct trace_array *tr,
8887 			      const char *option, long index)
8888 {
8889 	struct dentry *t_options;
8890 
8891 	t_options = trace_options_init_dentry(tr);
8892 	if (!t_options)
8893 		return NULL;
8894 
8895 	return trace_create_file(option, 0644, t_options,
8896 				 (void *)&tr->trace_flags_index[index],
8897 				 &trace_options_core_fops);
8898 }
8899 
8900 static void create_trace_options_dir(struct trace_array *tr)
8901 {
8902 	struct dentry *t_options;
8903 	bool top_level = tr == &global_trace;
8904 	int i;
8905 
8906 	t_options = trace_options_init_dentry(tr);
8907 	if (!t_options)
8908 		return;
8909 
8910 	for (i = 0; trace_options[i]; i++) {
8911 		if (top_level ||
8912 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8913 			create_trace_option_core_file(tr, trace_options[i], i);
8914 	}
8915 }
8916 
8917 static ssize_t
8918 rb_simple_read(struct file *filp, char __user *ubuf,
8919 	       size_t cnt, loff_t *ppos)
8920 {
8921 	struct trace_array *tr = filp->private_data;
8922 	char buf[64];
8923 	int r;
8924 
8925 	r = tracer_tracing_is_on(tr);
8926 	r = sprintf(buf, "%d\n", r);
8927 
8928 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8929 }
8930 
8931 static ssize_t
8932 rb_simple_write(struct file *filp, const char __user *ubuf,
8933 		size_t cnt, loff_t *ppos)
8934 {
8935 	struct trace_array *tr = filp->private_data;
8936 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8937 	unsigned long val;
8938 	int ret;
8939 
8940 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8941 	if (ret)
8942 		return ret;
8943 
8944 	if (buffer) {
8945 		mutex_lock(&trace_types_lock);
8946 		if (!!val == tracer_tracing_is_on(tr)) {
8947 			val = 0; /* do nothing */
8948 		} else if (val) {
8949 			tracer_tracing_on(tr);
8950 			if (tr->current_trace->start)
8951 				tr->current_trace->start(tr);
8952 		} else {
8953 			tracer_tracing_off(tr);
8954 			if (tr->current_trace->stop)
8955 				tr->current_trace->stop(tr);
8956 		}
8957 		mutex_unlock(&trace_types_lock);
8958 	}
8959 
8960 	(*ppos)++;
8961 
8962 	return cnt;
8963 }
8964 
8965 static const struct file_operations rb_simple_fops = {
8966 	.open		= tracing_open_generic_tr,
8967 	.read		= rb_simple_read,
8968 	.write		= rb_simple_write,
8969 	.release	= tracing_release_generic_tr,
8970 	.llseek		= default_llseek,
8971 };
8972 
8973 static ssize_t
8974 buffer_percent_read(struct file *filp, char __user *ubuf,
8975 		    size_t cnt, loff_t *ppos)
8976 {
8977 	struct trace_array *tr = filp->private_data;
8978 	char buf[64];
8979 	int r;
8980 
8981 	r = tr->buffer_percent;
8982 	r = sprintf(buf, "%d\n", r);
8983 
8984 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8985 }
8986 
8987 static ssize_t
8988 buffer_percent_write(struct file *filp, const char __user *ubuf,
8989 		     size_t cnt, loff_t *ppos)
8990 {
8991 	struct trace_array *tr = filp->private_data;
8992 	unsigned long val;
8993 	int ret;
8994 
8995 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8996 	if (ret)
8997 		return ret;
8998 
8999 	if (val > 100)
9000 		return -EINVAL;
9001 
9002 	if (!val)
9003 		val = 1;
9004 
9005 	tr->buffer_percent = val;
9006 
9007 	(*ppos)++;
9008 
9009 	return cnt;
9010 }
9011 
9012 static const struct file_operations buffer_percent_fops = {
9013 	.open		= tracing_open_generic_tr,
9014 	.read		= buffer_percent_read,
9015 	.write		= buffer_percent_write,
9016 	.release	= tracing_release_generic_tr,
9017 	.llseek		= default_llseek,
9018 };
9019 
9020 static struct dentry *trace_instance_dir;
9021 
9022 static void
9023 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9024 
9025 static int
9026 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9027 {
9028 	enum ring_buffer_flags rb_flags;
9029 
9030 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9031 
9032 	buf->tr = tr;
9033 
9034 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9035 	if (!buf->buffer)
9036 		return -ENOMEM;
9037 
9038 	buf->data = alloc_percpu(struct trace_array_cpu);
9039 	if (!buf->data) {
9040 		ring_buffer_free(buf->buffer);
9041 		buf->buffer = NULL;
9042 		return -ENOMEM;
9043 	}
9044 
9045 	/* Allocate the first page for all buffers */
9046 	set_buffer_entries(&tr->array_buffer,
9047 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9048 
9049 	return 0;
9050 }
9051 
9052 static int allocate_trace_buffers(struct trace_array *tr, int size)
9053 {
9054 	int ret;
9055 
9056 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9057 	if (ret)
9058 		return ret;
9059 
9060 #ifdef CONFIG_TRACER_MAX_TRACE
9061 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9062 				    allocate_snapshot ? size : 1);
9063 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9064 		ring_buffer_free(tr->array_buffer.buffer);
9065 		tr->array_buffer.buffer = NULL;
9066 		free_percpu(tr->array_buffer.data);
9067 		tr->array_buffer.data = NULL;
9068 		return -ENOMEM;
9069 	}
9070 	tr->allocated_snapshot = allocate_snapshot;
9071 
9072 	/*
9073 	 * Only the top level trace array gets its snapshot allocated
9074 	 * from the kernel command line.
9075 	 */
9076 	allocate_snapshot = false;
9077 #endif
9078 
9079 	return 0;
9080 }
9081 
9082 static void free_trace_buffer(struct array_buffer *buf)
9083 {
9084 	if (buf->buffer) {
9085 		ring_buffer_free(buf->buffer);
9086 		buf->buffer = NULL;
9087 		free_percpu(buf->data);
9088 		buf->data = NULL;
9089 	}
9090 }
9091 
9092 static void free_trace_buffers(struct trace_array *tr)
9093 {
9094 	if (!tr)
9095 		return;
9096 
9097 	free_trace_buffer(&tr->array_buffer);
9098 
9099 #ifdef CONFIG_TRACER_MAX_TRACE
9100 	free_trace_buffer(&tr->max_buffer);
9101 #endif
9102 }
9103 
9104 static void init_trace_flags_index(struct trace_array *tr)
9105 {
9106 	int i;
9107 
9108 	/* Used by the trace options files */
9109 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9110 		tr->trace_flags_index[i] = i;
9111 }
9112 
9113 static void __update_tracer_options(struct trace_array *tr)
9114 {
9115 	struct tracer *t;
9116 
9117 	for (t = trace_types; t; t = t->next)
9118 		add_tracer_options(tr, t);
9119 }
9120 
9121 static void update_tracer_options(struct trace_array *tr)
9122 {
9123 	mutex_lock(&trace_types_lock);
9124 	__update_tracer_options(tr);
9125 	mutex_unlock(&trace_types_lock);
9126 }
9127 
9128 /* Must have trace_types_lock held */
9129 struct trace_array *trace_array_find(const char *instance)
9130 {
9131 	struct trace_array *tr, *found = NULL;
9132 
9133 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9134 		if (tr->name && strcmp(tr->name, instance) == 0) {
9135 			found = tr;
9136 			break;
9137 		}
9138 	}
9139 
9140 	return found;
9141 }
9142 
9143 struct trace_array *trace_array_find_get(const char *instance)
9144 {
9145 	struct trace_array *tr;
9146 
9147 	mutex_lock(&trace_types_lock);
9148 	tr = trace_array_find(instance);
9149 	if (tr)
9150 		tr->ref++;
9151 	mutex_unlock(&trace_types_lock);
9152 
9153 	return tr;
9154 }
9155 
9156 static int trace_array_create_dir(struct trace_array *tr)
9157 {
9158 	int ret;
9159 
9160 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9161 	if (!tr->dir)
9162 		return -EINVAL;
9163 
9164 	ret = event_trace_add_tracer(tr->dir, tr);
9165 	if (ret) {
9166 		tracefs_remove(tr->dir);
9167 		return ret;
9168 	}
9169 
9170 	init_tracer_tracefs(tr, tr->dir);
9171 	__update_tracer_options(tr);
9172 
9173 	return ret;
9174 }
9175 
9176 static struct trace_array *trace_array_create(const char *name)
9177 {
9178 	struct trace_array *tr;
9179 	int ret;
9180 
9181 	ret = -ENOMEM;
9182 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9183 	if (!tr)
9184 		return ERR_PTR(ret);
9185 
9186 	tr->name = kstrdup(name, GFP_KERNEL);
9187 	if (!tr->name)
9188 		goto out_free_tr;
9189 
9190 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9191 		goto out_free_tr;
9192 
9193 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9194 
9195 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9196 
9197 	raw_spin_lock_init(&tr->start_lock);
9198 
9199 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9200 
9201 	tr->current_trace = &nop_trace;
9202 
9203 	INIT_LIST_HEAD(&tr->systems);
9204 	INIT_LIST_HEAD(&tr->events);
9205 	INIT_LIST_HEAD(&tr->hist_vars);
9206 	INIT_LIST_HEAD(&tr->err_log);
9207 
9208 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9209 		goto out_free_tr;
9210 
9211 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9212 		goto out_free_tr;
9213 
9214 	ftrace_init_trace_array(tr);
9215 
9216 	init_trace_flags_index(tr);
9217 
9218 	if (trace_instance_dir) {
9219 		ret = trace_array_create_dir(tr);
9220 		if (ret)
9221 			goto out_free_tr;
9222 	} else
9223 		__trace_early_add_events(tr);
9224 
9225 	list_add(&tr->list, &ftrace_trace_arrays);
9226 
9227 	tr->ref++;
9228 
9229 	return tr;
9230 
9231  out_free_tr:
9232 	ftrace_free_ftrace_ops(tr);
9233 	free_trace_buffers(tr);
9234 	free_cpumask_var(tr->tracing_cpumask);
9235 	kfree(tr->name);
9236 	kfree(tr);
9237 
9238 	return ERR_PTR(ret);
9239 }
9240 
9241 static int instance_mkdir(const char *name)
9242 {
9243 	struct trace_array *tr;
9244 	int ret;
9245 
9246 	mutex_lock(&event_mutex);
9247 	mutex_lock(&trace_types_lock);
9248 
9249 	ret = -EEXIST;
9250 	if (trace_array_find(name))
9251 		goto out_unlock;
9252 
9253 	tr = trace_array_create(name);
9254 
9255 	ret = PTR_ERR_OR_ZERO(tr);
9256 
9257 out_unlock:
9258 	mutex_unlock(&trace_types_lock);
9259 	mutex_unlock(&event_mutex);
9260 	return ret;
9261 }
9262 
9263 /**
9264  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9265  * @name: The name of the trace array to be looked up/created.
9266  *
9267  * Returns pointer to trace array with given name.
9268  * NULL, if it cannot be created.
9269  *
9270  * NOTE: This function increments the reference counter associated with the
9271  * trace array returned. This makes sure it cannot be freed while in use.
9272  * Use trace_array_put() once the trace array is no longer needed.
9273  * If the trace_array is to be freed, trace_array_destroy() needs to
9274  * be called after the trace_array_put(), or simply let user space delete
9275  * it from the tracefs instances directory. But until the
9276  * trace_array_put() is called, user space can not delete it.
9277  *
9278  */
9279 struct trace_array *trace_array_get_by_name(const char *name)
9280 {
9281 	struct trace_array *tr;
9282 
9283 	mutex_lock(&event_mutex);
9284 	mutex_lock(&trace_types_lock);
9285 
9286 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9287 		if (tr->name && strcmp(tr->name, name) == 0)
9288 			goto out_unlock;
9289 	}
9290 
9291 	tr = trace_array_create(name);
9292 
9293 	if (IS_ERR(tr))
9294 		tr = NULL;
9295 out_unlock:
9296 	if (tr)
9297 		tr->ref++;
9298 
9299 	mutex_unlock(&trace_types_lock);
9300 	mutex_unlock(&event_mutex);
9301 	return tr;
9302 }
9303 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9304 
9305 static int __remove_instance(struct trace_array *tr)
9306 {
9307 	int i;
9308 
9309 	/* Reference counter for a newly created trace array = 1. */
9310 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9311 		return -EBUSY;
9312 
9313 	list_del(&tr->list);
9314 
9315 	/* Disable all the flags that were enabled coming in */
9316 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9317 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9318 			set_tracer_flag(tr, 1 << i, 0);
9319 	}
9320 
9321 	tracing_set_nop(tr);
9322 	clear_ftrace_function_probes(tr);
9323 	event_trace_del_tracer(tr);
9324 	ftrace_clear_pids(tr);
9325 	ftrace_destroy_function_files(tr);
9326 	tracefs_remove(tr->dir);
9327 	free_percpu(tr->last_func_repeats);
9328 	free_trace_buffers(tr);
9329 
9330 	for (i = 0; i < tr->nr_topts; i++) {
9331 		kfree(tr->topts[i].topts);
9332 	}
9333 	kfree(tr->topts);
9334 
9335 	free_cpumask_var(tr->tracing_cpumask);
9336 	kfree(tr->name);
9337 	kfree(tr);
9338 
9339 	return 0;
9340 }
9341 
9342 int trace_array_destroy(struct trace_array *this_tr)
9343 {
9344 	struct trace_array *tr;
9345 	int ret;
9346 
9347 	if (!this_tr)
9348 		return -EINVAL;
9349 
9350 	mutex_lock(&event_mutex);
9351 	mutex_lock(&trace_types_lock);
9352 
9353 	ret = -ENODEV;
9354 
9355 	/* Making sure trace array exists before destroying it. */
9356 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9357 		if (tr == this_tr) {
9358 			ret = __remove_instance(tr);
9359 			break;
9360 		}
9361 	}
9362 
9363 	mutex_unlock(&trace_types_lock);
9364 	mutex_unlock(&event_mutex);
9365 
9366 	return ret;
9367 }
9368 EXPORT_SYMBOL_GPL(trace_array_destroy);
9369 
9370 static int instance_rmdir(const char *name)
9371 {
9372 	struct trace_array *tr;
9373 	int ret;
9374 
9375 	mutex_lock(&event_mutex);
9376 	mutex_lock(&trace_types_lock);
9377 
9378 	ret = -ENODEV;
9379 	tr = trace_array_find(name);
9380 	if (tr)
9381 		ret = __remove_instance(tr);
9382 
9383 	mutex_unlock(&trace_types_lock);
9384 	mutex_unlock(&event_mutex);
9385 
9386 	return ret;
9387 }
9388 
9389 static __init void create_trace_instances(struct dentry *d_tracer)
9390 {
9391 	struct trace_array *tr;
9392 
9393 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9394 							 instance_mkdir,
9395 							 instance_rmdir);
9396 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9397 		return;
9398 
9399 	mutex_lock(&event_mutex);
9400 	mutex_lock(&trace_types_lock);
9401 
9402 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9403 		if (!tr->name)
9404 			continue;
9405 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9406 			     "Failed to create instance directory\n"))
9407 			break;
9408 	}
9409 
9410 	mutex_unlock(&trace_types_lock);
9411 	mutex_unlock(&event_mutex);
9412 }
9413 
9414 static void
9415 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9416 {
9417 	struct trace_event_file *file;
9418 	int cpu;
9419 
9420 	trace_create_file("available_tracers", 0444, d_tracer,
9421 			tr, &show_traces_fops);
9422 
9423 	trace_create_file("current_tracer", 0644, d_tracer,
9424 			tr, &set_tracer_fops);
9425 
9426 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9427 			  tr, &tracing_cpumask_fops);
9428 
9429 	trace_create_file("trace_options", 0644, d_tracer,
9430 			  tr, &tracing_iter_fops);
9431 
9432 	trace_create_file("trace", 0644, d_tracer,
9433 			  tr, &tracing_fops);
9434 
9435 	trace_create_file("trace_pipe", 0444, d_tracer,
9436 			  tr, &tracing_pipe_fops);
9437 
9438 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9439 			  tr, &tracing_entries_fops);
9440 
9441 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9442 			  tr, &tracing_total_entries_fops);
9443 
9444 	trace_create_file("free_buffer", 0200, d_tracer,
9445 			  tr, &tracing_free_buffer_fops);
9446 
9447 	trace_create_file("trace_marker", 0220, d_tracer,
9448 			  tr, &tracing_mark_fops);
9449 
9450 	file = __find_event_file(tr, "ftrace", "print");
9451 	if (file && file->dir)
9452 		trace_create_file("trigger", 0644, file->dir, file,
9453 				  &event_trigger_fops);
9454 	tr->trace_marker_file = file;
9455 
9456 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9457 			  tr, &tracing_mark_raw_fops);
9458 
9459 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9460 			  &trace_clock_fops);
9461 
9462 	trace_create_file("tracing_on", 0644, d_tracer,
9463 			  tr, &rb_simple_fops);
9464 
9465 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9466 			  &trace_time_stamp_mode_fops);
9467 
9468 	tr->buffer_percent = 50;
9469 
9470 	trace_create_file("buffer_percent", 0444, d_tracer,
9471 			tr, &buffer_percent_fops);
9472 
9473 	create_trace_options_dir(tr);
9474 
9475 	trace_create_maxlat_file(tr, d_tracer);
9476 
9477 	if (ftrace_create_function_files(tr, d_tracer))
9478 		MEM_FAIL(1, "Could not allocate function filter files");
9479 
9480 #ifdef CONFIG_TRACER_SNAPSHOT
9481 	trace_create_file("snapshot", 0644, d_tracer,
9482 			  tr, &snapshot_fops);
9483 #endif
9484 
9485 	trace_create_file("error_log", 0644, d_tracer,
9486 			  tr, &tracing_err_log_fops);
9487 
9488 	for_each_tracing_cpu(cpu)
9489 		tracing_init_tracefs_percpu(tr, cpu);
9490 
9491 	ftrace_init_tracefs(tr, d_tracer);
9492 }
9493 
9494 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9495 {
9496 	struct vfsmount *mnt;
9497 	struct file_system_type *type;
9498 
9499 	/*
9500 	 * To maintain backward compatibility for tools that mount
9501 	 * debugfs to get to the tracing facility, tracefs is automatically
9502 	 * mounted to the debugfs/tracing directory.
9503 	 */
9504 	type = get_fs_type("tracefs");
9505 	if (!type)
9506 		return NULL;
9507 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9508 	put_filesystem(type);
9509 	if (IS_ERR(mnt))
9510 		return NULL;
9511 	mntget(mnt);
9512 
9513 	return mnt;
9514 }
9515 
9516 /**
9517  * tracing_init_dentry - initialize top level trace array
9518  *
9519  * This is called when creating files or directories in the tracing
9520  * directory. It is called via fs_initcall() by any of the boot up code
9521  * and expects to return the dentry of the top level tracing directory.
9522  */
9523 int tracing_init_dentry(void)
9524 {
9525 	struct trace_array *tr = &global_trace;
9526 
9527 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9528 		pr_warn("Tracing disabled due to lockdown\n");
9529 		return -EPERM;
9530 	}
9531 
9532 	/* The top level trace array uses  NULL as parent */
9533 	if (tr->dir)
9534 		return 0;
9535 
9536 	if (WARN_ON(!tracefs_initialized()))
9537 		return -ENODEV;
9538 
9539 	/*
9540 	 * As there may still be users that expect the tracing
9541 	 * files to exist in debugfs/tracing, we must automount
9542 	 * the tracefs file system there, so older tools still
9543 	 * work with the newer kernel.
9544 	 */
9545 	tr->dir = debugfs_create_automount("tracing", NULL,
9546 					   trace_automount, NULL);
9547 
9548 	return 0;
9549 }
9550 
9551 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9552 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9553 
9554 static struct workqueue_struct *eval_map_wq __initdata;
9555 static struct work_struct eval_map_work __initdata;
9556 
9557 static void __init eval_map_work_func(struct work_struct *work)
9558 {
9559 	int len;
9560 
9561 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9562 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9563 }
9564 
9565 static int __init trace_eval_init(void)
9566 {
9567 	INIT_WORK(&eval_map_work, eval_map_work_func);
9568 
9569 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9570 	if (!eval_map_wq) {
9571 		pr_err("Unable to allocate eval_map_wq\n");
9572 		/* Do work here */
9573 		eval_map_work_func(&eval_map_work);
9574 		return -ENOMEM;
9575 	}
9576 
9577 	queue_work(eval_map_wq, &eval_map_work);
9578 	return 0;
9579 }
9580 
9581 static int __init trace_eval_sync(void)
9582 {
9583 	/* Make sure the eval map updates are finished */
9584 	if (eval_map_wq)
9585 		destroy_workqueue(eval_map_wq);
9586 	return 0;
9587 }
9588 
9589 late_initcall_sync(trace_eval_sync);
9590 
9591 
9592 #ifdef CONFIG_MODULES
9593 static void trace_module_add_evals(struct module *mod)
9594 {
9595 	if (!mod->num_trace_evals)
9596 		return;
9597 
9598 	/*
9599 	 * Modules with bad taint do not have events created, do
9600 	 * not bother with enums either.
9601 	 */
9602 	if (trace_module_has_bad_taint(mod))
9603 		return;
9604 
9605 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9606 }
9607 
9608 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9609 static void trace_module_remove_evals(struct module *mod)
9610 {
9611 	union trace_eval_map_item *map;
9612 	union trace_eval_map_item **last = &trace_eval_maps;
9613 
9614 	if (!mod->num_trace_evals)
9615 		return;
9616 
9617 	mutex_lock(&trace_eval_mutex);
9618 
9619 	map = trace_eval_maps;
9620 
9621 	while (map) {
9622 		if (map->head.mod == mod)
9623 			break;
9624 		map = trace_eval_jmp_to_tail(map);
9625 		last = &map->tail.next;
9626 		map = map->tail.next;
9627 	}
9628 	if (!map)
9629 		goto out;
9630 
9631 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9632 	kfree(map);
9633  out:
9634 	mutex_unlock(&trace_eval_mutex);
9635 }
9636 #else
9637 static inline void trace_module_remove_evals(struct module *mod) { }
9638 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9639 
9640 static int trace_module_notify(struct notifier_block *self,
9641 			       unsigned long val, void *data)
9642 {
9643 	struct module *mod = data;
9644 
9645 	switch (val) {
9646 	case MODULE_STATE_COMING:
9647 		trace_module_add_evals(mod);
9648 		break;
9649 	case MODULE_STATE_GOING:
9650 		trace_module_remove_evals(mod);
9651 		break;
9652 	}
9653 
9654 	return NOTIFY_OK;
9655 }
9656 
9657 static struct notifier_block trace_module_nb = {
9658 	.notifier_call = trace_module_notify,
9659 	.priority = 0,
9660 };
9661 #endif /* CONFIG_MODULES */
9662 
9663 static __init int tracer_init_tracefs(void)
9664 {
9665 	int ret;
9666 
9667 	trace_access_lock_init();
9668 
9669 	ret = tracing_init_dentry();
9670 	if (ret)
9671 		return 0;
9672 
9673 	event_trace_init();
9674 
9675 	init_tracer_tracefs(&global_trace, NULL);
9676 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9677 
9678 	trace_create_file("tracing_thresh", 0644, NULL,
9679 			&global_trace, &tracing_thresh_fops);
9680 
9681 	trace_create_file("README", 0444, NULL,
9682 			NULL, &tracing_readme_fops);
9683 
9684 	trace_create_file("saved_cmdlines", 0444, NULL,
9685 			NULL, &tracing_saved_cmdlines_fops);
9686 
9687 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9688 			  NULL, &tracing_saved_cmdlines_size_fops);
9689 
9690 	trace_create_file("saved_tgids", 0444, NULL,
9691 			NULL, &tracing_saved_tgids_fops);
9692 
9693 	trace_eval_init();
9694 
9695 	trace_create_eval_file(NULL);
9696 
9697 #ifdef CONFIG_MODULES
9698 	register_module_notifier(&trace_module_nb);
9699 #endif
9700 
9701 #ifdef CONFIG_DYNAMIC_FTRACE
9702 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9703 			NULL, &tracing_dyn_info_fops);
9704 #endif
9705 
9706 	create_trace_instances(NULL);
9707 
9708 	update_tracer_options(&global_trace);
9709 
9710 	return 0;
9711 }
9712 
9713 fs_initcall(tracer_init_tracefs);
9714 
9715 static int trace_panic_handler(struct notifier_block *this,
9716 			       unsigned long event, void *unused)
9717 {
9718 	if (ftrace_dump_on_oops)
9719 		ftrace_dump(ftrace_dump_on_oops);
9720 	return NOTIFY_OK;
9721 }
9722 
9723 static struct notifier_block trace_panic_notifier = {
9724 	.notifier_call  = trace_panic_handler,
9725 	.next           = NULL,
9726 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9727 };
9728 
9729 static int trace_die_handler(struct notifier_block *self,
9730 			     unsigned long val,
9731 			     void *data)
9732 {
9733 	switch (val) {
9734 	case DIE_OOPS:
9735 		if (ftrace_dump_on_oops)
9736 			ftrace_dump(ftrace_dump_on_oops);
9737 		break;
9738 	default:
9739 		break;
9740 	}
9741 	return NOTIFY_OK;
9742 }
9743 
9744 static struct notifier_block trace_die_notifier = {
9745 	.notifier_call = trace_die_handler,
9746 	.priority = 200
9747 };
9748 
9749 /*
9750  * printk is set to max of 1024, we really don't need it that big.
9751  * Nothing should be printing 1000 characters anyway.
9752  */
9753 #define TRACE_MAX_PRINT		1000
9754 
9755 /*
9756  * Define here KERN_TRACE so that we have one place to modify
9757  * it if we decide to change what log level the ftrace dump
9758  * should be at.
9759  */
9760 #define KERN_TRACE		KERN_EMERG
9761 
9762 void
9763 trace_printk_seq(struct trace_seq *s)
9764 {
9765 	/* Probably should print a warning here. */
9766 	if (s->seq.len >= TRACE_MAX_PRINT)
9767 		s->seq.len = TRACE_MAX_PRINT;
9768 
9769 	/*
9770 	 * More paranoid code. Although the buffer size is set to
9771 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9772 	 * an extra layer of protection.
9773 	 */
9774 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9775 		s->seq.len = s->seq.size - 1;
9776 
9777 	/* should be zero ended, but we are paranoid. */
9778 	s->buffer[s->seq.len] = 0;
9779 
9780 	printk(KERN_TRACE "%s", s->buffer);
9781 
9782 	trace_seq_init(s);
9783 }
9784 
9785 void trace_init_global_iter(struct trace_iterator *iter)
9786 {
9787 	iter->tr = &global_trace;
9788 	iter->trace = iter->tr->current_trace;
9789 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9790 	iter->array_buffer = &global_trace.array_buffer;
9791 
9792 	if (iter->trace && iter->trace->open)
9793 		iter->trace->open(iter);
9794 
9795 	/* Annotate start of buffers if we had overruns */
9796 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9797 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9798 
9799 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9800 	if (trace_clocks[iter->tr->clock_id].in_ns)
9801 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9802 }
9803 
9804 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9805 {
9806 	/* use static because iter can be a bit big for the stack */
9807 	static struct trace_iterator iter;
9808 	static atomic_t dump_running;
9809 	struct trace_array *tr = &global_trace;
9810 	unsigned int old_userobj;
9811 	unsigned long flags;
9812 	int cnt = 0, cpu;
9813 
9814 	/* Only allow one dump user at a time. */
9815 	if (atomic_inc_return(&dump_running) != 1) {
9816 		atomic_dec(&dump_running);
9817 		return;
9818 	}
9819 
9820 	/*
9821 	 * Always turn off tracing when we dump.
9822 	 * We don't need to show trace output of what happens
9823 	 * between multiple crashes.
9824 	 *
9825 	 * If the user does a sysrq-z, then they can re-enable
9826 	 * tracing with echo 1 > tracing_on.
9827 	 */
9828 	tracing_off();
9829 
9830 	local_irq_save(flags);
9831 
9832 	/* Simulate the iterator */
9833 	trace_init_global_iter(&iter);
9834 	/* Can not use kmalloc for iter.temp and iter.fmt */
9835 	iter.temp = static_temp_buf;
9836 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9837 	iter.fmt = static_fmt_buf;
9838 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9839 
9840 	for_each_tracing_cpu(cpu) {
9841 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9842 	}
9843 
9844 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9845 
9846 	/* don't look at user memory in panic mode */
9847 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9848 
9849 	switch (oops_dump_mode) {
9850 	case DUMP_ALL:
9851 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852 		break;
9853 	case DUMP_ORIG:
9854 		iter.cpu_file = raw_smp_processor_id();
9855 		break;
9856 	case DUMP_NONE:
9857 		goto out_enable;
9858 	default:
9859 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9860 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9861 	}
9862 
9863 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9864 
9865 	/* Did function tracer already get disabled? */
9866 	if (ftrace_is_dead()) {
9867 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9868 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9869 	}
9870 
9871 	/*
9872 	 * We need to stop all tracing on all CPUS to read
9873 	 * the next buffer. This is a bit expensive, but is
9874 	 * not done often. We fill all what we can read,
9875 	 * and then release the locks again.
9876 	 */
9877 
9878 	while (!trace_empty(&iter)) {
9879 
9880 		if (!cnt)
9881 			printk(KERN_TRACE "---------------------------------\n");
9882 
9883 		cnt++;
9884 
9885 		trace_iterator_reset(&iter);
9886 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9887 
9888 		if (trace_find_next_entry_inc(&iter) != NULL) {
9889 			int ret;
9890 
9891 			ret = print_trace_line(&iter);
9892 			if (ret != TRACE_TYPE_NO_CONSUME)
9893 				trace_consume(&iter);
9894 		}
9895 		touch_nmi_watchdog();
9896 
9897 		trace_printk_seq(&iter.seq);
9898 	}
9899 
9900 	if (!cnt)
9901 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9902 	else
9903 		printk(KERN_TRACE "---------------------------------\n");
9904 
9905  out_enable:
9906 	tr->trace_flags |= old_userobj;
9907 
9908 	for_each_tracing_cpu(cpu) {
9909 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9910 	}
9911 	atomic_dec(&dump_running);
9912 	local_irq_restore(flags);
9913 }
9914 EXPORT_SYMBOL_GPL(ftrace_dump);
9915 
9916 #define WRITE_BUFSIZE  4096
9917 
9918 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9919 				size_t count, loff_t *ppos,
9920 				int (*createfn)(const char *))
9921 {
9922 	char *kbuf, *buf, *tmp;
9923 	int ret = 0;
9924 	size_t done = 0;
9925 	size_t size;
9926 
9927 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9928 	if (!kbuf)
9929 		return -ENOMEM;
9930 
9931 	while (done < count) {
9932 		size = count - done;
9933 
9934 		if (size >= WRITE_BUFSIZE)
9935 			size = WRITE_BUFSIZE - 1;
9936 
9937 		if (copy_from_user(kbuf, buffer + done, size)) {
9938 			ret = -EFAULT;
9939 			goto out;
9940 		}
9941 		kbuf[size] = '\0';
9942 		buf = kbuf;
9943 		do {
9944 			tmp = strchr(buf, '\n');
9945 			if (tmp) {
9946 				*tmp = '\0';
9947 				size = tmp - buf + 1;
9948 			} else {
9949 				size = strlen(buf);
9950 				if (done + size < count) {
9951 					if (buf != kbuf)
9952 						break;
9953 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9954 					pr_warn("Line length is too long: Should be less than %d\n",
9955 						WRITE_BUFSIZE - 2);
9956 					ret = -EINVAL;
9957 					goto out;
9958 				}
9959 			}
9960 			done += size;
9961 
9962 			/* Remove comments */
9963 			tmp = strchr(buf, '#');
9964 
9965 			if (tmp)
9966 				*tmp = '\0';
9967 
9968 			ret = createfn(buf);
9969 			if (ret)
9970 				goto out;
9971 			buf += size;
9972 
9973 		} while (done < count);
9974 	}
9975 	ret = done;
9976 
9977 out:
9978 	kfree(kbuf);
9979 
9980 	return ret;
9981 }
9982 
9983 __init static int tracer_alloc_buffers(void)
9984 {
9985 	int ring_buf_size;
9986 	int ret = -ENOMEM;
9987 
9988 
9989 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9990 		pr_warn("Tracing disabled due to lockdown\n");
9991 		return -EPERM;
9992 	}
9993 
9994 	/*
9995 	 * Make sure we don't accidentally add more trace options
9996 	 * than we have bits for.
9997 	 */
9998 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9999 
10000 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10001 		goto out;
10002 
10003 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10004 		goto out_free_buffer_mask;
10005 
10006 	/* Only allocate trace_printk buffers if a trace_printk exists */
10007 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10008 		/* Must be called before global_trace.buffer is allocated */
10009 		trace_printk_init_buffers();
10010 
10011 	/* To save memory, keep the ring buffer size to its minimum */
10012 	if (ring_buffer_expanded)
10013 		ring_buf_size = trace_buf_size;
10014 	else
10015 		ring_buf_size = 1;
10016 
10017 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10018 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10019 
10020 	raw_spin_lock_init(&global_trace.start_lock);
10021 
10022 	/*
10023 	 * The prepare callbacks allocates some memory for the ring buffer. We
10024 	 * don't free the buffer if the CPU goes down. If we were to free
10025 	 * the buffer, then the user would lose any trace that was in the
10026 	 * buffer. The memory will be removed once the "instance" is removed.
10027 	 */
10028 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10029 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10030 				      NULL);
10031 	if (ret < 0)
10032 		goto out_free_cpumask;
10033 	/* Used for event triggers */
10034 	ret = -ENOMEM;
10035 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10036 	if (!temp_buffer)
10037 		goto out_rm_hp_state;
10038 
10039 	if (trace_create_savedcmd() < 0)
10040 		goto out_free_temp_buffer;
10041 
10042 	/* TODO: make the number of buffers hot pluggable with CPUS */
10043 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10044 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10045 		goto out_free_savedcmd;
10046 	}
10047 
10048 	if (global_trace.buffer_disabled)
10049 		tracing_off();
10050 
10051 	if (trace_boot_clock) {
10052 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10053 		if (ret < 0)
10054 			pr_warn("Trace clock %s not defined, going back to default\n",
10055 				trace_boot_clock);
10056 	}
10057 
10058 	/*
10059 	 * register_tracer() might reference current_trace, so it
10060 	 * needs to be set before we register anything. This is
10061 	 * just a bootstrap of current_trace anyway.
10062 	 */
10063 	global_trace.current_trace = &nop_trace;
10064 
10065 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10066 
10067 	ftrace_init_global_array_ops(&global_trace);
10068 
10069 	init_trace_flags_index(&global_trace);
10070 
10071 	register_tracer(&nop_trace);
10072 
10073 	/* Function tracing may start here (via kernel command line) */
10074 	init_function_trace();
10075 
10076 	/* All seems OK, enable tracing */
10077 	tracing_disabled = 0;
10078 
10079 	atomic_notifier_chain_register(&panic_notifier_list,
10080 				       &trace_panic_notifier);
10081 
10082 	register_die_notifier(&trace_die_notifier);
10083 
10084 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10085 
10086 	INIT_LIST_HEAD(&global_trace.systems);
10087 	INIT_LIST_HEAD(&global_trace.events);
10088 	INIT_LIST_HEAD(&global_trace.hist_vars);
10089 	INIT_LIST_HEAD(&global_trace.err_log);
10090 	list_add(&global_trace.list, &ftrace_trace_arrays);
10091 
10092 	apply_trace_boot_options();
10093 
10094 	register_snapshot_cmd();
10095 
10096 	test_can_verify();
10097 
10098 	return 0;
10099 
10100 out_free_savedcmd:
10101 	free_saved_cmdlines_buffer(savedcmd);
10102 out_free_temp_buffer:
10103 	ring_buffer_free(temp_buffer);
10104 out_rm_hp_state:
10105 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10106 out_free_cpumask:
10107 	free_cpumask_var(global_trace.tracing_cpumask);
10108 out_free_buffer_mask:
10109 	free_cpumask_var(tracing_buffer_mask);
10110 out:
10111 	return ret;
10112 }
10113 
10114 void __init early_trace_init(void)
10115 {
10116 	if (tracepoint_printk) {
10117 		tracepoint_print_iter =
10118 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10119 		if (MEM_FAIL(!tracepoint_print_iter,
10120 			     "Failed to allocate trace iterator\n"))
10121 			tracepoint_printk = 0;
10122 		else
10123 			static_key_enable(&tracepoint_printk_key.key);
10124 	}
10125 	tracer_alloc_buffers();
10126 }
10127 
10128 void __init trace_init(void)
10129 {
10130 	trace_event_init();
10131 }
10132 
10133 __init static void clear_boot_tracer(void)
10134 {
10135 	/*
10136 	 * The default tracer at boot buffer is an init section.
10137 	 * This function is called in lateinit. If we did not
10138 	 * find the boot tracer, then clear it out, to prevent
10139 	 * later registration from accessing the buffer that is
10140 	 * about to be freed.
10141 	 */
10142 	if (!default_bootup_tracer)
10143 		return;
10144 
10145 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10146 	       default_bootup_tracer);
10147 	default_bootup_tracer = NULL;
10148 }
10149 
10150 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10151 __init static void tracing_set_default_clock(void)
10152 {
10153 	/* sched_clock_stable() is determined in late_initcall */
10154 	if (!trace_boot_clock && !sched_clock_stable()) {
10155 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10156 			pr_warn("Can not set tracing clock due to lockdown\n");
10157 			return;
10158 		}
10159 
10160 		printk(KERN_WARNING
10161 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10162 		       "If you want to keep using the local clock, then add:\n"
10163 		       "  \"trace_clock=local\"\n"
10164 		       "on the kernel command line\n");
10165 		tracing_set_clock(&global_trace, "global");
10166 	}
10167 }
10168 #else
10169 static inline void tracing_set_default_clock(void) { }
10170 #endif
10171 
10172 __init static int late_trace_init(void)
10173 {
10174 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10175 		static_key_disable(&tracepoint_printk_key.key);
10176 		tracepoint_printk = 0;
10177 	}
10178 
10179 	tracing_set_default_clock();
10180 	clear_boot_tracer();
10181 	return 0;
10182 }
10183 
10184 late_initcall_sync(late_trace_init);
10185