xref: /openbmc/linux/kernel/trace/trace.c (revision 997a5310)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 /*
59  * On boot up, the ring buffer is set to the minimum size, so that
60  * we do not waste memory on systems that are not using tracing.
61  */
62 bool ring_buffer_expanded;
63 
64 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, but concurrent
 * insertions into the ring-buffer (such as trace_printk) could occur
 * at the same time, giving false positive or negative results.
 */
72 static bool __read_mostly tracing_selftest_running;
73 
74 /*
75  * If boot-time tracing including tracers/events via kernel cmdline
76  * is running, we do not want to run SELFTEST.
77  */
78 bool __read_mostly tracing_selftest_disabled;
79 
80 void __init disable_tracing_selftest(const char *reason)
81 {
82 	if (!tracing_selftest_disabled) {
83 		tracing_selftest_disabled = true;
84 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 	}
86 }
87 #else
88 #define tracing_selftest_running	0
89 #define tracing_selftest_disabled	0
90 #endif
91 
/*
 * Pipe tracepoints to printk.
 *
 * tracepoint_printk is set to 1 by the "tp_printk" boot parameter unless
 * its value is "=0" or "=off"; tracepoint_printk_stop_on_boot is set by
 * the "tp_printk_stop_on_boot" boot parameter.
 */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97 
98 /* For tracers that don't implement custom flags */
99 static struct tracer_opt dummy_tracer_opt[] = {
100 	{ }
101 };
102 
103 static int
104 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105 {
106 	return 0;
107 }
108 
109 /*
110  * To prevent the comm cache from being overwritten when no
111  * tracing is active, only save the comm when a trace event
112  * occurred.
113  */
114 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115 
116 /*
117  * Kill all tracing for good (never come back).
118  * It is initialized to 1 but will turn to zero if the initialization
119  * of the tracer is successful. But that is the only place that sets
120  * this back to zero.
121  */
122 static int tracing_disabled = 1;
123 
124 cpumask_var_t __read_mostly	tracing_buffer_mask;
125 
126 /*
127  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128  *
129  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
130  * is set, then ftrace_dump is called. This will output the contents
131  * of the ftrace buffers to the console.  This is very useful for
132  * capturing traces that lead to crashes and outputing it to a
133  * serial console.
134  *
135  * It is default off, but you can enable it with either specifying
136  * "ftrace_dump_on_oops" in the kernel command line, or setting
137  * /proc/sys/kernel/ftrace_dump_on_oops
138  * Set 1 if you want to dump buffers of all CPUs
139  * Set 2 if you want to dump the buffer of the CPU that triggered oops
140  */
141 
142 enum ftrace_dump_mode ftrace_dump_on_oops;
143 
144 /* When set, tracing will stop when a WARN*() is hit */
145 int __disable_trace_on_warning;
146 
147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Map of enums to their values, for "eval_map" file.
 *
 * Leading element of a saved eval-map array.
 */
struct trace_eval_map_head {
	struct module			*mod;	/* owning module, if not built in */
	unsigned long			length;	/* count of saved maps in the array */
};
153 
154 union trace_eval_map_item;
155 
/* Trailing element of a saved eval-map array; links to the next array. */
struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 *
	 * NOTE(review): as declared here "next" is the first member and
	 * "end" the second - confirm which overlay the note above intends.
	 */
	union trace_eval_map_item	*next;	/* next array of saved eval_map items */
	const char			*end;	/* points to NULL */
};
164 
165 static DEFINE_MUTEX(trace_eval_mutex);
166 
/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;	/* a regular saved map entry */
	struct trace_eval_map_head	head;	/* view used for the first array slot */
	struct trace_eval_map_tail	tail;	/* view used for the last array slot */
};
179 
180 static union trace_eval_map_item *trace_eval_maps;
181 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182 
183 int tracing_set_tracer(struct trace_array *tr, const char *buf);
184 static void ftrace_trace_userstack(struct trace_array *tr,
185 				   struct trace_buffer *buffer,
186 				   unsigned int trace_ctx);
187 
/* Size of the fixed buffers that hold the tracer/options/clock boot strings */
#define MAX_TRACER_SIZE		100
/* Copy of the "ftrace=" boot parameter value */
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
/* Points at bootup_tracer_buf once "ftrace=" has been seen */
static char *default_bootup_tracer;

/* Set by a bare "alloc_snapshot" (no "=" value) boot parameter */
static bool allocate_snapshot;
/* Set by the "ftrace_boot_snapshot" boot parameter */
static bool snapshot_at_boot;

/* "trace_instance=" values, each followed by a tab, and the next write offset */
static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

/* "alloc_snapshot=" values, each followed by a tab, and the next write offset */
static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;
200 
201 static int __init set_cmdline_ftrace(char *str)
202 {
203 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204 	default_bootup_tracer = bootup_tracer_buf;
205 	/* We are using ftrace early, expand it */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("ftrace=", set_cmdline_ftrace);
210 
211 static int __init set_ftrace_dump_on_oops(char *str)
212 {
213 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
214 		ftrace_dump_on_oops = DUMP_ALL;
215 		return 1;
216 	}
217 
218 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219 		ftrace_dump_on_oops = DUMP_ORIG;
220                 return 1;
221         }
222 
223         return 0;
224 }
225 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226 
227 static int __init stop_trace_on_warning(char *str)
228 {
229 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230 		__disable_trace_on_warning = 1;
231 	return 1;
232 }
233 __setup("traceoff_on_warning", stop_trace_on_warning);
234 
235 static int __init boot_alloc_snapshot(char *str)
236 {
237 	char *slot = boot_snapshot_info + boot_snapshot_index;
238 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239 	int ret;
240 
241 	if (str[0] == '=') {
242 		str++;
243 		if (strlen(str) >= left)
244 			return -1;
245 
246 		ret = snprintf(slot, left, "%s\t", str);
247 		boot_snapshot_index += ret;
248 	} else {
249 		allocate_snapshot = true;
250 		/* We also need the main ring buffer expanded */
251 		ring_buffer_expanded = true;
252 	}
253 	return 1;
254 }
255 __setup("alloc_snapshot", boot_alloc_snapshot);
256 
257 
258 static int __init boot_snapshot(char *str)
259 {
260 	snapshot_at_boot = true;
261 	boot_alloc_snapshot(str);
262 	return 1;
263 }
264 __setup("ftrace_boot_snapshot", boot_snapshot);
265 
266 
267 static int __init boot_instance(char *str)
268 {
269 	char *slot = boot_instance_info + boot_instance_index;
270 	int left = sizeof(boot_instance_info) - boot_instance_index;
271 	int ret;
272 
273 	if (strlen(str) >= left)
274 		return -1;
275 
276 	ret = snprintf(slot, left, "%s\t", str);
277 	boot_instance_index += ret;
278 
279 	return 1;
280 }
281 __setup("trace_instance=", boot_instance);
282 
283 
284 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285 
286 static int __init set_trace_boot_options(char *str)
287 {
288 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289 	return 1;
290 }
291 __setup("trace_options=", set_trace_boot_options);
292 
293 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294 static char *trace_boot_clock __initdata;
295 
296 static int __init set_trace_boot_clock(char *str)
297 {
298 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299 	trace_boot_clock = trace_boot_clock_buf;
300 	return 1;
301 }
302 __setup("trace_clock=", set_trace_boot_clock);
303 
304 static int __init set_tracepoint_printk(char *str)
305 {
306 	/* Ignore the "tp_printk_stop_on_boot" param */
307 	if (*str == '_')
308 		return 0;
309 
310 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311 		tracepoint_printk = 1;
312 	return 1;
313 }
314 __setup("tp_printk", set_tracepoint_printk);
315 
316 static int __init set_tracepoint_printk_stop(char *str)
317 {
318 	tracepoint_printk_stop_on_boot = true;
319 	return 1;
320 }
321 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322 
323 unsigned long long ns2usecs(u64 nsec)
324 {
325 	nsec += 500;
326 	do_div(nsec, 1000);
327 	return nsec;
328 }
329 
330 static void
331 trace_process_export(struct trace_export *export,
332 	       struct ring_buffer_event *event, int flag)
333 {
334 	struct trace_entry *entry;
335 	unsigned int size = 0;
336 
337 	if (export->flags & flag) {
338 		entry = ring_buffer_event_data(event);
339 		size = ring_buffer_event_length(event);
340 		export->write(export, entry, size);
341 	}
342 }
343 
/* Held by register_ftrace_export()/unregister_ftrace_export() while editing the list */
static DEFINE_MUTEX(ftrace_export_lock);

/* Head of the singly linked list of registered exports (linked via ->next) */
static struct trace_export __rcu *ftrace_exports_list __read_mostly;

/*
 * One static key per export kind; each is incremented/decremented as
 * exports carrying the matching TRACE_EXPORT_* flag are added/removed.
 */
static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351 
352 static inline void ftrace_exports_enable(struct trace_export *export)
353 {
354 	if (export->flags & TRACE_EXPORT_FUNCTION)
355 		static_branch_inc(&trace_function_exports_enabled);
356 
357 	if (export->flags & TRACE_EXPORT_EVENT)
358 		static_branch_inc(&trace_event_exports_enabled);
359 
360 	if (export->flags & TRACE_EXPORT_MARKER)
361 		static_branch_inc(&trace_marker_exports_enabled);
362 }
363 
364 static inline void ftrace_exports_disable(struct trace_export *export)
365 {
366 	if (export->flags & TRACE_EXPORT_FUNCTION)
367 		static_branch_dec(&trace_function_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_EVENT)
370 		static_branch_dec(&trace_event_exports_enabled);
371 
372 	if (export->flags & TRACE_EXPORT_MARKER)
373 		static_branch_dec(&trace_marker_exports_enabled);
374 }
375 
376 static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 {
378 	struct trace_export *export;
379 
380 	preempt_disable_notrace();
381 
382 	export = rcu_dereference_raw_check(ftrace_exports_list);
383 	while (export) {
384 		trace_process_export(export, event, flag);
385 		export = rcu_dereference_raw_check(export->next);
386 	}
387 
388 	preempt_enable_notrace();
389 }
390 
391 static inline void
392 add_trace_export(struct trace_export **list, struct trace_export *export)
393 {
394 	rcu_assign_pointer(export->next, *list);
395 	/*
396 	 * We are entering export into the list but another
397 	 * CPU might be walking that list. We need to make sure
398 	 * the export->next pointer is valid before another CPU sees
399 	 * the export pointer included into the list.
400 	 */
401 	rcu_assign_pointer(*list, export);
402 }
403 
404 static inline int
405 rm_trace_export(struct trace_export **list, struct trace_export *export)
406 {
407 	struct trace_export **p;
408 
409 	for (p = list; *p != NULL; p = &(*p)->next)
410 		if (*p == export)
411 			break;
412 
413 	if (*p != export)
414 		return -1;
415 
416 	rcu_assign_pointer(*p, (*p)->next);
417 
418 	return 0;
419 }
420 
/*
 * Register @export: bump the static keys for its export kinds first,
 * then link it into the list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
428 
/*
 * Unlink @export from the list and drop the static-key references that
 * add_ftrace_export() took for it.
 *
 * Returns 0 on success, or -1 when @export was not on the list. In the
 * failure case the static keys are left untouched: no reference was ever
 * taken for an export that is not registered, so decrementing here would
 * unbalance the counters and could switch off exports that are still
 * registered.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
439 
440 int register_ftrace_export(struct trace_export *export)
441 {
442 	if (WARN_ON_ONCE(!export->write))
443 		return -1;
444 
445 	mutex_lock(&ftrace_export_lock);
446 
447 	add_ftrace_export(&ftrace_exports_list, export);
448 
449 	mutex_unlock(&ftrace_export_lock);
450 
451 	return 0;
452 }
453 EXPORT_SYMBOL_GPL(register_ftrace_export);
454 
455 int unregister_ftrace_export(struct trace_export *export)
456 {
457 	int ret;
458 
459 	mutex_lock(&ftrace_export_lock);
460 
461 	ret = rm_ftrace_export(&ftrace_exports_list, export);
462 
463 	mutex_unlock(&ftrace_export_lock);
464 
465 	return ret;
466 }
467 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
468 
469 /* trace_flags holds trace_options default values */
470 #define TRACE_DEFAULT_FLAGS						\
471 	(FUNCTION_DEFAULT_FLAGS |					\
472 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
473 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
474 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
475 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
476 	 TRACE_ITER_HASH_PTR)
477 
478 /* trace_options that are only supported by global_trace */
479 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
480 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481 
482 /* trace_flags that are default zero for instances */
483 #define ZEROED_TRACE_FLAGS \
484 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485 
486 /*
487  * The global_trace is the descriptor that holds the top-level tracing
488  * buffers for the live tracing.
489  */
490 static struct trace_array global_trace = {
491 	.trace_flags = TRACE_DEFAULT_FLAGS,
492 };
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. The +1 encoding of @v is undone before printing.
 *
 * Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long encoded = (unsigned long)v;

	seq_printf(m, "%lu\n", encoded - 1);

	return 0;
}
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always recreate a new array. The write is an all or nothing
739 	 * operation. Always create a new array when adding new pids by
740 	 * the user. If the operation fails, then the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. Anyway this can be
855  * boot time and run time configurable.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the event producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-CPU
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only from read-consume access.
 * Multiple read-only accesses are also serialized.
 */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
/*
 * Stack tracing is compiled out (!CONFIG_STACKTRACE): both helpers are
 * empty stubs so callers need no #ifdefs. Their signatures mirror the
 * CONFIG_STACKTRACE prototypes; in particular @trace_ctx is an
 * unsigned int in both variants.
 */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races of where it gets disabled but we still do a record.
1008 	 * As the check is in the fast path of the tracers, it is more
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, snapshot can not be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Like tracing_snapshot(), but for an arbitrary instance and conditional:
 * when @tr has a cond_snapshot, the snapshot is only taken if its
 * update() callback returns true for @cond_data. That callback decides,
 * based on @cond_data, whether the snapshot should happen and is
 * presumably where the data gets saved along with it.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int ret;
1267 
1268 	if (!tr->allocated_snapshot) {
1269 
1270 		/* allocate spare buffer */
1271 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 		if (ret < 0)
1274 			return ret;
1275 
1276 		tr->allocated_snapshot = true;
1277 	}
1278 
1279 	return 0;
1280 }
1281 
1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284 	/*
1285 	 * We don't free the ring buffer. instead, resize it because
1286 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1287 	 * we want preserve it.
1288 	 */
1289 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 	set_buffer_entries(&tr->max_buffer, 1);
1291 	tracing_reset_online_cpus(&tr->max_buffer);
1292 	tr->allocated_snapshot = false;
1293 }
1294 
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
1305 int tracing_alloc_snapshot(void)
1306 {
1307 	struct trace_array *tr = &global_trace;
1308 	int ret;
1309 
1310 	ret = tracing_alloc_snapshot_instance(tr);
1311 	WARN_ON(ret < 0);
1312 
1313 	return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first if it isn't already. Use this only where it is safe to sleep,
 * as the allocation may sleep.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer,
 * so the live trace can be captured when some condition triggers while
 * tracing continues. No snapshot is taken if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339 
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:		The tracing instance
1343  * @cond_data:	User data to associate with the snapshot
1344  * @update:	Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 				 cond_update_fn_t update)
1355 {
1356 	struct cond_snapshot *cond_snapshot;
1357 	int ret = 0;
1358 
1359 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 	if (!cond_snapshot)
1361 		return -ENOMEM;
1362 
1363 	cond_snapshot->cond_data = cond_data;
1364 	cond_snapshot->update = update;
1365 
1366 	mutex_lock(&trace_types_lock);
1367 
1368 	ret = tracing_alloc_snapshot_instance(tr);
1369 	if (ret)
1370 		goto fail_unlock;
1371 
1372 	if (tr->current_trace->use_max_tr) {
1373 		ret = -EBUSY;
1374 		goto fail_unlock;
1375 	}
1376 
1377 	/*
1378 	 * The cond_snapshot can only change to NULL without the
1379 	 * trace_types_lock. We don't care if we race with it going
1380 	 * to NULL, but we want to make sure that it's not set to
1381 	 * something other than NULL when we get here, which we can
1382 	 * do safely with only holding the trace_types_lock and not
1383 	 * having to take the max_lock.
1384 	 */
1385 	if (tr->cond_snapshot) {
1386 		ret = -EBUSY;
1387 		goto fail_unlock;
1388 	}
1389 
1390 	local_irq_disable();
1391 	arch_spin_lock(&tr->max_lock);
1392 	tr->cond_snapshot = cond_snapshot;
1393 	arch_spin_unlock(&tr->max_lock);
1394 	local_irq_enable();
1395 
1396 	mutex_unlock(&trace_types_lock);
1397 
1398 	return ret;
1399 
1400  fail_unlock:
1401 	mutex_unlock(&trace_types_lock);
1402 	kfree(cond_snapshot);
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406 
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:		The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 	int ret = 0;
1420 
1421 	local_irq_disable();
1422 	arch_spin_lock(&tr->max_lock);
1423 
1424 	if (!tr->cond_snapshot)
1425 		ret = -EINVAL;
1426 	else {
1427 		kfree(tr->cond_snapshot);
1428 		tr->cond_snapshot = NULL;
1429 	}
1430 
1431 	arch_spin_unlock(&tr->max_lock);
1432 	local_irq_enable();
1433 
1434 	return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
1438 void tracing_snapshot(void)
1439 {
1440 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
1443 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448 int tracing_alloc_snapshot(void)
1449 {
1450 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 	return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454 void tracing_snapshot_alloc(void)
1455 {
1456 	/* Give warning */
1457 	tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 	return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467 	return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470 int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472 	return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)	do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477 
1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 	if (tr->array_buffer.buffer)
1481 		ring_buffer_record_off(tr->array_buffer.buffer);
1482 	/*
1483 	 * This flag is looked at when buffers haven't been allocated
1484 	 * yet, or by some tracers (like irqsoff), that just want to
1485 	 * know if the ring buffer has been disabled, but it can handle
1486 	 * races of where it gets disabled but we still do a record.
1487 	 * As the check is in the fast path of the tracers, it is more
1488 	 * important to be fast than accurate.
1489 	 */
1490 	tr->buffer_disabled = 1;
1491 	/* Make the flag seen by readers */
1492 	smp_wmb();
1493 }
1494 
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
1503 void tracing_off(void)
1504 {
1505 	tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508 
1509 void disable_trace_on_warning(void)
1510 {
1511 	if (__disable_trace_on_warning) {
1512 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 			"Disabling tracing due to warning\n");
1514 		tracing_off();
1515 	}
1516 }
1517 
1518 /**
1519  * tracer_tracing_is_on - show real state of ring buffer enabled
1520  * @tr : the trace array to know if ring buffer is enabled
1521  *
1522  * Shows real state of the ring buffer if it is enabled or not.
1523  */
1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 	if (tr->array_buffer.buffer)
1527 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 	return !tr->buffer_disabled;
1529 }
1530 
1531 /**
1532  * tracing_is_on - show state of ring buffers enabled
1533  */
1534 int tracing_is_on(void)
1535 {
1536 	return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539 
1540 static int __init set_buf_size(char *str)
1541 {
1542 	unsigned long buf_size;
1543 
1544 	if (!str)
1545 		return 0;
1546 	buf_size = memparse(str, &str);
1547 	/*
1548 	 * nr_entries can not be zero and the startup
1549 	 * tests require some buffer space. Therefore
1550 	 * ensure we have at least 4096 bytes of buffer.
1551 	 */
1552 	trace_buf_size = max(4096UL, buf_size);
1553 	return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556 
1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 	unsigned long threshold;
1560 	int ret;
1561 
1562 	if (!str)
1563 		return 0;
1564 	ret = kstrtoul(str, 0, &threshold);
1565 	if (ret < 0)
1566 		return 0;
1567 	tracing_thresh = threshold * 1000;
1568 	return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571 
/* Convert nanoseconds to whole microseconds (the remainder is dropped). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1576 
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
1585 
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588 	TRACE_FLAGS
1589 	NULL
1590 };
1591 
1592 static struct {
1593 	u64 (*func)(void);
1594 	const char *name;
1595 	int in_ns;		/* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 	{ trace_clock_local,		"local",	1 },
1598 	{ trace_clock_global,		"global",	1 },
1599 	{ trace_clock_counter,		"counter",	0 },
1600 	{ trace_clock_jiffies,		"uptime",	0 },
1601 	{ trace_clock,			"perf",		1 },
1602 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1603 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1604 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1605 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1606 	ARCH_TRACE_CLOCKS
1607 };
1608 
1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 	if (trace_clocks[tr->clock_id].in_ns)
1612 		return true;
1613 
1614 	return false;
1615 }
1616 
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 	memset(parser, 0, sizeof(*parser));
1623 
1624 	parser->buffer = kmalloc(size, GFP_KERNEL);
1625 	if (!parser->buffer)
1626 		return 1;
1627 
1628 	parser->size = size;
1629 	return 0;
1630 }
1631 
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 	kfree(parser->buffer);
1638 	parser->buffer = NULL;
1639 }
1640 
1641 /*
1642  * trace_get_user - reads the user input string separated by  space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 	size_t cnt, loff_t *ppos)
1654 {
1655 	char ch;
1656 	size_t read = 0;
1657 	ssize_t ret;
1658 
1659 	if (!*ppos)
1660 		trace_parser_clear(parser);
1661 
1662 	ret = get_user(ch, ubuf++);
1663 	if (ret)
1664 		goto out;
1665 
1666 	read++;
1667 	cnt--;
1668 
1669 	/*
1670 	 * The parser is not finished with the last write,
1671 	 * continue reading the user input without skipping spaces.
1672 	 */
1673 	if (!parser->cont) {
1674 		/* skip white space */
1675 		while (cnt && isspace(ch)) {
1676 			ret = get_user(ch, ubuf++);
1677 			if (ret)
1678 				goto out;
1679 			read++;
1680 			cnt--;
1681 		}
1682 
1683 		parser->idx = 0;
1684 
1685 		/* only spaces were written */
1686 		if (isspace(ch) || !ch) {
1687 			*ppos += read;
1688 			ret = read;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	/* read the non-space input */
1694 	while (cnt && !isspace(ch) && ch) {
1695 		if (parser->idx < parser->size - 1)
1696 			parser->buffer[parser->idx++] = ch;
1697 		else {
1698 			ret = -EINVAL;
1699 			goto out;
1700 		}
1701 		ret = get_user(ch, ubuf++);
1702 		if (ret)
1703 			goto out;
1704 		read++;
1705 		cnt--;
1706 	}
1707 
1708 	/* We either got finished input or we have to wait for another call. */
1709 	if (isspace(ch) || !ch) {
1710 		parser->buffer[parser->idx] = 0;
1711 		parser->cont = false;
1712 	} else if (parser->idx < parser->size - 1) {
1713 		parser->cont = true;
1714 		parser->buffer[parser->idx++] = ch;
1715 		/* Make sure the parsed string always terminates with '\0'. */
1716 		parser->buffer[parser->idx] = 0;
1717 	} else {
1718 		ret = -EINVAL;
1719 		goto out;
1720 	}
1721 
1722 	*ppos += read;
1723 	ret = read;
1724 
1725 out:
1726 	return ret;
1727 }
1728 
1729 /* TODO add a seq_buf_to_buffer() */
1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 	int len;
1733 
1734 	if (trace_seq_used(s) <= s->seq.readpos)
1735 		return -EBUSY;
1736 
1737 	len = trace_seq_used(s) - s->seq.readpos;
1738 	if (cnt > len)
1739 		cnt = len;
1740 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1741 
1742 	s->seq.readpos += cnt;
1743 	return cnt;
1744 }
1745 
1746 unsigned long __read_mostly	tracing_thresh;
1747 
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750 
1751 #ifdef LATENCY_FS_NOTIFY
1752 
1753 static struct workqueue_struct *fsnotify_wq;
1754 
1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 	struct trace_array *tr = container_of(work, struct trace_array,
1758 					      fsnotify_work);
1759 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761 
1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 	struct trace_array *tr = container_of(iwork, struct trace_array,
1765 					      fsnotify_irqwork);
1766 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768 
1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 				     struct dentry *d_tracer)
1771 {
1772 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 					      TRACE_MODE_WRITE,
1776 					      d_tracer, tr,
1777 					      &tracing_max_lat_fops);
1778 }
1779 
1780 __init static int latency_fsnotify_init(void)
1781 {
1782 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 	if (!fsnotify_wq) {
1785 		pr_err("Unable to allocate tr_max_lat_wq\n");
1786 		return -ENOMEM;
1787 	}
1788 	return 0;
1789 }
1790 
1791 late_initcall_sync(latency_fsnotify_init);
1792 
1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 	if (!fsnotify_wq)
1796 		return;
1797 	/*
1798 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 	 * possible that we are called from __schedule() or do_idle(), which
1800 	 * could cause a deadlock.
1801 	 */
1802 	irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804 
1805 #else /* !LATENCY_FS_NOTIFY */
1806 
/* Without LATENCY_FS_NOTIFY only the file is created, with no notification setup */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1810 
1811 #endif
1812 
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (this way the maximum trace is permanently saved,
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817  */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 	struct array_buffer *trace_buf = &tr->array_buffer;
1822 	struct array_buffer *max_buf = &tr->max_buffer;
1823 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825 
1826 	max_buf->cpu = cpu;
1827 	max_buf->time_start = data->preempt_timestamp;
1828 
1829 	max_data->saved_latency = tr->max_latency;
1830 	max_data->critical_start = data->critical_start;
1831 	max_data->critical_end = data->critical_end;
1832 
1833 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 	max_data->pid = tsk->pid;
1835 	/*
1836 	 * If tsk == current, then use current_uid(), as that does not use
1837 	 * RCU. The irq tracer can be called out of RCU scope.
1838 	 */
1839 	if (tsk == current)
1840 		max_data->uid = current_uid();
1841 	else
1842 		max_data->uid = task_uid(tsk);
1843 
1844 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 	max_data->policy = tsk->policy;
1846 	max_data->rt_priority = tsk->rt_priority;
1847 
1848 	/* record this tasks comm */
1849 	tracing_record_cmdline(tsk);
1850 	latency_fsnotify(tr);
1851 }
1852 
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: tracer
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 	      void *cond_data)
1866 {
1867 	if (tr->stop_count)
1868 		return;
1869 
1870 	WARN_ON_ONCE(!irqs_disabled());
1871 
1872 	if (!tr->allocated_snapshot) {
1873 		/* Only the nop tracer should hit this when disabling */
1874 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 		return;
1876 	}
1877 
1878 	arch_spin_lock(&tr->max_lock);
1879 
1880 	/* Inherit the recordable setting from array_buffer */
1881 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 		ring_buffer_record_on(tr->max_buffer.buffer);
1883 	else
1884 		ring_buffer_record_off(tr->max_buffer.buffer);
1885 
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 		arch_spin_unlock(&tr->max_lock);
1889 		return;
1890 	}
1891 #endif
1892 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893 
1894 	__update_max_tr(tr, tsk, cpu);
1895 
1896 	arch_spin_unlock(&tr->max_lock);
1897 
1898 	/* Any waiters on the old snapshot buffer need to wake up */
1899 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901 
1902 /**
1903  * update_max_tr_single - only copy one trace over, and reset the rest
1904  * @tr: tracer
1905  * @tsk: task with the latency
1906  * @cpu: the cpu of the buffer to copy.
1907  *
1908  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909  */
1910 void
1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 	int ret;
1914 
1915 	if (tr->stop_count)
1916 		return;
1917 
1918 	WARN_ON_ONCE(!irqs_disabled());
1919 	if (!tr->allocated_snapshot) {
1920 		/* Only the nop tracer should hit this when disabling */
1921 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 		return;
1923 	}
1924 
1925 	arch_spin_lock(&tr->max_lock);
1926 
1927 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928 
1929 	if (ret == -EBUSY) {
1930 		/*
1931 		 * We failed to swap the buffer due to a commit taking
1932 		 * place on this CPU. We fail to record, but we reset
1933 		 * the max trace buffer (no one writes directly to it)
1934 		 * and flag that it failed.
1935 		 * Another reason is resize is in progress.
1936 		 */
1937 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 			"Failed to swap buffers due to commit or resize in progress\n");
1939 	}
1940 
1941 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942 
1943 	__update_max_tr(tr, tsk, cpu);
1944 	arch_spin_unlock(&tr->max_lock);
1945 }
1946 
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948 
1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 	int ret;
1952 
1953 	/* Iterators are static, they should be filled or empty */
1954 	if (trace_buffer_iter(iter, iter->cpu_file))
1955 		return 0;
1956 
1957 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958 
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 	/*
1961 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 	 * to happen, this would now be the main buffer.
1963 	 */
1964 	if (iter->snapshot)
1965 		iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 	return ret;
1968 }
1969 
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972 
1973 struct trace_selftests {
1974 	struct list_head		list;
1975 	struct tracer			*type;
1976 };
1977 
1978 static LIST_HEAD(postponed_selftests);
1979 
1980 static int save_selftest(struct tracer *type)
1981 {
1982 	struct trace_selftests *selftest;
1983 
1984 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 	if (!selftest)
1986 		return -ENOMEM;
1987 
1988 	selftest->type = type;
1989 	list_add(&selftest->list, &postponed_selftests);
1990 	return 0;
1991 }
1992 
1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 	struct trace_array *tr = &global_trace;
1996 	struct tracer *saved_tracer = tr->current_trace;
1997 	int ret;
1998 
1999 	if (!type->selftest || tracing_selftest_disabled)
2000 		return 0;
2001 
2002 	/*
2003 	 * If a tracer registers early in boot up (before scheduling is
2004 	 * initialized and such), then do not run its selftests yet.
2005 	 * Instead, run it a little later in the boot process.
2006 	 */
2007 	if (!selftests_can_run)
2008 		return save_selftest(type);
2009 
2010 	if (!tracing_is_on()) {
2011 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 			type->name);
2013 		return 0;
2014 	}
2015 
2016 	/*
2017 	 * Run a selftest on this tracer.
2018 	 * Here we reset the trace buffer, and set the current
2019 	 * tracer to be this tracer. The tracer can then run some
2020 	 * internal tracing to verify that everything is in order.
2021 	 * If we fail, we do not register this tracer.
2022 	 */
2023 	tracing_reset_online_cpus(&tr->array_buffer);
2024 
2025 	tr->current_trace = type;
2026 
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 	if (type->use_max_tr) {
2029 		/* If we expanded the buffers, make sure the max is expanded too */
2030 		if (ring_buffer_expanded)
2031 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 					   RING_BUFFER_ALL_CPUS);
2033 		tr->allocated_snapshot = true;
2034 	}
2035 #endif
2036 
2037 	/* the test is responsible for initializing and enabling */
2038 	pr_info("Testing tracer %s: ", type->name);
2039 	ret = type->selftest(type, tr);
2040 	/* the test is responsible for resetting too */
2041 	tr->current_trace = saved_tracer;
2042 	if (ret) {
2043 		printk(KERN_CONT "FAILED!\n");
2044 		/* Add the warning after printing 'FAILED' */
2045 		WARN_ON(1);
2046 		return -1;
2047 	}
2048 	/* Only reset on passing, to avoid touching corrupted buffers */
2049 	tracing_reset_online_cpus(&tr->array_buffer);
2050 
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 	if (type->use_max_tr) {
2053 		tr->allocated_snapshot = false;
2054 
2055 		/* Shrink the max buffer again */
2056 		if (ring_buffer_expanded)
2057 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 					   RING_BUFFER_ALL_CPUS);
2059 	}
2060 #endif
2061 
2062 	printk(KERN_CONT "PASSED\n");
2063 	return 0;
2064 }
2065 
2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 	int ret;
2069 
2070 	/*
2071 	 * Tests can take a long time, especially if they are run one after the
2072 	 * other, as does happen during bootup when all the tracers are
2073 	 * registered. This could cause the soft lockup watchdog to trigger.
2074 	 */
2075 	cond_resched();
2076 
2077 	tracing_selftest_running = true;
2078 	ret = run_tracer_selftest(type);
2079 	tracing_selftest_running = false;
2080 
2081 	return ret;
2082 }
2083 
2084 static __init int init_trace_selftests(void)
2085 {
2086 	struct trace_selftests *p, *n;
2087 	struct tracer *t, **last;
2088 	int ret;
2089 
2090 	selftests_can_run = true;
2091 
2092 	mutex_lock(&trace_types_lock);
2093 
2094 	if (list_empty(&postponed_selftests))
2095 		goto out;
2096 
2097 	pr_info("Running postponed tracer tests:\n");
2098 
2099 	tracing_selftest_running = true;
2100 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 		/* This loop can take minutes when sanitizers are enabled, so
2102 		 * lets make sure we allow RCU processing.
2103 		 */
2104 		cond_resched();
2105 		ret = run_tracer_selftest(p->type);
2106 		/* If the test fails, then warn and remove from available_tracers */
2107 		if (ret < 0) {
2108 			WARN(1, "tracer: %s failed selftest, disabling\n",
2109 			     p->type->name);
2110 			last = &trace_types;
2111 			for (t = trace_types; t; t = t->next) {
2112 				if (t == p->type) {
2113 					*last = t->next;
2114 					break;
2115 				}
2116 				last = &t->next;
2117 			}
2118 		}
2119 		list_del(&p->list);
2120 		kfree(p);
2121 	}
2122 	tracing_selftest_running = false;
2123 
2124  out:
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
/* Startup selftests are compiled out: report success without doing anything. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Startup selftests are compiled out: nothing to run, always succeeds. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140 
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142 
2143 static void __init apply_trace_boot_options(void);
2144 
2145 /**
2146  * register_tracer - register a tracer with the ftrace system.
2147  * @type: the plugin for the tracer
2148  *
2149  * Register a new plugin tracer.
2150  */
2151 int __init register_tracer(struct tracer *type)
2152 {
2153 	struct tracer *t;
2154 	int ret = 0;
2155 
2156 	if (!type->name) {
2157 		pr_info("Tracer must have a name\n");
2158 		return -1;
2159 	}
2160 
2161 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 		return -1;
2164 	}
2165 
2166 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 		pr_warn("Can not register tracer %s due to lockdown\n",
2168 			   type->name);
2169 		return -EPERM;
2170 	}
2171 
2172 	mutex_lock(&trace_types_lock);
2173 
2174 	for (t = trace_types; t; t = t->next) {
2175 		if (strcmp(type->name, t->name) == 0) {
2176 			/* already found */
2177 			pr_info("Tracer %s already registered\n",
2178 				type->name);
2179 			ret = -1;
2180 			goto out;
2181 		}
2182 	}
2183 
2184 	if (!type->set_flag)
2185 		type->set_flag = &dummy_set_flag;
2186 	if (!type->flags) {
2187 		/*allocate a dummy tracer_flags*/
2188 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 		if (!type->flags) {
2190 			ret = -ENOMEM;
2191 			goto out;
2192 		}
2193 		type->flags->val = 0;
2194 		type->flags->opts = dummy_tracer_opt;
2195 	} else
2196 		if (!type->flags->opts)
2197 			type->flags->opts = dummy_tracer_opt;
2198 
2199 	/* store the tracer for __set_tracer_option */
2200 	type->flags->trace = type;
2201 
2202 	ret = do_run_tracer_selftest(type);
2203 	if (ret < 0)
2204 		goto out;
2205 
2206 	type->next = trace_types;
2207 	trace_types = type;
2208 	add_tracer_options(&global_trace, type);
2209 
2210  out:
2211 	mutex_unlock(&trace_types_lock);
2212 
2213 	if (ret || !default_bootup_tracer)
2214 		goto out_unlock;
2215 
2216 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 		goto out_unlock;
2218 
2219 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 	/* Do we want this tracer to start on bootup? */
2221 	tracing_set_tracer(&global_trace, type->name);
2222 	default_bootup_tracer = NULL;
2223 
2224 	apply_trace_boot_options();
2225 
2226 	/* disable other selftests, since this will break it. */
2227 	disable_tracing_selftest("running a tracer");
2228 
2229  out_unlock:
2230 	return ret;
2231 }
2232 
2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 	struct trace_buffer *buffer = buf->buffer;
2236 
2237 	if (!buffer)
2238 		return;
2239 
2240 	ring_buffer_record_disable(buffer);
2241 
2242 	/* Make sure all commits have finished */
2243 	synchronize_rcu();
2244 	ring_buffer_reset_cpu(buffer, cpu);
2245 
2246 	ring_buffer_record_enable(buffer);
2247 }
2248 
2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 	struct trace_buffer *buffer = buf->buffer;
2252 
2253 	if (!buffer)
2254 		return;
2255 
2256 	ring_buffer_record_disable(buffer);
2257 
2258 	/* Make sure all commits have finished */
2259 	synchronize_rcu();
2260 
2261 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262 
2263 	ring_buffer_reset_online_cpus(buffer);
2264 
2265 	ring_buffer_record_enable(buffer);
2266 }
2267 
2268 /* Must have trace_types_lock held */
2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 	struct trace_array *tr;
2272 
2273 	lockdep_assert_held(&trace_types_lock);
2274 
2275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 		if (!tr->clear_trace)
2277 			continue;
2278 		tr->clear_trace = false;
2279 		tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 		tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 	}
2284 }
2285 
2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 	mutex_lock(&trace_types_lock);
2289 	tracing_reset_all_online_cpus_unlocked();
2290 	mutex_unlock(&trace_types_lock);
2291 }
2292 
2293 /*
2294  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295  * is the tgid last observed corresponding to pid=i.
2296  */
2297 static int *tgid_map;
2298 
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301 
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305  * Preemption must be disabled before acquiring trace_cmdline_lock.
2306  * The various trace_arrays' max_lock must be acquired in a context
2307  * where interrupt is disabled.
2308  */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312 	unsigned *map_cmdline_to_pid;
2313 	unsigned cmdline_num;
2314 	int cmdline_idx;
2315 	char saved_cmdlines[];
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
2318 
2319 static inline char *get_saved_cmdlines(int idx)
2320 {
2321 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2322 }
2323 
2324 static inline void set_cmdline(int idx, const char *cmdline)
2325 {
2326 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2327 }
2328 
2329 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2330 {
2331 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2332 
2333 	kfree(s->map_cmdline_to_pid);
2334 	kmemleak_free(s);
2335 	free_pages((unsigned long)s, order);
2336 }
2337 
2338 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2339 {
2340 	struct saved_cmdlines_buffer *s;
2341 	struct page *page;
2342 	int orig_size, size;
2343 	int order;
2344 
2345 	/* Figure out how much is needed to hold the given number of cmdlines */
2346 	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2347 	order = get_order(orig_size);
2348 	size = 1 << (order + PAGE_SHIFT);
2349 	page = alloc_pages(GFP_KERNEL, order);
2350 	if (!page)
2351 		return NULL;
2352 
2353 	s = page_address(page);
2354 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2355 	memset(s, 0, sizeof(*s));
2356 
2357 	/* Round up to actual allocation */
2358 	val = (size - sizeof(*s)) / TASK_COMM_LEN;
2359 	s->cmdline_num = val;
2360 
2361 	s->map_cmdline_to_pid = kmalloc_array(val,
2362 					      sizeof(*s->map_cmdline_to_pid),
2363 					      GFP_KERNEL);
2364 	if (!s->map_cmdline_to_pid) {
2365 		free_saved_cmdlines_buffer(s);
2366 		return NULL;
2367 	}
2368 
2369 	s->cmdline_idx = 0;
2370 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2371 	       sizeof(s->map_pid_to_cmdline));
2372 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2373 	       val * sizeof(*s->map_cmdline_to_pid));
2374 
2375 	return s;
2376 }
2377 
2378 static int trace_create_savedcmd(void)
2379 {
2380 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2381 
2382 	return savedcmd ? 0 : -ENOMEM;
2383 }
2384 
2385 int is_tracing_stopped(void)
2386 {
2387 	return global_trace.stop_count;
2388 }
2389 
2390 static void tracing_start_tr(struct trace_array *tr)
2391 {
2392 	struct trace_buffer *buffer;
2393 	unsigned long flags;
2394 
2395 	if (tracing_disabled)
2396 		return;
2397 
2398 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2399 	if (--tr->stop_count) {
2400 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2401 			/* Someone screwed up their debugging */
2402 			tr->stop_count = 0;
2403 		}
2404 		goto out;
2405 	}
2406 
2407 	/* Prevent the buffers from switching */
2408 	arch_spin_lock(&tr->max_lock);
2409 
2410 	buffer = tr->array_buffer.buffer;
2411 	if (buffer)
2412 		ring_buffer_record_enable(buffer);
2413 
2414 #ifdef CONFIG_TRACER_MAX_TRACE
2415 	buffer = tr->max_buffer.buffer;
2416 	if (buffer)
2417 		ring_buffer_record_enable(buffer);
2418 #endif
2419 
2420 	arch_spin_unlock(&tr->max_lock);
2421 
2422  out:
2423 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2424 }
2425 
2426 /**
2427  * tracing_start - quick start of the tracer
2428  *
2429  * If tracing is enabled but was stopped by tracing_stop,
2430  * this will start the tracer back up.
2431  */
2432 void tracing_start(void)
2433 
2434 {
2435 	return tracing_start_tr(&global_trace);
2436 }
2437 
2438 static void tracing_stop_tr(struct trace_array *tr)
2439 {
2440 	struct trace_buffer *buffer;
2441 	unsigned long flags;
2442 
2443 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2444 	if (tr->stop_count++)
2445 		goto out;
2446 
2447 	/* Prevent the buffers from switching */
2448 	arch_spin_lock(&tr->max_lock);
2449 
2450 	buffer = tr->array_buffer.buffer;
2451 	if (buffer)
2452 		ring_buffer_record_disable(buffer);
2453 
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455 	buffer = tr->max_buffer.buffer;
2456 	if (buffer)
2457 		ring_buffer_record_disable(buffer);
2458 #endif
2459 
2460 	arch_spin_unlock(&tr->max_lock);
2461 
2462  out:
2463 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2464 }
2465 
2466 /**
2467  * tracing_stop - quick stop of the tracer
2468  *
2469  * Light weight way to stop tracing. Use in conjunction with
2470  * tracing_start.
2471  */
2472 void tracing_stop(void)
2473 {
2474 	return tracing_stop_tr(&global_trace);
2475 }
2476 
2477 static int trace_save_cmdline(struct task_struct *tsk)
2478 {
2479 	unsigned tpid, idx;
2480 
2481 	/* treat recording of idle task as a success */
2482 	if (!tsk->pid)
2483 		return 1;
2484 
2485 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2486 
2487 	/*
2488 	 * It's not the end of the world if we don't get
2489 	 * the lock, but we also don't want to spin
2490 	 * nor do we want to disable interrupts,
2491 	 * so if we miss here, then better luck next time.
2492 	 *
2493 	 * This is called within the scheduler and wake up, so interrupts
2494 	 * had better been disabled and run queue lock been held.
2495 	 */
2496 	lockdep_assert_preemption_disabled();
2497 	if (!arch_spin_trylock(&trace_cmdline_lock))
2498 		return 0;
2499 
2500 	idx = savedcmd->map_pid_to_cmdline[tpid];
2501 	if (idx == NO_CMDLINE_MAP) {
2502 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2503 
2504 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2505 		savedcmd->cmdline_idx = idx;
2506 	}
2507 
2508 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2509 	set_cmdline(idx, tsk->comm);
2510 
2511 	arch_spin_unlock(&trace_cmdline_lock);
2512 
2513 	return 1;
2514 }
2515 
2516 static void __trace_find_cmdline(int pid, char comm[])
2517 {
2518 	unsigned map;
2519 	int tpid;
2520 
2521 	if (!pid) {
2522 		strcpy(comm, "<idle>");
2523 		return;
2524 	}
2525 
2526 	if (WARN_ON_ONCE(pid < 0)) {
2527 		strcpy(comm, "<XXX>");
2528 		return;
2529 	}
2530 
2531 	tpid = pid & (PID_MAX_DEFAULT - 1);
2532 	map = savedcmd->map_pid_to_cmdline[tpid];
2533 	if (map != NO_CMDLINE_MAP) {
2534 		tpid = savedcmd->map_cmdline_to_pid[map];
2535 		if (tpid == pid) {
2536 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2537 			return;
2538 		}
2539 	}
2540 	strcpy(comm, "<...>");
2541 }
2542 
2543 void trace_find_cmdline(int pid, char comm[])
2544 {
2545 	preempt_disable();
2546 	arch_spin_lock(&trace_cmdline_lock);
2547 
2548 	__trace_find_cmdline(pid, comm);
2549 
2550 	arch_spin_unlock(&trace_cmdline_lock);
2551 	preempt_enable();
2552 }
2553 
2554 static int *trace_find_tgid_ptr(int pid)
2555 {
2556 	/*
2557 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2558 	 * if we observe a non-NULL tgid_map then we also observe the correct
2559 	 * tgid_map_max.
2560 	 */
2561 	int *map = smp_load_acquire(&tgid_map);
2562 
2563 	if (unlikely(!map || pid > tgid_map_max))
2564 		return NULL;
2565 
2566 	return &map[pid];
2567 }
2568 
/* Return the tgid recorded for @pid, or 0 when no entry is available. */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
2575 
2576 static int trace_save_tgid(struct task_struct *tsk)
2577 {
2578 	int *ptr;
2579 
2580 	/* treat recording of idle task as a success */
2581 	if (!tsk->pid)
2582 		return 1;
2583 
2584 	ptr = trace_find_tgid_ptr(tsk->pid);
2585 	if (!ptr)
2586 		return 0;
2587 
2588 	*ptr = tsk->tgid;
2589 	return 1;
2590 }
2591 
2592 static bool tracing_record_taskinfo_skip(int flags)
2593 {
2594 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2595 		return true;
2596 	if (!__this_cpu_read(trace_taskinfo_save))
2597 		return true;
2598 	return false;
2599 }
2600 
2601 /**
2602  * tracing_record_taskinfo - record the task info of a task
2603  *
2604  * @task:  task to record
2605  * @flags: TRACE_RECORD_CMDLINE for recording comm
2606  *         TRACE_RECORD_TGID for recording tgid
2607  */
2608 void tracing_record_taskinfo(struct task_struct *task, int flags)
2609 {
2610 	bool done;
2611 
2612 	if (tracing_record_taskinfo_skip(flags))
2613 		return;
2614 
2615 	/*
2616 	 * Record as much task information as possible. If some fail, continue
2617 	 * to try to record the others.
2618 	 */
2619 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2620 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2621 
2622 	/* If recording any information failed, retry again soon. */
2623 	if (!done)
2624 		return;
2625 
2626 	__this_cpu_write(trace_taskinfo_save, false);
2627 }
2628 
2629 /**
2630  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2631  *
2632  * @prev: previous task during sched_switch
2633  * @next: next task during sched_switch
2634  * @flags: TRACE_RECORD_CMDLINE for recording comm
2635  *         TRACE_RECORD_TGID for recording tgid
2636  */
2637 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2638 					  struct task_struct *next, int flags)
2639 {
2640 	bool done;
2641 
2642 	if (tracing_record_taskinfo_skip(flags))
2643 		return;
2644 
2645 	/*
2646 	 * Record as much task information as possible. If some fail, continue
2647 	 * to try to record the others.
2648 	 */
2649 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2650 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2651 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2652 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2653 
2654 	/* If recording any information failed, retry again soon. */
2655 	if (!done)
2656 		return;
2657 
2658 	__this_cpu_write(trace_taskinfo_save, false);
2659 }
2660 
2661 /* Helpers to record a specific task information */
2662 void tracing_record_cmdline(struct task_struct *task)
2663 {
2664 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2665 }
2666 
2667 void tracing_record_tgid(struct task_struct *task)
2668 {
2669 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2670 }
2671 
2672 /*
2673  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2674  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2675  * simplifies those functions and keeps them in sync.
2676  */
2677 enum print_line_t trace_handle_return(struct trace_seq *s)
2678 {
2679 	return trace_seq_has_overflowed(s) ?
2680 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2681 }
2682 EXPORT_SYMBOL_GPL(trace_handle_return);
2683 
/*
 * Return current->migration_disabled when CONFIG_SMP is defined,
 * and 0 otherwise.
 */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2692 
2693 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2694 {
2695 	unsigned int trace_flags = irqs_status;
2696 	unsigned int pc;
2697 
2698 	pc = preempt_count();
2699 
2700 	if (pc & NMI_MASK)
2701 		trace_flags |= TRACE_FLAG_NMI;
2702 	if (pc & HARDIRQ_MASK)
2703 		trace_flags |= TRACE_FLAG_HARDIRQ;
2704 	if (in_serving_softirq())
2705 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2706 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2707 		trace_flags |= TRACE_FLAG_BH_OFF;
2708 
2709 	if (tif_need_resched())
2710 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2711 	if (test_preempt_need_resched())
2712 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2713 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2714 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2715 }
2716 
/* Non-inline entry point that forwards to __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);

	return event;
}
2725 
2726 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2727 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2728 static int trace_buffered_event_ref;
2729 
2730 /**
2731  * trace_buffered_event_enable - enable buffering events
2732  *
2733  * When events are being filtered, it is quicker to use a temporary
2734  * buffer to write the event data into if there's a likely chance
2735  * that it will not be committed. The discard of the ring buffer
2736  * is not as fast as committing, and is much slower than copying
2737  * a commit.
2738  *
2739  * When an event is to be filtered, allocate per cpu buffers to
2740  * write the event data into, and if the event is filtered and discarded
2741  * it is simply dropped, otherwise, the entire data is to be committed
2742  * in one shot.
2743  */
2744 void trace_buffered_event_enable(void)
2745 {
2746 	struct ring_buffer_event *event;
2747 	struct page *page;
2748 	int cpu;
2749 
2750 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2751 
2752 	if (trace_buffered_event_ref++)
2753 		return;
2754 
2755 	for_each_tracing_cpu(cpu) {
2756 		page = alloc_pages_node(cpu_to_node(cpu),
2757 					GFP_KERNEL | __GFP_NORETRY, 0);
2758 		/* This is just an optimization and can handle failures */
2759 		if (!page) {
2760 			pr_err("Failed to allocate event buffer\n");
2761 			break;
2762 		}
2763 
2764 		event = page_address(page);
2765 		memset(event, 0, sizeof(*event));
2766 
2767 		per_cpu(trace_buffered_event, cpu) = event;
2768 
2769 		preempt_disable();
2770 		if (cpu == smp_processor_id() &&
2771 		    __this_cpu_read(trace_buffered_event) !=
2772 		    per_cpu(trace_buffered_event, cpu))
2773 			WARN_ON_ONCE(1);
2774 		preempt_enable();
2775 	}
2776 }
2777 
2778 static void enable_trace_buffered_event(void *data)
2779 {
2780 	/* Probably not needed, but do it anyway */
2781 	smp_rmb();
2782 	this_cpu_dec(trace_buffered_event_cnt);
2783 }
2784 
2785 static void disable_trace_buffered_event(void *data)
2786 {
2787 	this_cpu_inc(trace_buffered_event_cnt);
2788 }
2789 
2790 /**
2791  * trace_buffered_event_disable - disable buffering events
2792  *
2793  * When a filter is removed, it is faster to not use the buffered
2794  * events, and to commit directly into the ring buffer. Free up
2795  * the temp buffers when there are no more users. This requires
2796  * special synchronization with current events.
2797  */
2798 void trace_buffered_event_disable(void)
2799 {
2800 	int cpu;
2801 
2802 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2803 
2804 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2805 		return;
2806 
2807 	if (--trace_buffered_event_ref)
2808 		return;
2809 
2810 	/* For each CPU, set the buffer as used. */
2811 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2812 			 NULL, true);
2813 
2814 	/* Wait for all current users to finish */
2815 	synchronize_rcu();
2816 
2817 	for_each_tracing_cpu(cpu) {
2818 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2819 		per_cpu(trace_buffered_event, cpu) = NULL;
2820 	}
2821 
2822 	/*
2823 	 * Wait for all CPUs that potentially started checking if they can use
2824 	 * their event buffer only after the previous synchronize_rcu() call and
2825 	 * they still read a valid pointer from trace_buffered_event. It must be
2826 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2827 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2828 	 */
2829 	synchronize_rcu();
2830 
2831 	/* For each CPU, relinquish the buffer */
2832 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2833 			 true);
2834 }
2835 
2836 static struct trace_buffer *temp_buffer;
2837 
2838 struct ring_buffer_event *
2839 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2840 			  struct trace_event_file *trace_file,
2841 			  int type, unsigned long len,
2842 			  unsigned int trace_ctx)
2843 {
2844 	struct ring_buffer_event *entry;
2845 	struct trace_array *tr = trace_file->tr;
2846 	int val;
2847 
2848 	*current_rb = tr->array_buffer.buffer;
2849 
2850 	if (!tr->no_filter_buffering_ref &&
2851 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2852 		preempt_disable_notrace();
2853 		/*
2854 		 * Filtering is on, so try to use the per cpu buffer first.
2855 		 * This buffer will simulate a ring_buffer_event,
2856 		 * where the type_len is zero and the array[0] will
2857 		 * hold the full length.
2858 		 * (see include/linux/ring-buffer.h for details on
2859 		 *  how the ring_buffer_event is structured).
2860 		 *
2861 		 * Using a temp buffer during filtering and copying it
2862 		 * on a matched filter is quicker than writing directly
2863 		 * into the ring buffer and then discarding it when
2864 		 * it doesn't match. That is because the discard
2865 		 * requires several atomic operations to get right.
2866 		 * Copying on match and doing nothing on a failed match
2867 		 * is still quicker than no copy on match, but having
2868 		 * to discard out of the ring buffer on a failed match.
2869 		 */
2870 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2871 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2872 
2873 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2874 
2875 			/*
2876 			 * Preemption is disabled, but interrupts and NMIs
2877 			 * can still come in now. If that happens after
2878 			 * the above increment, then it will have to go
2879 			 * back to the old method of allocating the event
2880 			 * on the ring buffer, and if the filter fails, it
2881 			 * will have to call ring_buffer_discard_commit()
2882 			 * to remove it.
2883 			 *
2884 			 * Need to also check the unlikely case that the
2885 			 * length is bigger than the temp buffer size.
2886 			 * If that happens, then the reserve is pretty much
2887 			 * guaranteed to fail, as the ring buffer currently
2888 			 * only allows events less than a page. But that may
2889 			 * change in the future, so let the ring buffer reserve
2890 			 * handle the failure in that case.
2891 			 */
2892 			if (val == 1 && likely(len <= max_len)) {
2893 				trace_event_setup(entry, type, trace_ctx);
2894 				entry->array[0] = len;
2895 				/* Return with preemption disabled */
2896 				return entry;
2897 			}
2898 			this_cpu_dec(trace_buffered_event_cnt);
2899 		}
2900 		/* __trace_buffer_lock_reserve() disables preemption */
2901 		preempt_enable_notrace();
2902 	}
2903 
2904 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2905 					    trace_ctx);
2906 	/*
2907 	 * If tracing is off, but we have triggers enabled
2908 	 * we still need to look at the event data. Use the temp_buffer
2909 	 * to store the trace event for the trigger to use. It's recursive
2910 	 * safe and will not be recorded anywhere.
2911 	 */
2912 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2913 		*current_rb = temp_buffer;
2914 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2915 						    trace_ctx);
2916 	}
2917 	return entry;
2918 }
2919 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2920 
2921 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2922 static DEFINE_MUTEX(tracepoint_printk_mutex);
2923 
2924 static void output_printk(struct trace_event_buffer *fbuffer)
2925 {
2926 	struct trace_event_call *event_call;
2927 	struct trace_event_file *file;
2928 	struct trace_event *event;
2929 	unsigned long flags;
2930 	struct trace_iterator *iter = tracepoint_print_iter;
2931 
2932 	/* We should never get here if iter is NULL */
2933 	if (WARN_ON_ONCE(!iter))
2934 		return;
2935 
2936 	event_call = fbuffer->trace_file->event_call;
2937 	if (!event_call || !event_call->event.funcs ||
2938 	    !event_call->event.funcs->trace)
2939 		return;
2940 
2941 	file = fbuffer->trace_file;
2942 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2943 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2944 	     !filter_match_preds(file->filter, fbuffer->entry)))
2945 		return;
2946 
2947 	event = &fbuffer->trace_file->event_call->event;
2948 
2949 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2950 	trace_seq_init(&iter->seq);
2951 	iter->ent = fbuffer->entry;
2952 	event_call->event.funcs->trace(iter, 0, event);
2953 	trace_seq_putc(&iter->seq, 0);
2954 	printk("%s", iter->seq.buffer);
2955 
2956 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2957 }
2958 
2959 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2960 			     void *buffer, size_t *lenp,
2961 			     loff_t *ppos)
2962 {
2963 	int save_tracepoint_printk;
2964 	int ret;
2965 
2966 	mutex_lock(&tracepoint_printk_mutex);
2967 	save_tracepoint_printk = tracepoint_printk;
2968 
2969 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2970 
2971 	/*
2972 	 * This will force exiting early, as tracepoint_printk
2973 	 * is always zero when tracepoint_printk_iter is not allocated
2974 	 */
2975 	if (!tracepoint_print_iter)
2976 		tracepoint_printk = 0;
2977 
2978 	if (save_tracepoint_printk == tracepoint_printk)
2979 		goto out;
2980 
2981 	if (tracepoint_printk)
2982 		static_key_enable(&tracepoint_printk_key.key);
2983 	else
2984 		static_key_disable(&tracepoint_printk_key.key);
2985 
2986  out:
2987 	mutex_unlock(&tracepoint_printk_mutex);
2988 
2989 	return ret;
2990 }
2991 
2992 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2993 {
2994 	enum event_trigger_type tt = ETT_NONE;
2995 	struct trace_event_file *file = fbuffer->trace_file;
2996 
2997 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2998 			fbuffer->entry, &tt))
2999 		goto discard;
3000 
3001 	if (static_key_false(&tracepoint_printk_key.key))
3002 		output_printk(fbuffer);
3003 
3004 	if (static_branch_unlikely(&trace_event_exports_enabled))
3005 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3006 
3007 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3008 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3009 
3010 discard:
3011 	if (tt)
3012 		event_triggers_post_call(file, tt);
3013 
3014 }
3015 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3016 
3017 /*
3018  * Skip 3:
3019  *
3020  *   trace_buffer_unlock_commit_regs()
3021  *   trace_event_buffer_commit()
3022  *   trace_event_raw_event_xxx()
3023  */
3024 # define STACK_SKIP 3
3025 
3026 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3027 				     struct trace_buffer *buffer,
3028 				     struct ring_buffer_event *event,
3029 				     unsigned int trace_ctx,
3030 				     struct pt_regs *regs)
3031 {
3032 	__buffer_unlock_commit(buffer, event);
3033 
3034 	/*
3035 	 * If regs is not set, then skip the necessary functions.
3036 	 * Note, we can still get here via blktrace, wakeup tracer
3037 	 * and mmiotrace, but that's ok if they lose a function or
3038 	 * two. They are not that meaningful.
3039 	 */
3040 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3041 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3042 }
3043 
/*
 * Like trace_buffer_unlock_commit_regs(), except that no stack trace is
 * recorded: the event is committed and nothing else happens.
 */
void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
					struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
3053 
3054 void
3055 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3056 	       parent_ip, unsigned int trace_ctx)
3057 {
3058 	struct trace_event_call *call = &event_function;
3059 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3060 	struct ring_buffer_event *event;
3061 	struct ftrace_entry *entry;
3062 
3063 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3064 					    trace_ctx);
3065 	if (!event)
3066 		return;
3067 	entry	= ring_buffer_event_data(event);
3068 	entry->ip			= ip;
3069 	entry->parent_ip		= parent_ip;
3070 
3071 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3072 		if (static_branch_unlikely(&trace_function_exports_enabled))
3073 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3074 		__buffer_unlock_commit(buffer, event);
3075 	}
3076 }
3077 
3078 #ifdef CONFIG_STACKTRACE
3079 
3080 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3081 #define FTRACE_KSTACK_NESTING	4
3082 
3083 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3084 
/*
 * Scratch space for one kernel stack trace: the entries written by
 * stack_trace_save() / stack_trace_save_regs() before they are copied
 * into the ring buffer event.
 */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level; see FTRACE_KSTACK_NESTING. */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};
3093 
3094 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3095 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3096 
3097 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3098 				 unsigned int trace_ctx,
3099 				 int skip, struct pt_regs *regs)
3100 {
3101 	struct trace_event_call *call = &event_kernel_stack;
3102 	struct ring_buffer_event *event;
3103 	unsigned int size, nr_entries;
3104 	struct ftrace_stack *fstack;
3105 	struct stack_entry *entry;
3106 	int stackidx;
3107 
3108 	/*
3109 	 * Add one, for this function and the call to save_stack_trace()
3110 	 * If regs is set, then these functions will not be in the way.
3111 	 */
3112 #ifndef CONFIG_UNWINDER_ORC
3113 	if (!regs)
3114 		skip++;
3115 #endif
3116 
3117 	preempt_disable_notrace();
3118 
3119 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3120 
3121 	/* This should never happen. If it does, yell once and skip */
3122 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3123 		goto out;
3124 
3125 	/*
3126 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3127 	 * interrupt will either see the value pre increment or post
3128 	 * increment. If the interrupt happens pre increment it will have
3129 	 * restored the counter when it returns.  We just need a barrier to
3130 	 * keep gcc from moving things around.
3131 	 */
3132 	barrier();
3133 
3134 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3135 	size = ARRAY_SIZE(fstack->calls);
3136 
3137 	if (regs) {
3138 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3139 						   size, skip);
3140 	} else {
3141 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3142 	}
3143 
3144 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3145 				    struct_size(entry, caller, nr_entries),
3146 				    trace_ctx);
3147 	if (!event)
3148 		goto out;
3149 	entry = ring_buffer_event_data(event);
3150 
3151 	entry->size = nr_entries;
3152 	memcpy(&entry->caller, fstack->calls,
3153 	       flex_array_size(entry, caller, nr_entries));
3154 
3155 	if (!call_filter_check_discard(call, entry, buffer, event))
3156 		__buffer_unlock_commit(buffer, event);
3157 
3158  out:
3159 	/* Again, don't let gcc optimize things here */
3160 	barrier();
3161 	__this_cpu_dec(ftrace_stack_reserve);
3162 	preempt_enable_notrace();
3163 
3164 }
3165 
3166 static inline void ftrace_trace_stack(struct trace_array *tr,
3167 				      struct trace_buffer *buffer,
3168 				      unsigned int trace_ctx,
3169 				      int skip, struct pt_regs *regs)
3170 {
3171 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3172 		return;
3173 
3174 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3175 }
3176 
3177 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3178 		   int skip)
3179 {
3180 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3181 
3182 	if (rcu_is_watching()) {
3183 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3184 		return;
3185 	}
3186 
3187 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3188 		return;
3189 
3190 	/*
3191 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3192 	 * but if the above rcu_is_watching() failed, then the NMI
3193 	 * triggered someplace critical, and ct_irq_enter() should
3194 	 * not be called from NMI.
3195 	 */
3196 	if (unlikely(in_nmi()))
3197 		return;
3198 
3199 	ct_irq_enter_irqson();
3200 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3201 	ct_irq_exit_irqson();
3202 }
3203 
3204 /**
3205  * trace_dump_stack - record a stack back trace in the trace buffer
3206  * @skip: Number of functions to skip (helper handlers)
3207  */
3208 void trace_dump_stack(int skip)
3209 {
3210 	if (tracing_disabled || tracing_selftest_running)
3211 		return;
3212 
3213 #ifndef CONFIG_UNWINDER_ORC
3214 	/* Skip 1 to skip this function. */
3215 	skip++;
3216 #endif
3217 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3218 			     tracing_gen_ctx(), skip, NULL);
3219 }
3220 EXPORT_SYMBOL_GPL(trace_dump_stack);
3221 
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Non-zero on a CPU while ftrace_trace_userstack() is recording there. */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record a TRACE_USER_STACK event holding the user space stack of
 * current into @buffer. Does nothing unless TRACE_ITER_USERSTACKTRACE
 * is set for @tr, when called from NMI context, or when a user stack
 * trace is already being recorded on this CPU.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct trace_event_call *evcall = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * Saving the user stack can (and often does) fault, and NMIs
	 * can not handle page faults, even with fix ups.
	 */
	if (unlikely(in_nmi()))
		return;

	preempt_disable();

	/*
	 * User stack tracing may itself trigger other kernel events;
	 * the per-CPU counter keeps those from recursing into here.
	 */
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), trace_ctx);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			memset(&entry->caller, 0, sizeof(entry->caller));

			stack_trace_save_user(entry->caller,
					      FTRACE_STACK_ENTRIES);
			if (!call_filter_check_discard(evcall, entry,
						       buffer, event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}

	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user stack trace support configured: nothing to record. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3278 
3279 #endif /* CONFIG_STACKTRACE */
3280 
3281 static inline void
3282 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3283 			  unsigned long long delta)
3284 {
3285 	entry->bottom_delta_ts = delta & U32_MAX;
3286 	entry->top_delta_ts = (delta >> 32);
3287 }
3288 
3289 void trace_last_func_repeats(struct trace_array *tr,
3290 			     struct trace_func_repeats *last_info,
3291 			     unsigned int trace_ctx)
3292 {
3293 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3294 	struct func_repeats_entry *entry;
3295 	struct ring_buffer_event *event;
3296 	u64 delta;
3297 
3298 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3299 					    sizeof(*entry), trace_ctx);
3300 	if (!event)
3301 		return;
3302 
3303 	delta = ring_buffer_event_time_stamp(buffer, event) -
3304 		last_info->ts_last_call;
3305 
3306 	entry = ring_buffer_event_data(event);
3307 	entry->ip = last_info->ip;
3308 	entry->parent_ip = last_info->parent_ip;
3309 	entry->count = last_info->count;
3310 	func_repeats_set_delta_ts(entry, delta);
3311 
3312 	__buffer_unlock_commit(buffer, event);
3313 }
3314 
3315 /* created for use with alloc_percpu */
/*
 * @nesting: number of buffers currently handed out by get_trace_buf()
 *           on this CPU; it also selects which row of @buffer is
 *           handed out next.
 * @buffer:  four scratch areas of TRACE_BUF_SIZE bytes each, one per
 *           nesting level.
 */
3316 struct trace_buffer_struct {
3317 	int nesting;
3318 	char buffer[4][TRACE_BUF_SIZE];
3319 };
3320 
/* Per-CPU instances; NULL until alloc_percpu_trace_buffer() succeeds. */
3321 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3322 
3323 /*
3324  * This allows for lockless recording.  If we're nested too deeply, then
3325  * this returns NULL.
3326  */
3327 static char *get_trace_buf(void)
3328 {
3329 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3330 
3331 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3332 		return NULL;
3333 
3334 	buffer->nesting++;
3335 
3336 	/* Interrupts must see nesting incremented before we use the buffer */
3337 	barrier();
3338 	return &buffer->buffer[buffer->nesting - 1][0];
3339 }
3340 
/*
 * Give back the buffer obtained from the matching get_trace_buf() call
 * by decrementing this CPU's nesting count.
 */
3341 static void put_trace_buf(void)
3342 {
3343 	/* Don't let the decrement of nesting leak before this */
3344 	barrier();
3345 	this_cpu_dec(trace_percpu_buffer->nesting);
3346 }
3347 
3348 static int alloc_percpu_trace_buffer(void)
3349 {
3350 	struct trace_buffer_struct __percpu *buffers;
3351 
3352 	if (trace_percpu_buffer)
3353 		return 0;
3354 
3355 	buffers = alloc_percpu(struct trace_buffer_struct);
3356 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3357 		return -ENOMEM;
3358 
3359 	trace_percpu_buffer = buffers;
3360 	return 0;
3361 }
3362 
3363 static int buffers_allocated;
3364 
3365 void trace_printk_init_buffers(void)
3366 {
3367 	if (buffers_allocated)
3368 		return;
3369 
3370 	if (alloc_percpu_trace_buffer())
3371 		return;
3372 
3373 	/* trace_printk() is for debug use only. Don't use it in production. */
3374 
3375 	pr_warn("\n");
3376 	pr_warn("**********************************************************\n");
3377 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3378 	pr_warn("**                                                      **\n");
3379 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3380 	pr_warn("**                                                      **\n");
3381 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3382 	pr_warn("** unsafe for production use.                           **\n");
3383 	pr_warn("**                                                      **\n");
3384 	pr_warn("** If you see this message and you are not debugging    **\n");
3385 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3386 	pr_warn("**                                                      **\n");
3387 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3388 	pr_warn("**********************************************************\n");
3389 
3390 	/* Expand the buffers to set size */
3391 	tracing_update_buffers();
3392 
3393 	buffers_allocated = 1;
3394 
3395 	/*
3396 	 * trace_printk_init_buffers() can be called by modules.
3397 	 * If that happens, then we need to start cmdline recording
3398 	 * directly here. If the global_trace.buffer is already
3399 	 * allocated here, then this was called by module code.
3400 	 */
3401 	if (global_trace.array_buffer.buffer)
3402 		tracing_start_cmdline_record();
3403 }
3404 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3405 
3406 void trace_printk_start_comm(void)
3407 {
3408 	/* Start tracing comms if trace printk is set */
3409 	if (!buffers_allocated)
3410 		return;
3411 	tracing_start_cmdline_record();
3412 }
3413 
3414 static void trace_printk_start_stop_comm(int enabled)
3415 {
3416 	if (!buffers_allocated)
3417 		return;
3418 
3419 	if (enabled)
3420 		tracing_start_cmdline_record();
3421 	else
3422 		tracing_stop_cmdline_record();
3423 }
3424 
3425 /**
3426  * trace_vbprintk - write binary msg to tracing buffer
3427  * @ip:    The address of the caller
3428  * @fmt:   The string format to write to the buffer
3429  * @args:  Arguments for @fmt
3430  */
3431 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3432 {
3433 	struct trace_event_call *call = &event_bprint;
3434 	struct ring_buffer_event *event;
3435 	struct trace_buffer *buffer;
3436 	struct trace_array *tr = &global_trace;
3437 	struct bprint_entry *entry;
3438 	unsigned int trace_ctx;
3439 	char *tbuffer;
3440 	int len = 0, size;
3441 
3442 	if (unlikely(tracing_selftest_running || tracing_disabled))
3443 		return 0;
3444 
3445 	/* Don't pollute graph traces with trace_vprintk internals */
3446 	pause_graph_tracing();
3447 
3448 	trace_ctx = tracing_gen_ctx();
3449 	preempt_disable_notrace();
3450 
3451 	tbuffer = get_trace_buf();
3452 	if (!tbuffer) {
3453 		len = 0;
3454 		goto out_nobuffer;
3455 	}
3456 
3457 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3458 
3459 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3460 		goto out_put;
3461 
3462 	size = sizeof(*entry) + sizeof(u32) * len;
3463 	buffer = tr->array_buffer.buffer;
3464 	ring_buffer_nest_start(buffer);
3465 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3466 					    trace_ctx);
3467 	if (!event)
3468 		goto out;
3469 	entry = ring_buffer_event_data(event);
3470 	entry->ip			= ip;
3471 	entry->fmt			= fmt;
3472 
3473 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3474 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3475 		__buffer_unlock_commit(buffer, event);
3476 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3477 	}
3478 
3479 out:
3480 	ring_buffer_nest_end(buffer);
3481 out_put:
3482 	put_trace_buf();
3483 
3484 out_nobuffer:
3485 	preempt_enable_notrace();
3486 	unpause_graph_tracing();
3487 
3488 	return len;
3489 }
3490 EXPORT_SYMBOL_GPL(trace_vbprintk);
3491 
3492 __printf(3, 0)
3493 static int
3494 __trace_array_vprintk(struct trace_buffer *buffer,
3495 		      unsigned long ip, const char *fmt, va_list args)
3496 {
3497 	struct trace_event_call *call = &event_print;
3498 	struct ring_buffer_event *event;
3499 	int len = 0, size;
3500 	struct print_entry *entry;
3501 	unsigned int trace_ctx;
3502 	char *tbuffer;
3503 
3504 	if (tracing_disabled)
3505 		return 0;
3506 
3507 	/* Don't pollute graph traces with trace_vprintk internals */
3508 	pause_graph_tracing();
3509 
3510 	trace_ctx = tracing_gen_ctx();
3511 	preempt_disable_notrace();
3512 
3513 
3514 	tbuffer = get_trace_buf();
3515 	if (!tbuffer) {
3516 		len = 0;
3517 		goto out_nobuffer;
3518 	}
3519 
3520 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3521 
3522 	size = sizeof(*entry) + len + 1;
3523 	ring_buffer_nest_start(buffer);
3524 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3525 					    trace_ctx);
3526 	if (!event)
3527 		goto out;
3528 	entry = ring_buffer_event_data(event);
3529 	entry->ip = ip;
3530 
3531 	memcpy(&entry->buf, tbuffer, len + 1);
3532 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3533 		__buffer_unlock_commit(buffer, event);
3534 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3535 	}
3536 
3537 out:
3538 	ring_buffer_nest_end(buffer);
3539 	put_trace_buf();
3540 
3541 out_nobuffer:
3542 	preempt_enable_notrace();
3543 	unpause_graph_tracing();
3544 
3545 	return len;
3546 }
3547 
3548 __printf(3, 0)
3549 int trace_array_vprintk(struct trace_array *tr,
3550 			unsigned long ip, const char *fmt, va_list args)
3551 {
3552 	if (tracing_selftest_running && tr == &global_trace)
3553 		return 0;
3554 
3555 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3556 }
3557 
3558 /**
3559  * trace_array_printk - Print a message to a specific instance
3560  * @tr: The instance trace_array descriptor
3561  * @ip: The instruction pointer that this is called from.
3562  * @fmt: The format to print (printf format)
3563  *
3564  * If a subsystem sets up its own instance, they have the right to
3565  * printk strings into their tracing instance buffer using this
3566  * function. Note, this function will not write into the top level
3567  * buffer (use trace_printk() for that), as writing into the top level
3568  * buffer should only have events that can be individually disabled.
3569  * trace_printk() is only used for debugging a kernel, and should not
3570  * be ever incorporated in normal use.
3571  *
3572  * trace_array_printk() can be used, as it will not add noise to the
3573  * top level tracing buffer.
3574  *
3575  * Note, trace_array_init_printk() must be called on @tr before this
3576  * can be used.
3577  */
3578 __printf(3, 0)
3579 int trace_array_printk(struct trace_array *tr,
3580 		       unsigned long ip, const char *fmt, ...)
3581 {
3582 	int ret;
3583 	va_list ap;
3584 
3585 	if (!tr)
3586 		return -ENOENT;
3587 
3588 	/* This is only allowed for created instances */
3589 	if (tr == &global_trace)
3590 		return 0;
3591 
3592 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3593 		return 0;
3594 
3595 	va_start(ap, fmt);
3596 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3597 	va_end(ap);
3598 	return ret;
3599 }
3600 EXPORT_SYMBOL_GPL(trace_array_printk);
3601 
3602 /**
3603  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3604  * @tr: The trace array to initialize the buffers for
3605  *
3606  * As trace_array_printk() only writes into instances, they are OK to
3607  * have in the kernel (unlike trace_printk()). This needs to be called
3608  * before trace_array_printk() can be used on a trace_array.
3609  */
3610 int trace_array_init_printk(struct trace_array *tr)
3611 {
3612 	if (!tr)
3613 		return -ENOENT;
3614 
3615 	/* This is only allowed for created instances */
3616 	if (tr == &global_trace)
3617 		return -EINVAL;
3618 
3619 	return alloc_percpu_trace_buffer();
3620 }
3621 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3622 
3623 __printf(3, 4)
3624 int trace_array_printk_buf(struct trace_buffer *buffer,
3625 			   unsigned long ip, const char *fmt, ...)
3626 {
3627 	int ret;
3628 	va_list ap;
3629 
3630 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3631 		return 0;
3632 
3633 	va_start(ap, fmt);
3634 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3635 	va_end(ap);
3636 	return ret;
3637 }
3638 
3639 __printf(2, 0)
3640 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3641 {
3642 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3643 }
3644 EXPORT_SYMBOL_GPL(trace_vprintk);
3645 
3646 static void trace_iterator_increment(struct trace_iterator *iter)
3647 {
3648 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3649 
3650 	iter->idx++;
3651 	if (buf_iter)
3652 		ring_buffer_iter_advance(buf_iter);
3653 }
3654 
3655 static struct trace_entry *
3656 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3657 		unsigned long *lost_events)
3658 {
3659 	struct ring_buffer_event *event;
3660 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3661 
3662 	if (buf_iter) {
3663 		event = ring_buffer_iter_peek(buf_iter, ts);
3664 		if (lost_events)
3665 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3666 				(unsigned long)-1 : 0;
3667 	} else {
3668 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3669 					 lost_events);
3670 	}
3671 
3672 	if (event) {
3673 		iter->ent_size = ring_buffer_event_length(event);
3674 		return ring_buffer_event_data(event);
3675 	}
3676 	iter->ent_size = 0;
3677 	return NULL;
3678 }
3679 
3680 static struct trace_entry *
3681 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3682 		  unsigned long *missing_events, u64 *ent_ts)
3683 {
3684 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3685 	struct trace_entry *ent, *next = NULL;
3686 	unsigned long lost_events = 0, next_lost = 0;
3687 	int cpu_file = iter->cpu_file;
3688 	u64 next_ts = 0, ts;
3689 	int next_cpu = -1;
3690 	int next_size = 0;
3691 	int cpu;
3692 
3693 	/*
3694 	 * If we are in a per_cpu trace file, don't bother by iterating over
3695 	 * all cpu and peek directly.
3696 	 */
3697 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3698 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3699 			return NULL;
3700 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3701 		if (ent_cpu)
3702 			*ent_cpu = cpu_file;
3703 
3704 		return ent;
3705 	}
3706 
3707 	for_each_tracing_cpu(cpu) {
3708 
3709 		if (ring_buffer_empty_cpu(buffer, cpu))
3710 			continue;
3711 
3712 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3713 
3714 		/*
3715 		 * Pick the entry with the smallest timestamp:
3716 		 */
3717 		if (ent && (!next || ts < next_ts)) {
3718 			next = ent;
3719 			next_cpu = cpu;
3720 			next_ts = ts;
3721 			next_lost = lost_events;
3722 			next_size = iter->ent_size;
3723 		}
3724 	}
3725 
3726 	iter->ent_size = next_size;
3727 
3728 	if (ent_cpu)
3729 		*ent_cpu = next_cpu;
3730 
3731 	if (ent_ts)
3732 		*ent_ts = next_ts;
3733 
3734 	if (missing_events)
3735 		*missing_events = next_lost;
3736 
3737 	return next;
3738 }
3739 
3740 #define STATIC_FMT_BUF_SIZE	128
3741 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3742 
3743 char *trace_iter_expand_format(struct trace_iterator *iter)
3744 {
3745 	char *tmp;
3746 
3747 	/*
3748 	 * iter->tr is NULL when used with tp_printk, which makes
3749 	 * this get called where it is not safe to call krealloc().
3750 	 */
3751 	if (!iter->tr || iter->fmt == static_fmt_buf)
3752 		return NULL;
3753 
3754 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3755 		       GFP_KERNEL);
3756 	if (tmp) {
3757 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3758 		iter->fmt = tmp;
3759 	}
3760 
3761 	return tmp;
3762 }
3763 
3764 /* Returns true if the string is safe to dereference from an event */
3765 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3766 			   bool star, int len)
3767 {
3768 	unsigned long addr = (unsigned long)str;
3769 	struct trace_event *trace_event;
3770 	struct trace_event_call *event;
3771 
3772 	/* Ignore strings with no length */
3773 	if (star && !len)
3774 		return true;
3775 
3776 	/* OK if part of the event data */
3777 	if ((addr >= (unsigned long)iter->ent) &&
3778 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3779 		return true;
3780 
3781 	/* OK if part of the temp seq buffer */
3782 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3783 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3784 		return true;
3785 
3786 	/* Core rodata can not be freed */
3787 	if (is_kernel_rodata(addr))
3788 		return true;
3789 
3790 	if (trace_is_tracepoint_string(str))
3791 		return true;
3792 
3793 	/*
3794 	 * Now this could be a module event, referencing core module
3795 	 * data, which is OK.
3796 	 */
3797 	if (!iter->ent)
3798 		return false;
3799 
3800 	trace_event = ftrace_find_event(iter->ent->type);
3801 	if (!trace_event)
3802 		return false;
3803 
3804 	event = container_of(trace_event, struct trace_event_call, event);
3805 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3806 		return false;
3807 
3808 	/* Would rather have rodata, but this will suffice */
3809 	if (within_module_core(addr, event->module))
3810 		return true;
3811 
3812 	return false;
3813 }
3814 
3815 static const char *show_buffer(struct trace_seq *s)
3816 {
3817 	struct seq_buf *seq = &s->seq;
3818 
3819 	seq_buf_terminate(seq);
3820 
3821 	return seq->buffer;
3822 }
3823 
/*
 * Static key, false by default. test_can_verify() increments it when
 * test_can_verify_check() returns 0; trace_check_vprintf() then skips
 * its %s checks and prints the format as is.
 */
3824 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3825 
/*
 * Probe how this architecture hands a va_list to vsnprintf().
 *
 * One "%d" is formatted from the variable arguments, then the next int
 * is fetched from the same va_list and returned. Which of the caller's
 * arguments that is depends on whether vsnprintf() advanced the
 * caller's va_list.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list args;
	int next;

	/*
	 * The verifier relies on vsnprintf() modifying the va_list that
	 * is passed to it, which works where the va_list is handed over
	 * as a reference. Some architectures (like x86_32) pass it by
	 * value instead, so vsnprintf() leaves the caller's va_list
	 * alone; the verifier would then have to understand every
	 * conversion vsnprintf() can use. In that case the verifier is
	 * disabled.
	 */
	va_start(args, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", args);
	next = va_arg(args, int);
	va_end(args);

	return next;
}
3848 
3849 static void test_can_verify(void)
3850 {
3851 	if (!test_can_verify_check("%d %d", 0, 1)) {
3852 		pr_info("trace event string verifier disabled\n");
3853 		static_branch_inc(&trace_no_verify);
3854 	}
3855 }
3856 
3857 /**
3858  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3859  * @iter: The iterator that holds the seq buffer and the event being printed
3860  * @fmt: The format used to print the event
3861  * @ap: The va_list holding the data to print from @fmt.
3862  *
3863  * This writes the data into the @iter->seq buffer using the data from
3864  * @fmt and @ap. If the format has a %s, then the source of the string
3865  * is examined to make sure it is safe to print, otherwise it will
3866  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3867  * pointer.
3868  */
3869 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3870 			 va_list ap)
3871 {
	/* p: start of the part of fmt that has not been printed yet */
3872 	const char *p = fmt;
3873 	const char *str;
	/*
	 * Within one pass of the outer loop: i is the offset in p of the
	 * '%' that starts a %s style conversion, and j is the distance
	 * from that '%' to the 's' itself.
	 */
3874 	int i, j;
3875 
3876 	if (WARN_ON_ONCE(!fmt))
3877 		return;
3878 
	/* Verification was turned off by test_can_verify() */
3879 	if (static_branch_unlikely(&trace_no_verify))
3880 		goto print;
3881 
3882 	/* Don't bother checking when doing a ftrace_dump() */
3883 	if (iter->fmt == static_fmt_buf)
3884 		goto print;
3885 
	/* Each pass handles the text up to and including one %s conversion */
3886 	while (*p) {
		/* star: a '*' was seen between the '%' and the 's' */
3887 		bool star = false;
		/* len: the int argument consumed for that '*' */
3888 		int len = 0;
3889 
3890 		j = 0;
3891 
3892 		/* We only care about %s and variants */
3893 		for (i = 0; p[i]; i++) {
3894 			if (i + 1 >= iter->fmt_size) {
3895 				/*
3896 				 * If we can't expand the copy buffer,
3897 				 * just print it.
3898 				 */
3899 				if (!trace_iter_expand_format(iter))
3900 					goto print;
3901 			}
3902 
			/* Step over the character that follows a backslash */
3903 			if (p[i] == '\\' && p[i+1]) {
3904 				i++;
3905 				continue;
3906 			}
3907 			if (p[i] == '%') {
3908 				/* Need to test cases like %08.*s */
3909 				for (j = 1; p[i+j]; j++) {
3910 					if (isdigit(p[i+j]) ||
3911 					    p[i+j] == '.')
3912 						continue;
3913 					if (p[i+j] == '*') {
3914 						star = true;
3915 						continue;
3916 					}
3917 					break;
3918 				}
3919 				if (p[i+j] == 's')
3920 					break;
				/* Not a string conversion: forget any '*' seen */
3921 				star = false;
3922 			}
3923 			j = 0;
3924 		}
3925 		/* If no %s found then just print normally */
3926 		if (!p[i])
3927 			break;
3928 
3929 		/* Copy up to the %s, and print that */
3930 		strncpy(iter->fmt, p, i);
3931 		iter->fmt[i] = '\0';
3932 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3933 
3934 		/*
3935 		 * If iter->seq is full, the above call no longer guarantees
3936 		 * that ap is in sync with fmt processing, and further calls
3937 		 * to va_arg() can return wrong positional arguments.
3938 		 *
3939 		 * Ensure that ap is no longer used in this case.
3940 		 */
3941 		if (iter->seq.full) {
3942 			p = "";
3943 			break;
3944 		}
3945 
		/* With a '*', an int comes before the string argument */
3946 		if (star)
3947 			len = va_arg(ap, int);
3948 
3949 		/* The ap now points to the string data of the %s */
3950 		str = va_arg(ap, const char *);
3951 
3952 		/*
3953 		 * If you hit this warning, it is likely that the
3954 		 * trace event in question used %s on a string that
3955 		 * was saved at the time of the event, but may not be
3956 		 * around when the trace is read. Use __string(),
3957 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3958 		 * instead. See samples/trace_events/trace-events-sample.h
3959 		 * for reference.
3960 		 */
3961 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3962 			      "fmt: '%s' current_buffer: '%s'",
3963 			      fmt, show_buffer(&iter->seq))) {
3964 			int ret;
3965 
3966 			/* Try to safely read the string */
3967 			if (star) {
				/* Clamp len so the copy and its NUL fit in iter->fmt */
3968 				if (len + 1 > iter->fmt_size)
3969 					len = iter->fmt_size - 1;
3970 				if (len < 0)
3971 					len = 0;
3972 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3973 				iter->fmt[len] = 0;
				/* The replacement below is printed with a plain "%s" */
3974 				star = false;
3975 			} else {
3976 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3977 								  iter->fmt_size);
3978 			}
			/* Show the pointer, plus the text when it could be read */
3979 			if (ret < 0)
3980 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3981 			else
3982 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3983 						 str, iter->fmt);
3984 			str = "[UNSAFE-MEMORY]";
3985 			strcpy(iter->fmt, "%s");
3986 		} else {
			/* Safe: copy the conversion itself, '%' through 's' */
3987 			strncpy(iter->fmt, p + i, j + 1);
3988 			iter->fmt[j+1] = '\0';
3989 		}
3990 		if (star)
3991 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3992 		else
3993 			trace_seq_printf(&iter->seq, iter->fmt, str);
3994 
		/* Continue right after the 's' that was just handled */
3995 		p += i + j + 1;
3996 	}
3997  print:
	/* Print what is left of the format, all of it if checks were skipped */
3998 	if (*p)
3999 		trace_seq_vprintf(&iter->seq, p, ap);
4000 }
4001 
4002 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4003 {
4004 	const char *p, *new_fmt;
4005 	char *q;
4006 
4007 	if (WARN_ON_ONCE(!fmt))
4008 		return fmt;
4009 
4010 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4011 		return fmt;
4012 
4013 	p = fmt;
4014 	new_fmt = q = iter->fmt;
4015 	while (*p) {
4016 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4017 			if (!trace_iter_expand_format(iter))
4018 				return fmt;
4019 
4020 			q += iter->fmt - new_fmt;
4021 			new_fmt = iter->fmt;
4022 		}
4023 
4024 		*q++ = *p++;
4025 
4026 		/* Replace %p with %px */
4027 		if (p[-1] == '%') {
4028 			if (p[0] == '%') {
4029 				*q++ = *p++;
4030 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4031 				*q++ = *p++;
4032 				*q++ = 'x';
4033 			}
4034 		}
4035 	}
4036 	*q = '\0';
4037 
4038 	return new_fmt;
4039 }
4040 
4041 #define STATIC_TEMP_BUF_SIZE	128
4042 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4043 
4044 /* Find the next real entry, without updating the iterator itself */
4045 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4046 					  int *ent_cpu, u64 *ent_ts)
4047 {
4048 	/* __find_next_entry will reset ent_size */
4049 	int ent_size = iter->ent_size;
4050 	struct trace_entry *entry;
4051 
4052 	/*
4053 	 * If called from ftrace_dump(), then the iter->temp buffer
4054 	 * will be the static_temp_buf and not created from kmalloc.
4055 	 * If the entry size is greater than the buffer, we can
4056 	 * not save it. Just return NULL in that case. This is only
4057 	 * used to add markers when two consecutive events' time
4058 	 * stamps have a large delta. See trace_print_lat_context()
4059 	 */
4060 	if (iter->temp == static_temp_buf &&
4061 	    STATIC_TEMP_BUF_SIZE < ent_size)
4062 		return NULL;
4063 
4064 	/*
4065 	 * The __find_next_entry() may call peek_next_entry(), which may
4066 	 * call ring_buffer_peek() that may make the contents of iter->ent
4067 	 * undefined. Need to copy iter->ent now.
4068 	 */
4069 	if (iter->ent && iter->ent != iter->temp) {
4070 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4071 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4072 			void *temp;
4073 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4074 			if (!temp)
4075 				return NULL;
4076 			kfree(iter->temp);
4077 			iter->temp = temp;
4078 			iter->temp_size = iter->ent_size;
4079 		}
4080 		memcpy(iter->temp, iter->ent, iter->ent_size);
4081 		iter->ent = iter->temp;
4082 	}
4083 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4084 	/* Put back the original ent_size */
4085 	iter->ent_size = ent_size;
4086 
4087 	return entry;
4088 }
4089 
4090 /* Find the next real entry, and increment the iterator to the next entry */
4091 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4092 {
4093 	iter->ent = __find_next_entry(iter, &iter->cpu,
4094 				      &iter->lost_events, &iter->ts);
4095 
4096 	if (iter->ent)
4097 		trace_iterator_increment(iter);
4098 
4099 	return iter->ent ? iter : NULL;
4100 }
4101 
4102 static void trace_consume(struct trace_iterator *iter)
4103 {
4104 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4105 			    &iter->lost_events);
4106 }
4107 
4108 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4109 {
4110 	struct trace_iterator *iter = m->private;
4111 	int i = (int)*pos;
4112 	void *ent;
4113 
4114 	WARN_ON_ONCE(iter->leftover);
4115 
4116 	(*pos)++;
4117 
4118 	/* can't go backwards */
4119 	if (iter->idx > i)
4120 		return NULL;
4121 
4122 	if (iter->idx < 0)
4123 		ent = trace_find_next_entry_inc(iter);
4124 	else
4125 		ent = iter;
4126 
4127 	while (ent && iter->idx < i)
4128 		ent = trace_find_next_entry_inc(iter);
4129 
4130 	iter->pos = *pos;
4131 
4132 	return ent;
4133 }
4134 
4135 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4136 {
4137 	struct ring_buffer_iter *buf_iter;
4138 	unsigned long entries = 0;
4139 	u64 ts;
4140 
4141 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4142 
4143 	buf_iter = trace_buffer_iter(iter, cpu);
4144 	if (!buf_iter)
4145 		return;
4146 
4147 	ring_buffer_iter_reset(buf_iter);
4148 
4149 	/*
4150 	 * We could have the case with the max latency tracers
4151 	 * that a reset never took place on a cpu. This is evident
4152 	 * by the timestamp being before the start of the buffer.
4153 	 */
4154 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4155 		if (ts >= iter->array_buffer->time_start)
4156 			break;
4157 		entries++;
4158 		ring_buffer_iter_advance(buf_iter);
4159 	}
4160 
4161 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163 
4164 /*
4165  * The current tracer is copied to avoid a global locking
4166  * all around.
4167  */
4168 static void *s_start(struct seq_file *m, loff_t *pos)
4169 {
4170 	struct trace_iterator *iter = m->private;
4171 	struct trace_array *tr = iter->tr;
4172 	int cpu_file = iter->cpu_file;
4173 	void *p = NULL;
4174 	loff_t l = 0;
4175 	int cpu;
4176 
4177 	mutex_lock(&trace_types_lock);
4178 	if (unlikely(tr->current_trace != iter->trace)) {
4179 		/* Close iter->trace before switching to the new current tracer */
4180 		if (iter->trace->close)
4181 			iter->trace->close(iter);
4182 		iter->trace = tr->current_trace;
4183 		/* Reopen the new current tracer */
4184 		if (iter->trace->open)
4185 			iter->trace->open(iter);
4186 	}
4187 	mutex_unlock(&trace_types_lock);
4188 
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190 	if (iter->snapshot && iter->trace->use_max_tr)
4191 		return ERR_PTR(-EBUSY);
4192 #endif
4193 
4194 	if (*pos != iter->pos) {
4195 		iter->ent = NULL;
4196 		iter->cpu = 0;
4197 		iter->idx = -1;
4198 
4199 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4200 			for_each_tracing_cpu(cpu)
4201 				tracing_iter_reset(iter, cpu);
4202 		} else
4203 			tracing_iter_reset(iter, cpu_file);
4204 
4205 		iter->leftover = 0;
4206 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4207 			;
4208 
4209 	} else {
4210 		/*
4211 		 * If we overflowed the seq_file before, then we want
4212 		 * to just reuse the trace_seq buffer again.
4213 		 */
4214 		if (iter->leftover)
4215 			p = iter;
4216 		else {
4217 			l = *pos - 1;
4218 			p = s_next(m, p, &l);
4219 		}
4220 	}
4221 
4222 	trace_event_read_lock();
4223 	trace_access_lock(cpu_file);
4224 	return p;
4225 }
4226 
4227 static void s_stop(struct seq_file *m, void *p)
4228 {
4229 	struct trace_iterator *iter = m->private;
4230 
4231 #ifdef CONFIG_TRACER_MAX_TRACE
4232 	if (iter->snapshot && iter->trace->use_max_tr)
4233 		return;
4234 #endif
4235 
4236 	trace_access_unlock(iter->cpu_file);
4237 	trace_event_read_unlock();
4238 }
4239 
4240 static void
4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242 		      unsigned long *entries, int cpu)
4243 {
4244 	unsigned long count;
4245 
4246 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247 	/*
4248 	 * If this buffer has skipped entries, then we hold all
4249 	 * entries for the trace and we need to ignore the
4250 	 * ones before the time stamp.
4251 	 */
4252 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254 		/* total is the same as the entries */
4255 		*total = count;
4256 	} else
4257 		*total = count +
4258 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4259 	*entries = count;
4260 }
4261 
4262 static void
4263 get_total_entries(struct array_buffer *buf,
4264 		  unsigned long *total, unsigned long *entries)
4265 {
4266 	unsigned long t, e;
4267 	int cpu;
4268 
4269 	*total = 0;
4270 	*entries = 0;
4271 
4272 	for_each_tracing_cpu(cpu) {
4273 		get_total_entries_cpu(buf, &t, &e, cpu);
4274 		*total += t;
4275 		*entries += e;
4276 	}
4277 }
4278 
4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281 	unsigned long total, entries;
4282 
4283 	if (!tr)
4284 		tr = &global_trace;
4285 
4286 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287 
4288 	return entries;
4289 }
4290 
4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293 	unsigned long total, entries;
4294 
4295 	if (!tr)
4296 		tr = &global_trace;
4297 
4298 	get_total_entries(&tr->array_buffer, &total, &entries);
4299 
4300 	return entries;
4301 }
4302 
/* Print the column legend used by the latency trace format. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off/BH-disabled\n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| / _-=> migrate-disable \n"
		"#                  ||||| /     delay           \n"
		"#  cmd     pid     |||||| time  |   caller     \n"
		"#     \\   /        ||||||  \\    |    /       \n";

	seq_puts(m, legend);
}
4315 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, held;

	get_total_entries(buf, &written, &held);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   held, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4326 
4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328 				   unsigned int flags)
4329 {
4330 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331 
4332 	print_event_info(buf, m);
4333 
4334 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4335 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4336 }
4337 
4338 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4339 				       unsigned int flags)
4340 {
4341 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4342 	static const char space[] = "            ";
4343 	int prec = tgid ? 12 : 2;
4344 
4345 	print_event_info(buf, m);
4346 
4347 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4348 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4349 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4350 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4351 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4352 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4353 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4354 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4355 }
4356 
4357 void
4358 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4359 {
4360 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4361 	struct array_buffer *buf = iter->array_buffer;
4362 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4363 	struct tracer *type = iter->trace;
4364 	unsigned long entries;
4365 	unsigned long total;
4366 	const char *name = type->name;
4367 
4368 	get_total_entries(buf, &total, &entries);
4369 
4370 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4371 		   name, UTS_RELEASE);
4372 	seq_puts(m, "# -----------------------------------"
4373 		 "---------------------------------\n");
4374 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4375 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4376 		   nsecs_to_usecs(data->saved_latency),
4377 		   entries,
4378 		   total,
4379 		   buf->cpu,
4380 		   preempt_model_none()      ? "server" :
4381 		   preempt_model_voluntary() ? "desktop" :
4382 		   preempt_model_full()      ? "preempt" :
4383 		   preempt_model_rt()        ? "preempt_rt" :
4384 		   "unknown",
4385 		   /* These are reserved for later use */
4386 		   0, 0, 0, 0);
4387 #ifdef CONFIG_SMP
4388 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4389 #else
4390 	seq_puts(m, ")\n");
4391 #endif
4392 	seq_puts(m, "#    -----------------\n");
4393 	seq_printf(m, "#    | task: %.16s-%d "
4394 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4395 		   data->comm, data->pid,
4396 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4397 		   data->policy, data->rt_priority);
4398 	seq_puts(m, "#    -----------------\n");
4399 
4400 	if (data->critical_start) {
4401 		seq_puts(m, "#  => started at: ");
4402 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4403 		trace_print_seq(m, &iter->seq);
4404 		seq_puts(m, "\n#  => ended at:   ");
4405 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4406 		trace_print_seq(m, &iter->seq);
4407 		seq_puts(m, "\n#\n");
4408 	}
4409 
4410 	seq_puts(m, "#\n");
4411 }
4412 
4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415 	struct trace_seq *s = &iter->seq;
4416 	struct trace_array *tr = iter->tr;
4417 
4418 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419 		return;
4420 
4421 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422 		return;
4423 
4424 	if (cpumask_available(iter->started) &&
4425 	    cpumask_test_cpu(iter->cpu, iter->started))
4426 		return;
4427 
4428 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429 		return;
4430 
4431 	if (cpumask_available(iter->started))
4432 		cpumask_set_cpu(iter->cpu, iter->started);
4433 
4434 	/* Don't print started cpu buffer for the first entry of the trace */
4435 	if (iter->idx > 1)
4436 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437 				iter->cpu);
4438 }
4439 
4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442 	struct trace_array *tr = iter->tr;
4443 	struct trace_seq *s = &iter->seq;
4444 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445 	struct trace_entry *entry;
4446 	struct trace_event *event;
4447 
4448 	entry = iter->ent;
4449 
4450 	test_cpu_buff_start(iter);
4451 
4452 	event = ftrace_find_event(entry->type);
4453 
4454 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456 			trace_print_lat_context(iter);
4457 		else
4458 			trace_print_context(iter);
4459 	}
4460 
4461 	if (trace_seq_has_overflowed(s))
4462 		return TRACE_TYPE_PARTIAL_LINE;
4463 
4464 	if (event) {
4465 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4466 			return print_event_fields(iter, event);
4467 		return event->funcs->trace(iter, sym_flags, event);
4468 	}
4469 
4470 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471 
4472 	return trace_handle_return(s);
4473 }
4474 
4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477 	struct trace_array *tr = iter->tr;
4478 	struct trace_seq *s = &iter->seq;
4479 	struct trace_entry *entry;
4480 	struct trace_event *event;
4481 
4482 	entry = iter->ent;
4483 
4484 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485 		trace_seq_printf(s, "%d %d %llu ",
4486 				 entry->pid, iter->cpu, iter->ts);
4487 
4488 	if (trace_seq_has_overflowed(s))
4489 		return TRACE_TYPE_PARTIAL_LINE;
4490 
4491 	event = ftrace_find_event(entry->type);
4492 	if (event)
4493 		return event->funcs->raw(iter, 0, event);
4494 
4495 	trace_seq_printf(s, "%d ?\n", entry->type);
4496 
4497 	return trace_handle_return(s);
4498 }
4499 
4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502 	struct trace_array *tr = iter->tr;
4503 	struct trace_seq *s = &iter->seq;
4504 	unsigned char newline = '\n';
4505 	struct trace_entry *entry;
4506 	struct trace_event *event;
4507 
4508 	entry = iter->ent;
4509 
4510 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4512 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4514 		if (trace_seq_has_overflowed(s))
4515 			return TRACE_TYPE_PARTIAL_LINE;
4516 	}
4517 
4518 	event = ftrace_find_event(entry->type);
4519 	if (event) {
4520 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521 		if (ret != TRACE_TYPE_HANDLED)
4522 			return ret;
4523 	}
4524 
4525 	SEQ_PUT_FIELD(s, newline);
4526 
4527 	return trace_handle_return(s);
4528 }
4529 
4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532 	struct trace_array *tr = iter->tr;
4533 	struct trace_seq *s = &iter->seq;
4534 	struct trace_entry *entry;
4535 	struct trace_event *event;
4536 
4537 	entry = iter->ent;
4538 
4539 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540 		SEQ_PUT_FIELD(s, entry->pid);
4541 		SEQ_PUT_FIELD(s, iter->cpu);
4542 		SEQ_PUT_FIELD(s, iter->ts);
4543 		if (trace_seq_has_overflowed(s))
4544 			return TRACE_TYPE_PARTIAL_LINE;
4545 	}
4546 
4547 	event = ftrace_find_event(entry->type);
4548 	return event ? event->funcs->binary(iter, 0, event) :
4549 		TRACE_TYPE_HANDLED;
4550 }
4551 
4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554 	struct ring_buffer_iter *buf_iter;
4555 	int cpu;
4556 
4557 	/* If we are looking at one CPU buffer, only check that one */
4558 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559 		cpu = iter->cpu_file;
4560 		buf_iter = trace_buffer_iter(iter, cpu);
4561 		if (buf_iter) {
4562 			if (!ring_buffer_iter_empty(buf_iter))
4563 				return 0;
4564 		} else {
4565 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566 				return 0;
4567 		}
4568 		return 1;
4569 	}
4570 
4571 	for_each_tracing_cpu(cpu) {
4572 		buf_iter = trace_buffer_iter(iter, cpu);
4573 		if (buf_iter) {
4574 			if (!ring_buffer_iter_empty(buf_iter))
4575 				return 0;
4576 		} else {
4577 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578 				return 0;
4579 		}
4580 	}
4581 
4582 	return 1;
4583 }
4584 
4585 /*  Called with trace_event_read_lock() held. */
4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588 	struct trace_array *tr = iter->tr;
4589 	unsigned long trace_flags = tr->trace_flags;
4590 	enum print_line_t ret;
4591 
4592 	if (iter->lost_events) {
4593 		if (iter->lost_events == (unsigned long)-1)
4594 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595 					 iter->cpu);
4596 		else
4597 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598 					 iter->cpu, iter->lost_events);
4599 		if (trace_seq_has_overflowed(&iter->seq))
4600 			return TRACE_TYPE_PARTIAL_LINE;
4601 	}
4602 
4603 	if (iter->trace && iter->trace->print_line) {
4604 		ret = iter->trace->print_line(iter);
4605 		if (ret != TRACE_TYPE_UNHANDLED)
4606 			return ret;
4607 	}
4608 
4609 	if (iter->ent->type == TRACE_BPUTS &&
4610 			trace_flags & TRACE_ITER_PRINTK &&
4611 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612 		return trace_print_bputs_msg_only(iter);
4613 
4614 	if (iter->ent->type == TRACE_BPRINT &&
4615 			trace_flags & TRACE_ITER_PRINTK &&
4616 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617 		return trace_print_bprintk_msg_only(iter);
4618 
4619 	if (iter->ent->type == TRACE_PRINT &&
4620 			trace_flags & TRACE_ITER_PRINTK &&
4621 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622 		return trace_print_printk_msg_only(iter);
4623 
4624 	if (trace_flags & TRACE_ITER_BIN)
4625 		return print_bin_fmt(iter);
4626 
4627 	if (trace_flags & TRACE_ITER_HEX)
4628 		return print_hex_fmt(iter);
4629 
4630 	if (trace_flags & TRACE_ITER_RAW)
4631 		return print_raw_fmt(iter);
4632 
4633 	return print_trace_fmt(iter);
4634 }
4635 
4636 void trace_latency_header(struct seq_file *m)
4637 {
4638 	struct trace_iterator *iter = m->private;
4639 	struct trace_array *tr = iter->tr;
4640 
4641 	/* print nothing if the buffers are empty */
4642 	if (trace_empty(iter))
4643 		return;
4644 
4645 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646 		print_trace_header(m, iter);
4647 
4648 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649 		print_lat_help_header(m);
4650 }
4651 
4652 void trace_default_header(struct seq_file *m)
4653 {
4654 	struct trace_iterator *iter = m->private;
4655 	struct trace_array *tr = iter->tr;
4656 	unsigned long trace_flags = tr->trace_flags;
4657 
4658 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659 		return;
4660 
4661 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662 		/* print nothing if the buffers are empty */
4663 		if (trace_empty(iter))
4664 			return;
4665 		print_trace_header(m, iter);
4666 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4667 			print_lat_help_header(m);
4668 	} else {
4669 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4671 				print_func_help_header_irq(iter->array_buffer,
4672 							   m, trace_flags);
4673 			else
4674 				print_func_help_header(iter->array_buffer, m,
4675 						       trace_flags);
4676 		}
4677 	}
4678 }
4679 
/* Warn in the trace output when ftrace_is_dead() reports a dead ftrace. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4687 
4688 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage help shown for the top level snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4698 
/*
 * Print the usage help shown for a per_cpu snapshot file. The text for
 * "echo 1" depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4713 
4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716 	if (iter->tr->allocated_snapshot)
4717 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718 	else
4719 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720 
4721 	seq_puts(m, "# Snapshot commands:\n");
4722 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723 		show_snapshot_main_help(m);
4724 	else
4725 		show_snapshot_percpu_help(m);
4726 }
4727 #else
/* Empty stub for builds without CONFIG_TRACER_MAX_TRACE; should never be called */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4730 #endif
4731 
4732 static int s_show(struct seq_file *m, void *v)
4733 {
4734 	struct trace_iterator *iter = v;
4735 	int ret;
4736 
4737 	if (iter->ent == NULL) {
4738 		if (iter->tr) {
4739 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4740 			seq_puts(m, "#\n");
4741 			test_ftrace_alive(m);
4742 		}
4743 		if (iter->snapshot && trace_empty(iter))
4744 			print_snapshot_help(m, iter);
4745 		else if (iter->trace && iter->trace->print_header)
4746 			iter->trace->print_header(m);
4747 		else
4748 			trace_default_header(m);
4749 
4750 	} else if (iter->leftover) {
4751 		/*
4752 		 * If we filled the seq_file buffer earlier, we
4753 		 * want to just show it now.
4754 		 */
4755 		ret = trace_print_seq(m, &iter->seq);
4756 
4757 		/* ret should this time be zero, but you never know */
4758 		iter->leftover = ret;
4759 
4760 	} else {
4761 		ret = print_trace_line(iter);
4762 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4763 			iter->seq.full = 0;
4764 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4765 		}
4766 		ret = trace_print_seq(m, &iter->seq);
4767 		/*
4768 		 * If we overflow the seq_file buffer, then it will
4769 		 * ask us for this data again at start up.
4770 		 * Use that instead.
4771 		 *  ret is 0 if seq_file write succeeded.
4772 		 *        -1 otherwise.
4773 		 */
4774 		iter->leftover = ret;
4775 	}
4776 
4777 	return 0;
4778 }
4779 
4780 /*
4781  * Should be used after trace_array_get(), trace_types_lock
4782  * ensures that i_cdev was already initialized.
4783  */
4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4787 		return (long)inode->i_cdev - 1;
4788 	return RING_BUFFER_ALL_CPUS;
4789 }
4790 
4791 static const struct seq_operations tracer_seq_ops = {
4792 	.start		= s_start,
4793 	.next		= s_next,
4794 	.stop		= s_stop,
4795 	.show		= s_show,
4796 };
4797 
4798 /*
4799  * Note, as iter itself can be allocated and freed in different
4800  * ways, this function is only used to free its content, and not
4801  * the iterator itself. The only requirement to all the allocations
4802  * is that it must zero all fields (kzalloc), as freeing works with
4803  * ethier allocated content or NULL.
4804  */
4805 static void free_trace_iter_content(struct trace_iterator *iter)
4806 {
4807 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4808 	if (iter->fmt != static_fmt_buf)
4809 		kfree(iter->fmt);
4810 
4811 	kfree(iter->temp);
4812 	kfree(iter->buffer_iter);
4813 	mutex_destroy(&iter->mutex);
4814 	free_cpumask_var(iter->started);
4815 }
4816 
4817 static struct trace_iterator *
4818 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4819 {
4820 	struct trace_array *tr = inode->i_private;
4821 	struct trace_iterator *iter;
4822 	int cpu;
4823 
4824 	if (tracing_disabled)
4825 		return ERR_PTR(-ENODEV);
4826 
4827 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4828 	if (!iter)
4829 		return ERR_PTR(-ENOMEM);
4830 
4831 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4832 				    GFP_KERNEL);
4833 	if (!iter->buffer_iter)
4834 		goto release;
4835 
4836 	/*
4837 	 * trace_find_next_entry() may need to save off iter->ent.
4838 	 * It will place it into the iter->temp buffer. As most
4839 	 * events are less than 128, allocate a buffer of that size.
4840 	 * If one is greater, then trace_find_next_entry() will
4841 	 * allocate a new buffer to adjust for the bigger iter->ent.
4842 	 * It's not critical if it fails to get allocated here.
4843 	 */
4844 	iter->temp = kmalloc(128, GFP_KERNEL);
4845 	if (iter->temp)
4846 		iter->temp_size = 128;
4847 
4848 	/*
4849 	 * trace_event_printf() may need to modify given format
4850 	 * string to replace %p with %px so that it shows real address
4851 	 * instead of hash value. However, that is only for the event
4852 	 * tracing, other tracer may not need. Defer the allocation
4853 	 * until it is needed.
4854 	 */
4855 	iter->fmt = NULL;
4856 	iter->fmt_size = 0;
4857 
4858 	mutex_lock(&trace_types_lock);
4859 	iter->trace = tr->current_trace;
4860 
4861 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4862 		goto fail;
4863 
4864 	iter->tr = tr;
4865 
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867 	/* Currently only the top directory has a snapshot */
4868 	if (tr->current_trace->print_max || snapshot)
4869 		iter->array_buffer = &tr->max_buffer;
4870 	else
4871 #endif
4872 		iter->array_buffer = &tr->array_buffer;
4873 	iter->snapshot = snapshot;
4874 	iter->pos = -1;
4875 	iter->cpu_file = tracing_get_cpu(inode);
4876 	mutex_init(&iter->mutex);
4877 
4878 	/* Notify the tracer early; before we stop tracing. */
4879 	if (iter->trace->open)
4880 		iter->trace->open(iter);
4881 
4882 	/* Annotate start of buffers if we had overruns */
4883 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4884 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4885 
4886 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4887 	if (trace_clocks[tr->clock_id].in_ns)
4888 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4889 
4890 	/*
4891 	 * If pause-on-trace is enabled, then stop the trace while
4892 	 * dumping, unless this is the "snapshot" file
4893 	 */
4894 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4895 		tracing_stop_tr(tr);
4896 
4897 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4898 		for_each_tracing_cpu(cpu) {
4899 			iter->buffer_iter[cpu] =
4900 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4901 							 cpu, GFP_KERNEL);
4902 		}
4903 		ring_buffer_read_prepare_sync();
4904 		for_each_tracing_cpu(cpu) {
4905 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4906 			tracing_iter_reset(iter, cpu);
4907 		}
4908 	} else {
4909 		cpu = iter->cpu_file;
4910 		iter->buffer_iter[cpu] =
4911 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4912 						 cpu, GFP_KERNEL);
4913 		ring_buffer_read_prepare_sync();
4914 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4915 		tracing_iter_reset(iter, cpu);
4916 	}
4917 
4918 	mutex_unlock(&trace_types_lock);
4919 
4920 	return iter;
4921 
4922  fail:
4923 	mutex_unlock(&trace_types_lock);
4924 	free_trace_iter_content(iter);
4925 release:
4926 	seq_release_private(inode, file);
4927 	return ERR_PTR(-ENOMEM);
4928 }
4929 
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932 	int ret;
4933 
4934 	ret = tracing_check_open_get_tr(NULL);
4935 	if (ret)
4936 		return ret;
4937 
4938 	filp->private_data = inode->i_private;
4939 	return 0;
4940 }
4941 
4942 bool tracing_is_disabled(void)
4943 {
4944 	return (tracing_disabled) ? true: false;
4945 }
4946 
4947 /*
4948  * Open and update trace_array ref count.
4949  * Must have the current trace_array passed to it.
4950  */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953 	struct trace_array *tr = inode->i_private;
4954 	int ret;
4955 
4956 	ret = tracing_check_open_get_tr(tr);
4957 	if (ret)
4958 		return ret;
4959 
4960 	filp->private_data = inode->i_private;
4961 
4962 	return 0;
4963 }
4964 
4965 /*
4966  * The private pointer of the inode is the trace_event_file.
4967  * Update the tr ref count associated to it.
4968  */
4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971 	struct trace_event_file *file = inode->i_private;
4972 	int ret;
4973 
4974 	ret = tracing_check_open_get_tr(file->tr);
4975 	if (ret)
4976 		return ret;
4977 
4978 	mutex_lock(&event_mutex);
4979 
4980 	/* Fail if the file is marked for removal */
4981 	if (file->flags & EVENT_FILE_FL_FREED) {
4982 		trace_array_put(file->tr);
4983 		ret = -ENODEV;
4984 	} else {
4985 		event_file_get(file);
4986 	}
4987 
4988 	mutex_unlock(&event_mutex);
4989 	if (ret)
4990 		return ret;
4991 
4992 	filp->private_data = inode->i_private;
4993 
4994 	return 0;
4995 }
4996 
4997 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4998 {
4999 	struct trace_event_file *file = inode->i_private;
5000 
5001 	trace_array_put(file->tr);
5002 	event_file_put(file);
5003 
5004 	return 0;
5005 }
5006 
/*
 * Release for single_open() based event files: drop the references via
 * tracing_release_file_tr(), then return what single_release() returns.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
5012 
/*
 * Open callback that calls stream_open() on the file and then performs
 * the generic trace_array open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	stream_open(inode, filp);

	return tracing_open_generic_tr(inode, filp);
}
5018 
5019 static int tracing_release(struct inode *inode, struct file *file)
5020 {
5021 	struct trace_array *tr = inode->i_private;
5022 	struct seq_file *m = file->private_data;
5023 	struct trace_iterator *iter;
5024 	int cpu;
5025 
5026 	if (!(file->f_mode & FMODE_READ)) {
5027 		trace_array_put(tr);
5028 		return 0;
5029 	}
5030 
5031 	/* Writes do not use seq_file */
5032 	iter = m->private;
5033 	mutex_lock(&trace_types_lock);
5034 
5035 	for_each_tracing_cpu(cpu) {
5036 		if (iter->buffer_iter[cpu])
5037 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5038 	}
5039 
5040 	if (iter->trace && iter->trace->close)
5041 		iter->trace->close(iter);
5042 
5043 	if (!iter->snapshot && tr->stop_count)
5044 		/* reenable tracing if it was previously enabled */
5045 		tracing_start_tr(tr);
5046 
5047 	__trace_array_put(tr);
5048 
5049 	mutex_unlock(&trace_types_lock);
5050 
5051 	free_trace_iter_content(iter);
5052 	seq_release_private(inode, file);
5053 
5054 	return 0;
5055 }
5056 
5057 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059 	struct trace_array *tr = inode->i_private;
5060 
5061 	trace_array_put(tr);
5062 	return 0;
5063 }
5064 
5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067 	struct trace_array *tr = inode->i_private;
5068 
5069 	trace_array_put(tr);
5070 
5071 	return single_release(inode, file);
5072 }
5073 
5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076 	struct trace_array *tr = inode->i_private;
5077 	struct trace_iterator *iter;
5078 	int ret;
5079 
5080 	ret = tracing_check_open_get_tr(tr);
5081 	if (ret)
5082 		return ret;
5083 
5084 	/* If this file was open for write, then erase contents */
5085 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086 		int cpu = tracing_get_cpu(inode);
5087 		struct array_buffer *trace_buf = &tr->array_buffer;
5088 
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090 		if (tr->current_trace->print_max)
5091 			trace_buf = &tr->max_buffer;
5092 #endif
5093 
5094 		if (cpu == RING_BUFFER_ALL_CPUS)
5095 			tracing_reset_online_cpus(trace_buf);
5096 		else
5097 			tracing_reset_cpu(trace_buf, cpu);
5098 	}
5099 
5100 	if (file->f_mode & FMODE_READ) {
5101 		iter = __tracing_open(inode, file, false);
5102 		if (IS_ERR(iter))
5103 			ret = PTR_ERR(iter);
5104 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106 	}
5107 
5108 	if (ret < 0)
5109 		trace_array_put(tr);
5110 
5111 	return ret;
5112 }
5113 
5114 /*
5115  * Some tracers are not suitable for instance buffers.
5116  * A tracer is always available for the global array (toplevel)
5117  * or if it explicitly states that it is.
5118  */
5119 static bool
5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124 
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129 	while (t && !trace_ok_for_array(t, tr))
5130 		t = t->next;
5131 
5132 	return t;
5133 }
5134 
5135 static void *
5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138 	struct trace_array *tr = m->private;
5139 	struct tracer *t = v;
5140 
5141 	(*pos)++;
5142 
5143 	if (t)
5144 		t = get_tracer_for_array(tr, t->next);
5145 
5146 	return t;
5147 }
5148 
5149 static void *t_start(struct seq_file *m, loff_t *pos)
5150 {
5151 	struct trace_array *tr = m->private;
5152 	struct tracer *t;
5153 	loff_t l = 0;
5154 
5155 	mutex_lock(&trace_types_lock);
5156 
5157 	t = get_tracer_for_array(tr, trace_types);
5158 	for (; t && l < *pos; t = t_next(m, t, &l))
5159 			;
5160 
5161 	return t;
5162 }
5163 
5164 static void t_stop(struct seq_file *m, void *p)
5165 {
5166 	mutex_unlock(&trace_types_lock);
5167 }
5168 
5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171 	struct tracer *t = v;
5172 
5173 	if (!t)
5174 		return 0;
5175 
5176 	seq_puts(m, t->name);
5177 	if (t->next)
5178 		seq_putc(m, ' ');
5179 	else
5180 		seq_putc(m, '\n');
5181 
5182 	return 0;
5183 }
5184 
5185 static const struct seq_operations show_traces_seq_ops = {
5186 	.start		= t_start,
5187 	.next		= t_next,
5188 	.stop		= t_stop,
5189 	.show		= t_show,
5190 };
5191 
5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194 	struct trace_array *tr = inode->i_private;
5195 	struct seq_file *m;
5196 	int ret;
5197 
5198 	ret = tracing_check_open_get_tr(tr);
5199 	if (ret)
5200 		return ret;
5201 
5202 	ret = seq_open(file, &show_traces_seq_ops);
5203 	if (ret) {
5204 		trace_array_put(tr);
5205 		return ret;
5206 	}
5207 
5208 	m = file->private_data;
5209 	m->private = tr;
5210 
5211 	return 0;
5212 }
5213 
5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216 	struct trace_array *tr = inode->i_private;
5217 
5218 	trace_array_put(tr);
5219 	return seq_release(inode, file);
5220 }
5221 
5222 static ssize_t
5223 tracing_write_stub(struct file *filp, const char __user *ubuf,
5224 		   size_t count, loff_t *ppos)
5225 {
5226 	return count;
5227 }
5228 
5229 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5230 {
5231 	int ret;
5232 
5233 	if (file->f_mode & FMODE_READ)
5234 		ret = seq_lseek(file, offset, whence);
5235 	else
5236 		file->f_pos = ret = 0;
5237 
5238 	return ret;
5239 }
5240 
5241 static const struct file_operations tracing_fops = {
5242 	.open		= tracing_open,
5243 	.read		= seq_read,
5244 	.read_iter	= seq_read_iter,
5245 	.splice_read	= copy_splice_read,
5246 	.write		= tracing_write_stub,
5247 	.llseek		= tracing_lseek,
5248 	.release	= tracing_release,
5249 };
5250 
5251 static const struct file_operations show_traces_fops = {
5252 	.open		= show_traces_open,
5253 	.read		= seq_read,
5254 	.llseek		= seq_lseek,
5255 	.release	= show_traces_release,
5256 };
5257 
5258 static ssize_t
5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260 		     size_t count, loff_t *ppos)
5261 {
5262 	struct trace_array *tr = file_inode(filp)->i_private;
5263 	char *mask_str;
5264 	int len;
5265 
5266 	len = snprintf(NULL, 0, "%*pb\n",
5267 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268 	mask_str = kmalloc(len, GFP_KERNEL);
5269 	if (!mask_str)
5270 		return -ENOMEM;
5271 
5272 	len = snprintf(mask_str, len, "%*pb\n",
5273 		       cpumask_pr_args(tr->tracing_cpumask));
5274 	if (len >= count) {
5275 		count = -EINVAL;
5276 		goto out_err;
5277 	}
5278 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279 
5280 out_err:
5281 	kfree(mask_str);
5282 
5283 	return count;
5284 }
5285 
5286 int tracing_set_cpumask(struct trace_array *tr,
5287 			cpumask_var_t tracing_cpumask_new)
5288 {
5289 	int cpu;
5290 
5291 	if (!tr)
5292 		return -EINVAL;
5293 
5294 	local_irq_disable();
5295 	arch_spin_lock(&tr->max_lock);
5296 	for_each_tracing_cpu(cpu) {
5297 		/*
5298 		 * Increase/decrease the disabled counter if we are
5299 		 * about to flip a bit in the cpumask:
5300 		 */
5301 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5302 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5303 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5304 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5307 #endif
5308 		}
5309 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5310 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5311 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5312 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5313 #ifdef CONFIG_TRACER_MAX_TRACE
5314 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5315 #endif
5316 		}
5317 	}
5318 	arch_spin_unlock(&tr->max_lock);
5319 	local_irq_enable();
5320 
5321 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5322 
5323 	return 0;
5324 }
5325 
5326 static ssize_t
5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328 		      size_t count, loff_t *ppos)
5329 {
5330 	struct trace_array *tr = file_inode(filp)->i_private;
5331 	cpumask_var_t tracing_cpumask_new;
5332 	int err;
5333 
5334 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5335 		return -ENOMEM;
5336 
5337 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5338 	if (err)
5339 		goto err_free;
5340 
5341 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5342 	if (err)
5343 		goto err_free;
5344 
5345 	free_cpumask_var(tracing_cpumask_new);
5346 
5347 	return count;
5348 
5349 err_free:
5350 	free_cpumask_var(tracing_cpumask_new);
5351 
5352 	return err;
5353 }
5354 
5355 static const struct file_operations tracing_cpumask_fops = {
5356 	.open		= tracing_open_generic_tr,
5357 	.read		= tracing_cpumask_read,
5358 	.write		= tracing_cpumask_write,
5359 	.release	= tracing_release_generic_tr,
5360 	.llseek		= generic_file_llseek,
5361 };
5362 
5363 static int tracing_trace_options_show(struct seq_file *m, void *v)
5364 {
5365 	struct tracer_opt *trace_opts;
5366 	struct trace_array *tr = m->private;
5367 	u32 tracer_flags;
5368 	int i;
5369 
5370 	mutex_lock(&trace_types_lock);
5371 	tracer_flags = tr->current_trace->flags->val;
5372 	trace_opts = tr->current_trace->flags->opts;
5373 
5374 	for (i = 0; trace_options[i]; i++) {
5375 		if (tr->trace_flags & (1 << i))
5376 			seq_printf(m, "%s\n", trace_options[i]);
5377 		else
5378 			seq_printf(m, "no%s\n", trace_options[i]);
5379 	}
5380 
5381 	for (i = 0; trace_opts[i].name; i++) {
5382 		if (tracer_flags & trace_opts[i].bit)
5383 			seq_printf(m, "%s\n", trace_opts[i].name);
5384 		else
5385 			seq_printf(m, "no%s\n", trace_opts[i].name);
5386 	}
5387 	mutex_unlock(&trace_types_lock);
5388 
5389 	return 0;
5390 }
5391 
5392 static int __set_tracer_option(struct trace_array *tr,
5393 			       struct tracer_flags *tracer_flags,
5394 			       struct tracer_opt *opts, int neg)
5395 {
5396 	struct tracer *trace = tracer_flags->trace;
5397 	int ret;
5398 
5399 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5400 	if (ret)
5401 		return ret;
5402 
5403 	if (neg)
5404 		tracer_flags->val &= ~opts->bit;
5405 	else
5406 		tracer_flags->val |= opts->bit;
5407 	return 0;
5408 }
5409 
5410 /* Try to assign a tracer specific option */
5411 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5412 {
5413 	struct tracer *trace = tr->current_trace;
5414 	struct tracer_flags *tracer_flags = trace->flags;
5415 	struct tracer_opt *opts = NULL;
5416 	int i;
5417 
5418 	for (i = 0; tracer_flags->opts[i].name; i++) {
5419 		opts = &tracer_flags->opts[i];
5420 
5421 		if (strcmp(cmp, opts->name) == 0)
5422 			return __set_tracer_option(tr, trace->flags, opts, neg);
5423 	}
5424 
5425 	return -EINVAL;
5426 }
5427 
5428 /* Some tracers require overwrite to stay enabled */
5429 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5430 {
5431 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5432 		return -1;
5433 
5434 	return 0;
5435 }
5436 
5437 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5438 {
5439 	int *map;
5440 
5441 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5442 	    (mask == TRACE_ITER_RECORD_CMD))
5443 		lockdep_assert_held(&event_mutex);
5444 
5445 	/* do nothing if flag is already set */
5446 	if (!!(tr->trace_flags & mask) == !!enabled)
5447 		return 0;
5448 
5449 	/* Give the tracer a chance to approve the change */
5450 	if (tr->current_trace->flag_changed)
5451 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5452 			return -EINVAL;
5453 
5454 	if (enabled)
5455 		tr->trace_flags |= mask;
5456 	else
5457 		tr->trace_flags &= ~mask;
5458 
5459 	if (mask == TRACE_ITER_RECORD_CMD)
5460 		trace_event_enable_cmd_record(enabled);
5461 
5462 	if (mask == TRACE_ITER_RECORD_TGID) {
5463 		if (!tgid_map) {
5464 			tgid_map_max = pid_max;
5465 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5466 				       GFP_KERNEL);
5467 
5468 			/*
5469 			 * Pairs with smp_load_acquire() in
5470 			 * trace_find_tgid_ptr() to ensure that if it observes
5471 			 * the tgid_map we just allocated then it also observes
5472 			 * the corresponding tgid_map_max value.
5473 			 */
5474 			smp_store_release(&tgid_map, map);
5475 		}
5476 		if (!tgid_map) {
5477 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5478 			return -ENOMEM;
5479 		}
5480 
5481 		trace_event_enable_tgid_record(enabled);
5482 	}
5483 
5484 	if (mask == TRACE_ITER_EVENT_FORK)
5485 		trace_event_follow_fork(tr, enabled);
5486 
5487 	if (mask == TRACE_ITER_FUNC_FORK)
5488 		ftrace_pid_follow_fork(tr, enabled);
5489 
5490 	if (mask == TRACE_ITER_OVERWRITE) {
5491 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5492 #ifdef CONFIG_TRACER_MAX_TRACE
5493 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5494 #endif
5495 	}
5496 
5497 	if (mask == TRACE_ITER_PRINTK) {
5498 		trace_printk_start_stop_comm(enabled);
5499 		trace_printk_control(enabled);
5500 	}
5501 
5502 	return 0;
5503 }
5504 
5505 int trace_set_options(struct trace_array *tr, char *option)
5506 {
5507 	char *cmp;
5508 	int neg = 0;
5509 	int ret;
5510 	size_t orig_len = strlen(option);
5511 	int len;
5512 
5513 	cmp = strstrip(option);
5514 
5515 	len = str_has_prefix(cmp, "no");
5516 	if (len)
5517 		neg = 1;
5518 
5519 	cmp += len;
5520 
5521 	mutex_lock(&event_mutex);
5522 	mutex_lock(&trace_types_lock);
5523 
5524 	ret = match_string(trace_options, -1, cmp);
5525 	/* If no option could be set, test the specific tracer options */
5526 	if (ret < 0)
5527 		ret = set_tracer_option(tr, cmp, neg);
5528 	else
5529 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5530 
5531 	mutex_unlock(&trace_types_lock);
5532 	mutex_unlock(&event_mutex);
5533 
5534 	/*
5535 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5536 	 * turn it back into a space.
5537 	 */
5538 	if (orig_len > strlen(option))
5539 		option[strlen(option)] = ' ';
5540 
5541 	return ret;
5542 }
5543 
5544 static void __init apply_trace_boot_options(void)
5545 {
5546 	char *buf = trace_boot_options_buf;
5547 	char *option;
5548 
5549 	while (true) {
5550 		option = strsep(&buf, ",");
5551 
5552 		if (!option)
5553 			break;
5554 
5555 		if (*option)
5556 			trace_set_options(&global_trace, option);
5557 
5558 		/* Put back the comma to allow this to be called again */
5559 		if (buf)
5560 			*(buf - 1) = ',';
5561 	}
5562 }
5563 
5564 static ssize_t
5565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5566 			size_t cnt, loff_t *ppos)
5567 {
5568 	struct seq_file *m = filp->private_data;
5569 	struct trace_array *tr = m->private;
5570 	char buf[64];
5571 	int ret;
5572 
5573 	if (cnt >= sizeof(buf))
5574 		return -EINVAL;
5575 
5576 	if (copy_from_user(buf, ubuf, cnt))
5577 		return -EFAULT;
5578 
5579 	buf[cnt] = 0;
5580 
5581 	ret = trace_set_options(tr, buf);
5582 	if (ret < 0)
5583 		return ret;
5584 
5585 	*ppos += cnt;
5586 
5587 	return cnt;
5588 }
5589 
5590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5591 {
5592 	struct trace_array *tr = inode->i_private;
5593 	int ret;
5594 
5595 	ret = tracing_check_open_get_tr(tr);
5596 	if (ret)
5597 		return ret;
5598 
5599 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5600 	if (ret < 0)
5601 		trace_array_put(tr);
5602 
5603 	return ret;
5604 }
5605 
5606 static const struct file_operations tracing_iter_fops = {
5607 	.open		= tracing_trace_options_open,
5608 	.read		= seq_read,
5609 	.llseek		= seq_lseek,
5610 	.release	= tracing_single_release_tr,
5611 	.write		= tracing_trace_options_write,
5612 };
5613 
/*
 * Text of the tracing mini-HOWTO, returned verbatim by
 * tracing_readme_read().  Sections describing optional features are only
 * compiled in when the corresponding CONFIG_* option is set.
 *
 * NOTE(review): user-space tools may pattern-match parts of this text to
 * detect features, so keep the format/keyword lines stable when editing.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
	"\t           <argname>[->field[->field|.field...]],\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
	"\t            of the <attached-group>/<attached-event>.\n"
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:nohitcount]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'common_stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .buckets=size  display values in groups of size rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n"
	"\t            .percent    display a number of percentage value\n"
	"\t            .graph      display a bar-graph of a value\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t    raw hitcount in the histogram.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever\n"
	"\t    a histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5906 
5907 static ssize_t
5908 tracing_readme_read(struct file *filp, char __user *ubuf,
5909 		       size_t cnt, loff_t *ppos)
5910 {
5911 	return simple_read_from_buffer(ubuf, cnt, ppos,
5912 					readme_msg, strlen(readme_msg));
5913 }
5914 
5915 static const struct file_operations tracing_readme_fops = {
5916 	.open		= tracing_open_generic,
5917 	.read		= tracing_readme_read,
5918 	.llseek		= generic_file_llseek,
5919 };
5920 
5921 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5922 {
5923 	int pid = ++(*pos);
5924 
5925 	return trace_find_tgid_ptr(pid);
5926 }
5927 
5928 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5929 {
5930 	int pid = *pos;
5931 
5932 	return trace_find_tgid_ptr(pid);
5933 }
5934 
/* seq_file stop callback: intentionally empty, there is nothing to undo */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
	/* saved_tgids_start() takes no lock, so there is nothing to release */
}
5938 
5939 static int saved_tgids_show(struct seq_file *m, void *v)
5940 {
5941 	int *entry = (int *)v;
5942 	int pid = entry - tgid_map;
5943 	int tgid = *entry;
5944 
5945 	if (tgid == 0)
5946 		return SEQ_SKIP;
5947 
5948 	seq_printf(m, "%d %d\n", pid, tgid);
5949 	return 0;
5950 }
5951 
5952 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5953 	.start		= saved_tgids_start,
5954 	.stop		= saved_tgids_stop,
5955 	.next		= saved_tgids_next,
5956 	.show		= saved_tgids_show,
5957 };
5958 
5959 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5960 {
5961 	int ret;
5962 
5963 	ret = tracing_check_open_get_tr(NULL);
5964 	if (ret)
5965 		return ret;
5966 
5967 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5968 }
5969 
5970 
5971 static const struct file_operations tracing_saved_tgids_fops = {
5972 	.open		= tracing_saved_tgids_open,
5973 	.read		= seq_read,
5974 	.llseek		= seq_lseek,
5975 	.release	= seq_release,
5976 };
5977 
5978 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5979 {
5980 	unsigned int *ptr = v;
5981 
5982 	if (*pos || m->count)
5983 		ptr++;
5984 
5985 	(*pos)++;
5986 
5987 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5988 	     ptr++) {
5989 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5990 			continue;
5991 
5992 		return ptr;
5993 	}
5994 
5995 	return NULL;
5996 }
5997 
5998 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5999 {
6000 	void *v;
6001 	loff_t l = 0;
6002 
6003 	preempt_disable();
6004 	arch_spin_lock(&trace_cmdline_lock);
6005 
6006 	v = &savedcmd->map_cmdline_to_pid[0];
6007 	while (l <= *pos) {
6008 		v = saved_cmdlines_next(m, v, &l);
6009 		if (!v)
6010 			return NULL;
6011 	}
6012 
6013 	return v;
6014 }
6015 
6016 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6017 {
6018 	arch_spin_unlock(&trace_cmdline_lock);
6019 	preempt_enable();
6020 }
6021 
6022 static int saved_cmdlines_show(struct seq_file *m, void *v)
6023 {
6024 	char buf[TASK_COMM_LEN];
6025 	unsigned int *pid = v;
6026 
6027 	__trace_find_cmdline(*pid, buf);
6028 	seq_printf(m, "%d %s\n", *pid, buf);
6029 	return 0;
6030 }
6031 
6032 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6033 	.start		= saved_cmdlines_start,
6034 	.next		= saved_cmdlines_next,
6035 	.stop		= saved_cmdlines_stop,
6036 	.show		= saved_cmdlines_show,
6037 };
6038 
6039 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6040 {
6041 	int ret;
6042 
6043 	ret = tracing_check_open_get_tr(NULL);
6044 	if (ret)
6045 		return ret;
6046 
6047 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6048 }
6049 
6050 static const struct file_operations tracing_saved_cmdlines_fops = {
6051 	.open		= tracing_saved_cmdlines_open,
6052 	.read		= seq_read,
6053 	.llseek		= seq_lseek,
6054 	.release	= seq_release,
6055 };
6056 
6057 static ssize_t
6058 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6059 				 size_t cnt, loff_t *ppos)
6060 {
6061 	char buf[64];
6062 	int r;
6063 
6064 	preempt_disable();
6065 	arch_spin_lock(&trace_cmdline_lock);
6066 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6067 	arch_spin_unlock(&trace_cmdline_lock);
6068 	preempt_enable();
6069 
6070 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6071 }
6072 
6073 static int tracing_resize_saved_cmdlines(unsigned int val)
6074 {
6075 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6076 
6077 	s = allocate_cmdlines_buffer(val);
6078 	if (!s)
6079 		return -ENOMEM;
6080 
6081 	preempt_disable();
6082 	arch_spin_lock(&trace_cmdline_lock);
6083 	savedcmd_temp = savedcmd;
6084 	savedcmd = s;
6085 	arch_spin_unlock(&trace_cmdline_lock);
6086 	preempt_enable();
6087 	free_saved_cmdlines_buffer(savedcmd_temp);
6088 
6089 	return 0;
6090 }
6091 
6092 static ssize_t
6093 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6094 				  size_t cnt, loff_t *ppos)
6095 {
6096 	unsigned long val;
6097 	int ret;
6098 
6099 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6100 	if (ret)
6101 		return ret;
6102 
6103 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
6104 	if (!val || val > PID_MAX_DEFAULT)
6105 		return -EINVAL;
6106 
6107 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6108 	if (ret < 0)
6109 		return ret;
6110 
6111 	*ppos += cnt;
6112 
6113 	return cnt;
6114 }
6115 
6116 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6117 	.open		= tracing_open_generic,
6118 	.read		= tracing_saved_cmdlines_size_read,
6119 	.write		= tracing_saved_cmdlines_size_write,
6120 };
6121 
6122 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6123 static union trace_eval_map_item *
6124 update_eval_map(union trace_eval_map_item *ptr)
6125 {
6126 	if (!ptr->map.eval_string) {
6127 		if (ptr->tail.next) {
6128 			ptr = ptr->tail.next;
6129 			/* Set ptr to the next real item (skip head) */
6130 			ptr++;
6131 		} else
6132 			return NULL;
6133 	}
6134 	return ptr;
6135 }
6136 
6137 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6138 {
6139 	union trace_eval_map_item *ptr = v;
6140 
6141 	/*
6142 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6143 	 * This really should never happen.
6144 	 */
6145 	(*pos)++;
6146 	ptr = update_eval_map(ptr);
6147 	if (WARN_ON_ONCE(!ptr))
6148 		return NULL;
6149 
6150 	ptr++;
6151 	ptr = update_eval_map(ptr);
6152 
6153 	return ptr;
6154 }
6155 
6156 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6157 {
6158 	union trace_eval_map_item *v;
6159 	loff_t l = 0;
6160 
6161 	mutex_lock(&trace_eval_mutex);
6162 
6163 	v = trace_eval_maps;
6164 	if (v)
6165 		v++;
6166 
6167 	while (v && l < *pos) {
6168 		v = eval_map_next(m, v, &l);
6169 	}
6170 
6171 	return v;
6172 }
6173 
6174 static void eval_map_stop(struct seq_file *m, void *v)
6175 {
6176 	mutex_unlock(&trace_eval_mutex);
6177 }
6178 
6179 static int eval_map_show(struct seq_file *m, void *v)
6180 {
6181 	union trace_eval_map_item *ptr = v;
6182 
6183 	seq_printf(m, "%s %ld (%s)\n",
6184 		   ptr->map.eval_string, ptr->map.eval_value,
6185 		   ptr->map.system);
6186 
6187 	return 0;
6188 }
6189 
6190 static const struct seq_operations tracing_eval_map_seq_ops = {
6191 	.start		= eval_map_start,
6192 	.next		= eval_map_next,
6193 	.stop		= eval_map_stop,
6194 	.show		= eval_map_show,
6195 };
6196 
6197 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6198 {
6199 	int ret;
6200 
6201 	ret = tracing_check_open_get_tr(NULL);
6202 	if (ret)
6203 		return ret;
6204 
6205 	return seq_open(filp, &tracing_eval_map_seq_ops);
6206 }
6207 
6208 static const struct file_operations tracing_eval_map_fops = {
6209 	.open		= tracing_eval_map_open,
6210 	.read		= seq_read,
6211 	.llseek		= seq_lseek,
6212 	.release	= seq_release,
6213 };
6214 
6215 static inline union trace_eval_map_item *
6216 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6217 {
6218 	/* Return tail of array given the head */
6219 	return ptr + ptr->head.length + 1;
6220 }
6221 
6222 static void
6223 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6224 			   int len)
6225 {
6226 	struct trace_eval_map **stop;
6227 	struct trace_eval_map **map;
6228 	union trace_eval_map_item *map_array;
6229 	union trace_eval_map_item *ptr;
6230 
6231 	stop = start + len;
6232 
6233 	/*
6234 	 * The trace_eval_maps contains the map plus a head and tail item,
6235 	 * where the head holds the module and length of array, and the
6236 	 * tail holds a pointer to the next list.
6237 	 */
6238 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6239 	if (!map_array) {
6240 		pr_warn("Unable to allocate trace eval mapping\n");
6241 		return;
6242 	}
6243 
6244 	mutex_lock(&trace_eval_mutex);
6245 
6246 	if (!trace_eval_maps)
6247 		trace_eval_maps = map_array;
6248 	else {
6249 		ptr = trace_eval_maps;
6250 		for (;;) {
6251 			ptr = trace_eval_jmp_to_tail(ptr);
6252 			if (!ptr->tail.next)
6253 				break;
6254 			ptr = ptr->tail.next;
6255 
6256 		}
6257 		ptr->tail.next = map_array;
6258 	}
6259 	map_array->head.mod = mod;
6260 	map_array->head.length = len;
6261 	map_array++;
6262 
6263 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6264 		map_array->map = **map;
6265 		map_array++;
6266 	}
6267 	memset(map_array, 0, sizeof(*map_array));
6268 
6269 	mutex_unlock(&trace_eval_mutex);
6270 }
6271 
6272 static void trace_create_eval_file(struct dentry *d_tracer)
6273 {
6274 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6275 			  NULL, &tracing_eval_map_fops);
6276 }
6277 
6278 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stand-ins used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}

static inline void trace_insert_eval_map_file(struct module *mod,
					      struct trace_eval_map **start,
					      int len)
{
}
6282 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6283 
/*
 * Register @len eval maps starting at @start on behalf of @mod: update the
 * trace events with them, then record them for the eval_map file.
 * Does nothing when @len is not positive.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len > 0) {
		trace_event_eval_update(start, len);
		trace_insert_eval_map_file(mod, start, len);
	}
}
6298 
6299 static ssize_t
6300 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6301 		       size_t cnt, loff_t *ppos)
6302 {
6303 	struct trace_array *tr = filp->private_data;
6304 	char buf[MAX_TRACER_SIZE+2];
6305 	int r;
6306 
6307 	mutex_lock(&trace_types_lock);
6308 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6309 	mutex_unlock(&trace_types_lock);
6310 
6311 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6312 }
6313 
6314 int tracer_init(struct tracer *t, struct trace_array *tr)
6315 {
6316 	tracing_reset_online_cpus(&tr->array_buffer);
6317 	return t->init(tr);
6318 }
6319 
6320 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6321 {
6322 	int cpu;
6323 
6324 	for_each_tracing_cpu(cpu)
6325 		per_cpu_ptr(buf->data, cpu)->entries = val;
6326 }
6327 
6328 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6329 {
6330 	if (cpu == RING_BUFFER_ALL_CPUS) {
6331 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6332 	} else {
6333 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6334 	}
6335 }
6336 
6337 #ifdef CONFIG_TRACER_MAX_TRACE
6338 /* resize @tr's buffer to the size of @size_tr's entries */
6339 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6340 					struct array_buffer *size_buf, int cpu_id)
6341 {
6342 	int cpu, ret = 0;
6343 
6344 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6345 		for_each_tracing_cpu(cpu) {
6346 			ret = ring_buffer_resize(trace_buf->buffer,
6347 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6348 			if (ret < 0)
6349 				break;
6350 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6351 				per_cpu_ptr(size_buf->data, cpu)->entries;
6352 		}
6353 	} else {
6354 		ret = ring_buffer_resize(trace_buf->buffer,
6355 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6356 		if (ret == 0)
6357 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6358 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6359 	}
6360 
6361 	return ret;
6362 }
6363 #endif /* CONFIG_TRACER_MAX_TRACE */
6364 
/*
 * Resize @tr's main ring buffer to @size for @cpu (or every CPU when @cpu is
 * RING_BUFFER_ALL_CPUS) and, with CONFIG_TRACER_MAX_TRACE and an allocated
 * snapshot, resize the max buffer to match.
 *
 * Tracing on @tr is stopped for the duration and restarted before returning.
 * The callers visible in this file hold trace_types_lock.
 *
 * Returns 0 when the main buffer does not exist yet, otherwise the result of
 * the last ring_buffer_resize() call (negative on failure).
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	ring_buffer_expanded = true;

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* No snapshot buffer allocated: only the main buffer needs updating. */
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
	if (ret < 0) {
		/*
		 * Roll the main buffer back. Its cached entries counts have
		 * not been updated yet, so they still describe the old size;
		 * that is why array_buffer is passed as both arguments.
		 */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->max_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

	/* Success: refresh the cached entries counts of the main buffer. */
	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
6427 
6428 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6429 				  unsigned long size, int cpu_id)
6430 {
6431 	int ret;
6432 
6433 	mutex_lock(&trace_types_lock);
6434 
6435 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6436 		/* make sure, this cpu is enabled in the mask */
6437 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6438 			ret = -EINVAL;
6439 			goto out;
6440 		}
6441 	}
6442 
6443 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6444 	if (ret < 0)
6445 		ret = -ENOMEM;
6446 
6447 out:
6448 	mutex_unlock(&trace_types_lock);
6449 
6450 	return ret;
6451 }
6452 
6453 
6454 /**
6455  * tracing_update_buffers - used by tracing facility to expand ring buffers
6456  *
6457  * To save on memory when the tracing is never used on a system with it
6458  * configured in. The ring buffers are set to a minimum size. But once
6459  * a user starts to use the tracing facility, then they need to grow
6460  * to their default size.
6461  *
6462  * This function is to be called when a tracer is about to be used.
6463  */
6464 int tracing_update_buffers(void)
6465 {
6466 	int ret = 0;
6467 
6468 	mutex_lock(&trace_types_lock);
6469 	if (!ring_buffer_expanded)
6470 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6471 						RING_BUFFER_ALL_CPUS);
6472 	mutex_unlock(&trace_types_lock);
6473 
6474 	return ret;
6475 }
6476 
6477 struct trace_option_dentry;
6478 
6479 static void
6480 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6481 
6482 /*
6483  * Used to clear out the tracer before deletion of an instance.
6484  * Must have trace_types_lock held.
6485  */
6486 static void tracing_set_nop(struct trace_array *tr)
6487 {
6488 	if (tr->current_trace == &nop_trace)
6489 		return;
6490 
6491 	tr->current_trace->enabled--;
6492 
6493 	if (tr->current_trace->reset)
6494 		tr->current_trace->reset(tr);
6495 
6496 	tr->current_trace = &nop_trace;
6497 }
6498 
6499 static bool tracer_options_updated;
6500 
6501 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6502 {
6503 	/* Only enable if the directory has been created already. */
6504 	if (!tr->dir)
6505 		return;
6506 
6507 	/* Only create trace option files after update_tracer_options finish */
6508 	if (!tracer_options_updated)
6509 		return;
6510 
6511 	create_trace_option_files(tr, t);
6512 }
6513 
/*
 * Switch @tr to the tracer whose name equals @buf.
 *
 * Runs entirely under trace_types_lock. The ring buffer is expanded first
 * if that has not happened yet. The old tracer is reset and @tr is parked
 * on nop_trace before the new tracer is initialized, so a failure in
 * snapshot allocation or in the new tracer's init leaves @tr on nop_trace.
 *
 * Returns 0 on success, when @buf already names the current tracer, or when
 * a "noboot" tracer is ignored before the system is running. Returns -EINVAL
 * if no tracer has that name or it is not allowed for @tr, -EBUSY if a
 * conditional snapshot is active for a max-tr tracer or trace pipe readers
 * hold a reference, or the error from resizing, snapshot allocation or
 * tracer init.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Look the tracer up by name in the list of registered tracers. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	/* Already the active tracer: nothing to do. */
	if (t == tr->current_trace)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A tracer that uses the max buffer cannot start while a conditional snapshot is set. */
	if (t->use_max_tr) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			goto out;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	/* Tear down the old tracer. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->current_trace->use_max_tr;

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
	}

	/* The new tracer needs the snapshot buffer: allocate it if missing. */
	if (t->use_max_tr && !tr->allocated_snapshot) {
		ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			goto out;
	}
#else
	tr->current_trace = &nop_trace;
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			goto out;
	}

	/* Everything succeeded: publish the new tracer. */
	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6622 
6623 static ssize_t
6624 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6625 			size_t cnt, loff_t *ppos)
6626 {
6627 	struct trace_array *tr = filp->private_data;
6628 	char buf[MAX_TRACER_SIZE+1];
6629 	char *name;
6630 	size_t ret;
6631 	int err;
6632 
6633 	ret = cnt;
6634 
6635 	if (cnt > MAX_TRACER_SIZE)
6636 		cnt = MAX_TRACER_SIZE;
6637 
6638 	if (copy_from_user(buf, ubuf, cnt))
6639 		return -EFAULT;
6640 
6641 	buf[cnt] = 0;
6642 
6643 	name = strim(buf);
6644 
6645 	err = tracing_set_tracer(tr, name);
6646 	if (err)
6647 		return err;
6648 
6649 	*ppos += ret;
6650 
6651 	return ret;
6652 }
6653 
6654 static ssize_t
6655 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6656 		   size_t cnt, loff_t *ppos)
6657 {
6658 	char buf[64];
6659 	int r;
6660 
6661 	r = snprintf(buf, sizeof(buf), "%ld\n",
6662 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6663 	if (r > sizeof(buf))
6664 		r = sizeof(buf);
6665 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6666 }
6667 
6668 static ssize_t
6669 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6670 		    size_t cnt, loff_t *ppos)
6671 {
6672 	unsigned long val;
6673 	int ret;
6674 
6675 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6676 	if (ret)
6677 		return ret;
6678 
6679 	*ptr = val * 1000;
6680 
6681 	return cnt;
6682 }
6683 
6684 static ssize_t
6685 tracing_thresh_read(struct file *filp, char __user *ubuf,
6686 		    size_t cnt, loff_t *ppos)
6687 {
6688 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6689 }
6690 
6691 static ssize_t
6692 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6693 		     size_t cnt, loff_t *ppos)
6694 {
6695 	struct trace_array *tr = filp->private_data;
6696 	int ret;
6697 
6698 	mutex_lock(&trace_types_lock);
6699 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6700 	if (ret < 0)
6701 		goto out;
6702 
6703 	if (tr->current_trace->update_thresh) {
6704 		ret = tr->current_trace->update_thresh(tr);
6705 		if (ret < 0)
6706 			goto out;
6707 	}
6708 
6709 	ret = cnt;
6710 out:
6711 	mutex_unlock(&trace_types_lock);
6712 
6713 	return ret;
6714 }
6715 
6716 #ifdef CONFIG_TRACER_MAX_TRACE
6717 
6718 static ssize_t
6719 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6720 		     size_t cnt, loff_t *ppos)
6721 {
6722 	struct trace_array *tr = filp->private_data;
6723 
6724 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6725 }
6726 
6727 static ssize_t
6728 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6729 		      size_t cnt, loff_t *ppos)
6730 {
6731 	struct trace_array *tr = filp->private_data;
6732 
6733 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6734 }
6735 
6736 #endif
6737 
6738 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6739 {
6740 	if (cpu == RING_BUFFER_ALL_CPUS) {
6741 		if (cpumask_empty(tr->pipe_cpumask)) {
6742 			cpumask_setall(tr->pipe_cpumask);
6743 			return 0;
6744 		}
6745 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6746 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6747 		return 0;
6748 	}
6749 	return -EBUSY;
6750 }
6751 
6752 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6753 {
6754 	if (cpu == RING_BUFFER_ALL_CPUS) {
6755 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6756 		cpumask_clear(tr->pipe_cpumask);
6757 	} else {
6758 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6759 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6760 	}
6761 }
6762 
6763 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6764 {
6765 	struct trace_array *tr = inode->i_private;
6766 	struct trace_iterator *iter;
6767 	int cpu;
6768 	int ret;
6769 
6770 	ret = tracing_check_open_get_tr(tr);
6771 	if (ret)
6772 		return ret;
6773 
6774 	mutex_lock(&trace_types_lock);
6775 	cpu = tracing_get_cpu(inode);
6776 	ret = open_pipe_on_cpu(tr, cpu);
6777 	if (ret)
6778 		goto fail_pipe_on_cpu;
6779 
6780 	/* create a buffer to store the information to pass to userspace */
6781 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6782 	if (!iter) {
6783 		ret = -ENOMEM;
6784 		goto fail_alloc_iter;
6785 	}
6786 
6787 	trace_seq_init(&iter->seq);
6788 	iter->trace = tr->current_trace;
6789 
6790 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6791 		ret = -ENOMEM;
6792 		goto fail;
6793 	}
6794 
6795 	/* trace pipe does not show start of buffer */
6796 	cpumask_setall(iter->started);
6797 
6798 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6799 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6800 
6801 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6802 	if (trace_clocks[tr->clock_id].in_ns)
6803 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6804 
6805 	iter->tr = tr;
6806 	iter->array_buffer = &tr->array_buffer;
6807 	iter->cpu_file = cpu;
6808 	mutex_init(&iter->mutex);
6809 	filp->private_data = iter;
6810 
6811 	if (iter->trace->pipe_open)
6812 		iter->trace->pipe_open(iter);
6813 
6814 	nonseekable_open(inode, filp);
6815 
6816 	tr->trace_ref++;
6817 
6818 	mutex_unlock(&trace_types_lock);
6819 	return ret;
6820 
6821 fail:
6822 	kfree(iter);
6823 fail_alloc_iter:
6824 	close_pipe_on_cpu(tr, cpu);
6825 fail_pipe_on_cpu:
6826 	__trace_array_put(tr);
6827 	mutex_unlock(&trace_types_lock);
6828 	return ret;
6829 }
6830 
6831 static int tracing_release_pipe(struct inode *inode, struct file *file)
6832 {
6833 	struct trace_iterator *iter = file->private_data;
6834 	struct trace_array *tr = inode->i_private;
6835 
6836 	mutex_lock(&trace_types_lock);
6837 
6838 	tr->trace_ref--;
6839 
6840 	if (iter->trace->pipe_close)
6841 		iter->trace->pipe_close(iter);
6842 	close_pipe_on_cpu(tr, iter->cpu_file);
6843 	mutex_unlock(&trace_types_lock);
6844 
6845 	free_trace_iter_content(iter);
6846 	kfree(iter);
6847 
6848 	trace_array_put(tr);
6849 
6850 	return 0;
6851 }
6852 
6853 static __poll_t
6854 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6855 {
6856 	struct trace_array *tr = iter->tr;
6857 
6858 	/* Iterators are static, they should be filled or empty */
6859 	if (trace_buffer_iter(iter, iter->cpu_file))
6860 		return EPOLLIN | EPOLLRDNORM;
6861 
6862 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6863 		/*
6864 		 * Always select as readable when in blocking mode
6865 		 */
6866 		return EPOLLIN | EPOLLRDNORM;
6867 	else
6868 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6869 					     filp, poll_table, iter->tr->buffer_percent);
6870 }
6871 
6872 static __poll_t
6873 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6874 {
6875 	struct trace_iterator *iter = filp->private_data;
6876 
6877 	return trace_poll(iter, filp, poll_table);
6878 }
6879 
6880 /* Must be called with iter->mutex held. */
6881 static int tracing_wait_pipe(struct file *filp)
6882 {
6883 	struct trace_iterator *iter = filp->private_data;
6884 	int ret;
6885 
6886 	while (trace_empty(iter)) {
6887 
6888 		if ((filp->f_flags & O_NONBLOCK)) {
6889 			return -EAGAIN;
6890 		}
6891 
6892 		/*
6893 		 * We block until we read something and tracing is disabled.
6894 		 * We still block if tracing is disabled, but we have never
6895 		 * read anything. This allows a user to cat this file, and
6896 		 * then enable tracing. But after we have read something,
6897 		 * we give an EOF when tracing is again disabled.
6898 		 *
6899 		 * iter->pos will be 0 if we haven't read anything.
6900 		 */
6901 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6902 			break;
6903 
6904 		mutex_unlock(&iter->mutex);
6905 
6906 		ret = wait_on_pipe(iter, 0);
6907 
6908 		mutex_lock(&iter->mutex);
6909 
6910 		if (ret)
6911 			return ret;
6912 	}
6913 
6914 	return 1;
6915 }
6916 
/*
 * Consumer reader.
 *
 * Read handler for the trace pipe: formats entries into iter->seq, consuming
 * them from the ring buffer, and copies the text to @ubuf. Data left over
 * in iter->seq from a previous call is returned first. Blocks in
 * tracing_wait_pipe() when the buffer is empty.
 *
 * Returns the number of bytes copied, 0 when tracing has finished and the
 * buffer is empty, or a negative error.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	trace_seq_init(&iter->seq);

	/* Give the tracer's own ->read() the first chance to produce output. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		/* Remember where this line starts so a partial line can be dropped. */
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills the entire trace_seq in
			 * one shot, trace_seq_to_user() will return -EBUSY because
			 * save_len == 0. In this case we need to consume the event,
			 * otherwise the loop will peek at it again next time,
			 * resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything was handed out: start the next read with an empty seq. */
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
7025 
7026 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7027 				     unsigned int idx)
7028 {
7029 	__free_page(spd->pages[idx]);
7030 }
7031 
7032 static size_t
7033 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7034 {
7035 	size_t count;
7036 	int save_len;
7037 	int ret;
7038 
7039 	/* Seq buffer is page-sized, exactly what we need. */
7040 	for (;;) {
7041 		save_len = iter->seq.seq.len;
7042 		ret = print_trace_line(iter);
7043 
7044 		if (trace_seq_has_overflowed(&iter->seq)) {
7045 			iter->seq.seq.len = save_len;
7046 			break;
7047 		}
7048 
7049 		/*
7050 		 * This should not be hit, because it should only
7051 		 * be set if the iter->seq overflowed. But check it
7052 		 * anyway to be safe.
7053 		 */
7054 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7055 			iter->seq.seq.len = save_len;
7056 			break;
7057 		}
7058 
7059 		count = trace_seq_used(&iter->seq) - save_len;
7060 		if (rem < count) {
7061 			rem = 0;
7062 			iter->seq.seq.len = save_len;
7063 			break;
7064 		}
7065 
7066 		if (ret != TRACE_TYPE_NO_CONSUME)
7067 			trace_consume(iter);
7068 		rem -= count;
7069 		if (!trace_find_next_entry_inc(iter))	{
7070 			rem = 0;
7071 			iter->ent = NULL;
7072 			break;
7073 		}
7074 	}
7075 
7076 	return rem;
7077 }
7078 
/*
 * splice_read handler for the trace pipe.
 *
 * Formats trace entries page by page with tracing_fill_pipe_page(), copies
 * each formatted chunk into a freshly allocated page and hands the pages to
 * @pipe via splice_to_pipe(). At most @len bytes and spd.nr_pages_max pages
 * are produced per call.
 *
 * Returns the splice_to_pipe() result, 0 if no page was filled, or a
 * negative error (-ENOMEM if the descriptor cannot be grown, -EFAULT if no
 * entry can be found after waiting, or the error from waiting / the tracer's
 * own ->splice_read()).
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* Give the tracer's own ->splice_read() the first chance. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	/* Make sure iter->ent refers to an entry before formatting starts. */
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* Page i is not counted in nr_pages below, so free it here. */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	/* i is the number of pages that were completely set up. */
	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
7164 
7165 static ssize_t
7166 tracing_entries_read(struct file *filp, char __user *ubuf,
7167 		     size_t cnt, loff_t *ppos)
7168 {
7169 	struct inode *inode = file_inode(filp);
7170 	struct trace_array *tr = inode->i_private;
7171 	int cpu = tracing_get_cpu(inode);
7172 	char buf[64];
7173 	int r = 0;
7174 	ssize_t ret;
7175 
7176 	mutex_lock(&trace_types_lock);
7177 
7178 	if (cpu == RING_BUFFER_ALL_CPUS) {
7179 		int cpu, buf_size_same;
7180 		unsigned long size;
7181 
7182 		size = 0;
7183 		buf_size_same = 1;
7184 		/* check if all cpu sizes are same */
7185 		for_each_tracing_cpu(cpu) {
7186 			/* fill in the size from first enabled cpu */
7187 			if (size == 0)
7188 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7189 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7190 				buf_size_same = 0;
7191 				break;
7192 			}
7193 		}
7194 
7195 		if (buf_size_same) {
7196 			if (!ring_buffer_expanded)
7197 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7198 					    size >> 10,
7199 					    trace_buf_size >> 10);
7200 			else
7201 				r = sprintf(buf, "%lu\n", size >> 10);
7202 		} else
7203 			r = sprintf(buf, "X\n");
7204 	} else
7205 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7206 
7207 	mutex_unlock(&trace_types_lock);
7208 
7209 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7210 	return ret;
7211 }
7212 
7213 static ssize_t
7214 tracing_entries_write(struct file *filp, const char __user *ubuf,
7215 		      size_t cnt, loff_t *ppos)
7216 {
7217 	struct inode *inode = file_inode(filp);
7218 	struct trace_array *tr = inode->i_private;
7219 	unsigned long val;
7220 	int ret;
7221 
7222 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7223 	if (ret)
7224 		return ret;
7225 
7226 	/* must have at least 1 entry */
7227 	if (!val)
7228 		return -EINVAL;
7229 
7230 	/* value is in KB */
7231 	val <<= 10;
7232 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7233 	if (ret < 0)
7234 		return ret;
7235 
7236 	*ppos += cnt;
7237 
7238 	return cnt;
7239 }
7240 
7241 static ssize_t
7242 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7243 				size_t cnt, loff_t *ppos)
7244 {
7245 	struct trace_array *tr = filp->private_data;
7246 	char buf[64];
7247 	int r, cpu;
7248 	unsigned long size = 0, expanded_size = 0;
7249 
7250 	mutex_lock(&trace_types_lock);
7251 	for_each_tracing_cpu(cpu) {
7252 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7253 		if (!ring_buffer_expanded)
7254 			expanded_size += trace_buf_size >> 10;
7255 	}
7256 	if (ring_buffer_expanded)
7257 		r = sprintf(buf, "%lu\n", size);
7258 	else
7259 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7260 	mutex_unlock(&trace_types_lock);
7261 
7262 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7263 }
7264 
7265 static ssize_t
7266 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7267 			  size_t cnt, loff_t *ppos)
7268 {
7269 	/*
7270 	 * There is no need to read what the user has written, this function
7271 	 * is just to make sure that there is no error when "echo" is used
7272 	 */
7273 
7274 	*ppos += cnt;
7275 
7276 	return cnt;
7277 }
7278 
7279 static int
7280 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7281 {
7282 	struct trace_array *tr = inode->i_private;
7283 
7284 	/* disable tracing ? */
7285 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7286 		tracer_tracing_off(tr);
7287 	/* resize the ring buffer to 0 */
7288 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7289 
7290 	trace_array_put(tr);
7291 
7292 	return 0;
7293 }
7294 
7295 static ssize_t
7296 tracing_mark_write(struct file *filp, const char __user *ubuf,
7297 					size_t cnt, loff_t *fpos)
7298 {
7299 	struct trace_array *tr = filp->private_data;
7300 	struct ring_buffer_event *event;
7301 	enum event_trigger_type tt = ETT_NONE;
7302 	struct trace_buffer *buffer;
7303 	struct print_entry *entry;
7304 	ssize_t written;
7305 	int size;
7306 	int len;
7307 
7308 /* Used in tracing_mark_raw_write() as well */
7309 #define FAULTED_STR "<faulted>"
7310 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7311 
7312 	if (tracing_disabled)
7313 		return -EINVAL;
7314 
7315 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7316 		return -EINVAL;
7317 
7318 	if (cnt > TRACE_BUF_SIZE)
7319 		cnt = TRACE_BUF_SIZE;
7320 
7321 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7322 
7323 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7324 
7325 	/* If less than "<faulted>", then make sure we can still add that */
7326 	if (cnt < FAULTED_SIZE)
7327 		size += FAULTED_SIZE - cnt;
7328 
7329 	buffer = tr->array_buffer.buffer;
7330 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7331 					    tracing_gen_ctx());
7332 	if (unlikely(!event))
7333 		/* Ring buffer disabled, return as if not open for write */
7334 		return -EBADF;
7335 
7336 	entry = ring_buffer_event_data(event);
7337 	entry->ip = _THIS_IP_;
7338 
7339 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7340 	if (len) {
7341 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7342 		cnt = FAULTED_SIZE;
7343 		written = -EFAULT;
7344 	} else
7345 		written = cnt;
7346 
7347 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7348 		/* do not add \n before testing triggers, but add \0 */
7349 		entry->buf[cnt] = '\0';
7350 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7351 	}
7352 
7353 	if (entry->buf[cnt - 1] != '\n') {
7354 		entry->buf[cnt] = '\n';
7355 		entry->buf[cnt + 1] = '\0';
7356 	} else
7357 		entry->buf[cnt] = '\0';
7358 
7359 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7360 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7361 	__buffer_unlock_commit(buffer, event);
7362 
7363 	if (tt)
7364 		event_triggers_post_call(tr->trace_marker_file, tt);
7365 
7366 	return written;
7367 }
7368 
7369 /* Limit it for now to 3K (including tag) */
7370 #define RAW_DATA_MAX_SIZE (1024*3)
7371 
7372 static ssize_t
7373 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7374 					size_t cnt, loff_t *fpos)
7375 {
7376 	struct trace_array *tr = filp->private_data;
7377 	struct ring_buffer_event *event;
7378 	struct trace_buffer *buffer;
7379 	struct raw_data_entry *entry;
7380 	ssize_t written;
7381 	int size;
7382 	int len;
7383 
7384 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7385 
7386 	if (tracing_disabled)
7387 		return -EINVAL;
7388 
7389 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7390 		return -EINVAL;
7391 
7392 	/* The marker must at least have a tag id */
7393 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7394 		return -EINVAL;
7395 
7396 	if (cnt > TRACE_BUF_SIZE)
7397 		cnt = TRACE_BUF_SIZE;
7398 
7399 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7400 
7401 	size = sizeof(*entry) + cnt;
7402 	if (cnt < FAULT_SIZE_ID)
7403 		size += FAULT_SIZE_ID - cnt;
7404 
7405 	buffer = tr->array_buffer.buffer;
7406 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7407 					    tracing_gen_ctx());
7408 	if (!event)
7409 		/* Ring buffer disabled, return as if not open for write */
7410 		return -EBADF;
7411 
7412 	entry = ring_buffer_event_data(event);
7413 
7414 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7415 	if (len) {
7416 		entry->id = -1;
7417 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7418 		written = -EFAULT;
7419 	} else
7420 		written = cnt;
7421 
7422 	__buffer_unlock_commit(buffer, event);
7423 
7424 	return written;
7425 }
7426 
7427 static int tracing_clock_show(struct seq_file *m, void *v)
7428 {
7429 	struct trace_array *tr = m->private;
7430 	int i;
7431 
7432 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7433 		seq_printf(m,
7434 			"%s%s%s%s", i ? " " : "",
7435 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7436 			i == tr->clock_id ? "]" : "");
7437 	seq_putc(m, '\n');
7438 
7439 	return 0;
7440 }
7441 
7442 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7443 {
7444 	int i;
7445 
7446 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7447 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7448 			break;
7449 	}
7450 	if (i == ARRAY_SIZE(trace_clocks))
7451 		return -EINVAL;
7452 
7453 	mutex_lock(&trace_types_lock);
7454 
7455 	tr->clock_id = i;
7456 
7457 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7458 
7459 	/*
7460 	 * New clock may not be consistent with the previous clock.
7461 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7462 	 */
7463 	tracing_reset_online_cpus(&tr->array_buffer);
7464 
7465 #ifdef CONFIG_TRACER_MAX_TRACE
7466 	if (tr->max_buffer.buffer)
7467 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7468 	tracing_reset_online_cpus(&tr->max_buffer);
7469 #endif
7470 
7471 	mutex_unlock(&trace_types_lock);
7472 
7473 	return 0;
7474 }
7475 
7476 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7477 				   size_t cnt, loff_t *fpos)
7478 {
7479 	struct seq_file *m = filp->private_data;
7480 	struct trace_array *tr = m->private;
7481 	char buf[64];
7482 	const char *clockstr;
7483 	int ret;
7484 
7485 	if (cnt >= sizeof(buf))
7486 		return -EINVAL;
7487 
7488 	if (copy_from_user(buf, ubuf, cnt))
7489 		return -EFAULT;
7490 
7491 	buf[cnt] = 0;
7492 
7493 	clockstr = strstrip(buf);
7494 
7495 	ret = tracing_set_clock(tr, clockstr);
7496 	if (ret)
7497 		return ret;
7498 
7499 	*fpos += cnt;
7500 
7501 	return cnt;
7502 }
7503 
7504 static int tracing_clock_open(struct inode *inode, struct file *file)
7505 {
7506 	struct trace_array *tr = inode->i_private;
7507 	int ret;
7508 
7509 	ret = tracing_check_open_get_tr(tr);
7510 	if (ret)
7511 		return ret;
7512 
7513 	ret = single_open(file, tracing_clock_show, inode->i_private);
7514 	if (ret < 0)
7515 		trace_array_put(tr);
7516 
7517 	return ret;
7518 }
7519 
7520 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7521 {
7522 	struct trace_array *tr = m->private;
7523 
7524 	mutex_lock(&trace_types_lock);
7525 
7526 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7527 		seq_puts(m, "delta [absolute]\n");
7528 	else
7529 		seq_puts(m, "[delta] absolute\n");
7530 
7531 	mutex_unlock(&trace_types_lock);
7532 
7533 	return 0;
7534 }
7535 
7536 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7537 {
7538 	struct trace_array *tr = inode->i_private;
7539 	int ret;
7540 
7541 	ret = tracing_check_open_get_tr(tr);
7542 	if (ret)
7543 		return ret;
7544 
7545 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7546 	if (ret < 0)
7547 		trace_array_put(tr);
7548 
7549 	return ret;
7550 }
7551 
7552 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7553 {
7554 	if (rbe == this_cpu_read(trace_buffered_event))
7555 		return ring_buffer_time_stamp(buffer);
7556 
7557 	return ring_buffer_event_time_stamp(buffer, rbe);
7558 }
7559 
7560 /*
7561  * Set or disable using the per CPU trace_buffer_event when possible.
7562  */
7563 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7564 {
7565 	int ret = 0;
7566 
7567 	mutex_lock(&trace_types_lock);
7568 
7569 	if (set && tr->no_filter_buffering_ref++)
7570 		goto out;
7571 
7572 	if (!set) {
7573 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7574 			ret = -EINVAL;
7575 			goto out;
7576 		}
7577 
7578 		--tr->no_filter_buffering_ref;
7579 	}
7580  out:
7581 	mutex_unlock(&trace_types_lock);
7582 
7583 	return ret;
7584 }
7585 
/*
 * Per-open state of the raw buffer readers; allocated in
 * tracing_buffers_open() and freed in tracing_buffers_release().
 */
struct ftrace_buffer_info {
	/* iterator state: tr, cpu_file, trace and array_buffer are set at open */
	struct trace_iterator	iter;
	/* read page from ring_buffer_alloc_read_page(); NULL until first needed */
	void			*spare;
	/* cpu_file value @spare was allocated for (used when freeing it) */
	unsigned int		spare_cpu;
	/* offset of the next unread byte in @spare; starts at (unsigned int)-1 */
	unsigned int		read;
};
7592 
7593 #ifdef CONFIG_TRACER_SNAPSHOT
7594 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7595 {
7596 	struct trace_array *tr = inode->i_private;
7597 	struct trace_iterator *iter;
7598 	struct seq_file *m;
7599 	int ret;
7600 
7601 	ret = tracing_check_open_get_tr(tr);
7602 	if (ret)
7603 		return ret;
7604 
7605 	if (file->f_mode & FMODE_READ) {
7606 		iter = __tracing_open(inode, file, true);
7607 		if (IS_ERR(iter))
7608 			ret = PTR_ERR(iter);
7609 	} else {
7610 		/* Writes still need the seq_file to hold the private data */
7611 		ret = -ENOMEM;
7612 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7613 		if (!m)
7614 			goto out;
7615 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7616 		if (!iter) {
7617 			kfree(m);
7618 			goto out;
7619 		}
7620 		ret = 0;
7621 
7622 		iter->tr = tr;
7623 		iter->array_buffer = &tr->max_buffer;
7624 		iter->cpu_file = tracing_get_cpu(inode);
7625 		m->private = iter;
7626 		file->private_data = m;
7627 	}
7628 out:
7629 	if (ret < 0)
7630 		trace_array_put(tr);
7631 
7632 	return ret;
7633 }
7634 
7635 static void tracing_swap_cpu_buffer(void *tr)
7636 {
7637 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7638 }
7639 
/*
 * Write handler of snapshot_fops.  The user writes a decimal number:
 *
 *   0     - free the snapshot buffer (only allowed on the all-CPUs file)
 *   1     - allocate the snapshot buffer if needed (or resize it to match
 *           the main buffer) and swap it with the main buffer
 *   other - clear the contents of an allocated snapshot buffer
 *
 * Returns @cnt on success (and advances *@ppos), -EBUSY when the current
 * tracer uses the max buffer or a conditional snapshot is set, -EINVAL
 * for an operation not permitted on a per-CPU file, or the error of a
 * failing helper.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	/* The max buffer is owned by the tracer in this case */
	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	/* tr->cond_snapshot is checked under max_lock with interrupts off */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		goto out;

	switch (val) {
	case 0:
		/* Freeing is only supported for the whole buffer */
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->max_buffer,
					&tr->array_buffer, iter->cpu_file);
		else
			ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			break;
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Run the per-CPU swap on the target CPU and wait for it */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		break;
	default:
		/* Any other value resets the snapshot buffer, if there is one */
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	/* Report the whole write as consumed on success */
	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
7726 
7727 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7728 {
7729 	struct seq_file *m = file->private_data;
7730 	int ret;
7731 
7732 	ret = tracing_release(inode, file);
7733 
7734 	if (file->f_mode & FMODE_READ)
7735 		return ret;
7736 
7737 	/* If write only, the seq_file is just a stub */
7738 	if (m)
7739 		kfree(m->private);
7740 	kfree(m);
7741 
7742 	return 0;
7743 }
7744 
7745 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7746 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7747 				    size_t count, loff_t *ppos);
7748 static int tracing_buffers_release(struct inode *inode, struct file *file);
7749 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7750 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7751 
7752 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7753 {
7754 	struct ftrace_buffer_info *info;
7755 	int ret;
7756 
7757 	/* The following checks for tracefs lockdown */
7758 	ret = tracing_buffers_open(inode, filp);
7759 	if (ret < 0)
7760 		return ret;
7761 
7762 	info = filp->private_data;
7763 
7764 	if (info->iter.trace->use_max_tr) {
7765 		tracing_buffers_release(inode, filp);
7766 		return -EBUSY;
7767 	}
7768 
7769 	info->iter.snapshot = true;
7770 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7771 
7772 	return ret;
7773 }
7774 
7775 #endif /* CONFIG_TRACER_SNAPSHOT */
7776 
7777 
7778 static const struct file_operations tracing_thresh_fops = {
7779 	.open		= tracing_open_generic,
7780 	.read		= tracing_thresh_read,
7781 	.write		= tracing_thresh_write,
7782 	.llseek		= generic_file_llseek,
7783 };
7784 
7785 #ifdef CONFIG_TRACER_MAX_TRACE
7786 static const struct file_operations tracing_max_lat_fops = {
7787 	.open		= tracing_open_generic_tr,
7788 	.read		= tracing_max_lat_read,
7789 	.write		= tracing_max_lat_write,
7790 	.llseek		= generic_file_llseek,
7791 	.release	= tracing_release_generic_tr,
7792 };
7793 #endif
7794 
7795 static const struct file_operations set_tracer_fops = {
7796 	.open		= tracing_open_generic_tr,
7797 	.read		= tracing_set_trace_read,
7798 	.write		= tracing_set_trace_write,
7799 	.llseek		= generic_file_llseek,
7800 	.release	= tracing_release_generic_tr,
7801 };
7802 
7803 static const struct file_operations tracing_pipe_fops = {
7804 	.open		= tracing_open_pipe,
7805 	.poll		= tracing_poll_pipe,
7806 	.read		= tracing_read_pipe,
7807 	.splice_read	= tracing_splice_read_pipe,
7808 	.release	= tracing_release_pipe,
7809 	.llseek		= no_llseek,
7810 };
7811 
7812 static const struct file_operations tracing_entries_fops = {
7813 	.open		= tracing_open_generic_tr,
7814 	.read		= tracing_entries_read,
7815 	.write		= tracing_entries_write,
7816 	.llseek		= generic_file_llseek,
7817 	.release	= tracing_release_generic_tr,
7818 };
7819 
7820 static const struct file_operations tracing_total_entries_fops = {
7821 	.open		= tracing_open_generic_tr,
7822 	.read		= tracing_total_entries_read,
7823 	.llseek		= generic_file_llseek,
7824 	.release	= tracing_release_generic_tr,
7825 };
7826 
7827 static const struct file_operations tracing_free_buffer_fops = {
7828 	.open		= tracing_open_generic_tr,
7829 	.write		= tracing_free_buffer_write,
7830 	.release	= tracing_free_buffer_release,
7831 };
7832 
7833 static const struct file_operations tracing_mark_fops = {
7834 	.open		= tracing_mark_open,
7835 	.write		= tracing_mark_write,
7836 	.release	= tracing_release_generic_tr,
7837 };
7838 
7839 static const struct file_operations tracing_mark_raw_fops = {
7840 	.open		= tracing_mark_open,
7841 	.write		= tracing_mark_raw_write,
7842 	.release	= tracing_release_generic_tr,
7843 };
7844 
7845 static const struct file_operations trace_clock_fops = {
7846 	.open		= tracing_clock_open,
7847 	.read		= seq_read,
7848 	.llseek		= seq_lseek,
7849 	.release	= tracing_single_release_tr,
7850 	.write		= tracing_clock_write,
7851 };
7852 
7853 static const struct file_operations trace_time_stamp_mode_fops = {
7854 	.open		= tracing_time_stamp_mode_open,
7855 	.read		= seq_read,
7856 	.llseek		= seq_lseek,
7857 	.release	= tracing_single_release_tr,
7858 };
7859 
7860 #ifdef CONFIG_TRACER_SNAPSHOT
7861 static const struct file_operations snapshot_fops = {
7862 	.open		= tracing_snapshot_open,
7863 	.read		= seq_read,
7864 	.write		= tracing_snapshot_write,
7865 	.llseek		= tracing_lseek,
7866 	.release	= tracing_snapshot_release,
7867 };
7868 
7869 static const struct file_operations snapshot_raw_fops = {
7870 	.open		= snapshot_raw_open,
7871 	.read		= tracing_buffers_read,
7872 	.release	= tracing_buffers_release,
7873 	.splice_read	= tracing_buffers_splice_read,
7874 	.llseek		= no_llseek,
7875 };
7876 
7877 #endif /* CONFIG_TRACER_SNAPSHOT */
7878 
7879 /*
7880  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7881  * @filp: The active open file structure
7882  * @ubuf: The userspace provided buffer to read value into
7883  * @cnt: The maximum number of bytes to read
7884  * @ppos: The current "file" position
7885  *
7886  * This function implements the write interface for a struct trace_min_max_param.
7887  * The filp->private_data must point to a trace_min_max_param structure that
7888  * defines where to write the value, the min and the max acceptable values,
7889  * and a lock to protect the write.
7890  */
7891 static ssize_t
7892 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7893 {
7894 	struct trace_min_max_param *param = filp->private_data;
7895 	u64 val;
7896 	int err;
7897 
7898 	if (!param)
7899 		return -EFAULT;
7900 
7901 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7902 	if (err)
7903 		return err;
7904 
7905 	if (param->lock)
7906 		mutex_lock(param->lock);
7907 
7908 	if (param->min && val < *param->min)
7909 		err = -EINVAL;
7910 
7911 	if (param->max && val > *param->max)
7912 		err = -EINVAL;
7913 
7914 	if (!err)
7915 		*param->val = val;
7916 
7917 	if (param->lock)
7918 		mutex_unlock(param->lock);
7919 
7920 	if (err)
7921 		return err;
7922 
7923 	return cnt;
7924 }
7925 
7926 /*
7927  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7928  * @filp: The active open file structure
7929  * @ubuf: The userspace provided buffer to read value into
7930  * @cnt: The maximum number of bytes to read
7931  * @ppos: The current "file" position
7932  *
7933  * This function implements the read interface for a struct trace_min_max_param.
7934  * The filp->private_data must point to a trace_min_max_param struct with valid
7935  * data.
7936  */
7937 static ssize_t
7938 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7939 {
7940 	struct trace_min_max_param *param = filp->private_data;
7941 	char buf[U64_STR_SIZE];
7942 	int len;
7943 	u64 val;
7944 
7945 	if (!param)
7946 		return -EFAULT;
7947 
7948 	val = *param->val;
7949 
7950 	if (cnt > sizeof(buf))
7951 		cnt = sizeof(buf);
7952 
7953 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7954 
7955 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7956 }
7957 
/*
 * File operations for a value described by a struct trace_min_max_param;
 * trace_min_max_read() and trace_min_max_write() expect to find that
 * struct in filp->private_data.  Not static, so usable from other files.
 */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
7963 
/* Maximum number of entries allocated per trace_array error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the tracing_log_err::loc buffer */
#define TRACING_LOG_LOC_MAX	128

/* Printed in front of the command; its length also offsets the caret line */
#define CMD_PREFIX "  Command: "

/* Description of one logged error, apart from its location and command text */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};

/* One entry of a trace_array's err_log list */
struct tracing_log_err {
	struct list_head	list;	/* linkage in tr->err_log */
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};

/* Held while the err_log lists and n_err_log_entries are read or changed */
static DEFINE_MUTEX(tracing_err_log_lock);
7984 
7985 static struct tracing_log_err *alloc_tracing_log_err(int len)
7986 {
7987 	struct tracing_log_err *err;
7988 
7989 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7990 	if (!err)
7991 		return ERR_PTR(-ENOMEM);
7992 
7993 	err->cmd = kzalloc(len, GFP_KERNEL);
7994 	if (!err->cmd) {
7995 		kfree(err);
7996 		return ERR_PTR(-ENOMEM);
7997 	}
7998 
7999 	return err;
8000 }
8001 
8002 static void free_tracing_log_err(struct tracing_log_err *err)
8003 {
8004 	kfree(err->cmd);
8005 	kfree(err);
8006 }
8007 
8008 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8009 						   int len)
8010 {
8011 	struct tracing_log_err *err;
8012 	char *cmd;
8013 
8014 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8015 		err = alloc_tracing_log_err(len);
8016 		if (PTR_ERR(err) != -ENOMEM)
8017 			tr->n_err_log_entries++;
8018 
8019 		return err;
8020 	}
8021 	cmd = kzalloc(len, GFP_KERNEL);
8022 	if (!cmd)
8023 		return ERR_PTR(-ENOMEM);
8024 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8025 	kfree(err->cmd);
8026 	err->cmd = cmd;
8027 	list_del(&err->list);
8028 
8029 	return err;
8030 }
8031 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd.  The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.  An empty @cmd triggers a warning.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);
	if (!match)
		return 0;

	return match - cmd;
}
8057 
8058 /**
8059  * tracing_log_err - write an error to the tracing error log
8060  * @tr: The associated trace array for the error (NULL for top level array)
8061  * @loc: A string describing where the error occurred
8062  * @cmd: The tracing command that caused the error
8063  * @errs: The array of loc-specific static error strings
8064  * @type: The index into errs[], which produces the specific static err string
8065  * @pos: The position the caret should be placed in the cmd
8066  *
8067  * Writes an error into tracing/error_log of the form:
8068  *
8069  * <loc>: error: <text>
8070  *   Command: <cmd>
8071  *              ^
8072  *
8073  * tracing/error_log is a small log file containing the last
8074  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8075  * unless there has been a tracing error, and the error log can be
8076  * cleared and have its memory freed by writing the empty string in
8077  * truncation mode to it i.e. echo > tracing/error_log.
8078  *
8079  * NOTE: the @errs array along with the @type param are used to
8080  * produce a static error string - this string is not copied and saved
8081  * when the error is logged - only a pointer to it is saved.  See
8082  * existing callers for examples of how static strings are typically
8083  * defined for use with tracing_log_err().
8084  */
8085 void tracing_log_err(struct trace_array *tr,
8086 		     const char *loc, const char *cmd,
8087 		     const char **errs, u8 type, u16 pos)
8088 {
8089 	struct tracing_log_err *err;
8090 	int len = 0;
8091 
8092 	if (!tr)
8093 		tr = &global_trace;
8094 
8095 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8096 
8097 	mutex_lock(&tracing_err_log_lock);
8098 	err = get_tracing_log_err(tr, len);
8099 	if (PTR_ERR(err) == -ENOMEM) {
8100 		mutex_unlock(&tracing_err_log_lock);
8101 		return;
8102 	}
8103 
8104 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8105 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8106 
8107 	err->info.errs = errs;
8108 	err->info.type = type;
8109 	err->info.pos = pos;
8110 	err->info.ts = local_clock();
8111 
8112 	list_add_tail(&err->list, &tr->err_log);
8113 	mutex_unlock(&tracing_err_log_lock);
8114 }
8115 
8116 static void clear_tracing_err_log(struct trace_array *tr)
8117 {
8118 	struct tracing_log_err *err, *next;
8119 
8120 	mutex_lock(&tracing_err_log_lock);
8121 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8122 		list_del(&err->list);
8123 		free_tracing_log_err(err);
8124 	}
8125 
8126 	tr->n_err_log_entries = 0;
8127 	mutex_unlock(&tracing_err_log_lock);
8128 }
8129 
8130 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8131 {
8132 	struct trace_array *tr = m->private;
8133 
8134 	mutex_lock(&tracing_err_log_lock);
8135 
8136 	return seq_list_start(&tr->err_log, *pos);
8137 }
8138 
8139 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8140 {
8141 	struct trace_array *tr = m->private;
8142 
8143 	return seq_list_next(v, &tr->err_log, pos);
8144 }
8145 
/* seq_file ->stop: drop the lock taken in tracing_err_log_seq_start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
8150 
8151 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8152 {
8153 	u16 i;
8154 
8155 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8156 		seq_putc(m, ' ');
8157 	for (i = 0; i < pos; i++)
8158 		seq_putc(m, ' ');
8159 	seq_puts(m, "^\n");
8160 }
8161 
8162 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8163 {
8164 	struct tracing_log_err *err = v;
8165 
8166 	if (err) {
8167 		const char *err_text = err->info.errs[err->info.type];
8168 		u64 sec = err->info.ts;
8169 		u32 nsec;
8170 
8171 		nsec = do_div(sec, NSEC_PER_SEC);
8172 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8173 			   err->loc, err_text);
8174 		seq_printf(m, "%s", err->cmd);
8175 		tracing_err_log_show_pos(m, err->info.pos);
8176 	}
8177 
8178 	return 0;
8179 }
8180 
/*
 * seq_file iteration over a trace_array's err_log list;
 * tracing_err_log_lock is held from ->start to ->stop.
 */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
8187 
8188 static int tracing_err_log_open(struct inode *inode, struct file *file)
8189 {
8190 	struct trace_array *tr = inode->i_private;
8191 	int ret = 0;
8192 
8193 	ret = tracing_check_open_get_tr(tr);
8194 	if (ret)
8195 		return ret;
8196 
8197 	/* If this file was opened for write, then erase contents */
8198 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8199 		clear_tracing_err_log(tr);
8200 
8201 	if (file->f_mode & FMODE_READ) {
8202 		ret = seq_open(file, &tracing_err_log_seq_ops);
8203 		if (!ret) {
8204 			struct seq_file *m = file->private_data;
8205 			m->private = tr;
8206 		} else {
8207 			trace_array_put(tr);
8208 		}
8209 	}
8210 	return ret;
8211 }
8212 
/*
 * Write handler of tracing_err_log_fops: the data is ignored and the
 * whole write is reported as consumed.  Clearing the log happens in
 * tracing_err_log_open() when the file is opened with O_TRUNC.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8219 
8220 static int tracing_err_log_release(struct inode *inode, struct file *file)
8221 {
8222 	struct trace_array *tr = inode->i_private;
8223 
8224 	trace_array_put(tr);
8225 
8226 	if (file->f_mode & FMODE_READ)
8227 		seq_release(inode, file);
8228 
8229 	return 0;
8230 }
8231 
/*
 * File operations of the error log: seq_file based reads, writes that
 * are accepted but ignored by tracing_err_log_write().
 */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
8239 
8240 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8241 {
8242 	struct trace_array *tr = inode->i_private;
8243 	struct ftrace_buffer_info *info;
8244 	int ret;
8245 
8246 	ret = tracing_check_open_get_tr(tr);
8247 	if (ret)
8248 		return ret;
8249 
8250 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8251 	if (!info) {
8252 		trace_array_put(tr);
8253 		return -ENOMEM;
8254 	}
8255 
8256 	mutex_lock(&trace_types_lock);
8257 
8258 	info->iter.tr		= tr;
8259 	info->iter.cpu_file	= tracing_get_cpu(inode);
8260 	info->iter.trace	= tr->current_trace;
8261 	info->iter.array_buffer = &tr->array_buffer;
8262 	info->spare		= NULL;
8263 	/* Force reading ring buffer for first read */
8264 	info->read		= (unsigned int)-1;
8265 
8266 	filp->private_data = info;
8267 
8268 	tr->trace_ref++;
8269 
8270 	mutex_unlock(&trace_types_lock);
8271 
8272 	ret = nonseekable_open(inode, filp);
8273 	if (ret < 0)
8274 		trace_array_put(tr);
8275 
8276 	return ret;
8277 }
8278 
8279 static __poll_t
8280 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8281 {
8282 	struct ftrace_buffer_info *info = filp->private_data;
8283 	struct trace_iterator *iter = &info->iter;
8284 
8285 	return trace_poll(iter, filp, poll_table);
8286 }
8287 
/*
 * Read handler for raw buffer files.
 *
 * Data is served from a spare page (info->spare) that is filled one
 * ring buffer page at a time; info->read tracks how much of that page
 * has already been handed to user space.
 *
 * Returns the number of bytes copied, 0 if @count is 0 or no page could
 * be read although the buffer is not empty, -EBUSY for a snapshot
 * iterator while the current tracer uses the max buffer, -EAGAIN for a
 * non-blocking read of an empty buffer, -EFAULT if nothing could be
 * copied, or an error from allocating the spare page or from waiting.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* The spare page is allocated on first use and kept until release */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			/* Block until data may be available, then retry */
			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was read: start handing it out from the beginning */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could not copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8361 
/*
 * Flush handler for raw buffer files: bump the iterator's wait_index
 * and wake up the waiters on the ring buffer so they can notice the
 * change.  Always returns 0.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8375 
8376 static int tracing_buffers_release(struct inode *inode, struct file *file)
8377 {
8378 	struct ftrace_buffer_info *info = file->private_data;
8379 	struct trace_iterator *iter = &info->iter;
8380 
8381 	mutex_lock(&trace_types_lock);
8382 
8383 	iter->tr->trace_ref--;
8384 
8385 	__trace_array_put(iter->tr);
8386 
8387 	if (info->spare)
8388 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8389 					   info->spare_cpu, info->spare);
8390 	kvfree(info);
8391 
8392 	mutex_unlock(&trace_types_lock);
8393 
8394 	return 0;
8395 }
8396 
/*
 * Reference counted handle on one ring buffer read page that has been
 * handed to a pipe; see buffer_ref_release().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer @page belongs to */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu passed when allocating/freeing @page */
	refcount_t		refcount;	/* page and ref are freed when this drops to 0 */
};
8403 
8404 static void buffer_ref_release(struct buffer_ref *ref)
8405 {
8406 	if (!refcount_dec_and_test(&ref->refcount))
8407 		return;
8408 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8409 	kfree(ref);
8410 }
8411 
8412 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8413 				    struct pipe_buffer *buf)
8414 {
8415 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8416 
8417 	buffer_ref_release(ref);
8418 	buf->private = 0;
8419 }
8420 
8421 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8422 				struct pipe_buffer *buf)
8423 {
8424 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8425 
8426 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8427 		return false;
8428 
8429 	refcount_inc(&ref->refcount);
8430 	return true;
8431 }
8432 
/*
 * Pipe buffer operations for a buffer.
 * Both callbacks operate on the struct buffer_ref kept in buf->private.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
8438 
8439 /*
8440  * Callback from splice_to_pipe(), if we need to release some pages
8441  * at the end of the spd in case we error'ed out in filling the pipe.
8442  */
8443 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8444 {
8445 	struct buffer_ref *ref =
8446 		(struct buffer_ref *)spd->partial[i].private;
8447 
8448 	buffer_ref_release(ref);
8449 	spd->partial[i].private = 0;
8450 }
8451 
8452 static ssize_t
8453 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8454 			    struct pipe_inode_info *pipe, size_t len,
8455 			    unsigned int flags)
8456 {
8457 	struct ftrace_buffer_info *info = file->private_data;
8458 	struct trace_iterator *iter = &info->iter;
8459 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8460 	struct page *pages_def[PIPE_DEF_BUFFERS];
8461 	struct splice_pipe_desc spd = {
8462 		.pages		= pages_def,
8463 		.partial	= partial_def,
8464 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8465 		.ops		= &buffer_pipe_buf_ops,
8466 		.spd_release	= buffer_spd_release,
8467 	};
8468 	struct buffer_ref *ref;
8469 	int entries, i;
8470 	ssize_t ret = 0;
8471 
8472 #ifdef CONFIG_TRACER_MAX_TRACE
8473 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8474 		return -EBUSY;
8475 #endif
8476 
8477 	if (*ppos & (PAGE_SIZE - 1))
8478 		return -EINVAL;
8479 
8480 	if (len & (PAGE_SIZE - 1)) {
8481 		if (len < PAGE_SIZE)
8482 			return -EINVAL;
8483 		len &= PAGE_MASK;
8484 	}
8485 
8486 	if (splice_grow_spd(pipe, &spd))
8487 		return -ENOMEM;
8488 
8489  again:
8490 	trace_access_lock(iter->cpu_file);
8491 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8492 
8493 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8494 		struct page *page;
8495 		int r;
8496 
8497 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8498 		if (!ref) {
8499 			ret = -ENOMEM;
8500 			break;
8501 		}
8502 
8503 		refcount_set(&ref->refcount, 1);
8504 		ref->buffer = iter->array_buffer->buffer;
8505 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8506 		if (IS_ERR(ref->page)) {
8507 			ret = PTR_ERR(ref->page);
8508 			ref->page = NULL;
8509 			kfree(ref);
8510 			break;
8511 		}
8512 		ref->cpu = iter->cpu_file;
8513 
8514 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8515 					  len, iter->cpu_file, 1);
8516 		if (r < 0) {
8517 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8518 						   ref->page);
8519 			kfree(ref);
8520 			break;
8521 		}
8522 
8523 		page = virt_to_page(ref->page);
8524 
8525 		spd.pages[i] = page;
8526 		spd.partial[i].len = PAGE_SIZE;
8527 		spd.partial[i].offset = 0;
8528 		spd.partial[i].private = (unsigned long)ref;
8529 		spd.nr_pages++;
8530 		*ppos += PAGE_SIZE;
8531 
8532 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8533 	}
8534 
8535 	trace_access_unlock(iter->cpu_file);
8536 	spd.nr_pages = i;
8537 
8538 	/* did we read anything? */
8539 	if (!spd.nr_pages) {
8540 		long wait_index;
8541 
8542 		if (ret)
8543 			goto out;
8544 
8545 		ret = -EAGAIN;
8546 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8547 			goto out;
8548 
8549 		wait_index = READ_ONCE(iter->wait_index);
8550 
8551 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8552 		if (ret)
8553 			goto out;
8554 
8555 		/* No need to wait after waking up when tracing is off */
8556 		if (!tracer_tracing_is_on(iter->tr))
8557 			goto out;
8558 
8559 		/* Make sure we see the new wait_index */
8560 		smp_rmb();
8561 		if (wait_index != iter->wait_index)
8562 			goto out;
8563 
8564 		goto again;
8565 	}
8566 
8567 	ret = splice_to_pipe(pipe, &spd);
8568 out:
8569 	splice_shrink_spd(&spd);
8570 
8571 	return ret;
8572 }
8573 
8574 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8575 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8576 {
8577 	struct ftrace_buffer_info *info = file->private_data;
8578 	struct trace_iterator *iter = &info->iter;
8579 
8580 	if (cmd)
8581 		return -ENOIOCTLCMD;
8582 
8583 	mutex_lock(&trace_types_lock);
8584 
8585 	iter->wait_index++;
8586 	/* Make sure the waiters see the new wait_index */
8587 	smp_wmb();
8588 
8589 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8590 
8591 	mutex_unlock(&trace_types_lock);
8592 	return 0;
8593 }
8594 
8595 static const struct file_operations tracing_buffers_fops = {
8596 	.open		= tracing_buffers_open,
8597 	.read		= tracing_buffers_read,
8598 	.poll		= tracing_buffers_poll,
8599 	.release	= tracing_buffers_release,
8600 	.flush		= tracing_buffers_flush,
8601 	.splice_read	= tracing_buffers_splice_read,
8602 	.unlocked_ioctl = tracing_buffers_ioctl,
8603 	.llseek		= no_llseek,
8604 };
8605 
8606 static ssize_t
8607 tracing_stats_read(struct file *filp, char __user *ubuf,
8608 		   size_t count, loff_t *ppos)
8609 {
8610 	struct inode *inode = file_inode(filp);
8611 	struct trace_array *tr = inode->i_private;
8612 	struct array_buffer *trace_buf = &tr->array_buffer;
8613 	int cpu = tracing_get_cpu(inode);
8614 	struct trace_seq *s;
8615 	unsigned long cnt;
8616 	unsigned long long t;
8617 	unsigned long usec_rem;
8618 
8619 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8620 	if (!s)
8621 		return -ENOMEM;
8622 
8623 	trace_seq_init(s);
8624 
8625 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8626 	trace_seq_printf(s, "entries: %ld\n", cnt);
8627 
8628 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8629 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8630 
8631 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8632 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8633 
8634 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8635 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8636 
8637 	if (trace_clocks[tr->clock_id].in_ns) {
8638 		/* local or global for trace_clock */
8639 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8640 		usec_rem = do_div(t, USEC_PER_SEC);
8641 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8642 								t, usec_rem);
8643 
8644 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8645 		usec_rem = do_div(t, USEC_PER_SEC);
8646 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8647 	} else {
8648 		/* counter or tsc mode for trace_clock */
8649 		trace_seq_printf(s, "oldest event ts: %llu\n",
8650 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8651 
8652 		trace_seq_printf(s, "now ts: %llu\n",
8653 				ring_buffer_time_stamp(trace_buf->buffer));
8654 	}
8655 
8656 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8657 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8658 
8659 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8660 	trace_seq_printf(s, "read events: %ld\n", cnt);
8661 
8662 	count = simple_read_from_buffer(ubuf, count, ppos,
8663 					s->buffer, trace_seq_used(s));
8664 
8665 	kfree(s);
8666 
8667 	return count;
8668 }
8669 
8670 static const struct file_operations tracing_stats_fops = {
8671 	.open		= tracing_open_generic_tr,
8672 	.read		= tracing_stats_read,
8673 	.llseek		= generic_file_llseek,
8674 	.release	= tracing_release_generic_tr,
8675 };
8676 
8677 #ifdef CONFIG_DYNAMIC_FTRACE
8678 
8679 static ssize_t
8680 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8681 		  size_t cnt, loff_t *ppos)
8682 {
8683 	ssize_t ret;
8684 	char *buf;
8685 	int r;
8686 
8687 	/* 256 should be plenty to hold the amount needed */
8688 	buf = kmalloc(256, GFP_KERNEL);
8689 	if (!buf)
8690 		return -ENOMEM;
8691 
8692 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8693 		      ftrace_update_tot_cnt,
8694 		      ftrace_number_of_pages,
8695 		      ftrace_number_of_groups);
8696 
8697 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8698 	kfree(buf);
8699 	return ret;
8700 }
8701 
8702 static const struct file_operations tracing_dyn_info_fops = {
8703 	.open		= tracing_open_generic,
8704 	.read		= tracing_read_dyn_info,
8705 	.llseek		= generic_file_llseek,
8706 };
8707 #endif /* CONFIG_DYNAMIC_FTRACE */
8708 
8709 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Probe callback of snapshot_probe_ops: take a snapshot of @tr every time
 * the probed function is hit. @ip, @parent_ip, @ops and @data are unused.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8717 
8718 static void
8719 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8720 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8721 		      void *data)
8722 {
8723 	struct ftrace_func_mapper *mapper = data;
8724 	long *count = NULL;
8725 
8726 	if (mapper)
8727 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8728 
8729 	if (count) {
8730 
8731 		if (*count <= 0)
8732 			return;
8733 
8734 		(*count)--;
8735 	}
8736 
8737 	tracing_snapshot_instance(tr);
8738 }
8739 
8740 static int
8741 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8742 		      struct ftrace_probe_ops *ops, void *data)
8743 {
8744 	struct ftrace_func_mapper *mapper = data;
8745 	long *count = NULL;
8746 
8747 	seq_printf(m, "%ps:", (void *)ip);
8748 
8749 	seq_puts(m, "snapshot");
8750 
8751 	if (mapper)
8752 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8753 
8754 	if (count)
8755 		seq_printf(m, ":count=%ld\n", *count);
8756 	else
8757 		seq_puts(m, ":unlimited\n");
8758 
8759 	return 0;
8760 }
8761 
8762 static int
8763 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8764 		     unsigned long ip, void *init_data, void **data)
8765 {
8766 	struct ftrace_func_mapper *mapper = *data;
8767 
8768 	if (!mapper) {
8769 		mapper = allocate_ftrace_func_mapper();
8770 		if (!mapper)
8771 			return -ENOMEM;
8772 		*data = mapper;
8773 	}
8774 
8775 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8776 }
8777 
8778 static void
8779 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8780 		     unsigned long ip, void *data)
8781 {
8782 	struct ftrace_func_mapper *mapper = data;
8783 
8784 	if (!ip) {
8785 		if (!mapper)
8786 			return;
8787 		free_ftrace_func_mapper(mapper, NULL);
8788 		return;
8789 	}
8790 
8791 	ftrace_func_mapper_remove_ip(mapper, ip);
8792 }
8793 
8794 static struct ftrace_probe_ops snapshot_probe_ops = {
8795 	.func			= ftrace_snapshot,
8796 	.print			= ftrace_snapshot_print,
8797 };
8798 
8799 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8800 	.func			= ftrace_count_snapshot,
8801 	.print			= ftrace_snapshot_print,
8802 	.init			= ftrace_snapshot_init,
8803 	.free			= ftrace_snapshot_free,
8804 };
8805 
8806 static int
8807 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8808 			       char *glob, char *cmd, char *param, int enable)
8809 {
8810 	struct ftrace_probe_ops *ops;
8811 	void *count = (void *)-1;
8812 	char *number;
8813 	int ret;
8814 
8815 	if (!tr)
8816 		return -ENODEV;
8817 
8818 	/* hash funcs only work with set_ftrace_filter */
8819 	if (!enable)
8820 		return -EINVAL;
8821 
8822 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8823 
8824 	if (glob[0] == '!')
8825 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8826 
8827 	if (!param)
8828 		goto out_reg;
8829 
8830 	number = strsep(&param, ":");
8831 
8832 	if (!strlen(number))
8833 		goto out_reg;
8834 
8835 	/*
8836 	 * We use the callback data field (which is a pointer)
8837 	 * as our counter.
8838 	 */
8839 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8840 	if (ret)
8841 		return ret;
8842 
8843  out_reg:
8844 	ret = tracing_alloc_snapshot_instance(tr);
8845 	if (ret < 0)
8846 		goto out;
8847 
8848 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8849 
8850  out:
8851 	return ret < 0 ? ret : 0;
8852 }
8853 
8854 static struct ftrace_func_command ftrace_snapshot_cmd = {
8855 	.name			= "snapshot",
8856 	.func			= ftrace_trace_snapshot_callback,
8857 };
8858 
8859 static __init int register_snapshot_cmd(void)
8860 {
8861 	return register_ftrace_command(&ftrace_snapshot_cmd);
8862 }
8863 #else
8864 static inline __init int register_snapshot_cmd(void) { return 0; }
8865 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8866 
8867 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8868 {
8869 	if (WARN_ON(!tr->dir))
8870 		return ERR_PTR(-ENODEV);
8871 
8872 	/* Top directory uses NULL as the parent */
8873 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8874 		return NULL;
8875 
8876 	/* All sub buffers have a descriptor */
8877 	return tr->dir;
8878 }
8879 
8880 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8881 {
8882 	struct dentry *d_tracer;
8883 
8884 	if (tr->percpu_dir)
8885 		return tr->percpu_dir;
8886 
8887 	d_tracer = tracing_get_dentry(tr);
8888 	if (IS_ERR(d_tracer))
8889 		return NULL;
8890 
8891 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8892 
8893 	MEM_FAIL(!tr->percpu_dir,
8894 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8895 
8896 	return tr->percpu_dir;
8897 }
8898 
8899 static struct dentry *
8900 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8901 		      void *data, long cpu, const struct file_operations *fops)
8902 {
8903 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8904 
8905 	if (ret) /* See tracing_get_cpu() */
8906 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8907 	return ret;
8908 }
8909 
8910 static void
8911 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8912 {
8913 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8914 	struct dentry *d_cpu;
8915 	char cpu_dir[30]; /* 30 characters should be more than enough */
8916 
8917 	if (!d_percpu)
8918 		return;
8919 
8920 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8921 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8922 	if (!d_cpu) {
8923 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8924 		return;
8925 	}
8926 
8927 	/* per cpu trace_pipe */
8928 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8929 				tr, cpu, &tracing_pipe_fops);
8930 
8931 	/* per cpu trace */
8932 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8933 				tr, cpu, &tracing_fops);
8934 
8935 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8936 				tr, cpu, &tracing_buffers_fops);
8937 
8938 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8939 				tr, cpu, &tracing_stats_fops);
8940 
8941 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8942 				tr, cpu, &tracing_entries_fops);
8943 
8944 #ifdef CONFIG_TRACER_SNAPSHOT
8945 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8946 				tr, cpu, &snapshot_fops);
8947 
8948 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8949 				tr, cpu, &snapshot_raw_fops);
8950 #endif
8951 }
8952 
8953 #ifdef CONFIG_FTRACE_SELFTEST
8954 /* Let selftest have access to static functions in this file */
8955 #include "trace_selftest.c"
8956 #endif
8957 
8958 static ssize_t
8959 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8960 			loff_t *ppos)
8961 {
8962 	struct trace_option_dentry *topt = filp->private_data;
8963 	char *buf;
8964 
8965 	if (topt->flags->val & topt->opt->bit)
8966 		buf = "1\n";
8967 	else
8968 		buf = "0\n";
8969 
8970 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8971 }
8972 
8973 static ssize_t
8974 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8975 			 loff_t *ppos)
8976 {
8977 	struct trace_option_dentry *topt = filp->private_data;
8978 	unsigned long val;
8979 	int ret;
8980 
8981 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8982 	if (ret)
8983 		return ret;
8984 
8985 	if (val != 0 && val != 1)
8986 		return -EINVAL;
8987 
8988 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8989 		mutex_lock(&trace_types_lock);
8990 		ret = __set_tracer_option(topt->tr, topt->flags,
8991 					  topt->opt, !val);
8992 		mutex_unlock(&trace_types_lock);
8993 		if (ret)
8994 			return ret;
8995 	}
8996 
8997 	*ppos += cnt;
8998 
8999 	return cnt;
9000 }
9001 
9002 static int tracing_open_options(struct inode *inode, struct file *filp)
9003 {
9004 	struct trace_option_dentry *topt = inode->i_private;
9005 	int ret;
9006 
9007 	ret = tracing_check_open_get_tr(topt->tr);
9008 	if (ret)
9009 		return ret;
9010 
9011 	filp->private_data = inode->i_private;
9012 	return 0;
9013 }
9014 
9015 static int tracing_release_options(struct inode *inode, struct file *file)
9016 {
9017 	struct trace_option_dentry *topt = file->private_data;
9018 
9019 	trace_array_put(topt->tr);
9020 	return 0;
9021 }
9022 
9023 static const struct file_operations trace_options_fops = {
9024 	.open = tracing_open_options,
9025 	.read = trace_options_read,
9026 	.write = trace_options_write,
9027 	.llseek	= generic_file_llseek,
9028 	.release = tracing_release_options,
9029 };
9030 
9031 /*
9032  * In order to pass in both the trace_array descriptor as well as the index
9033  * to the flag that the trace option file represents, the trace_array
9034  * has a character array of trace_flags_index[], which holds the index
9035  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9036  * The address of this character array is passed to the flag option file
9037  * read/write callbacks.
9038  *
9039  * In order to extract both the index and the trace_array descriptor,
9040  * get_tr_index() uses the following algorithm.
9041  *
9042  *   idx = *ptr;
9043  *
9044  * As the pointer itself contains the address of the index (remember
9045  * index[1] == 1).
9046  *
9047  * Then to get the trace_array descriptor, by subtracting that index
9048  * from the ptr, we get to the start of the index itself.
9049  *
9050  *   ptr - idx == &index[0]
9051  *
9052  * Then a simple container_of() from that pointer gets us to the
9053  * trace_array descriptor.
9054  */
9055 static void get_tr_index(void *data, struct trace_array **ptr,
9056 			 unsigned int *pindex)
9057 {
9058 	*pindex = *(unsigned char *)data;
9059 
9060 	*ptr = container_of(data - *pindex, struct trace_array,
9061 			    trace_flags_index);
9062 }
9063 
9064 static ssize_t
9065 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9066 			loff_t *ppos)
9067 {
9068 	void *tr_index = filp->private_data;
9069 	struct trace_array *tr;
9070 	unsigned int index;
9071 	char *buf;
9072 
9073 	get_tr_index(tr_index, &tr, &index);
9074 
9075 	if (tr->trace_flags & (1 << index))
9076 		buf = "1\n";
9077 	else
9078 		buf = "0\n";
9079 
9080 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9081 }
9082 
9083 static ssize_t
9084 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9085 			 loff_t *ppos)
9086 {
9087 	void *tr_index = filp->private_data;
9088 	struct trace_array *tr;
9089 	unsigned int index;
9090 	unsigned long val;
9091 	int ret;
9092 
9093 	get_tr_index(tr_index, &tr, &index);
9094 
9095 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9096 	if (ret)
9097 		return ret;
9098 
9099 	if (val != 0 && val != 1)
9100 		return -EINVAL;
9101 
9102 	mutex_lock(&event_mutex);
9103 	mutex_lock(&trace_types_lock);
9104 	ret = set_tracer_flag(tr, 1 << index, val);
9105 	mutex_unlock(&trace_types_lock);
9106 	mutex_unlock(&event_mutex);
9107 
9108 	if (ret < 0)
9109 		return ret;
9110 
9111 	*ppos += cnt;
9112 
9113 	return cnt;
9114 }
9115 
9116 static const struct file_operations trace_options_core_fops = {
9117 	.open = tracing_open_generic,
9118 	.read = trace_options_core_read,
9119 	.write = trace_options_core_write,
9120 	.llseek = generic_file_llseek,
9121 };
9122 
9123 struct dentry *trace_create_file(const char *name,
9124 				 umode_t mode,
9125 				 struct dentry *parent,
9126 				 void *data,
9127 				 const struct file_operations *fops)
9128 {
9129 	struct dentry *ret;
9130 
9131 	ret = tracefs_create_file(name, mode, parent, data, fops);
9132 	if (!ret)
9133 		pr_warn("Could not create tracefs '%s' entry\n", name);
9134 
9135 	return ret;
9136 }
9137 
9138 
9139 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9140 {
9141 	struct dentry *d_tracer;
9142 
9143 	if (tr->options)
9144 		return tr->options;
9145 
9146 	d_tracer = tracing_get_dentry(tr);
9147 	if (IS_ERR(d_tracer))
9148 		return NULL;
9149 
9150 	tr->options = tracefs_create_dir("options", d_tracer);
9151 	if (!tr->options) {
9152 		pr_warn("Could not create tracefs directory 'options'\n");
9153 		return NULL;
9154 	}
9155 
9156 	return tr->options;
9157 }
9158 
9159 static void
9160 create_trace_option_file(struct trace_array *tr,
9161 			 struct trace_option_dentry *topt,
9162 			 struct tracer_flags *flags,
9163 			 struct tracer_opt *opt)
9164 {
9165 	struct dentry *t_options;
9166 
9167 	t_options = trace_options_init_dentry(tr);
9168 	if (!t_options)
9169 		return;
9170 
9171 	topt->flags = flags;
9172 	topt->opt = opt;
9173 	topt->tr = tr;
9174 
9175 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9176 					t_options, topt, &trace_options_fops);
9177 
9178 }
9179 
9180 static void
9181 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9182 {
9183 	struct trace_option_dentry *topts;
9184 	struct trace_options *tr_topts;
9185 	struct tracer_flags *flags;
9186 	struct tracer_opt *opts;
9187 	int cnt;
9188 	int i;
9189 
9190 	if (!tracer)
9191 		return;
9192 
9193 	flags = tracer->flags;
9194 
9195 	if (!flags || !flags->opts)
9196 		return;
9197 
9198 	/*
9199 	 * If this is an instance, only create flags for tracers
9200 	 * the instance may have.
9201 	 */
9202 	if (!trace_ok_for_array(tracer, tr))
9203 		return;
9204 
9205 	for (i = 0; i < tr->nr_topts; i++) {
9206 		/* Make sure there's no duplicate flags. */
9207 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9208 			return;
9209 	}
9210 
9211 	opts = flags->opts;
9212 
9213 	for (cnt = 0; opts[cnt].name; cnt++)
9214 		;
9215 
9216 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9217 	if (!topts)
9218 		return;
9219 
9220 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9221 			    GFP_KERNEL);
9222 	if (!tr_topts) {
9223 		kfree(topts);
9224 		return;
9225 	}
9226 
9227 	tr->topts = tr_topts;
9228 	tr->topts[tr->nr_topts].tracer = tracer;
9229 	tr->topts[tr->nr_topts].topts = topts;
9230 	tr->nr_topts++;
9231 
9232 	for (cnt = 0; opts[cnt].name; cnt++) {
9233 		create_trace_option_file(tr, &topts[cnt], flags,
9234 					 &opts[cnt]);
9235 		MEM_FAIL(topts[cnt].entry == NULL,
9236 			  "Failed to create trace option: %s",
9237 			  opts[cnt].name);
9238 	}
9239 }
9240 
9241 static struct dentry *
9242 create_trace_option_core_file(struct trace_array *tr,
9243 			      const char *option, long index)
9244 {
9245 	struct dentry *t_options;
9246 
9247 	t_options = trace_options_init_dentry(tr);
9248 	if (!t_options)
9249 		return NULL;
9250 
9251 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9252 				 (void *)&tr->trace_flags_index[index],
9253 				 &trace_options_core_fops);
9254 }
9255 
9256 static void create_trace_options_dir(struct trace_array *tr)
9257 {
9258 	struct dentry *t_options;
9259 	bool top_level = tr == &global_trace;
9260 	int i;
9261 
9262 	t_options = trace_options_init_dentry(tr);
9263 	if (!t_options)
9264 		return;
9265 
9266 	for (i = 0; trace_options[i]; i++) {
9267 		if (top_level ||
9268 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9269 			create_trace_option_core_file(tr, trace_options[i], i);
9270 	}
9271 }
9272 
9273 static ssize_t
9274 rb_simple_read(struct file *filp, char __user *ubuf,
9275 	       size_t cnt, loff_t *ppos)
9276 {
9277 	struct trace_array *tr = filp->private_data;
9278 	char buf[64];
9279 	int r;
9280 
9281 	r = tracer_tracing_is_on(tr);
9282 	r = sprintf(buf, "%d\n", r);
9283 
9284 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9285 }
9286 
9287 static ssize_t
9288 rb_simple_write(struct file *filp, const char __user *ubuf,
9289 		size_t cnt, loff_t *ppos)
9290 {
9291 	struct trace_array *tr = filp->private_data;
9292 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9293 	unsigned long val;
9294 	int ret;
9295 
9296 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9297 	if (ret)
9298 		return ret;
9299 
9300 	if (buffer) {
9301 		mutex_lock(&trace_types_lock);
9302 		if (!!val == tracer_tracing_is_on(tr)) {
9303 			val = 0; /* do nothing */
9304 		} else if (val) {
9305 			tracer_tracing_on(tr);
9306 			if (tr->current_trace->start)
9307 				tr->current_trace->start(tr);
9308 		} else {
9309 			tracer_tracing_off(tr);
9310 			if (tr->current_trace->stop)
9311 				tr->current_trace->stop(tr);
9312 			/* Wake up any waiters */
9313 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9314 		}
9315 		mutex_unlock(&trace_types_lock);
9316 	}
9317 
9318 	(*ppos)++;
9319 
9320 	return cnt;
9321 }
9322 
9323 static const struct file_operations rb_simple_fops = {
9324 	.open		= tracing_open_generic_tr,
9325 	.read		= rb_simple_read,
9326 	.write		= rb_simple_write,
9327 	.release	= tracing_release_generic_tr,
9328 	.llseek		= default_llseek,
9329 };
9330 
9331 static ssize_t
9332 buffer_percent_read(struct file *filp, char __user *ubuf,
9333 		    size_t cnt, loff_t *ppos)
9334 {
9335 	struct trace_array *tr = filp->private_data;
9336 	char buf[64];
9337 	int r;
9338 
9339 	r = tr->buffer_percent;
9340 	r = sprintf(buf, "%d\n", r);
9341 
9342 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9343 }
9344 
9345 static ssize_t
9346 buffer_percent_write(struct file *filp, const char __user *ubuf,
9347 		     size_t cnt, loff_t *ppos)
9348 {
9349 	struct trace_array *tr = filp->private_data;
9350 	unsigned long val;
9351 	int ret;
9352 
9353 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9354 	if (ret)
9355 		return ret;
9356 
9357 	if (val > 100)
9358 		return -EINVAL;
9359 
9360 	tr->buffer_percent = val;
9361 
9362 	(*ppos)++;
9363 
9364 	return cnt;
9365 }
9366 
9367 static const struct file_operations buffer_percent_fops = {
9368 	.open		= tracing_open_generic_tr,
9369 	.read		= buffer_percent_read,
9370 	.write		= buffer_percent_write,
9371 	.release	= tracing_release_generic_tr,
9372 	.llseek		= default_llseek,
9373 };
9374 
/* Parent directory of all instance directories; NULL until it is set up. */
static struct dentry *trace_instance_dir;

/* Defined further down; needed by trace_array_create_dir(). */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9379 
9380 static int
9381 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9382 {
9383 	enum ring_buffer_flags rb_flags;
9384 
9385 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9386 
9387 	buf->tr = tr;
9388 
9389 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9390 	if (!buf->buffer)
9391 		return -ENOMEM;
9392 
9393 	buf->data = alloc_percpu(struct trace_array_cpu);
9394 	if (!buf->data) {
9395 		ring_buffer_free(buf->buffer);
9396 		buf->buffer = NULL;
9397 		return -ENOMEM;
9398 	}
9399 
9400 	/* Allocate the first page for all buffers */
9401 	set_buffer_entries(&tr->array_buffer,
9402 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9403 
9404 	return 0;
9405 }
9406 
9407 static void free_trace_buffer(struct array_buffer *buf)
9408 {
9409 	if (buf->buffer) {
9410 		ring_buffer_free(buf->buffer);
9411 		buf->buffer = NULL;
9412 		free_percpu(buf->data);
9413 		buf->data = NULL;
9414 	}
9415 }
9416 
9417 static int allocate_trace_buffers(struct trace_array *tr, int size)
9418 {
9419 	int ret;
9420 
9421 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9422 	if (ret)
9423 		return ret;
9424 
9425 #ifdef CONFIG_TRACER_MAX_TRACE
9426 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9427 				    allocate_snapshot ? size : 1);
9428 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9429 		free_trace_buffer(&tr->array_buffer);
9430 		return -ENOMEM;
9431 	}
9432 	tr->allocated_snapshot = allocate_snapshot;
9433 
9434 	allocate_snapshot = false;
9435 #endif
9436 
9437 	return 0;
9438 }
9439 
9440 static void free_trace_buffers(struct trace_array *tr)
9441 {
9442 	if (!tr)
9443 		return;
9444 
9445 	free_trace_buffer(&tr->array_buffer);
9446 
9447 #ifdef CONFIG_TRACER_MAX_TRACE
9448 	free_trace_buffer(&tr->max_buffer);
9449 #endif
9450 }
9451 
9452 static void init_trace_flags_index(struct trace_array *tr)
9453 {
9454 	int i;
9455 
9456 	/* Used by the trace options files */
9457 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9458 		tr->trace_flags_index[i] = i;
9459 }
9460 
9461 static void __update_tracer_options(struct trace_array *tr)
9462 {
9463 	struct tracer *t;
9464 
9465 	for (t = trace_types; t; t = t->next)
9466 		add_tracer_options(tr, t);
9467 }
9468 
9469 static void update_tracer_options(struct trace_array *tr)
9470 {
9471 	mutex_lock(&trace_types_lock);
9472 	tracer_options_updated = true;
9473 	__update_tracer_options(tr);
9474 	mutex_unlock(&trace_types_lock);
9475 }
9476 
9477 /* Must have trace_types_lock held */
9478 struct trace_array *trace_array_find(const char *instance)
9479 {
9480 	struct trace_array *tr, *found = NULL;
9481 
9482 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9483 		if (tr->name && strcmp(tr->name, instance) == 0) {
9484 			found = tr;
9485 			break;
9486 		}
9487 	}
9488 
9489 	return found;
9490 }
9491 
9492 struct trace_array *trace_array_find_get(const char *instance)
9493 {
9494 	struct trace_array *tr;
9495 
9496 	mutex_lock(&trace_types_lock);
9497 	tr = trace_array_find(instance);
9498 	if (tr)
9499 		tr->ref++;
9500 	mutex_unlock(&trace_types_lock);
9501 
9502 	return tr;
9503 }
9504 
9505 static int trace_array_create_dir(struct trace_array *tr)
9506 {
9507 	int ret;
9508 
9509 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9510 	if (!tr->dir)
9511 		return -EINVAL;
9512 
9513 	ret = event_trace_add_tracer(tr->dir, tr);
9514 	if (ret) {
9515 		tracefs_remove(tr->dir);
9516 		return ret;
9517 	}
9518 
9519 	init_tracer_tracefs(tr, tr->dir);
9520 	__update_tracer_options(tr);
9521 
9522 	return ret;
9523 }
9524 
9525 static struct trace_array *trace_array_create(const char *name)
9526 {
9527 	struct trace_array *tr;
9528 	int ret;
9529 
9530 	ret = -ENOMEM;
9531 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9532 	if (!tr)
9533 		return ERR_PTR(ret);
9534 
9535 	tr->name = kstrdup(name, GFP_KERNEL);
9536 	if (!tr->name)
9537 		goto out_free_tr;
9538 
9539 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9540 		goto out_free_tr;
9541 
9542 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9543 		goto out_free_tr;
9544 
9545 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9546 
9547 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9548 
9549 	raw_spin_lock_init(&tr->start_lock);
9550 
9551 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9552 
9553 	tr->current_trace = &nop_trace;
9554 
9555 	INIT_LIST_HEAD(&tr->systems);
9556 	INIT_LIST_HEAD(&tr->events);
9557 	INIT_LIST_HEAD(&tr->hist_vars);
9558 	INIT_LIST_HEAD(&tr->err_log);
9559 
9560 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9561 		goto out_free_tr;
9562 
9563 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9564 		goto out_free_tr;
9565 
9566 	ftrace_init_trace_array(tr);
9567 
9568 	init_trace_flags_index(tr);
9569 
9570 	if (trace_instance_dir) {
9571 		ret = trace_array_create_dir(tr);
9572 		if (ret)
9573 			goto out_free_tr;
9574 	} else
9575 		__trace_early_add_events(tr);
9576 
9577 	list_add(&tr->list, &ftrace_trace_arrays);
9578 
9579 	tr->ref++;
9580 
9581 	return tr;
9582 
9583  out_free_tr:
9584 	ftrace_free_ftrace_ops(tr);
9585 	free_trace_buffers(tr);
9586 	free_cpumask_var(tr->pipe_cpumask);
9587 	free_cpumask_var(tr->tracing_cpumask);
9588 	kfree(tr->name);
9589 	kfree(tr);
9590 
9591 	return ERR_PTR(ret);
9592 }
9593 
9594 static int instance_mkdir(const char *name)
9595 {
9596 	struct trace_array *tr;
9597 	int ret;
9598 
9599 	mutex_lock(&event_mutex);
9600 	mutex_lock(&trace_types_lock);
9601 
9602 	ret = -EEXIST;
9603 	if (trace_array_find(name))
9604 		goto out_unlock;
9605 
9606 	tr = trace_array_create(name);
9607 
9608 	ret = PTR_ERR_OR_ZERO(tr);
9609 
9610 out_unlock:
9611 	mutex_unlock(&trace_types_lock);
9612 	mutex_unlock(&event_mutex);
9613 	return ret;
9614 }
9615 
9616 /**
9617  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9618  * @name: The name of the trace array to be looked up/created.
9619  *
9620  * Returns pointer to trace array with given name.
9621  * NULL, if it cannot be created.
9622  *
9623  * NOTE: This function increments the reference counter associated with the
9624  * trace array returned. This makes sure it cannot be freed while in use.
9625  * Use trace_array_put() once the trace array is no longer needed.
9626  * If the trace_array is to be freed, trace_array_destroy() needs to
9627  * be called after the trace_array_put(), or simply let user space delete
9628  * it from the tracefs instances directory. But until the
9629  * trace_array_put() is called, user space can not delete it.
9630  *
9631  */
9632 struct trace_array *trace_array_get_by_name(const char *name)
9633 {
9634 	struct trace_array *tr;
9635 
9636 	mutex_lock(&event_mutex);
9637 	mutex_lock(&trace_types_lock);
9638 
9639 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9640 		if (tr->name && strcmp(tr->name, name) == 0)
9641 			goto out_unlock;
9642 	}
9643 
9644 	tr = trace_array_create(name);
9645 
9646 	if (IS_ERR(tr))
9647 		tr = NULL;
9648 out_unlock:
9649 	if (tr)
9650 		tr->ref++;
9651 
9652 	mutex_unlock(&trace_types_lock);
9653 	mutex_unlock(&event_mutex);
9654 	return tr;
9655 }
9656 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9657 
9658 static int __remove_instance(struct trace_array *tr)
9659 {
9660 	int i;
9661 
9662 	/* Reference counter for a newly created trace array = 1. */
9663 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9664 		return -EBUSY;
9665 
9666 	list_del(&tr->list);
9667 
9668 	/* Disable all the flags that were enabled coming in */
9669 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9670 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9671 			set_tracer_flag(tr, 1 << i, 0);
9672 	}
9673 
9674 	tracing_set_nop(tr);
9675 	clear_ftrace_function_probes(tr);
9676 	event_trace_del_tracer(tr);
9677 	ftrace_clear_pids(tr);
9678 	ftrace_destroy_function_files(tr);
9679 	tracefs_remove(tr->dir);
9680 	free_percpu(tr->last_func_repeats);
9681 	free_trace_buffers(tr);
9682 	clear_tracing_err_log(tr);
9683 
9684 	for (i = 0; i < tr->nr_topts; i++) {
9685 		kfree(tr->topts[i].topts);
9686 	}
9687 	kfree(tr->topts);
9688 
9689 	free_cpumask_var(tr->pipe_cpumask);
9690 	free_cpumask_var(tr->tracing_cpumask);
9691 	kfree(tr->name);
9692 	kfree(tr);
9693 
9694 	return 0;
9695 }
9696 
9697 int trace_array_destroy(struct trace_array *this_tr)
9698 {
9699 	struct trace_array *tr;
9700 	int ret;
9701 
9702 	if (!this_tr)
9703 		return -EINVAL;
9704 
9705 	mutex_lock(&event_mutex);
9706 	mutex_lock(&trace_types_lock);
9707 
9708 	ret = -ENODEV;
9709 
9710 	/* Making sure trace array exists before destroying it. */
9711 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9712 		if (tr == this_tr) {
9713 			ret = __remove_instance(tr);
9714 			break;
9715 		}
9716 	}
9717 
9718 	mutex_unlock(&trace_types_lock);
9719 	mutex_unlock(&event_mutex);
9720 
9721 	return ret;
9722 }
9723 EXPORT_SYMBOL_GPL(trace_array_destroy);
9724 
9725 static int instance_rmdir(const char *name)
9726 {
9727 	struct trace_array *tr;
9728 	int ret;
9729 
9730 	mutex_lock(&event_mutex);
9731 	mutex_lock(&trace_types_lock);
9732 
9733 	ret = -ENODEV;
9734 	tr = trace_array_find(name);
9735 	if (tr)
9736 		ret = __remove_instance(tr);
9737 
9738 	mutex_unlock(&trace_types_lock);
9739 	mutex_unlock(&event_mutex);
9740 
9741 	return ret;
9742 }
9743 
9744 static __init void create_trace_instances(struct dentry *d_tracer)
9745 {
9746 	struct trace_array *tr;
9747 
9748 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9749 							 instance_mkdir,
9750 							 instance_rmdir);
9751 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9752 		return;
9753 
9754 	mutex_lock(&event_mutex);
9755 	mutex_lock(&trace_types_lock);
9756 
9757 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9758 		if (!tr->name)
9759 			continue;
9760 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9761 			     "Failed to create instance directory\n"))
9762 			break;
9763 	}
9764 
9765 	mutex_unlock(&trace_types_lock);
9766 	mutex_unlock(&event_mutex);
9767 }
9768 
/*
 * Populate the tracefs directory @d_tracer with the files of trace array
 * @tr: tracer selection, cpumask, options, trace output, buffer sizing,
 * markers, clock and error log, followed by the options directory, the
 * per-CPU entries and the ftrace-specific files.
 *
 * Also records the "ftrace:print" event file in @tr->trace_marker_file and
 * sets @tr->buffer_percent to 50.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	/* The next three files are created with explicit write-only modes */
	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/* Look up the "print" event of the "ftrace" system for this array */
	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default value exposed through the "buffer_percent" file */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			tr, &buffer_percent_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_maxlat_file(tr, d_tracer);
#endif

	/* A failure here is only reported, the remaining files are still created */
	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
			  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9845 
9846 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9847 {
9848 	struct vfsmount *mnt;
9849 	struct file_system_type *type;
9850 
9851 	/*
9852 	 * To maintain backward compatibility for tools that mount
9853 	 * debugfs to get to the tracing facility, tracefs is automatically
9854 	 * mounted to the debugfs/tracing directory.
9855 	 */
9856 	type = get_fs_type("tracefs");
9857 	if (!type)
9858 		return NULL;
9859 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9860 	put_filesystem(type);
9861 	if (IS_ERR(mnt))
9862 		return NULL;
9863 	mntget(mnt);
9864 
9865 	return mnt;
9866 }
9867 
9868 /**
9869  * tracing_init_dentry - initialize top level trace array
9870  *
9871  * This is called when creating files or directories in the tracing
9872  * directory. It is called via fs_initcall() by any of the boot up code
9873  * and expects to return the dentry of the top level tracing directory.
9874  */
9875 int tracing_init_dentry(void)
9876 {
9877 	struct trace_array *tr = &global_trace;
9878 
9879 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9880 		pr_warn("Tracing disabled due to lockdown\n");
9881 		return -EPERM;
9882 	}
9883 
9884 	/* The top level trace array uses  NULL as parent */
9885 	if (tr->dir)
9886 		return 0;
9887 
9888 	if (WARN_ON(!tracefs_initialized()))
9889 		return -ENODEV;
9890 
9891 	/*
9892 	 * As there may still be users that expect the tracing
9893 	 * files to exist in debugfs/tracing, we must automount
9894 	 * the tracefs file system there, so older tools still
9895 	 * work with the newer kernel.
9896 	 */
9897 	tr->dir = debugfs_create_automount("tracing", NULL,
9898 					   trace_automount, NULL);
9899 
9900 	return 0;
9901 }
9902 
/*
 * Start/end of the built-in eval map pointer array, defined outside this
 * file (presumably by the linker script - confirm).
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Boot-time workqueue and the work items queued on it; all __initdata */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9909 
9910 static void __init eval_map_work_func(struct work_struct *work)
9911 {
9912 	int len;
9913 
9914 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9915 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9916 }
9917 
9918 static int __init trace_eval_init(void)
9919 {
9920 	INIT_WORK(&eval_map_work, eval_map_work_func);
9921 
9922 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9923 	if (!eval_map_wq) {
9924 		pr_err("Unable to allocate eval_map_wq\n");
9925 		/* Do work here */
9926 		eval_map_work_func(&eval_map_work);
9927 		return -ENOMEM;
9928 	}
9929 
9930 	queue_work(eval_map_wq, &eval_map_work);
9931 	return 0;
9932 }
9933 
9934 subsys_initcall(trace_eval_init);
9935 
9936 static int __init trace_eval_sync(void)
9937 {
9938 	/* Make sure the eval map updates are finished */
9939 	if (eval_map_wq)
9940 		destroy_workqueue(eval_map_wq);
9941 	return 0;
9942 }
9943 
9944 late_initcall_sync(trace_eval_sync);
9945 
9946 
9947 #ifdef CONFIG_MODULES
9948 static void trace_module_add_evals(struct module *mod)
9949 {
9950 	if (!mod->num_trace_evals)
9951 		return;
9952 
9953 	/*
9954 	 * Modules with bad taint do not have events created, do
9955 	 * not bother with enums either.
9956 	 */
9957 	if (trace_module_has_bad_taint(mod))
9958 		return;
9959 
9960 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9961 }
9962 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map chunk that belongs to module @mod from the
 * trace_eval_maps chain. Does nothing if @mod has no eval maps or no
 * chunk of the chain is owned by it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	/* Address of the pointer that leads to the chunk being examined */
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *item;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	for (item = trace_eval_maps; item; item = item->tail.next) {
		if (item->head.mod == mod)
			break;
		/* Not ours: hop to this chunk's tail and follow its link */
		item = trace_eval_jmp_to_tail(item);
		link = &item->tail.next;
	}

	if (item) {
		*link = trace_eval_jmp_to_tail(item)->tail.next;
		kfree(item);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9994 
9995 static int trace_module_notify(struct notifier_block *self,
9996 			       unsigned long val, void *data)
9997 {
9998 	struct module *mod = data;
9999 
10000 	switch (val) {
10001 	case MODULE_STATE_COMING:
10002 		trace_module_add_evals(mod);
10003 		break;
10004 	case MODULE_STATE_GOING:
10005 		trace_module_remove_evals(mod);
10006 		break;
10007 	}
10008 
10009 	return NOTIFY_OK;
10010 }
10011 
/* Notifier block that hands module state changes to trace_module_notify() */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
10016 #endif /* CONFIG_MODULES */
10017 
/*
 * Create the top level tracefs files of the global trace array.
 *
 * Run either from eval_map_wq or called directly with @work == NULL (see
 * tracer_init_tracefs()); @work itself is not used.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	/* Files shared with instances, then the ones only the top level has */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	/* From here on, module load/unload updates the eval maps */
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);
}
10056 
10057 static __init int tracer_init_tracefs(void)
10058 {
10059 	int ret;
10060 
10061 	trace_access_lock_init();
10062 
10063 	ret = tracing_init_dentry();
10064 	if (ret)
10065 		return 0;
10066 
10067 	if (eval_map_wq) {
10068 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10069 		queue_work(eval_map_wq, &tracerfs_init_work);
10070 	} else {
10071 		tracer_init_tracefs_work_func(NULL);
10072 	}
10073 
10074 	rv_init_interface();
10075 
10076 	return 0;
10077 }
10078 
10079 fs_initcall(tracer_init_tracefs);
10080 
/* Defined below; declared here so the notifier blocks can reference it */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Registered on panic_notifier_list by tracer_alloc_buffers() */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Registered with register_die_notifier() by tracer_alloc_buffers() */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10093 
10094 /*
10095  * The idea is to execute the following die/panic callback early, in order
10096  * to avoid showing irrelevant information in the trace (like other panic
10097  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10098  * warnings get disabled (to prevent potential log flooding).
10099  */
10100 static int trace_die_panic_handler(struct notifier_block *self,
10101 				unsigned long ev, void *unused)
10102 {
10103 	if (!ftrace_dump_on_oops)
10104 		return NOTIFY_DONE;
10105 
10106 	/* The die notifier requires DIE_OOPS to trigger */
10107 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10108 		return NOTIFY_DONE;
10109 
10110 	ftrace_dump(ftrace_dump_on_oops);
10111 
10112 	return NOTIFY_DONE;
10113 }
10114 
/*
 * Upper bound on the length trace_printk_seq() hands to printk() in one
 * go. printk is set to a max of 1024, but nothing should be printing
 * 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Log level used for the ftrace dump. Defined in one place so that it
 * only needs changing here if a different level is ever wanted.
 */
#define KERN_TRACE		KERN_EMERG
10127 
10128 void
10129 trace_printk_seq(struct trace_seq *s)
10130 {
10131 	/* Probably should print a warning here. */
10132 	if (s->seq.len >= TRACE_MAX_PRINT)
10133 		s->seq.len = TRACE_MAX_PRINT;
10134 
10135 	/*
10136 	 * More paranoid code. Although the buffer size is set to
10137 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10138 	 * an extra layer of protection.
10139 	 */
10140 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10141 		s->seq.len = s->seq.size - 1;
10142 
10143 	/* should be zero ended, but we are paranoid. */
10144 	s->buffer[s->seq.len] = 0;
10145 
10146 	printk(KERN_TRACE "%s", s->buffer);
10147 
10148 	trace_seq_init(s);
10149 }
10150 
10151 void trace_init_global_iter(struct trace_iterator *iter)
10152 {
10153 	iter->tr = &global_trace;
10154 	iter->trace = iter->tr->current_trace;
10155 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10156 	iter->array_buffer = &global_trace.array_buffer;
10157 
10158 	if (iter->trace && iter->trace->open)
10159 		iter->trace->open(iter);
10160 
10161 	/* Annotate start of buffers if we had overruns */
10162 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10163 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10164 
10165 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10166 	if (trace_clocks[iter->tr->clock_id].in_ns)
10167 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10168 
10169 	/* Can not use kmalloc for iter.temp and iter.fmt */
10170 	iter->temp = static_temp_buf;
10171 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10172 	iter->fmt = static_fmt_buf;
10173 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10174 }
10175 
/*
 * ftrace_dump - print the global trace buffer with printk()
 * @oops_dump_mode: DUMP_ALL prints the buffers of all CPUs, DUMP_ORIG only
 *	the buffer of the CPU running the dump, DUMP_NONE prints nothing.
 *	Any other value is reported and handled like DUMP_ALL.
 *
 * Only one dump can run at a time; a caller that finds one in progress
 * returns immediately. Tracing is turned off before dumping and is not
 * turned back on here. The dump itself runs with local interrupts
 * disabled, and entries are consumed from the buffer as they are printed.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/* Bump the per-CPU "disabled" counters for the duration of the dump */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	/* Remember the flag so it can be restored at out_enable */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	/* NOTE(review): these two printk()s carry no KERN_TRACE level, unlike the rest */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Separator line before the very first entry */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* A long dump must not trip the NMI watchdog */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	/* Undo the flag change and the "disabled" increments made above */
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
10282 
/* Size of the kernel bounce buffer used by trace_parse_run_command() */
#define WRITE_BUFSIZE  4096

/*
 * trace_parse_run_command - run @createfn on each line written by user space
 * @file: not used by this function
 * @buffer: user-space buffer holding the text
 * @count: number of bytes in @buffer
 * @ppos: not used by this function
 * @createfn: called once per line, with the trailing '\n' stripped and
 *	everything from the first '#' on removed
 *
 * The text is copied in chunks of at most WRITE_BUFSIZE - 1 bytes. A line
 * cut off by the end of a chunk is re-read at the start of the next chunk.
 *
 * Returns the number of bytes consumed on success. Otherwise returns
 * -ENOMEM if the bounce buffer cannot be allocated, -EFAULT if the copy
 * from user space fails, -EINVAL if a single line does not fit in the
 * bounce buffer, or the non-zero value returned by @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating '\0' */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				/* Complete line: consume it including the '\n' */
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line with more input pending: if it
					 * is not at the start of the chunk, refill the
					 * buffer starting from this line.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10349 
10350 #ifdef CONFIG_TRACER_MAX_TRACE
10351 __init static bool tr_needs_alloc_snapshot(const char *name)
10352 {
10353 	char *test;
10354 	int len = strlen(name);
10355 	bool ret;
10356 
10357 	if (!boot_snapshot_index)
10358 		return false;
10359 
10360 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10361 	    boot_snapshot_info[len] == '\t')
10362 		return true;
10363 
10364 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10365 	if (!test)
10366 		return false;
10367 
10368 	sprintf(test, "\t%s\t", name);
10369 	ret = strstr(boot_snapshot_info, test) == NULL;
10370 	kfree(test);
10371 	return ret;
10372 }
10373 
10374 __init static void do_allocate_snapshot(const char *name)
10375 {
10376 	if (!tr_needs_alloc_snapshot(name))
10377 		return;
10378 
10379 	/*
10380 	 * When allocate_snapshot is set, the next call to
10381 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10382 	 * will allocate the snapshot buffer. That will alse clear
10383 	 * this flag.
10384 	 */
10385 	allocate_snapshot = true;
10386 }
10387 #else
/* Without CONFIG_TRACER_MAX_TRACE this is an empty stub */
static inline void do_allocate_snapshot(const char *name) { }
10389 #endif
10390 
10391 __init static void enable_instances(void)
10392 {
10393 	struct trace_array *tr;
10394 	char *curr_str;
10395 	char *str;
10396 	char *tok;
10397 
10398 	/* A tab is always appended */
10399 	boot_instance_info[boot_instance_index - 1] = '\0';
10400 	str = boot_instance_info;
10401 
10402 	while ((curr_str = strsep(&str, "\t"))) {
10403 
10404 		tok = strsep(&curr_str, ",");
10405 
10406 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10407 			do_allocate_snapshot(tok);
10408 
10409 		tr = trace_array_get_by_name(tok);
10410 		if (!tr) {
10411 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10412 			continue;
10413 		}
10414 		/* Allow user space to delete it */
10415 		trace_array_put(tr);
10416 
10417 		while ((tok = strsep(&curr_str, ","))) {
10418 			early_enable_events(tr, tok, true);
10419 		}
10420 	}
10421 }
10422 
/*
 * Boot-time setup of the global trace array: allocate the cpumasks, the
 * temp buffer, the saved cmdlines and the ring buffers, register the nop
 * tracer and the panic/die notifiers, and put global_trace on the list of
 * trace arrays.
 *
 * Returns 0 on success, -EPERM when tracefs is locked down, otherwise the
 * error from cpuhp_setup_state_multi() or -ENOMEM, with everything
 * allocated so far released in reverse order.
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	/* ret was overwritten above; the remaining failures all mean -ENOMEM */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	/* A bad boot clock name is only warned about; setup continues */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

	/* Error unwinding: each label releases one step, newest first */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
10557 
10558 void __init ftrace_boot_snapshot(void)
10559 {
10560 #ifdef CONFIG_TRACER_MAX_TRACE
10561 	struct trace_array *tr;
10562 
10563 	if (!snapshot_at_boot)
10564 		return;
10565 
10566 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10567 		if (!tr->allocated_snapshot)
10568 			continue;
10569 
10570 		tracing_snapshot_instance(tr);
10571 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10572 	}
10573 #endif
10574 }
10575 
10576 void __init early_trace_init(void)
10577 {
10578 	if (tracepoint_printk) {
10579 		tracepoint_print_iter =
10580 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10581 		if (MEM_FAIL(!tracepoint_print_iter,
10582 			     "Failed to allocate trace iterator\n"))
10583 			tracepoint_printk = 0;
10584 		else
10585 			static_key_enable(&tracepoint_printk_key.key);
10586 	}
10587 	tracer_alloc_buffers();
10588 
10589 	init_events();
10590 }
10591 
10592 void __init trace_init(void)
10593 {
10594 	trace_event_init();
10595 
10596 	if (boot_instance_index)
10597 		enable_instances();
10598 }
10599 
10600 __init static void clear_boot_tracer(void)
10601 {
10602 	/*
10603 	 * The default tracer at boot buffer is an init section.
10604 	 * This function is called in lateinit. If we did not
10605 	 * find the boot tracer, then clear it out, to prevent
10606 	 * later registration from accessing the buffer that is
10607 	 * about to be freed.
10608 	 */
10609 	if (!default_bootup_tracer)
10610 		return;
10611 
10612 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10613 	       default_bootup_tracer);
10614 	default_bootup_tracer = NULL;
10615 }
10616 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the global trace array to the "global" clock when no clock was
 * chosen at boot and the sched clock is not stable. Nothing is changed
 * while tracefs is locked down.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
10638 
10639 __init static int late_trace_init(void)
10640 {
10641 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10642 		static_key_disable(&tracepoint_printk_key.key);
10643 		tracepoint_printk = 0;
10644 	}
10645 
10646 	tracing_set_default_clock();
10647 	clear_boot_tracer();
10648 	return 0;
10649 }
10650 
10651 late_initcall_sync(late_trace_init);
10652