xref: /openbmc/linux/kernel/trace/trace.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 /*
59  * On boot up, the ring buffer is set to the minimum size, so that
60  * we do not waste memory on systems that are not using tracing.
61  */
62 bool ring_buffer_expanded;
63 
64 #ifdef CONFIG_FTRACE_STARTUP_TEST
65 /*
66  * We need to change this state when a selftest is running.
67  * A selftest will look into the ring buffer to count the
68  * entries inserted during the selftest, although concurrent
69  * insertions into the ring buffer, such as trace_printk(), could occur
70  * at the same time, giving false positive or negative results.
71  */
72 static bool __read_mostly tracing_selftest_running;
73 
74 /*
75  * If boot-time tracing including tracers/events via kernel cmdline
76  * is running, we do not want to run SELFTEST.
77  */
78 bool __read_mostly tracing_selftest_disabled;
79 
80 void __init disable_tracing_selftest(const char *reason)
81 {
82 	if (!tracing_selftest_disabled) {
83 		tracing_selftest_disabled = true;
84 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 	}
86 }
87 #else
88 #define tracing_selftest_running	0
89 #define tracing_selftest_disabled	0
90 #endif
91 
92 /* Pipe tracepoints to printk */
93 static struct trace_iterator *tracepoint_print_iter;
94 int tracepoint_printk;
95 static bool tracepoint_printk_stop_on_boot __initdata;
96 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97 
98 /* For tracers that don't implement custom flags */
99 static struct tracer_opt dummy_tracer_opt[] = {
100 	{ }
101 };
102 
103 static int
104 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105 {
106 	return 0;
107 }
108 
109 /*
110  * To prevent the comm cache from being overwritten when no
111  * tracing is active, only save the comm when a trace event
112  * occurred.
113  */
114 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115 
116 /*
117  * Kill all tracing for good (never come back).
118  * It is initialized to 1 but will turn to zero if the initialization
119  * of the tracer is successful. But that is the only place that sets
120  * this back to zero.
121  */
122 static int tracing_disabled = 1;
123 
124 cpumask_var_t __read_mostly	tracing_buffer_mask;
125 
126 /*
127  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128  *
129  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
130  * is set, then ftrace_dump is called. This will output the contents
131  * of the ftrace buffers to the console.  This is very useful for
132  * capturing traces that lead to crashes and outputting them to a
133  * serial console.
134  *
135  * It is off by default, but you can enable it either by specifying
136  * "ftrace_dump_on_oops" on the kernel command line or by setting
137  * /proc/sys/kernel/ftrace_dump_on_oops.
138  * Set it to 1 to dump the buffers of all CPUs.
139  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
140  */
141 
142 enum ftrace_dump_mode ftrace_dump_on_oops;
143 
144 /* When set, tracing will stop when a WARN*() is hit */
145 int __disable_trace_on_warning;
146 
147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
148 /* Map of enums to their values, for "eval_map" file */
149 struct trace_eval_map_head {
150 	struct module			*mod;
151 	unsigned long			length;
152 };
153 
154 union trace_eval_map_item;
155 
156 struct trace_eval_map_tail {
157 	/*
158 	 * "end" is first and points to NULL as it must be different
159 	 * than "mod" or "eval_string"
160 	 */
161 	union trace_eval_map_item	*next;
162 	const char			*end;	/* points to NULL */
163 };
164 
165 static DEFINE_MUTEX(trace_eval_mutex);
166 
167 /*
168  * The trace_eval_maps are saved in an array with two extra elements,
169  * one at the beginning, and one at the end. The beginning item contains
170  * the count of the saved maps (head.length), and the module they
171  * belong to if not built in (head.mod). The ending item contains a
172  * pointer to the next array of saved eval_map items.
173  */
174 union trace_eval_map_item {
175 	struct trace_eval_map		map;
176 	struct trace_eval_map_head	head;
177 	struct trace_eval_map_tail	tail;
178 };
179 
180 static union trace_eval_map_item *trace_eval_maps;
181 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182 
183 int tracing_set_tracer(struct trace_array *tr, const char *buf);
184 static void ftrace_trace_userstack(struct trace_array *tr,
185 				   struct trace_buffer *buffer,
186 				   unsigned int trace_ctx);
187 
188 #define MAX_TRACER_SIZE		100
189 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
190 static char *default_bootup_tracer;
191 
192 static bool allocate_snapshot;
193 static bool snapshot_at_boot;
194 
195 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_instance_index;
197 
198 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
199 static int boot_snapshot_index;
200 
201 static int __init set_cmdline_ftrace(char *str)
202 {
203 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204 	default_bootup_tracer = bootup_tracer_buf;
205 	/* We are using ftrace early, expand it */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("ftrace=", set_cmdline_ftrace);
210 
211 static int __init set_ftrace_dump_on_oops(char *str)
212 {
213 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
214 		ftrace_dump_on_oops = DUMP_ALL;
215 		return 1;
216 	}
217 
218 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219 		ftrace_dump_on_oops = DUMP_ORIG;
220 		return 1;
221 	}
222 
223 	return 0;
224 }
225 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
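/*
 * Example usage (illustrative, derived from the parsing above): booting
 * with "ftrace_dump_on_oops" or "ftrace_dump_on_oops=1" dumps the buffers
 * of all CPUs on an oops, while "ftrace_dump_on_oops=orig_cpu" (or "=2")
 * dumps only the CPU that triggered it. The same values may be written to
 * /proc/sys/kernel/ftrace_dump_on_oops at run time.
 */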
226 
227 static int __init stop_trace_on_warning(char *str)
228 {
229 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230 		__disable_trace_on_warning = 1;
231 	return 1;
232 }
233 __setup("traceoff_on_warning", stop_trace_on_warning);
234 
235 static int __init boot_alloc_snapshot(char *str)
236 {
237 	char *slot = boot_snapshot_info + boot_snapshot_index;
238 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239 	int ret;
240 
241 	if (str[0] == '=') {
242 		str++;
243 		if (strlen(str) >= left)
244 			return -1;
245 
246 		ret = snprintf(slot, left, "%s\t", str);
247 		boot_snapshot_index += ret;
248 	} else {
249 		allocate_snapshot = true;
250 		/* We also need the main ring buffer expanded */
251 		ring_buffer_expanded = true;
252 	}
253 	return 1;
254 }
255 __setup("alloc_snapshot", boot_alloc_snapshot);
256 
257 
258 static int __init boot_snapshot(char *str)
259 {
260 	snapshot_at_boot = true;
261 	boot_alloc_snapshot(str);
262 	return 1;
263 }
264 __setup("ftrace_boot_snapshot", boot_snapshot);
265 
266 
267 static int __init boot_instance(char *str)
268 {
269 	char *slot = boot_instance_info + boot_instance_index;
270 	int left = sizeof(boot_instance_info) - boot_instance_index;
271 	int ret;
272 
273 	if (strlen(str) >= left)
274 		return -1;
275 
276 	ret = snprintf(slot, left, "%s\t", str);
277 	boot_instance_index += ret;
278 
279 	return 1;
280 }
281 __setup("trace_instance=", boot_instance);
282 
283 
284 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285 
286 static int __init set_trace_boot_options(char *str)
287 {
288 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289 	return 1;
290 }
291 __setup("trace_options=", set_trace_boot_options);
292 
293 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294 static char *trace_boot_clock __initdata;
295 
296 static int __init set_trace_boot_clock(char *str)
297 {
298 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299 	trace_boot_clock = trace_boot_clock_buf;
300 	return 1;
301 }
302 __setup("trace_clock=", set_trace_boot_clock);
303 
304 static int __init set_tracepoint_printk(char *str)
305 {
306 	/* Ignore the "tp_printk_stop_on_boot" param */
307 	if (*str == '_')
308 		return 0;
309 
310 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311 		tracepoint_printk = 1;
312 	return 1;
313 }
314 __setup("tp_printk", set_tracepoint_printk);
315 
316 static int __init set_tracepoint_printk_stop(char *str)
317 {
318 	tracepoint_printk_stop_on_boot = true;
319 	return 1;
320 }
321 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322 
323 unsigned long long ns2usecs(u64 nsec)
324 {
325 	nsec += 500;
326 	do_div(nsec, 1000);
327 	return nsec;
328 }
329 
330 static void
331 trace_process_export(struct trace_export *export,
332 	       struct ring_buffer_event *event, int flag)
333 {
334 	struct trace_entry *entry;
335 	unsigned int size = 0;
336 
337 	if (export->flags & flag) {
338 		entry = ring_buffer_event_data(event);
339 		size = ring_buffer_event_length(event);
340 		export->write(export, entry, size);
341 	}
342 }
343 
344 static DEFINE_MUTEX(ftrace_export_lock);
345 
346 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347 
348 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
350 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351 
352 static inline void ftrace_exports_enable(struct trace_export *export)
353 {
354 	if (export->flags & TRACE_EXPORT_FUNCTION)
355 		static_branch_inc(&trace_function_exports_enabled);
356 
357 	if (export->flags & TRACE_EXPORT_EVENT)
358 		static_branch_inc(&trace_event_exports_enabled);
359 
360 	if (export->flags & TRACE_EXPORT_MARKER)
361 		static_branch_inc(&trace_marker_exports_enabled);
362 }
363 
364 static inline void ftrace_exports_disable(struct trace_export *export)
365 {
366 	if (export->flags & TRACE_EXPORT_FUNCTION)
367 		static_branch_dec(&trace_function_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_EVENT)
370 		static_branch_dec(&trace_event_exports_enabled);
371 
372 	if (export->flags & TRACE_EXPORT_MARKER)
373 		static_branch_dec(&trace_marker_exports_enabled);
374 }
375 
376 static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 {
378 	struct trace_export *export;
379 
380 	preempt_disable_notrace();
381 
382 	export = rcu_dereference_raw_check(ftrace_exports_list);
383 	while (export) {
384 		trace_process_export(export, event, flag);
385 		export = rcu_dereference_raw_check(export->next);
386 	}
387 
388 	preempt_enable_notrace();
389 }
390 
391 static inline void
392 add_trace_export(struct trace_export **list, struct trace_export *export)
393 {
394 	rcu_assign_pointer(export->next, *list);
395 	/*
396 	 * We are entering export into the list but another
397 	 * CPU might be walking that list. We need to make sure
398 	 * the export->next pointer is valid before another CPU sees
399 	 * the export pointer inserted into the list.
400 	 */
401 	rcu_assign_pointer(*list, export);
402 }
403 
404 static inline int
405 rm_trace_export(struct trace_export **list, struct trace_export *export)
406 {
407 	struct trace_export **p;
408 
409 	for (p = list; *p != NULL; p = &(*p)->next)
410 		if (*p == export)
411 			break;
412 
413 	if (*p != export)
414 		return -1;
415 
416 	rcu_assign_pointer(*p, (*p)->next);
417 
418 	return 0;
419 }
420 
421 static inline void
422 add_ftrace_export(struct trace_export **list, struct trace_export *export)
423 {
424 	ftrace_exports_enable(export);
425 
426 	add_trace_export(list, export);
427 }
428 
429 static inline int
430 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
431 {
432 	int ret;
433 
434 	ret = rm_trace_export(list, export);
435 	ftrace_exports_disable(export);
436 
437 	return ret;
438 }
439 
440 int register_ftrace_export(struct trace_export *export)
441 {
442 	if (WARN_ON_ONCE(!export->write))
443 		return -1;
444 
445 	mutex_lock(&ftrace_export_lock);
446 
447 	add_ftrace_export(&ftrace_exports_list, export);
448 
449 	mutex_unlock(&ftrace_export_lock);
450 
451 	return 0;
452 }
453 EXPORT_SYMBOL_GPL(register_ftrace_export);
454 
455 int unregister_ftrace_export(struct trace_export *export)
456 {
457 	int ret;
458 
459 	mutex_lock(&ftrace_export_lock);
460 
461 	ret = rm_ftrace_export(&ftrace_exports_list, export);
462 
463 	mutex_unlock(&ftrace_export_lock);
464 
465 	return ret;
466 }
467 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
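/*
 * Usage sketch (not part of this file): a module that wants to mirror
 * function trace events to its own sink could register a trace_export
 * roughly as below. my_export_write() and my_transport_send() are
 * hypothetical names; only the .write callback and .flags field are
 * needed before calling register_ftrace_export().
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		my_transport_send(entry, size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */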
468 
469 /* trace_flags holds trace_options default values */
470 #define TRACE_DEFAULT_FLAGS						\
471 	(FUNCTION_DEFAULT_FLAGS |					\
472 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
473 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
474 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
475 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
476 	 TRACE_ITER_HASH_PTR)
477 
478 /* trace_options that are only supported by global_trace */
479 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
480 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481 
482 /* trace_flags that are default zero for instances */
483 #define ZEROED_TRACE_FLAGS \
484 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485 
486 /*
487  * The global_trace is the descriptor that holds the top-level tracing
488  * buffers for the live tracing.
489  */
490 static struct trace_array global_trace = {
491 	.trace_flags = TRACE_DEFAULT_FLAGS,
492 };
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
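/*
 * Usage sketch: callers that look up a trace array hold the reference
 * while using it and drop it when done ("my_instance" and
 * do_something_with() are hypothetical):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		do_something_with(tr);
 *		trace_array_put(tr);
 *	}
 */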
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712 	unsigned long pid = (unsigned long)v - 1;
713 
714 	seq_printf(m, "%lu\n", pid);
715 	return 0;
716 }
717 
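/*
 * Usage sketch: trace_pid_start(), trace_pid_next() and trace_pid_show()
 * are meant to be wired into a seq_file's seq_operations by code that
 * exposes a pid list. Roughly (my_pid_list and my_stop() are hypothetical,
 * and any locking is elided):
 *
 *	static void *my_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_seq_ops = {
 *		.start	= my_start,
 *		.next	= my_next,
 *		.stop	= my_stop,
 *		.show	= trace_pid_show,
 *	};
 */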
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always recreate a new array. The write is an all or nothing
739 	 * operation. Always create a new array when adding new pids by
740 	 * the user. If the operation fails, then the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. Anyway, this is
855  * configurable at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * Serialize access to the ring buffer.
871  *
872  * The ring buffer serializes readers, but that is only low-level protection.
873  * The validity of the events (returned by ring_buffer_peek() etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow another process to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not reader page) in ring buffer, and this page will be rewritten
880  *      by events producer.
881  *   B) The page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to system.
883  *
884  * These primitives allow multiple processes to access different per-CPU
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only from read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
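/*
 * Usage sketch: a reader that consumes events from one CPU's buffer
 * brackets the access with these helpers (error handling elided):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively,
 * blocking all per-cpu readers.
 */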
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 					unsigned int trace_ctx,
963 					int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967 				      struct trace_buffer *buffer,
968 				      unsigned long trace_ctx,
969 				      int skip, struct pt_regs *regs)
970 {
971 }
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races of where it gets disabled but we still do a record.
1008 	 * As the check is in the fast path of the tracers, it is more
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
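/*
 * Usage note (sketch): callers normally go through the trace_puts() macro,
 * which passes _THIS_IP_ and picks __trace_bputs() for compile-time
 * constant strings and __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 */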
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, snapshot can not be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
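/*
 * Usage sketch: kernel code that wants to capture the trace around a rare
 * condition can call this directly (assuming the snapshot buffer has been
 * allocated beforehand, see tracing_snapshot_alloc()):
 *
 *	if (unlikely(rare_condition))
 *		tracing_snapshot();
 */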
1209 
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:		The tracing instance to snapshot
1213  * @cond_data:	The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 	tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239 	 * cond_snapshot.update() has already taken.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int ret;
1267 
1268 	if (!tr->allocated_snapshot) {
1269 
1270 		/* allocate spare buffer */
1271 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 		if (ret < 0)
1274 			return ret;
1275 
1276 		tr->allocated_snapshot = true;
1277 	}
1278 
1279 	return 0;
1280 }
1281 
1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284 	/*
1285 	 * We don't free the ring buffer. Instead, we resize it because
1286 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1287 	 * we want to preserve it.
1288 	 */
1289 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 	set_buffer_entries(&tr->max_buffer, 1);
1291 	tracing_reset_online_cpus(&tr->max_buffer);
1292 	tr->allocated_snapshot = false;
1293 }
1294 
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
1305 int tracing_alloc_snapshot(void)
1306 {
1307 	struct trace_array *tr = &global_trace;
1308 	int ret;
1309 
1310 	ret = tracing_alloc_snapshot_instance(tr);
1311 	WARN_ON(ret < 0);
1312 
1313 	return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316 
1317 /**
1318  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319  *
1320  * This is similar to tracing_snapshot(), but it will allocate the
1321  * snapshot buffer if it isn't already allocated. Use this only
1322  * where it is safe to sleep, as the allocation may sleep.
1323  *
1324  * This causes a swap between the snapshot buffer and the current live
1325  * tracing buffer. You can use this to take snapshots of the live
1326  * trace when some condition is triggered, but continue to trace.
1327  */
1328 void tracing_snapshot_alloc(void)
1329 {
1330 	int ret;
1331 
1332 	ret = tracing_alloc_snapshot();
1333 	if (ret < 0)
1334 		return;
1335 
1336 	tracing_snapshot();
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339 
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:		The tracing instance
1343  * @cond_data:	User data to associate with the snapshot
1344  * @update:	Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 				 cond_update_fn_t update)
1355 {
1356 	struct cond_snapshot *cond_snapshot;
1357 	int ret = 0;
1358 
1359 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 	if (!cond_snapshot)
1361 		return -ENOMEM;
1362 
1363 	cond_snapshot->cond_data = cond_data;
1364 	cond_snapshot->update = update;
1365 
1366 	mutex_lock(&trace_types_lock);
1367 
1368 	ret = tracing_alloc_snapshot_instance(tr);
1369 	if (ret)
1370 		goto fail_unlock;
1371 
1372 	if (tr->current_trace->use_max_tr) {
1373 		ret = -EBUSY;
1374 		goto fail_unlock;
1375 	}
1376 
1377 	/*
1378 	 * The cond_snapshot can only change to NULL without the
1379 	 * trace_types_lock. We don't care if we race with it going
1380 	 * to NULL, but we want to make sure that it's not set to
1381 	 * something other than NULL when we get here, which we can
1382 	 * do safely with only holding the trace_types_lock and not
1383 	 * having to take the max_lock.
1384 	 */
1385 	if (tr->cond_snapshot) {
1386 		ret = -EBUSY;
1387 		goto fail_unlock;
1388 	}
1389 
1390 	local_irq_disable();
1391 	arch_spin_lock(&tr->max_lock);
1392 	tr->cond_snapshot = cond_snapshot;
1393 	arch_spin_unlock(&tr->max_lock);
1394 	local_irq_enable();
1395 
1396 	mutex_unlock(&trace_types_lock);
1397 
1398 	return ret;
1399 
1400  fail_unlock:
1401 	mutex_unlock(&trace_types_lock);
1402 	kfree(cond_snapshot);
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
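/*
 * Usage sketch (hypothetical names): a user of conditional snapshots
 * supplies an update callback that decides, based on the cond_data,
 * whether the snapshot should actually be taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hit_threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */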
1406 
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:		The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 	int ret = 0;
1420 
1421 	local_irq_disable();
1422 	arch_spin_lock(&tr->max_lock);
1423 
1424 	if (!tr->cond_snapshot)
1425 		ret = -EINVAL;
1426 	else {
1427 		kfree(tr->cond_snapshot);
1428 		tr->cond_snapshot = NULL;
1429 	}
1430 
1431 	arch_spin_unlock(&tr->max_lock);
1432 	local_irq_enable();
1433 
1434 	return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
1438 void tracing_snapshot(void)
1439 {
1440 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
1443 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448 int tracing_alloc_snapshot(void)
1449 {
1450 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 	return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454 void tracing_snapshot_alloc(void)
1455 {
1456 	/* Give warning */
1457 	tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 	return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467 	return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470 int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472 	return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)	do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477 
1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 	if (tr->array_buffer.buffer)
1481 		ring_buffer_record_off(tr->array_buffer.buffer);
1482 	/*
1483 	 * This flag is looked at when buffers haven't been allocated
1484 	 * yet, or by some tracers (like irqsoff), that just want to
1485 	 * know if the ring buffer has been disabled, but it can handle
1486 	 * races of where it gets disabled but we still do a record.
1487 	 * As the check is in the fast path of the tracers, it is more
1488 	 * important to be fast than accurate.
1489 	 */
1490 	tr->buffer_disabled = 1;
1491 	/* Make the flag seen by readers */
1492 	smp_wmb();
1493 }
1494 
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
1503 void tracing_off(void)
1504 {
1505 	tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
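/*
 * Usage sketch: code that wants to freeze the trace at the moment a
 * problem is detected can simply call this (something_went_wrong is a
 * stand-in condition):
 *
 *	if (something_went_wrong)
 *		tracing_off();
 *
 * The buffers then keep the events leading up to the call for later
 * inspection via the tracefs "trace" file.
 */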
1508 
1509 void disable_trace_on_warning(void)
1510 {
1511 	if (__disable_trace_on_warning) {
1512 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 			"Disabling tracing due to warning\n");
1514 		tracing_off();
1515 	}
1516 }
1517 
1518 /**
1519  * tracer_tracing_is_on - show real state of ring buffer enabled
1520  * @tr : the trace array to know if ring buffer is enabled
1521  *
1522  * Shows real state of the ring buffer if it is enabled or not.
1523  */
1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 	if (tr->array_buffer.buffer)
1527 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 	return !tr->buffer_disabled;
1529 }
1530 
1531 /**
1532  * tracing_is_on - show state of ring buffers enabled
1533  */
1534 int tracing_is_on(void)
1535 {
1536 	return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539 
1540 static int __init set_buf_size(char *str)
1541 {
1542 	unsigned long buf_size;
1543 
1544 	if (!str)
1545 		return 0;
1546 	buf_size = memparse(str, &str);
1547 	/*
1548 	 * nr_entries can not be zero and the startup
1549 	 * tests require some buffer space. Therefore
1550 	 * ensure we have at least 4096 bytes of buffer.
1551 	 */
1552 	trace_buf_size = max(4096UL, buf_size);
1553 	return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
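/*
 * Example (illustrative): "trace_buf_size=1441792" keeps the default,
 * while memparse() also accepts size suffixes, e.g. "trace_buf_size=8M".
 */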
1556 
1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 	unsigned long threshold;
1560 	int ret;
1561 
1562 	if (!str)
1563 		return 0;
1564 	ret = kstrtoul(str, 0, &threshold);
1565 	if (ret < 0)
1566 		return 0;
1567 	tracing_thresh = threshold * 1000;
1568 	return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
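/*
 * Example (illustrative): "tracing_thresh=100" on the kernel command line
 * sets a threshold of 100 microseconds, stored internally in nanoseconds
 * (100000) by the multiplication above.
 */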
1571 
1572 unsigned long nsecs_to_usecs(unsigned long nsecs)
1573 {
1574 	return nsecs / 1000;
1575 }
1576 
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
1585 
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588 	TRACE_FLAGS
1589 	NULL
1590 };
1591 
1592 static struct {
1593 	u64 (*func)(void);
1594 	const char *name;
1595 	int in_ns;		/* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 	{ trace_clock_local,		"local",	1 },
1598 	{ trace_clock_global,		"global",	1 },
1599 	{ trace_clock_counter,		"counter",	0 },
1600 	{ trace_clock_jiffies,		"uptime",	0 },
1601 	{ trace_clock,			"perf",		1 },
1602 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1603 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1604 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1605 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1606 	ARCH_TRACE_CLOCKS
1607 };
1608 
1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 	if (trace_clocks[tr->clock_id].in_ns)
1612 		return true;
1613 
1614 	return false;
1615 }
1616 
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 	memset(parser, 0, sizeof(*parser));
1623 
1624 	parser->buffer = kmalloc(size, GFP_KERNEL);
1625 	if (!parser->buffer)
1626 		return 1;
1627 
1628 	parser->size = size;
1629 	return 0;
1630 }
1631 
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 	kfree(parser->buffer);
1638 	parser->buffer = NULL;
1639 }
1640 
1641 /*
1642  * trace_get_user - reads the user input string separated by space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 	size_t cnt, loff_t *ppos)
1654 {
1655 	char ch;
1656 	size_t read = 0;
1657 	ssize_t ret;
1658 
1659 	if (!*ppos)
1660 		trace_parser_clear(parser);
1661 
1662 	ret = get_user(ch, ubuf++);
1663 	if (ret)
1664 		goto out;
1665 
1666 	read++;
1667 	cnt--;
1668 
1669 	/*
1670 	 * The parser is not finished with the last write,
1671 	 * continue reading the user input without skipping spaces.
1672 	 */
1673 	if (!parser->cont) {
1674 		/* skip white space */
1675 		while (cnt && isspace(ch)) {
1676 			ret = get_user(ch, ubuf++);
1677 			if (ret)
1678 				goto out;
1679 			read++;
1680 			cnt--;
1681 		}
1682 
1683 		parser->idx = 0;
1684 
1685 		/* only spaces were written */
1686 		if (isspace(ch) || !ch) {
1687 			*ppos += read;
1688 			ret = read;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	/* read the non-space input */
1694 	while (cnt && !isspace(ch) && ch) {
1695 		if (parser->idx < parser->size - 1)
1696 			parser->buffer[parser->idx++] = ch;
1697 		else {
1698 			ret = -EINVAL;
1699 			goto out;
1700 		}
1701 		ret = get_user(ch, ubuf++);
1702 		if (ret)
1703 			goto out;
1704 		read++;
1705 		cnt--;
1706 	}
1707 
1708 	/* We either got finished input or we have to wait for another call. */
1709 	if (isspace(ch) || !ch) {
1710 		parser->buffer[parser->idx] = 0;
1711 		parser->cont = false;
1712 	} else if (parser->idx < parser->size - 1) {
1713 		parser->cont = true;
1714 		parser->buffer[parser->idx++] = ch;
1715 		/* Make sure the parsed string always terminates with '\0'. */
1716 		parser->buffer[parser->idx] = 0;
1717 	} else {
1718 		ret = -EINVAL;
1719 		goto out;
1720 	}
1721 
1722 	*ppos += read;
1723 	ret = read;
1724 
1725 out:
1726 	return ret;
1727 }
1728 
1729 /* TODO add a seq_buf_to_buffer() */
1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 	int len;
1733 
1734 	if (trace_seq_used(s) <= s->readpos)
1735 		return -EBUSY;
1736 
1737 	len = trace_seq_used(s) - s->readpos;
1738 	if (cnt > len)
1739 		cnt = len;
1740 	memcpy(buf, s->buffer + s->readpos, cnt);
1741 
1742 	s->readpos += cnt;
1743 	return cnt;
1744 }
1745 
1746 unsigned long __read_mostly	tracing_thresh;
1747 
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750 
1751 #ifdef LATENCY_FS_NOTIFY
1752 
1753 static struct workqueue_struct *fsnotify_wq;
1754 
1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 	struct trace_array *tr = container_of(work, struct trace_array,
1758 					      fsnotify_work);
1759 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761 
1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 	struct trace_array *tr = container_of(iwork, struct trace_array,
1765 					      fsnotify_irqwork);
1766 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768 
1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 				     struct dentry *d_tracer)
1771 {
1772 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 					      TRACE_MODE_WRITE,
1776 					      d_tracer, tr,
1777 					      &tracing_max_lat_fops);
1778 }
1779 
1780 __init static int latency_fsnotify_init(void)
1781 {
1782 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 	if (!fsnotify_wq) {
1785 		pr_err("Unable to allocate tr_max_lat_wq\n");
1786 		return -ENOMEM;
1787 	}
1788 	return 0;
1789 }
1790 
1791 late_initcall_sync(latency_fsnotify_init);
1792 
1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 	if (!fsnotify_wq)
1796 		return;
1797 	/*
1798 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 	 * possible that we are called from __schedule() or do_idle(), which
1800 	 * could cause a deadlock.
1801 	 */
1802 	irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804 
1805 #else /* !LATENCY_FS_NOTIFY */
1806 
1807 #define trace_create_maxlat_file(tr, d_tracer)				\
1808 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1809 			  d_tracer, tr, &tracing_max_lat_fops)
1810 
1811 #endif
1812 
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (this way the maximum trace is permanently saved,
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817  */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 	struct array_buffer *trace_buf = &tr->array_buffer;
1822 	struct array_buffer *max_buf = &tr->max_buffer;
1823 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825 
1826 	max_buf->cpu = cpu;
1827 	max_buf->time_start = data->preempt_timestamp;
1828 
1829 	max_data->saved_latency = tr->max_latency;
1830 	max_data->critical_start = data->critical_start;
1831 	max_data->critical_end = data->critical_end;
1832 
1833 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 	max_data->pid = tsk->pid;
1835 	/*
1836 	 * If tsk == current, then use current_uid(), as that does not use
1837 	 * RCU. The irq tracer can be called out of RCU scope.
1838 	 */
1839 	if (tsk == current)
1840 		max_data->uid = current_uid();
1841 	else
1842 		max_data->uid = task_uid(tsk);
1843 
1844 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 	max_data->policy = tsk->policy;
1846 	max_data->rt_priority = tsk->rt_priority;
1847 
1848 	/* record this task's comm */
1849 	tracing_record_cmdline(tsk);
1850 	latency_fsnotify(tr);
1851 }
1852 
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: the trace array to update
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 	      void *cond_data)
1866 {
1867 	if (tr->stop_count)
1868 		return;
1869 
1870 	WARN_ON_ONCE(!irqs_disabled());
1871 
1872 	if (!tr->allocated_snapshot) {
1873 		/* Only the nop tracer should hit this when disabling */
1874 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 		return;
1876 	}
1877 
1878 	arch_spin_lock(&tr->max_lock);
1879 
1880 	/* Inherit the recordable setting from array_buffer */
1881 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 		ring_buffer_record_on(tr->max_buffer.buffer);
1883 	else
1884 		ring_buffer_record_off(tr->max_buffer.buffer);
1885 
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 		arch_spin_unlock(&tr->max_lock);
1889 		return;
1890 	}
1891 #endif
1892 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893 
1894 	__update_max_tr(tr, tsk, cpu);
1895 
1896 	arch_spin_unlock(&tr->max_lock);
1897 
1898 	/* Any waiters on the old snapshot buffer need to wake up */
1899 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
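
/*
 * Illustrative sketch (not part of the original file): the caller
 * pattern a latency tracer typically follows.  The function name is
 * hypothetical; real callers such as the irqsoff and wakeup tracers
 * run with interrupts disabled, which the WARN_ON_ONCE() above checks.
 */
static void example_report_new_max(struct trace_array *tr, int cpu, u64 delta)
{
	if (delta <= tr->max_latency)
		return;

	tr->max_latency = delta;
	/* Swap the live buffer into max_buffer so the worst case is kept */
	update_max_tr(tr, current, cpu, NULL);
}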
1901 
1902 /**
1903  * update_max_tr_single - only copy one trace over, and reset the rest
1904  * @tr: the trace array to update
1905  * @tsk: task with the latency
1906  * @cpu: the cpu of the buffer to copy.
1907  *
1908  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909  */
1910 void
1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 	int ret;
1914 
1915 	if (tr->stop_count)
1916 		return;
1917 
1918 	WARN_ON_ONCE(!irqs_disabled());
1919 	if (!tr->allocated_snapshot) {
1920 		/* Only the nop tracer should hit this when disabling */
1921 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 		return;
1923 	}
1924 
1925 	arch_spin_lock(&tr->max_lock);
1926 
1927 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928 
1929 	if (ret == -EBUSY) {
1930 		/*
1931 		 * We failed to swap the buffer due to a commit taking
1932 		 * place on this CPU. We fail to record, but we reset
1933 		 * the max trace buffer (no one writes directly to it)
1934 		 * and flag that it failed.
1935 		 * Another reason is resize is in progress.
1936 		 * Another possible reason is that a resize is in progress.
1937 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 			"Failed to swap buffers due to commit or resize in progress\n");
1939 	}
1940 
1941 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942 
1943 	__update_max_tr(tr, tsk, cpu);
1944 	arch_spin_unlock(&tr->max_lock);
1945 }
1946 
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948 
1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 	int ret;
1952 
1953 	/* Iterators are static; they should be either filled or empty */
1954 	if (trace_buffer_iter(iter, iter->cpu_file))
1955 		return 0;
1956 
1957 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958 
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 	/*
1961 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 	 * to happen, this would now be the main buffer.
1963 	 */
1964 	if (iter->snapshot)
1965 		iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 	return ret;
1968 }
1969 
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972 
1973 struct trace_selftests {
1974 	struct list_head		list;
1975 	struct tracer			*type;
1976 };
1977 
1978 static LIST_HEAD(postponed_selftests);
1979 
1980 static int save_selftest(struct tracer *type)
1981 {
1982 	struct trace_selftests *selftest;
1983 
1984 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 	if (!selftest)
1986 		return -ENOMEM;
1987 
1988 	selftest->type = type;
1989 	list_add(&selftest->list, &postponed_selftests);
1990 	return 0;
1991 }
1992 
1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 	struct trace_array *tr = &global_trace;
1996 	struct tracer *saved_tracer = tr->current_trace;
1997 	int ret;
1998 
1999 	if (!type->selftest || tracing_selftest_disabled)
2000 		return 0;
2001 
2002 	/*
2003 	 * If a tracer registers early in boot up (before scheduling is
2004 	 * initialized and such), then do not run its selftest yet.
2005 	 * Instead, run it a little later in the boot process.
2006 	 */
2007 	if (!selftests_can_run)
2008 		return save_selftest(type);
2009 
2010 	if (!tracing_is_on()) {
2011 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 			type->name);
2013 		return 0;
2014 	}
2015 
2016 	/*
2017 	 * Run a selftest on this tracer.
2018 	 * Here we reset the trace buffer, and set the current
2019 	 * tracer to be this tracer. The tracer can then run some
2020 	 * internal tracing to verify that everything is in order.
2021 	 * If we fail, we do not register this tracer.
2022 	 */
2023 	tracing_reset_online_cpus(&tr->array_buffer);
2024 
2025 	tr->current_trace = type;
2026 
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 	if (type->use_max_tr) {
2029 		/* If we expanded the buffers, make sure the max is expanded too */
2030 		if (ring_buffer_expanded)
2031 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 					   RING_BUFFER_ALL_CPUS);
2033 		tr->allocated_snapshot = true;
2034 	}
2035 #endif
2036 
2037 	/* the test is responsible for initializing and enabling */
2038 	pr_info("Testing tracer %s: ", type->name);
2039 	ret = type->selftest(type, tr);
2040 	/* the test is responsible for resetting too */
2041 	tr->current_trace = saved_tracer;
2042 	if (ret) {
2043 		printk(KERN_CONT "FAILED!\n");
2044 		/* Add the warning after printing 'FAILED' */
2045 		WARN_ON(1);
2046 		return -1;
2047 	}
2048 	/* Only reset on passing, to avoid touching corrupted buffers */
2049 	tracing_reset_online_cpus(&tr->array_buffer);
2050 
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 	if (type->use_max_tr) {
2053 		tr->allocated_snapshot = false;
2054 
2055 		/* Shrink the max buffer again */
2056 		if (ring_buffer_expanded)
2057 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 					   RING_BUFFER_ALL_CPUS);
2059 	}
2060 #endif
2061 
2062 	printk(KERN_CONT "PASSED\n");
2063 	return 0;
2064 }
2065 
2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 	int ret;
2069 
2070 	/*
2071 	 * Tests can take a long time, especially if they are run one after the
2072 	 * other, as does happen during bootup when all the tracers are
2073 	 * registered. This could cause the soft lockup watchdog to trigger.
2074 	 */
2075 	cond_resched();
2076 
2077 	tracing_selftest_running = true;
2078 	ret = run_tracer_selftest(type);
2079 	tracing_selftest_running = false;
2080 
2081 	return ret;
2082 }
2083 
2084 static __init int init_trace_selftests(void)
2085 {
2086 	struct trace_selftests *p, *n;
2087 	struct tracer *t, **last;
2088 	int ret;
2089 
2090 	selftests_can_run = true;
2091 
2092 	mutex_lock(&trace_types_lock);
2093 
2094 	if (list_empty(&postponed_selftests))
2095 		goto out;
2096 
2097 	pr_info("Running postponed tracer tests:\n");
2098 
2099 	tracing_selftest_running = true;
2100 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 		/* This loop can take minutes when sanitizers are enabled, so
2102 		 * lets make sure we allow RCU processing.
2103 		 * let's make sure we allow RCU processing.
2104 		cond_resched();
2105 		ret = run_tracer_selftest(p->type);
2106 		/* If the test fails, then warn and remove from available_tracers */
2107 		if (ret < 0) {
2108 			WARN(1, "tracer: %s failed selftest, disabling\n",
2109 			     p->type->name);
2110 			last = &trace_types;
2111 			for (t = trace_types; t; t = t->next) {
2112 				if (t == p->type) {
2113 					*last = t->next;
2114 					break;
2115 				}
2116 				last = &t->next;
2117 			}
2118 		}
2119 		list_del(&p->list);
2120 		kfree(p);
2121 	}
2122 	tracing_selftest_running = false;
2123 
2124  out:
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
2131 static inline int run_tracer_selftest(struct tracer *type)
2132 {
2133 	return 0;
2134 }
2135 static inline int do_run_tracer_selftest(struct tracer *type)
2136 {
2137 	return 0;
2138 }
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140 
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142 
2143 static void __init apply_trace_boot_options(void);
2144 
2145 /**
2146  * register_tracer - register a tracer with the ftrace system.
2147  * @type: the plugin for the tracer
2148  *
2149  * Register a new plugin tracer.
2150  */
2151 int __init register_tracer(struct tracer *type)
2152 {
2153 	struct tracer *t;
2154 	int ret = 0;
2155 
2156 	if (!type->name) {
2157 		pr_info("Tracer must have a name\n");
2158 		return -1;
2159 	}
2160 
2161 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 		return -1;
2164 	}
2165 
2166 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 		pr_warn("Can not register tracer %s due to lockdown\n",
2168 			   type->name);
2169 		return -EPERM;
2170 	}
2171 
2172 	mutex_lock(&trace_types_lock);
2173 
2174 	for (t = trace_types; t; t = t->next) {
2175 		if (strcmp(type->name, t->name) == 0) {
2176 			/* already found */
2177 			pr_info("Tracer %s already registered\n",
2178 				type->name);
2179 			ret = -1;
2180 			goto out;
2181 		}
2182 	}
2183 
2184 	if (!type->set_flag)
2185 		type->set_flag = &dummy_set_flag;
2186 	if (!type->flags) {
2187 		/* allocate a dummy tracer_flags */
2188 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 		if (!type->flags) {
2190 			ret = -ENOMEM;
2191 			goto out;
2192 		}
2193 		type->flags->val = 0;
2194 		type->flags->opts = dummy_tracer_opt;
2195 	} else
2196 		if (!type->flags->opts)
2197 			type->flags->opts = dummy_tracer_opt;
2198 
2199 	/* store the tracer for __set_tracer_option */
2200 	type->flags->trace = type;
2201 
2202 	ret = do_run_tracer_selftest(type);
2203 	if (ret < 0)
2204 		goto out;
2205 
2206 	type->next = trace_types;
2207 	trace_types = type;
2208 	add_tracer_options(&global_trace, type);
2209 
2210  out:
2211 	mutex_unlock(&trace_types_lock);
2212 
2213 	if (ret || !default_bootup_tracer)
2214 		goto out_unlock;
2215 
2216 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 		goto out_unlock;
2218 
2219 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 	/* Do we want this tracer to start on bootup? */
2221 	tracing_set_tracer(&global_trace, type->name);
2222 	default_bootup_tracer = NULL;
2223 
2224 	apply_trace_boot_options();
2225 
2226 	/* Disable other selftests, since running this tracer will break them. */
2227 	disable_tracing_selftest("running a tracer");
2228 
2229  out_unlock:
2230 	return ret;
2231 }
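
/*
 * Illustrative sketch (not part of the original file): the minimum a
 * plugin tracer provides before calling register_tracer().  The
 * callbacks here are hypothetical stubs; real tracers (nop, function,
 * irqsoff, ...) live in their own files and register the same way,
 * typically from a core_initcall().
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);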
2232 
2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 	struct trace_buffer *buffer = buf->buffer;
2236 
2237 	if (!buffer)
2238 		return;
2239 
2240 	ring_buffer_record_disable(buffer);
2241 
2242 	/* Make sure all commits have finished */
2243 	synchronize_rcu();
2244 	ring_buffer_reset_cpu(buffer, cpu);
2245 
2246 	ring_buffer_record_enable(buffer);
2247 }
2248 
2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 	struct trace_buffer *buffer = buf->buffer;
2252 
2253 	if (!buffer)
2254 		return;
2255 
2256 	ring_buffer_record_disable(buffer);
2257 
2258 	/* Make sure all commits have finished */
2259 	synchronize_rcu();
2260 
2261 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262 
2263 	ring_buffer_reset_online_cpus(buffer);
2264 
2265 	ring_buffer_record_enable(buffer);
2266 }
2267 
2268 /* Must have trace_types_lock held */
2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 	struct trace_array *tr;
2272 
2273 	lockdep_assert_held(&trace_types_lock);
2274 
2275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 		if (!tr->clear_trace)
2277 			continue;
2278 		tr->clear_trace = false;
2279 		tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 		tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 	}
2284 }
2285 
2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 	mutex_lock(&trace_types_lock);
2289 	tracing_reset_all_online_cpus_unlocked();
2290 	mutex_unlock(&trace_types_lock);
2291 }
2292 
2293 /*
2294  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295  * is the tgid last observed corresponding to pid=i.
2296  */
2297 static int *tgid_map;
2298 
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301 
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305  * Preemption must be disabled before acquiring trace_cmdline_lock.
2306  * The various trace_arrays' max_lock must be acquired in a context
2307  * where interrupts are disabled.
2308  */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312 	unsigned *map_cmdline_to_pid;
2313 	unsigned cmdline_num;
2314 	int cmdline_idx;
2315 	char saved_cmdlines[];
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
2318 
2319 /* Holds the size of a cmdline and pid element */
2320 #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
2321 	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
2322 
2323 static inline char *get_saved_cmdlines(int idx)
2324 {
2325 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2326 }
2327 
2328 static inline void set_cmdline(int idx, const char *cmdline)
2329 {
2330 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2331 }
2332 
2333 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2334 {
2335 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2336 
2337 	kmemleak_free(s);
2338 	free_pages((unsigned long)s, order);
2339 }
2340 
2341 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2342 {
2343 	struct saved_cmdlines_buffer *s;
2344 	struct page *page;
2345 	int orig_size, size;
2346 	int order;
2347 
2348 	/* Figure out how much is needed to hold the given number of cmdlines */
2349 	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2350 	order = get_order(orig_size);
2351 	size = 1 << (order + PAGE_SHIFT);
2352 	page = alloc_pages(GFP_KERNEL, order);
2353 	if (!page)
2354 		return NULL;
2355 
2356 	s = page_address(page);
2357 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2358 	memset(s, 0, sizeof(*s));
2359 
2360 	/* Round up to actual allocation */
2361 	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2362 	s->cmdline_num = val;
2363 
2364 	/* Place map_cmdline_to_pid array right after saved_cmdlines */
2365 	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2366 
2367 	s->cmdline_idx = 0;
2368 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2369 	       sizeof(s->map_pid_to_cmdline));
2370 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2371 	       val * sizeof(*s->map_cmdline_to_pid));
2372 
2373 	return s;
2374 }
2375 
2376 static int trace_create_savedcmd(void)
2377 {
2378 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2379 
2380 	return savedcmd ? 0 : -ENOMEM;
2381 }
2382 
2383 int is_tracing_stopped(void)
2384 {
2385 	return global_trace.stop_count;
2386 }
2387 
2388 static void tracing_start_tr(struct trace_array *tr)
2389 {
2390 	struct trace_buffer *buffer;
2391 	unsigned long flags;
2392 
2393 	if (tracing_disabled)
2394 		return;
2395 
2396 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2397 	if (--tr->stop_count) {
2398 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2399 			/* Someone screwed up their debugging */
2400 			tr->stop_count = 0;
2401 		}
2402 		goto out;
2403 	}
2404 
2405 	/* Prevent the buffers from switching */
2406 	arch_spin_lock(&tr->max_lock);
2407 
2408 	buffer = tr->array_buffer.buffer;
2409 	if (buffer)
2410 		ring_buffer_record_enable(buffer);
2411 
2412 #ifdef CONFIG_TRACER_MAX_TRACE
2413 	buffer = tr->max_buffer.buffer;
2414 	if (buffer)
2415 		ring_buffer_record_enable(buffer);
2416 #endif
2417 
2418 	arch_spin_unlock(&tr->max_lock);
2419 
2420  out:
2421 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2422 }
2423 
2424 /**
2425  * tracing_start - quick start of the tracer
2426  *
2427  * If tracing is enabled but was stopped by tracing_stop,
2428  * this will start the tracer back up.
2429  */
2430 void tracing_start(void)
2431 
2432 {
2433 	return tracing_start_tr(&global_trace);
2434 }
2435 
2436 static void tracing_stop_tr(struct trace_array *tr)
2437 {
2438 	struct trace_buffer *buffer;
2439 	unsigned long flags;
2440 
2441 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2442 	if (tr->stop_count++)
2443 		goto out;
2444 
2445 	/* Prevent the buffers from switching */
2446 	arch_spin_lock(&tr->max_lock);
2447 
2448 	buffer = tr->array_buffer.buffer;
2449 	if (buffer)
2450 		ring_buffer_record_disable(buffer);
2451 
2452 #ifdef CONFIG_TRACER_MAX_TRACE
2453 	buffer = tr->max_buffer.buffer;
2454 	if (buffer)
2455 		ring_buffer_record_disable(buffer);
2456 #endif
2457 
2458 	arch_spin_unlock(&tr->max_lock);
2459 
2460  out:
2461 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2462 }
2463 
2464 /**
2465  * tracing_stop - quick stop of the tracer
2466  *
2467  * Lightweight way to stop tracing. Use in conjunction with
2468  * tracing_start.
2469  */
2470 void tracing_stop(void)
2471 {
2472 	return tracing_stop_tr(&global_trace);
2473 }
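
/*
 * Illustrative sketch (not part of the original file): the intended
 * pairing of tracing_stop()/tracing_start(), bracketing a section
 * whose events the caller does not want in the buffer.
 */
static void example_quiet_section(void)
{
	tracing_stop();
	/* ... do the noisy work that should not be traced ... */
	tracing_start();
}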
2474 
2475 static int trace_save_cmdline(struct task_struct *tsk)
2476 {
2477 	unsigned tpid, idx;
2478 
2479 	/* treat recording of idle task as a success */
2480 	if (!tsk->pid)
2481 		return 1;
2482 
2483 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2484 
2485 	/*
2486 	 * It's not the end of the world if we don't get
2487 	 * the lock, but we also don't want to spin
2488 	 * nor do we want to disable interrupts,
2489 	 * so if we miss here, then better luck next time.
2490 	 *
2491 	 * This is called from within the scheduler and the wakeup path, so
2492 	 * interrupts had better be disabled and the run queue lock held.
2493 	 */
2494 	lockdep_assert_preemption_disabled();
2495 	if (!arch_spin_trylock(&trace_cmdline_lock))
2496 		return 0;
2497 
2498 	idx = savedcmd->map_pid_to_cmdline[tpid];
2499 	if (idx == NO_CMDLINE_MAP) {
2500 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2501 
2502 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2503 		savedcmd->cmdline_idx = idx;
2504 	}
2505 
2506 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2507 	set_cmdline(idx, tsk->comm);
2508 
2509 	arch_spin_unlock(&trace_cmdline_lock);
2510 
2511 	return 1;
2512 }
2513 
2514 static void __trace_find_cmdline(int pid, char comm[])
2515 {
2516 	unsigned map;
2517 	int tpid;
2518 
2519 	if (!pid) {
2520 		strcpy(comm, "<idle>");
2521 		return;
2522 	}
2523 
2524 	if (WARN_ON_ONCE(pid < 0)) {
2525 		strcpy(comm, "<XXX>");
2526 		return;
2527 	}
2528 
2529 	tpid = pid & (PID_MAX_DEFAULT - 1);
2530 	map = savedcmd->map_pid_to_cmdline[tpid];
2531 	if (map != NO_CMDLINE_MAP) {
2532 		tpid = savedcmd->map_cmdline_to_pid[map];
2533 		if (tpid == pid) {
2534 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2535 			return;
2536 		}
2537 	}
2538 	strcpy(comm, "<...>");
2539 }
2540 
2541 void trace_find_cmdline(int pid, char comm[])
2542 {
2543 	preempt_disable();
2544 	arch_spin_lock(&trace_cmdline_lock);
2545 
2546 	__trace_find_cmdline(pid, comm);
2547 
2548 	arch_spin_unlock(&trace_cmdline_lock);
2549 	preempt_enable();
2550 }
2551 
2552 static int *trace_find_tgid_ptr(int pid)
2553 {
2554 	/*
2555 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2556 	 * if we observe a non-NULL tgid_map then we also observe the correct
2557 	 * tgid_map_max.
2558 	 */
2559 	int *map = smp_load_acquire(&tgid_map);
2560 
2561 	if (unlikely(!map || pid > tgid_map_max))
2562 		return NULL;
2563 
2564 	return &map[pid];
2565 }
2566 
2567 int trace_find_tgid(int pid)
2568 {
2569 	int *ptr = trace_find_tgid_ptr(pid);
2570 
2571 	return ptr ? *ptr : 0;
2572 }
2573 
2574 static int trace_save_tgid(struct task_struct *tsk)
2575 {
2576 	int *ptr;
2577 
2578 	/* treat recording of idle task as a success */
2579 	if (!tsk->pid)
2580 		return 1;
2581 
2582 	ptr = trace_find_tgid_ptr(tsk->pid);
2583 	if (!ptr)
2584 		return 0;
2585 
2586 	*ptr = tsk->tgid;
2587 	return 1;
2588 }
2589 
2590 static bool tracing_record_taskinfo_skip(int flags)
2591 {
2592 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2593 		return true;
2594 	if (!__this_cpu_read(trace_taskinfo_save))
2595 		return true;
2596 	return false;
2597 }
2598 
2599 /**
2600  * tracing_record_taskinfo - record the task info of a task
2601  *
2602  * @task:  task to record
2603  * @flags: TRACE_RECORD_CMDLINE for recording comm
2604  *         TRACE_RECORD_TGID for recording tgid
2605  */
2606 void tracing_record_taskinfo(struct task_struct *task, int flags)
2607 {
2608 	bool done;
2609 
2610 	if (tracing_record_taskinfo_skip(flags))
2611 		return;
2612 
2613 	/*
2614 	 * Record as much task information as possible. If some fail, continue
2615 	 * to try to record the others.
2616 	 */
2617 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2618 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2619 
2620 	/* If recording any information failed, retry again soon. */
2621 	if (!done)
2622 		return;
2623 
2624 	__this_cpu_write(trace_taskinfo_save, false);
2625 }
2626 
2627 /**
2628  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2629  *
2630  * @prev: previous task during sched_switch
2631  * @next: next task during sched_switch
2632  * @flags: TRACE_RECORD_CMDLINE for recording comm
2633  *         TRACE_RECORD_TGID for recording tgid
2634  */
2635 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2636 					  struct task_struct *next, int flags)
2637 {
2638 	bool done;
2639 
2640 	if (tracing_record_taskinfo_skip(flags))
2641 		return;
2642 
2643 	/*
2644 	 * Record as much task information as possible. If some fail, continue
2645 	 * to try to record the others.
2646 	 */
2647 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2648 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2649 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2650 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2651 
2652 	/* If recording any information failed, retry again soon. */
2653 	if (!done)
2654 		return;
2655 
2656 	__this_cpu_write(trace_taskinfo_save, false);
2657 }
2658 
2659 /* Helpers to record a specific task information */
2660 void tracing_record_cmdline(struct task_struct *task)
2661 {
2662 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2663 }
2664 
2665 void tracing_record_tgid(struct task_struct *task)
2666 {
2667 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2668 }
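
/*
 * Illustrative sketch (not part of the original file): looking the
 * cached data back up, the way the output code does when rendering
 * events.  Note trace_find_tgid() returns 0 when the tgid map has not
 * been allocated (e.g. the record-tgid option was never enabled).
 */
static void example_print_task_info(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_info("pid=%d tgid=%d comm=%s\n", pid, trace_find_tgid(pid), comm);
}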
2669 
2670 /*
2671  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2672  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2673  * simplifies those functions and keeps them in sync.
2674  */
2675 enum print_line_t trace_handle_return(struct trace_seq *s)
2676 {
2677 	return trace_seq_has_overflowed(s) ?
2678 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2679 }
2680 EXPORT_SYMBOL_GPL(trace_handle_return);
2681 
2682 static unsigned short migration_disable_value(void)
2683 {
2684 #if defined(CONFIG_SMP)
2685 	return current->migration_disabled;
2686 #else
2687 	return 0;
2688 #endif
2689 }
2690 
2691 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2692 {
2693 	unsigned int trace_flags = irqs_status;
2694 	unsigned int pc;
2695 
2696 	pc = preempt_count();
2697 
2698 	if (pc & NMI_MASK)
2699 		trace_flags |= TRACE_FLAG_NMI;
2700 	if (pc & HARDIRQ_MASK)
2701 		trace_flags |= TRACE_FLAG_HARDIRQ;
2702 	if (in_serving_softirq())
2703 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2704 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2705 		trace_flags |= TRACE_FLAG_BH_OFF;
2706 
2707 	if (tif_need_resched())
2708 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2709 	if (test_preempt_need_resched())
2710 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2711 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2712 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2713 }
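
/*
 * Illustrative sketch (not part of the original file): pulling the
 * packed fields back out of the value built above.  The layout mirrors
 * the return statement of tracing_gen_ctx_irq_test(); the helper name
 * is hypothetical.
 */
static inline void example_decode_trace_ctx(unsigned int trace_ctx,
					    unsigned int *flags,
					    unsigned int *preempt,
					    unsigned int *migrate)
{
	*flags   = trace_ctx >> 16;		/* TRACE_FLAG_* bits */
	*preempt = trace_ctx & 0xf;		/* preempt count, capped at 0xf */
	*migrate = (trace_ctx >> 4) & 0xf;	/* migration_disabled, capped */
}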
2714 
2715 struct ring_buffer_event *
2716 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2717 			  int type,
2718 			  unsigned long len,
2719 			  unsigned int trace_ctx)
2720 {
2721 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2722 }
2723 
2724 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2725 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2726 static int trace_buffered_event_ref;
2727 
2728 /**
2729  * trace_buffered_event_enable - enable buffering events
2730  *
2731  * When events are being filtered, it is quicker to write the event
2732  * data into a temporary buffer if there is a good chance that the
2733  * event will not be committed. Discarding from the ring buffer
2734  * is not as fast as committing, and is much slower than copying
2735  * and then committing.
2736  *
2737  * When an event is to be filtered, allocate per-CPU buffers to
2738  * write the event data into. If the event is filtered and discarded,
2739  * it is simply dropped; otherwise, the entire data is committed
2740  * in one shot.
2741  */
2742 void trace_buffered_event_enable(void)
2743 {
2744 	struct ring_buffer_event *event;
2745 	struct page *page;
2746 	int cpu;
2747 
2748 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2749 
2750 	if (trace_buffered_event_ref++)
2751 		return;
2752 
2753 	for_each_tracing_cpu(cpu) {
2754 		page = alloc_pages_node(cpu_to_node(cpu),
2755 					GFP_KERNEL | __GFP_NORETRY, 0);
2756 		/* This is just an optimization and can handle failures */
2757 		if (!page) {
2758 			pr_err("Failed to allocate event buffer\n");
2759 			break;
2760 		}
2761 
2762 		event = page_address(page);
2763 		memset(event, 0, sizeof(*event));
2764 
2765 		per_cpu(trace_buffered_event, cpu) = event;
2766 
2767 		preempt_disable();
2768 		if (cpu == smp_processor_id() &&
2769 		    __this_cpu_read(trace_buffered_event) !=
2770 		    per_cpu(trace_buffered_event, cpu))
2771 			WARN_ON_ONCE(1);
2772 		preempt_enable();
2773 	}
2774 }
2775 
2776 static void enable_trace_buffered_event(void *data)
2777 {
2778 	/* Probably not needed, but do it anyway */
2779 	smp_rmb();
2780 	this_cpu_dec(trace_buffered_event_cnt);
2781 }
2782 
2783 static void disable_trace_buffered_event(void *data)
2784 {
2785 	this_cpu_inc(trace_buffered_event_cnt);
2786 }
2787 
2788 /**
2789  * trace_buffered_event_disable - disable buffering events
2790  *
2791  * When a filter is removed, it is faster to not use the buffered
2792  * events, and to commit directly into the ring buffer. Free up
2793  * the temp buffers when there are no more users. This requires
2794  * special synchronization with current events.
2795  */
2796 void trace_buffered_event_disable(void)
2797 {
2798 	int cpu;
2799 
2800 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2801 
2802 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2803 		return;
2804 
2805 	if (--trace_buffered_event_ref)
2806 		return;
2807 
2808 	/* For each CPU, set the buffer as used. */
2809 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2810 			 NULL, true);
2811 
2812 	/* Wait for all current users to finish */
2813 	synchronize_rcu();
2814 
2815 	for_each_tracing_cpu(cpu) {
2816 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2817 		per_cpu(trace_buffered_event, cpu) = NULL;
2818 	}
2819 
2820 	/*
2821 	 * Wait for all CPUs that may have started checking whether they can
2822 	 * use their event buffer only after the previous synchronize_rcu()
2823 	 * call and that still read a valid pointer from trace_buffered_event.
2824 	 * They must not see the cleared trace_buffered_event_cnt, or they
2825 	 * could wrongly decide to use the pointed-to buffer, which is now freed.
2826 	 */
2827 	synchronize_rcu();
2828 
2829 	/* For each CPU, relinquish the buffer */
2830 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2831 			 true);
2832 }
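
/*
 * Illustrative sketch (not part of the original file): the expected
 * pairing of the two calls above, as done from the event filtering
 * code.  Both must run under event_mutex, which the WARN_ON_ONCE()
 * checks enforce.  The function names here are hypothetical.
 */
static void example_filter_attach(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... install the filter on the event file ... */
	mutex_unlock(&event_mutex);
}

static void example_filter_detach(void)
{
	mutex_lock(&event_mutex);
	/* ... remove the filter ... */
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}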
2833 
2834 static struct trace_buffer *temp_buffer;
2835 
2836 struct ring_buffer_event *
2837 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2838 			  struct trace_event_file *trace_file,
2839 			  int type, unsigned long len,
2840 			  unsigned int trace_ctx)
2841 {
2842 	struct ring_buffer_event *entry;
2843 	struct trace_array *tr = trace_file->tr;
2844 	int val;
2845 
2846 	*current_rb = tr->array_buffer.buffer;
2847 
2848 	if (!tr->no_filter_buffering_ref &&
2849 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2850 		preempt_disable_notrace();
2851 		/*
2852 		 * Filtering is on, so try to use the per cpu buffer first.
2853 		 * This buffer will simulate a ring_buffer_event,
2854 		 * where the type_len is zero and the array[0] will
2855 		 * hold the full length.
2856 		 * (see include/linux/ring_buffer.h for details on
2857 		 *  how the ring_buffer_event is structured).
2858 		 *
2859 		 * Using a temp buffer during filtering and copying it
2860 		 * on a matched filter is quicker than writing directly
2861 		 * into the ring buffer and then discarding it when
2862 		 * it doesn't match. That is because the discard
2863 		 * requires several atomic operations to get right.
2864 		 * Copying on a match and doing nothing on a failed match
2865 		 * is still quicker than not copying on a match but having
2866 		 * to discard from the ring buffer on a failed match.
2867 		 */
2868 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2869 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2870 
2871 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2872 
2873 			/*
2874 			 * Preemption is disabled, but interrupts and NMIs
2875 			 * can still come in now. If that happens after
2876 			 * the above increment, then it will have to go
2877 			 * back to the old method of allocating the event
2878 			 * on the ring buffer, and if the filter fails, it
2879 			 * will have to call ring_buffer_discard_commit()
2880 			 * to remove it.
2881 			 *
2882 			 * Need to also check the unlikely case that the
2883 			 * length is bigger than the temp buffer size.
2884 			 * If that happens, then the reserve is pretty much
2885 			 * guaranteed to fail, as the ring buffer currently
2886 			 * only allows events less than a page. But that may
2887 			 * change in the future, so let the ring buffer reserve
2888 			 * handle the failure in that case.
2889 			 */
2890 			if (val == 1 && likely(len <= max_len)) {
2891 				trace_event_setup(entry, type, trace_ctx);
2892 				entry->array[0] = len;
2893 				/* Return with preemption disabled */
2894 				return entry;
2895 			}
2896 			this_cpu_dec(trace_buffered_event_cnt);
2897 		}
2898 		/* __trace_buffer_lock_reserve() disables preemption */
2899 		preempt_enable_notrace();
2900 	}
2901 
2902 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2903 					    trace_ctx);
2904 	/*
2905 	 * If tracing is off, but we have triggers enabled
2906 	 * we still need to look at the event data. Use the temp_buffer
2907 	 * to store the trace event for the trigger to use. It's recursive
2908 	 * to store the trace event for the trigger to use. It's recursion
2909 	 * safe and will not be recorded anywhere.
2910 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2911 		*current_rb = temp_buffer;
2912 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2913 						    trace_ctx);
2914 	}
2915 	return entry;
2916 }
2917 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2918 
2919 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2920 static DEFINE_MUTEX(tracepoint_printk_mutex);
2921 
2922 static void output_printk(struct trace_event_buffer *fbuffer)
2923 {
2924 	struct trace_event_call *event_call;
2925 	struct trace_event_file *file;
2926 	struct trace_event *event;
2927 	unsigned long flags;
2928 	struct trace_iterator *iter = tracepoint_print_iter;
2929 
2930 	/* We should never get here if iter is NULL */
2931 	if (WARN_ON_ONCE(!iter))
2932 		return;
2933 
2934 	event_call = fbuffer->trace_file->event_call;
2935 	if (!event_call || !event_call->event.funcs ||
2936 	    !event_call->event.funcs->trace)
2937 		return;
2938 
2939 	file = fbuffer->trace_file;
2940 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2941 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2942 	     !filter_match_preds(file->filter, fbuffer->entry)))
2943 		return;
2944 
2945 	event = &fbuffer->trace_file->event_call->event;
2946 
2947 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2948 	trace_seq_init(&iter->seq);
2949 	iter->ent = fbuffer->entry;
2950 	event_call->event.funcs->trace(iter, 0, event);
2951 	trace_seq_putc(&iter->seq, 0);
2952 	printk("%s", iter->seq.buffer);
2953 
2954 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2955 }
2956 
2957 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2958 			     void *buffer, size_t *lenp,
2959 			     loff_t *ppos)
2960 {
2961 	int save_tracepoint_printk;
2962 	int ret;
2963 
2964 	mutex_lock(&tracepoint_printk_mutex);
2965 	save_tracepoint_printk = tracepoint_printk;
2966 
2967 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2968 
2969 	/*
2970 	 * This will force exiting early, as tracepoint_printk
2971 	 * is always zero when tracepoint_print_iter is not allocated.
2972 	 */
2973 	if (!tracepoint_print_iter)
2974 		tracepoint_printk = 0;
2975 
2976 	if (save_tracepoint_printk == tracepoint_printk)
2977 		goto out;
2978 
2979 	if (tracepoint_printk)
2980 		static_key_enable(&tracepoint_printk_key.key);
2981 	else
2982 		static_key_disable(&tracepoint_printk_key.key);
2983 
2984  out:
2985 	mutex_unlock(&tracepoint_printk_mutex);
2986 
2987 	return ret;
2988 }
2989 
2990 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2991 {
2992 	enum event_trigger_type tt = ETT_NONE;
2993 	struct trace_event_file *file = fbuffer->trace_file;
2994 
2995 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2996 			fbuffer->entry, &tt))
2997 		goto discard;
2998 
2999 	if (static_key_false(&tracepoint_printk_key.key))
3000 		output_printk(fbuffer);
3001 
3002 	if (static_branch_unlikely(&trace_event_exports_enabled))
3003 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3004 
3005 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3006 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3007 
3008 discard:
3009 	if (tt)
3010 		event_triggers_post_call(file, tt);
3011 
3012 }
3013 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3014 
3015 /*
3016  * Skip 3:
3017  *
3018  *   trace_buffer_unlock_commit_regs()
3019  *   trace_event_buffer_commit()
3020  *   trace_event_raw_event_xxx()
3021  */
3022 # define STACK_SKIP 3
3023 
3024 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3025 				     struct trace_buffer *buffer,
3026 				     struct ring_buffer_event *event,
3027 				     unsigned int trace_ctx,
3028 				     struct pt_regs *regs)
3029 {
3030 	__buffer_unlock_commit(buffer, event);
3031 
3032 	/*
3033 	 * If regs is not set, then skip the necessary functions.
3034 	 * Note, we can still get here via blktrace, wakeup tracer
3035 	 * and mmiotrace, but that's ok if they lose a function or
3036 	 * two. They are not that meaningful.
3037 	 */
3038 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3039 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3040 }
3041 
3042 /*
3043  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3044  */
3045 void
3046 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3047 				   struct ring_buffer_event *event)
3048 {
3049 	__buffer_unlock_commit(buffer, event);
3050 }
3051 
3052 void
3053 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3054 	       parent_ip, unsigned int trace_ctx)
3055 {
3056 	struct trace_event_call *call = &event_function;
3057 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3058 	struct ring_buffer_event *event;
3059 	struct ftrace_entry *entry;
3060 
3061 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3062 					    trace_ctx);
3063 	if (!event)
3064 		return;
3065 	entry	= ring_buffer_event_data(event);
3066 	entry->ip			= ip;
3067 	entry->parent_ip		= parent_ip;
3068 
3069 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3070 		if (static_branch_unlikely(&trace_function_exports_enabled))
3071 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3072 		__buffer_unlock_commit(buffer, event);
3073 	}
3074 }
3075 
3076 #ifdef CONFIG_STACKTRACE
3077 
3078 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3079 #define FTRACE_KSTACK_NESTING	4
3080 
3081 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3082 
3083 struct ftrace_stack {
3084 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3085 };
3086 
3087 
3088 struct ftrace_stacks {
3089 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3090 };
3091 
3092 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3093 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3094 
3095 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3096 				 unsigned int trace_ctx,
3097 				 int skip, struct pt_regs *regs)
3098 {
3099 	struct trace_event_call *call = &event_kernel_stack;
3100 	struct ring_buffer_event *event;
3101 	unsigned int size, nr_entries;
3102 	struct ftrace_stack *fstack;
3103 	struct stack_entry *entry;
3104 	int stackidx;
3105 
3106 	/*
3107 	 * Add one, for this function and the call to stack_trace_save().
3108 	 * If regs is set, then these functions will not be in the way.
3109 	 */
3110 #ifndef CONFIG_UNWINDER_ORC
3111 	if (!regs)
3112 		skip++;
3113 #endif
3114 
3115 	preempt_disable_notrace();
3116 
3117 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3118 
3119 	/* This should never happen. If it does, yell once and skip */
3120 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3121 		goto out;
3122 
3123 	/*
3124 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3125 	 * interrupt will either see the value pre increment or post
3126 	 * increment. If the interrupt happens pre increment it will have
3127 	 * restored the counter when it returns.  We just need a barrier to
3128 	 * keep gcc from moving things around.
3129 	 */
3130 	barrier();
3131 
3132 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3133 	size = ARRAY_SIZE(fstack->calls);
3134 
3135 	if (regs) {
3136 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3137 						   size, skip);
3138 	} else {
3139 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3140 	}
3141 
3142 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3143 				    struct_size(entry, caller, nr_entries),
3144 				    trace_ctx);
3145 	if (!event)
3146 		goto out;
3147 	entry = ring_buffer_event_data(event);
3148 
3149 	entry->size = nr_entries;
3150 	memcpy(&entry->caller, fstack->calls,
3151 	       flex_array_size(entry, caller, nr_entries));
3152 
3153 	if (!call_filter_check_discard(call, entry, buffer, event))
3154 		__buffer_unlock_commit(buffer, event);
3155 
3156  out:
3157 	/* Again, don't let gcc optimize things here */
3158 	barrier();
3159 	__this_cpu_dec(ftrace_stack_reserve);
3160 	preempt_enable_notrace();
3161 
3162 }
3163 
3164 static inline void ftrace_trace_stack(struct trace_array *tr,
3165 				      struct trace_buffer *buffer,
3166 				      unsigned int trace_ctx,
3167 				      int skip, struct pt_regs *regs)
3168 {
3169 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3170 		return;
3171 
3172 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3173 }
3174 
3175 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3176 		   int skip)
3177 {
3178 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3179 
3180 	if (rcu_is_watching()) {
3181 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182 		return;
3183 	}
3184 
3185 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3186 		return;
3187 
3188 	/*
3189 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3190 	 * but if the above rcu_is_watching() failed, then the NMI
3191 	 * triggered someplace critical, and ct_irq_enter() should
3192 	 * not be called from NMI.
3193 	 */
3194 	if (unlikely(in_nmi()))
3195 		return;
3196 
3197 	ct_irq_enter_irqson();
3198 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199 	ct_irq_exit_irqson();
3200 }
3201 
3202 /**
3203  * trace_dump_stack - record a stack back trace in the trace buffer
3204  * @skip: Number of functions to skip (helper handlers)
3205  */
3206 void trace_dump_stack(int skip)
3207 {
3208 	if (tracing_disabled || tracing_selftest_running)
3209 		return;
3210 
3211 #ifndef CONFIG_UNWINDER_ORC
3212 	/* Skip 1 to skip this function. */
3213 	skip++;
3214 #endif
3215 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3216 			     tracing_gen_ctx(), skip, NULL);
3217 }
3218 EXPORT_SYMBOL_GPL(trace_dump_stack);
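
/*
 * Illustrative sketch (not part of the original file): recording a
 * backtrace from an arbitrary kernel path while debugging.  Passing a
 * non-zero skip hides that many caller frames from the recorded trace.
 */
static void example_dump_here(void)
{
	/* skip = 1 so this helper itself does not show up in the trace */
	trace_dump_stack(1);
}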
3219 
3220 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3221 static DEFINE_PER_CPU(int, user_stack_count);
3222 
3223 static void
3224 ftrace_trace_userstack(struct trace_array *tr,
3225 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3226 {
3227 	struct trace_event_call *call = &event_user_stack;
3228 	struct ring_buffer_event *event;
3229 	struct userstack_entry *entry;
3230 
3231 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3232 		return;
3233 
3234 	/*
3235 	 * NMIs can not handle page faults, even with fixups.
3236 	 * Saving the user stack can (and often does) fault.
3237 	 */
3238 	if (unlikely(in_nmi()))
3239 		return;
3240 
3241 	/*
3242 	 * prevent recursion, since the user stack tracing may
3243 	 * trigger other kernel events.
3244 	 */
3245 	preempt_disable();
3246 	if (__this_cpu_read(user_stack_count))
3247 		goto out;
3248 
3249 	__this_cpu_inc(user_stack_count);
3250 
3251 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3252 					    sizeof(*entry), trace_ctx);
3253 	if (!event)
3254 		goto out_drop_count;
3255 	entry	= ring_buffer_event_data(event);
3256 
3257 	entry->tgid		= current->tgid;
3258 	memset(&entry->caller, 0, sizeof(entry->caller));
3259 
3260 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3261 	if (!call_filter_check_discard(call, entry, buffer, event))
3262 		__buffer_unlock_commit(buffer, event);
3263 
3264  out_drop_count:
3265 	__this_cpu_dec(user_stack_count);
3266  out:
3267 	preempt_enable();
3268 }
3269 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3270 static void ftrace_trace_userstack(struct trace_array *tr,
3271 				   struct trace_buffer *buffer,
3272 				   unsigned int trace_ctx)
3273 {
3274 }
3275 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3276 
3277 #endif /* CONFIG_STACKTRACE */
3278 
3279 static inline void
3280 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3281 			  unsigned long long delta)
3282 {
3283 	entry->bottom_delta_ts = delta & U32_MAX;
3284 	entry->top_delta_ts = (delta >> 32);
3285 }
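
/*
 * Illustrative sketch (not part of the original file): reassembling
 * the timestamp delta that func_repeats_set_delta_ts() splits into two
 * 32-bit halves; the output side performs the equivalent combination.
 */
static inline u64 example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}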
3286 
3287 void trace_last_func_repeats(struct trace_array *tr,
3288 			     struct trace_func_repeats *last_info,
3289 			     unsigned int trace_ctx)
3290 {
3291 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3292 	struct func_repeats_entry *entry;
3293 	struct ring_buffer_event *event;
3294 	u64 delta;
3295 
3296 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3297 					    sizeof(*entry), trace_ctx);
3298 	if (!event)
3299 		return;
3300 
3301 	delta = ring_buffer_event_time_stamp(buffer, event) -
3302 		last_info->ts_last_call;
3303 
3304 	entry = ring_buffer_event_data(event);
3305 	entry->ip = last_info->ip;
3306 	entry->parent_ip = last_info->parent_ip;
3307 	entry->count = last_info->count;
3308 	func_repeats_set_delta_ts(entry, delta);
3309 
3310 	__buffer_unlock_commit(buffer, event);
3311 }
3312 
3313 /* created for use with alloc_percpu */
3314 struct trace_buffer_struct {
3315 	int nesting;
3316 	char buffer[4][TRACE_BUF_SIZE];
3317 };
3318 
3319 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3320 
3321 /*
3322  * This allows for lockless recording.  If we're nested too deeply, then
3323  * this returns NULL.
3324  */
3325 static char *get_trace_buf(void)
3326 {
3327 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3328 
3329 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3330 		return NULL;
3331 
3332 	buffer->nesting++;
3333 
3334 	/* Interrupts must see nesting incremented before we use the buffer */
3335 	barrier();
3336 	return &buffer->buffer[buffer->nesting - 1][0];
3337 }
3338 
3339 static void put_trace_buf(void)
3340 {
3341 	/* Don't let the decrement of nesting leak before this */
3342 	barrier();
3343 	this_cpu_dec(trace_percpu_buffer->nesting);
3344 }
3345 
3346 static int alloc_percpu_trace_buffer(void)
3347 {
3348 	struct trace_buffer_struct __percpu *buffers;
3349 
3350 	if (trace_percpu_buffer)
3351 		return 0;
3352 
3353 	buffers = alloc_percpu(struct trace_buffer_struct);
3354 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3355 		return -ENOMEM;
3356 
3357 	trace_percpu_buffer = buffers;
3358 	return 0;
3359 }
3360 
3361 static int buffers_allocated;
3362 
3363 void trace_printk_init_buffers(void)
3364 {
3365 	if (buffers_allocated)
3366 		return;
3367 
3368 	if (alloc_percpu_trace_buffer())
3369 		return;
3370 
3371 	/* trace_printk() is for debug use only. Don't use it in production. */
3372 
3373 	pr_warn("\n");
3374 	pr_warn("**********************************************************\n");
3375 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3376 	pr_warn("**                                                      **\n");
3377 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3378 	pr_warn("**                                                      **\n");
3379 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3380 	pr_warn("** unsafe for production use.                           **\n");
3381 	pr_warn("**                                                      **\n");
3382 	pr_warn("** If you see this message and you are not debugging    **\n");
3383 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3384 	pr_warn("**                                                      **\n");
3385 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3386 	pr_warn("**********************************************************\n");
3387 
3388 	/* Expand the buffers to set size */
3389 	tracing_update_buffers();
3390 
3391 	buffers_allocated = 1;
3392 
3393 	/*
3394 	 * trace_printk_init_buffers() can be called by modules.
3395 	 * If that happens, then we need to start cmdline recording
3396 	 * directly here. If the global_trace.buffer is already
3397 	 * allocated here, then this was called by module code.
3398 	 */
3399 	if (global_trace.array_buffer.buffer)
3400 		tracing_start_cmdline_record();
3401 }
3402 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3403 
3404 void trace_printk_start_comm(void)
3405 {
3406 	/* Start tracing comms if trace printk is set */
3407 	if (!buffers_allocated)
3408 		return;
3409 	tracing_start_cmdline_record();
3410 }
3411 
3412 static void trace_printk_start_stop_comm(int enabled)
3413 {
3414 	if (!buffers_allocated)
3415 		return;
3416 
3417 	if (enabled)
3418 		tracing_start_cmdline_record();
3419 	else
3420 		tracing_stop_cmdline_record();
3421 }
3422 
3423 /**
3424  * trace_vbprintk - write binary msg to tracing buffer
3425  * @ip:    The address of the caller
3426  * @fmt:   The string format to write to the buffer
3427  * @args:  Arguments for @fmt
3428  */
3429 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3430 {
3431 	struct trace_event_call *call = &event_bprint;
3432 	struct ring_buffer_event *event;
3433 	struct trace_buffer *buffer;
3434 	struct trace_array *tr = &global_trace;
3435 	struct bprint_entry *entry;
3436 	unsigned int trace_ctx;
3437 	char *tbuffer;
3438 	int len = 0, size;
3439 
3440 	if (unlikely(tracing_selftest_running || tracing_disabled))
3441 		return 0;
3442 
3443 	/* Don't pollute graph traces with trace_vprintk internals */
3444 	pause_graph_tracing();
3445 
3446 	trace_ctx = tracing_gen_ctx();
3447 	preempt_disable_notrace();
3448 
3449 	tbuffer = get_trace_buf();
3450 	if (!tbuffer) {
3451 		len = 0;
3452 		goto out_nobuffer;
3453 	}
3454 
3455 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3456 
3457 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3458 		goto out_put;
3459 
3460 	size = sizeof(*entry) + sizeof(u32) * len;
3461 	buffer = tr->array_buffer.buffer;
3462 	ring_buffer_nest_start(buffer);
3463 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3464 					    trace_ctx);
3465 	if (!event)
3466 		goto out;
3467 	entry = ring_buffer_event_data(event);
3468 	entry->ip			= ip;
3469 	entry->fmt			= fmt;
3470 
3471 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3472 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3473 		__buffer_unlock_commit(buffer, event);
3474 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3475 	}
3476 
3477 out:
3478 	ring_buffer_nest_end(buffer);
3479 out_put:
3480 	put_trace_buf();
3481 
3482 out_nobuffer:
3483 	preempt_enable_notrace();
3484 	unpause_graph_tracing();
3485 
3486 	return len;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_vbprintk);
3489 
3490 __printf(3, 0)
3491 static int
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3492 __trace_array_vprintk(struct trace_buffer *buffer,
3493 		      unsigned long ip, const char *fmt, va_list args)
3494 {
3495 	struct trace_event_call *call = &event_print;
3496 	struct ring_buffer_event *event;
3497 	int len = 0, size;
3498 	struct print_entry *entry;
3499 	unsigned int trace_ctx;
3500 	char *tbuffer;
3501 
3502 	if (tracing_disabled)
3503 		return 0;
3504 
3505 	/* Don't pollute graph traces with trace_vprintk internals */
3506 	pause_graph_tracing();
3507 
3508 	trace_ctx = tracing_gen_ctx();
3509 	preempt_disable_notrace();
3510 
3511 
3512 	tbuffer = get_trace_buf();
3513 	if (!tbuffer) {
3514 		len = 0;
3515 		goto out_nobuffer;
3516 	}
3517 
3518 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3519 
3520 	size = sizeof(*entry) + len + 1;
3521 	ring_buffer_nest_start(buffer);
3522 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3523 					    trace_ctx);
3524 	if (!event)
3525 		goto out;
3526 	entry = ring_buffer_event_data(event);
3527 	entry->ip = ip;
3528 
3529 	memcpy(&entry->buf, tbuffer, len + 1);
3530 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3531 		__buffer_unlock_commit(buffer, event);
3532 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3533 	}
3534 
3535 out:
3536 	ring_buffer_nest_end(buffer);
3537 	put_trace_buf();
3538 
3539 out_nobuffer:
3540 	preempt_enable_notrace();
3541 	unpause_graph_tracing();
3542 
3543 	return len;
3544 }
3545 
3546 __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3547 int trace_array_vprintk(struct trace_array *tr,
3548 			unsigned long ip, const char *fmt, va_list args)
3549 {
3550 	if (tracing_selftest_running && tr == &global_trace)
3551 		return 0;
3552 
3553 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3554 }
3555 
3556 /**
3557  * trace_array_printk - Print a message to a specific instance
3558  * @tr: The instance trace_array descriptor
3559  * @ip: The instruction pointer that this is called from.
3560  * @fmt: The format to print (printf format)
3561  *
3562  * If a subsystem sets up its own instance, it has the right to
3563  * printk strings into its tracing instance buffer using this
3564  * function. Note, this function will not write into the top level
3565  * buffer (use trace_printk() for that), as the top level buffer
3566  * should only contain events that can be individually disabled.
3567  * trace_printk() is only used for debugging a kernel, and should
3568  * never be incorporated into normal use.
3569  *
3570  * trace_array_printk() can be used, as it will not add noise to the
3571  * top level tracing buffer.
3572  *
3573  * Note, trace_array_init_printk() must be called on @tr before this
3574  * can be used.
3575  */
3576 __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3577 int trace_array_printk(struct trace_array *tr,
3578 		       unsigned long ip, const char *fmt, ...)
3579 {
3580 	int ret;
3581 	va_list ap;
3582 
3583 	if (!tr)
3584 		return -ENOENT;
3585 
3586 	/* This is only allowed for created instances */
3587 	if (tr == &global_trace)
3588 		return 0;
3589 
3590 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3591 		return 0;
3592 
3593 	va_start(ap, fmt);
3594 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3595 	va_end(ap);
3596 	return ret;
3597 }
3598 EXPORT_SYMBOL_GPL(trace_array_printk);
3599 
3600 /**
3601  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3602  * @tr: The trace array to initialize the buffers for
3603  *
3604  * As trace_array_printk() only writes into instances, such calls are
3605  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3606  * before trace_array_printk() can be used on a trace_array.
3607  */
trace_array_init_printk(struct trace_array * tr)3608 int trace_array_init_printk(struct trace_array *tr)
3609 {
3610 	if (!tr)
3611 		return -ENOENT;
3612 
3613 	/* This is only allowed for created instances */
3614 	if (tr == &global_trace)
3615 		return -EINVAL;
3616 
3617 	return alloc_percpu_trace_buffer();
3618 }
3619 EXPORT_SYMBOL_GPL(trace_array_init_printk);
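
/*
 * A minimal usage sketch for the two exports above, assuming an instance
 * obtained with trace_array_get_by_name() (whose exact signature varies
 * between kernel versions); the instance name and message are hypothetical.
 *
 *	struct trace_array *tr;
 *	int ret;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENODEV;
 *
 *	ret = trace_array_init_printk(tr);
 *	if (!ret)
 *		trace_array_printk(tr, _THIS_IP_, "device probed\n");
 *
 *	trace_array_put(tr);
 */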
3620 
3621 __printf(3, 4)
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3622 int trace_array_printk_buf(struct trace_buffer *buffer,
3623 			   unsigned long ip, const char *fmt, ...)
3624 {
3625 	int ret;
3626 	va_list ap;
3627 
3628 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3629 		return 0;
3630 
3631 	va_start(ap, fmt);
3632 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3633 	va_end(ap);
3634 	return ret;
3635 }
3636 
3637 __printf(2, 0)
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3638 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3639 {
3640 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3641 }
3642 EXPORT_SYMBOL_GPL(trace_vprintk);
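
/*
 * A minimal sketch of a printf-style wrapper built on the va_list form
 * exported above; the wrapper name is hypothetical.
 *
 *	static __printf(1, 2) int my_trace_printk(const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vprintk(_THIS_IP_, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */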
3643 
trace_iterator_increment(struct trace_iterator * iter)3644 static void trace_iterator_increment(struct trace_iterator *iter)
3645 {
3646 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3647 
3648 	iter->idx++;
3649 	if (buf_iter)
3650 		ring_buffer_iter_advance(buf_iter);
3651 }
3652 
3653 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3654 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3655 		unsigned long *lost_events)
3656 {
3657 	struct ring_buffer_event *event;
3658 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3659 
3660 	if (buf_iter) {
3661 		event = ring_buffer_iter_peek(buf_iter, ts);
3662 		if (lost_events)
3663 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3664 				(unsigned long)-1 : 0;
3665 	} else {
3666 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3667 					 lost_events);
3668 	}
3669 
3670 	if (event) {
3671 		iter->ent_size = ring_buffer_event_length(event);
3672 		return ring_buffer_event_data(event);
3673 	}
3674 	iter->ent_size = 0;
3675 	return NULL;
3676 }
3677 
3678 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3679 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3680 		  unsigned long *missing_events, u64 *ent_ts)
3681 {
3682 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3683 	struct trace_entry *ent, *next = NULL;
3684 	unsigned long lost_events = 0, next_lost = 0;
3685 	int cpu_file = iter->cpu_file;
3686 	u64 next_ts = 0, ts;
3687 	int next_cpu = -1;
3688 	int next_size = 0;
3689 	int cpu;
3690 
3691 	/*
3692 	 * If we are in a per_cpu trace file, don't bother iterating over
3693 	 * all CPUs; just peek directly at the one requested.
3694 	 */
3695 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3696 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3697 			return NULL;
3698 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3699 		if (ent_cpu)
3700 			*ent_cpu = cpu_file;
3701 
3702 		return ent;
3703 	}
3704 
3705 	for_each_tracing_cpu(cpu) {
3706 
3707 		if (ring_buffer_empty_cpu(buffer, cpu))
3708 			continue;
3709 
3710 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3711 
3712 		/*
3713 		 * Pick the entry with the smallest timestamp:
3714 		 */
3715 		if (ent && (!next || ts < next_ts)) {
3716 			next = ent;
3717 			next_cpu = cpu;
3718 			next_ts = ts;
3719 			next_lost = lost_events;
3720 			next_size = iter->ent_size;
3721 		}
3722 	}
3723 
3724 	iter->ent_size = next_size;
3725 
3726 	if (ent_cpu)
3727 		*ent_cpu = next_cpu;
3728 
3729 	if (ent_ts)
3730 		*ent_ts = next_ts;
3731 
3732 	if (missing_events)
3733 		*missing_events = next_lost;
3734 
3735 	return next;
3736 }
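
/*
 * Conceptually, __find_next_entry() is a k-way merge keyed on the per-CPU
 * timestamps.  Stripped of the ring-buffer details it behaves roughly like
 * this simplified sketch (declarations omitted, peek() is illustrative):
 *
 *	next = NULL;
 *	for_each_tracing_cpu(cpu) {
 *		ent = peek(cpu, &ts);
 *		if (ent && (!next || ts < next_ts)) {
 *			next = ent;
 *			next_ts = ts;
 *		}
 *	}
 *	return next;	(the oldest pending event across all CPUs)
 */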
3737 
3738 #define STATIC_FMT_BUF_SIZE	128
3739 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3740 
trace_iter_expand_format(struct trace_iterator * iter)3741 char *trace_iter_expand_format(struct trace_iterator *iter)
3742 {
3743 	char *tmp;
3744 
3745 	/*
3746 	 * iter->tr is NULL when used with tp_printk, which makes
3747 	 * this get called where it is not safe to call krealloc().
3748 	 */
3749 	if (!iter->tr || iter->fmt == static_fmt_buf)
3750 		return NULL;
3751 
3752 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3753 		       GFP_KERNEL);
3754 	if (tmp) {
3755 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3756 		iter->fmt = tmp;
3757 	}
3758 
3759 	return tmp;
3760 }
3761 
3762 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3763 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3764 {
3765 	unsigned long addr = (unsigned long)str;
3766 	struct trace_event *trace_event;
3767 	struct trace_event_call *event;
3768 
3769 	/* OK if part of the event data */
3770 	if ((addr >= (unsigned long)iter->ent) &&
3771 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3772 		return true;
3773 
3774 	/* OK if part of the temp seq buffer */
3775 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3776 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3777 		return true;
3778 
3779 	/* Core rodata can not be freed */
3780 	if (is_kernel_rodata(addr))
3781 		return true;
3782 
3783 	if (trace_is_tracepoint_string(str))
3784 		return true;
3785 
3786 	/*
3787 	 * Now this could be a module event, referencing core module
3788 	 * data, which is OK.
3789 	 */
3790 	if (!iter->ent)
3791 		return false;
3792 
3793 	trace_event = ftrace_find_event(iter->ent->type);
3794 	if (!trace_event)
3795 		return false;
3796 
3797 	event = container_of(trace_event, struct trace_event_call, event);
3798 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3799 		return false;
3800 
3801 	/* Would rather have rodata, but this will suffice */
3802 	if (within_module_core(addr, event->module))
3803 		return true;
3804 
3805 	return false;
3806 }
3807 
3808 /**
3809  * ignore_event - Check dereferenced fields while writing to the seq buffer
3810  * @iter: The iterator that holds the seq buffer and the event being printed
3811  *
3812  * At boot up, test_event_printk() will flag any event that dereferences
3813  * a string with "%s" that is not stored in the ring buffer. It may still
3814  * be valid, as the string may point to a static string in the kernel
3815  * rodata that never gets freed. But if the string pointer is pointing
3816  * to something that was allocated, there's a chance that it can be freed
3817  * by the time the user reads the trace. This would cause a bad memory
3818  * access by the kernel and possibly crash the system.
3819  *
3820  * This function will check if the event has any fields flagged as needing
3821  * to be checked at runtime and perform those checks.
3822  *
3823  * If it is found that a field is unsafe, it will write into the @iter->seq
3824  * a message stating what was found to be unsafe.
3825  *
3826  * @return: true if the event is unsafe and should be ignored,
3827  *          false otherwise.
3828  */
ignore_event(struct trace_iterator * iter)3829 bool ignore_event(struct trace_iterator *iter)
3830 {
3831 	struct ftrace_event_field *field;
3832 	struct trace_event *trace_event;
3833 	struct trace_event_call *event;
3834 	struct list_head *head;
3835 	struct trace_seq *seq;
3836 	const void *ptr;
3837 
3838 	trace_event = ftrace_find_event(iter->ent->type);
3839 
3840 	seq = &iter->seq;
3841 
3842 	if (!trace_event) {
3843 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3844 		return true;
3845 	}
3846 
3847 	event = container_of(trace_event, struct trace_event_call, event);
3848 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3849 		return false;
3850 
3851 	head = trace_get_fields(event);
3852 	if (!head) {
3853 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3854 				 trace_event_name(event));
3855 		return true;
3856 	}
3857 
3858 	/* Offsets are from the iter->ent that points to the raw event */
3859 	ptr = iter->ent;
3860 
3861 	list_for_each_entry(field, head, link) {
3862 		const char *str;
3863 		bool good;
3864 
3865 		if (!field->needs_test)
3866 			continue;
3867 
3868 		str = *(const char **)(ptr + field->offset);
3869 
3870 		good = trace_safe_str(iter, str);
3871 
3872 		/*
3873 		 * If you hit this warning, it is likely that the
3874 		 * trace event in question used %s on a string that
3875 		 * was saved at the time of the event, but may not be
3876 		 * around when the trace is read. Use __string(),
3877 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3878 		 * instead. See samples/trace_events/trace-events-sample.h
3879 		 * for reference.
3880 		 */
3881 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3882 			      trace_event_name(event), field->name)) {
3883 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3884 					 trace_event_name(event), field->name);
3885 			return true;
3886 		}
3887 	}
3888 	return false;
3889 }
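
/*
 * The safe pattern recommended by the warning above copies the string into
 * the event itself.  A hypothetical event doing so (the __assign_str()
 * argument list varies slightly between kernel versions):
 *
 *	TRACE_EVENT(my_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * See samples/trace_events/trace-events-sample.h for the full boilerplate.
 */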
3890 
trace_event_format(struct trace_iterator * iter,const char * fmt)3891 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3892 {
3893 	const char *p, *new_fmt;
3894 	char *q;
3895 
3896 	if (WARN_ON_ONCE(!fmt))
3897 		return fmt;
3898 
3899 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3900 		return fmt;
3901 
3902 	p = fmt;
3903 	new_fmt = q = iter->fmt;
3904 	while (*p) {
3905 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3906 			if (!trace_iter_expand_format(iter))
3907 				return fmt;
3908 
3909 			q += iter->fmt - new_fmt;
3910 			new_fmt = iter->fmt;
3911 		}
3912 
3913 		*q++ = *p++;
3914 
3915 		/* Replace %p with %px */
3916 		if (p[-1] == '%') {
3917 			if (p[0] == '%') {
3918 				*q++ = *p++;
3919 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3920 				*q++ = *p++;
3921 				*q++ = 'x';
3922 			}
3923 		}
3924 	}
3925 	*q = '\0';
3926 
3927 	return new_fmt;
3928 }
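
/*
 * For example, with hash-ptr disabled a format such as
 *
 *	"ptr=%p page=%pGp done=100%%"
 *
 * comes back from trace_event_format() as
 *
 *	"ptr=%px page=%pGp done=100%%"
 *
 * Only a bare "%p" gains the 'x'; extended specifiers like "%pGp" and the
 * literal "%%" are left untouched.
 */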
3929 
3930 #define STATIC_TEMP_BUF_SIZE	128
3931 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3932 
3933 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3934 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3935 					  int *ent_cpu, u64 *ent_ts)
3936 {
3937 	/* __find_next_entry will reset ent_size */
3938 	int ent_size = iter->ent_size;
3939 	struct trace_entry *entry;
3940 
3941 	/*
3942 	 * If called from ftrace_dump(), then the iter->temp buffer
3943 	 * will be the static_temp_buf and not created from kmalloc.
3944 	 * If the entry size is greater than the buffer, we can
3945 	 * not save it. Just return NULL in that case. This is only
3946 	 * used to add markers when two consecutive events' time
3947 	 * stamps have a large delta. See trace_print_lat_context()
3948 	 */
3949 	if (iter->temp == static_temp_buf &&
3950 	    STATIC_TEMP_BUF_SIZE < ent_size)
3951 		return NULL;
3952 
3953 	/*
3954 	 * __find_next_entry() may call peek_next_entry(), which may
3955 	 * call ring_buffer_peek() that may make the contents of iter->ent
3956 	 * undefined. Need to copy iter->ent now.
3957 	 */
3958 	if (iter->ent && iter->ent != iter->temp) {
3959 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3960 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3961 			void *temp;
3962 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3963 			if (!temp)
3964 				return NULL;
3965 			kfree(iter->temp);
3966 			iter->temp = temp;
3967 			iter->temp_size = iter->ent_size;
3968 		}
3969 		memcpy(iter->temp, iter->ent, iter->ent_size);
3970 		iter->ent = iter->temp;
3971 	}
3972 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3973 	/* Put back the original ent_size */
3974 	iter->ent_size = ent_size;
3975 
3976 	return entry;
3977 }
3978 
3979 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3980 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3981 {
3982 	iter->ent = __find_next_entry(iter, &iter->cpu,
3983 				      &iter->lost_events, &iter->ts);
3984 
3985 	if (iter->ent)
3986 		trace_iterator_increment(iter);
3987 
3988 	return iter->ent ? iter : NULL;
3989 }
3990 
trace_consume(struct trace_iterator * iter)3991 static void trace_consume(struct trace_iterator *iter)
3992 {
3993 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3994 			    &iter->lost_events);
3995 }
3996 
s_next(struct seq_file * m,void * v,loff_t * pos)3997 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3998 {
3999 	struct trace_iterator *iter = m->private;
4000 	int i = (int)*pos;
4001 	void *ent;
4002 
4003 	WARN_ON_ONCE(iter->leftover);
4004 
4005 	(*pos)++;
4006 
4007 	/* can't go backwards */
4008 	if (iter->idx > i)
4009 		return NULL;
4010 
4011 	if (iter->idx < 0)
4012 		ent = trace_find_next_entry_inc(iter);
4013 	else
4014 		ent = iter;
4015 
4016 	while (ent && iter->idx < i)
4017 		ent = trace_find_next_entry_inc(iter);
4018 
4019 	iter->pos = *pos;
4020 
4021 	return ent;
4022 }
4023 
tracing_iter_reset(struct trace_iterator * iter,int cpu)4024 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4025 {
4026 	struct ring_buffer_iter *buf_iter;
4027 	unsigned long entries = 0;
4028 	u64 ts;
4029 
4030 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4031 
4032 	buf_iter = trace_buffer_iter(iter, cpu);
4033 	if (!buf_iter)
4034 		return;
4035 
4036 	ring_buffer_iter_reset(buf_iter);
4037 
4038 	/*
4039 	 * With the max latency tracers, we could have the case that
4040 	 * a reset never took place on a CPU. This is evident from
4041 	 * the timestamp being before the start of the buffer.
4042 	 */
4043 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4044 		if (ts >= iter->array_buffer->time_start)
4045 			break;
4046 		entries++;
4047 		ring_buffer_iter_advance(buf_iter);
4048 		/* This could be a big loop */
4049 		cond_resched();
4050 	}
4051 
4052 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4053 }
4054 
4055 /*
4056  * The current tracer is copied to avoid holding a global lock
4057  * the whole time.
4058  */
s_start(struct seq_file * m,loff_t * pos)4059 static void *s_start(struct seq_file *m, loff_t *pos)
4060 {
4061 	struct trace_iterator *iter = m->private;
4062 	struct trace_array *tr = iter->tr;
4063 	int cpu_file = iter->cpu_file;
4064 	void *p = NULL;
4065 	loff_t l = 0;
4066 	int cpu;
4067 
4068 	mutex_lock(&trace_types_lock);
4069 	if (unlikely(tr->current_trace != iter->trace)) {
4070 		/* Close iter->trace before switching to the new current tracer */
4071 		if (iter->trace->close)
4072 			iter->trace->close(iter);
4073 		iter->trace = tr->current_trace;
4074 		/* Reopen the new current tracer */
4075 		if (iter->trace->open)
4076 			iter->trace->open(iter);
4077 	}
4078 	mutex_unlock(&trace_types_lock);
4079 
4080 #ifdef CONFIG_TRACER_MAX_TRACE
4081 	if (iter->snapshot && iter->trace->use_max_tr)
4082 		return ERR_PTR(-EBUSY);
4083 #endif
4084 
4085 	if (*pos != iter->pos) {
4086 		iter->ent = NULL;
4087 		iter->cpu = 0;
4088 		iter->idx = -1;
4089 
4090 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4091 			for_each_tracing_cpu(cpu)
4092 				tracing_iter_reset(iter, cpu);
4093 		} else
4094 			tracing_iter_reset(iter, cpu_file);
4095 
4096 		iter->leftover = 0;
4097 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4098 			;
4099 
4100 	} else {
4101 		/*
4102 		 * If we overflowed the seq_file before, then we want
4103 		 * to just reuse the trace_seq buffer again.
4104 		 */
4105 		if (iter->leftover)
4106 			p = iter;
4107 		else {
4108 			l = *pos - 1;
4109 			p = s_next(m, p, &l);
4110 		}
4111 	}
4112 
4113 	trace_event_read_lock();
4114 	trace_access_lock(cpu_file);
4115 	return p;
4116 }
4117 
s_stop(struct seq_file * m,void * p)4118 static void s_stop(struct seq_file *m, void *p)
4119 {
4120 	struct trace_iterator *iter = m->private;
4121 
4122 #ifdef CONFIG_TRACER_MAX_TRACE
4123 	if (iter->snapshot && iter->trace->use_max_tr)
4124 		return;
4125 #endif
4126 
4127 	trace_access_unlock(iter->cpu_file);
4128 	trace_event_read_unlock();
4129 }
4130 
4131 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4132 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4133 		      unsigned long *entries, int cpu)
4134 {
4135 	unsigned long count;
4136 
4137 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4138 	/*
4139 	 * If this buffer has skipped entries, then we hold all
4140 	 * entries for the trace and we need to ignore the
4141 	 * ones before the time stamp.
4142 	 */
4143 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4144 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4145 		/* total is the same as the entries */
4146 		*total = count;
4147 	} else
4148 		*total = count +
4149 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4150 	*entries = count;
4151 }
4152 
4153 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4154 get_total_entries(struct array_buffer *buf,
4155 		  unsigned long *total, unsigned long *entries)
4156 {
4157 	unsigned long t, e;
4158 	int cpu;
4159 
4160 	*total = 0;
4161 	*entries = 0;
4162 
4163 	for_each_tracing_cpu(cpu) {
4164 		get_total_entries_cpu(buf, &t, &e, cpu);
4165 		*total += t;
4166 		*entries += e;
4167 	}
4168 }
4169 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4170 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4171 {
4172 	unsigned long total, entries;
4173 
4174 	if (!tr)
4175 		tr = &global_trace;
4176 
4177 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4178 
4179 	return entries;
4180 }
4181 
trace_total_entries(struct trace_array * tr)4182 unsigned long trace_total_entries(struct trace_array *tr)
4183 {
4184 	unsigned long total, entries;
4185 
4186 	if (!tr)
4187 		tr = &global_trace;
4188 
4189 	get_total_entries(&tr->array_buffer, &total, &entries);
4190 
4191 	return entries;
4192 }
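
/*
 * Note the distinction made above: "entries" counts what is still resident
 * in the buffer, while "total" also includes events lost to overwrite, so a
 * buffer holding 1000 events with 200 overrun yields entries=1000 and
 * total=1200.  A minimal sketch of calling the helpers (the message is
 * hypothetical):
 *
 *	pr_info("top-level buffer: %lu events (%lu on CPU0)\n",
 *		trace_total_entries(NULL),
 *		trace_total_entries_cpu(NULL, 0));
 */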
4193 
print_lat_help_header(struct seq_file * m)4194 static void print_lat_help_header(struct seq_file *m)
4195 {
4196 	seq_puts(m, "#                    _------=> CPU#            \n"
4197 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4198 		    "#                  | / _----=> need-resched    \n"
4199 		    "#                  || / _---=> hardirq/softirq \n"
4200 		    "#                  ||| / _--=> preempt-depth   \n"
4201 		    "#                  |||| / _-=> migrate-disable \n"
4202 		    "#                  ||||| /     delay           \n"
4203 		    "#  cmd     pid     |||||| time  |   caller     \n"
4204 		    "#     \\   /        ||||||  \\    |    /       \n");
4205 }
4206 
print_event_info(struct array_buffer * buf,struct seq_file * m)4207 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4208 {
4209 	unsigned long total;
4210 	unsigned long entries;
4211 
4212 	get_total_entries(buf, &total, &entries);
4213 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4214 		   entries, total, num_online_cpus());
4215 	seq_puts(m, "#\n");
4216 }
4217 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4218 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4219 				   unsigned int flags)
4220 {
4221 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222 
4223 	print_event_info(buf, m);
4224 
4225 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4226 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4227 }
4228 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4229 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4230 				       unsigned int flags)
4231 {
4232 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4233 	static const char space[] = "            ";
4234 	int prec = tgid ? 12 : 2;
4235 
4236 	print_event_info(buf, m);
4237 
4238 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4239 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4240 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4241 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4242 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4243 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4244 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4245 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4246 }
4247 
4248 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4249 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4250 {
4251 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4252 	struct array_buffer *buf = iter->array_buffer;
4253 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4254 	struct tracer *type = iter->trace;
4255 	unsigned long entries;
4256 	unsigned long total;
4257 	const char *name = type->name;
4258 
4259 	get_total_entries(buf, &total, &entries);
4260 
4261 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4262 		   name, UTS_RELEASE);
4263 	seq_puts(m, "# -----------------------------------"
4264 		 "---------------------------------\n");
4265 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4266 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4267 		   nsecs_to_usecs(data->saved_latency),
4268 		   entries,
4269 		   total,
4270 		   buf->cpu,
4271 		   preempt_model_none()      ? "server" :
4272 		   preempt_model_voluntary() ? "desktop" :
4273 		   preempt_model_full()      ? "preempt" :
4274 		   preempt_model_rt()        ? "preempt_rt" :
4275 		   "unknown",
4276 		   /* These are reserved for later use */
4277 		   0, 0, 0, 0);
4278 #ifdef CONFIG_SMP
4279 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4280 #else
4281 	seq_puts(m, ")\n");
4282 #endif
4283 	seq_puts(m, "#    -----------------\n");
4284 	seq_printf(m, "#    | task: %.16s-%d "
4285 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4286 		   data->comm, data->pid,
4287 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4288 		   data->policy, data->rt_priority);
4289 	seq_puts(m, "#    -----------------\n");
4290 
4291 	if (data->critical_start) {
4292 		seq_puts(m, "#  => started at: ");
4293 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4294 		trace_print_seq(m, &iter->seq);
4295 		seq_puts(m, "\n#  => ended at:   ");
4296 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4297 		trace_print_seq(m, &iter->seq);
4298 		seq_puts(m, "\n#\n");
4299 	}
4300 
4301 	seq_puts(m, "#\n");
4302 }
4303 
test_cpu_buff_start(struct trace_iterator * iter)4304 static void test_cpu_buff_start(struct trace_iterator *iter)
4305 {
4306 	struct trace_seq *s = &iter->seq;
4307 	struct trace_array *tr = iter->tr;
4308 
4309 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4310 		return;
4311 
4312 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4313 		return;
4314 
4315 	if (cpumask_available(iter->started) &&
4316 	    cpumask_test_cpu(iter->cpu, iter->started))
4317 		return;
4318 
4319 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4320 		return;
4321 
4322 	if (cpumask_available(iter->started))
4323 		cpumask_set_cpu(iter->cpu, iter->started);
4324 
4325 	/* Don't print started cpu buffer for the first entry of the trace */
4326 	if (iter->idx > 1)
4327 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4328 				iter->cpu);
4329 }
4330 
print_trace_fmt(struct trace_iterator * iter)4331 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4332 {
4333 	struct trace_array *tr = iter->tr;
4334 	struct trace_seq *s = &iter->seq;
4335 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4336 	struct trace_entry *entry;
4337 	struct trace_event *event;
4338 
4339 	entry = iter->ent;
4340 
4341 	test_cpu_buff_start(iter);
4342 
4343 	event = ftrace_find_event(entry->type);
4344 
4345 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4346 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4347 			trace_print_lat_context(iter);
4348 		else
4349 			trace_print_context(iter);
4350 	}
4351 
4352 	if (trace_seq_has_overflowed(s))
4353 		return TRACE_TYPE_PARTIAL_LINE;
4354 
4355 	if (event) {
4356 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4357 			return print_event_fields(iter, event);
4358 		return event->funcs->trace(iter, sym_flags, event);
4359 	}
4360 
4361 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4362 
4363 	return trace_handle_return(s);
4364 }
4365 
print_raw_fmt(struct trace_iterator * iter)4366 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4367 {
4368 	struct trace_array *tr = iter->tr;
4369 	struct trace_seq *s = &iter->seq;
4370 	struct trace_entry *entry;
4371 	struct trace_event *event;
4372 
4373 	entry = iter->ent;
4374 
4375 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4376 		trace_seq_printf(s, "%d %d %llu ",
4377 				 entry->pid, iter->cpu, iter->ts);
4378 
4379 	if (trace_seq_has_overflowed(s))
4380 		return TRACE_TYPE_PARTIAL_LINE;
4381 
4382 	event = ftrace_find_event(entry->type);
4383 	if (event)
4384 		return event->funcs->raw(iter, 0, event);
4385 
4386 	trace_seq_printf(s, "%d ?\n", entry->type);
4387 
4388 	return trace_handle_return(s);
4389 }
4390 
print_hex_fmt(struct trace_iterator * iter)4391 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4392 {
4393 	struct trace_array *tr = iter->tr;
4394 	struct trace_seq *s = &iter->seq;
4395 	unsigned char newline = '\n';
4396 	struct trace_entry *entry;
4397 	struct trace_event *event;
4398 
4399 	entry = iter->ent;
4400 
4401 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4402 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4403 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4404 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4405 		if (trace_seq_has_overflowed(s))
4406 			return TRACE_TYPE_PARTIAL_LINE;
4407 	}
4408 
4409 	event = ftrace_find_event(entry->type);
4410 	if (event) {
4411 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4412 		if (ret != TRACE_TYPE_HANDLED)
4413 			return ret;
4414 	}
4415 
4416 	SEQ_PUT_FIELD(s, newline);
4417 
4418 	return trace_handle_return(s);
4419 }
4420 
print_bin_fmt(struct trace_iterator * iter)4421 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4422 {
4423 	struct trace_array *tr = iter->tr;
4424 	struct trace_seq *s = &iter->seq;
4425 	struct trace_entry *entry;
4426 	struct trace_event *event;
4427 
4428 	entry = iter->ent;
4429 
4430 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4431 		SEQ_PUT_FIELD(s, entry->pid);
4432 		SEQ_PUT_FIELD(s, iter->cpu);
4433 		SEQ_PUT_FIELD(s, iter->ts);
4434 		if (trace_seq_has_overflowed(s))
4435 			return TRACE_TYPE_PARTIAL_LINE;
4436 	}
4437 
4438 	event = ftrace_find_event(entry->type);
4439 	return event ? event->funcs->binary(iter, 0, event) :
4440 		TRACE_TYPE_HANDLED;
4441 }
4442 
trace_empty(struct trace_iterator * iter)4443 int trace_empty(struct trace_iterator *iter)
4444 {
4445 	struct ring_buffer_iter *buf_iter;
4446 	int cpu;
4447 
4448 	/* If we are looking at one CPU buffer, only check that one */
4449 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4450 		cpu = iter->cpu_file;
4451 		buf_iter = trace_buffer_iter(iter, cpu);
4452 		if (buf_iter) {
4453 			if (!ring_buffer_iter_empty(buf_iter))
4454 				return 0;
4455 		} else {
4456 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4457 				return 0;
4458 		}
4459 		return 1;
4460 	}
4461 
4462 	for_each_tracing_cpu(cpu) {
4463 		buf_iter = trace_buffer_iter(iter, cpu);
4464 		if (buf_iter) {
4465 			if (!ring_buffer_iter_empty(buf_iter))
4466 				return 0;
4467 		} else {
4468 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4469 				return 0;
4470 		}
4471 	}
4472 
4473 	return 1;
4474 }
4475 
4476 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4477 enum print_line_t print_trace_line(struct trace_iterator *iter)
4478 {
4479 	struct trace_array *tr = iter->tr;
4480 	unsigned long trace_flags = tr->trace_flags;
4481 	enum print_line_t ret;
4482 
4483 	if (iter->lost_events) {
4484 		if (iter->lost_events == (unsigned long)-1)
4485 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4486 					 iter->cpu);
4487 		else
4488 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4489 					 iter->cpu, iter->lost_events);
4490 		if (trace_seq_has_overflowed(&iter->seq))
4491 			return TRACE_TYPE_PARTIAL_LINE;
4492 	}
4493 
4494 	if (iter->trace && iter->trace->print_line) {
4495 		ret = iter->trace->print_line(iter);
4496 		if (ret != TRACE_TYPE_UNHANDLED)
4497 			return ret;
4498 	}
4499 
4500 	if (iter->ent->type == TRACE_BPUTS &&
4501 			trace_flags & TRACE_ITER_PRINTK &&
4502 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4503 		return trace_print_bputs_msg_only(iter);
4504 
4505 	if (iter->ent->type == TRACE_BPRINT &&
4506 			trace_flags & TRACE_ITER_PRINTK &&
4507 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508 		return trace_print_bprintk_msg_only(iter);
4509 
4510 	if (iter->ent->type == TRACE_PRINT &&
4511 			trace_flags & TRACE_ITER_PRINTK &&
4512 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513 		return trace_print_printk_msg_only(iter);
4514 
4515 	if (trace_flags & TRACE_ITER_BIN)
4516 		return print_bin_fmt(iter);
4517 
4518 	if (trace_flags & TRACE_ITER_HEX)
4519 		return print_hex_fmt(iter);
4520 
4521 	if (trace_flags & TRACE_ITER_RAW)
4522 		return print_raw_fmt(iter);
4523 
4524 	return print_trace_fmt(iter);
4525 }
4526 
trace_latency_header(struct seq_file * m)4527 void trace_latency_header(struct seq_file *m)
4528 {
4529 	struct trace_iterator *iter = m->private;
4530 	struct trace_array *tr = iter->tr;
4531 
4532 	/* print nothing if the buffers are empty */
4533 	if (trace_empty(iter))
4534 		return;
4535 
4536 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4537 		print_trace_header(m, iter);
4538 
4539 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4540 		print_lat_help_header(m);
4541 }
4542 
trace_default_header(struct seq_file * m)4543 void trace_default_header(struct seq_file *m)
4544 {
4545 	struct trace_iterator *iter = m->private;
4546 	struct trace_array *tr = iter->tr;
4547 	unsigned long trace_flags = tr->trace_flags;
4548 
4549 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4550 		return;
4551 
4552 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4553 		/* print nothing if the buffers are empty */
4554 		if (trace_empty(iter))
4555 			return;
4556 		print_trace_header(m, iter);
4557 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4558 			print_lat_help_header(m);
4559 	} else {
4560 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4561 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4562 				print_func_help_header_irq(iter->array_buffer,
4563 							   m, trace_flags);
4564 			else
4565 				print_func_help_header(iter->array_buffer, m,
4566 						       trace_flags);
4567 		}
4568 	}
4569 }
4570 
test_ftrace_alive(struct seq_file * m)4571 static void test_ftrace_alive(struct seq_file *m)
4572 {
4573 	if (!ftrace_is_dead())
4574 		return;
4575 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4576 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4577 }
4578 
4579 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4580 static void show_snapshot_main_help(struct seq_file *m)
4581 {
4582 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4583 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4584 		    "#                      Takes a snapshot of the main buffer.\n"
4585 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4586 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4587 		    "#                       is not a '0' or '1')\n");
4588 }
4589 
show_snapshot_percpu_help(struct seq_file * m)4590 static void show_snapshot_percpu_help(struct seq_file *m)
4591 {
4592 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4593 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4594 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4595 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4596 #else
4597 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4598 		    "#                     Must use main snapshot file to allocate.\n");
4599 #endif
4600 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4601 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4602 		    "#                       is not a '0' or '1')\n");
4603 }
4604 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4605 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4606 {
4607 	if (iter->tr->allocated_snapshot)
4608 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4609 	else
4610 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4611 
4612 	seq_puts(m, "# Snapshot commands:\n");
4613 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4614 		show_snapshot_main_help(m);
4615 	else
4616 		show_snapshot_percpu_help(m);
4617 }
4618 #else
4619 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4620 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4621 #endif
4622 
s_show(struct seq_file * m,void * v)4623 static int s_show(struct seq_file *m, void *v)
4624 {
4625 	struct trace_iterator *iter = v;
4626 	int ret;
4627 
4628 	if (iter->ent == NULL) {
4629 		if (iter->tr) {
4630 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4631 			seq_puts(m, "#\n");
4632 			test_ftrace_alive(m);
4633 		}
4634 		if (iter->snapshot && trace_empty(iter))
4635 			print_snapshot_help(m, iter);
4636 		else if (iter->trace && iter->trace->print_header)
4637 			iter->trace->print_header(m);
4638 		else
4639 			trace_default_header(m);
4640 
4641 	} else if (iter->leftover) {
4642 		/*
4643 		 * If we filled the seq_file buffer earlier, we
4644 		 * want to just show it now.
4645 		 */
4646 		ret = trace_print_seq(m, &iter->seq);
4647 
4648 		/* ret should this time be zero, but you never know */
4649 		iter->leftover = ret;
4650 
4651 	} else {
4652 		ret = print_trace_line(iter);
4653 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4654 			iter->seq.full = 0;
4655 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4656 		}
4657 		ret = trace_print_seq(m, &iter->seq);
4658 		/*
4659 		 * If we overflow the seq_file buffer, then it will
4660 		 * ask us for this data again at start up.
4661 		 * Use that instead.
4662 		 *  ret is 0 if seq_file write succeeded.
4663 		 *        -1 otherwise.
4664 		 */
4665 		iter->leftover = ret;
4666 	}
4667 
4668 	return 0;
4669 }
4670 
4671 /*
4672  * Should be used after trace_array_get(), trace_types_lock
4673  * ensures that i_cdev was already initialized.
4674  */
tracing_get_cpu(struct inode * inode)4675 static inline int tracing_get_cpu(struct inode *inode)
4676 {
4677 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4678 		return (long)inode->i_cdev - 1;
4679 	return RING_BUFFER_ALL_CPUS;
4680 }
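
/*
 * For example, the per-CPU trace file for CPU 3 has i_cdev set to
 * (void *)(3 + 1) by trace_create_cpu_file(), so this returns 3, while the
 * top-level file leaves i_cdev NULL and gets RING_BUFFER_ALL_CPUS.
 */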
4681 
4682 static const struct seq_operations tracer_seq_ops = {
4683 	.start		= s_start,
4684 	.next		= s_next,
4685 	.stop		= s_stop,
4686 	.show		= s_show,
4687 };
4688 
4689 /*
4690  * Note, as iter itself can be allocated and freed in different
4691  * ways, this function is only used to free its content, and not
4692  * the iterator itself. The only requirement on all the allocations
4693  * is that the fields are zeroed (kzalloc), as freeing works with
4694  * either allocated content or NULL.
4695  */
free_trace_iter_content(struct trace_iterator * iter)4696 static void free_trace_iter_content(struct trace_iterator *iter)
4697 {
4698 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4699 	if (iter->fmt != static_fmt_buf)
4700 		kfree(iter->fmt);
4701 
4702 	kfree(iter->temp);
4703 	kfree(iter->buffer_iter);
4704 	mutex_destroy(&iter->mutex);
4705 	free_cpumask_var(iter->started);
4706 }
4707 
4708 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4709 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4710 {
4711 	struct trace_array *tr = inode->i_private;
4712 	struct trace_iterator *iter;
4713 	int cpu;
4714 
4715 	if (tracing_disabled)
4716 		return ERR_PTR(-ENODEV);
4717 
4718 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4719 	if (!iter)
4720 		return ERR_PTR(-ENOMEM);
4721 
4722 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4723 				    GFP_KERNEL);
4724 	if (!iter->buffer_iter)
4725 		goto release;
4726 
4727 	/*
4728 	 * trace_find_next_entry() may need to save off iter->ent.
4729 	 * It will place it into the iter->temp buffer. As most
4730 	 * events are less than 128 bytes, allocate a buffer of that size.
4731 	 * If one is greater, then trace_find_next_entry() will
4732 	 * allocate a new buffer to adjust for the bigger iter->ent.
4733 	 * It's not critical if it fails to get allocated here.
4734 	 */
4735 	iter->temp = kmalloc(128, GFP_KERNEL);
4736 	if (iter->temp)
4737 		iter->temp_size = 128;
4738 
4739 	/*
4740 	 * trace_event_printf() may need to modify the given format
4741 	 * string to replace %p with %px so that it shows the real address
4742 	 * instead of a hashed value. However, that is only needed for
4743 	 * event tracing; other tracers may not need it. Defer the
4744 	 * allocation until it is needed.
4745 	 */
4746 	iter->fmt = NULL;
4747 	iter->fmt_size = 0;
4748 
4749 	mutex_lock(&trace_types_lock);
4750 	iter->trace = tr->current_trace;
4751 
4752 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4753 		goto fail;
4754 
4755 	iter->tr = tr;
4756 
4757 #ifdef CONFIG_TRACER_MAX_TRACE
4758 	/* Currently only the top directory has a snapshot */
4759 	if (tr->current_trace->print_max || snapshot)
4760 		iter->array_buffer = &tr->max_buffer;
4761 	else
4762 #endif
4763 		iter->array_buffer = &tr->array_buffer;
4764 	iter->snapshot = snapshot;
4765 	iter->pos = -1;
4766 	iter->cpu_file = tracing_get_cpu(inode);
4767 	mutex_init(&iter->mutex);
4768 
4769 	/* Notify the tracer early, before we stop tracing. */
4770 	if (iter->trace->open)
4771 		iter->trace->open(iter);
4772 
4773 	/* Annotate start of buffers if we had overruns */
4774 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4775 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4776 
4777 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4778 	if (trace_clocks[tr->clock_id].in_ns)
4779 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4780 
4781 	/*
4782 	 * If pause-on-trace is enabled, then stop the trace while
4783 	 * dumping, unless this is the "snapshot" file
4784 	 */
4785 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4786 		tracing_stop_tr(tr);
4787 
4788 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4789 		for_each_tracing_cpu(cpu) {
4790 			iter->buffer_iter[cpu] =
4791 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4792 							 cpu, GFP_KERNEL);
4793 		}
4794 		ring_buffer_read_prepare_sync();
4795 		for_each_tracing_cpu(cpu) {
4796 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4797 			tracing_iter_reset(iter, cpu);
4798 		}
4799 	} else {
4800 		cpu = iter->cpu_file;
4801 		iter->buffer_iter[cpu] =
4802 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4803 						 cpu, GFP_KERNEL);
4804 		ring_buffer_read_prepare_sync();
4805 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4806 		tracing_iter_reset(iter, cpu);
4807 	}
4808 
4809 	mutex_unlock(&trace_types_lock);
4810 
4811 	return iter;
4812 
4813  fail:
4814 	mutex_unlock(&trace_types_lock);
4815 	free_trace_iter_content(iter);
4816 release:
4817 	seq_release_private(inode, file);
4818 	return ERR_PTR(-ENOMEM);
4819 }
4820 
tracing_open_generic(struct inode * inode,struct file * filp)4821 int tracing_open_generic(struct inode *inode, struct file *filp)
4822 {
4823 	int ret;
4824 
4825 	ret = tracing_check_open_get_tr(NULL);
4826 	if (ret)
4827 		return ret;
4828 
4829 	filp->private_data = inode->i_private;
4830 	return 0;
4831 }
4832 
tracing_is_disabled(void)4833 bool tracing_is_disabled(void)
4834 {
4835 	return tracing_disabled;
4836 }
4837 
4838 /*
4839  * Open and update trace_array ref count.
4840  * Must have the current trace_array passed to it.
4841  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4842 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4843 {
4844 	struct trace_array *tr = inode->i_private;
4845 	int ret;
4846 
4847 	ret = tracing_check_open_get_tr(tr);
4848 	if (ret)
4849 		return ret;
4850 
4851 	filp->private_data = inode->i_private;
4852 
4853 	return 0;
4854 }
4855 
4856 /*
4857  * The private pointer of the inode is the trace_event_file.
4858  * Update the tr ref count associated to it.
4859  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4860 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4861 {
4862 	struct trace_event_file *file = inode->i_private;
4863 	int ret;
4864 
4865 	ret = tracing_check_open_get_tr(file->tr);
4866 	if (ret)
4867 		return ret;
4868 
4869 	mutex_lock(&event_mutex);
4870 
4871 	/* Fail if the file is marked for removal */
4872 	if (file->flags & EVENT_FILE_FL_FREED) {
4873 		trace_array_put(file->tr);
4874 		ret = -ENODEV;
4875 	} else {
4876 		event_file_get(file);
4877 	}
4878 
4879 	mutex_unlock(&event_mutex);
4880 	if (ret)
4881 		return ret;
4882 
4883 	filp->private_data = inode->i_private;
4884 
4885 	return 0;
4886 }
4887 
tracing_release_file_tr(struct inode * inode,struct file * filp)4888 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4889 {
4890 	struct trace_event_file *file = inode->i_private;
4891 
4892 	trace_array_put(file->tr);
4893 	event_file_put(file);
4894 
4895 	return 0;
4896 }
4897 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4898 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4899 {
4900 	tracing_release_file_tr(inode, filp);
4901 	return single_release(inode, filp);
4902 }
4903 
tracing_mark_open(struct inode * inode,struct file * filp)4904 static int tracing_mark_open(struct inode *inode, struct file *filp)
4905 {
4906 	stream_open(inode, filp);
4907 	return tracing_open_generic_tr(inode, filp);
4908 }
4909 
tracing_release(struct inode * inode,struct file * file)4910 static int tracing_release(struct inode *inode, struct file *file)
4911 {
4912 	struct trace_array *tr = inode->i_private;
4913 	struct seq_file *m = file->private_data;
4914 	struct trace_iterator *iter;
4915 	int cpu;
4916 
4917 	if (!(file->f_mode & FMODE_READ)) {
4918 		trace_array_put(tr);
4919 		return 0;
4920 	}
4921 
4922 	/* Writes do not use seq_file */
4923 	iter = m->private;
4924 	mutex_lock(&trace_types_lock);
4925 
4926 	for_each_tracing_cpu(cpu) {
4927 		if (iter->buffer_iter[cpu])
4928 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4929 	}
4930 
4931 	if (iter->trace && iter->trace->close)
4932 		iter->trace->close(iter);
4933 
4934 	if (!iter->snapshot && tr->stop_count)
4935 		/* reenable tracing if it was previously enabled */
4936 		tracing_start_tr(tr);
4937 
4938 	__trace_array_put(tr);
4939 
4940 	mutex_unlock(&trace_types_lock);
4941 
4942 	free_trace_iter_content(iter);
4943 	seq_release_private(inode, file);
4944 
4945 	return 0;
4946 }
4947 
tracing_release_generic_tr(struct inode * inode,struct file * file)4948 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4949 {
4950 	struct trace_array *tr = inode->i_private;
4951 
4952 	trace_array_put(tr);
4953 	return 0;
4954 }
4955 
tracing_single_release_tr(struct inode * inode,struct file * file)4956 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4957 {
4958 	struct trace_array *tr = inode->i_private;
4959 
4960 	trace_array_put(tr);
4961 
4962 	return single_release(inode, file);
4963 }
4964 
tracing_open(struct inode * inode,struct file * file)4965 static int tracing_open(struct inode *inode, struct file *file)
4966 {
4967 	struct trace_array *tr = inode->i_private;
4968 	struct trace_iterator *iter;
4969 	int ret;
4970 
4971 	ret = tracing_check_open_get_tr(tr);
4972 	if (ret)
4973 		return ret;
4974 
4975 	/* If this file was open for write, then erase contents */
4976 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4977 		int cpu = tracing_get_cpu(inode);
4978 		struct array_buffer *trace_buf = &tr->array_buffer;
4979 
4980 #ifdef CONFIG_TRACER_MAX_TRACE
4981 		if (tr->current_trace->print_max)
4982 			trace_buf = &tr->max_buffer;
4983 #endif
4984 
4985 		if (cpu == RING_BUFFER_ALL_CPUS)
4986 			tracing_reset_online_cpus(trace_buf);
4987 		else
4988 			tracing_reset_cpu(trace_buf, cpu);
4989 	}
4990 
4991 	if (file->f_mode & FMODE_READ) {
4992 		iter = __tracing_open(inode, file, false);
4993 		if (IS_ERR(iter))
4994 			ret = PTR_ERR(iter);
4995 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4996 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4997 	}
4998 
4999 	if (ret < 0)
5000 		trace_array_put(tr);
5001 
5002 	return ret;
5003 }
5004 
5005 /*
5006  * Some tracers are not suitable for instance buffers.
5007  * A tracer is always available for the global array (toplevel)
5008  * or if it explicitly states that instances are allowed.
5009  */
5010 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5011 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5012 {
5013 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5014 }
5015 
5016 /* Find the next tracer that this trace array may use */
5017 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5018 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5019 {
5020 	while (t && !trace_ok_for_array(t, tr))
5021 		t = t->next;
5022 
5023 	return t;
5024 }
5025 
5026 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5027 t_next(struct seq_file *m, void *v, loff_t *pos)
5028 {
5029 	struct trace_array *tr = m->private;
5030 	struct tracer *t = v;
5031 
5032 	(*pos)++;
5033 
5034 	if (t)
5035 		t = get_tracer_for_array(tr, t->next);
5036 
5037 	return t;
5038 }
5039 
t_start(struct seq_file * m,loff_t * pos)5040 static void *t_start(struct seq_file *m, loff_t *pos)
5041 {
5042 	struct trace_array *tr = m->private;
5043 	struct tracer *t;
5044 	loff_t l = 0;
5045 
5046 	mutex_lock(&trace_types_lock);
5047 
5048 	t = get_tracer_for_array(tr, trace_types);
5049 	for (; t && l < *pos; t = t_next(m, t, &l))
5050 			;
5051 
5052 	return t;
5053 }
5054 
t_stop(struct seq_file * m,void * p)5055 static void t_stop(struct seq_file *m, void *p)
5056 {
5057 	mutex_unlock(&trace_types_lock);
5058 }
5059 
t_show(struct seq_file * m,void * v)5060 static int t_show(struct seq_file *m, void *v)
5061 {
5062 	struct tracer *t = v;
5063 
5064 	if (!t)
5065 		return 0;
5066 
5067 	seq_puts(m, t->name);
5068 	if (t->next)
5069 		seq_putc(m, ' ');
5070 	else
5071 		seq_putc(m, '\n');
5072 
5073 	return 0;
5074 }
5075 
5076 static const struct seq_operations show_traces_seq_ops = {
5077 	.start		= t_start,
5078 	.next		= t_next,
5079 	.stop		= t_stop,
5080 	.show		= t_show,
5081 };
5082 
show_traces_open(struct inode * inode,struct file * file)5083 static int show_traces_open(struct inode *inode, struct file *file)
5084 {
5085 	struct trace_array *tr = inode->i_private;
5086 	struct seq_file *m;
5087 	int ret;
5088 
5089 	ret = tracing_check_open_get_tr(tr);
5090 	if (ret)
5091 		return ret;
5092 
5093 	ret = seq_open(file, &show_traces_seq_ops);
5094 	if (ret) {
5095 		trace_array_put(tr);
5096 		return ret;
5097 	}
5098 
5099 	m = file->private_data;
5100 	m->private = tr;
5101 
5102 	return 0;
5103 }
5104 
show_traces_release(struct inode * inode,struct file * file)5105 static int show_traces_release(struct inode *inode, struct file *file)
5106 {
5107 	struct trace_array *tr = inode->i_private;
5108 
5109 	trace_array_put(tr);
5110 	return seq_release(inode, file);
5111 }
5112 
5113 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5114 tracing_write_stub(struct file *filp, const char __user *ubuf,
5115 		   size_t count, loff_t *ppos)
5116 {
5117 	return count;
5118 }
5119 
tracing_lseek(struct file * file,loff_t offset,int whence)5120 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5121 {
5122 	int ret;
5123 
5124 	if (file->f_mode & FMODE_READ)
5125 		ret = seq_lseek(file, offset, whence);
5126 	else
5127 		file->f_pos = ret = 0;
5128 
5129 	return ret;
5130 }
5131 
5132 static const struct file_operations tracing_fops = {
5133 	.open		= tracing_open,
5134 	.read		= seq_read,
5135 	.read_iter	= seq_read_iter,
5136 	.splice_read	= copy_splice_read,
5137 	.write		= tracing_write_stub,
5138 	.llseek		= tracing_lseek,
5139 	.release	= tracing_release,
5140 };
5141 
5142 static const struct file_operations show_traces_fops = {
5143 	.open		= show_traces_open,
5144 	.read		= seq_read,
5145 	.llseek		= seq_lseek,
5146 	.release	= show_traces_release,
5147 };
5148 
5149 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5150 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5151 		     size_t count, loff_t *ppos)
5152 {
5153 	struct trace_array *tr = file_inode(filp)->i_private;
5154 	char *mask_str;
5155 	int len;
5156 
5157 	len = snprintf(NULL, 0, "%*pb\n",
5158 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5159 	mask_str = kmalloc(len, GFP_KERNEL);
5160 	if (!mask_str)
5161 		return -ENOMEM;
5162 
5163 	len = snprintf(mask_str, len, "%*pb\n",
5164 		       cpumask_pr_args(tr->tracing_cpumask));
5165 	if (len >= count) {
5166 		count = -EINVAL;
5167 		goto out_err;
5168 	}
5169 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5170 
5171 out_err:
5172 	kfree(mask_str);
5173 
5174 	return count;
5175 }
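
/*
 * The read above uses the usual "measure, allocate, print" pattern:
 * snprintf(NULL, 0, ...) only computes the would-be length.  As a rough
 * example, a 4-CPU mask with all CPUs set formats as "f\n", so the first
 * call returns 2, len becomes 3 with the terminating NUL, and the second
 * call fills the 3-byte buffer.
 */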
5176 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5177 int tracing_set_cpumask(struct trace_array *tr,
5178 			cpumask_var_t tracing_cpumask_new)
5179 {
5180 	int cpu;
5181 
5182 	if (!tr)
5183 		return -EINVAL;
5184 
5185 	local_irq_disable();
5186 	arch_spin_lock(&tr->max_lock);
5187 	for_each_tracing_cpu(cpu) {
5188 		/*
5189 		 * Increase/decrease the disabled counter if we are
5190 		 * about to flip a bit in the cpumask:
5191 		 */
5192 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5193 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5194 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5195 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5196 #ifdef CONFIG_TRACER_MAX_TRACE
5197 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5198 #endif
5199 		}
5200 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5201 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5202 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5203 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5204 #ifdef CONFIG_TRACER_MAX_TRACE
5205 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5206 #endif
5207 		}
5208 	}
5209 	arch_spin_unlock(&tr->max_lock);
5210 	local_irq_enable();
5211 
5212 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5213 
5214 	return 0;
5215 }
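/*
 * Example usage (assuming tracefs is mounted at /sys/kernel/tracing):
 * writing a mask such as "echo 3 > tracing_cpumask" limits tracing to
 * CPUs 0 and 1. CPUs whose bit is cleared only have their per-cpu ring
 * buffer recording disabled above; the buffers are not resized or freed.
 */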
5216 
5217 static ssize_t
5218 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5219 		      size_t count, loff_t *ppos)
5220 {
5221 	struct trace_array *tr = file_inode(filp)->i_private;
5222 	cpumask_var_t tracing_cpumask_new;
5223 	int err;
5224 
5225 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5226 		return -EINVAL;
5227 
5228 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5229 		return -ENOMEM;
5230 
5231 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5232 	if (err)
5233 		goto err_free;
5234 
5235 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5236 	if (err)
5237 		goto err_free;
5238 
5239 	free_cpumask_var(tracing_cpumask_new);
5240 
5241 	return count;
5242 
5243 err_free:
5244 	free_cpumask_var(tracing_cpumask_new);
5245 
5246 	return err;
5247 }
5248 
5249 static const struct file_operations tracing_cpumask_fops = {
5250 	.open		= tracing_open_generic_tr,
5251 	.read		= tracing_cpumask_read,
5252 	.write		= tracing_cpumask_write,
5253 	.release	= tracing_release_generic_tr,
5254 	.llseek		= generic_file_llseek,
5255 };
5256 
5257 static int tracing_trace_options_show(struct seq_file *m, void *v)
5258 {
5259 	struct tracer_opt *trace_opts;
5260 	struct trace_array *tr = m->private;
5261 	u32 tracer_flags;
5262 	int i;
5263 
5264 	mutex_lock(&trace_types_lock);
5265 	tracer_flags = tr->current_trace->flags->val;
5266 	trace_opts = tr->current_trace->flags->opts;
5267 
5268 	for (i = 0; trace_options[i]; i++) {
5269 		if (tr->trace_flags & (1 << i))
5270 			seq_printf(m, "%s\n", trace_options[i]);
5271 		else
5272 			seq_printf(m, "no%s\n", trace_options[i]);
5273 	}
5274 
5275 	for (i = 0; trace_opts[i].name; i++) {
5276 		if (tracer_flags & trace_opts[i].bit)
5277 			seq_printf(m, "%s\n", trace_opts[i].name);
5278 		else
5279 			seq_printf(m, "no%s\n", trace_opts[i].name);
5280 	}
5281 	mutex_unlock(&trace_types_lock);
5282 
5283 	return 0;
5284 }
5285 
5286 static int __set_tracer_option(struct trace_array *tr,
5287 			       struct tracer_flags *tracer_flags,
5288 			       struct tracer_opt *opts, int neg)
5289 {
5290 	struct tracer *trace = tracer_flags->trace;
5291 	int ret;
5292 
5293 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5294 	if (ret)
5295 		return ret;
5296 
5297 	if (neg)
5298 		tracer_flags->val &= ~opts->bit;
5299 	else
5300 		tracer_flags->val |= opts->bit;
5301 	return 0;
5302 }
5303 
5304 /* Try to assign a tracer specific option */
5305 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5306 {
5307 	struct tracer *trace = tr->current_trace;
5308 	struct tracer_flags *tracer_flags = trace->flags;
5309 	struct tracer_opt *opts = NULL;
5310 	int i;
5311 
5312 	for (i = 0; tracer_flags->opts[i].name; i++) {
5313 		opts = &tracer_flags->opts[i];
5314 
5315 		if (strcmp(cmp, opts->name) == 0)
5316 			return __set_tracer_option(tr, trace->flags, opts, neg);
5317 	}
5318 
5319 	return -EINVAL;
5320 }
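/*
 * Example (assuming the function_graph tracer is the current tracer):
 * "echo nofuncgraph-irqs > trace_options" clears that tracer-specific
 * option via the loop above. The "no" prefix is stripped by the caller,
 * trace_set_options(), before this function is reached.
 */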
5321 
5322 /* Some tracers require overwrite to stay enabled */
5323 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5324 {
5325 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5326 		return -1;
5327 
5328 	return 0;
5329 }
5330 
5331 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5332 {
5333 	int *map;
5334 
5335 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5336 	    (mask == TRACE_ITER_RECORD_CMD))
5337 		lockdep_assert_held(&event_mutex);
5338 
5339 	/* do nothing if flag is already set */
5340 	if (!!(tr->trace_flags & mask) == !!enabled)
5341 		return 0;
5342 
5343 	/* Give the tracer a chance to approve the change */
5344 	if (tr->current_trace->flag_changed)
5345 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5346 			return -EINVAL;
5347 
5348 	if (enabled)
5349 		tr->trace_flags |= mask;
5350 	else
5351 		tr->trace_flags &= ~mask;
5352 
5353 	if (mask == TRACE_ITER_RECORD_CMD)
5354 		trace_event_enable_cmd_record(enabled);
5355 
5356 	if (mask == TRACE_ITER_RECORD_TGID) {
5357 		if (!tgid_map) {
5358 			tgid_map_max = pid_max;
5359 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5360 				       GFP_KERNEL);
5361 
5362 			/*
5363 			 * Pairs with smp_load_acquire() in
5364 			 * trace_find_tgid_ptr() to ensure that if it observes
5365 			 * the tgid_map we just allocated then it also observes
5366 			 * the corresponding tgid_map_max value.
5367 			 */
5368 			smp_store_release(&tgid_map, map);
5369 		}
5370 		if (!tgid_map) {
5371 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5372 			return -ENOMEM;
5373 		}
5374 
5375 		trace_event_enable_tgid_record(enabled);
5376 	}
5377 
5378 	if (mask == TRACE_ITER_EVENT_FORK)
5379 		trace_event_follow_fork(tr, enabled);
5380 
5381 	if (mask == TRACE_ITER_FUNC_FORK)
5382 		ftrace_pid_follow_fork(tr, enabled);
5383 
5384 	if (mask == TRACE_ITER_OVERWRITE) {
5385 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5386 #ifdef CONFIG_TRACER_MAX_TRACE
5387 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5388 #endif
5389 	}
5390 
5391 	if (mask == TRACE_ITER_PRINTK) {
5392 		trace_printk_start_stop_comm(enabled);
5393 		trace_printk_control(enabled);
5394 	}
5395 
5396 	return 0;
5397 }
5398 
5399 int trace_set_options(struct trace_array *tr, char *option)
5400 {
5401 	char *cmp;
5402 	int neg = 0;
5403 	int ret;
5404 	size_t orig_len = strlen(option);
5405 	int len;
5406 
5407 	cmp = strstrip(option);
5408 
5409 	len = str_has_prefix(cmp, "no");
5410 	if (len)
5411 		neg = 1;
5412 
5413 	cmp += len;
5414 
5415 	mutex_lock(&event_mutex);
5416 	mutex_lock(&trace_types_lock);
5417 
5418 	ret = match_string(trace_options, -1, cmp);
5419 	/* If no option could be set, test the specific tracer options */
5420 	if (ret < 0)
5421 		ret = set_tracer_option(tr, cmp, neg);
5422 	else
5423 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5424 
5425 	mutex_unlock(&trace_types_lock);
5426 	mutex_unlock(&event_mutex);
5427 
5428 	/*
5429 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5430 	 * turn it back into a space.
5431 	 */
5432 	if (orig_len > strlen(option))
5433 		option[strlen(option)] = ' ';
5434 
5435 	return ret;
5436 }
5437 
5438 static void __init apply_trace_boot_options(void)
5439 {
5440 	char *buf = trace_boot_options_buf;
5441 	char *option;
5442 
5443 	while (true) {
5444 		option = strsep(&buf, ",");
5445 
5446 		if (!option)
5447 			break;
5448 
5449 		if (*option)
5450 			trace_set_options(&global_trace, option);
5451 
5452 		/* Put back the comma to allow this to be called again */
5453 		if (buf)
5454 			*(buf - 1) = ',';
5455 	}
5456 }
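/*
 * For example, booting with "trace_options=sym-addr,stacktrace" on the
 * kernel command line walks the comma-separated list above and applies
 * each option to the global trace instance. The commas are put back so
 * the saved boot string stays intact in case it is parsed again.
 */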
5457 
5458 static ssize_t
5459 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5460 			size_t cnt, loff_t *ppos)
5461 {
5462 	struct seq_file *m = filp->private_data;
5463 	struct trace_array *tr = m->private;
5464 	char buf[64];
5465 	int ret;
5466 
5467 	if (cnt >= sizeof(buf))
5468 		return -EINVAL;
5469 
5470 	if (copy_from_user(buf, ubuf, cnt))
5471 		return -EFAULT;
5472 
5473 	buf[cnt] = 0;
5474 
5475 	ret = trace_set_options(tr, buf);
5476 	if (ret < 0)
5477 		return ret;
5478 
5479 	*ppos += cnt;
5480 
5481 	return cnt;
5482 }
5483 
5484 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5485 {
5486 	struct trace_array *tr = inode->i_private;
5487 	int ret;
5488 
5489 	ret = tracing_check_open_get_tr(tr);
5490 	if (ret)
5491 		return ret;
5492 
5493 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5494 	if (ret < 0)
5495 		trace_array_put(tr);
5496 
5497 	return ret;
5498 }
5499 
5500 static const struct file_operations tracing_iter_fops = {
5501 	.open		= tracing_trace_options_open,
5502 	.read		= seq_read,
5503 	.llseek		= seq_lseek,
5504 	.release	= tracing_single_release_tr,
5505 	.write		= tracing_trace_options_write,
5506 };
5507 
5508 static const char readme_msg[] =
5509 	"tracing mini-HOWTO:\n\n"
5510 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5511 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5512 	" Important files:\n"
5513 	"  trace\t\t\t- The static contents of the buffer\n"
5514 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5515 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5516 	"  current_tracer\t- function and latency tracers\n"
5517 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5518 	"  error_log\t- error log for failed commands (that support it)\n"
5519 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5520 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5521 	"  trace_clock\t\t- change the clock used to order events\n"
5522 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5523 	"      global:   Synced across CPUs but slows tracing down.\n"
5524 	"     counter:   Not a clock, but just an increment\n"
5525 	"      uptime:   Jiffy counter from time of boot\n"
5526 	"        perf:   Same clock that perf events use\n"
5527 #ifdef CONFIG_X86_64
5528 	"     x86-tsc:   TSC cycle counter\n"
5529 #endif
5530 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5531 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5532 	"    absolute:   Absolute (standalone) timestamp\n"
5533 	"\n  trace_marker\t\t- Writes into this file write into the kernel buffer\n"
5534 	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5535 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5536 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5537 	"\t\t\t  Remove sub-buffer with rmdir\n"
5538 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5539 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5540 	"\t\t\t  option name\n"
5541 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5542 #ifdef CONFIG_DYNAMIC_FTRACE
5543 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5544 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5545 	"\t\t\t  functions\n"
5546 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5547 	"\t     modules: Can select a group via module\n"
5548 	"\t      Format: :mod:<module-name>\n"
5549 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5550 	"\t    triggers: a command to perform when function is hit\n"
5551 	"\t      Format: <function>:<trigger>[:count]\n"
5552 	"\t     trigger: traceon, traceoff\n"
5553 	"\t\t      enable_event:<system>:<event>\n"
5554 	"\t\t      disable_event:<system>:<event>\n"
5555 #ifdef CONFIG_STACKTRACE
5556 	"\t\t      stacktrace\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559 	"\t\t      snapshot\n"
5560 #endif
5561 	"\t\t      dump\n"
5562 	"\t\t      cpudump\n"
5563 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5564 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5565 	"\t     The first one will disable tracing every time do_fault is hit\n"
5566 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5567 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5568 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5569 	"\t       the counter will not decrement. It only decrements when the\n"
5570 	"\t       trigger did work\n"
5571 	"\t     To remove trigger without count:\n"
5572 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5573 	"\t     To remove trigger with a count:\n"
5574 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5575 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5576 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5577 	"\t    modules: Can select a group via module command :mod:\n"
5578 	"\t    Does not accept triggers\n"
5579 #endif /* CONFIG_DYNAMIC_FTRACE */
5580 #ifdef CONFIG_FUNCTION_TRACER
5581 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5582 	"\t\t    (function)\n"
5583 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5584 	"\t\t    (function)\n"
5585 #endif
5586 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5587 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5588 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5589 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5590 #endif
5591 #ifdef CONFIG_TRACER_SNAPSHOT
5592 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5593 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5594 	"\t\t\t  information\n"
5595 #endif
5596 #ifdef CONFIG_STACK_TRACER
5597 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5598 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5599 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5600 	"\t\t\t  new trace)\n"
5601 #ifdef CONFIG_DYNAMIC_FTRACE
5602 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5603 	"\t\t\t  traces\n"
5604 #endif
5605 #endif /* CONFIG_STACK_TRACER */
5606 #ifdef CONFIG_DYNAMIC_EVENTS
5607 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5608 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5609 #endif
5610 #ifdef CONFIG_KPROBE_EVENTS
5611 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5612 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5613 #endif
5614 #ifdef CONFIG_UPROBE_EVENTS
5615 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5616 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5617 #endif
5618 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5619     defined(CONFIG_FPROBE_EVENTS)
5620 	"\t  accepts: event-definitions (one definition per line)\n"
5621 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5622 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5623 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5624 #endif
5625 #ifdef CONFIG_FPROBE_EVENTS
5626 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5627 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5628 #endif
5629 #ifdef CONFIG_HIST_TRIGGERS
5630 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5631 #endif
5632 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5633 	"\t           -:[<group>/][<event>]\n"
5634 #ifdef CONFIG_KPROBE_EVENTS
5635 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5636   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5637 #endif
5638 #ifdef CONFIG_UPROBE_EVENTS
5639   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5640 #endif
5641 	"\t     args: <name>=fetcharg[:type]\n"
5642 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5643 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5644 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5645 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5646 	"\t           <argname>[->field[->field|.field...]],\n"
5647 #else
5648 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5649 #endif
5650 #else
5651 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5652 #endif
5653 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5654 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5655 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5656 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5657 	"\t           symstr, <type>\\[<array-size>\\]\n"
5658 #ifdef CONFIG_HIST_TRIGGERS
5659 	"\t    field: <stype> <name>;\n"
5660 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5661 	"\t           [unsigned] char/int/long\n"
5662 #endif
5663 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5664 	"\t            of the <attached-group>/<attached-event>.\n"
5665 #endif
5666 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5667 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5668 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5669 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5670 	"\t\t\t  events\n"
5671 	"      filter\t\t- If set, only events passing filter are traced\n"
5672 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5673 	"\t\t\t  <event>:\n"
5674 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5675 	"      filter\t\t- If set, only events passing filter are traced\n"
5676 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5677 	"\t    Format: <trigger>[:count][if <filter>]\n"
5678 	"\t   trigger: traceon, traceoff\n"
5679 	"\t            enable_event:<system>:<event>\n"
5680 	"\t            disable_event:<system>:<event>\n"
5681 #ifdef CONFIG_HIST_TRIGGERS
5682 	"\t            enable_hist:<system>:<event>\n"
5683 	"\t            disable_hist:<system>:<event>\n"
5684 #endif
5685 #ifdef CONFIG_STACKTRACE
5686 	"\t\t    stacktrace\n"
5687 #endif
5688 #ifdef CONFIG_TRACER_SNAPSHOT
5689 	"\t\t    snapshot\n"
5690 #endif
5691 #ifdef CONFIG_HIST_TRIGGERS
5692 	"\t\t    hist (see below)\n"
5693 #endif
5694 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5695 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5696 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5697 	"\t                  events/block/block_unplug/trigger\n"
5698 	"\t   The first disables tracing every time block_unplug is hit.\n"
5699 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5700 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5701 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5702 	"\t   Like function triggers, the counter is only decremented if it\n"
5703 	"\t    enabled or disabled tracing.\n"
5704 	"\t   To remove a trigger without a count:\n"
5705 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5706 	"\t   To remove a trigger with a count:\n"
5707 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5708 	"\t   Filters can be ignored when removing a trigger.\n"
5709 #ifdef CONFIG_HIST_TRIGGERS
5710 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5711 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5712 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5713 	"\t            [:values=<field1[,field2,...]>]\n"
5714 	"\t            [:sort=<field1[,field2,...]>]\n"
5715 	"\t            [:size=#entries]\n"
5716 	"\t            [:pause][:continue][:clear]\n"
5717 	"\t            [:name=histname1]\n"
5718 	"\t            [:nohitcount]\n"
5719 	"\t            [:<handler>.<action>]\n"
5720 	"\t            [if <filter>]\n\n"
5721 	"\t    Note, special fields can be used as well:\n"
5722 	"\t            common_timestamp - to record current timestamp\n"
5723 	"\t            common_cpu - to record the CPU the event happened on\n"
5724 	"\n"
5725 	"\t    A hist trigger variable can be:\n"
5726 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5727 	"\t        - a reference to another variable e.g. y=$x,\n"
5728 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5729 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5730 	"\n"
5731 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5732 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5733 	"\t    variable reference, field or numeric literal.\n"
5734 	"\n"
5735 	"\t    When a matching event is hit, an entry is added to a hash\n"
5736 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5737 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5738 	"\t    correspond to fields in the event's format description.  Keys\n"
5739 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5740 	"\t    Compound keys consisting of up to two fields can be specified\n"
5741 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5742 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5743 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5744 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5745 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5746 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5747 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5748 	"\t    its histogram data will be shared with other triggers of the\n"
5749 	"\t    same name, and trigger hits will update this common data.\n\n"
5750 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5751 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5752 	"\t    triggers attached to an event, there will be a table for each\n"
5753 	"\t    trigger in the output.  The table displayed for a named\n"
5754 	"\t    trigger will be the same as any other instance having the\n"
5755 	"\t    same name.  The default format used to display a given field\n"
5756 	"\t    can be modified by appending any of the following modifiers\n"
5757 	"\t    to the field name, as applicable:\n\n"
5758 	"\t            .hex        display a number as a hex value\n"
5759 	"\t            .sym        display an address as a symbol\n"
5760 	"\t            .sym-offset display an address as a symbol and offset\n"
5761 	"\t            .execname   display a common_pid as a program name\n"
5762 	"\t            .syscall    display a syscall id as a syscall name\n"
5763 	"\t            .log2       display log2 value rather than raw number\n"
5764 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5765 	"\t            .usecs      display a common_timestamp in microseconds\n"
5766 	"\t            .percent    display a number as a percentage value\n"
5767 	"\t            .graph      display a bar-graph of a value\n\n"
5768 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5769 	"\t    trigger or to start a hist trigger but not log any events\n"
5770 	"\t    until told to do so.  'continue' can be used to start or\n"
5771 	"\t    restart a paused hist trigger.\n\n"
5772 	"\t    The 'clear' parameter will clear the contents of a running\n"
5773 	"\t    hist trigger and leave its current paused/active state\n"
5774 	"\t    unchanged.\n\n"
5775 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5776 	"\t    raw hitcount in the histogram.\n\n"
5777 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5778 	"\t    have one event conditionally start and stop another event's\n"
5779 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5780 	"\t    the enable_event and disable_event triggers.\n\n"
5781 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5782 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5783 	"\t        <handler>.<action>\n\n"
5784 	"\t    The available handlers are:\n\n"
5785 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5786 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5787 	"\t        onchange(var)            - invoke action if var changes\n\n"
5788 	"\t    The available actions are:\n\n"
5789 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5790 	"\t        save(field,...)                      - save current event fields\n"
5791 #ifdef CONFIG_TRACER_SNAPSHOT
5792 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5793 #endif
5794 #ifdef CONFIG_SYNTH_EVENTS
5795 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5796 	"\t  Write into this file to define/undefine new synthetic events.\n"
5797 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5798 #endif
5799 #endif
5800 ;
5801 
5802 static ssize_t
5803 tracing_readme_read(struct file *filp, char __user *ubuf,
5804 		       size_t cnt, loff_t *ppos)
5805 {
5806 	return simple_read_from_buffer(ubuf, cnt, ppos,
5807 					readme_msg, strlen(readme_msg));
5808 }
5809 
5810 static const struct file_operations tracing_readme_fops = {
5811 	.open		= tracing_open_generic,
5812 	.read		= tracing_readme_read,
5813 	.llseek		= generic_file_llseek,
5814 };
5815 
5816 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5817 {
5818 	int pid = ++(*pos);
5819 
5820 	return trace_find_tgid_ptr(pid);
5821 }
5822 
5823 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5824 {
5825 	int pid = *pos;
5826 
5827 	return trace_find_tgid_ptr(pid);
5828 }
5829 
5830 static void saved_tgids_stop(struct seq_file *m, void *v)
5831 {
5832 }
5833 
5834 static int saved_tgids_show(struct seq_file *m, void *v)
5835 {
5836 	int *entry = (int *)v;
5837 	int pid = entry - tgid_map;
5838 	int tgid = *entry;
5839 
5840 	if (tgid == 0)
5841 		return SEQ_SKIP;
5842 
5843 	seq_printf(m, "%d %d\n", pid, tgid);
5844 	return 0;
5845 }
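/*
 * Each saved_tgids line is printed as "<pid> <tgid>". Entries exist only
 * for tasks seen while the record-tgid option is set, and zero
 * (unrecorded) slots are skipped via SEQ_SKIP above.
 */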
5846 
5847 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5848 	.start		= saved_tgids_start,
5849 	.stop		= saved_tgids_stop,
5850 	.next		= saved_tgids_next,
5851 	.show		= saved_tgids_show,
5852 };
5853 
5854 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5855 {
5856 	int ret;
5857 
5858 	ret = tracing_check_open_get_tr(NULL);
5859 	if (ret)
5860 		return ret;
5861 
5862 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5863 }
5864 
5865 
5866 static const struct file_operations tracing_saved_tgids_fops = {
5867 	.open		= tracing_saved_tgids_open,
5868 	.read		= seq_read,
5869 	.llseek		= seq_lseek,
5870 	.release	= seq_release,
5871 };
5872 
5873 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5874 {
5875 	unsigned int *ptr = v;
5876 
5877 	if (*pos || m->count)
5878 		ptr++;
5879 
5880 	(*pos)++;
5881 
5882 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5883 	     ptr++) {
5884 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5885 			continue;
5886 
5887 		return ptr;
5888 	}
5889 
5890 	return NULL;
5891 }
5892 
5893 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5894 {
5895 	void *v;
5896 	loff_t l = 0;
5897 
5898 	preempt_disable();
5899 	arch_spin_lock(&trace_cmdline_lock);
5900 
5901 	v = &savedcmd->map_cmdline_to_pid[0];
5902 	while (l <= *pos) {
5903 		v = saved_cmdlines_next(m, v, &l);
5904 		if (!v)
5905 			return NULL;
5906 	}
5907 
5908 	return v;
5909 }
5910 
5911 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5912 {
5913 	arch_spin_unlock(&trace_cmdline_lock);
5914 	preempt_enable();
5915 }
5916 
5917 static int saved_cmdlines_show(struct seq_file *m, void *v)
5918 {
5919 	char buf[TASK_COMM_LEN];
5920 	unsigned int *pid = v;
5921 
5922 	__trace_find_cmdline(*pid, buf);
5923 	seq_printf(m, "%d %s\n", *pid, buf);
5924 	return 0;
5925 }
5926 
5927 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5928 	.start		= saved_cmdlines_start,
5929 	.next		= saved_cmdlines_next,
5930 	.stop		= saved_cmdlines_stop,
5931 	.show		= saved_cmdlines_show,
5932 };
5933 
5934 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5935 {
5936 	int ret;
5937 
5938 	ret = tracing_check_open_get_tr(NULL);
5939 	if (ret)
5940 		return ret;
5941 
5942 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5943 }
5944 
5945 static const struct file_operations tracing_saved_cmdlines_fops = {
5946 	.open		= tracing_saved_cmdlines_open,
5947 	.read		= seq_read,
5948 	.llseek		= seq_lseek,
5949 	.release	= seq_release,
5950 };
5951 
5952 static ssize_t
5953 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5954 				 size_t cnt, loff_t *ppos)
5955 {
5956 	char buf[64];
5957 	int r;
5958 
5959 	preempt_disable();
5960 	arch_spin_lock(&trace_cmdline_lock);
5961 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5962 	arch_spin_unlock(&trace_cmdline_lock);
5963 	preempt_enable();
5964 
5965 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5966 }
5967 
5968 static int tracing_resize_saved_cmdlines(unsigned int val)
5969 {
5970 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5971 
5972 	s = allocate_cmdlines_buffer(val);
5973 	if (!s)
5974 		return -ENOMEM;
5975 
5976 	preempt_disable();
5977 	arch_spin_lock(&trace_cmdline_lock);
5978 	savedcmd_temp = savedcmd;
5979 	savedcmd = s;
5980 	arch_spin_unlock(&trace_cmdline_lock);
5981 	preempt_enable();
5982 	free_saved_cmdlines_buffer(savedcmd_temp);
5983 
5984 	return 0;
5985 }
5986 
5987 static ssize_t
5988 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5989 				  size_t cnt, loff_t *ppos)
5990 {
5991 	unsigned long val;
5992 	int ret;
5993 
5994 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5995 	if (ret)
5996 		return ret;
5997 
5998 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5999 	if (!val || val > PID_MAX_DEFAULT)
6000 		return -EINVAL;
6001 
6002 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6003 	if (ret < 0)
6004 		return ret;
6005 
6006 	*ppos += cnt;
6007 
6008 	return cnt;
6009 }
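/*
 * Example: "echo 1024 > saved_cmdlines_size" grows the comm cache to
 * 1024 entries. Values outside 1..PID_MAX_DEFAULT are rejected with
 * -EINVAL, and the old buffer is swapped out and freed only after the
 * replacement has been allocated.
 */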
6010 
6011 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6012 	.open		= tracing_open_generic,
6013 	.read		= tracing_saved_cmdlines_size_read,
6014 	.write		= tracing_saved_cmdlines_size_write,
6015 };
6016 
6017 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6018 static union trace_eval_map_item *
6019 update_eval_map(union trace_eval_map_item *ptr)
6020 {
6021 	if (!ptr->map.eval_string) {
6022 		if (ptr->tail.next) {
6023 			ptr = ptr->tail.next;
6024 			/* Set ptr to the next real item (skip head) */
6025 			ptr++;
6026 		} else
6027 			return NULL;
6028 	}
6029 	return ptr;
6030 }
6031 
6032 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6033 {
6034 	union trace_eval_map_item *ptr = v;
6035 
6036 	/*
6037 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6038 	 * This really should never happen.
6039 	 */
6040 	(*pos)++;
6041 	ptr = update_eval_map(ptr);
6042 	if (WARN_ON_ONCE(!ptr))
6043 		return NULL;
6044 
6045 	ptr++;
6046 	ptr = update_eval_map(ptr);
6047 
6048 	return ptr;
6049 }
6050 
6051 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6052 {
6053 	union trace_eval_map_item *v;
6054 	loff_t l = 0;
6055 
6056 	mutex_lock(&trace_eval_mutex);
6057 
6058 	v = trace_eval_maps;
6059 	if (v)
6060 		v++;
6061 
6062 	while (v && l < *pos) {
6063 		v = eval_map_next(m, v, &l);
6064 	}
6065 
6066 	return v;
6067 }
6068 
6069 static void eval_map_stop(struct seq_file *m, void *v)
6070 {
6071 	mutex_unlock(&trace_eval_mutex);
6072 }
6073 
6074 static int eval_map_show(struct seq_file *m, void *v)
6075 {
6076 	union trace_eval_map_item *ptr = v;
6077 
6078 	seq_printf(m, "%s %ld (%s)\n",
6079 		   ptr->map.eval_string, ptr->map.eval_value,
6080 		   ptr->map.system);
6081 
6082 	return 0;
6083 }
6084 
6085 static const struct seq_operations tracing_eval_map_seq_ops = {
6086 	.start		= eval_map_start,
6087 	.next		= eval_map_next,
6088 	.stop		= eval_map_stop,
6089 	.show		= eval_map_show,
6090 };
6091 
6092 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6093 {
6094 	int ret;
6095 
6096 	ret = tracing_check_open_get_tr(NULL);
6097 	if (ret)
6098 		return ret;
6099 
6100 	return seq_open(filp, &tracing_eval_map_seq_ops);
6101 }
6102 
6103 static const struct file_operations tracing_eval_map_fops = {
6104 	.open		= tracing_eval_map_open,
6105 	.read		= seq_read,
6106 	.llseek		= seq_lseek,
6107 	.release	= seq_release,
6108 };
6109 
6110 static inline union trace_eval_map_item *
6111 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6112 {
6113 	/* Return tail of array given the head */
6114 	return ptr + ptr->head.length + 1;
6115 }
6116 
6117 static void
6118 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6119 			   int len)
6120 {
6121 	struct trace_eval_map **stop;
6122 	struct trace_eval_map **map;
6123 	union trace_eval_map_item *map_array;
6124 	union trace_eval_map_item *ptr;
6125 
6126 	stop = start + len;
6127 
6128 	/*
6129 	 * The trace_eval_maps contains the map plus a head and tail item,
6130 	 * where the head holds the module and length of array, and the
6131 	 * tail holds a pointer to the next list.
6132 	 */
6133 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6134 	if (!map_array) {
6135 		pr_warn("Unable to allocate trace eval mapping\n");
6136 		return;
6137 	}
6138 
6139 	mutex_lock(&trace_eval_mutex);
6140 
6141 	if (!trace_eval_maps)
6142 		trace_eval_maps = map_array;
6143 	else {
6144 		ptr = trace_eval_maps;
6145 		for (;;) {
6146 			ptr = trace_eval_jmp_to_tail(ptr);
6147 			if (!ptr->tail.next)
6148 				break;
6149 			ptr = ptr->tail.next;
6150 
6151 		}
6152 		ptr->tail.next = map_array;
6153 	}
6154 	map_array->head.mod = mod;
6155 	map_array->head.length = len;
6156 	map_array++;
6157 
6158 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6159 		map_array->map = **map;
6160 		map_array++;
6161 	}
6162 	memset(map_array, 0, sizeof(*map_array));
6163 
6164 	mutex_unlock(&trace_eval_mutex);
6165 }
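/*
 * Resulting layout of one map_array chunk (len + 2 items):
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * The final memset() above zeroes the tail item; its ->tail.next is
 * filled in later if another module's maps get chained behind this one.
 */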
6166 
6167 static void trace_create_eval_file(struct dentry *d_tracer)
6168 {
6169 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6170 			  NULL, &tracing_eval_map_fops);
6171 }
6172 
6173 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6174 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6175 static inline void trace_insert_eval_map_file(struct module *mod,
6176 			      struct trace_eval_map **start, int len) { }
6177 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6178 
6179 static void trace_insert_eval_map(struct module *mod,
6180 				  struct trace_eval_map **start, int len)
6181 {
6182 	struct trace_eval_map **map;
6183 
6184 	if (len <= 0)
6185 		return;
6186 
6187 	map = start;
6188 
6189 	trace_event_eval_update(map, len);
6190 
6191 	trace_insert_eval_map_file(mod, start, len);
6192 }
6193 
6194 static ssize_t
6195 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6196 		       size_t cnt, loff_t *ppos)
6197 {
6198 	struct trace_array *tr = filp->private_data;
6199 	char buf[MAX_TRACER_SIZE+2];
6200 	int r;
6201 
6202 	mutex_lock(&trace_types_lock);
6203 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6204 	mutex_unlock(&trace_types_lock);
6205 
6206 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6207 }
6208 
6209 int tracer_init(struct tracer *t, struct trace_array *tr)
6210 {
6211 	tracing_reset_online_cpus(&tr->array_buffer);
6212 	return t->init(tr);
6213 }
6214 
6215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6216 {
6217 	int cpu;
6218 
6219 	for_each_tracing_cpu(cpu)
6220 		per_cpu_ptr(buf->data, cpu)->entries = val;
6221 }
6222 
6223 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6224 {
6225 	if (cpu == RING_BUFFER_ALL_CPUS) {
6226 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6227 	} else {
6228 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6229 	}
6230 }
6231 
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233 /* resize @tr's buffer to the size of @size_tr's entries */
6234 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6235 					struct array_buffer *size_buf, int cpu_id)
6236 {
6237 	int cpu, ret = 0;
6238 
6239 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6240 		for_each_tracing_cpu(cpu) {
6241 			ret = ring_buffer_resize(trace_buf->buffer,
6242 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6243 			if (ret < 0)
6244 				break;
6245 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6246 				per_cpu_ptr(size_buf->data, cpu)->entries;
6247 		}
6248 	} else {
6249 		ret = ring_buffer_resize(trace_buf->buffer,
6250 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6251 		if (ret == 0)
6252 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6253 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6254 	}
6255 
6256 	return ret;
6257 }
6258 #endif /* CONFIG_TRACER_MAX_TRACE */
6259 
6260 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6261 					unsigned long size, int cpu)
6262 {
6263 	int ret;
6264 
6265 	/*
6266 	 * If kernel or user changes the size of the ring buffer
6267 	 * we use the size that was given, and we can forget about
6268 	 * expanding it later.
6269 	 */
6270 	ring_buffer_expanded = true;
6271 
6272 	/* May be called before buffers are initialized */
6273 	if (!tr->array_buffer.buffer)
6274 		return 0;
6275 
6276 	/* Do not allow tracing while resizing ring buffer */
6277 	tracing_stop_tr(tr);
6278 
6279 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6280 	if (ret < 0)
6281 		goto out_start;
6282 
6283 #ifdef CONFIG_TRACER_MAX_TRACE
6284 	if (!tr->allocated_snapshot)
6285 		goto out;
6286 
6287 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6288 	if (ret < 0) {
6289 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6290 						     &tr->array_buffer, cpu);
6291 		if (r < 0) {
6292 			/*
6293 			 * AARGH! We are left with different
6294 			 * size max buffer!!!!
6295 			 * The max buffer is our "snapshot" buffer.
6296 			 * When a tracer needs a snapshot (one of the
6297 			 * latency tracers), it swaps the max buffer
6298 			 * with the saved snapshot. We succeeded in updating
6299 			 * the size of the main buffer, but failed to
6300 			 * update the size of the max buffer. But when we tried
6301 			 * to reset the main buffer to the original size, we
6302 			 * failed there too. This is very unlikely to
6303 			 * happen, but if it does, warn and kill all
6304 			 * tracing.
6305 			 */
6306 			WARN_ON(1);
6307 			tracing_disabled = 1;
6308 		}
6309 		goto out_start;
6310 	}
6311 
6312 	update_buffer_entries(&tr->max_buffer, cpu);
6313 
6314  out:
6315 #endif /* CONFIG_TRACER_MAX_TRACE */
6316 
6317 	update_buffer_entries(&tr->array_buffer, cpu);
6318  out_start:
6319 	tracing_start_tr(tr);
6320 	return ret;
6321 }
6322 
6323 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6324 				  unsigned long size, int cpu_id)
6325 {
6326 	int ret;
6327 
6328 	mutex_lock(&trace_types_lock);
6329 
6330 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6331 		/* make sure, this cpu is enabled in the mask */
6332 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6333 			ret = -EINVAL;
6334 			goto out;
6335 		}
6336 	}
6337 
6338 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6339 	if (ret < 0)
6340 		ret = -ENOMEM;
6341 
6342 out:
6343 	mutex_unlock(&trace_types_lock);
6344 
6345 	return ret;
6346 }
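/*
 * This is the backend for resizing from user space; for instance
 * "echo 4096 > buffer_size_kb" (or per_cpu/cpuN/buffer_size_kb for a
 * single CPU) eventually lands here, with the buffer_size_kb write
 * handler (not shown in this excerpt) converting the kilobyte count to
 * bytes before calling in.
 */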
6347 
6348 
6349 /**
6350  * tracing_update_buffers - used by tracing facility to expand ring buffers
6351  *
6352  * To save memory on systems where tracing is configured in but never
6353  * used, the ring buffers are set to a minimum size. But once a user
6354  * starts to use the tracing facility, they need to grow to their
6355  * default size.
6356  *
6357  * This function is to be called when a tracer is about to be used.
6358  */
6359 int tracing_update_buffers(void)
6360 {
6361 	int ret = 0;
6362 
6363 	mutex_lock(&trace_types_lock);
6364 	if (!ring_buffer_expanded)
6365 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6366 						RING_BUFFER_ALL_CPUS);
6367 	mutex_unlock(&trace_types_lock);
6368 
6369 	return ret;
6370 }
6371 
6372 struct trace_option_dentry;
6373 
6374 static void
6375 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6376 
6377 /*
6378  * Used to clear out the tracer before deletion of an instance.
6379  * Must have trace_types_lock held.
6380  */
6381 static void tracing_set_nop(struct trace_array *tr)
6382 {
6383 	if (tr->current_trace == &nop_trace)
6384 		return;
6385 
6386 	tr->current_trace->enabled--;
6387 
6388 	if (tr->current_trace->reset)
6389 		tr->current_trace->reset(tr);
6390 
6391 	tr->current_trace = &nop_trace;
6392 }
6393 
6394 static bool tracer_options_updated;
6395 
6396 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6397 {
6398 	/* Only enable if the directory has been created already. */
6399 	if (!tr->dir)
6400 		return;
6401 
6402 	/* Only create trace option files after update_tracer_options finish */
6403 	if (!tracer_options_updated)
6404 		return;
6405 
6406 	create_trace_option_files(tr, t);
6407 }
6408 
6409 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6410 {
6411 	struct tracer *t;
6412 #ifdef CONFIG_TRACER_MAX_TRACE
6413 	bool had_max_tr;
6414 #endif
6415 	int ret = 0;
6416 
6417 	mutex_lock(&trace_types_lock);
6418 
6419 	if (!ring_buffer_expanded) {
6420 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6421 						RING_BUFFER_ALL_CPUS);
6422 		if (ret < 0)
6423 			goto out;
6424 		ret = 0;
6425 	}
6426 
6427 	for (t = trace_types; t; t = t->next) {
6428 		if (strcmp(t->name, buf) == 0)
6429 			break;
6430 	}
6431 	if (!t) {
6432 		ret = -EINVAL;
6433 		goto out;
6434 	}
6435 	if (t == tr->current_trace)
6436 		goto out;
6437 
6438 #ifdef CONFIG_TRACER_SNAPSHOT
6439 	if (t->use_max_tr) {
6440 		local_irq_disable();
6441 		arch_spin_lock(&tr->max_lock);
6442 		if (tr->cond_snapshot)
6443 			ret = -EBUSY;
6444 		arch_spin_unlock(&tr->max_lock);
6445 		local_irq_enable();
6446 		if (ret)
6447 			goto out;
6448 	}
6449 #endif
6450 	/* Some tracers won't work on kernel command line */
6451 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6452 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6453 			t->name);
6454 		goto out;
6455 	}
6456 
6457 	/* Some tracers are only allowed for the top level buffer */
6458 	if (!trace_ok_for_array(t, tr)) {
6459 		ret = -EINVAL;
6460 		goto out;
6461 	}
6462 
6463 	/* If trace pipe files are being read, we can't change the tracer */
6464 	if (tr->trace_ref) {
6465 		ret = -EBUSY;
6466 		goto out;
6467 	}
6468 
6469 	trace_branch_disable();
6470 
6471 	tr->current_trace->enabled--;
6472 
6473 	if (tr->current_trace->reset)
6474 		tr->current_trace->reset(tr);
6475 
6476 #ifdef CONFIG_TRACER_MAX_TRACE
6477 	had_max_tr = tr->current_trace->use_max_tr;
6478 
6479 	/* Current trace needs to be nop_trace before synchronize_rcu */
6480 	tr->current_trace = &nop_trace;
6481 
6482 	if (had_max_tr && !t->use_max_tr) {
6483 		/*
6484 		 * We need to make sure that the update_max_tr sees that
6485 		 * current_trace changed to nop_trace to keep it from
6486 		 * swapping the buffers after we resize it.
6487 		 * update_max_tr() is called with interrupts disabled,
6488 		 * so a synchronize_rcu() is sufficient.
6489 		 */
6490 		synchronize_rcu();
6491 		free_snapshot(tr);
6492 	}
6493 
6494 	if (t->use_max_tr && !tr->allocated_snapshot) {
6495 		ret = tracing_alloc_snapshot_instance(tr);
6496 		if (ret < 0)
6497 			goto out;
6498 	}
6499 #else
6500 	tr->current_trace = &nop_trace;
6501 #endif
6502 
6503 	if (t->init) {
6504 		ret = tracer_init(t, tr);
6505 		if (ret)
6506 			goto out;
6507 	}
6508 
6509 	tr->current_trace = t;
6510 	tr->current_trace->enabled++;
6511 	trace_branch_enable(tr);
6512  out:
6513 	mutex_unlock(&trace_types_lock);
6514 
6515 	return ret;
6516 }
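/*
 * Example: "echo function_graph > current_tracer" selects a new tracer,
 * and "echo nop > current_tracer" effectively turns it off again. The
 * -EBUSY above means a trace_pipe reader still holds the instance open,
 * so the tracer cannot be switched until that reader goes away.
 */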
6517 
6518 static ssize_t
6519 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6520 			size_t cnt, loff_t *ppos)
6521 {
6522 	struct trace_array *tr = filp->private_data;
6523 	char buf[MAX_TRACER_SIZE+1];
6524 	char *name;
6525 	size_t ret;
6526 	int err;
6527 
6528 	ret = cnt;
6529 
6530 	if (cnt > MAX_TRACER_SIZE)
6531 		cnt = MAX_TRACER_SIZE;
6532 
6533 	if (copy_from_user(buf, ubuf, cnt))
6534 		return -EFAULT;
6535 
6536 	buf[cnt] = 0;
6537 
6538 	name = strim(buf);
6539 
6540 	err = tracing_set_tracer(tr, name);
6541 	if (err)
6542 		return err;
6543 
6544 	*ppos += ret;
6545 
6546 	return ret;
6547 }
6548 
6549 static ssize_t
6550 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6551 		   size_t cnt, loff_t *ppos)
6552 {
6553 	char buf[64];
6554 	int r;
6555 
6556 	r = snprintf(buf, sizeof(buf), "%ld\n",
6557 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6558 	if (r > sizeof(buf))
6559 		r = sizeof(buf);
6560 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6561 }
6562 
6563 static ssize_t
6564 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6565 		    size_t cnt, loff_t *ppos)
6566 {
6567 	unsigned long val;
6568 	int ret;
6569 
6570 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6571 	if (ret)
6572 		return ret;
6573 
6574 	*ptr = val * 1000;
6575 
6576 	return cnt;
6577 }
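/*
 * The *_nsecs helpers expose nanosecond values as microseconds: reads
 * convert with nsecs_to_usecs() and writes multiply by 1000, so
 * "echo 100 > tracing_thresh" stores a 100 usec (100000 ns) threshold.
 */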
6578 
6579 static ssize_t
6580 tracing_thresh_read(struct file *filp, char __user *ubuf,
6581 		    size_t cnt, loff_t *ppos)
6582 {
6583 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6584 }
6585 
6586 static ssize_t
6587 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6588 		     size_t cnt, loff_t *ppos)
6589 {
6590 	struct trace_array *tr = filp->private_data;
6591 	int ret;
6592 
6593 	mutex_lock(&trace_types_lock);
6594 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6595 	if (ret < 0)
6596 		goto out;
6597 
6598 	if (tr->current_trace->update_thresh) {
6599 		ret = tr->current_trace->update_thresh(tr);
6600 		if (ret < 0)
6601 			goto out;
6602 	}
6603 
6604 	ret = cnt;
6605 out:
6606 	mutex_unlock(&trace_types_lock);
6607 
6608 	return ret;
6609 }
6610 
6611 #ifdef CONFIG_TRACER_MAX_TRACE
6612 
6613 static ssize_t
6614 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6615 		     size_t cnt, loff_t *ppos)
6616 {
6617 	struct trace_array *tr = filp->private_data;
6618 
6619 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6620 }
6621 
6622 static ssize_t
6623 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6624 		      size_t cnt, loff_t *ppos)
6625 {
6626 	struct trace_array *tr = filp->private_data;
6627 
6628 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6629 }
6630 
6631 #endif
6632 
6633 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6634 {
6635 	if (cpu == RING_BUFFER_ALL_CPUS) {
6636 		if (cpumask_empty(tr->pipe_cpumask)) {
6637 			cpumask_setall(tr->pipe_cpumask);
6638 			return 0;
6639 		}
6640 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6641 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6642 		return 0;
6643 	}
6644 	return -EBUSY;
6645 }
6646 
6647 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6648 {
6649 	if (cpu == RING_BUFFER_ALL_CPUS) {
6650 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6651 		cpumask_clear(tr->pipe_cpumask);
6652 	} else {
6653 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6654 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6655 	}
6656 }
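/*
 * The pipe_cpumask gives each trace_pipe reader exclusive access: a
 * per-cpu open claims that CPU's bit, while opening the all-CPU pipe
 * requires the mask to be completely empty and then claims every bit.
 */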
6657 
6658 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6659 {
6660 	struct trace_array *tr = inode->i_private;
6661 	struct trace_iterator *iter;
6662 	int cpu;
6663 	int ret;
6664 
6665 	ret = tracing_check_open_get_tr(tr);
6666 	if (ret)
6667 		return ret;
6668 
6669 	mutex_lock(&trace_types_lock);
6670 	cpu = tracing_get_cpu(inode);
6671 	ret = open_pipe_on_cpu(tr, cpu);
6672 	if (ret)
6673 		goto fail_pipe_on_cpu;
6674 
6675 	/* create a buffer to store the information to pass to userspace */
6676 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6677 	if (!iter) {
6678 		ret = -ENOMEM;
6679 		goto fail_alloc_iter;
6680 	}
6681 
6682 	trace_seq_init(&iter->seq);
6683 	iter->trace = tr->current_trace;
6684 
6685 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6686 		ret = -ENOMEM;
6687 		goto fail;
6688 	}
6689 
6690 	/* trace pipe does not show start of buffer */
6691 	cpumask_setall(iter->started);
6692 
6693 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6694 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6695 
6696 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6697 	if (trace_clocks[tr->clock_id].in_ns)
6698 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6699 
6700 	iter->tr = tr;
6701 	iter->array_buffer = &tr->array_buffer;
6702 	iter->cpu_file = cpu;
6703 	mutex_init(&iter->mutex);
6704 	filp->private_data = iter;
6705 
6706 	if (iter->trace->pipe_open)
6707 		iter->trace->pipe_open(iter);
6708 
6709 	nonseekable_open(inode, filp);
6710 
6711 	tr->trace_ref++;
6712 
6713 	mutex_unlock(&trace_types_lock);
6714 	return ret;
6715 
6716 fail:
6717 	kfree(iter);
6718 fail_alloc_iter:
6719 	close_pipe_on_cpu(tr, cpu);
6720 fail_pipe_on_cpu:
6721 	__trace_array_put(tr);
6722 	mutex_unlock(&trace_types_lock);
6723 	return ret;
6724 }
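/*
 * Note that trace_pipe is a consuming, non-seekable reader and bumps
 * tr->trace_ref while open; tracing_set_tracer() refuses to switch
 * tracers (-EBUSY) as long as that reference is held.
 */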
6725 
6726 static int tracing_release_pipe(struct inode *inode, struct file *file)
6727 {
6728 	struct trace_iterator *iter = file->private_data;
6729 	struct trace_array *tr = inode->i_private;
6730 
6731 	mutex_lock(&trace_types_lock);
6732 
6733 	tr->trace_ref--;
6734 
6735 	if (iter->trace->pipe_close)
6736 		iter->trace->pipe_close(iter);
6737 	close_pipe_on_cpu(tr, iter->cpu_file);
6738 	mutex_unlock(&trace_types_lock);
6739 
6740 	free_trace_iter_content(iter);
6741 	kfree(iter);
6742 
6743 	trace_array_put(tr);
6744 
6745 	return 0;
6746 }
6747 
6748 static __poll_t
6749 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6750 {
6751 	struct trace_array *tr = iter->tr;
6752 
6753 	/* Iterators are static, they should be filled or empty */
6754 	if (trace_buffer_iter(iter, iter->cpu_file))
6755 		return EPOLLIN | EPOLLRDNORM;
6756 
6757 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6758 		/*
6759 		 * Always select as readable when in blocking mode
6760 		 */
6761 		return EPOLLIN | EPOLLRDNORM;
6762 	else
6763 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6764 					     filp, poll_table, iter->tr->buffer_percent);
6765 }
6766 
6767 static __poll_t
6768 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6769 {
6770 	struct trace_iterator *iter = filp->private_data;
6771 
6772 	return trace_poll(iter, filp, poll_table);
6773 }
6774 
6775 /* Must be called with iter->mutex held. */
6776 static int tracing_wait_pipe(struct file *filp)
6777 {
6778 	struct trace_iterator *iter = filp->private_data;
6779 	int ret;
6780 
6781 	while (trace_empty(iter)) {
6782 
6783 		if ((filp->f_flags & O_NONBLOCK)) {
6784 			return -EAGAIN;
6785 		}
6786 
6787 		/*
6788 		 * We block until we read something and tracing is disabled.
6789 		 * We still block if tracing is disabled, but we have never
6790 		 * read anything. This allows a user to cat this file, and
6791 		 * then enable tracing. But after we have read something,
6792 		 * we give an EOF when tracing is again disabled.
6793 		 *
6794 		 * iter->pos will be 0 if we haven't read anything.
6795 		 */
6796 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6797 			break;
6798 
6799 		mutex_unlock(&iter->mutex);
6800 
6801 		ret = wait_on_pipe(iter, 0);
6802 
6803 		mutex_lock(&iter->mutex);
6804 
6805 		if (ret)
6806 			return ret;
6807 	}
6808 
6809 	return 1;
6810 }
6811 
6812 /*
6813  * Consumer reader.
6814  */
6815 static ssize_t
6816 tracing_read_pipe(struct file *filp, char __user *ubuf,
6817 		  size_t cnt, loff_t *ppos)
6818 {
6819 	struct trace_iterator *iter = filp->private_data;
6820 	ssize_t sret;
6821 
6822 	/*
6823 	 * Avoid more than one consumer on a single file descriptor
6824 	 * This is just a matter of traces coherency, the ring buffer itself
6825 	 * is protected.
6826 	 */
6827 	mutex_lock(&iter->mutex);
6828 
6829 	/* return any leftover data */
6830 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6831 	if (sret != -EBUSY)
6832 		goto out;
6833 
6834 	trace_seq_init(&iter->seq);
6835 
6836 	if (iter->trace->read) {
6837 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6838 		if (sret)
6839 			goto out;
6840 	}
6841 
6842 waitagain:
6843 	sret = tracing_wait_pipe(filp);
6844 	if (sret <= 0)
6845 		goto out;
6846 
6847 	/* stop when tracing is finished */
6848 	if (trace_empty(iter)) {
6849 		sret = 0;
6850 		goto out;
6851 	}
6852 
6853 	if (cnt >= PAGE_SIZE)
6854 		cnt = PAGE_SIZE - 1;
6855 
6856 	/* reset all but tr, trace, and overruns */
6857 	trace_iterator_reset(iter);
6858 	cpumask_clear(iter->started);
6859 	trace_seq_init(&iter->seq);
6860 
6861 	trace_event_read_lock();
6862 	trace_access_lock(iter->cpu_file);
6863 	while (trace_find_next_entry_inc(iter) != NULL) {
6864 		enum print_line_t ret;
6865 		int save_len = iter->seq.seq.len;
6866 
6867 		ret = print_trace_line(iter);
6868 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6869 			/*
6870 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6871 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6872 			 * In this case, we need to consume it; otherwise, the loop will peek
6873 			 * at this event next time, resulting in an infinite loop.
6874 			 */
6875 			if (save_len == 0) {
6876 				iter->seq.full = 0;
6877 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6878 				trace_consume(iter);
6879 				break;
6880 			}
6881 
6882 			/* In other cases, don't print partial lines */
6883 			iter->seq.seq.len = save_len;
6884 			break;
6885 		}
6886 		if (ret != TRACE_TYPE_NO_CONSUME)
6887 			trace_consume(iter);
6888 
6889 		if (trace_seq_used(&iter->seq) >= cnt)
6890 			break;
6891 
6892 		/*
6893 		 * Setting the full flag means we reached the trace_seq buffer
6894 		 * size and we should have exited via the partial output condition above.
6895 		 * One of the trace_seq_* functions is not used properly.
6896 		 */
6897 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6898 			  iter->ent->type);
6899 	}
6900 	trace_access_unlock(iter->cpu_file);
6901 	trace_event_read_unlock();
6902 
6903 	/* Now copy what we have to the user */
6904 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6905 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6906 		trace_seq_init(&iter->seq);
6907 
6908 	/*
6909 	 * If there was nothing to send to user, in spite of consuming trace
6910 	 * entries, go back to wait for more entries.
6911 	 */
6912 	if (sret == -EBUSY)
6913 		goto waitagain;
6914 
6915 out:
6916 	mutex_unlock(&iter->mutex);
6917 
6918 	return sret;
6919 }
6920 
6921 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6922 				     unsigned int idx)
6923 {
6924 	__free_page(spd->pages[idx]);
6925 }
6926 
6927 static size_t
6928 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6929 {
6930 	size_t count;
6931 	int save_len;
6932 	int ret;
6933 
6934 	/* Seq buffer is page-sized, exactly what we need. */
6935 	for (;;) {
6936 		save_len = iter->seq.seq.len;
6937 		ret = print_trace_line(iter);
6938 
6939 		if (trace_seq_has_overflowed(&iter->seq)) {
6940 			iter->seq.seq.len = save_len;
6941 			break;
6942 		}
6943 
6944 		/*
6945 		 * This should not be hit, because it should only
6946 		 * be set if the iter->seq overflowed. But check it
6947 		 * anyway to be safe.
6948 		 */
6949 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6950 			iter->seq.seq.len = save_len;
6951 			break;
6952 		}
6953 
6954 		count = trace_seq_used(&iter->seq) - save_len;
6955 		if (rem < count) {
6956 			rem = 0;
6957 			iter->seq.seq.len = save_len;
6958 			break;
6959 		}
6960 
6961 		if (ret != TRACE_TYPE_NO_CONSUME)
6962 			trace_consume(iter);
6963 		rem -= count;
6964 		if (!trace_find_next_entry_inc(iter))	{
6965 			rem = 0;
6966 			iter->ent = NULL;
6967 			break;
6968 		}
6969 	}
6970 
6971 	return rem;
6972 }
6973 
6974 static ssize_t tracing_splice_read_pipe(struct file *filp,
6975 					loff_t *ppos,
6976 					struct pipe_inode_info *pipe,
6977 					size_t len,
6978 					unsigned int flags)
6979 {
6980 	struct page *pages_def[PIPE_DEF_BUFFERS];
6981 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6982 	struct trace_iterator *iter = filp->private_data;
6983 	struct splice_pipe_desc spd = {
6984 		.pages		= pages_def,
6985 		.partial	= partial_def,
6986 		.nr_pages	= 0, /* This gets updated below. */
6987 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6988 		.ops		= &default_pipe_buf_ops,
6989 		.spd_release	= tracing_spd_release_pipe,
6990 	};
6991 	ssize_t ret;
6992 	size_t rem;
6993 	unsigned int i;
6994 
6995 	if (splice_grow_spd(pipe, &spd))
6996 		return -ENOMEM;
6997 
6998 	mutex_lock(&iter->mutex);
6999 
7000 	if (iter->trace->splice_read) {
7001 		ret = iter->trace->splice_read(iter, filp,
7002 					       ppos, pipe, len, flags);
7003 		if (ret)
7004 			goto out_err;
7005 	}
7006 
7007 	ret = tracing_wait_pipe(filp);
7008 	if (ret <= 0)
7009 		goto out_err;
7010 
7011 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7012 		ret = -EFAULT;
7013 		goto out_err;
7014 	}
7015 
7016 	trace_event_read_lock();
7017 	trace_access_lock(iter->cpu_file);
7018 
7019 	/* Fill as many pages as possible. */
7020 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7021 		spd.pages[i] = alloc_page(GFP_KERNEL);
7022 		if (!spd.pages[i])
7023 			break;
7024 
7025 		rem = tracing_fill_pipe_page(rem, iter);
7026 
7027 		/* Copy the data into the page, so we can start over. */
7028 		ret = trace_seq_to_buffer(&iter->seq,
7029 					  page_address(spd.pages[i]),
7030 					  trace_seq_used(&iter->seq));
7031 		if (ret < 0) {
7032 			__free_page(spd.pages[i]);
7033 			break;
7034 		}
7035 		spd.partial[i].offset = 0;
7036 		spd.partial[i].len = trace_seq_used(&iter->seq);
7037 
7038 		trace_seq_init(&iter->seq);
7039 	}
7040 
7041 	trace_access_unlock(iter->cpu_file);
7042 	trace_event_read_unlock();
7043 	mutex_unlock(&iter->mutex);
7044 
7045 	spd.nr_pages = i;
7046 
7047 	if (i)
7048 		ret = splice_to_pipe(pipe, &spd);
7049 	else
7050 		ret = 0;
7051 out:
7052 	splice_shrink_spd(&spd);
7053 	return ret;
7054 
7055 out_err:
7056 	mutex_unlock(&iter->mutex);
7057 	goto out;
7058 }
7059 
7060 static ssize_t
7061 tracing_entries_read(struct file *filp, char __user *ubuf,
7062 		     size_t cnt, loff_t *ppos)
7063 {
7064 	struct inode *inode = file_inode(filp);
7065 	struct trace_array *tr = inode->i_private;
7066 	int cpu = tracing_get_cpu(inode);
7067 	char buf[64];
7068 	int r = 0;
7069 	ssize_t ret;
7070 
7071 	mutex_lock(&trace_types_lock);
7072 
7073 	if (cpu == RING_BUFFER_ALL_CPUS) {
7074 		int cpu, buf_size_same;
7075 		unsigned long size;
7076 
7077 		size = 0;
7078 		buf_size_same = 1;
7079 		/* check if all cpu sizes are same */
7080 		for_each_tracing_cpu(cpu) {
7081 			/* fill in the size from first enabled cpu */
7082 			if (size == 0)
7083 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7084 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7085 				buf_size_same = 0;
7086 				break;
7087 			}
7088 		}
7089 
7090 		if (buf_size_same) {
7091 			if (!ring_buffer_expanded)
7092 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7093 					    size >> 10,
7094 					    trace_buf_size >> 10);
7095 			else
7096 				r = sprintf(buf, "%lu\n", size >> 10);
7097 		} else
7098 			r = sprintf(buf, "X\n");
7099 	} else
7100 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7101 
7102 	mutex_unlock(&trace_types_lock);
7103 
7104 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7105 	return ret;
7106 }
7107 
7108 static ssize_t
7109 tracing_entries_write(struct file *filp, const char __user *ubuf,
7110 		      size_t cnt, loff_t *ppos)
7111 {
7112 	struct inode *inode = file_inode(filp);
7113 	struct trace_array *tr = inode->i_private;
7114 	unsigned long val;
7115 	int ret;
7116 
7117 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7118 	if (ret)
7119 		return ret;
7120 
7121 	/* must have at least 1 entry */
7122 	if (!val)
7123 		return -EINVAL;
7124 
7125 	/* value is in KB */
7126 	val <<= 10;
7127 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7128 	if (ret < 0)
7129 		return ret;
7130 
7131 	*ppos += cnt;
7132 
7133 	return cnt;
7134 }
7135 
7136 static ssize_t
7137 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7138 				size_t cnt, loff_t *ppos)
7139 {
7140 	struct trace_array *tr = filp->private_data;
7141 	char buf[64];
7142 	int r, cpu;
7143 	unsigned long size = 0, expanded_size = 0;
7144 
7145 	mutex_lock(&trace_types_lock);
7146 	for_each_tracing_cpu(cpu) {
7147 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7148 		if (!ring_buffer_expanded)
7149 			expanded_size += trace_buf_size >> 10;
7150 	}
7151 	if (ring_buffer_expanded)
7152 		r = sprintf(buf, "%lu\n", size);
7153 	else
7154 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7155 	mutex_unlock(&trace_types_lock);
7156 
7157 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7158 }
7159 
7160 static ssize_t
7161 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7162 			  size_t cnt, loff_t *ppos)
7163 {
7164 	/*
7165 	 * There is no need to read what the user has written; this function
7166 	 * exists only so that there is no error when "echo" is used
7167 	 */
7168 
7169 	*ppos += cnt;
7170 
7171 	return cnt;
7172 }
7173 
7174 static int
7175 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7176 {
7177 	struct trace_array *tr = inode->i_private;
7178 
7179 	/* disable tracing ? */
7180 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7181 		tracer_tracing_off(tr);
7182 	/* resize the ring buffer to 0 */
7183 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7184 
7185 	trace_array_put(tr);
7186 
7187 	return 0;
7188 }
7189 
7190 static ssize_t
7191 tracing_mark_write(struct file *filp, const char __user *ubuf,
7192 					size_t cnt, loff_t *fpos)
7193 {
7194 	struct trace_array *tr = filp->private_data;
7195 	struct ring_buffer_event *event;
7196 	enum event_trigger_type tt = ETT_NONE;
7197 	struct trace_buffer *buffer;
7198 	struct print_entry *entry;
7199 	ssize_t written;
7200 	int size;
7201 	int len;
7202 
7203 /* Used in tracing_mark_raw_write() as well */
7204 #define FAULTED_STR "<faulted>"
7205 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7206 
7207 	if (tracing_disabled)
7208 		return -EINVAL;
7209 
7210 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7211 		return -EINVAL;
7212 
7213 	if (cnt > TRACE_BUF_SIZE)
7214 		cnt = TRACE_BUF_SIZE;
7215 
7216 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7217 
7218 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7219 
7220 	/* If less than "<faulted>", then make sure we can still add that */
7221 	if (cnt < FAULTED_SIZE)
7222 		size += FAULTED_SIZE - cnt;
7223 
7224 	buffer = tr->array_buffer.buffer;
7225 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7226 					    tracing_gen_ctx());
7227 	if (unlikely(!event))
7228 		/* Ring buffer disabled, return as if not open for write */
7229 		return -EBADF;
7230 
7231 	entry = ring_buffer_event_data(event);
7232 	entry->ip = _THIS_IP_;
7233 
7234 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7235 	if (len) {
7236 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7237 		cnt = FAULTED_SIZE;
7238 		written = -EFAULT;
7239 	} else
7240 		written = cnt;
7241 
7242 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7243 		/* do not add \n before testing triggers, but add \0 */
7244 		entry->buf[cnt] = '\0';
7245 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7246 	}
7247 
7248 	if (entry->buf[cnt - 1] != '\n') {
7249 		entry->buf[cnt] = '\n';
7250 		entry->buf[cnt + 1] = '\0';
7251 	} else
7252 		entry->buf[cnt] = '\0';
7253 
7254 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7255 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7256 	__buffer_unlock_commit(buffer, event);
7257 
7258 	if (tt)
7259 		event_triggers_post_call(tr->trace_marker_file, tt);
7260 
7261 	return written;
7262 }
7263 
7264 /* Limit it for now to 3K (including tag) */
7265 #define RAW_DATA_MAX_SIZE (1024*3)
7266 
7267 static ssize_t
7268 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7269 					size_t cnt, loff_t *fpos)
7270 {
7271 	struct trace_array *tr = filp->private_data;
7272 	struct ring_buffer_event *event;
7273 	struct trace_buffer *buffer;
7274 	struct raw_data_entry *entry;
7275 	ssize_t written;
7276 	int size;
7277 	int len;
7278 
7279 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7280 
7281 	if (tracing_disabled)
7282 		return -EINVAL;
7283 
7284 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7285 		return -EINVAL;
7286 
7287 	/* The marker must at least have a tag id */
7288 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7289 		return -EINVAL;
7290 
7291 	if (cnt > TRACE_BUF_SIZE)
7292 		cnt = TRACE_BUF_SIZE;
7293 
7294 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7295 
7296 	size = sizeof(*entry) + cnt;
7297 	if (cnt < FAULT_SIZE_ID)
7298 		size += FAULT_SIZE_ID - cnt;
7299 
7300 	buffer = tr->array_buffer.buffer;
7301 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7302 					    tracing_gen_ctx());
7303 	if (!event)
7304 		/* Ring buffer disabled, return as if not open for write */
7305 		return -EBADF;
7306 
7307 	entry = ring_buffer_event_data(event);
7308 
7309 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7310 	if (len) {
7311 		entry->id = -1;
7312 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7313 		written = -EFAULT;
7314 	} else
7315 		written = cnt;
7316 
7317 	__buffer_unlock_commit(buffer, event);
7318 
7319 	return written;
7320 }
7321 
7322 static int tracing_clock_show(struct seq_file *m, void *v)
7323 {
7324 	struct trace_array *tr = m->private;
7325 	int i;
7326 
7327 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7328 		seq_printf(m,
7329 			"%s%s%s%s", i ? " " : "",
7330 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7331 			i == tr->clock_id ? "]" : "");
7332 	seq_putc(m, '\n');
7333 
7334 	return 0;
7335 }
7336 
7337 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7338 {
7339 	int i;
7340 
7341 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7342 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7343 			break;
7344 	}
7345 	if (i == ARRAY_SIZE(trace_clocks))
7346 		return -EINVAL;
7347 
7348 	mutex_lock(&trace_types_lock);
7349 
7350 	tr->clock_id = i;
7351 
7352 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7353 
7354 	/*
7355 	 * New clock may not be consistent with the previous clock.
7356 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7357 	 */
7358 	tracing_reset_online_cpus(&tr->array_buffer);
7359 
7360 #ifdef CONFIG_TRACER_MAX_TRACE
7361 	if (tr->max_buffer.buffer)
7362 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7363 	tracing_reset_online_cpus(&tr->max_buffer);
7364 #endif
7365 
7366 	mutex_unlock(&trace_types_lock);
7367 
7368 	return 0;
7369 }
7370 
7371 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7372 				   size_t cnt, loff_t *fpos)
7373 {
7374 	struct seq_file *m = filp->private_data;
7375 	struct trace_array *tr = m->private;
7376 	char buf[64];
7377 	const char *clockstr;
7378 	int ret;
7379 
7380 	if (cnt >= sizeof(buf))
7381 		return -EINVAL;
7382 
7383 	if (copy_from_user(buf, ubuf, cnt))
7384 		return -EFAULT;
7385 
7386 	buf[cnt] = 0;
7387 
7388 	clockstr = strstrip(buf);
7389 
7390 	ret = tracing_set_clock(tr, clockstr);
7391 	if (ret)
7392 		return ret;
7393 
7394 	*fpos += cnt;
7395 
7396 	return cnt;
7397 }
7398 
7399 static int tracing_clock_open(struct inode *inode, struct file *file)
7400 {
7401 	struct trace_array *tr = inode->i_private;
7402 	int ret;
7403 
7404 	ret = tracing_check_open_get_tr(tr);
7405 	if (ret)
7406 		return ret;
7407 
7408 	ret = single_open(file, tracing_clock_show, inode->i_private);
7409 	if (ret < 0)
7410 		trace_array_put(tr);
7411 
7412 	return ret;
7413 }
7414 
7415 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7416 {
7417 	struct trace_array *tr = m->private;
7418 
7419 	mutex_lock(&trace_types_lock);
7420 
7421 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7422 		seq_puts(m, "delta [absolute]\n");
7423 	else
7424 		seq_puts(m, "[delta] absolute\n");
7425 
7426 	mutex_unlock(&trace_types_lock);
7427 
7428 	return 0;
7429 }
7430 
7431 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7432 {
7433 	struct trace_array *tr = inode->i_private;
7434 	int ret;
7435 
7436 	ret = tracing_check_open_get_tr(tr);
7437 	if (ret)
7438 		return ret;
7439 
7440 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7441 	if (ret < 0)
7442 		trace_array_put(tr);
7443 
7444 	return ret;
7445 }
7446 
7447 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7448 {
7449 	if (rbe == this_cpu_read(trace_buffered_event))
7450 		return ring_buffer_time_stamp(buffer);
7451 
7452 	return ring_buffer_event_time_stamp(buffer, rbe);
7453 }
7454 
7455 /*
7456  * Set or disable using the per CPU trace_buffered_event when possible.
7457  */
7458 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7459 {
7460 	int ret = 0;
7461 
7462 	mutex_lock(&trace_types_lock);
7463 
7464 	if (set && tr->no_filter_buffering_ref++)
7465 		goto out;
7466 
7467 	if (!set) {
7468 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7469 			ret = -EINVAL;
7470 			goto out;
7471 		}
7472 
7473 		--tr->no_filter_buffering_ref;
7474 	}
7475  out:
7476 	mutex_unlock(&trace_types_lock);
7477 
7478 	return ret;
7479 }
7480 
7481 struct ftrace_buffer_info {
7482 	struct trace_iterator	iter;
7483 	void			*spare;
7484 	unsigned int		spare_cpu;
7485 	unsigned int		read;
7486 };
7487 
7488 #ifdef CONFIG_TRACER_SNAPSHOT
7489 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7490 {
7491 	struct trace_array *tr = inode->i_private;
7492 	struct trace_iterator *iter;
7493 	struct seq_file *m;
7494 	int ret;
7495 
7496 	ret = tracing_check_open_get_tr(tr);
7497 	if (ret)
7498 		return ret;
7499 
7500 	if (file->f_mode & FMODE_READ) {
7501 		iter = __tracing_open(inode, file, true);
7502 		if (IS_ERR(iter))
7503 			ret = PTR_ERR(iter);
7504 	} else {
7505 		/* Writes still need the seq_file to hold the private data */
7506 		ret = -ENOMEM;
7507 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7508 		if (!m)
7509 			goto out;
7510 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7511 		if (!iter) {
7512 			kfree(m);
7513 			goto out;
7514 		}
7515 		ret = 0;
7516 
7517 		iter->tr = tr;
7518 		iter->array_buffer = &tr->max_buffer;
7519 		iter->cpu_file = tracing_get_cpu(inode);
7520 		m->private = iter;
7521 		file->private_data = m;
7522 	}
7523 out:
7524 	if (ret < 0)
7525 		trace_array_put(tr);
7526 
7527 	return ret;
7528 }
7529 
7530 static void tracing_swap_cpu_buffer(void *tr)
7531 {
7532 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7533 }
7534 
7535 static ssize_t
7536 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7537 		       loff_t *ppos)
7538 {
7539 	struct seq_file *m = filp->private_data;
7540 	struct trace_iterator *iter = m->private;
7541 	struct trace_array *tr = iter->tr;
7542 	unsigned long val;
7543 	int ret;
7544 
7545 	ret = tracing_update_buffers();
7546 	if (ret < 0)
7547 		return ret;
7548 
7549 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7550 	if (ret)
7551 		return ret;
7552 
7553 	mutex_lock(&trace_types_lock);
7554 
7555 	if (tr->current_trace->use_max_tr) {
7556 		ret = -EBUSY;
7557 		goto out;
7558 	}
7559 
7560 	local_irq_disable();
7561 	arch_spin_lock(&tr->max_lock);
7562 	if (tr->cond_snapshot)
7563 		ret = -EBUSY;
7564 	arch_spin_unlock(&tr->max_lock);
7565 	local_irq_enable();
7566 	if (ret)
7567 		goto out;
7568 
7569 	switch (val) {
7570 	case 0:
7571 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7572 			ret = -EINVAL;
7573 			break;
7574 		}
7575 		if (tr->allocated_snapshot)
7576 			free_snapshot(tr);
7577 		break;
7578 	case 1:
7579 /* Only allow per-cpu swap if the ring buffer supports it */
7580 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7581 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7582 			ret = -EINVAL;
7583 			break;
7584 		}
7585 #endif
7586 		if (tr->allocated_snapshot)
7587 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7588 					&tr->array_buffer, iter->cpu_file);
7589 		else
7590 			ret = tracing_alloc_snapshot_instance(tr);
7591 		if (ret < 0)
7592 			break;
7593 		/* Now, we're going to swap */
7594 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7595 			local_irq_disable();
7596 			update_max_tr(tr, current, smp_processor_id(), NULL);
7597 			local_irq_enable();
7598 		} else {
7599 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7600 						 (void *)tr, 1);
7601 		}
7602 		break;
7603 	default:
7604 		if (tr->allocated_snapshot) {
7605 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7606 				tracing_reset_online_cpus(&tr->max_buffer);
7607 			else
7608 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7609 		}
7610 		break;
7611 	}
7612 
7613 	if (ret >= 0) {
7614 		*ppos += cnt;
7615 		ret = cnt;
7616 	}
7617 out:
7618 	mutex_unlock(&trace_types_lock);
7619 	return ret;
7620 }
7621 
7622 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7623 {
7624 	struct seq_file *m = file->private_data;
7625 	int ret;
7626 
7627 	ret = tracing_release(inode, file);
7628 
7629 	if (file->f_mode & FMODE_READ)
7630 		return ret;
7631 
7632 	/* If write only, the seq_file is just a stub */
7633 	if (m)
7634 		kfree(m->private);
7635 	kfree(m);
7636 
7637 	return 0;
7638 }
7639 
7640 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7641 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7642 				    size_t count, loff_t *ppos);
7643 static int tracing_buffers_release(struct inode *inode, struct file *file);
7644 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7645 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7646 
7647 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7648 {
7649 	struct ftrace_buffer_info *info;
7650 	int ret;
7651 
7652 	/* The following checks for tracefs lockdown */
7653 	ret = tracing_buffers_open(inode, filp);
7654 	if (ret < 0)
7655 		return ret;
7656 
7657 	info = filp->private_data;
7658 
7659 	if (info->iter.trace->use_max_tr) {
7660 		tracing_buffers_release(inode, filp);
7661 		return -EBUSY;
7662 	}
7663 
7664 	info->iter.snapshot = true;
7665 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7666 
7667 	return ret;
7668 }
7669 
7670 #endif /* CONFIG_TRACER_SNAPSHOT */
7671 
7672 
7673 static const struct file_operations tracing_thresh_fops = {
7674 	.open		= tracing_open_generic,
7675 	.read		= tracing_thresh_read,
7676 	.write		= tracing_thresh_write,
7677 	.llseek		= generic_file_llseek,
7678 };
7679 
7680 #ifdef CONFIG_TRACER_MAX_TRACE
7681 static const struct file_operations tracing_max_lat_fops = {
7682 	.open		= tracing_open_generic_tr,
7683 	.read		= tracing_max_lat_read,
7684 	.write		= tracing_max_lat_write,
7685 	.llseek		= generic_file_llseek,
7686 	.release	= tracing_release_generic_tr,
7687 };
7688 #endif
7689 
7690 static const struct file_operations set_tracer_fops = {
7691 	.open		= tracing_open_generic_tr,
7692 	.read		= tracing_set_trace_read,
7693 	.write		= tracing_set_trace_write,
7694 	.llseek		= generic_file_llseek,
7695 	.release	= tracing_release_generic_tr,
7696 };
7697 
7698 static const struct file_operations tracing_pipe_fops = {
7699 	.open		= tracing_open_pipe,
7700 	.poll		= tracing_poll_pipe,
7701 	.read		= tracing_read_pipe,
7702 	.splice_read	= tracing_splice_read_pipe,
7703 	.release	= tracing_release_pipe,
7704 	.llseek		= no_llseek,
7705 };
7706 
7707 static const struct file_operations tracing_entries_fops = {
7708 	.open		= tracing_open_generic_tr,
7709 	.read		= tracing_entries_read,
7710 	.write		= tracing_entries_write,
7711 	.llseek		= generic_file_llseek,
7712 	.release	= tracing_release_generic_tr,
7713 };
7714 
7715 static const struct file_operations tracing_total_entries_fops = {
7716 	.open		= tracing_open_generic_tr,
7717 	.read		= tracing_total_entries_read,
7718 	.llseek		= generic_file_llseek,
7719 	.release	= tracing_release_generic_tr,
7720 };
7721 
7722 static const struct file_operations tracing_free_buffer_fops = {
7723 	.open		= tracing_open_generic_tr,
7724 	.write		= tracing_free_buffer_write,
7725 	.release	= tracing_free_buffer_release,
7726 };
7727 
7728 static const struct file_operations tracing_mark_fops = {
7729 	.open		= tracing_mark_open,
7730 	.write		= tracing_mark_write,
7731 	.release	= tracing_release_generic_tr,
7732 };
7733 
7734 static const struct file_operations tracing_mark_raw_fops = {
7735 	.open		= tracing_mark_open,
7736 	.write		= tracing_mark_raw_write,
7737 	.release	= tracing_release_generic_tr,
7738 };
7739 
7740 static const struct file_operations trace_clock_fops = {
7741 	.open		= tracing_clock_open,
7742 	.read		= seq_read,
7743 	.llseek		= seq_lseek,
7744 	.release	= tracing_single_release_tr,
7745 	.write		= tracing_clock_write,
7746 };
7747 
7748 static const struct file_operations trace_time_stamp_mode_fops = {
7749 	.open		= tracing_time_stamp_mode_open,
7750 	.read		= seq_read,
7751 	.llseek		= seq_lseek,
7752 	.release	= tracing_single_release_tr,
7753 };
7754 
7755 #ifdef CONFIG_TRACER_SNAPSHOT
7756 static const struct file_operations snapshot_fops = {
7757 	.open		= tracing_snapshot_open,
7758 	.read		= seq_read,
7759 	.write		= tracing_snapshot_write,
7760 	.llseek		= tracing_lseek,
7761 	.release	= tracing_snapshot_release,
7762 };
7763 
7764 static const struct file_operations snapshot_raw_fops = {
7765 	.open		= snapshot_raw_open,
7766 	.read		= tracing_buffers_read,
7767 	.release	= tracing_buffers_release,
7768 	.splice_read	= tracing_buffers_splice_read,
7769 	.llseek		= no_llseek,
7770 };
7771 
7772 #endif /* CONFIG_TRACER_SNAPSHOT */
7773 
7774 /*
7775  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7776  * @filp: The active open file structure
7777  * @ubuf: The userspace provided buffer holding the value to write
7778  * @cnt: The number of bytes to read from @ubuf
7779  * @ppos: The current "file" position
7780  *
7781  * This function implements the write interface for a struct trace_min_max_param.
7782  * The filp->private_data must point to a trace_min_max_param structure that
7783  * defines where to write the value, the min and the max acceptable values,
7784  * and a lock to protect the write.
7785  */
7786 static ssize_t
7787 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7788 {
7789 	struct trace_min_max_param *param = filp->private_data;
7790 	u64 val;
7791 	int err;
7792 
7793 	if (!param)
7794 		return -EFAULT;
7795 
7796 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7797 	if (err)
7798 		return err;
7799 
7800 	if (param->lock)
7801 		mutex_lock(param->lock);
7802 
7803 	if (param->min && val < *param->min)
7804 		err = -EINVAL;
7805 
7806 	if (param->max && val > *param->max)
7807 		err = -EINVAL;
7808 
7809 	if (!err)
7810 		*param->val = val;
7811 
7812 	if (param->lock)
7813 		mutex_unlock(param->lock);
7814 
7815 	if (err)
7816 		return err;
7817 
7818 	return cnt;
7819 }
7820 
7821 /*
7822  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7823  * @filp: The active open file structure
7824  * @ubuf: The userspace provided buffer to read value into
7825  * @cnt: The maximum number of bytes to read
7826  * @ppos: The current "file" position
7827  *
7828  * This function implements the read interface for a struct trace_min_max_param.
7829  * The filp->private_data must point to a trace_min_max_param struct with valid
7830  * data.
7831  */
7832 static ssize_t
7833 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7834 {
7835 	struct trace_min_max_param *param = filp->private_data;
7836 	char buf[U64_STR_SIZE];
7837 	int len;
7838 	u64 val;
7839 
7840 	if (!param)
7841 		return -EFAULT;
7842 
7843 	val = *param->val;
7844 
7845 	if (cnt > sizeof(buf))
7846 		cnt = sizeof(buf);
7847 
7848 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7849 
7850 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7851 }
7852 
7853 const struct file_operations trace_min_max_fops = {
7854 	.open		= tracing_open_generic,
7855 	.read		= trace_min_max_read,
7856 	.write		= trace_min_max_write,
7857 };
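/*
 * Minimal wiring sketch for these fops (every name below is made up
 * for the example; only the trace_min_max_param fields used above and
 * trace_create_file() are taken from this file):
 *
 *	static u64 example_val, example_min = 1, example_max = 1000;
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_mutex,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_knob", 0644, parent,
 *			  &example_param, &trace_min_max_fops);
 */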
7858 
7859 #define TRACING_LOG_ERRS_MAX	8
7860 #define TRACING_LOG_LOC_MAX	128
7861 
7862 #define CMD_PREFIX "  Command: "
7863 
7864 struct err_info {
7865 	const char	**errs;	/* ptr to loc-specific array of err strings */
7866 	u8		type;	/* index into errs -> specific err string */
7867 	u16		pos;	/* caret position */
7868 	u64		ts;
7869 };
7870 
7871 struct tracing_log_err {
7872 	struct list_head	list;
7873 	struct err_info		info;
7874 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7875 	char			*cmd;                     /* what caused err */
7876 };
7877 
7878 static DEFINE_MUTEX(tracing_err_log_lock);
7879 
7880 static struct tracing_log_err *alloc_tracing_log_err(int len)
7881 {
7882 	struct tracing_log_err *err;
7883 
7884 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7885 	if (!err)
7886 		return ERR_PTR(-ENOMEM);
7887 
7888 	err->cmd = kzalloc(len, GFP_KERNEL);
7889 	if (!err->cmd) {
7890 		kfree(err);
7891 		return ERR_PTR(-ENOMEM);
7892 	}
7893 
7894 	return err;
7895 }
7896 
7897 static void free_tracing_log_err(struct tracing_log_err *err)
7898 {
7899 	kfree(err->cmd);
7900 	kfree(err);
7901 }
7902 
7903 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7904 						   int len)
7905 {
7906 	struct tracing_log_err *err;
7907 	char *cmd;
7908 
7909 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7910 		err = alloc_tracing_log_err(len);
7911 		if (PTR_ERR(err) != -ENOMEM)
7912 			tr->n_err_log_entries++;
7913 
7914 		return err;
7915 	}
7916 	cmd = kzalloc(len, GFP_KERNEL);
7917 	if (!cmd)
7918 		return ERR_PTR(-ENOMEM);
7919 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7920 	kfree(err->cmd);
7921 	err->cmd = cmd;
7922 	list_del(&err->list);
7923 
7924 	return err;
7925 }
7926 
7927 /**
7928  * err_pos - find the position of a string within a command for error careting
7929  * @cmd: The tracing command that caused the error
7930  * @str: The string to position the caret at within @cmd
7931  *
7932  * Finds the position of the first occurrence of @str within @cmd.  The
7933  * return value can be passed to tracing_log_err() for caret placement
7934  * within @cmd.
7935  *
7936  * Returns the index within @cmd of the first occurrence of @str or 0
7937  * if @str was not found.
7938  */
7939 unsigned int err_pos(char *cmd, const char *str)
7940 {
7941 	char *found;
7942 
7943 	if (WARN_ON(!strlen(cmd)))
7944 		return 0;
7945 
7946 	found = strstr(cmd, str);
7947 	if (found)
7948 		return found - cmd;
7949 
7950 	return 0;
7951 }
7952 
7953 /**
7954  * tracing_log_err - write an error to the tracing error log
7955  * @tr: The associated trace array for the error (NULL for top level array)
7956  * @loc: A string describing where the error occurred
7957  * @cmd: The tracing command that caused the error
7958  * @errs: The array of loc-specific static error strings
7959  * @type: The index into errs[], which produces the specific static err string
7960  * @pos: The position the caret should be placed in the cmd
7961  *
7962  * Writes an error into tracing/error_log of the form:
7963  *
7964  * <loc>: error: <text>
7965  *   Command: <cmd>
7966  *              ^
7967  *
7968  * tracing/error_log is a small log file containing the last
7969  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7970  * unless there has been a tracing error, and the error log can be
7971  * cleared and have its memory freed by writing the empty string in
7972  * truncation mode to it i.e. echo > tracing/error_log.
7973  *
7974  * NOTE: the @errs array along with the @type param are used to
7975  * produce a static error string - this string is not copied and saved
7976  * when the error is logged - only a pointer to it is saved.  See
7977  * existing callers for examples of how static strings are typically
7978  * defined for use with tracing_log_err().
7979  */
7980 void tracing_log_err(struct trace_array *tr,
7981 		     const char *loc, const char *cmd,
7982 		     const char **errs, u8 type, u16 pos)
7983 {
7984 	struct tracing_log_err *err;
7985 	int len = 0;
7986 
7987 	if (!tr)
7988 		tr = &global_trace;
7989 
7990 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7991 
7992 	mutex_lock(&tracing_err_log_lock);
7993 	err = get_tracing_log_err(tr, len);
7994 	if (PTR_ERR(err) == -ENOMEM) {
7995 		mutex_unlock(&tracing_err_log_lock);
7996 		return;
7997 	}
7998 
7999 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8000 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8001 
8002 	err->info.errs = errs;
8003 	err->info.type = type;
8004 	err->info.pos = pos;
8005 	err->info.ts = local_clock();
8006 
8007 	list_add_tail(&err->list, &tr->err_log);
8008 	mutex_unlock(&tracing_err_log_lock);
8009 }
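/*
 * Hedged example of a call and the resulting error_log entry (the
 * command string, error table and timestamp are invented for
 * illustration):
 *
 *	static const char *example_errs[] = { "Invalid key" };
 *
 *	tracing_log_err(tr, "hist:create_hist", cmd, example_errs,
 *			0, err_pos(cmd, "keys="));
 *
 * would appear in tracing/error_log roughly as:
 *
 *	[  123.456789] hist:create_hist: error: Invalid key
 *	  Command: hist:keys=bogus
 *	                ^
 */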
8010 
8011 static void clear_tracing_err_log(struct trace_array *tr)
8012 {
8013 	struct tracing_log_err *err, *next;
8014 
8015 	mutex_lock(&tracing_err_log_lock);
8016 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8017 		list_del(&err->list);
8018 		free_tracing_log_err(err);
8019 	}
8020 
8021 	tr->n_err_log_entries = 0;
8022 	mutex_unlock(&tracing_err_log_lock);
8023 }
8024 
8025 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8026 {
8027 	struct trace_array *tr = m->private;
8028 
8029 	mutex_lock(&tracing_err_log_lock);
8030 
8031 	return seq_list_start(&tr->err_log, *pos);
8032 }
8033 
8034 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8035 {
8036 	struct trace_array *tr = m->private;
8037 
8038 	return seq_list_next(v, &tr->err_log, pos);
8039 }
8040 
8041 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8042 {
8043 	mutex_unlock(&tracing_err_log_lock);
8044 }
8045 
8046 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8047 {
8048 	u16 i;
8049 
8050 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8051 		seq_putc(m, ' ');
8052 	for (i = 0; i < pos; i++)
8053 		seq_putc(m, ' ');
8054 	seq_puts(m, "^\n");
8055 }
8056 
8057 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8058 {
8059 	struct tracing_log_err *err = v;
8060 
8061 	if (err) {
8062 		const char *err_text = err->info.errs[err->info.type];
8063 		u64 sec = err->info.ts;
8064 		u32 nsec;
8065 
8066 		nsec = do_div(sec, NSEC_PER_SEC);
8067 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8068 			   err->loc, err_text);
8069 		seq_printf(m, "%s", err->cmd);
8070 		tracing_err_log_show_pos(m, err->info.pos);
8071 	}
8072 
8073 	return 0;
8074 }
8075 
8076 static const struct seq_operations tracing_err_log_seq_ops = {
8077 	.start  = tracing_err_log_seq_start,
8078 	.next   = tracing_err_log_seq_next,
8079 	.stop   = tracing_err_log_seq_stop,
8080 	.show   = tracing_err_log_seq_show
8081 };
8082 
8083 static int tracing_err_log_open(struct inode *inode, struct file *file)
8084 {
8085 	struct trace_array *tr = inode->i_private;
8086 	int ret = 0;
8087 
8088 	ret = tracing_check_open_get_tr(tr);
8089 	if (ret)
8090 		return ret;
8091 
8092 	/* If this file was opened for write, then erase contents */
8093 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8094 		clear_tracing_err_log(tr);
8095 
8096 	if (file->f_mode & FMODE_READ) {
8097 		ret = seq_open(file, &tracing_err_log_seq_ops);
8098 		if (!ret) {
8099 			struct seq_file *m = file->private_data;
8100 			m->private = tr;
8101 		} else {
8102 			trace_array_put(tr);
8103 		}
8104 	}
8105 	return ret;
8106 }
8107 
8108 static ssize_t tracing_err_log_write(struct file *file,
8109 				     const char __user *buffer,
8110 				     size_t count, loff_t *ppos)
8111 {
8112 	return count;
8113 }
8114 
8115 static int tracing_err_log_release(struct inode *inode, struct file *file)
8116 {
8117 	struct trace_array *tr = inode->i_private;
8118 
8119 	trace_array_put(tr);
8120 
8121 	if (file->f_mode & FMODE_READ)
8122 		seq_release(inode, file);
8123 
8124 	return 0;
8125 }
8126 
8127 static const struct file_operations tracing_err_log_fops = {
8128 	.open           = tracing_err_log_open,
8129 	.write		= tracing_err_log_write,
8130 	.read           = seq_read,
8131 	.llseek         = tracing_lseek,
8132 	.release        = tracing_err_log_release,
8133 };
8134 
8135 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8136 {
8137 	struct trace_array *tr = inode->i_private;
8138 	struct ftrace_buffer_info *info;
8139 	int ret;
8140 
8141 	ret = tracing_check_open_get_tr(tr);
8142 	if (ret)
8143 		return ret;
8144 
8145 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8146 	if (!info) {
8147 		trace_array_put(tr);
8148 		return -ENOMEM;
8149 	}
8150 
8151 	mutex_lock(&trace_types_lock);
8152 
8153 	info->iter.tr		= tr;
8154 	info->iter.cpu_file	= tracing_get_cpu(inode);
8155 	info->iter.trace	= tr->current_trace;
8156 	info->iter.array_buffer = &tr->array_buffer;
8157 	info->spare		= NULL;
8158 	/* Force reading ring buffer for first read */
8159 	info->read		= (unsigned int)-1;
8160 
8161 	filp->private_data = info;
8162 
8163 	tr->trace_ref++;
8164 
8165 	mutex_unlock(&trace_types_lock);
8166 
8167 	ret = nonseekable_open(inode, filp);
8168 	if (ret < 0)
8169 		trace_array_put(tr);
8170 
8171 	return ret;
8172 }
8173 
8174 static __poll_t
8175 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8176 {
8177 	struct ftrace_buffer_info *info = filp->private_data;
8178 	struct trace_iterator *iter = &info->iter;
8179 
8180 	return trace_poll(iter, filp, poll_table);
8181 }
8182 
8183 static ssize_t
8184 tracing_buffers_read(struct file *filp, char __user *ubuf,
8185 		     size_t count, loff_t *ppos)
8186 {
8187 	struct ftrace_buffer_info *info = filp->private_data;
8188 	struct trace_iterator *iter = &info->iter;
8189 	ssize_t ret = 0;
8190 	ssize_t size;
8191 
8192 	if (!count)
8193 		return 0;
8194 
8195 #ifdef CONFIG_TRACER_MAX_TRACE
8196 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8197 		return -EBUSY;
8198 #endif
8199 
8200 	if (!info->spare) {
8201 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8202 							  iter->cpu_file);
8203 		if (IS_ERR(info->spare)) {
8204 			ret = PTR_ERR(info->spare);
8205 			info->spare = NULL;
8206 		} else {
8207 			info->spare_cpu = iter->cpu_file;
8208 		}
8209 	}
8210 	if (!info->spare)
8211 		return ret;
8212 
8213 	/* Do we have previous read data to read? */
8214 	if (info->read < PAGE_SIZE)
8215 		goto read;
8216 
8217  again:
8218 	trace_access_lock(iter->cpu_file);
8219 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8220 				    &info->spare,
8221 				    count,
8222 				    iter->cpu_file, 0);
8223 	trace_access_unlock(iter->cpu_file);
8224 
8225 	if (ret < 0) {
8226 		if (trace_empty(iter)) {
8227 			if ((filp->f_flags & O_NONBLOCK))
8228 				return -EAGAIN;
8229 
8230 			ret = wait_on_pipe(iter, 0);
8231 			if (ret)
8232 				return ret;
8233 
8234 			goto again;
8235 		}
8236 		return 0;
8237 	}
8238 
8239 	info->read = 0;
8240  read:
8241 	size = PAGE_SIZE - info->read;
8242 	if (size > count)
8243 		size = count;
8244 
8245 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8246 	if (ret == size)
8247 		return -EFAULT;
8248 
8249 	size -= ret;
8250 
8251 	*ppos += size;
8252 	info->read += size;
8253 
8254 	return size;
8255 }
8256 
8257 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8258 {
8259 	struct ftrace_buffer_info *info = file->private_data;
8260 	struct trace_iterator *iter = &info->iter;
8261 
8262 	iter->wait_index++;
8263 	/* Make sure the waiters see the new wait_index */
8264 	smp_wmb();
8265 
8266 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8267 
8268 	return 0;
8269 }
8270 
8271 static int tracing_buffers_release(struct inode *inode, struct file *file)
8272 {
8273 	struct ftrace_buffer_info *info = file->private_data;
8274 	struct trace_iterator *iter = &info->iter;
8275 
8276 	mutex_lock(&trace_types_lock);
8277 
8278 	iter->tr->trace_ref--;
8279 
8280 	__trace_array_put(iter->tr);
8281 
8282 	if (info->spare)
8283 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8284 					   info->spare_cpu, info->spare);
8285 	kvfree(info);
8286 
8287 	mutex_unlock(&trace_types_lock);
8288 
8289 	return 0;
8290 }
8291 
8292 struct buffer_ref {
8293 	struct trace_buffer	*buffer;
8294 	void			*page;
8295 	int			cpu;
8296 	refcount_t		refcount;
8297 };
8298 
8299 static void buffer_ref_release(struct buffer_ref *ref)
8300 {
8301 	if (!refcount_dec_and_test(&ref->refcount))
8302 		return;
8303 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8304 	kfree(ref);
8305 }
8306 
8307 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8308 				    struct pipe_buffer *buf)
8309 {
8310 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8311 
8312 	buffer_ref_release(ref);
8313 	buf->private = 0;
8314 }
8315 
8316 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8317 				struct pipe_buffer *buf)
8318 {
8319 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8320 
8321 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8322 		return false;
8323 
8324 	refcount_inc(&ref->refcount);
8325 	return true;
8326 }
8327 
8328 /* Pipe buffer operations for a buffer. */
8329 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8330 	.release		= buffer_pipe_buf_release,
8331 	.get			= buffer_pipe_buf_get,
8332 };
8333 
8334 /*
8335  * Callback from splice_to_pipe(); releases pages at the end of the spd
8336  * in case we errored out while filling the pipe.
8337  */
8338 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8339 {
8340 	struct buffer_ref *ref =
8341 		(struct buffer_ref *)spd->partial[i].private;
8342 
8343 	buffer_ref_release(ref);
8344 	spd->partial[i].private = 0;
8345 }
8346 
8347 static ssize_t
8348 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8349 			    struct pipe_inode_info *pipe, size_t len,
8350 			    unsigned int flags)
8351 {
8352 	struct ftrace_buffer_info *info = file->private_data;
8353 	struct trace_iterator *iter = &info->iter;
8354 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8355 	struct page *pages_def[PIPE_DEF_BUFFERS];
8356 	struct splice_pipe_desc spd = {
8357 		.pages		= pages_def,
8358 		.partial	= partial_def,
8359 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8360 		.ops		= &buffer_pipe_buf_ops,
8361 		.spd_release	= buffer_spd_release,
8362 	};
8363 	struct buffer_ref *ref;
8364 	int entries, i;
8365 	ssize_t ret = 0;
8366 
8367 #ifdef CONFIG_TRACER_MAX_TRACE
8368 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8369 		return -EBUSY;
8370 #endif
8371 
8372 	if (*ppos & (PAGE_SIZE - 1))
8373 		return -EINVAL;
8374 
8375 	if (len & (PAGE_SIZE - 1)) {
8376 		if (len < PAGE_SIZE)
8377 			return -EINVAL;
8378 		len &= PAGE_MASK;
8379 	}
8380 
8381 	if (splice_grow_spd(pipe, &spd))
8382 		return -ENOMEM;
8383 
8384  again:
8385 	trace_access_lock(iter->cpu_file);
8386 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8387 
8388 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8389 		struct page *page;
8390 		int r;
8391 
8392 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8393 		if (!ref) {
8394 			ret = -ENOMEM;
8395 			break;
8396 		}
8397 
8398 		refcount_set(&ref->refcount, 1);
8399 		ref->buffer = iter->array_buffer->buffer;
8400 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8401 		if (IS_ERR(ref->page)) {
8402 			ret = PTR_ERR(ref->page);
8403 			ref->page = NULL;
8404 			kfree(ref);
8405 			break;
8406 		}
8407 		ref->cpu = iter->cpu_file;
8408 
8409 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8410 					  len, iter->cpu_file, 1);
8411 		if (r < 0) {
8412 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8413 						   ref->page);
8414 			kfree(ref);
8415 			break;
8416 		}
8417 
8418 		page = virt_to_page(ref->page);
8419 
8420 		spd.pages[i] = page;
8421 		spd.partial[i].len = PAGE_SIZE;
8422 		spd.partial[i].offset = 0;
8423 		spd.partial[i].private = (unsigned long)ref;
8424 		spd.nr_pages++;
8425 		*ppos += PAGE_SIZE;
8426 
8427 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8428 	}
8429 
8430 	trace_access_unlock(iter->cpu_file);
8431 	spd.nr_pages = i;
8432 
8433 	/* did we read anything? */
8434 	if (!spd.nr_pages) {
8435 		long wait_index;
8436 
8437 		if (ret)
8438 			goto out;
8439 
8440 		ret = -EAGAIN;
8441 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8442 			goto out;
8443 
8444 		wait_index = READ_ONCE(iter->wait_index);
8445 
8446 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8447 		if (ret)
8448 			goto out;
8449 
8450 		/* No need to wait after waking up when tracing is off */
8451 		if (!tracer_tracing_is_on(iter->tr))
8452 			goto out;
8453 
8454 		/* Make sure we see the new wait_index */
8455 		smp_rmb();
8456 		if (wait_index != iter->wait_index)
8457 			goto out;
8458 
8459 		goto again;
8460 	}
8461 
8462 	ret = splice_to_pipe(pipe, &spd);
8463 out:
8464 	splice_shrink_spd(&spd);
8465 
8466 	return ret;
8467 }
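/*
 * Usage sketch: this splice path backs per_cpu/cpuN/trace_pipe_raw, so
 * a user-space reader can move whole ring-buffer pages into a pipe
 * without copying. The paths, descriptors and length below are
 * illustrative only:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	splice(fd, NULL, pipe_wr_fd, NULL, 4096, SPLICE_F_NONBLOCK);
 */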
8468 
8469 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8470 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8471 {
8472 	struct ftrace_buffer_info *info = file->private_data;
8473 	struct trace_iterator *iter = &info->iter;
8474 
8475 	if (cmd)
8476 		return -ENOIOCTLCMD;
8477 
8478 	mutex_lock(&trace_types_lock);
8479 
8480 	iter->wait_index++;
8481 	/* Make sure the waiters see the new wait_index */
8482 	smp_wmb();
8483 
8484 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8485 
8486 	mutex_unlock(&trace_types_lock);
8487 	return 0;
8488 }
8489 
8490 static const struct file_operations tracing_buffers_fops = {
8491 	.open		= tracing_buffers_open,
8492 	.read		= tracing_buffers_read,
8493 	.poll		= tracing_buffers_poll,
8494 	.release	= tracing_buffers_release,
8495 	.flush		= tracing_buffers_flush,
8496 	.splice_read	= tracing_buffers_splice_read,
8497 	.unlocked_ioctl = tracing_buffers_ioctl,
8498 	.llseek		= no_llseek,
8499 };
8500 
8501 static ssize_t
8502 tracing_stats_read(struct file *filp, char __user *ubuf,
8503 		   size_t count, loff_t *ppos)
8504 {
8505 	struct inode *inode = file_inode(filp);
8506 	struct trace_array *tr = inode->i_private;
8507 	struct array_buffer *trace_buf = &tr->array_buffer;
8508 	int cpu = tracing_get_cpu(inode);
8509 	struct trace_seq *s;
8510 	unsigned long cnt;
8511 	unsigned long long t;
8512 	unsigned long usec_rem;
8513 
8514 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8515 	if (!s)
8516 		return -ENOMEM;
8517 
8518 	trace_seq_init(s);
8519 
8520 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8521 	trace_seq_printf(s, "entries: %ld\n", cnt);
8522 
8523 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8524 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8525 
8526 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8527 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8528 
8529 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8530 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8531 
8532 	if (trace_clocks[tr->clock_id].in_ns) {
8533 		/* local or global for trace_clock */
8534 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8535 		usec_rem = do_div(t, USEC_PER_SEC);
8536 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8537 								t, usec_rem);
8538 
8539 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8540 		usec_rem = do_div(t, USEC_PER_SEC);
8541 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8542 	} else {
8543 		/* counter or tsc mode for trace_clock */
8544 		trace_seq_printf(s, "oldest event ts: %llu\n",
8545 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8546 
8547 		trace_seq_printf(s, "now ts: %llu\n",
8548 				ring_buffer_time_stamp(trace_buf->buffer));
8549 	}
8550 
8551 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8552 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8553 
8554 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8555 	trace_seq_printf(s, "read events: %ld\n", cnt);
8556 
8557 	count = simple_read_from_buffer(ubuf, count, ppos,
8558 					s->buffer, trace_seq_used(s));
8559 
8560 	kfree(s);
8561 
8562 	return count;
8563 }
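/*
 * Example of the per_cpu/cpuN/stats output assembled above (the
 * numbers are made up):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 65536
 *	oldest event ts:   123.456789
 *	now ts:   124.000000
 *	dropped events: 0
 *	read events: 512
 */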
8564 
8565 static const struct file_operations tracing_stats_fops = {
8566 	.open		= tracing_open_generic_tr,
8567 	.read		= tracing_stats_read,
8568 	.llseek		= generic_file_llseek,
8569 	.release	= tracing_release_generic_tr,
8570 };
8571 
8572 #ifdef CONFIG_DYNAMIC_FTRACE
8573 
8574 static ssize_t
8575 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8576 		  size_t cnt, loff_t *ppos)
8577 {
8578 	ssize_t ret;
8579 	char *buf;
8580 	int r;
8581 
8582 	/* 256 should be plenty to hold the amount needed */
8583 	buf = kmalloc(256, GFP_KERNEL);
8584 	if (!buf)
8585 		return -ENOMEM;
8586 
8587 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8588 		      ftrace_update_tot_cnt,
8589 		      ftrace_number_of_pages,
8590 		      ftrace_number_of_groups);
8591 
8592 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8593 	kfree(buf);
8594 	return ret;
8595 }
8596 
8597 static const struct file_operations tracing_dyn_info_fops = {
8598 	.open		= tracing_open_generic,
8599 	.read		= tracing_read_dyn_info,
8600 	.llseek		= generic_file_llseek,
8601 };
8602 #endif /* CONFIG_DYNAMIC_FTRACE */
8603 
8604 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8605 static void
8606 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8607 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8608 		void *data)
8609 {
8610 	tracing_snapshot_instance(tr);
8611 }
8612 
8613 static void
8614 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8615 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8616 		      void *data)
8617 {
8618 	struct ftrace_func_mapper *mapper = data;
8619 	long *count = NULL;
8620 
8621 	if (mapper)
8622 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8623 
8624 	if (count) {
8625 
8626 		if (*count <= 0)
8627 			return;
8628 
8629 		(*count)--;
8630 	}
8631 
8632 	tracing_snapshot_instance(tr);
8633 }
8634 
8635 static int
8636 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8637 		      struct ftrace_probe_ops *ops, void *data)
8638 {
8639 	struct ftrace_func_mapper *mapper = data;
8640 	long *count = NULL;
8641 
8642 	seq_printf(m, "%ps:", (void *)ip);
8643 
8644 	seq_puts(m, "snapshot");
8645 
8646 	if (mapper)
8647 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8648 
8649 	if (count)
8650 		seq_printf(m, ":count=%ld\n", *count);
8651 	else
8652 		seq_puts(m, ":unlimited\n");
8653 
8654 	return 0;
8655 }
8656 
8657 static int
8658 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8659 		     unsigned long ip, void *init_data, void **data)
8660 {
8661 	struct ftrace_func_mapper *mapper = *data;
8662 
8663 	if (!mapper) {
8664 		mapper = allocate_ftrace_func_mapper();
8665 		if (!mapper)
8666 			return -ENOMEM;
8667 		*data = mapper;
8668 	}
8669 
8670 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8671 }
8672 
8673 static void
8674 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8675 		     unsigned long ip, void *data)
8676 {
8677 	struct ftrace_func_mapper *mapper = data;
8678 
8679 	if (!ip) {
8680 		if (!mapper)
8681 			return;
8682 		free_ftrace_func_mapper(mapper, NULL);
8683 		return;
8684 	}
8685 
8686 	ftrace_func_mapper_remove_ip(mapper, ip);
8687 }
8688 
8689 static struct ftrace_probe_ops snapshot_probe_ops = {
8690 	.func			= ftrace_snapshot,
8691 	.print			= ftrace_snapshot_print,
8692 };
8693 
8694 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8695 	.func			= ftrace_count_snapshot,
8696 	.print			= ftrace_snapshot_print,
8697 	.init			= ftrace_snapshot_init,
8698 	.free			= ftrace_snapshot_free,
8699 };
8700 
8701 static int
8702 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8703 			       char *glob, char *cmd, char *param, int enable)
8704 {
8705 	struct ftrace_probe_ops *ops;
8706 	void *count = (void *)-1;
8707 	char *number;
8708 	int ret;
8709 
8710 	if (!tr)
8711 		return -ENODEV;
8712 
8713 	/* hash funcs only work with set_ftrace_filter */
8714 	if (!enable)
8715 		return -EINVAL;
8716 
8717 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8718 
8719 	if (glob[0] == '!')
8720 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8721 
8722 	if (!param)
8723 		goto out_reg;
8724 
8725 	number = strsep(&param, ":");
8726 
8727 	if (!strlen(number))
8728 		goto out_reg;
8729 
8730 	/*
8731 	 * We use the callback data field (which is a pointer)
8732 	 * as our counter.
8733 	 */
8734 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8735 	if (ret)
8736 		return ret;
8737 
8738  out_reg:
8739 	ret = tracing_alloc_snapshot_instance(tr);
8740 	if (ret < 0)
8741 		goto out;
8742 
8743 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8744 
8745  out:
8746 	return ret < 0 ? ret : 0;
8747 }
8748 
8749 static struct ftrace_func_command ftrace_snapshot_cmd = {
8750 	.name			= "snapshot",
8751 	.func			= ftrace_trace_snapshot_callback,
8752 };
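/*
 * Editorial sketch (not part of the original file): the "snapshot" command
 * defined above and registered by register_snapshot_cmd() below is driven
 * through set_ftrace_filter (typically /sys/kernel/tracing/set_ftrace_filter).
 * The function name and count here are only examples of the
 * "<function>:snapshot[:count]" syntax parsed by
 * ftrace_trace_snapshot_callback(); prefixing the line with '!' unregisters
 * the probe:
 *
 *	echo 'schedule:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter   # at most five snapshots
 */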
8753 
8754 static __init int register_snapshot_cmd(void)
8755 {
8756 	return register_ftrace_command(&ftrace_snapshot_cmd);
8757 }
8758 #else
8759 static inline __init int register_snapshot_cmd(void) { return 0; }
8760 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8761 
8762 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8763 {
8764 	if (WARN_ON(!tr->dir))
8765 		return ERR_PTR(-ENODEV);
8766 
8767 	/* Top directory uses NULL as the parent */
8768 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8769 		return NULL;
8770 
8771 	/* All sub buffers have a descriptor */
8772 	return tr->dir;
8773 }
8774 
8775 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8776 {
8777 	struct dentry *d_tracer;
8778 
8779 	if (tr->percpu_dir)
8780 		return tr->percpu_dir;
8781 
8782 	d_tracer = tracing_get_dentry(tr);
8783 	if (IS_ERR(d_tracer))
8784 		return NULL;
8785 
8786 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8787 
8788 	MEM_FAIL(!tr->percpu_dir,
8789 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8790 
8791 	return tr->percpu_dir;
8792 }
8793 
8794 static struct dentry *
8795 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8796 		      void *data, long cpu, const struct file_operations *fops)
8797 {
8798 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8799 
8800 	if (ret) /* See tracing_get_cpu() */
8801 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8802 	return ret;
8803 }
8804 
8805 static void
8806 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8807 {
8808 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8809 	struct dentry *d_cpu;
8810 	char cpu_dir[30]; /* 30 characters should be more than enough */
8811 
8812 	if (!d_percpu)
8813 		return;
8814 
8815 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8816 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8817 	if (!d_cpu) {
8818 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8819 		return;
8820 	}
8821 
8822 	/* per cpu trace_pipe */
8823 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8824 				tr, cpu, &tracing_pipe_fops);
8825 
8826 	/* per cpu trace */
8827 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8828 				tr, cpu, &tracing_fops);
8829 
8830 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8831 				tr, cpu, &tracing_buffers_fops);
8832 
8833 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8834 				tr, cpu, &tracing_stats_fops);
8835 
8836 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8837 				tr, cpu, &tracing_entries_fops);
8838 
8839 #ifdef CONFIG_TRACER_SNAPSHOT
8840 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8841 				tr, cpu, &snapshot_fops);
8842 
8843 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8844 				tr, cpu, &snapshot_raw_fops);
8845 #endif
8846 }
8847 
8848 #ifdef CONFIG_FTRACE_SELFTEST
8849 /* Let selftest have access to static functions in this file */
8850 #include "trace_selftest.c"
8851 #endif
8852 
8853 static ssize_t
8854 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8855 			loff_t *ppos)
8856 {
8857 	struct trace_option_dentry *topt = filp->private_data;
8858 	char *buf;
8859 
8860 	if (topt->flags->val & topt->opt->bit)
8861 		buf = "1\n";
8862 	else
8863 		buf = "0\n";
8864 
8865 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8866 }
8867 
8868 static ssize_t
8869 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8870 			 loff_t *ppos)
8871 {
8872 	struct trace_option_dentry *topt = filp->private_data;
8873 	unsigned long val;
8874 	int ret;
8875 
8876 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8877 	if (ret)
8878 		return ret;
8879 
8880 	if (val != 0 && val != 1)
8881 		return -EINVAL;
8882 
8883 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8884 		mutex_lock(&trace_types_lock);
8885 		ret = __set_tracer_option(topt->tr, topt->flags,
8886 					  topt->opt, !val);
8887 		mutex_unlock(&trace_types_lock);
8888 		if (ret)
8889 			return ret;
8890 	}
8891 
8892 	*ppos += cnt;
8893 
8894 	return cnt;
8895 }
8896 
8897 static int tracing_open_options(struct inode *inode, struct file *filp)
8898 {
8899 	struct trace_option_dentry *topt = inode->i_private;
8900 	int ret;
8901 
8902 	ret = tracing_check_open_get_tr(topt->tr);
8903 	if (ret)
8904 		return ret;
8905 
8906 	filp->private_data = inode->i_private;
8907 	return 0;
8908 }
8909 
8910 static int tracing_release_options(struct inode *inode, struct file *file)
8911 {
8912 	struct trace_option_dentry *topt = file->private_data;
8913 
8914 	trace_array_put(topt->tr);
8915 	return 0;
8916 }
8917 
8918 static const struct file_operations trace_options_fops = {
8919 	.open = tracing_open_options,
8920 	.read = trace_options_read,
8921 	.write = trace_options_write,
8922 	.llseek	= generic_file_llseek,
8923 	.release = tracing_release_options,
8924 };
8925 
8926 /*
8927  * In order to pass in both the trace_array descriptor as well as the index
8928  * to the flag that the trace option file represents, the trace_array
8929  * has a character array of trace_flags_index[], which holds the index
8930  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8931  * The address of this character array is passed to the flag option file
8932  * read/write callbacks.
8933  *
8934  * In order to extract both the index and the trace_array descriptor,
8935  * get_tr_index() uses the following algorithm.
8936  *
8937  *   idx = *ptr;
8938  *
8939  * As the pointer points into that index array, the value it points
8940  * to is the index itself (remember index[1] == 1).
8941  *
8942  * Then, to get the trace_array descriptor, we subtract that index
8943  * from the pointer, which takes us back to the start of the index array:
8944  *
8945  *   ptr - idx == &index[0]
8946  *
8947  * Then a simple container_of() from that pointer gets us to the
8948  * trace_array descriptor.
8949  */
8950 static void get_tr_index(void *data, struct trace_array **ptr,
8951 			 unsigned int *pindex)
8952 {
8953 	*pindex = *(unsigned char *)data;
8954 
8955 	*ptr = container_of(data - *pindex, struct trace_array,
8956 			    trace_flags_index);
8957 }
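/*
 * Worked example (editorial addition): if tr->trace_flags_index[] starts at
 * address A, the option file for flag bit 5 is created with data == A + 5
 * and *(unsigned char *)data == 5.  Subtracting that value from data yields
 * A again, and container_of(A, struct trace_array, trace_flags_index)
 * recovers the owning trace_array.
 */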
8958 
8959 static ssize_t
8960 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8961 			loff_t *ppos)
8962 {
8963 	void *tr_index = filp->private_data;
8964 	struct trace_array *tr;
8965 	unsigned int index;
8966 	char *buf;
8967 
8968 	get_tr_index(tr_index, &tr, &index);
8969 
8970 	if (tr->trace_flags & (1 << index))
8971 		buf = "1\n";
8972 	else
8973 		buf = "0\n";
8974 
8975 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8976 }
8977 
8978 static ssize_t
8979 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8980 			 loff_t *ppos)
8981 {
8982 	void *tr_index = filp->private_data;
8983 	struct trace_array *tr;
8984 	unsigned int index;
8985 	unsigned long val;
8986 	int ret;
8987 
8988 	get_tr_index(tr_index, &tr, &index);
8989 
8990 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8991 	if (ret)
8992 		return ret;
8993 
8994 	if (val != 0 && val != 1)
8995 		return -EINVAL;
8996 
8997 	mutex_lock(&event_mutex);
8998 	mutex_lock(&trace_types_lock);
8999 	ret = set_tracer_flag(tr, 1 << index, val);
9000 	mutex_unlock(&trace_types_lock);
9001 	mutex_unlock(&event_mutex);
9002 
9003 	if (ret < 0)
9004 		return ret;
9005 
9006 	*ppos += cnt;
9007 
9008 	return cnt;
9009 }
9010 
9011 static const struct file_operations trace_options_core_fops = {
9012 	.open = tracing_open_generic,
9013 	.read = trace_options_core_read,
9014 	.write = trace_options_core_write,
9015 	.llseek = generic_file_llseek,
9016 };
9017 
9018 struct dentry *trace_create_file(const char *name,
9019 				 umode_t mode,
9020 				 struct dentry *parent,
9021 				 void *data,
9022 				 const struct file_operations *fops)
9023 {
9024 	struct dentry *ret;
9025 
9026 	ret = tracefs_create_file(name, mode, parent, data, fops);
9027 	if (!ret)
9028 		pr_warn("Could not create tracefs '%s' entry\n", name);
9029 
9030 	return ret;
9031 }
9032 
9033 
9034 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9035 {
9036 	struct dentry *d_tracer;
9037 
9038 	if (tr->options)
9039 		return tr->options;
9040 
9041 	d_tracer = tracing_get_dentry(tr);
9042 	if (IS_ERR(d_tracer))
9043 		return NULL;
9044 
9045 	tr->options = tracefs_create_dir("options", d_tracer);
9046 	if (!tr->options) {
9047 		pr_warn("Could not create tracefs directory 'options'\n");
9048 		return NULL;
9049 	}
9050 
9051 	return tr->options;
9052 }
9053 
9054 static void
9055 create_trace_option_file(struct trace_array *tr,
9056 			 struct trace_option_dentry *topt,
9057 			 struct tracer_flags *flags,
9058 			 struct tracer_opt *opt)
9059 {
9060 	struct dentry *t_options;
9061 
9062 	t_options = trace_options_init_dentry(tr);
9063 	if (!t_options)
9064 		return;
9065 
9066 	topt->flags = flags;
9067 	topt->opt = opt;
9068 	topt->tr = tr;
9069 
9070 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9071 					t_options, topt, &trace_options_fops);
9072 
9073 }
9074 
9075 static void
9076 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9077 {
9078 	struct trace_option_dentry *topts;
9079 	struct trace_options *tr_topts;
9080 	struct tracer_flags *flags;
9081 	struct tracer_opt *opts;
9082 	int cnt;
9083 	int i;
9084 
9085 	if (!tracer)
9086 		return;
9087 
9088 	flags = tracer->flags;
9089 
9090 	if (!flags || !flags->opts)
9091 		return;
9092 
9093 	/*
9094 	 * If this is an instance, only create flags for tracers
9095 	 * the instance may have.
9096 	 */
9097 	if (!trace_ok_for_array(tracer, tr))
9098 		return;
9099 
9100 	for (i = 0; i < tr->nr_topts; i++) {
9101 		/* Make sure there's no duplicate flags. */
9102 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9103 			return;
9104 	}
9105 
9106 	opts = flags->opts;
9107 
9108 	for (cnt = 0; opts[cnt].name; cnt++)
9109 		;
9110 
9111 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9112 	if (!topts)
9113 		return;
9114 
9115 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9116 			    GFP_KERNEL);
9117 	if (!tr_topts) {
9118 		kfree(topts);
9119 		return;
9120 	}
9121 
9122 	tr->topts = tr_topts;
9123 	tr->topts[tr->nr_topts].tracer = tracer;
9124 	tr->topts[tr->nr_topts].topts = topts;
9125 	tr->nr_topts++;
9126 
9127 	for (cnt = 0; opts[cnt].name; cnt++) {
9128 		create_trace_option_file(tr, &topts[cnt], flags,
9129 					 &opts[cnt]);
9130 		MEM_FAIL(topts[cnt].entry == NULL,
9131 			  "Failed to create trace option: %s",
9132 			  opts[cnt].name);
9133 	}
9134 }
9135 
9136 static struct dentry *
9137 create_trace_option_core_file(struct trace_array *tr,
9138 			      const char *option, long index)
9139 {
9140 	struct dentry *t_options;
9141 
9142 	t_options = trace_options_init_dentry(tr);
9143 	if (!t_options)
9144 		return NULL;
9145 
9146 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9147 				 (void *)&tr->trace_flags_index[index],
9148 				 &trace_options_core_fops);
9149 }
9150 
9151 static void create_trace_options_dir(struct trace_array *tr)
9152 {
9153 	struct dentry *t_options;
9154 	bool top_level = tr == &global_trace;
9155 	int i;
9156 
9157 	t_options = trace_options_init_dentry(tr);
9158 	if (!t_options)
9159 		return;
9160 
9161 	for (i = 0; trace_options[i]; i++) {
9162 		if (top_level ||
9163 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9164 			create_trace_option_core_file(tr, trace_options[i], i);
9165 	}
9166 }
9167 
9168 static ssize_t
9169 rb_simple_read(struct file *filp, char __user *ubuf,
9170 	       size_t cnt, loff_t *ppos)
9171 {
9172 	struct trace_array *tr = filp->private_data;
9173 	char buf[64];
9174 	int r;
9175 
9176 	r = tracer_tracing_is_on(tr);
9177 	r = sprintf(buf, "%d\n", r);
9178 
9179 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9180 }
9181 
9182 static ssize_t
9183 rb_simple_write(struct file *filp, const char __user *ubuf,
9184 		size_t cnt, loff_t *ppos)
9185 {
9186 	struct trace_array *tr = filp->private_data;
9187 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9188 	unsigned long val;
9189 	int ret;
9190 
9191 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9192 	if (ret)
9193 		return ret;
9194 
9195 	if (buffer) {
9196 		mutex_lock(&trace_types_lock);
9197 		if (!!val == tracer_tracing_is_on(tr)) {
9198 			val = 0; /* do nothing */
9199 		} else if (val) {
9200 			tracer_tracing_on(tr);
9201 			if (tr->current_trace->start)
9202 				tr->current_trace->start(tr);
9203 		} else {
9204 			tracer_tracing_off(tr);
9205 			if (tr->current_trace->stop)
9206 				tr->current_trace->stop(tr);
9207 			/* Wake up any waiters */
9208 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9209 		}
9210 		mutex_unlock(&trace_types_lock);
9211 	}
9212 
9213 	(*ppos)++;
9214 
9215 	return cnt;
9216 }
9217 
9218 static const struct file_operations rb_simple_fops = {
9219 	.open		= tracing_open_generic_tr,
9220 	.read		= rb_simple_read,
9221 	.write		= rb_simple_write,
9222 	.release	= tracing_release_generic_tr,
9223 	.llseek		= default_llseek,
9224 };
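/*
 * Editorial note: rb_simple_fops backs the "tracing_on" file created in
 * init_tracer_tracefs() below.  Writing '0' stops recording into the ring
 * buffer (and wakes up any blocked readers) without freeing the buffer;
 * writing '1' resumes recording, e.g.:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 */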
9225 
9226 static ssize_t
9227 buffer_percent_read(struct file *filp, char __user *ubuf,
9228 		    size_t cnt, loff_t *ppos)
9229 {
9230 	struct trace_array *tr = filp->private_data;
9231 	char buf[64];
9232 	int r;
9233 
9234 	r = tr->buffer_percent;
9235 	r = sprintf(buf, "%d\n", r);
9236 
9237 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9238 }
9239 
9240 static ssize_t
9241 buffer_percent_write(struct file *filp, const char __user *ubuf,
9242 		     size_t cnt, loff_t *ppos)
9243 {
9244 	struct trace_array *tr = filp->private_data;
9245 	unsigned long val;
9246 	int ret;
9247 
9248 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9249 	if (ret)
9250 		return ret;
9251 
9252 	if (val > 100)
9253 		return -EINVAL;
9254 
9255 	tr->buffer_percent = val;
9256 
9257 	(*ppos)++;
9258 
9259 	return cnt;
9260 }
9261 
9262 static const struct file_operations buffer_percent_fops = {
9263 	.open		= tracing_open_generic_tr,
9264 	.read		= buffer_percent_read,
9265 	.write		= buffer_percent_write,
9266 	.release	= tracing_release_generic_tr,
9267 	.llseek		= default_llseek,
9268 };
9269 
9270 static struct dentry *trace_instance_dir;
9271 
9272 static void
9273 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9274 
9275 static int
9276 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9277 {
9278 	enum ring_buffer_flags rb_flags;
9279 
9280 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9281 
9282 	buf->tr = tr;
9283 
9284 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9285 	if (!buf->buffer)
9286 		return -ENOMEM;
9287 
9288 	buf->data = alloc_percpu(struct trace_array_cpu);
9289 	if (!buf->data) {
9290 		ring_buffer_free(buf->buffer);
9291 		buf->buffer = NULL;
9292 		return -ENOMEM;
9293 	}
9294 
9295 	/* Allocate the first page for all buffers */
9296 	set_buffer_entries(&tr->array_buffer,
9297 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9298 
9299 	return 0;
9300 }
9301 
9302 static void free_trace_buffer(struct array_buffer *buf)
9303 {
9304 	if (buf->buffer) {
9305 		ring_buffer_free(buf->buffer);
9306 		buf->buffer = NULL;
9307 		free_percpu(buf->data);
9308 		buf->data = NULL;
9309 	}
9310 }
9311 
9312 static int allocate_trace_buffers(struct trace_array *tr, int size)
9313 {
9314 	int ret;
9315 
9316 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9317 	if (ret)
9318 		return ret;
9319 
9320 #ifdef CONFIG_TRACER_MAX_TRACE
9321 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9322 				    allocate_snapshot ? size : 1);
9323 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9324 		free_trace_buffer(&tr->array_buffer);
9325 		return -ENOMEM;
9326 	}
9327 	tr->allocated_snapshot = allocate_snapshot;
9328 
9329 	allocate_snapshot = false;
9330 #endif
9331 
9332 	return 0;
9333 }
9334 
9335 static void free_trace_buffers(struct trace_array *tr)
9336 {
9337 	if (!tr)
9338 		return;
9339 
9340 	free_trace_buffer(&tr->array_buffer);
9341 
9342 #ifdef CONFIG_TRACER_MAX_TRACE
9343 	free_trace_buffer(&tr->max_buffer);
9344 #endif
9345 }
9346 
9347 static void init_trace_flags_index(struct trace_array *tr)
9348 {
9349 	int i;
9350 
9351 	/* Used by the trace options files */
9352 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9353 		tr->trace_flags_index[i] = i;
9354 }
9355 
9356 static void __update_tracer_options(struct trace_array *tr)
9357 {
9358 	struct tracer *t;
9359 
9360 	for (t = trace_types; t; t = t->next)
9361 		add_tracer_options(tr, t);
9362 }
9363 
9364 static void update_tracer_options(struct trace_array *tr)
9365 {
9366 	mutex_lock(&trace_types_lock);
9367 	tracer_options_updated = true;
9368 	__update_tracer_options(tr);
9369 	mutex_unlock(&trace_types_lock);
9370 }
9371 
9372 /* Must have trace_types_lock held */
9373 struct trace_array *trace_array_find(const char *instance)
9374 {
9375 	struct trace_array *tr, *found = NULL;
9376 
9377 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9378 		if (tr->name && strcmp(tr->name, instance) == 0) {
9379 			found = tr;
9380 			break;
9381 		}
9382 	}
9383 
9384 	return found;
9385 }
9386 
9387 struct trace_array *trace_array_find_get(const char *instance)
9388 {
9389 	struct trace_array *tr;
9390 
9391 	mutex_lock(&trace_types_lock);
9392 	tr = trace_array_find(instance);
9393 	if (tr)
9394 		tr->ref++;
9395 	mutex_unlock(&trace_types_lock);
9396 
9397 	return tr;
9398 }
9399 
9400 static int trace_array_create_dir(struct trace_array *tr)
9401 {
9402 	int ret;
9403 
9404 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9405 	if (!tr->dir)
9406 		return -EINVAL;
9407 
9408 	ret = event_trace_add_tracer(tr->dir, tr);
9409 	if (ret) {
9410 		tracefs_remove(tr->dir);
9411 		return ret;
9412 	}
9413 
9414 	init_tracer_tracefs(tr, tr->dir);
9415 	__update_tracer_options(tr);
9416 
9417 	return ret;
9418 }
9419 
9420 static struct trace_array *trace_array_create(const char *name)
9421 {
9422 	struct trace_array *tr;
9423 	int ret;
9424 
9425 	ret = -ENOMEM;
9426 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9427 	if (!tr)
9428 		return ERR_PTR(ret);
9429 
9430 	tr->name = kstrdup(name, GFP_KERNEL);
9431 	if (!tr->name)
9432 		goto out_free_tr;
9433 
9434 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9435 		goto out_free_tr;
9436 
9437 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9438 		goto out_free_tr;
9439 
9440 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9441 
9442 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9443 
9444 	raw_spin_lock_init(&tr->start_lock);
9445 
9446 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9447 
9448 	tr->current_trace = &nop_trace;
9449 
9450 	INIT_LIST_HEAD(&tr->systems);
9451 	INIT_LIST_HEAD(&tr->events);
9452 	INIT_LIST_HEAD(&tr->hist_vars);
9453 	INIT_LIST_HEAD(&tr->err_log);
9454 
9455 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9456 		goto out_free_tr;
9457 
9458 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9459 		goto out_free_tr;
9460 
9461 	ftrace_init_trace_array(tr);
9462 
9463 	init_trace_flags_index(tr);
9464 
9465 	if (trace_instance_dir) {
9466 		ret = trace_array_create_dir(tr);
9467 		if (ret)
9468 			goto out_free_tr;
9469 	} else
9470 		__trace_early_add_events(tr);
9471 
9472 	list_add(&tr->list, &ftrace_trace_arrays);
9473 
9474 	tr->ref++;
9475 
9476 	return tr;
9477 
9478  out_free_tr:
9479 	ftrace_free_ftrace_ops(tr);
9480 	free_trace_buffers(tr);
9481 	free_cpumask_var(tr->pipe_cpumask);
9482 	free_cpumask_var(tr->tracing_cpumask);
9483 	kfree(tr->name);
9484 	kfree(tr);
9485 
9486 	return ERR_PTR(ret);
9487 }
9488 
9489 static int instance_mkdir(const char *name)
9490 {
9491 	struct trace_array *tr;
9492 	int ret;
9493 
9494 	mutex_lock(&event_mutex);
9495 	mutex_lock(&trace_types_lock);
9496 
9497 	ret = -EEXIST;
9498 	if (trace_array_find(name))
9499 		goto out_unlock;
9500 
9501 	tr = trace_array_create(name);
9502 
9503 	ret = PTR_ERR_OR_ZERO(tr);
9504 
9505 out_unlock:
9506 	mutex_unlock(&trace_types_lock);
9507 	mutex_unlock(&event_mutex);
9508 	return ret;
9509 }
9510 
9511 /**
9512  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9513  * @name: The name of the trace array to be looked up/created.
9514  *
9515  * Returns pointer to trace array with given name.
9516  * NULL, if it cannot be created.
9517  *
9518  * NOTE: This function increments the reference counter associated with the
9519  * trace array returned. This makes sure it cannot be freed while in use.
9520  * Use trace_array_put() once the trace array is no longer needed.
9521  * If the trace_array is to be freed, trace_array_destroy() needs to
9522  * be called after the trace_array_put(), or simply let user space delete
9523  * it from the tracefs instances directory. But until the
9524  * trace_array_put() is called, user space can not delete it.
9525  *
9526  */
9527 struct trace_array *trace_array_get_by_name(const char *name)
9528 {
9529 	struct trace_array *tr;
9530 
9531 	mutex_lock(&event_mutex);
9532 	mutex_lock(&trace_types_lock);
9533 
9534 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9535 		if (tr->name && strcmp(tr->name, name) == 0)
9536 			goto out_unlock;
9537 	}
9538 
9539 	tr = trace_array_create(name);
9540 
9541 	if (IS_ERR(tr))
9542 		tr = NULL;
9543 out_unlock:
9544 	if (tr)
9545 		tr->ref++;
9546 
9547 	mutex_unlock(&trace_types_lock);
9548 	mutex_unlock(&event_mutex);
9549 	return tr;
9550 }
9551 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
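/*
 * Editorial sketch of the lifetime rules documented above, loosely modelled
 * on samples/ftrace/sample-trace-array.c; the instance and function names
 * are illustrative only:
 *
 *	static struct trace_array *tr;
 *
 *	static int __init sample_init(void)
 *	{
 *		tr = trace_array_get_by_name("sample-instance");
 *		return tr ? 0 : -ENOMEM;
 *	}
 *
 *	static void __exit sample_exit(void)
 *	{
 *		trace_array_put(tr);		// drop our reference first
 *		trace_array_destroy(tr);	// then remove the instance
 *	}
 */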
9552 
9553 static int __remove_instance(struct trace_array *tr)
9554 {
9555 	int i;
9556 
9557 	/* Reference counter for a newly created trace array = 1. */
9558 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9559 		return -EBUSY;
9560 
9561 	list_del(&tr->list);
9562 
9563 	/* Disable all the flags that were enabled coming in */
9564 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9565 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9566 			set_tracer_flag(tr, 1 << i, 0);
9567 	}
9568 
9569 	tracing_set_nop(tr);
9570 	clear_ftrace_function_probes(tr);
9571 	event_trace_del_tracer(tr);
9572 	ftrace_clear_pids(tr);
9573 	ftrace_destroy_function_files(tr);
9574 	tracefs_remove(tr->dir);
9575 	free_percpu(tr->last_func_repeats);
9576 	free_trace_buffers(tr);
9577 	clear_tracing_err_log(tr);
9578 
9579 	for (i = 0; i < tr->nr_topts; i++) {
9580 		kfree(tr->topts[i].topts);
9581 	}
9582 	kfree(tr->topts);
9583 
9584 	free_cpumask_var(tr->pipe_cpumask);
9585 	free_cpumask_var(tr->tracing_cpumask);
9586 	kfree(tr->name);
9587 	kfree(tr);
9588 
9589 	return 0;
9590 }
9591 
9592 int trace_array_destroy(struct trace_array *this_tr)
9593 {
9594 	struct trace_array *tr;
9595 	int ret;
9596 
9597 	if (!this_tr)
9598 		return -EINVAL;
9599 
9600 	mutex_lock(&event_mutex);
9601 	mutex_lock(&trace_types_lock);
9602 
9603 	ret = -ENODEV;
9604 
9605 	/* Making sure trace array exists before destroying it. */
9606 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9607 		if (tr == this_tr) {
9608 			ret = __remove_instance(tr);
9609 			break;
9610 		}
9611 	}
9612 
9613 	mutex_unlock(&trace_types_lock);
9614 	mutex_unlock(&event_mutex);
9615 
9616 	return ret;
9617 }
9618 EXPORT_SYMBOL_GPL(trace_array_destroy);
9619 
9620 static int instance_rmdir(const char *name)
9621 {
9622 	struct trace_array *tr;
9623 	int ret;
9624 
9625 	mutex_lock(&event_mutex);
9626 	mutex_lock(&trace_types_lock);
9627 
9628 	ret = -ENODEV;
9629 	tr = trace_array_find(name);
9630 	if (tr)
9631 		ret = __remove_instance(tr);
9632 
9633 	mutex_unlock(&trace_types_lock);
9634 	mutex_unlock(&event_mutex);
9635 
9636 	return ret;
9637 }
9638 
9639 static __init void create_trace_instances(struct dentry *d_tracer)
9640 {
9641 	struct trace_array *tr;
9642 
9643 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9644 							 instance_mkdir,
9645 							 instance_rmdir);
9646 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9647 		return;
9648 
9649 	mutex_lock(&event_mutex);
9650 	mutex_lock(&trace_types_lock);
9651 
9652 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9653 		if (!tr->name)
9654 			continue;
9655 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9656 			     "Failed to create instance directory\n"))
9657 			break;
9658 	}
9659 
9660 	mutex_unlock(&trace_types_lock);
9661 	mutex_unlock(&event_mutex);
9662 }
9663 
9664 static void
9665 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9666 {
9667 	int cpu;
9668 
9669 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9670 			tr, &show_traces_fops);
9671 
9672 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9673 			tr, &set_tracer_fops);
9674 
9675 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9676 			  tr, &tracing_cpumask_fops);
9677 
9678 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9679 			  tr, &tracing_iter_fops);
9680 
9681 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9682 			  tr, &tracing_fops);
9683 
9684 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9685 			  tr, &tracing_pipe_fops);
9686 
9687 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9688 			  tr, &tracing_entries_fops);
9689 
9690 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9691 			  tr, &tracing_total_entries_fops);
9692 
9693 	trace_create_file("free_buffer", 0200, d_tracer,
9694 			  tr, &tracing_free_buffer_fops);
9695 
9696 	trace_create_file("trace_marker", 0220, d_tracer,
9697 			  tr, &tracing_mark_fops);
9698 
9699 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9700 
9701 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9702 			  tr, &tracing_mark_raw_fops);
9703 
9704 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9705 			  &trace_clock_fops);
9706 
9707 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9708 			  tr, &rb_simple_fops);
9709 
9710 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9711 			  &trace_time_stamp_mode_fops);
9712 
9713 	tr->buffer_percent = 50;
9714 
9715 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9716 			tr, &buffer_percent_fops);
9717 
9718 	create_trace_options_dir(tr);
9719 
9720 #ifdef CONFIG_TRACER_MAX_TRACE
9721 	trace_create_maxlat_file(tr, d_tracer);
9722 #endif
9723 
9724 	if (ftrace_create_function_files(tr, d_tracer))
9725 		MEM_FAIL(1, "Could not allocate function filter files");
9726 
9727 #ifdef CONFIG_TRACER_SNAPSHOT
9728 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9729 			  tr, &snapshot_fops);
9730 #endif
9731 
9732 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9733 			  tr, &tracing_err_log_fops);
9734 
9735 	for_each_tracing_cpu(cpu)
9736 		tracing_init_tracefs_percpu(tr, cpu);
9737 
9738 	ftrace_init_tracefs(tr, d_tracer);
9739 }
9740 
9741 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9742 {
9743 	struct vfsmount *mnt;
9744 	struct file_system_type *type;
9745 
9746 	/*
9747 	 * To maintain backward compatibility for tools that mount
9748 	 * debugfs to get to the tracing facility, tracefs is automatically
9749 	 * mounted to the debugfs/tracing directory.
9750 	 */
9751 	type = get_fs_type("tracefs");
9752 	if (!type)
9753 		return NULL;
9754 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9755 	put_filesystem(type);
9756 	if (IS_ERR(mnt))
9757 		return NULL;
9758 	mntget(mnt);
9759 
9760 	return mnt;
9761 }
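/*
 * Editorial note: because of the automount above, the same tracefs files are
 * typically reachable both at /sys/kernel/tracing/ and, for older tools, at
 * /sys/kernel/debug/tracing/ when debugfs is mounted in its usual place.
 */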
9762 
9763 /**
9764  * tracing_init_dentry - initialize top level trace array
9765  *
9766  * This is called when creating files or directories in the tracing
9767  * directory. It is called via fs_initcall() by any of the boot up code
9768  * and expects to return the dentry of the top level tracing directory.
9769  */
9770 int tracing_init_dentry(void)
9771 {
9772 	struct trace_array *tr = &global_trace;
9773 
9774 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9775 		pr_warn("Tracing disabled due to lockdown\n");
9776 		return -EPERM;
9777 	}
9778 
9779 	/* The top level trace array uses NULL as parent */
9780 	if (tr->dir)
9781 		return 0;
9782 
9783 	if (WARN_ON(!tracefs_initialized()))
9784 		return -ENODEV;
9785 
9786 	/*
9787 	 * As there may still be users that expect the tracing
9788 	 * files to exist in debugfs/tracing, we must automount
9789 	 * the tracefs file system there, so older tools still
9790 	 * work with the newer kernel.
9791 	 */
9792 	tr->dir = debugfs_create_automount("tracing", NULL,
9793 					   trace_automount, NULL);
9794 
9795 	return 0;
9796 }
9797 
9798 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9799 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9800 
9801 static struct workqueue_struct *eval_map_wq __initdata;
9802 static struct work_struct eval_map_work __initdata;
9803 static struct work_struct tracerfs_init_work __initdata;
9804 
9805 static void __init eval_map_work_func(struct work_struct *work)
9806 {
9807 	int len;
9808 
9809 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9810 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9811 }
9812 
9813 static int __init trace_eval_init(void)
9814 {
9815 	INIT_WORK(&eval_map_work, eval_map_work_func);
9816 
9817 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9818 	if (!eval_map_wq) {
9819 		pr_err("Unable to allocate eval_map_wq\n");
9820 		/* Do work here */
9821 		eval_map_work_func(&eval_map_work);
9822 		return -ENOMEM;
9823 	}
9824 
9825 	queue_work(eval_map_wq, &eval_map_work);
9826 	return 0;
9827 }
9828 
9829 subsys_initcall(trace_eval_init);
9830 
9831 static int __init trace_eval_sync(void)
9832 {
9833 	/* Make sure the eval map updates are finished */
9834 	if (eval_map_wq)
9835 		destroy_workqueue(eval_map_wq);
9836 	return 0;
9837 }
9838 
9839 late_initcall_sync(trace_eval_sync);
9840 
9841 
9842 #ifdef CONFIG_MODULES
9843 static void trace_module_add_evals(struct module *mod)
9844 {
9845 	if (!mod->num_trace_evals)
9846 		return;
9847 
9848 	/*
9849 	 * Modules with bad taint do not have events created; do
9850 	 * not bother with enums either.
9851 	 */
9852 	if (trace_module_has_bad_taint(mod))
9853 		return;
9854 
9855 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9856 }
9857 
9858 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9859 static void trace_module_remove_evals(struct module *mod)
9860 {
9861 	union trace_eval_map_item *map;
9862 	union trace_eval_map_item **last = &trace_eval_maps;
9863 
9864 	if (!mod->num_trace_evals)
9865 		return;
9866 
9867 	mutex_lock(&trace_eval_mutex);
9868 
9869 	map = trace_eval_maps;
9870 
9871 	while (map) {
9872 		if (map->head.mod == mod)
9873 			break;
9874 		map = trace_eval_jmp_to_tail(map);
9875 		last = &map->tail.next;
9876 		map = map->tail.next;
9877 	}
9878 	if (!map)
9879 		goto out;
9880 
9881 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9882 	kfree(map);
9883  out:
9884 	mutex_unlock(&trace_eval_mutex);
9885 }
9886 #else
9887 static inline void trace_module_remove_evals(struct module *mod) { }
9888 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9889 
9890 static int trace_module_notify(struct notifier_block *self,
9891 			       unsigned long val, void *data)
9892 {
9893 	struct module *mod = data;
9894 
9895 	switch (val) {
9896 	case MODULE_STATE_COMING:
9897 		trace_module_add_evals(mod);
9898 		break;
9899 	case MODULE_STATE_GOING:
9900 		trace_module_remove_evals(mod);
9901 		break;
9902 	}
9903 
9904 	return NOTIFY_OK;
9905 }
9906 
9907 static struct notifier_block trace_module_nb = {
9908 	.notifier_call = trace_module_notify,
9909 	.priority = 0,
9910 };
9911 #endif /* CONFIG_MODULES */
9912 
9913 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9914 {
9915 
9916 	event_trace_init();
9917 
9918 	init_tracer_tracefs(&global_trace, NULL);
9919 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9920 
9921 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9922 			&global_trace, &tracing_thresh_fops);
9923 
9924 	trace_create_file("README", TRACE_MODE_READ, NULL,
9925 			NULL, &tracing_readme_fops);
9926 
9927 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9928 			NULL, &tracing_saved_cmdlines_fops);
9929 
9930 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9931 			  NULL, &tracing_saved_cmdlines_size_fops);
9932 
9933 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9934 			NULL, &tracing_saved_tgids_fops);
9935 
9936 	trace_create_eval_file(NULL);
9937 
9938 #ifdef CONFIG_MODULES
9939 	register_module_notifier(&trace_module_nb);
9940 #endif
9941 
9942 #ifdef CONFIG_DYNAMIC_FTRACE
9943 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9944 			NULL, &tracing_dyn_info_fops);
9945 #endif
9946 
9947 	create_trace_instances(NULL);
9948 
9949 	update_tracer_options(&global_trace);
9950 }
9951 
9952 static __init int tracer_init_tracefs(void)
9953 {
9954 	int ret;
9955 
9956 	trace_access_lock_init();
9957 
9958 	ret = tracing_init_dentry();
9959 	if (ret)
9960 		return 0;
9961 
9962 	if (eval_map_wq) {
9963 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9964 		queue_work(eval_map_wq, &tracerfs_init_work);
9965 	} else {
9966 		tracer_init_tracefs_work_func(NULL);
9967 	}
9968 
9969 	rv_init_interface();
9970 
9971 	return 0;
9972 }
9973 
9974 fs_initcall(tracer_init_tracefs);
9975 
9976 static int trace_die_panic_handler(struct notifier_block *self,
9977 				unsigned long ev, void *unused);
9978 
9979 static struct notifier_block trace_panic_notifier = {
9980 	.notifier_call = trace_die_panic_handler,
9981 	.priority = INT_MAX - 1,
9982 };
9983 
9984 static struct notifier_block trace_die_notifier = {
9985 	.notifier_call = trace_die_panic_handler,
9986 	.priority = INT_MAX - 1,
9987 };
9988 
9989 /*
9990  * The idea is to execute the following die/panic callback early, in order
9991  * to avoid showing irrelevant information in the trace (like other panic
9992  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9993  * warnings get disabled (to prevent potential log flooding).
9994  */
9995 static int trace_die_panic_handler(struct notifier_block *self,
9996 				unsigned long ev, void *unused)
9997 {
9998 	if (!ftrace_dump_on_oops)
9999 		return NOTIFY_DONE;
10000 
10001 	/* The die notifier requires DIE_OOPS to trigger */
10002 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10003 		return NOTIFY_DONE;
10004 
10005 	ftrace_dump(ftrace_dump_on_oops);
10006 
10007 	return NOTIFY_DONE;
10008 }
10009 
10010 /*
10011  * printk is set to a max of 1024; we really don't need it that big.
10012  * Nothing should be printing 1000 characters anyway.
10013  */
10014 #define TRACE_MAX_PRINT		1000
10015 
10016 /*
10017  * Define here KERN_TRACE so that we have one place to modify
10018  * it if we decide to change what log level the ftrace dump
10019  * should be at.
10020  */
10021 #define KERN_TRACE		KERN_EMERG
10022 
10023 void
10024 trace_printk_seq(struct trace_seq *s)
10025 {
10026 	/* Probably should print a warning here. */
10027 	if (s->seq.len >= TRACE_MAX_PRINT)
10028 		s->seq.len = TRACE_MAX_PRINT;
10029 
10030 	/*
10031 	 * More paranoid code. Although the buffer size is set to
10032 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10033 	 * an extra layer of protection.
10034 	 */
10035 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10036 		s->seq.len = s->seq.size - 1;
10037 
10038 	/* should be zero ended, but we are paranoid. */
10039 	s->buffer[s->seq.len] = 0;
10040 
10041 	printk(KERN_TRACE "%s", s->buffer);
10042 
10043 	trace_seq_init(s);
10044 }
10045 
10046 void trace_init_global_iter(struct trace_iterator *iter)
10047 {
10048 	iter->tr = &global_trace;
10049 	iter->trace = iter->tr->current_trace;
10050 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10051 	iter->array_buffer = &global_trace.array_buffer;
10052 
10053 	if (iter->trace && iter->trace->open)
10054 		iter->trace->open(iter);
10055 
10056 	/* Annotate start of buffers if we had overruns */
10057 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10058 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10059 
10060 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10061 	if (trace_clocks[iter->tr->clock_id].in_ns)
10062 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10063 
10064 	/* Can not use kmalloc for iter.temp and iter.fmt */
10065 	iter->temp = static_temp_buf;
10066 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10067 	iter->fmt = static_fmt_buf;
10068 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10069 }
10070 
10071 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10072 {
10073 	/* use static because iter can be a bit big for the stack */
10074 	static struct trace_iterator iter;
10075 	static atomic_t dump_running;
10076 	struct trace_array *tr = &global_trace;
10077 	unsigned int old_userobj;
10078 	unsigned long flags;
10079 	int cnt = 0, cpu;
10080 
10081 	/* Only allow one dump user at a time. */
10082 	if (atomic_inc_return(&dump_running) != 1) {
10083 		atomic_dec(&dump_running);
10084 		return;
10085 	}
10086 
10087 	/*
10088 	 * Always turn off tracing when we dump.
10089 	 * We don't need to show trace output of what happens
10090 	 * between multiple crashes.
10091 	 *
10092 	 * If the user does a sysrq-z, then they can re-enable
10093 	 * tracing with echo 1 > tracing_on.
10094 	 */
10095 	tracing_off();
10096 
10097 	local_irq_save(flags);
10098 
10099 	/* Simulate the iterator */
10100 	trace_init_global_iter(&iter);
10101 
10102 	for_each_tracing_cpu(cpu) {
10103 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10104 	}
10105 
10106 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10107 
10108 	/* don't look at user memory in panic mode */
10109 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10110 
10111 	switch (oops_dump_mode) {
10112 	case DUMP_ALL:
10113 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10114 		break;
10115 	case DUMP_ORIG:
10116 		iter.cpu_file = raw_smp_processor_id();
10117 		break;
10118 	case DUMP_NONE:
10119 		goto out_enable;
10120 	default:
10121 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10122 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10123 	}
10124 
10125 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10126 
10127 	/* Did function tracer already get disabled? */
10128 	if (ftrace_is_dead()) {
10129 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10130 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10131 	}
10132 
10133 	/*
10134 	 * We need to stop all tracing on all CPUs to read
10135 	 * the next buffer. This is a bit expensive, but is
10136 	 * not done often. We read everything we can,
10137 	 * and then release the locks again.
10138 	 */
10139 
10140 	while (!trace_empty(&iter)) {
10141 
10142 		if (!cnt)
10143 			printk(KERN_TRACE "---------------------------------\n");
10144 
10145 		cnt++;
10146 
10147 		trace_iterator_reset(&iter);
10148 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10149 
10150 		if (trace_find_next_entry_inc(&iter) != NULL) {
10151 			int ret;
10152 
10153 			ret = print_trace_line(&iter);
10154 			if (ret != TRACE_TYPE_NO_CONSUME)
10155 				trace_consume(&iter);
10156 		}
10157 		touch_nmi_watchdog();
10158 
10159 		trace_printk_seq(&iter.seq);
10160 	}
10161 
10162 	if (!cnt)
10163 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10164 	else
10165 		printk(KERN_TRACE "---------------------------------\n");
10166 
10167  out_enable:
10168 	tr->trace_flags |= old_userobj;
10169 
10170 	for_each_tracing_cpu(cpu) {
10171 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10172 	}
10173 	atomic_dec(&dump_running);
10174 	local_irq_restore(flags);
10175 }
10176 EXPORT_SYMBOL_GPL(ftrace_dump);
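/*
 * Editorial note: besides direct callers, this path is normally reached from
 * trace_die_panic_handler() above when booting with ftrace_dump_on_oops
 * (or ftrace_dump_on_oops=orig_cpu for DUMP_ORIG), and can be triggered
 * manually with the SysRq 'z' key.
 */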
10177 
10178 #define WRITE_BUFSIZE  4096
10179 
10180 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10181 				size_t count, loff_t *ppos,
10182 				int (*createfn)(const char *))
10183 {
10184 	char *kbuf, *buf, *tmp;
10185 	int ret = 0;
10186 	size_t done = 0;
10187 	size_t size;
10188 
10189 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10190 	if (!kbuf)
10191 		return -ENOMEM;
10192 
10193 	while (done < count) {
10194 		size = count - done;
10195 
10196 		if (size >= WRITE_BUFSIZE)
10197 			size = WRITE_BUFSIZE - 1;
10198 
10199 		if (copy_from_user(kbuf, buffer + done, size)) {
10200 			ret = -EFAULT;
10201 			goto out;
10202 		}
10203 		kbuf[size] = '\0';
10204 		buf = kbuf;
10205 		do {
10206 			tmp = strchr(buf, '\n');
10207 			if (tmp) {
10208 				*tmp = '\0';
10209 				size = tmp - buf + 1;
10210 			} else {
10211 				size = strlen(buf);
10212 				if (done + size < count) {
10213 					if (buf != kbuf)
10214 						break;
10215 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10216 					pr_warn("Line length is too long: Should be less than %d\n",
10217 						WRITE_BUFSIZE - 2);
10218 					ret = -EINVAL;
10219 					goto out;
10220 				}
10221 			}
10222 			done += size;
10223 
10224 			/* Remove comments */
10225 			tmp = strchr(buf, '#');
10226 
10227 			if (tmp)
10228 				*tmp = '\0';
10229 
10230 			ret = createfn(buf);
10231 			if (ret)
10232 				goto out;
10233 			buf += size;
10234 
10235 		} while (done < count);
10236 	}
10237 	ret = done;
10238 
10239 out:
10240 	kfree(kbuf);
10241 
10242 	return ret;
10243 }
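/*
 * Editorial note: this helper backs command-style tracefs files such as
 * kprobe_events and dynamic_events.  An (illustrative) write of
 *
 *	p:myprobe do_sys_open
 *	# text after '#' is stripped as a comment
 *	-:myprobe
 *
 * is split on newlines, and createfn() is invoked once for each resulting
 * line.
 */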
10244 
10245 #ifdef CONFIG_TRACER_MAX_TRACE
10246 __init static bool tr_needs_alloc_snapshot(const char *name)
10247 {
10248 	char *test;
10249 	int len = strlen(name);
10250 	bool ret;
10251 
10252 	if (!boot_snapshot_index)
10253 		return false;
10254 
10255 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10256 	    boot_snapshot_info[len] == '\t')
10257 		return true;
10258 
10259 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10260 	if (!test)
10261 		return false;
10262 
10263 	sprintf(test, "\t%s\t", name);
10264 	ret = strstr(boot_snapshot_info, test) == NULL;
10265 	kfree(test);
10266 	return ret;
10267 }
10268 
10269 __init static void do_allocate_snapshot(const char *name)
10270 {
10271 	if (!tr_needs_alloc_snapshot(name))
10272 		return;
10273 
10274 	/*
10275 	 * When allocate_snapshot is set, the next call to
10276 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10277 	 * will allocate the snapshot buffer. That will also clear
10278 	 * this flag.
10279 	 */
10280 	allocate_snapshot = true;
10281 }
10282 #else
10283 static inline void do_allocate_snapshot(const char *name) { }
10284 #endif
10285 
10286 __init static void enable_instances(void)
10287 {
10288 	struct trace_array *tr;
10289 	char *curr_str;
10290 	char *str;
10291 	char *tok;
10292 
10293 	/* A tab is always appended */
10294 	boot_instance_info[boot_instance_index - 1] = '\0';
10295 	str = boot_instance_info;
10296 
10297 	while ((curr_str = strsep(&str, "\t"))) {
10298 
10299 		tok = strsep(&curr_str, ",");
10300 
10301 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10302 			do_allocate_snapshot(tok);
10303 
10304 		tr = trace_array_get_by_name(tok);
10305 		if (!tr) {
10306 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10307 			continue;
10308 		}
10309 		/* Allow user space to delete it */
10310 		trace_array_put(tr);
10311 
10312 		while ((tok = strsep(&curr_str, ","))) {
10313 			early_enable_events(tr, tok, true);
10314 		}
10315 	}
10316 }
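/*
 * Editorial example: the loop above consumes the boot-time "trace_instance="
 * parameter.  A command line such as (instance and event names illustrative)
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * creates an instance named "foo" and enables the listed events inside it.
 */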
10317 
10318 __init static int tracer_alloc_buffers(void)
10319 {
10320 	int ring_buf_size;
10321 	int ret = -ENOMEM;
10322 
10323 
10324 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10325 		pr_warn("Tracing disabled due to lockdown\n");
10326 		return -EPERM;
10327 	}
10328 
10329 	/*
10330 	 * Make sure we don't accidentally add more trace options
10331 	 * than we have bits for.
10332 	 */
10333 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10334 
10335 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10336 		goto out;
10337 
10338 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10339 		goto out_free_buffer_mask;
10340 
10341 	/* Only allocate trace_printk buffers if a trace_printk exists */
10342 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10343 		/* Must be called before global_trace.buffer is allocated */
10344 		trace_printk_init_buffers();
10345 
10346 	/* To save memory, keep the ring buffer size to its minimum */
10347 	if (ring_buffer_expanded)
10348 		ring_buf_size = trace_buf_size;
10349 	else
10350 		ring_buf_size = 1;
10351 
10352 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10353 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10354 
10355 	raw_spin_lock_init(&global_trace.start_lock);
10356 
10357 	/*
10358 	 * The prepare callback allocates some memory for the ring buffer. We
10359 	 * don't free the buffer if the CPU goes down. If we were to free
10360 	 * the buffer, then the user would lose any trace that was in the
10361 	 * buffer. The memory will be removed once the "instance" is removed.
10362 	 */
10363 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10364 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10365 				      NULL);
10366 	if (ret < 0)
10367 		goto out_free_cpumask;
10368 	/* Used for event triggers */
10369 	ret = -ENOMEM;
10370 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10371 	if (!temp_buffer)
10372 		goto out_rm_hp_state;
10373 
10374 	if (trace_create_savedcmd() < 0)
10375 		goto out_free_temp_buffer;
10376 
10377 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10378 		goto out_free_savedcmd;
10379 
10380 	/* TODO: make the number of buffers hot pluggable with CPUS */
10381 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10382 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10383 		goto out_free_pipe_cpumask;
10384 	}
10385 	if (global_trace.buffer_disabled)
10386 		tracing_off();
10387 
10388 	if (trace_boot_clock) {
10389 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10390 		if (ret < 0)
10391 			pr_warn("Trace clock %s not defined, going back to default\n",
10392 				trace_boot_clock);
10393 	}
10394 
10395 	/*
10396 	 * register_tracer() might reference current_trace, so it
10397 	 * needs to be set before we register anything. This is
10398 	 * just a bootstrap of current_trace anyway.
10399 	 */
10400 	global_trace.current_trace = &nop_trace;
10401 
10402 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10403 
10404 	ftrace_init_global_array_ops(&global_trace);
10405 
10406 	init_trace_flags_index(&global_trace);
10407 
10408 	register_tracer(&nop_trace);
10409 
10410 	/* Function tracing may start here (via kernel command line) */
10411 	init_function_trace();
10412 
10413 	/* All seems OK, enable tracing */
10414 	tracing_disabled = 0;
10415 
10416 	atomic_notifier_chain_register(&panic_notifier_list,
10417 				       &trace_panic_notifier);
10418 
10419 	register_die_notifier(&trace_die_notifier);
10420 
10421 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10422 
10423 	INIT_LIST_HEAD(&global_trace.systems);
10424 	INIT_LIST_HEAD(&global_trace.events);
10425 	INIT_LIST_HEAD(&global_trace.hist_vars);
10426 	INIT_LIST_HEAD(&global_trace.err_log);
10427 	list_add(&global_trace.list, &ftrace_trace_arrays);
10428 
10429 	apply_trace_boot_options();
10430 
10431 	register_snapshot_cmd();
10432 
10433 	return 0;
10434 
10435 out_free_pipe_cpumask:
10436 	free_cpumask_var(global_trace.pipe_cpumask);
10437 out_free_savedcmd:
10438 	free_saved_cmdlines_buffer(savedcmd);
10439 out_free_temp_buffer:
10440 	ring_buffer_free(temp_buffer);
10441 out_rm_hp_state:
10442 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10443 out_free_cpumask:
10444 	free_cpumask_var(global_trace.tracing_cpumask);
10445 out_free_buffer_mask:
10446 	free_cpumask_var(tracing_buffer_mask);
10447 out:
10448 	return ret;
10449 }
10450 
10451 void __init ftrace_boot_snapshot(void)
10452 {
10453 #ifdef CONFIG_TRACER_MAX_TRACE
10454 	struct trace_array *tr;
10455 
10456 	if (!snapshot_at_boot)
10457 		return;
10458 
10459 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10460 		if (!tr->allocated_snapshot)
10461 			continue;
10462 
10463 		tracing_snapshot_instance(tr);
10464 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10465 	}
10466 #endif
10467 }
10468 
10469 void __init early_trace_init(void)
10470 {
10471 	if (tracepoint_printk) {
10472 		tracepoint_print_iter =
10473 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10474 		if (MEM_FAIL(!tracepoint_print_iter,
10475 			     "Failed to allocate trace iterator\n"))
10476 			tracepoint_printk = 0;
10477 		else
10478 			static_key_enable(&tracepoint_printk_key.key);
10479 	}
10480 	tracer_alloc_buffers();
10481 
10482 	init_events();
10483 }
10484 
10485 void __init trace_init(void)
10486 {
10487 	trace_event_init();
10488 
10489 	if (boot_instance_index)
10490 		enable_instances();
10491 }
10492 
10493 __init static void clear_boot_tracer(void)
10494 {
10495 	/*
10496 	 * The buffer holding the default bootup tracer name lives in an
10497 	 * init section. This function is called at late_initcall time; if
10498 	 * the boot tracer was never found, clear the pointer out to prevent
10499 	 * a later registration from accessing the buffer that is about to
10500 	 * be freed.
10501 	 */
10502 	if (!default_bootup_tracer)
10503 		return;
10504 
10505 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10506 	       default_bootup_tracer);
10507 	default_bootup_tracer = NULL;
10508 }
10509 
10510 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10511 __init static void tracing_set_default_clock(void)
10512 {
10513 	/* sched_clock_stable() is determined in late_initcall */
10514 	if (!trace_boot_clock && !sched_clock_stable()) {
10515 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10516 			pr_warn("Can not set tracing clock due to lockdown\n");
10517 			return;
10518 		}
10519 
10520 		printk(KERN_WARNING
10521 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10522 		       "If you want to keep using the local clock, then add:\n"
10523 		       "  \"trace_clock=local\"\n"
10524 		       "on the kernel command line\n");
10525 		tracing_set_clock(&global_trace, "global");
10526 	}
10527 }
10528 #else
10529 static inline void tracing_set_default_clock(void) { }
10530 #endif
10531 
10532 __init static int late_trace_init(void)
10533 {
10534 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10535 		static_key_disable(&tracepoint_printk_key.key);
10536 		tracepoint_printk = 0;
10537 	}
10538 
10539 	tracing_set_default_clock();
10540 	clear_boot_tracer();
10541 	return 0;
10542 }
10543 
10544 late_initcall_sync(late_trace_init);
10545