xref: /openbmc/linux/kernel/trace/trace.c (revision 44ad3baf1cca483e418b6aadf2d3994f69e0f16a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 /*
59  * On boot up, the ring buffer is set to the minimum size, so that
60  * we do not waste memory on systems that are not using tracing.
61  */
62 bool ring_buffer_expanded;
63 
64 #ifdef CONFIG_FTRACE_STARTUP_TEST
65 /*
66  * We need to change this state when a selftest is running.
67  * A selftest will look into the ring-buffer to count the
68  * entries inserted during the selftest, although some concurrent
69  * insertions into the ring-buffer, such as trace_printk, could have
70  * occurred at the same time, giving false positive or negative results.
71  */
72 static bool __read_mostly tracing_selftest_running;
73 
74 /*
75  * If boot-time tracing including tracers/events via kernel cmdline
76  * is running, we do not want to run SELFTEST.
77  */
78 bool __read_mostly tracing_selftest_disabled;
79 
disable_tracing_selftest(const char * reason)80 void __init disable_tracing_selftest(const char *reason)
81 {
82 	if (!tracing_selftest_disabled) {
83 		tracing_selftest_disabled = true;
84 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 	}
86 }
87 #else
88 #define tracing_selftest_running	0
89 #define tracing_selftest_disabled	0
90 #endif
91 
92 /* Pipe tracepoints to printk */
93 static struct trace_iterator *tracepoint_print_iter;
94 int tracepoint_printk;
95 static bool tracepoint_printk_stop_on_boot __initdata;
96 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97 
98 /* For tracers that don't implement custom flags */
99 static struct tracer_opt dummy_tracer_opt[] = {
100 	{ }
101 };
102 
103 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)104 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
105 {
106 	return 0;
107 }
108 
109 /*
110  * To prevent the comm cache from being overwritten when no
111  * tracing is active, only save the comm when a trace event
112  * occurred.
113  */
114 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
115 
116 /*
117  * Kill all tracing for good (never come back).
118  * It is initialized to 1 but will turn to zero if the initialization
119  * of the tracer is successful. But that is the only place that sets
120  * this back to zero.
121  */
122 static int tracing_disabled = 1;
123 
124 cpumask_var_t __read_mostly	tracing_buffer_mask;
125 
126 /*
127  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128  *
129  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
130  * is set, then ftrace_dump is called. This will output the contents
131  * of the ftrace buffers to the console.  This is very useful for
132  * capturing traces that lead to crashes and outputting them to a
133  * serial console.
134  *
135  * It is default off, but you can enable it with either specifying
136  * "ftrace_dump_on_oops" in the kernel command line, or setting
137  * /proc/sys/kernel/ftrace_dump_on_oops
138  * Set 1 if you want to dump buffers of all CPUs
139  * Set 2 if you want to dump the buffer of the CPU that triggered oops
140  */
141 
142 enum ftrace_dump_mode ftrace_dump_on_oops;
143 
144 /* When set, tracing will stop when a WARN*() is hit */
145 int __disable_trace_on_warning;
146 
147 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
148 /* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module *mod;	/* module the maps belong to, if not built in */
	unsigned long length;	/* count of the saved maps */
};
153 
154 union trace_eval_map_item;
155 
struct trace_eval_map_tail {
	/*
	 * "end" points to NULL, as it must be different
	 * than "mod" or "eval_string".
	 */
	union trace_eval_map_item *next;	/* next array of saved items */
	const char *end;			/* points to NULL */
};
164 
165 static DEFINE_MUTEX(trace_eval_mutex);
166 
167 /*
168  * The trace_eval_maps are saved in an array with two extra elements,
169  * one at the beginning, and one at the end. The beginning item contains
170  * the count of the saved maps (head.length), and the module they
171  * belong to if not built in (head.mod). The ending item contains a
172  * pointer to the next array of saved eval_map items.
173  */
174 union trace_eval_map_item {
175 	struct trace_eval_map		map;
176 	struct trace_eval_map_head	head;
177 	struct trace_eval_map_tail	tail;
178 };
179 
180 static union trace_eval_map_item *trace_eval_maps;
181 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182 
183 int tracing_set_tracer(struct trace_array *tr, const char *buf);
184 static void ftrace_trace_userstack(struct trace_array *tr,
185 				   struct trace_buffer *buffer,
186 				   unsigned int trace_ctx);
187 
188 #define MAX_TRACER_SIZE		100
189 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
190 static char *default_bootup_tracer;
191 
192 static bool allocate_snapshot;
193 static bool snapshot_at_boot;
194 
195 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_instance_index;
197 
198 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
199 static int boot_snapshot_index;
200 
set_cmdline_ftrace(char * str)201 static int __init set_cmdline_ftrace(char *str)
202 {
203 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
204 	default_bootup_tracer = bootup_tracer_buf;
205 	/* We are using ftrace early, expand it */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("ftrace=", set_cmdline_ftrace);
210 
set_ftrace_dump_on_oops(char * str)211 static int __init set_ftrace_dump_on_oops(char *str)
212 {
213 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
214 		ftrace_dump_on_oops = DUMP_ALL;
215 		return 1;
216 	}
217 
218 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
219 		ftrace_dump_on_oops = DUMP_ORIG;
220                 return 1;
221         }
222 
223         return 0;
224 }
225 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226 
stop_trace_on_warning(char * str)227 static int __init stop_trace_on_warning(char *str)
228 {
229 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
230 		__disable_trace_on_warning = 1;
231 	return 1;
232 }
233 __setup("traceoff_on_warning", stop_trace_on_warning);
234 
boot_alloc_snapshot(char * str)235 static int __init boot_alloc_snapshot(char *str)
236 {
237 	char *slot = boot_snapshot_info + boot_snapshot_index;
238 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
239 	int ret;
240 
241 	if (str[0] == '=') {
242 		str++;
243 		if (strlen(str) >= left)
244 			return -1;
245 
246 		ret = snprintf(slot, left, "%s\t", str);
247 		boot_snapshot_index += ret;
248 	} else {
249 		allocate_snapshot = true;
250 		/* We also need the main ring buffer expanded */
251 		ring_buffer_expanded = true;
252 	}
253 	return 1;
254 }
255 __setup("alloc_snapshot", boot_alloc_snapshot);
256 
257 
boot_snapshot(char * str)258 static int __init boot_snapshot(char *str)
259 {
260 	snapshot_at_boot = true;
261 	boot_alloc_snapshot(str);
262 	return 1;
263 }
264 __setup("ftrace_boot_snapshot", boot_snapshot);
265 
266 
boot_instance(char * str)267 static int __init boot_instance(char *str)
268 {
269 	char *slot = boot_instance_info + boot_instance_index;
270 	int left = sizeof(boot_instance_info) - boot_instance_index;
271 	int ret;
272 
273 	if (strlen(str) >= left)
274 		return -1;
275 
276 	ret = snprintf(slot, left, "%s\t", str);
277 	boot_instance_index += ret;
278 
279 	return 1;
280 }
281 __setup("trace_instance=", boot_instance);
282 
283 
284 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285 
set_trace_boot_options(char * str)286 static int __init set_trace_boot_options(char *str)
287 {
288 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
289 	return 1;
290 }
291 __setup("trace_options=", set_trace_boot_options);
292 
293 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
294 static char *trace_boot_clock __initdata;
295 
set_trace_boot_clock(char * str)296 static int __init set_trace_boot_clock(char *str)
297 {
298 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
299 	trace_boot_clock = trace_boot_clock_buf;
300 	return 1;
301 }
302 __setup("trace_clock=", set_trace_boot_clock);
303 
set_tracepoint_printk(char * str)304 static int __init set_tracepoint_printk(char *str)
305 {
306 	/* Ignore the "tp_printk_stop_on_boot" param */
307 	if (*str == '_')
308 		return 0;
309 
310 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
311 		tracepoint_printk = 1;
312 	return 1;
313 }
314 __setup("tp_printk", set_tracepoint_printk);
315 
set_tracepoint_printk_stop(char * str)316 static int __init set_tracepoint_printk_stop(char *str)
317 {
318 	tracepoint_printk_stop_on_boot = true;
319 	return 1;
320 }
321 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322 
/* Convert nanoseconds to microseconds, adding 500 first to round to nearest. */
unsigned long long ns2usecs(u64 nsec)
{
	u64 rounded = nsec + 500;

	do_div(rounded, 1000);

	return rounded;
}
329 
330 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)331 trace_process_export(struct trace_export *export,
332 	       struct ring_buffer_event *event, int flag)
333 {
334 	struct trace_entry *entry;
335 	unsigned int size = 0;
336 
337 	if (export->flags & flag) {
338 		entry = ring_buffer_event_data(event);
339 		size = ring_buffer_event_length(event);
340 		export->write(export, entry, size);
341 	}
342 }
343 
344 static DEFINE_MUTEX(ftrace_export_lock);
345 
346 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347 
348 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
350 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351 
ftrace_exports_enable(struct trace_export * export)352 static inline void ftrace_exports_enable(struct trace_export *export)
353 {
354 	if (export->flags & TRACE_EXPORT_FUNCTION)
355 		static_branch_inc(&trace_function_exports_enabled);
356 
357 	if (export->flags & TRACE_EXPORT_EVENT)
358 		static_branch_inc(&trace_event_exports_enabled);
359 
360 	if (export->flags & TRACE_EXPORT_MARKER)
361 		static_branch_inc(&trace_marker_exports_enabled);
362 }
363 
ftrace_exports_disable(struct trace_export * export)364 static inline void ftrace_exports_disable(struct trace_export *export)
365 {
366 	if (export->flags & TRACE_EXPORT_FUNCTION)
367 		static_branch_dec(&trace_function_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_EVENT)
370 		static_branch_dec(&trace_event_exports_enabled);
371 
372 	if (export->flags & TRACE_EXPORT_MARKER)
373 		static_branch_dec(&trace_marker_exports_enabled);
374 }
375 
ftrace_exports(struct ring_buffer_event * event,int flag)376 static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 {
378 	struct trace_export *export;
379 
380 	preempt_disable_notrace();
381 
382 	export = rcu_dereference_raw_check(ftrace_exports_list);
383 	while (export) {
384 		trace_process_export(export, event, flag);
385 		export = rcu_dereference_raw_check(export->next);
386 	}
387 
388 	preempt_enable_notrace();
389 }
390 
391 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)392 add_trace_export(struct trace_export **list, struct trace_export *export)
393 {
394 	rcu_assign_pointer(export->next, *list);
395 	/*
396 	 * We are entering export into the list but another
397 	 * CPU might be walking that list. We need to make sure
398 	 * the export->next pointer is valid before another CPU sees
399 	 * the export pointer included into the list.
400 	 */
401 	rcu_assign_pointer(*list, export);
402 }
403 
404 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)405 rm_trace_export(struct trace_export **list, struct trace_export *export)
406 {
407 	struct trace_export **p;
408 
409 	for (p = list; *p != NULL; p = &(*p)->next)
410 		if (*p == export)
411 			break;
412 
413 	if (*p != export)
414 		return -1;
415 
416 	rcu_assign_pointer(*p, (*p)->next);
417 
418 	return 0;
419 }
420 
/*
 * Enable the static keys that match @export's flags, then put @export
 * on @list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
428 
/*
 * Take @export off @list and, if it was actually on the list, drop the
 * static key counts that were taken for it by add_ftrace_export().
 *
 * Returns the result of rm_trace_export(): 0 on success, -1 when
 * @export was not found on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);

	/*
	 * Only undo the static key increments for an export that was
	 * really registered; decrementing for an unknown export would
	 * unbalance the keys.
	 */
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
439 
register_ftrace_export(struct trace_export * export)440 int register_ftrace_export(struct trace_export *export)
441 {
442 	if (WARN_ON_ONCE(!export->write))
443 		return -1;
444 
445 	mutex_lock(&ftrace_export_lock);
446 
447 	add_ftrace_export(&ftrace_exports_list, export);
448 
449 	mutex_unlock(&ftrace_export_lock);
450 
451 	return 0;
452 }
453 EXPORT_SYMBOL_GPL(register_ftrace_export);
454 
unregister_ftrace_export(struct trace_export * export)455 int unregister_ftrace_export(struct trace_export *export)
456 {
457 	int ret;
458 
459 	mutex_lock(&ftrace_export_lock);
460 
461 	ret = rm_ftrace_export(&ftrace_exports_list, export);
462 
463 	mutex_unlock(&ftrace_export_lock);
464 
465 	return ret;
466 }
467 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
468 
469 /* trace_flags holds trace_options default values */
470 #define TRACE_DEFAULT_FLAGS						\
471 	(FUNCTION_DEFAULT_FLAGS |					\
472 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
473 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
474 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
475 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
476 	 TRACE_ITER_HASH_PTR)
477 
478 /* trace_options that are only supported by global_trace */
479 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
480 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481 
482 /* trace_flags that are default zero for instances */
483 #define ZEROED_TRACE_FLAGS \
484 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
485 
486 /*
487  * The global_trace is the descriptor that holds the top-level tracing
488  * buffers for the live tracing.
489  */
490 static struct trace_array global_trace = {
491 	.trace_flags = TRACE_DEFAULT_FLAGS,
492 };
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
trace_array_get(struct trace_array * this_tr)496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
__trace_array_put(struct trace_array * this_tr)514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
trace_array_put(struct trace_array * this_tr)529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539 
tracing_check_open_get_tr(struct trace_array * tr)540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
556 
call_filter_check_discard(struct trace_event_call * call,void * rec,struct trace_buffer * buffer,struct ring_buffer_event * event)557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* @v carries pid + 1; undo the offset before printing */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
/*
 * trace_pid_write - build a new pid list from pids written by the user
 * @filtered_pids: the current pid list, copied into the new one (may be NULL)
 * @new_pid_list: set to the newly built list, or to NULL if it holds no pids
 * @ubuf: user buffer with the pids to add
 * @cnt: number of bytes in @ubuf
 *
 * The write is an all or nothing operation: a fresh list is always
 * allocated, seeded from @filtered_pids, and extended with every pid
 * parsed from @ubuf. On any failure the new list is freed and
 * @new_pid_list is left untouched.
 *
 * Each token is parsed with kstrtouint(), so a value that does not fit
 * in an unsigned int is rejected with -EINVAL instead of being silently
 * truncated to some other pid.
 *
 * Returns the number of bytes consumed from @ubuf on success, -ENOMEM
 * if the parser or the list cannot be allocated, -EINVAL on a token
 * that is not a valid number, -1 if the pid could not be set in the
 * list, or the error returned by trace_get_user().
 */
int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned int val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		/* Nothing (more) to parse */
		if (!trace_parser_loaded(&parser))
			break;

		/* Malformed or out-of-range numbers both end up as -EINVAL */
		ret = -EINVAL;
		if (kstrtouint(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
807 
buffer_ftrace_now(struct array_buffer * buf,int cpu)808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
ftrace_now(int cpu)822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
tracing_is_enabled(void)836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. Anyway this can be
855  * boot time and run time configurable.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * serialize the access of the ring buffer
871  *
872  * The ring buffer serializes readers, but that is only low-level protection.
873  * The validity of the events (as returned by ring_buffer_peek(), etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow other processes to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not reader page) in ring buffer, and this page will be rewritten
880  *      by events producer.
881  *   B) The page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to system.
883  *
884  * These primitives allow multiple processes to access different per-CPU
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
trace_access_lock(int cpu)895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
trace_access_unlock(int cpu)911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
trace_access_lock_init(void)921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
trace_access_lock(int cpu)933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
trace_access_unlock(int cpu)939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
/* Non-SMP variant: access_lock is defined statically, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
948 
949 #endif
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
/*
 * Stack tracing is not configured in: both helpers are empty stubs.
 * Their parameter types match the CONFIG_STACKTRACE prototypes, with
 * trace_ctx being an unsigned int in both.
 */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
972 
973 #endif
974 
975 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
tracer_tracing_on(struct trace_array * tr)999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races of where it gets disabled but we still do a record.
1008 	 * As the check is in the fast path of the tracers, it is more
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
tracing_on(void)1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
__trace_puts(unsigned long ip,const char * str,int size)1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
__trace_bputs(unsigned long ip,const char * str)1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, snapshot can not be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
tracing_snapshot_instance(struct trace_array * tr)1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
tracing_snapshot(void)1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Behaves like tracing_snapshot(), except that the snapshot is only taken
 * when the cond_snapshot.update() implementation, given @cond_data,
 * returns true. A true return means update() used @cond_data to decide
 * that the snapshot should be taken and, presumably, saved it along with
 * the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
tracing_cond_snapshot_data(struct trace_array * tr)1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
tracing_alloc_snapshot_instance(struct trace_array * tr)1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int ret;
1267 
1268 	if (!tr->allocated_snapshot) {
1269 
1270 		/* allocate spare buffer */
1271 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 		if (ret < 0)
1274 			return ret;
1275 
1276 		tr->allocated_snapshot = true;
1277 	}
1278 
1279 	return 0;
1280 }
1281 
free_snapshot(struct trace_array * tr)1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284 	/*
1285 	 * We don't free the ring buffer. instead, resize it because
1286 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1287 	 * we want preserve it.
1288 	 */
1289 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290 	set_buffer_entries(&tr->max_buffer, 1);
1291 	tracing_reset_online_cpus(&tr->max_buffer);
1292 	tr->allocated_snapshot = false;
1293 }
1294 
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
tracing_alloc_snapshot(void)1305 int tracing_alloc_snapshot(void)
1306 {
1307 	struct trace_array *tr = &global_trace;
1308 	int ret;
1309 
1310 	ret = tracing_alloc_snapshot_instance(tr);
1311 	WARN_ON(ret < 0);
1312 
1313 	return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first if
 * it does not exist yet. Only use this where sleeping is allowed, as the
 * allocation may sleep.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer, so
 * the live trace can be captured when some condition is triggered while
 * tracing carries on. No snapshot is taken if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() >= 0)
		tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339 
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:		The tracing instance
1343  * @cond_data:	User data to associate with the snapshot
1344  * @update:	Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 				 cond_update_fn_t update)
1355 {
1356 	struct cond_snapshot *cond_snapshot;
1357 	int ret = 0;
1358 
1359 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 	if (!cond_snapshot)
1361 		return -ENOMEM;
1362 
1363 	cond_snapshot->cond_data = cond_data;
1364 	cond_snapshot->update = update;
1365 
1366 	mutex_lock(&trace_types_lock);
1367 
1368 	ret = tracing_alloc_snapshot_instance(tr);
1369 	if (ret)
1370 		goto fail_unlock;
1371 
1372 	if (tr->current_trace->use_max_tr) {
1373 		ret = -EBUSY;
1374 		goto fail_unlock;
1375 	}
1376 
1377 	/*
1378 	 * The cond_snapshot can only change to NULL without the
1379 	 * trace_types_lock. We don't care if we race with it going
1380 	 * to NULL, but we want to make sure that it's not set to
1381 	 * something other than NULL when we get here, which we can
1382 	 * do safely with only holding the trace_types_lock and not
1383 	 * having to take the max_lock.
1384 	 */
1385 	if (tr->cond_snapshot) {
1386 		ret = -EBUSY;
1387 		goto fail_unlock;
1388 	}
1389 
1390 	local_irq_disable();
1391 	arch_spin_lock(&tr->max_lock);
1392 	tr->cond_snapshot = cond_snapshot;
1393 	arch_spin_unlock(&tr->max_lock);
1394 	local_irq_enable();
1395 
1396 	mutex_unlock(&trace_types_lock);
1397 
1398 	return ret;
1399 
1400  fail_unlock:
1401 	mutex_unlock(&trace_types_lock);
1402 	kfree(cond_snapshot);
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406 
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:		The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
tracing_snapshot_cond_disable(struct trace_array * tr)1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419 	int ret = 0;
1420 
1421 	local_irq_disable();
1422 	arch_spin_lock(&tr->max_lock);
1423 
1424 	if (!tr->cond_snapshot)
1425 		ret = -EINVAL;
1426 	else {
1427 		kfree(tr->cond_snapshot);
1428 		tr->cond_snapshot = NULL;
1429 	}
1430 
1431 	arch_spin_unlock(&tr->max_lock);
1432 	local_irq_enable();
1433 
1434 	return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1437 #else
/* Stub for builds without CONFIG_TRACER_SNAPSHOT: warn once, do nothing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub for builds without CONFIG_TRACER_SNAPSHOT: warn once, do nothing. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1448 int tracing_alloc_snapshot(void)
1449 {
1450 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 	return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/*
 * Stub for builds without CONFIG_TRACER_SNAPSHOT. It only exists to emit
 * the warning, which the tracing_snapshot() stub takes care of.
 */
void tracing_snapshot_alloc(void)
{
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462 	return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/*
 * Stub for builds without CONFIG_TRACER_SNAPSHOT: conditional snapshots
 * cannot be enabled, so every request fails with -ENODEV.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/*
 * Stub for builds without CONFIG_TRACER_SNAPSHOT: there is nothing to
 * disable.
 *
 * Returns 0. The function returns an int status, so the value is spelled
 * as an integer rather than as the bool literal "false"; the value seen
 * by callers is unchanged.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)	do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477 
tracer_tracing_off(struct trace_array * tr)1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480 	if (tr->array_buffer.buffer)
1481 		ring_buffer_record_off(tr->array_buffer.buffer);
1482 	/*
1483 	 * This flag is looked at when buffers haven't been allocated
1484 	 * yet, or by some tracers (like irqsoff), that just want to
1485 	 * know if the ring buffer has been disabled, but it can handle
1486 	 * races of where it gets disabled but we still do a record.
1487 	 * As the check is in the fast path of the tracers, it is more
1488 	 * important to be fast than accurate.
1489 	 */
1490 	tr->buffer_disabled = 1;
1491 	/* Make the flag seen by readers */
1492 	smp_wmb();
1493 }
1494 
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
tracing_off(void)1503 void tracing_off(void)
1504 {
1505 	tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508 
disable_trace_on_warning(void)1509 void disable_trace_on_warning(void)
1510 {
1511 	if (__disable_trace_on_warning) {
1512 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513 			"Disabling tracing due to warning\n");
1514 		tracing_off();
1515 	}
1516 }
1517 
1518 /**
1519  * tracer_tracing_is_on - show real state of ring buffer enabled
1520  * @tr : the trace array to know if ring buffer is enabled
1521  *
1522  * Shows real state of the ring buffer if it is enabled or not.
1523  */
tracer_tracing_is_on(struct trace_array * tr)1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526 	if (tr->array_buffer.buffer)
1527 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 	return !tr->buffer_disabled;
1529 }
1530 
1531 /**
1532  * tracing_is_on - show state of ring buffers enabled
1533  */
tracing_is_on(void)1534 int tracing_is_on(void)
1535 {
1536 	return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539 
set_buf_size(char * str)1540 static int __init set_buf_size(char *str)
1541 {
1542 	unsigned long buf_size;
1543 
1544 	if (!str)
1545 		return 0;
1546 	buf_size = memparse(str, &str);
1547 	/*
1548 	 * nr_entries can not be zero and the startup
1549 	 * tests require some buffer space. Therefore
1550 	 * ensure we have at least 4096 bytes of buffer.
1551 	 */
1552 	trace_buf_size = max(4096UL, buf_size);
1553 	return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556 
set_tracing_thresh(char * str)1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 	unsigned long threshold;
1560 	int ret;
1561 
1562 	if (!str)
1563 		return 0;
1564 	ret = kstrtoul(str, 0, &threshold);
1565 	if (ret < 0)
1566 		return 0;
1567 	tracing_thresh = threshold * 1000;
1568 	return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571 
/* Convert @nsecs nanoseconds to whole microseconds, rounding down. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1576 
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
1585 
/*
 * Names of the trace options, generated by expanding TRACE_FLAGS with
 * C(a, b) defined as "b". The entries must match the bit positions in
 * trace_iterator_flags. The list ends with a NULL entry.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1591 
1592 static struct {
1593 	u64 (*func)(void);
1594 	const char *name;
1595 	int in_ns;		/* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597 	{ trace_clock_local,		"local",	1 },
1598 	{ trace_clock_global,		"global",	1 },
1599 	{ trace_clock_counter,		"counter",	0 },
1600 	{ trace_clock_jiffies,		"uptime",	0 },
1601 	{ trace_clock,			"perf",		1 },
1602 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1603 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1604 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1605 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1606 	ARCH_TRACE_CLOCKS
1607 };
1608 
trace_clock_in_ns(struct trace_array * tr)1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 	if (trace_clocks[tr->clock_id].in_ns)
1612 		return true;
1613 
1614 	return false;
1615 }
1616 
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
trace_parser_get_init(struct trace_parser * parser,int size)1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 	memset(parser, 0, sizeof(*parser));
1623 
1624 	parser->buffer = kmalloc(size, GFP_KERNEL);
1625 	if (!parser->buffer)
1626 		return 1;
1627 
1628 	parser->size = size;
1629 	return 0;
1630 }
1631 
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
trace_parser_put(struct trace_parser * parser)1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 	kfree(parser->buffer);
1638 	parser->buffer = NULL;
1639 }
1640 
1641 /*
1642  * trace_get_user - reads the user input string separated by  space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 	size_t cnt, loff_t *ppos)
1654 {
1655 	char ch;
1656 	size_t read = 0;
1657 	ssize_t ret;
1658 
1659 	if (!*ppos)
1660 		trace_parser_clear(parser);
1661 
1662 	ret = get_user(ch, ubuf++);
1663 	if (ret)
1664 		goto out;
1665 
1666 	read++;
1667 	cnt--;
1668 
1669 	/*
1670 	 * The parser is not finished with the last write,
1671 	 * continue reading the user input without skipping spaces.
1672 	 */
1673 	if (!parser->cont) {
1674 		/* skip white space */
1675 		while (cnt && isspace(ch)) {
1676 			ret = get_user(ch, ubuf++);
1677 			if (ret)
1678 				goto out;
1679 			read++;
1680 			cnt--;
1681 		}
1682 
1683 		parser->idx = 0;
1684 
1685 		/* only spaces were written */
1686 		if (isspace(ch) || !ch) {
1687 			*ppos += read;
1688 			ret = read;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	/* read the non-space input */
1694 	while (cnt && !isspace(ch) && ch) {
1695 		if (parser->idx < parser->size - 1)
1696 			parser->buffer[parser->idx++] = ch;
1697 		else {
1698 			ret = -EINVAL;
1699 			goto out;
1700 		}
1701 		ret = get_user(ch, ubuf++);
1702 		if (ret)
1703 			goto out;
1704 		read++;
1705 		cnt--;
1706 	}
1707 
1708 	/* We either got finished input or we have to wait for another call. */
1709 	if (isspace(ch) || !ch) {
1710 		parser->buffer[parser->idx] = 0;
1711 		parser->cont = false;
1712 	} else if (parser->idx < parser->size - 1) {
1713 		parser->cont = true;
1714 		parser->buffer[parser->idx++] = ch;
1715 		/* Make sure the parsed string always terminates with '\0'. */
1716 		parser->buffer[parser->idx] = 0;
1717 	} else {
1718 		ret = -EINVAL;
1719 		goto out;
1720 	}
1721 
1722 	*ppos += read;
1723 	ret = read;
1724 
1725 out:
1726 	return ret;
1727 }
1728 
1729 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 	int len;
1733 
1734 	if (trace_seq_used(s) <= s->readpos)
1735 		return -EBUSY;
1736 
1737 	len = trace_seq_used(s) - s->readpos;
1738 	if (cnt > len)
1739 		cnt = len;
1740 	memcpy(buf, s->buffer + s->readpos, cnt);
1741 
1742 	s->readpos += cnt;
1743 	return cnt;
1744 }
1745 
1746 unsigned long __read_mostly	tracing_thresh;
1747 
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750 
1751 #ifdef LATENCY_FS_NOTIFY
1752 
1753 static struct workqueue_struct *fsnotify_wq;
1754 
latency_fsnotify_workfn(struct work_struct * work)1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 	struct trace_array *tr = container_of(work, struct trace_array,
1758 					      fsnotify_work);
1759 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 	struct trace_array *tr = container_of(iwork, struct trace_array,
1765 					      fsnotify_irqwork);
1766 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 				     struct dentry *d_tracer)
1771 {
1772 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 					      TRACE_MODE_WRITE,
1776 					      d_tracer, tr,
1777 					      &tracing_max_lat_fops);
1778 }
1779 
latency_fsnotify_init(void)1780 __init static int latency_fsnotify_init(void)
1781 {
1782 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 	if (!fsnotify_wq) {
1785 		pr_err("Unable to allocate tr_max_lat_wq\n");
1786 		return -ENOMEM;
1787 	}
1788 	return 0;
1789 }
1790 
1791 late_initcall_sync(latency_fsnotify_init);
1792 
latency_fsnotify(struct trace_array * tr)1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 	if (!fsnotify_wq)
1796 		return;
1797 	/*
1798 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 	 * possible that we are called from __schedule() or do_idle(), which
1800 	 * could cause a deadlock.
1801 	 */
1802 	irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804 
1805 #else /* !LATENCY_FS_NOTIFY */
1806 
/*
 * Variant used when LATENCY_FS_NOTIFY is not defined: it only creates the
 * "tracing_max_latency" file, with no notification work to set up.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1810 
1811 #endif
1812 
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (this way the maximum trace is permanently saved,
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817  */
1818 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 	struct array_buffer *trace_buf = &tr->array_buffer;
1822 	struct array_buffer *max_buf = &tr->max_buffer;
1823 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825 
1826 	max_buf->cpu = cpu;
1827 	max_buf->time_start = data->preempt_timestamp;
1828 
1829 	max_data->saved_latency = tr->max_latency;
1830 	max_data->critical_start = data->critical_start;
1831 	max_data->critical_end = data->critical_end;
1832 
1833 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 	max_data->pid = tsk->pid;
1835 	/*
1836 	 * If tsk == current, then use current_uid(), as that does not use
1837 	 * RCU. The irq tracer can be called out of RCU scope.
1838 	 */
1839 	if (tsk == current)
1840 		max_data->uid = current_uid();
1841 	else
1842 		max_data->uid = task_uid(tsk);
1843 
1844 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 	max_data->policy = tsk->policy;
1846 	max_data->rt_priority = tsk->rt_priority;
1847 
1848 	/* record this tasks comm */
1849 	tracing_record_cmdline(tsk);
1850 	latency_fsnotify(tr);
1851 }
1852 
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: tracer
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 	      void *cond_data)
1866 {
1867 	if (tr->stop_count)
1868 		return;
1869 
1870 	WARN_ON_ONCE(!irqs_disabled());
1871 
1872 	if (!tr->allocated_snapshot) {
1873 		/* Only the nop tracer should hit this when disabling */
1874 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 		return;
1876 	}
1877 
1878 	arch_spin_lock(&tr->max_lock);
1879 
1880 	/* Inherit the recordable setting from array_buffer */
1881 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 		ring_buffer_record_on(tr->max_buffer.buffer);
1883 	else
1884 		ring_buffer_record_off(tr->max_buffer.buffer);
1885 
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 		arch_spin_unlock(&tr->max_lock);
1889 		return;
1890 	}
1891 #endif
1892 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893 
1894 	__update_max_tr(tr, tsk, cpu);
1895 
1896 	arch_spin_unlock(&tr->max_lock);
1897 
1898 	/* Any waiters on the old snapshot buffer need to wake up */
1899 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901 
1902 /**
1903  * update_max_tr_single - only copy one trace over, and reset the rest
1904  * @tr: tracer
1905  * @tsk: task with the latency
1906  * @cpu: the cpu of the buffer to copy.
1907  *
1908  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909  */
1910 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 	int ret;
1914 
1915 	if (tr->stop_count)
1916 		return;
1917 
1918 	WARN_ON_ONCE(!irqs_disabled());
1919 	if (!tr->allocated_snapshot) {
1920 		/* Only the nop tracer should hit this when disabling */
1921 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 		return;
1923 	}
1924 
1925 	arch_spin_lock(&tr->max_lock);
1926 
1927 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928 
1929 	if (ret == -EBUSY) {
1930 		/*
1931 		 * We failed to swap the buffer due to a commit taking
1932 		 * place on this CPU. We fail to record, but we reset
1933 		 * the max trace buffer (no one writes directly to it)
1934 		 * and flag that it failed.
1935 		 * Another reason is resize is in progress.
1936 		 */
1937 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 			"Failed to swap buffers due to commit or resize in progress\n");
1939 	}
1940 
1941 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942 
1943 	__update_max_tr(tr, tsk, cpu);
1944 	arch_spin_unlock(&tr->max_lock);
1945 }
1946 
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948 
wait_on_pipe(struct trace_iterator * iter,int full)1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 	int ret;
1952 
1953 	/* Iterators are static, they should be filled or empty */
1954 	if (trace_buffer_iter(iter, iter->cpu_file))
1955 		return 0;
1956 
1957 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958 
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 	/*
1961 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 	 * to happen, this would now be the main buffer.
1963 	 */
1964 	if (iter->snapshot)
1965 		iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 	return ret;
1968 }
1969 
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
1971 static bool selftests_can_run;
1972 
1973 struct trace_selftests {
1974 	struct list_head		list;
1975 	struct tracer			*type;
1976 };
1977 
1978 static LIST_HEAD(postponed_selftests);
1979 
save_selftest(struct tracer * type)1980 static int save_selftest(struct tracer *type)
1981 {
1982 	struct trace_selftests *selftest;
1983 
1984 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 	if (!selftest)
1986 		return -ENOMEM;
1987 
1988 	selftest->type = type;
1989 	list_add(&selftest->list, &postponed_selftests);
1990 	return 0;
1991 }
1992 
run_tracer_selftest(struct tracer * type)1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 	struct trace_array *tr = &global_trace;
1996 	struct tracer *saved_tracer = tr->current_trace;
1997 	int ret;
1998 
1999 	if (!type->selftest || tracing_selftest_disabled)
2000 		return 0;
2001 
2002 	/*
2003 	 * If a tracer registers early in boot up (before scheduling is
2004 	 * initialized and such), then do not run its selftests yet.
2005 	 * Instead, run it a little later in the boot process.
2006 	 */
2007 	if (!selftests_can_run)
2008 		return save_selftest(type);
2009 
2010 	if (!tracing_is_on()) {
2011 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 			type->name);
2013 		return 0;
2014 	}
2015 
2016 	/*
2017 	 * Run a selftest on this tracer.
2018 	 * Here we reset the trace buffer, and set the current
2019 	 * tracer to be this tracer. The tracer can then run some
2020 	 * internal tracing to verify that everything is in order.
2021 	 * If we fail, we do not register this tracer.
2022 	 */
2023 	tracing_reset_online_cpus(&tr->array_buffer);
2024 
2025 	tr->current_trace = type;
2026 
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 	if (type->use_max_tr) {
2029 		/* If we expanded the buffers, make sure the max is expanded too */
2030 		if (ring_buffer_expanded)
2031 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 					   RING_BUFFER_ALL_CPUS);
2033 		tr->allocated_snapshot = true;
2034 	}
2035 #endif
2036 
2037 	/* the test is responsible for initializing and enabling */
2038 	pr_info("Testing tracer %s: ", type->name);
2039 	ret = type->selftest(type, tr);
2040 	/* the test is responsible for resetting too */
2041 	tr->current_trace = saved_tracer;
2042 	if (ret) {
2043 		printk(KERN_CONT "FAILED!\n");
2044 		/* Add the warning after printing 'FAILED' */
2045 		WARN_ON(1);
2046 		return -1;
2047 	}
2048 	/* Only reset on passing, to avoid touching corrupted buffers */
2049 	tracing_reset_online_cpus(&tr->array_buffer);
2050 
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 	if (type->use_max_tr) {
2053 		tr->allocated_snapshot = false;
2054 
2055 		/* Shrink the max buffer again */
2056 		if (ring_buffer_expanded)
2057 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 					   RING_BUFFER_ALL_CPUS);
2059 	}
2060 #endif
2061 
2062 	printk(KERN_CONT "PASSED\n");
2063 	return 0;
2064 }
2065 
do_run_tracer_selftest(struct tracer * type)2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 	int ret;
2069 
2070 	/*
2071 	 * Tests can take a long time, especially if they are run one after the
2072 	 * other, as does happen during bootup when all the tracers are
2073 	 * registered. This could cause the soft lockup watchdog to trigger.
2074 	 */
2075 	cond_resched();
2076 
2077 	tracing_selftest_running = true;
2078 	ret = run_tracer_selftest(type);
2079 	tracing_selftest_running = false;
2080 
2081 	return ret;
2082 }
2083 
init_trace_selftests(void)2084 static __init int init_trace_selftests(void)
2085 {
2086 	struct trace_selftests *p, *n;
2087 	struct tracer *t, **last;
2088 	int ret;
2089 
2090 	selftests_can_run = true;
2091 
2092 	mutex_lock(&trace_types_lock);
2093 
2094 	if (list_empty(&postponed_selftests))
2095 		goto out;
2096 
2097 	pr_info("Running postponed tracer tests:\n");
2098 
2099 	tracing_selftest_running = true;
2100 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 		/* This loop can take minutes when sanitizers are enabled, so
2102 		 * lets make sure we allow RCU processing.
2103 		 */
2104 		cond_resched();
2105 		ret = run_tracer_selftest(p->type);
2106 		/* If the test fails, then warn and remove from available_tracers */
2107 		if (ret < 0) {
2108 			WARN(1, "tracer: %s failed selftest, disabling\n",
2109 			     p->type->name);
2110 			last = &trace_types;
2111 			for (t = trace_types; t; t = t->next) {
2112 				if (t == p->type) {
2113 					*last = t->next;
2114 					break;
2115 				}
2116 				last = &t->next;
2117 			}
2118 		}
2119 		list_del(&p->list);
2120 		kfree(p);
2121 	}
2122 	tracing_selftest_running = false;
2123 
2124  out:
2125 	mutex_unlock(&trace_types_lock);
2126 
2127 	return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
/* Selftests are compiled out: report success without doing anything */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Selftests are compiled out: report success without doing anything */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140 
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142 
2143 static void __init apply_trace_boot_options(void);
2144 
2145 /**
2146  * register_tracer - register a tracer with the ftrace system.
2147  * @type: the plugin for the tracer
2148  *
2149  * Register a new plugin tracer.
2150  */
register_tracer(struct tracer * type)2151 int __init register_tracer(struct tracer *type)
2152 {
2153 	struct tracer *t;
2154 	int ret = 0;
2155 
2156 	if (!type->name) {
2157 		pr_info("Tracer must have a name\n");
2158 		return -1;
2159 	}
2160 
2161 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 		return -1;
2164 	}
2165 
2166 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 		pr_warn("Can not register tracer %s due to lockdown\n",
2168 			   type->name);
2169 		return -EPERM;
2170 	}
2171 
2172 	mutex_lock(&trace_types_lock);
2173 
2174 	for (t = trace_types; t; t = t->next) {
2175 		if (strcmp(type->name, t->name) == 0) {
2176 			/* already found */
2177 			pr_info("Tracer %s already registered\n",
2178 				type->name);
2179 			ret = -1;
2180 			goto out;
2181 		}
2182 	}
2183 
2184 	if (!type->set_flag)
2185 		type->set_flag = &dummy_set_flag;
2186 	if (!type->flags) {
2187 		/*allocate a dummy tracer_flags*/
2188 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 		if (!type->flags) {
2190 			ret = -ENOMEM;
2191 			goto out;
2192 		}
2193 		type->flags->val = 0;
2194 		type->flags->opts = dummy_tracer_opt;
2195 	} else
2196 		if (!type->flags->opts)
2197 			type->flags->opts = dummy_tracer_opt;
2198 
2199 	/* store the tracer for __set_tracer_option */
2200 	type->flags->trace = type;
2201 
2202 	ret = do_run_tracer_selftest(type);
2203 	if (ret < 0)
2204 		goto out;
2205 
2206 	type->next = trace_types;
2207 	trace_types = type;
2208 	add_tracer_options(&global_trace, type);
2209 
2210  out:
2211 	mutex_unlock(&trace_types_lock);
2212 
2213 	if (ret || !default_bootup_tracer)
2214 		goto out_unlock;
2215 
2216 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 		goto out_unlock;
2218 
2219 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 	/* Do we want this tracer to start on bootup? */
2221 	tracing_set_tracer(&global_trace, type->name);
2222 	default_bootup_tracer = NULL;
2223 
2224 	apply_trace_boot_options();
2225 
2226 	/* disable other selftests, since this will break it. */
2227 	disable_tracing_selftest("running a tracer");
2228 
2229  out_unlock:
2230 	return ret;
2231 }
2232 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 	struct trace_buffer *buffer = buf->buffer;
2236 
2237 	if (!buffer)
2238 		return;
2239 
2240 	ring_buffer_record_disable(buffer);
2241 
2242 	/* Make sure all commits have finished */
2243 	synchronize_rcu();
2244 	ring_buffer_reset_cpu(buffer, cpu);
2245 
2246 	ring_buffer_record_enable(buffer);
2247 }
2248 
tracing_reset_online_cpus(struct array_buffer * buf)2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 	struct trace_buffer *buffer = buf->buffer;
2252 
2253 	if (!buffer)
2254 		return;
2255 
2256 	ring_buffer_record_disable(buffer);
2257 
2258 	/* Make sure all commits have finished */
2259 	synchronize_rcu();
2260 
2261 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262 
2263 	ring_buffer_reset_online_cpus(buffer);
2264 
2265 	ring_buffer_record_enable(buffer);
2266 }
2267 
2268 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 	struct trace_array *tr;
2272 
2273 	lockdep_assert_held(&trace_types_lock);
2274 
2275 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 		if (!tr->clear_trace)
2277 			continue;
2278 		tr->clear_trace = false;
2279 		tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 		tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 	}
2284 }
2285 
tracing_reset_all_online_cpus(void)2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 	mutex_lock(&trace_types_lock);
2289 	tracing_reset_all_online_cpus_unlocked();
2290 	mutex_unlock(&trace_types_lock);
2291 }
2292 
2293 /*
2294  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2295  * is the tgid last observed corresponding to pid=i.
2296  */
2297 static int *tgid_map;
2298 
2299 /* The maximum valid index into tgid_map. */
2300 static size_t tgid_map_max;
2301 
2302 #define SAVED_CMDLINES_DEFAULT 128
2303 #define NO_CMDLINE_MAP UINT_MAX
2304 /*
2305  * Preemption must be disabled before acquiring trace_cmdline_lock.
2306  * The various trace_arrays' max_lock must be acquired in a context
2307  * where interrupt is disabled.
2308  */
2309 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2310 struct saved_cmdlines_buffer {
2311 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2312 	unsigned *map_cmdline_to_pid;
2313 	unsigned cmdline_num;
2314 	int cmdline_idx;
2315 	char saved_cmdlines[];
2316 };
2317 static struct saved_cmdlines_buffer *savedcmd;
2318 
2319 /* Holds the size of a cmdline and pid element */
2320 #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
2321 	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
2322 
get_saved_cmdlines(int idx)2323 static inline char *get_saved_cmdlines(int idx)
2324 {
2325 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2326 }
2327 
set_cmdline(int idx,const char * cmdline)2328 static inline void set_cmdline(int idx, const char *cmdline)
2329 {
2330 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2331 }
2332 
free_saved_cmdlines_buffer(struct saved_cmdlines_buffer * s)2333 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2334 {
2335 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2336 
2337 	kmemleak_free(s);
2338 	free_pages((unsigned long)s, order);
2339 }
2340 
allocate_cmdlines_buffer(unsigned int val)2341 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2342 {
2343 	struct saved_cmdlines_buffer *s;
2344 	struct page *page;
2345 	int orig_size, size;
2346 	int order;
2347 
2348 	/* Figure out how much is needed to hold the given number of cmdlines */
2349 	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2350 	order = get_order(orig_size);
2351 	size = 1 << (order + PAGE_SHIFT);
2352 	page = alloc_pages(GFP_KERNEL, order);
2353 	if (!page)
2354 		return NULL;
2355 
2356 	s = page_address(page);
2357 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2358 	memset(s, 0, sizeof(*s));
2359 
2360 	/* Round up to actual allocation */
2361 	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2362 	s->cmdline_num = val;
2363 
2364 	/* Place map_cmdline_to_pid array right after saved_cmdlines */
2365 	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2366 
2367 	s->cmdline_idx = 0;
2368 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2369 	       sizeof(s->map_pid_to_cmdline));
2370 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2371 	       val * sizeof(*s->map_cmdline_to_pid));
2372 
2373 	return s;
2374 }
2375 
trace_create_savedcmd(void)2376 static int trace_create_savedcmd(void)
2377 {
2378 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2379 
2380 	return savedcmd ? 0 : -ENOMEM;
2381 }
2382 
is_tracing_stopped(void)2383 int is_tracing_stopped(void)
2384 {
2385 	return global_trace.stop_count;
2386 }
2387 
tracing_start_tr(struct trace_array * tr)2388 static void tracing_start_tr(struct trace_array *tr)
2389 {
2390 	struct trace_buffer *buffer;
2391 	unsigned long flags;
2392 
2393 	if (tracing_disabled)
2394 		return;
2395 
2396 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2397 	if (--tr->stop_count) {
2398 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2399 			/* Someone screwed up their debugging */
2400 			tr->stop_count = 0;
2401 		}
2402 		goto out;
2403 	}
2404 
2405 	/* Prevent the buffers from switching */
2406 	arch_spin_lock(&tr->max_lock);
2407 
2408 	buffer = tr->array_buffer.buffer;
2409 	if (buffer)
2410 		ring_buffer_record_enable(buffer);
2411 
2412 #ifdef CONFIG_TRACER_MAX_TRACE
2413 	buffer = tr->max_buffer.buffer;
2414 	if (buffer)
2415 		ring_buffer_record_enable(buffer);
2416 #endif
2417 
2418 	arch_spin_unlock(&tr->max_lock);
2419 
2420  out:
2421 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2422 }
2423 
2424 /**
2425  * tracing_start - quick start of the tracer
2426  *
2427  * If tracing is enabled but was stopped by tracing_stop,
2428  * this will start the tracer back up.
2429  */
tracing_start(void)2430 void tracing_start(void)
2431 
2432 {
2433 	return tracing_start_tr(&global_trace);
2434 }
2435 
tracing_stop_tr(struct trace_array * tr)2436 static void tracing_stop_tr(struct trace_array *tr)
2437 {
2438 	struct trace_buffer *buffer;
2439 	unsigned long flags;
2440 
2441 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2442 	if (tr->stop_count++)
2443 		goto out;
2444 
2445 	/* Prevent the buffers from switching */
2446 	arch_spin_lock(&tr->max_lock);
2447 
2448 	buffer = tr->array_buffer.buffer;
2449 	if (buffer)
2450 		ring_buffer_record_disable(buffer);
2451 
2452 #ifdef CONFIG_TRACER_MAX_TRACE
2453 	buffer = tr->max_buffer.buffer;
2454 	if (buffer)
2455 		ring_buffer_record_disable(buffer);
2456 #endif
2457 
2458 	arch_spin_unlock(&tr->max_lock);
2459 
2460  out:
2461 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2462 }
2463 
2464 /**
2465  * tracing_stop - quick stop of the tracer
2466  *
2467  * Light weight way to stop tracing. Use in conjunction with
2468  * tracing_start.
2469  */
tracing_stop(void)2470 void tracing_stop(void)
2471 {
2472 	return tracing_stop_tr(&global_trace);
2473 }
2474 
trace_save_cmdline(struct task_struct * tsk)2475 static int trace_save_cmdline(struct task_struct *tsk)
2476 {
2477 	unsigned tpid, idx;
2478 
2479 	/* treat recording of idle task as a success */
2480 	if (!tsk->pid)
2481 		return 1;
2482 
2483 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2484 
2485 	/*
2486 	 * It's not the end of the world if we don't get
2487 	 * the lock, but we also don't want to spin
2488 	 * nor do we want to disable interrupts,
2489 	 * so if we miss here, then better luck next time.
2490 	 *
2491 	 * This is called within the scheduler and wake up, so interrupts
2492 	 * had better been disabled and run queue lock been held.
2493 	 */
2494 	lockdep_assert_preemption_disabled();
2495 	if (!arch_spin_trylock(&trace_cmdline_lock))
2496 		return 0;
2497 
2498 	idx = savedcmd->map_pid_to_cmdline[tpid];
2499 	if (idx == NO_CMDLINE_MAP) {
2500 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2501 
2502 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2503 		savedcmd->cmdline_idx = idx;
2504 	}
2505 
2506 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2507 	set_cmdline(idx, tsk->comm);
2508 
2509 	arch_spin_unlock(&trace_cmdline_lock);
2510 
2511 	return 1;
2512 }
2513 
__trace_find_cmdline(int pid,char comm[])2514 static void __trace_find_cmdline(int pid, char comm[])
2515 {
2516 	unsigned map;
2517 	int tpid;
2518 
2519 	if (!pid) {
2520 		strcpy(comm, "<idle>");
2521 		return;
2522 	}
2523 
2524 	if (WARN_ON_ONCE(pid < 0)) {
2525 		strcpy(comm, "<XXX>");
2526 		return;
2527 	}
2528 
2529 	tpid = pid & (PID_MAX_DEFAULT - 1);
2530 	map = savedcmd->map_pid_to_cmdline[tpid];
2531 	if (map != NO_CMDLINE_MAP) {
2532 		tpid = savedcmd->map_cmdline_to_pid[map];
2533 		if (tpid == pid) {
2534 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2535 			return;
2536 		}
2537 	}
2538 	strcpy(comm, "<...>");
2539 }
2540 
trace_find_cmdline(int pid,char comm[])2541 void trace_find_cmdline(int pid, char comm[])
2542 {
2543 	preempt_disable();
2544 	arch_spin_lock(&trace_cmdline_lock);
2545 
2546 	__trace_find_cmdline(pid, comm);
2547 
2548 	arch_spin_unlock(&trace_cmdline_lock);
2549 	preempt_enable();
2550 }
2551 
trace_find_tgid_ptr(int pid)2552 static int *trace_find_tgid_ptr(int pid)
2553 {
2554 	/*
2555 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2556 	 * if we observe a non-NULL tgid_map then we also observe the correct
2557 	 * tgid_map_max.
2558 	 */
2559 	int *map = smp_load_acquire(&tgid_map);
2560 
2561 	if (unlikely(!map || pid > tgid_map_max))
2562 		return NULL;
2563 
2564 	return &map[pid];
2565 }
2566 
/* Return the tgid recorded for @pid, or 0 when there is no map entry */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
2573 
trace_save_tgid(struct task_struct * tsk)2574 static int trace_save_tgid(struct task_struct *tsk)
2575 {
2576 	int *ptr;
2577 
2578 	/* treat recording of idle task as a success */
2579 	if (!tsk->pid)
2580 		return 1;
2581 
2582 	ptr = trace_find_tgid_ptr(tsk->pid);
2583 	if (!ptr)
2584 		return 0;
2585 
2586 	*ptr = tsk->tgid;
2587 	return 1;
2588 }
2589 
tracing_record_taskinfo_skip(int flags)2590 static bool tracing_record_taskinfo_skip(int flags)
2591 {
2592 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2593 		return true;
2594 	if (!__this_cpu_read(trace_taskinfo_save))
2595 		return true;
2596 	return false;
2597 }
2598 
2599 /**
2600  * tracing_record_taskinfo - record the task info of a task
2601  *
2602  * @task:  task to record
2603  * @flags: TRACE_RECORD_CMDLINE for recording comm
2604  *         TRACE_RECORD_TGID for recording tgid
2605  */
tracing_record_taskinfo(struct task_struct * task,int flags)2606 void tracing_record_taskinfo(struct task_struct *task, int flags)
2607 {
2608 	bool done;
2609 
2610 	if (tracing_record_taskinfo_skip(flags))
2611 		return;
2612 
2613 	/*
2614 	 * Record as much task information as possible. If some fail, continue
2615 	 * to try to record the others.
2616 	 */
2617 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2618 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2619 
2620 	/* If recording any information failed, retry again soon. */
2621 	if (!done)
2622 		return;
2623 
2624 	__this_cpu_write(trace_taskinfo_save, false);
2625 }
2626 
2627 /**
2628  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2629  *
2630  * @prev: previous task during sched_switch
2631  * @next: next task during sched_switch
2632  * @flags: TRACE_RECORD_CMDLINE for recording comm
2633  *         TRACE_RECORD_TGID for recording tgid
2634  */
tracing_record_taskinfo_sched_switch(struct task_struct * prev,struct task_struct * next,int flags)2635 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2636 					  struct task_struct *next, int flags)
2637 {
2638 	bool done;
2639 
2640 	if (tracing_record_taskinfo_skip(flags))
2641 		return;
2642 
2643 	/*
2644 	 * Record as much task information as possible. If some fail, continue
2645 	 * to try to record the others.
2646 	 */
2647 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2648 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2649 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2650 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2651 
2652 	/* If recording any information failed, retry again soon. */
2653 	if (!done)
2654 		return;
2655 
2656 	__this_cpu_write(trace_taskinfo_save, false);
2657 }
2658 
2659 /* Helpers to record a specific task information */
tracing_record_cmdline(struct task_struct * task)2660 void tracing_record_cmdline(struct task_struct *task)
2661 {
2662 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2663 }
2664 
tracing_record_tgid(struct task_struct * task)2665 void tracing_record_tgid(struct task_struct *task)
2666 {
2667 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2668 }
2669 
2670 /*
2671  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2672  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2673  * simplifies those functions and keeps them in sync.
2674  */
trace_handle_return(struct trace_seq * s)2675 enum print_line_t trace_handle_return(struct trace_seq *s)
2676 {
2677 	return trace_seq_has_overflowed(s) ?
2678 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2679 }
2680 EXPORT_SYMBOL_GPL(trace_handle_return);
2681 
/* Return current->migration_disabled on SMP builds, 0 otherwise */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2690 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2691 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2692 {
2693 	unsigned int trace_flags = irqs_status;
2694 	unsigned int pc;
2695 
2696 	pc = preempt_count();
2697 
2698 	if (pc & NMI_MASK)
2699 		trace_flags |= TRACE_FLAG_NMI;
2700 	if (pc & HARDIRQ_MASK)
2701 		trace_flags |= TRACE_FLAG_HARDIRQ;
2702 	if (in_serving_softirq())
2703 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2704 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2705 		trace_flags |= TRACE_FLAG_BH_OFF;
2706 
2707 	if (tif_need_resched())
2708 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2709 	if (test_preempt_need_resched())
2710 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2711 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2712 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2713 }
2714 
/* Out-of-line wrapper around __trace_buffer_lock_reserve() */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer, int type,
			  unsigned long len, unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2723 
2724 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2725 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2726 static int trace_buffered_event_ref;
2727 
2728 /**
2729  * trace_buffered_event_enable - enable buffering events
2730  *
2731  * When events are being filtered, it is quicker to use a temporary
2732  * buffer to write the event data into if there's a likely chance
2733  * that it will not be committed. The discard of the ring buffer
2734  * is not as fast as committing, and is much slower than copying
2735  * a commit.
2736  *
2737  * When an event is to be filtered, allocate per cpu buffers to
2738  * write the event data into, and if the event is filtered and discarded
2739  * it is simply dropped, otherwise, the entire data is to be committed
2740  * in one shot.
2741  */
trace_buffered_event_enable(void)2742 void trace_buffered_event_enable(void)
2743 {
2744 	struct ring_buffer_event *event;
2745 	struct page *page;
2746 	int cpu;
2747 
2748 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2749 
2750 	if (trace_buffered_event_ref++)
2751 		return;
2752 
2753 	for_each_tracing_cpu(cpu) {
2754 		page = alloc_pages_node(cpu_to_node(cpu),
2755 					GFP_KERNEL | __GFP_NORETRY, 0);
2756 		/* This is just an optimization and can handle failures */
2757 		if (!page) {
2758 			pr_err("Failed to allocate event buffer\n");
2759 			break;
2760 		}
2761 
2762 		event = page_address(page);
2763 		memset(event, 0, sizeof(*event));
2764 
2765 		per_cpu(trace_buffered_event, cpu) = event;
2766 
2767 		preempt_disable();
2768 		if (cpu == smp_processor_id() &&
2769 		    __this_cpu_read(trace_buffered_event) !=
2770 		    per_cpu(trace_buffered_event, cpu))
2771 			WARN_ON_ONCE(1);
2772 		preempt_enable();
2773 	}
2774 }
2775 
enable_trace_buffered_event(void * data)2776 static void enable_trace_buffered_event(void *data)
2777 {
2778 	/* Probably not needed, but do it anyway */
2779 	smp_rmb();
2780 	this_cpu_dec(trace_buffered_event_cnt);
2781 }
2782 
disable_trace_buffered_event(void * data)2783 static void disable_trace_buffered_event(void *data)
2784 {
2785 	this_cpu_inc(trace_buffered_event_cnt);
2786 }
2787 
2788 /**
2789  * trace_buffered_event_disable - disable buffering events
2790  *
2791  * When a filter is removed, it is faster to not use the buffered
2792  * events, and to commit directly into the ring buffer. Free up
2793  * the temp buffers when there are no more users. This requires
2794  * special synchronization with current events.
2795  */
trace_buffered_event_disable(void)2796 void trace_buffered_event_disable(void)
2797 {
2798 	int cpu;
2799 
2800 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2801 
2802 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2803 		return;
2804 
2805 	if (--trace_buffered_event_ref)
2806 		return;
2807 
2808 	/* For each CPU, set the buffer as used. */
2809 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2810 			 NULL, true);
2811 
2812 	/* Wait for all current users to finish */
2813 	synchronize_rcu();
2814 
2815 	for_each_tracing_cpu(cpu) {
2816 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2817 		per_cpu(trace_buffered_event, cpu) = NULL;
2818 	}
2819 
2820 	/*
2821 	 * Wait for all CPUs that potentially started checking if they can use
2822 	 * their event buffer only after the previous synchronize_rcu() call and
2823 	 * they still read a valid pointer from trace_buffered_event. It must be
2824 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2825 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2826 	 */
2827 	synchronize_rcu();
2828 
2829 	/* For each CPU, relinquish the buffer */
2830 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2831 			 true);
2832 }
2833 
2834 static struct trace_buffer *temp_buffer;
2835 
2836 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2837 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2838 			  struct trace_event_file *trace_file,
2839 			  int type, unsigned long len,
2840 			  unsigned int trace_ctx)
2841 {
2842 	struct ring_buffer_event *entry;
2843 	struct trace_array *tr = trace_file->tr;
2844 	int val;
2845 
2846 	*current_rb = tr->array_buffer.buffer;
2847 
2848 	if (!tr->no_filter_buffering_ref &&
2849 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2850 		preempt_disable_notrace();
2851 		/*
2852 		 * Filtering is on, so try to use the per cpu buffer first.
2853 		 * This buffer will simulate a ring_buffer_event,
2854 		 * where the type_len is zero and the array[0] will
2855 		 * hold the full length.
2856 		 * (see include/linux/ring-buffer.h for details on
2857 		 *  how the ring_buffer_event is structured).
2858 		 *
2859 		 * Using a temp buffer during filtering and copying it
2860 		 * on a matched filter is quicker than writing directly
2861 		 * into the ring buffer and then discarding it when
2862 		 * it doesn't match. That is because the discard
2863 		 * requires several atomic operations to get right.
2864 		 * Copying on match and doing nothing on a failed match
2865 		 * is still quicker than no copy on match, but having
2866 		 * to discard out of the ring buffer on a failed match.
2867 		 */
2868 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2869 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2870 
2871 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2872 
2873 			/*
2874 			 * Preemption is disabled, but interrupts and NMIs
2875 			 * can still come in now. If that happens after
2876 			 * the above increment, then it will have to go
2877 			 * back to the old method of allocating the event
2878 			 * on the ring buffer, and if the filter fails, it
2879 			 * will have to call ring_buffer_discard_commit()
2880 			 * to remove it.
2881 			 *
2882 			 * Need to also check the unlikely case that the
2883 			 * length is bigger than the temp buffer size.
2884 			 * If that happens, then the reserve is pretty much
2885 			 * guaranteed to fail, as the ring buffer currently
2886 			 * only allows events less than a page. But that may
2887 			 * change in the future, so let the ring buffer reserve
2888 			 * handle the failure in that case.
2889 			 */
2890 			if (val == 1 && likely(len <= max_len)) {
2891 				trace_event_setup(entry, type, trace_ctx);
2892 				entry->array[0] = len;
2893 				/* Return with preemption disabled */
2894 				return entry;
2895 			}
2896 			this_cpu_dec(trace_buffered_event_cnt);
2897 		}
2898 		/* __trace_buffer_lock_reserve() disables preemption */
2899 		preempt_enable_notrace();
2900 	}
2901 
2902 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2903 					    trace_ctx);
2904 	/*
2905 	 * If tracing is off, but we have triggers enabled
2906 	 * we still need to look at the event data. Use the temp_buffer
2907 	 * to store the trace event for the trigger to use. It's recursive
2908 	 * safe and will not be recorded anywhere.
2909 	 */
2910 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2911 		*current_rb = temp_buffer;
2912 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2913 						    trace_ctx);
2914 	}
2915 	return entry;
2916 }
2917 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2918 
2919 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2920 static DEFINE_MUTEX(tracepoint_printk_mutex);
2921 
output_printk(struct trace_event_buffer * fbuffer)2922 static void output_printk(struct trace_event_buffer *fbuffer)
2923 {
2924 	struct trace_event_call *event_call;
2925 	struct trace_event_file *file;
2926 	struct trace_event *event;
2927 	unsigned long flags;
2928 	struct trace_iterator *iter = tracepoint_print_iter;
2929 
2930 	/* We should never get here if iter is NULL */
2931 	if (WARN_ON_ONCE(!iter))
2932 		return;
2933 
2934 	event_call = fbuffer->trace_file->event_call;
2935 	if (!event_call || !event_call->event.funcs ||
2936 	    !event_call->event.funcs->trace)
2937 		return;
2938 
2939 	file = fbuffer->trace_file;
2940 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2941 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2942 	     !filter_match_preds(file->filter, fbuffer->entry)))
2943 		return;
2944 
2945 	event = &fbuffer->trace_file->event_call->event;
2946 
2947 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2948 	trace_seq_init(&iter->seq);
2949 	iter->ent = fbuffer->entry;
2950 	event_call->event.funcs->trace(iter, 0, event);
2951 	trace_seq_putc(&iter->seq, 0);
2952 	printk("%s", iter->seq.buffer);
2953 
2954 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2955 }
2956 
/*
 * sysctl handler for tracepoint_printk.
 *
 * Lets proc_dointvec() read or update the integer, then brings the
 * tracepoint_printk_key static key in line with the new value if it
 * changed. Everything runs under tracepoint_printk_mutex.
 *
 * Returns whatever proc_dointvec() returned.
 */
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int old_val;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);

	old_val = tracepoint_printk;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * tracepoint_printk must stay zero while tracepoint_print_iter
	 * is not allocated, so undo whatever was just written. This also
	 * makes the comparison below see "no change".
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	if (tracepoint_printk != old_val) {
		if (tracepoint_printk)
			static_key_enable(&tracepoint_printk_key.key);
		else
			static_key_disable(&tracepoint_printk_key.key);
	}

	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
2989 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2990 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2991 {
2992 	enum event_trigger_type tt = ETT_NONE;
2993 	struct trace_event_file *file = fbuffer->trace_file;
2994 
2995 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2996 			fbuffer->entry, &tt))
2997 		goto discard;
2998 
2999 	if (static_key_false(&tracepoint_printk_key.key))
3000 		output_printk(fbuffer);
3001 
3002 	if (static_branch_unlikely(&trace_event_exports_enabled))
3003 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3004 
3005 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3006 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3007 
3008 discard:
3009 	if (tt)
3010 		event_triggers_post_call(file, tt);
3011 
3012 }
3013 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3014 
/*
 * Number of frames to drop from a kernel stack trace taken without
 * pt_regs, so that the trace starts at the tracepoint's caller:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
#define STACK_SKIP 3

/*
 * Commit @event to @buffer and then record the kernel and user stack
 * traces for it (each helper checks its own trace flag on @tr).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	int skip = regs ? 0 : STACK_SKIP;

	__buffer_unlock_commit(buffer, event);

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
3041 
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 *
 * Only commits @event to @buffer; neither the kernel nor the user
 * stack trace helpers are invoked.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
3051 
3052 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx)3053 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3054 	       parent_ip, unsigned int trace_ctx)
3055 {
3056 	struct trace_event_call *call = &event_function;
3057 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3058 	struct ring_buffer_event *event;
3059 	struct ftrace_entry *entry;
3060 
3061 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3062 					    trace_ctx);
3063 	if (!event)
3064 		return;
3065 	entry	= ring_buffer_event_data(event);
3066 	entry->ip			= ip;
3067 	entry->parent_ip		= parent_ip;
3068 
3069 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3070 		if (static_branch_unlikely(&trace_function_exports_enabled))
3071 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3072 		__buffer_unlock_commit(buffer, event);
3073 	}
3074 }
3075 
3076 #ifdef CONFIG_STACKTRACE
3077 
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of return addresses that fit in one nesting level's scratch array */
#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)

/* Scratch array that __ftrace_trace_stack() saves one stack trace into */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch array per nesting level */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU scratch arrays, indexed by the nesting level taken from ftrace_stack_reserve */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU count of __ftrace_trace_stack() invocations currently in progress */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3094 
/*
 * Save the current kernel stack trace (or the one described by @regs)
 * and record it in @buffer as a TRACE_STACK event.
 *
 * @buffer:    ring buffer to write the event into
 * @trace_ctx: trace context passed through to the reserve call
 * @skip:      number of leading frames to leave out of the trace
 * @regs:      if non-NULL, unwind from these registers instead of from
 *             the current location
 *
 * The trace is first saved into a per-CPU scratch array selected by the
 * current nesting level, then copied into an event sized for exactly
 * the number of entries that were saved. Nothing is recorded if the
 * nesting level is exhausted, the reserve fails, or the
 * event_kernel_stack filter discards the entry.
 */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;

	/*
	 * Add one, for this function and the call to stack_trace_save()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	preempt_disable_notrace();

	/* Claim the next nesting level on this CPU; released again at "out" */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

	/* Reserve only as much room as the saved entries need */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
3163 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3164 static inline void ftrace_trace_stack(struct trace_array *tr,
3165 				      struct trace_buffer *buffer,
3166 				      unsigned int trace_ctx,
3167 				      int skip, struct pt_regs *regs)
3168 {
3169 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3170 		return;
3171 
3172 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3173 }
3174 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3175 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3176 		   int skip)
3177 {
3178 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3179 
3180 	if (rcu_is_watching()) {
3181 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182 		return;
3183 	}
3184 
3185 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3186 		return;
3187 
3188 	/*
3189 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3190 	 * but if the above rcu_is_watching() failed, then the NMI
3191 	 * triggered someplace critical, and ct_irq_enter() should
3192 	 * not be called from NMI.
3193 	 */
3194 	if (unlikely(in_nmi()))
3195 		return;
3196 
3197 	ct_irq_enter_irqson();
3198 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3199 	ct_irq_exit_irqson();
3200 }
3201 
3202 /**
3203  * trace_dump_stack - record a stack back trace in the trace buffer
3204  * @skip: Number of functions to skip (helper handlers)
3205  */
trace_dump_stack(int skip)3206 void trace_dump_stack(int skip)
3207 {
3208 	if (tracing_disabled || tracing_selftest_running)
3209 		return;
3210 
3211 #ifndef CONFIG_UNWINDER_ORC
3212 	/* Skip 1 to skip this function. */
3213 	skip++;
3214 #endif
3215 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3216 			     tracing_gen_ctx(), skip, NULL);
3217 }
3218 EXPORT_SYMBOL_GPL(trace_dump_stack);
3219 
3220 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3221 static DEFINE_PER_CPU(int, user_stack_count);
3222 
3223 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3224 ftrace_trace_userstack(struct trace_array *tr,
3225 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3226 {
3227 	struct trace_event_call *call = &event_user_stack;
3228 	struct ring_buffer_event *event;
3229 	struct userstack_entry *entry;
3230 
3231 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3232 		return;
3233 
3234 	/*
3235 	 * NMIs can not handle page faults, even with fix ups.
3236 	 * The save user stack can (and often does) fault.
3237 	 */
3238 	if (unlikely(in_nmi()))
3239 		return;
3240 
3241 	/*
3242 	 * prevent recursion, since the user stack tracing may
3243 	 * trigger other kernel events.
3244 	 */
3245 	preempt_disable();
3246 	if (__this_cpu_read(user_stack_count))
3247 		goto out;
3248 
3249 	__this_cpu_inc(user_stack_count);
3250 
3251 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3252 					    sizeof(*entry), trace_ctx);
3253 	if (!event)
3254 		goto out_drop_count;
3255 	entry	= ring_buffer_event_data(event);
3256 
3257 	entry->tgid		= current->tgid;
3258 	memset(&entry->caller, 0, sizeof(entry->caller));
3259 
3260 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3261 	if (!call_filter_check_discard(call, entry, buffer, event))
3262 		__buffer_unlock_commit(buffer, event);
3263 
3264  out_drop_count:
3265 	__this_cpu_dec(user_stack_count);
3266  out:
3267 	preempt_enable();
3268 }
3269 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No-op variant used when CONFIG_USER_STACKTRACE_SUPPORT is not set */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3275 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3276 
3277 #endif /* CONFIG_STACKTRACE */
3278 
3279 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3280 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3281 			  unsigned long long delta)
3282 {
3283 	entry->bottom_delta_ts = delta & U32_MAX;
3284 	entry->top_delta_ts = (delta >> 32);
3285 }
3286 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3287 void trace_last_func_repeats(struct trace_array *tr,
3288 			     struct trace_func_repeats *last_info,
3289 			     unsigned int trace_ctx)
3290 {
3291 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3292 	struct func_repeats_entry *entry;
3293 	struct ring_buffer_event *event;
3294 	u64 delta;
3295 
3296 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3297 					    sizeof(*entry), trace_ctx);
3298 	if (!event)
3299 		return;
3300 
3301 	delta = ring_buffer_event_time_stamp(buffer, event) -
3302 		last_info->ts_last_call;
3303 
3304 	entry = ring_buffer_event_data(event);
3305 	entry->ip = last_info->ip;
3306 	entry->parent_ip = last_info->parent_ip;
3307 	entry->count = last_info->count;
3308 	func_repeats_set_delta_ts(entry, delta);
3309 
3310 	__buffer_unlock_commit(buffer, event);
3311 }
3312 
/*
 * created for use with alloc_percpu
 *
 * Scratch space used while formatting trace_printk() style messages.
 * get_trace_buf() hands out buffer[nesting] and bumps nesting;
 * put_trace_buf() drops nesting again.
 */
struct trace_buffer_struct {
	int nesting;
	char buffer[4][TRACE_BUF_SIZE];
};

/* NULL until alloc_percpu_trace_buffer() has allocated the per-CPU buffers */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3320 
3321 /*
3322  * This allows for lockless recording.  If we're nested too deeply, then
3323  * this returns NULL.
3324  */
get_trace_buf(void)3325 static char *get_trace_buf(void)
3326 {
3327 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3328 
3329 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3330 		return NULL;
3331 
3332 	buffer->nesting++;
3333 
3334 	/* Interrupts must see nesting incremented before we use the buffer */
3335 	barrier();
3336 	return &buffer->buffer[buffer->nesting - 1][0];
3337 }
3338 
/*
 * Release the scratch buffer claimed by the matching get_trace_buf()
 * by dropping this CPU's nesting count.
 */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3345 
alloc_percpu_trace_buffer(void)3346 static int alloc_percpu_trace_buffer(void)
3347 {
3348 	struct trace_buffer_struct __percpu *buffers;
3349 
3350 	if (trace_percpu_buffer)
3351 		return 0;
3352 
3353 	buffers = alloc_percpu(struct trace_buffer_struct);
3354 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3355 		return -ENOMEM;
3356 
3357 	trace_percpu_buffer = buffers;
3358 	return 0;
3359 }
3360 
3361 static int buffers_allocated;
3362 
trace_printk_init_buffers(void)3363 void trace_printk_init_buffers(void)
3364 {
3365 	if (buffers_allocated)
3366 		return;
3367 
3368 	if (alloc_percpu_trace_buffer())
3369 		return;
3370 
3371 	/* trace_printk() is for debug use only. Don't use it in production. */
3372 
3373 	pr_warn("\n");
3374 	pr_warn("**********************************************************\n");
3375 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3376 	pr_warn("**                                                      **\n");
3377 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3378 	pr_warn("**                                                      **\n");
3379 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3380 	pr_warn("** unsafe for production use.                           **\n");
3381 	pr_warn("**                                                      **\n");
3382 	pr_warn("** If you see this message and you are not debugging    **\n");
3383 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3384 	pr_warn("**                                                      **\n");
3385 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3386 	pr_warn("**********************************************************\n");
3387 
3388 	/* Expand the buffers to set size */
3389 	tracing_update_buffers();
3390 
3391 	buffers_allocated = 1;
3392 
3393 	/*
3394 	 * trace_printk_init_buffers() can be called by modules.
3395 	 * If that happens, then we need to start cmdline recording
3396 	 * directly here. If the global_trace.buffer is already
3397 	 * allocated here, then this was called by module code.
3398 	 */
3399 	if (global_trace.array_buffer.buffer)
3400 		tracing_start_cmdline_record();
3401 }
3402 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3403 
trace_printk_start_comm(void)3404 void trace_printk_start_comm(void)
3405 {
3406 	/* Start tracing comms if trace printk is set */
3407 	if (!buffers_allocated)
3408 		return;
3409 	tracing_start_cmdline_record();
3410 }
3411 
trace_printk_start_stop_comm(int enabled)3412 static void trace_printk_start_stop_comm(int enabled)
3413 {
3414 	if (!buffers_allocated)
3415 		return;
3416 
3417 	if (enabled)
3418 		tracing_start_cmdline_record();
3419 	else
3420 		tracing_stop_cmdline_record();
3421 }
3422 
3423 /**
3424  * trace_vbprintk - write binary msg to tracing buffer
3425  * @ip:    The address of the caller
3426  * @fmt:   The string format to write to the buffer
3427  * @args:  Arguments for @fmt
3428  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3429 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3430 {
3431 	struct trace_event_call *call = &event_bprint;
3432 	struct ring_buffer_event *event;
3433 	struct trace_buffer *buffer;
3434 	struct trace_array *tr = &global_trace;
3435 	struct bprint_entry *entry;
3436 	unsigned int trace_ctx;
3437 	char *tbuffer;
3438 	int len = 0, size;
3439 
3440 	if (unlikely(tracing_selftest_running || tracing_disabled))
3441 		return 0;
3442 
3443 	/* Don't pollute graph traces with trace_vprintk internals */
3444 	pause_graph_tracing();
3445 
3446 	trace_ctx = tracing_gen_ctx();
3447 	preempt_disable_notrace();
3448 
3449 	tbuffer = get_trace_buf();
3450 	if (!tbuffer) {
3451 		len = 0;
3452 		goto out_nobuffer;
3453 	}
3454 
3455 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3456 
3457 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3458 		goto out_put;
3459 
3460 	size = sizeof(*entry) + sizeof(u32) * len;
3461 	buffer = tr->array_buffer.buffer;
3462 	ring_buffer_nest_start(buffer);
3463 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3464 					    trace_ctx);
3465 	if (!event)
3466 		goto out;
3467 	entry = ring_buffer_event_data(event);
3468 	entry->ip			= ip;
3469 	entry->fmt			= fmt;
3470 
3471 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3472 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3473 		__buffer_unlock_commit(buffer, event);
3474 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3475 	}
3476 
3477 out:
3478 	ring_buffer_nest_end(buffer);
3479 out_put:
3480 	put_trace_buf();
3481 
3482 out_nobuffer:
3483 	preempt_enable_notrace();
3484 	unpause_graph_tracing();
3485 
3486 	return len;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_vbprintk);
3489 
3490 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3491 int __trace_array_vprintk(struct trace_buffer *buffer,
3492 			  unsigned long ip, const char *fmt, va_list args)
3493 {
3494 	struct trace_event_call *call = &event_print;
3495 	struct ring_buffer_event *event;
3496 	int len = 0, size;
3497 	struct print_entry *entry;
3498 	unsigned int trace_ctx;
3499 	char *tbuffer;
3500 
3501 	if (tracing_disabled)
3502 		return 0;
3503 
3504 	/* Don't pollute graph traces with trace_vprintk internals */
3505 	pause_graph_tracing();
3506 
3507 	trace_ctx = tracing_gen_ctx();
3508 	preempt_disable_notrace();
3509 
3510 
3511 	tbuffer = get_trace_buf();
3512 	if (!tbuffer) {
3513 		len = 0;
3514 		goto out_nobuffer;
3515 	}
3516 
3517 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3518 
3519 	size = sizeof(*entry) + len + 1;
3520 	ring_buffer_nest_start(buffer);
3521 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3522 					    trace_ctx);
3523 	if (!event)
3524 		goto out;
3525 	entry = ring_buffer_event_data(event);
3526 	entry->ip = ip;
3527 
3528 	memcpy(&entry->buf, tbuffer, len + 1);
3529 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3530 		__buffer_unlock_commit(buffer, event);
3531 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3532 	}
3533 
3534 out:
3535 	ring_buffer_nest_end(buffer);
3536 	put_trace_buf();
3537 
3538 out_nobuffer:
3539 	preempt_enable_notrace();
3540 	unpause_graph_tracing();
3541 
3542 	return len;
3543 }
3544 
/*
 * va_list front end for printing into the main buffer of @tr.
 *
 * Writes to the top level trace array are suppressed (returning 0)
 * while a tracing selftest is running.
 */
int trace_array_vprintk(struct trace_array *tr,
			unsigned long ip, const char *fmt, va_list args)
{
	bool selftest_on_global = tracing_selftest_running && tr == &global_trace;

	if (selftest_on_global)
		return 0;

	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
}
3553 
3554 /**
3555  * trace_array_printk - Print a message to a specific instance
3556  * @tr: The instance trace_array descriptor
3557  * @ip: The instruction pointer that this is called from.
3558  * @fmt: The format to print (printf format)
3559  *
3560  * If a subsystem sets up its own instance, they have the right to
3561  * printk strings into their tracing instance buffer using this
3562  * function. Note, this function will not write into the top level
3563  * buffer (use trace_printk() for that), as writing into the top level
3564  * buffer should only have events that can be individually disabled.
3565  * trace_printk() is only used for debugging a kernel, and should not
3566  * be ever incorporated in normal use.
3567  *
3568  * trace_array_printk() can be used, as it will not add noise to the
3569  * top level tracing buffer.
3570  *
3571  * Note, trace_array_init_printk() must be called on @tr before this
3572  * can be used.
3573  */
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3574 int trace_array_printk(struct trace_array *tr,
3575 		       unsigned long ip, const char *fmt, ...)
3576 {
3577 	int ret;
3578 	va_list ap;
3579 
3580 	if (!tr)
3581 		return -ENOENT;
3582 
3583 	/* This is only allowed for created instances */
3584 	if (tr == &global_trace)
3585 		return 0;
3586 
3587 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3588 		return 0;
3589 
3590 	va_start(ap, fmt);
3591 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3592 	va_end(ap);
3593 	return ret;
3594 }
3595 EXPORT_SYMBOL_GPL(trace_array_printk);
3596 
3597 /**
3598  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3599  * @tr: The trace array to initialize the buffers for
3600  *
3601  * As trace_array_printk() only writes into instances, they are OK to
3602  * have in the kernel (unlike trace_printk()). This needs to be called
3603  * before trace_array_printk() can be used on a trace_array.
3604  */
trace_array_init_printk(struct trace_array * tr)3605 int trace_array_init_printk(struct trace_array *tr)
3606 {
3607 	if (!tr)
3608 		return -ENOENT;
3609 
3610 	/* This is only allowed for created instances */
3611 	if (tr == &global_trace)
3612 		return -EINVAL;
3613 
3614 	return alloc_percpu_trace_buffer();
3615 }
3616 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3617 
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3618 int trace_array_printk_buf(struct trace_buffer *buffer,
3619 			   unsigned long ip, const char *fmt, ...)
3620 {
3621 	int ret;
3622 	va_list ap;
3623 
3624 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3625 		return 0;
3626 
3627 	va_start(ap, fmt);
3628 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3629 	va_end(ap);
3630 	return ret;
3631 }
3632 
/* va_list printk into the top level trace array; see trace_array_vprintk() */
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
3638 
trace_iterator_increment(struct trace_iterator * iter)3639 static void trace_iterator_increment(struct trace_iterator *iter)
3640 {
3641 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3642 
3643 	iter->idx++;
3644 	if (buf_iter)
3645 		ring_buffer_iter_advance(buf_iter);
3646 }
3647 
3648 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3649 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3650 		unsigned long *lost_events)
3651 {
3652 	struct ring_buffer_event *event;
3653 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3654 
3655 	if (buf_iter) {
3656 		event = ring_buffer_iter_peek(buf_iter, ts);
3657 		if (lost_events)
3658 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3659 				(unsigned long)-1 : 0;
3660 	} else {
3661 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3662 					 lost_events);
3663 	}
3664 
3665 	if (event) {
3666 		iter->ent_size = ring_buffer_event_length(event);
3667 		return ring_buffer_event_data(event);
3668 	}
3669 	iter->ent_size = 0;
3670 	return NULL;
3671 }
3672 
3673 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3674 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3675 		  unsigned long *missing_events, u64 *ent_ts)
3676 {
3677 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3678 	struct trace_entry *ent, *next = NULL;
3679 	unsigned long lost_events = 0, next_lost = 0;
3680 	int cpu_file = iter->cpu_file;
3681 	u64 next_ts = 0, ts;
3682 	int next_cpu = -1;
3683 	int next_size = 0;
3684 	int cpu;
3685 
3686 	/*
3687 	 * If we are in a per_cpu trace file, don't bother by iterating over
3688 	 * all cpu and peek directly.
3689 	 */
3690 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3691 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3692 			return NULL;
3693 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3694 		if (ent_cpu)
3695 			*ent_cpu = cpu_file;
3696 
3697 		return ent;
3698 	}
3699 
3700 	for_each_tracing_cpu(cpu) {
3701 
3702 		if (ring_buffer_empty_cpu(buffer, cpu))
3703 			continue;
3704 
3705 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3706 
3707 		/*
3708 		 * Pick the entry with the smallest timestamp:
3709 		 */
3710 		if (ent && (!next || ts < next_ts)) {
3711 			next = ent;
3712 			next_cpu = cpu;
3713 			next_ts = ts;
3714 			next_lost = lost_events;
3715 			next_size = iter->ent_size;
3716 		}
3717 	}
3718 
3719 	iter->ent_size = next_size;
3720 
3721 	if (ent_cpu)
3722 		*ent_cpu = next_cpu;
3723 
3724 	if (ent_ts)
3725 		*ent_ts = next_ts;
3726 
3727 	if (missing_events)
3728 		*missing_events = next_lost;
3729 
3730 	return next;
3731 }
3732 
3733 #define STATIC_FMT_BUF_SIZE	128
3734 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3735 
trace_iter_expand_format(struct trace_iterator * iter)3736 char *trace_iter_expand_format(struct trace_iterator *iter)
3737 {
3738 	char *tmp;
3739 
3740 	/*
3741 	 * iter->tr is NULL when used with tp_printk, which makes
3742 	 * this get called where it is not safe to call krealloc().
3743 	 */
3744 	if (!iter->tr || iter->fmt == static_fmt_buf)
3745 		return NULL;
3746 
3747 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3748 		       GFP_KERNEL);
3749 	if (tmp) {
3750 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3751 		iter->fmt = tmp;
3752 	}
3753 
3754 	return tmp;
3755 }
3756 
3757 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3758 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3759 {
3760 	unsigned long addr = (unsigned long)str;
3761 	struct trace_event *trace_event;
3762 	struct trace_event_call *event;
3763 
3764 	/* OK if part of the event data */
3765 	if ((addr >= (unsigned long)iter->ent) &&
3766 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3767 		return true;
3768 
3769 	/* OK if part of the temp seq buffer */
3770 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3771 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3772 		return true;
3773 
3774 	/* Core rodata can not be freed */
3775 	if (is_kernel_rodata(addr))
3776 		return true;
3777 
3778 	if (trace_is_tracepoint_string(str))
3779 		return true;
3780 
3781 	/*
3782 	 * Now this could be a module event, referencing core module
3783 	 * data, which is OK.
3784 	 */
3785 	if (!iter->ent)
3786 		return false;
3787 
3788 	trace_event = ftrace_find_event(iter->ent->type);
3789 	if (!trace_event)
3790 		return false;
3791 
3792 	event = container_of(trace_event, struct trace_event_call, event);
3793 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3794 		return false;
3795 
3796 	/* Would rather have rodata, but this will suffice */
3797 	if (within_module_core(addr, event->module))
3798 		return true;
3799 
3800 	return false;
3801 }
3802 
3803 /**
3804  * ignore_event - Check dereferenced fields while writing to the seq buffer
3805  * @iter: The iterator that holds the seq buffer and the event being printed
3806  *
3807  * At boot up, test_event_printk() will flag any event that dereferences
3808  * a string with "%s" that does exist in the ring buffer. It may still
3809  * be valid, as the string may point to a static string in the kernel
3810  * rodata that never gets freed. But if the string pointer is pointing
3811  * to something that was allocated, there's a chance that it can be freed
3812  * by the time the user reads the trace. This would cause a bad memory
3813  * access by the kernel and possibly crash the system.
3814  *
3815  * This function will check if the event has any fields flagged as needing
3816  * to be checked at runtime and perform those checks.
3817  *
3818  * If it is found that a field is unsafe, it will write into the @iter->seq
3819  * a message stating what was found to be unsafe.
3820  *
3821  * @return: true if the event is unsafe and should be ignored,
3822  *          false otherwise.
3823  */
ignore_event(struct trace_iterator * iter)3824 bool ignore_event(struct trace_iterator *iter)
3825 {
3826 	struct ftrace_event_field *field;
3827 	struct trace_event *trace_event;
3828 	struct trace_event_call *event;
3829 	struct list_head *head;
3830 	struct trace_seq *seq;
3831 	const void *ptr;
3832 
3833 	trace_event = ftrace_find_event(iter->ent->type);
3834 
3835 	seq = &iter->seq;
3836 
3837 	if (!trace_event) {
3838 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3839 		return true;
3840 	}
3841 
3842 	event = container_of(trace_event, struct trace_event_call, event);
3843 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3844 		return false;
3845 
3846 	head = trace_get_fields(event);
3847 	if (!head) {
3848 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3849 				 trace_event_name(event));
3850 		return true;
3851 	}
3852 
3853 	/* Offsets are from the iter->ent that points to the raw event */
3854 	ptr = iter->ent;
3855 
3856 	list_for_each_entry(field, head, link) {
3857 		const char *str;
3858 		bool good;
3859 
3860 		if (!field->needs_test)
3861 			continue;
3862 
3863 		str = *(const char **)(ptr + field->offset);
3864 
3865 		good = trace_safe_str(iter, str);
3866 
3867 		/*
3868 		 * If you hit this warning, it is likely that the
3869 		 * trace event in question used %s on a string that
3870 		 * was saved at the time of the event, but may not be
3871 		 * around when the trace is read. Use __string(),
3872 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3873 		 * instead. See samples/trace_events/trace-events-sample.h
3874 		 * for reference.
3875 		 */
3876 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3877 			      trace_event_name(event), field->name)) {
3878 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3879 					 trace_event_name(event), field->name);
3880 			return true;
3881 		}
3882 	}
3883 	return false;
3884 }
3885 
trace_event_format(struct trace_iterator * iter,const char * fmt)3886 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3887 {
3888 	const char *p, *new_fmt;
3889 	char *q;
3890 
3891 	if (WARN_ON_ONCE(!fmt))
3892 		return fmt;
3893 
3894 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3895 		return fmt;
3896 
3897 	p = fmt;
3898 	new_fmt = q = iter->fmt;
3899 	while (*p) {
3900 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3901 			if (!trace_iter_expand_format(iter))
3902 				return fmt;
3903 
3904 			q += iter->fmt - new_fmt;
3905 			new_fmt = iter->fmt;
3906 		}
3907 
3908 		*q++ = *p++;
3909 
3910 		/* Replace %p with %px */
3911 		if (p[-1] == '%') {
3912 			if (p[0] == '%') {
3913 				*q++ = *p++;
3914 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3915 				*q++ = *p++;
3916 				*q++ = 'x';
3917 			}
3918 		}
3919 	}
3920 	*q = '\0';
3921 
3922 	return new_fmt;
3923 }
3924 
3925 #define STATIC_TEMP_BUF_SIZE	128
3926 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3927 
3928 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3929 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3930 					  int *ent_cpu, u64 *ent_ts)
3931 {
3932 	/* __find_next_entry will reset ent_size */
3933 	int ent_size = iter->ent_size;
3934 	struct trace_entry *entry;
3935 
3936 	/*
3937 	 * If called from ftrace_dump(), then the iter->temp buffer
3938 	 * will be the static_temp_buf and not created from kmalloc.
3939 	 * If the entry size is greater than the buffer, we can
3940 	 * not save it. Just return NULL in that case. This is only
3941 	 * used to add markers when two consecutive events' time
3942 	 * stamps have a large delta. See trace_print_lat_context()
3943 	 */
3944 	if (iter->temp == static_temp_buf &&
3945 	    STATIC_TEMP_BUF_SIZE < ent_size)
3946 		return NULL;
3947 
3948 	/*
3949 	 * The __find_next_entry() may call peek_next_entry(), which may
3950 	 * call ring_buffer_peek() that may make the contents of iter->ent
3951 	 * undefined. Need to copy iter->ent now.
3952 	 */
3953 	if (iter->ent && iter->ent != iter->temp) {
3954 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3955 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3956 			void *temp;
3957 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3958 			if (!temp)
3959 				return NULL;
3960 			kfree(iter->temp);
3961 			iter->temp = temp;
3962 			iter->temp_size = iter->ent_size;
3963 		}
3964 		memcpy(iter->temp, iter->ent, iter->ent_size);
3965 		iter->ent = iter->temp;
3966 	}
3967 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3968 	/* Put back the original ent_size */
3969 	iter->ent_size = ent_size;
3970 
3971 	return entry;
3972 }
3973 
3974 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3975 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3976 {
3977 	iter->ent = __find_next_entry(iter, &iter->cpu,
3978 				      &iter->lost_events, &iter->ts);
3979 
3980 	if (iter->ent)
3981 		trace_iterator_increment(iter);
3982 
3983 	return iter->ent ? iter : NULL;
3984 }
3985 
trace_consume(struct trace_iterator * iter)3986 static void trace_consume(struct trace_iterator *iter)
3987 {
3988 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3989 			    &iter->lost_events);
3990 }
3991 
/*
 * seq_file ->next: advance the iterator to the entry at index *pos.
 *
 * *pos is always incremented. Returns NULL when the requested index lies
 * behind the iterator or when the entries run out; otherwise returns the
 * iterator and records the new position in iter->pos.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	const int target = (int)*pos;
	void *cur;

	WARN_ON_ONCE(iter->leftover);

	*pos += 1;

	/* can't go backwards */
	if (iter->idx > target)
		return NULL;

	/* A negative idx means nothing has been fetched yet */
	cur = iter;
	if (iter->idx < 0)
		cur = trace_find_next_entry_inc(iter);

	for (; cur && iter->idx < target; )
		cur = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return cur;
}
4018 
tracing_iter_reset(struct trace_iterator * iter,int cpu)4019 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4020 {
4021 	struct ring_buffer_iter *buf_iter;
4022 	unsigned long entries = 0;
4023 	u64 ts;
4024 
4025 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4026 
4027 	buf_iter = trace_buffer_iter(iter, cpu);
4028 	if (!buf_iter)
4029 		return;
4030 
4031 	ring_buffer_iter_reset(buf_iter);
4032 
4033 	/*
4034 	 * We could have the case with the max latency tracers
4035 	 * that a reset never took place on a cpu. This is evident
4036 	 * by the timestamp being before the start of the buffer.
4037 	 */
4038 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4039 		if (ts >= iter->array_buffer->time_start)
4040 			break;
4041 		entries++;
4042 		ring_buffer_iter_advance(buf_iter);
4043 		/* This could be a big loop */
4044 		cond_resched();
4045 	}
4046 
4047 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4048 }
4049 
4050 /*
4051  * The current tracer is copied to avoid a global locking
4052  * all around.
4053  */
s_start(struct seq_file * m,loff_t * pos)4054 static void *s_start(struct seq_file *m, loff_t *pos)
4055 {
4056 	struct trace_iterator *iter = m->private;
4057 	struct trace_array *tr = iter->tr;
4058 	int cpu_file = iter->cpu_file;
4059 	void *p = NULL;
4060 	loff_t l = 0;
4061 	int cpu;
4062 
4063 	mutex_lock(&trace_types_lock);
4064 	if (unlikely(tr->current_trace != iter->trace)) {
4065 		/* Close iter->trace before switching to the new current tracer */
4066 		if (iter->trace->close)
4067 			iter->trace->close(iter);
4068 		iter->trace = tr->current_trace;
4069 		/* Reopen the new current tracer */
4070 		if (iter->trace->open)
4071 			iter->trace->open(iter);
4072 	}
4073 	mutex_unlock(&trace_types_lock);
4074 
4075 #ifdef CONFIG_TRACER_MAX_TRACE
4076 	if (iter->snapshot && iter->trace->use_max_tr)
4077 		return ERR_PTR(-EBUSY);
4078 #endif
4079 
4080 	if (*pos != iter->pos) {
4081 		iter->ent = NULL;
4082 		iter->cpu = 0;
4083 		iter->idx = -1;
4084 
4085 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4086 			for_each_tracing_cpu(cpu)
4087 				tracing_iter_reset(iter, cpu);
4088 		} else
4089 			tracing_iter_reset(iter, cpu_file);
4090 
4091 		iter->leftover = 0;
4092 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4093 			;
4094 
4095 	} else {
4096 		/*
4097 		 * If we overflowed the seq_file before, then we want
4098 		 * to just reuse the trace_seq buffer again.
4099 		 */
4100 		if (iter->leftover)
4101 			p = iter;
4102 		else {
4103 			l = *pos - 1;
4104 			p = s_next(m, p, &l);
4105 		}
4106 	}
4107 
4108 	trace_event_read_lock();
4109 	trace_access_lock(cpu_file);
4110 	return p;
4111 }
4112 
s_stop(struct seq_file * m,void * p)4113 static void s_stop(struct seq_file *m, void *p)
4114 {
4115 	struct trace_iterator *iter = m->private;
4116 
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118 	if (iter->snapshot && iter->trace->use_max_tr)
4119 		return;
4120 #endif
4121 
4122 	trace_access_unlock(iter->cpu_file);
4123 	trace_event_read_unlock();
4124 }
4125 
4126 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4127 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4128 		      unsigned long *entries, int cpu)
4129 {
4130 	unsigned long count;
4131 
4132 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4133 	/*
4134 	 * If this buffer has skipped entries, then we hold all
4135 	 * entries for the trace and we need to ignore the
4136 	 * ones before the time stamp.
4137 	 */
4138 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4139 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4140 		/* total is the same as the entries */
4141 		*total = count;
4142 	} else
4143 		*total = count +
4144 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4145 	*entries = count;
4146 }
4147 
4148 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4149 get_total_entries(struct array_buffer *buf,
4150 		  unsigned long *total, unsigned long *entries)
4151 {
4152 	unsigned long t, e;
4153 	int cpu;
4154 
4155 	*total = 0;
4156 	*entries = 0;
4157 
4158 	for_each_tracing_cpu(cpu) {
4159 		get_total_entries_cpu(buf, &t, &e, cpu);
4160 		*total += t;
4161 		*entries += e;
4162 	}
4163 }
4164 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4165 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4166 {
4167 	unsigned long total, entries;
4168 
4169 	if (!tr)
4170 		tr = &global_trace;
4171 
4172 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4173 
4174 	return entries;
4175 }
4176 
trace_total_entries(struct trace_array * tr)4177 unsigned long trace_total_entries(struct trace_array *tr)
4178 {
4179 	unsigned long total, entries;
4180 
4181 	if (!tr)
4182 		tr = &global_trace;
4183 
4184 	get_total_entries(&tr->array_buffer, &total, &entries);
4185 
4186 	return entries;
4187 }
4188 
/* Emit the column legend used by the latency trace format. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off/BH-disabled\n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| / _-=> migrate-disable \n"
		"#                  ||||| /     delay           \n"
		"#  cmd     pid     |||||| time  |   caller     \n"
		"#     \\   /        ||||||  \\    |    /       \n";

	seq_puts(m, legend);
}
4201 
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4212 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4213 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4214 				   unsigned int flags)
4215 {
4216 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4217 
4218 	print_event_info(buf, m);
4219 
4220 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4221 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4222 }
4223 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4224 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4225 				       unsigned int flags)
4226 {
4227 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4228 	static const char space[] = "            ";
4229 	int prec = tgid ? 12 : 2;
4230 
4231 	print_event_info(buf, m);
4232 
4233 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4234 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4235 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4236 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4237 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4238 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4239 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4240 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4241 }
4242 
4243 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4244 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4245 {
4246 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4247 	struct array_buffer *buf = iter->array_buffer;
4248 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4249 	struct tracer *type = iter->trace;
4250 	unsigned long entries;
4251 	unsigned long total;
4252 	const char *name = type->name;
4253 
4254 	get_total_entries(buf, &total, &entries);
4255 
4256 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4257 		   name, UTS_RELEASE);
4258 	seq_puts(m, "# -----------------------------------"
4259 		 "---------------------------------\n");
4260 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4261 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4262 		   nsecs_to_usecs(data->saved_latency),
4263 		   entries,
4264 		   total,
4265 		   buf->cpu,
4266 		   preempt_model_none()      ? "server" :
4267 		   preempt_model_voluntary() ? "desktop" :
4268 		   preempt_model_full()      ? "preempt" :
4269 		   preempt_model_rt()        ? "preempt_rt" :
4270 		   "unknown",
4271 		   /* These are reserved for later use */
4272 		   0, 0, 0, 0);
4273 #ifdef CONFIG_SMP
4274 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4275 #else
4276 	seq_puts(m, ")\n");
4277 #endif
4278 	seq_puts(m, "#    -----------------\n");
4279 	seq_printf(m, "#    | task: %.16s-%d "
4280 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4281 		   data->comm, data->pid,
4282 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4283 		   data->policy, data->rt_priority);
4284 	seq_puts(m, "#    -----------------\n");
4285 
4286 	if (data->critical_start) {
4287 		seq_puts(m, "#  => started at: ");
4288 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4289 		trace_print_seq(m, &iter->seq);
4290 		seq_puts(m, "\n#  => ended at:   ");
4291 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4292 		trace_print_seq(m, &iter->seq);
4293 		seq_puts(m, "\n#\n");
4294 	}
4295 
4296 	seq_puts(m, "#\n");
4297 }
4298 
test_cpu_buff_start(struct trace_iterator * iter)4299 static void test_cpu_buff_start(struct trace_iterator *iter)
4300 {
4301 	struct trace_seq *s = &iter->seq;
4302 	struct trace_array *tr = iter->tr;
4303 
4304 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4305 		return;
4306 
4307 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4308 		return;
4309 
4310 	if (cpumask_available(iter->started) &&
4311 	    cpumask_test_cpu(iter->cpu, iter->started))
4312 		return;
4313 
4314 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4315 		return;
4316 
4317 	if (cpumask_available(iter->started))
4318 		cpumask_set_cpu(iter->cpu, iter->started);
4319 
4320 	/* Don't print started cpu buffer for the first entry of the trace */
4321 	if (iter->idx > 1)
4322 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4323 				iter->cpu);
4324 }
4325 
print_trace_fmt(struct trace_iterator * iter)4326 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4327 {
4328 	struct trace_array *tr = iter->tr;
4329 	struct trace_seq *s = &iter->seq;
4330 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4331 	struct trace_entry *entry;
4332 	struct trace_event *event;
4333 
4334 	entry = iter->ent;
4335 
4336 	test_cpu_buff_start(iter);
4337 
4338 	event = ftrace_find_event(entry->type);
4339 
4340 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4341 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4342 			trace_print_lat_context(iter);
4343 		else
4344 			trace_print_context(iter);
4345 	}
4346 
4347 	if (trace_seq_has_overflowed(s))
4348 		return TRACE_TYPE_PARTIAL_LINE;
4349 
4350 	if (event) {
4351 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4352 			return print_event_fields(iter, event);
4353 		return event->funcs->trace(iter, sym_flags, event);
4354 	}
4355 
4356 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4357 
4358 	return trace_handle_return(s);
4359 }
4360 
print_raw_fmt(struct trace_iterator * iter)4361 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4362 {
4363 	struct trace_array *tr = iter->tr;
4364 	struct trace_seq *s = &iter->seq;
4365 	struct trace_entry *entry;
4366 	struct trace_event *event;
4367 
4368 	entry = iter->ent;
4369 
4370 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4371 		trace_seq_printf(s, "%d %d %llu ",
4372 				 entry->pid, iter->cpu, iter->ts);
4373 
4374 	if (trace_seq_has_overflowed(s))
4375 		return TRACE_TYPE_PARTIAL_LINE;
4376 
4377 	event = ftrace_find_event(entry->type);
4378 	if (event)
4379 		return event->funcs->raw(iter, 0, event);
4380 
4381 	trace_seq_printf(s, "%d ?\n", entry->type);
4382 
4383 	return trace_handle_return(s);
4384 }
4385 
print_hex_fmt(struct trace_iterator * iter)4386 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4387 {
4388 	struct trace_array *tr = iter->tr;
4389 	struct trace_seq *s = &iter->seq;
4390 	unsigned char newline = '\n';
4391 	struct trace_entry *entry;
4392 	struct trace_event *event;
4393 
4394 	entry = iter->ent;
4395 
4396 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4397 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4398 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4399 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4400 		if (trace_seq_has_overflowed(s))
4401 			return TRACE_TYPE_PARTIAL_LINE;
4402 	}
4403 
4404 	event = ftrace_find_event(entry->type);
4405 	if (event) {
4406 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4407 		if (ret != TRACE_TYPE_HANDLED)
4408 			return ret;
4409 	}
4410 
4411 	SEQ_PUT_FIELD(s, newline);
4412 
4413 	return trace_handle_return(s);
4414 }
4415 
print_bin_fmt(struct trace_iterator * iter)4416 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4417 {
4418 	struct trace_array *tr = iter->tr;
4419 	struct trace_seq *s = &iter->seq;
4420 	struct trace_entry *entry;
4421 	struct trace_event *event;
4422 
4423 	entry = iter->ent;
4424 
4425 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4426 		SEQ_PUT_FIELD(s, entry->pid);
4427 		SEQ_PUT_FIELD(s, iter->cpu);
4428 		SEQ_PUT_FIELD(s, iter->ts);
4429 		if (trace_seq_has_overflowed(s))
4430 			return TRACE_TYPE_PARTIAL_LINE;
4431 	}
4432 
4433 	event = ftrace_find_event(entry->type);
4434 	return event ? event->funcs->binary(iter, 0, event) :
4435 		TRACE_TYPE_HANDLED;
4436 }
4437 
trace_empty(struct trace_iterator * iter)4438 int trace_empty(struct trace_iterator *iter)
4439 {
4440 	struct ring_buffer_iter *buf_iter;
4441 	int cpu;
4442 
4443 	/* If we are looking at one CPU buffer, only check that one */
4444 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4445 		cpu = iter->cpu_file;
4446 		buf_iter = trace_buffer_iter(iter, cpu);
4447 		if (buf_iter) {
4448 			if (!ring_buffer_iter_empty(buf_iter))
4449 				return 0;
4450 		} else {
4451 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4452 				return 0;
4453 		}
4454 		return 1;
4455 	}
4456 
4457 	for_each_tracing_cpu(cpu) {
4458 		buf_iter = trace_buffer_iter(iter, cpu);
4459 		if (buf_iter) {
4460 			if (!ring_buffer_iter_empty(buf_iter))
4461 				return 0;
4462 		} else {
4463 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4464 				return 0;
4465 		}
4466 	}
4467 
4468 	return 1;
4469 }
4470 
4471 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4472 enum print_line_t print_trace_line(struct trace_iterator *iter)
4473 {
4474 	struct trace_array *tr = iter->tr;
4475 	unsigned long trace_flags = tr->trace_flags;
4476 	enum print_line_t ret;
4477 
4478 	if (iter->lost_events) {
4479 		if (iter->lost_events == (unsigned long)-1)
4480 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4481 					 iter->cpu);
4482 		else
4483 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4484 					 iter->cpu, iter->lost_events);
4485 		if (trace_seq_has_overflowed(&iter->seq))
4486 			return TRACE_TYPE_PARTIAL_LINE;
4487 	}
4488 
4489 	if (iter->trace && iter->trace->print_line) {
4490 		ret = iter->trace->print_line(iter);
4491 		if (ret != TRACE_TYPE_UNHANDLED)
4492 			return ret;
4493 	}
4494 
4495 	if (iter->ent->type == TRACE_BPUTS &&
4496 			trace_flags & TRACE_ITER_PRINTK &&
4497 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4498 		return trace_print_bputs_msg_only(iter);
4499 
4500 	if (iter->ent->type == TRACE_BPRINT &&
4501 			trace_flags & TRACE_ITER_PRINTK &&
4502 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4503 		return trace_print_bprintk_msg_only(iter);
4504 
4505 	if (iter->ent->type == TRACE_PRINT &&
4506 			trace_flags & TRACE_ITER_PRINTK &&
4507 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508 		return trace_print_printk_msg_only(iter);
4509 
4510 	if (trace_flags & TRACE_ITER_BIN)
4511 		return print_bin_fmt(iter);
4512 
4513 	if (trace_flags & TRACE_ITER_HEX)
4514 		return print_hex_fmt(iter);
4515 
4516 	if (trace_flags & TRACE_ITER_RAW)
4517 		return print_raw_fmt(iter);
4518 
4519 	return print_trace_fmt(iter);
4520 }
4521 
trace_latency_header(struct seq_file * m)4522 void trace_latency_header(struct seq_file *m)
4523 {
4524 	struct trace_iterator *iter = m->private;
4525 	struct trace_array *tr = iter->tr;
4526 
4527 	/* print nothing if the buffers are empty */
4528 	if (trace_empty(iter))
4529 		return;
4530 
4531 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4532 		print_trace_header(m, iter);
4533 
4534 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4535 		print_lat_help_header(m);
4536 }
4537 
trace_default_header(struct seq_file * m)4538 void trace_default_header(struct seq_file *m)
4539 {
4540 	struct trace_iterator *iter = m->private;
4541 	struct trace_array *tr = iter->tr;
4542 	unsigned long trace_flags = tr->trace_flags;
4543 
4544 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4545 		return;
4546 
4547 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4548 		/* print nothing if the buffers are empty */
4549 		if (trace_empty(iter))
4550 			return;
4551 		print_trace_header(m, iter);
4552 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4553 			print_lat_help_header(m);
4554 	} else {
4555 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4556 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4557 				print_func_help_header_irq(iter->array_buffer,
4558 							   m, trace_flags);
4559 			else
4560 				print_func_help_header(iter->array_buffer, m,
4561 						       trace_flags);
4562 		}
4563 	}
4564 }
4565 
/* Warn in the output when ftrace_is_dead() reports a dead function tracer. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead()) {
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
	}
}
4573 
4574 #ifdef CONFIG_TRACER_MAX_TRACE
/* Describe the commands accepted by the top level snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4584 
/*
 * Describe the commands accepted by a per_cpu snapshot file. The text
 * for "echo 1" depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char echo1_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char echo1_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif
	static const char echo2_help[] =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, echo1_help);
	seq_puts(m, echo2_help);
}
4599 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4600 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4601 {
4602 	if (iter->tr->allocated_snapshot)
4603 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4604 	else
4605 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4606 
4607 	seq_puts(m, "# Snapshot commands:\n");
4608 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4609 		show_snapshot_main_help(m);
4610 	else
4611 		show_snapshot_percpu_help(m);
4612 }
4613 #else
/* Empty stub for builds without CONFIG_TRACER_MAX_TRACE; should never be called */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4616 #endif
4617 
s_show(struct seq_file * m,void * v)4618 static int s_show(struct seq_file *m, void *v)
4619 {
4620 	struct trace_iterator *iter = v;
4621 	int ret;
4622 
4623 	if (iter->ent == NULL) {
4624 		if (iter->tr) {
4625 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4626 			seq_puts(m, "#\n");
4627 			test_ftrace_alive(m);
4628 		}
4629 		if (iter->snapshot && trace_empty(iter))
4630 			print_snapshot_help(m, iter);
4631 		else if (iter->trace && iter->trace->print_header)
4632 			iter->trace->print_header(m);
4633 		else
4634 			trace_default_header(m);
4635 
4636 	} else if (iter->leftover) {
4637 		/*
4638 		 * If we filled the seq_file buffer earlier, we
4639 		 * want to just show it now.
4640 		 */
4641 		ret = trace_print_seq(m, &iter->seq);
4642 
4643 		/* ret should this time be zero, but you never know */
4644 		iter->leftover = ret;
4645 
4646 	} else {
4647 		ret = print_trace_line(iter);
4648 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4649 			iter->seq.full = 0;
4650 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4651 		}
4652 		ret = trace_print_seq(m, &iter->seq);
4653 		/*
4654 		 * If we overflow the seq_file buffer, then it will
4655 		 * ask us for this data again at start up.
4656 		 * Use that instead.
4657 		 *  ret is 0 if seq_file write succeeded.
4658 		 *        -1 otherwise.
4659 		 */
4660 		iter->leftover = ret;
4661 	}
4662 
4663 	return 0;
4664 }
4665 
4666 /*
4667  * Should be used after trace_array_get(), trace_types_lock
4668  * ensures that i_cdev was already initialized.
4669  */
tracing_get_cpu(struct inode * inode)4670 static inline int tracing_get_cpu(struct inode *inode)
4671 {
4672 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4673 		return (long)inode->i_cdev - 1;
4674 	return RING_BUFFER_ALL_CPUS;
4675 }
4676 
4677 static const struct seq_operations tracer_seq_ops = {
4678 	.start		= s_start,
4679 	.next		= s_next,
4680 	.stop		= s_stop,
4681 	.show		= s_show,
4682 };
4683 
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
free_trace_iter_content(struct trace_iterator * iter)4691 static void free_trace_iter_content(struct trace_iterator *iter)
4692 {
4693 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4694 	if (iter->fmt != static_fmt_buf)
4695 		kfree(iter->fmt);
4696 
4697 	kfree(iter->temp);
4698 	kfree(iter->buffer_iter);
4699 	mutex_destroy(&iter->mutex);
4700 	free_cpumask_var(iter->started);
4701 }
4702 
4703 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4704 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4705 {
4706 	struct trace_array *tr = inode->i_private;
4707 	struct trace_iterator *iter;
4708 	int cpu;
4709 
4710 	if (tracing_disabled)
4711 		return ERR_PTR(-ENODEV);
4712 
4713 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4714 	if (!iter)
4715 		return ERR_PTR(-ENOMEM);
4716 
4717 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4718 				    GFP_KERNEL);
4719 	if (!iter->buffer_iter)
4720 		goto release;
4721 
4722 	/*
4723 	 * trace_find_next_entry() may need to save off iter->ent.
4724 	 * It will place it into the iter->temp buffer. As most
4725 	 * events are less than 128, allocate a buffer of that size.
4726 	 * If one is greater, then trace_find_next_entry() will
4727 	 * allocate a new buffer to adjust for the bigger iter->ent.
4728 	 * It's not critical if it fails to get allocated here.
4729 	 */
4730 	iter->temp = kmalloc(128, GFP_KERNEL);
4731 	if (iter->temp)
4732 		iter->temp_size = 128;
4733 
4734 	/*
4735 	 * trace_event_printf() may need to modify given format
4736 	 * string to replace %p with %px so that it shows real address
4737 	 * instead of hash value. However, that is only for the event
4738 	 * tracing, other tracer may not need. Defer the allocation
4739 	 * until it is needed.
4740 	 */
4741 	iter->fmt = NULL;
4742 	iter->fmt_size = 0;
4743 
4744 	mutex_lock(&trace_types_lock);
4745 	iter->trace = tr->current_trace;
4746 
4747 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4748 		goto fail;
4749 
4750 	iter->tr = tr;
4751 
4752 #ifdef CONFIG_TRACER_MAX_TRACE
4753 	/* Currently only the top directory has a snapshot */
4754 	if (tr->current_trace->print_max || snapshot)
4755 		iter->array_buffer = &tr->max_buffer;
4756 	else
4757 #endif
4758 		iter->array_buffer = &tr->array_buffer;
4759 	iter->snapshot = snapshot;
4760 	iter->pos = -1;
4761 	iter->cpu_file = tracing_get_cpu(inode);
4762 	mutex_init(&iter->mutex);
4763 
4764 	/* Notify the tracer early; before we stop tracing. */
4765 	if (iter->trace->open)
4766 		iter->trace->open(iter);
4767 
4768 	/* Annotate start of buffers if we had overruns */
4769 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4770 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4771 
4772 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4773 	if (trace_clocks[tr->clock_id].in_ns)
4774 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4775 
4776 	/*
4777 	 * If pause-on-trace is enabled, then stop the trace while
4778 	 * dumping, unless this is the "snapshot" file
4779 	 */
4780 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4781 		tracing_stop_tr(tr);
4782 
4783 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4784 		for_each_tracing_cpu(cpu) {
4785 			iter->buffer_iter[cpu] =
4786 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4787 							 cpu, GFP_KERNEL);
4788 		}
4789 		ring_buffer_read_prepare_sync();
4790 		for_each_tracing_cpu(cpu) {
4791 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4792 			tracing_iter_reset(iter, cpu);
4793 		}
4794 	} else {
4795 		cpu = iter->cpu_file;
4796 		iter->buffer_iter[cpu] =
4797 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4798 						 cpu, GFP_KERNEL);
4799 		ring_buffer_read_prepare_sync();
4800 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4801 		tracing_iter_reset(iter, cpu);
4802 	}
4803 
4804 	mutex_unlock(&trace_types_lock);
4805 
4806 	return iter;
4807 
4808  fail:
4809 	mutex_unlock(&trace_types_lock);
4810 	free_trace_iter_content(iter);
4811 release:
4812 	seq_release_private(inode, file);
4813 	return ERR_PTR(-ENOMEM);
4814 }
4815 
tracing_open_generic(struct inode * inode,struct file * filp)4816 int tracing_open_generic(struct inode *inode, struct file *filp)
4817 {
4818 	int ret;
4819 
4820 	ret = tracing_check_open_get_tr(NULL);
4821 	if (ret)
4822 		return ret;
4823 
4824 	filp->private_data = inode->i_private;
4825 	return 0;
4826 }
4827 
tracing_is_disabled(void)4828 bool tracing_is_disabled(void)
4829 {
4830 	return (tracing_disabled) ? true: false;
4831 }
4832 
4833 /*
4834  * Open and update trace_array ref count.
4835  * Must have the current trace_array passed to it.
4836  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4837 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 	int ret;
4841 
4842 	ret = tracing_check_open_get_tr(tr);
4843 	if (ret)
4844 		return ret;
4845 
4846 	filp->private_data = inode->i_private;
4847 
4848 	return 0;
4849 }
4850 
4851 /*
4852  * The private pointer of the inode is the trace_event_file.
4853  * Update the tr ref count associated to it.
4854  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4855 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4856 {
4857 	struct trace_event_file *file = inode->i_private;
4858 	int ret;
4859 
4860 	ret = tracing_check_open_get_tr(file->tr);
4861 	if (ret)
4862 		return ret;
4863 
4864 	mutex_lock(&event_mutex);
4865 
4866 	/* Fail if the file is marked for removal */
4867 	if (file->flags & EVENT_FILE_FL_FREED) {
4868 		trace_array_put(file->tr);
4869 		ret = -ENODEV;
4870 	} else {
4871 		event_file_get(file);
4872 	}
4873 
4874 	mutex_unlock(&event_mutex);
4875 	if (ret)
4876 		return ret;
4877 
4878 	filp->private_data = inode->i_private;
4879 
4880 	return 0;
4881 }
4882 
tracing_release_file_tr(struct inode * inode,struct file * filp)4883 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4884 {
4885 	struct trace_event_file *file = inode->i_private;
4886 
4887 	trace_array_put(file->tr);
4888 	event_file_put(file);
4889 
4890 	return 0;
4891 }
4892 
/*
 * Release handler that first drops the references taken by
 * tracing_open_file_tr() and then finishes with single_release().
 * Returns whatever single_release() returns.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	int ret;

	/* tracing_release_file_tr() only ever returns 0 */
	(void)tracing_release_file_tr(inode, filp);
	ret = single_release(inode, filp);

	return ret;
}
4898 
/*
 * Open handler that calls stream_open() on the file before deferring to
 * tracing_open_generic_tr().  The result of the latter is returned.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);

	return ret;
}
4904 
tracing_release(struct inode * inode,struct file * file)4905 static int tracing_release(struct inode *inode, struct file *file)
4906 {
4907 	struct trace_array *tr = inode->i_private;
4908 	struct seq_file *m = file->private_data;
4909 	struct trace_iterator *iter;
4910 	int cpu;
4911 
4912 	if (!(file->f_mode & FMODE_READ)) {
4913 		trace_array_put(tr);
4914 		return 0;
4915 	}
4916 
4917 	/* Writes do not use seq_file */
4918 	iter = m->private;
4919 	mutex_lock(&trace_types_lock);
4920 
4921 	for_each_tracing_cpu(cpu) {
4922 		if (iter->buffer_iter[cpu])
4923 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4924 	}
4925 
4926 	if (iter->trace && iter->trace->close)
4927 		iter->trace->close(iter);
4928 
4929 	if (!iter->snapshot && tr->stop_count)
4930 		/* reenable tracing if it was previously enabled */
4931 		tracing_start_tr(tr);
4932 
4933 	__trace_array_put(tr);
4934 
4935 	mutex_unlock(&trace_types_lock);
4936 
4937 	free_trace_iter_content(iter);
4938 	seq_release_private(inode, file);
4939 
4940 	return 0;
4941 }
4942 
tracing_release_generic_tr(struct inode * inode,struct file * file)4943 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4944 {
4945 	struct trace_array *tr = inode->i_private;
4946 
4947 	trace_array_put(tr);
4948 	return 0;
4949 }
4950 
tracing_single_release_tr(struct inode * inode,struct file * file)4951 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4952 {
4953 	struct trace_array *tr = inode->i_private;
4954 
4955 	trace_array_put(tr);
4956 
4957 	return single_release(inode, file);
4958 }
4959 
tracing_open(struct inode * inode,struct file * file)4960 static int tracing_open(struct inode *inode, struct file *file)
4961 {
4962 	struct trace_array *tr = inode->i_private;
4963 	struct trace_iterator *iter;
4964 	int ret;
4965 
4966 	ret = tracing_check_open_get_tr(tr);
4967 	if (ret)
4968 		return ret;
4969 
4970 	/* If this file was open for write, then erase contents */
4971 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4972 		int cpu = tracing_get_cpu(inode);
4973 		struct array_buffer *trace_buf = &tr->array_buffer;
4974 
4975 #ifdef CONFIG_TRACER_MAX_TRACE
4976 		if (tr->current_trace->print_max)
4977 			trace_buf = &tr->max_buffer;
4978 #endif
4979 
4980 		if (cpu == RING_BUFFER_ALL_CPUS)
4981 			tracing_reset_online_cpus(trace_buf);
4982 		else
4983 			tracing_reset_cpu(trace_buf, cpu);
4984 	}
4985 
4986 	if (file->f_mode & FMODE_READ) {
4987 		iter = __tracing_open(inode, file, false);
4988 		if (IS_ERR(iter))
4989 			ret = PTR_ERR(iter);
4990 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4991 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4992 	}
4993 
4994 	if (ret < 0)
4995 		trace_array_put(tr);
4996 
4997 	return ret;
4998 }
4999 
5000 /*
5001  * Some tracers are not suitable for instance buffers.
5002  * A tracer is always available for the global array (toplevel)
5003  * or if it explicitly states that it is.
5004  */
5005 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5006 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5007 {
5008 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5009 }
5010 
5011 /* Find the next tracer that this trace array may use */
5012 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5013 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5014 {
5015 	while (t && !trace_ok_for_array(t, tr))
5016 		t = t->next;
5017 
5018 	return t;
5019 }
5020 
5021 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5022 t_next(struct seq_file *m, void *v, loff_t *pos)
5023 {
5024 	struct trace_array *tr = m->private;
5025 	struct tracer *t = v;
5026 
5027 	(*pos)++;
5028 
5029 	if (t)
5030 		t = get_tracer_for_array(tr, t->next);
5031 
5032 	return t;
5033 }
5034 
/*
 * seq_file ->start callback: takes trace_types_lock (released again in
 * t_stop()) and returns the usable tracer at index *pos, counted from
 * the head of trace_types, or NULL if there are fewer usable tracers.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t idx = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	while (t && idx < *pos)
		t = t_next(m, t, &idx);

	return t;
}
5049 
t_stop(struct seq_file * m,void * p)5050 static void t_stop(struct seq_file *m, void *p)
5051 {
5052 	mutex_unlock(&trace_types_lock);
5053 }
5054 
t_show(struct seq_file * m,void * v)5055 static int t_show(struct seq_file *m, void *v)
5056 {
5057 	struct tracer *t = v;
5058 
5059 	if (!t)
5060 		return 0;
5061 
5062 	seq_puts(m, t->name);
5063 	if (t->next)
5064 		seq_putc(m, ' ');
5065 	else
5066 		seq_putc(m, '\n');
5067 
5068 	return 0;
5069 }
5070 
5071 static const struct seq_operations show_traces_seq_ops = {
5072 	.start		= t_start,
5073 	.next		= t_next,
5074 	.stop		= t_stop,
5075 	.show		= t_show,
5076 };
5077 
show_traces_open(struct inode * inode,struct file * file)5078 static int show_traces_open(struct inode *inode, struct file *file)
5079 {
5080 	struct trace_array *tr = inode->i_private;
5081 	struct seq_file *m;
5082 	int ret;
5083 
5084 	ret = tracing_check_open_get_tr(tr);
5085 	if (ret)
5086 		return ret;
5087 
5088 	ret = seq_open(file, &show_traces_seq_ops);
5089 	if (ret) {
5090 		trace_array_put(tr);
5091 		return ret;
5092 	}
5093 
5094 	m = file->private_data;
5095 	m->private = tr;
5096 
5097 	return 0;
5098 }
5099 
show_traces_release(struct inode * inode,struct file * file)5100 static int show_traces_release(struct inode *inode, struct file *file)
5101 {
5102 	struct trace_array *tr = inode->i_private;
5103 
5104 	trace_array_put(tr);
5105 	return seq_release(inode, file);
5106 }
5107 
5108 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5109 tracing_write_stub(struct file *filp, const char __user *ubuf,
5110 		   size_t count, loff_t *ppos)
5111 {
5112 	return count;
5113 }
5114 
/*
 * llseek handler for tracing files.
 *
 * Files opened for reading are backed by a seq_file, so the seek is
 * delegated to seq_lseek() and its loff_t result (new position or
 * negative error) is returned unmodified.  Files opened without read
 * access have no seq_file; their position is simply reset to 0.
 *
 * The result is kept as loff_t throughout: funnelling it through an int
 * would truncate positions larger than INT_MAX.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (file->f_mode & FMODE_READ)
		return seq_lseek(file, offset, whence);

	file->f_pos = 0;
	return 0;
}
5126 
5127 static const struct file_operations tracing_fops = {
5128 	.open		= tracing_open,
5129 	.read		= seq_read,
5130 	.read_iter	= seq_read_iter,
5131 	.splice_read	= copy_splice_read,
5132 	.write		= tracing_write_stub,
5133 	.llseek		= tracing_lseek,
5134 	.release	= tracing_release,
5135 };
5136 
5137 static const struct file_operations show_traces_fops = {
5138 	.open		= show_traces_open,
5139 	.read		= seq_read,
5140 	.llseek		= seq_lseek,
5141 	.release	= show_traces_release,
5142 };
5143 
5144 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5145 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5146 		     size_t count, loff_t *ppos)
5147 {
5148 	struct trace_array *tr = file_inode(filp)->i_private;
5149 	char *mask_str;
5150 	int len;
5151 
5152 	len = snprintf(NULL, 0, "%*pb\n",
5153 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5154 	mask_str = kmalloc(len, GFP_KERNEL);
5155 	if (!mask_str)
5156 		return -ENOMEM;
5157 
5158 	len = snprintf(mask_str, len, "%*pb\n",
5159 		       cpumask_pr_args(tr->tracing_cpumask));
5160 	if (len >= count) {
5161 		count = -EINVAL;
5162 		goto out_err;
5163 	}
5164 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5165 
5166 out_err:
5167 	kfree(mask_str);
5168 
5169 	return count;
5170 }
5171 
/*
 * Install @tracing_cpumask_new as the tracing cpumask of @tr.
 *
 * With interrupts disabled and tr->max_lock held, every tracing CPU
 * whose bit differs between the old and the new mask has its per-CPU
 * "disabled" counter adjusted and ring buffer recording switched off
 * (bit cleared) or on (bit set), for the max buffer as well when
 * CONFIG_TRACER_MAX_TRACE is enabled.  The new mask is copied into
 * tr->tracing_cpumask after the lock has been released.
 *
 * Returns 0 on success or -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		bool traced_now = cpumask_test_cpu(cpu, tr->tracing_cpumask);
		bool traced_next = cpumask_test_cpu(cpu, tracing_cpumask_new);

		/* Only CPUs whose bit is about to flip need any work */
		if (traced_now == traced_next)
			continue;

		if (traced_now) {
			/* Bit goes 1 -> 0: stop recording on this CPU */
			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
		} else {
			/* Bit goes 0 -> 1: resume recording on this CPU */
			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
5211 
5212 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5213 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5214 		      size_t count, loff_t *ppos)
5215 {
5216 	struct trace_array *tr = file_inode(filp)->i_private;
5217 	cpumask_var_t tracing_cpumask_new;
5218 	int err;
5219 
5220 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5221 		return -EINVAL;
5222 
5223 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5224 		return -ENOMEM;
5225 
5226 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5227 	if (err)
5228 		goto err_free;
5229 
5230 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5231 	if (err)
5232 		goto err_free;
5233 
5234 	free_cpumask_var(tracing_cpumask_new);
5235 
5236 	return count;
5237 
5238 err_free:
5239 	free_cpumask_var(tracing_cpumask_new);
5240 
5241 	return err;
5242 }
5243 
5244 static const struct file_operations tracing_cpumask_fops = {
5245 	.open		= tracing_open_generic_tr,
5246 	.read		= tracing_cpumask_read,
5247 	.write		= tracing_cpumask_write,
5248 	.release	= tracing_release_generic_tr,
5249 	.llseek		= generic_file_llseek,
5250 };
5251 
tracing_trace_options_show(struct seq_file * m,void * v)5252 static int tracing_trace_options_show(struct seq_file *m, void *v)
5253 {
5254 	struct tracer_opt *trace_opts;
5255 	struct trace_array *tr = m->private;
5256 	u32 tracer_flags;
5257 	int i;
5258 
5259 	mutex_lock(&trace_types_lock);
5260 	tracer_flags = tr->current_trace->flags->val;
5261 	trace_opts = tr->current_trace->flags->opts;
5262 
5263 	for (i = 0; trace_options[i]; i++) {
5264 		if (tr->trace_flags & (1 << i))
5265 			seq_printf(m, "%s\n", trace_options[i]);
5266 		else
5267 			seq_printf(m, "no%s\n", trace_options[i]);
5268 	}
5269 
5270 	for (i = 0; trace_opts[i].name; i++) {
5271 		if (tracer_flags & trace_opts[i].bit)
5272 			seq_printf(m, "%s\n", trace_opts[i].name);
5273 		else
5274 			seq_printf(m, "no%s\n", trace_opts[i].name);
5275 	}
5276 	mutex_unlock(&trace_types_lock);
5277 
5278 	return 0;
5279 }
5280 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5281 static int __set_tracer_option(struct trace_array *tr,
5282 			       struct tracer_flags *tracer_flags,
5283 			       struct tracer_opt *opts, int neg)
5284 {
5285 	struct tracer *trace = tracer_flags->trace;
5286 	int ret;
5287 
5288 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5289 	if (ret)
5290 		return ret;
5291 
5292 	if (neg)
5293 		tracer_flags->val &= ~opts->bit;
5294 	else
5295 		tracer_flags->val |= opts->bit;
5296 	return 0;
5297 }
5298 
5299 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5300 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5301 {
5302 	struct tracer *trace = tr->current_trace;
5303 	struct tracer_flags *tracer_flags = trace->flags;
5304 	struct tracer_opt *opts = NULL;
5305 	int i;
5306 
5307 	for (i = 0; tracer_flags->opts[i].name; i++) {
5308 		opts = &tracer_flags->opts[i];
5309 
5310 		if (strcmp(cmp, opts->name) == 0)
5311 			return __set_tracer_option(tr, trace->flags, opts, neg);
5312 	}
5313 
5314 	return -EINVAL;
5315 }
5316 
5317 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5318 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5319 {
5320 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5321 		return -1;
5322 
5323 	return 0;
5324 }
5325 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5326 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5327 {
5328 	int *map;
5329 
5330 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5331 	    (mask == TRACE_ITER_RECORD_CMD))
5332 		lockdep_assert_held(&event_mutex);
5333 
5334 	/* do nothing if flag is already set */
5335 	if (!!(tr->trace_flags & mask) == !!enabled)
5336 		return 0;
5337 
5338 	/* Give the tracer a chance to approve the change */
5339 	if (tr->current_trace->flag_changed)
5340 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5341 			return -EINVAL;
5342 
5343 	if (enabled)
5344 		tr->trace_flags |= mask;
5345 	else
5346 		tr->trace_flags &= ~mask;
5347 
5348 	if (mask == TRACE_ITER_RECORD_CMD)
5349 		trace_event_enable_cmd_record(enabled);
5350 
5351 	if (mask == TRACE_ITER_RECORD_TGID) {
5352 		if (!tgid_map) {
5353 			tgid_map_max = pid_max;
5354 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5355 				       GFP_KERNEL);
5356 
5357 			/*
5358 			 * Pairs with smp_load_acquire() in
5359 			 * trace_find_tgid_ptr() to ensure that if it observes
5360 			 * the tgid_map we just allocated then it also observes
5361 			 * the corresponding tgid_map_max value.
5362 			 */
5363 			smp_store_release(&tgid_map, map);
5364 		}
5365 		if (!tgid_map) {
5366 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5367 			return -ENOMEM;
5368 		}
5369 
5370 		trace_event_enable_tgid_record(enabled);
5371 	}
5372 
5373 	if (mask == TRACE_ITER_EVENT_FORK)
5374 		trace_event_follow_fork(tr, enabled);
5375 
5376 	if (mask == TRACE_ITER_FUNC_FORK)
5377 		ftrace_pid_follow_fork(tr, enabled);
5378 
5379 	if (mask == TRACE_ITER_OVERWRITE) {
5380 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5381 #ifdef CONFIG_TRACER_MAX_TRACE
5382 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5383 #endif
5384 	}
5385 
5386 	if (mask == TRACE_ITER_PRINTK) {
5387 		trace_printk_start_stop_comm(enabled);
5388 		trace_printk_control(enabled);
5389 	}
5390 
5391 	return 0;
5392 }
5393 
trace_set_options(struct trace_array * tr,char * option)5394 int trace_set_options(struct trace_array *tr, char *option)
5395 {
5396 	char *cmp;
5397 	int neg = 0;
5398 	int ret;
5399 	size_t orig_len = strlen(option);
5400 	int len;
5401 
5402 	cmp = strstrip(option);
5403 
5404 	len = str_has_prefix(cmp, "no");
5405 	if (len)
5406 		neg = 1;
5407 
5408 	cmp += len;
5409 
5410 	mutex_lock(&event_mutex);
5411 	mutex_lock(&trace_types_lock);
5412 
5413 	ret = match_string(trace_options, -1, cmp);
5414 	/* If no option could be set, test the specific tracer options */
5415 	if (ret < 0)
5416 		ret = set_tracer_option(tr, cmp, neg);
5417 	else
5418 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5419 
5420 	mutex_unlock(&trace_types_lock);
5421 	mutex_unlock(&event_mutex);
5422 
5423 	/*
5424 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5425 	 * turn it back into a space.
5426 	 */
5427 	if (orig_len > strlen(option))
5428 		option[strlen(option)] = ' ';
5429 
5430 	return ret;
5431 }
5432 
apply_trace_boot_options(void)5433 static void __init apply_trace_boot_options(void)
5434 {
5435 	char *buf = trace_boot_options_buf;
5436 	char *option;
5437 
5438 	while (true) {
5439 		option = strsep(&buf, ",");
5440 
5441 		if (!option)
5442 			break;
5443 
5444 		if (*option)
5445 			trace_set_options(&global_trace, option);
5446 
5447 		/* Put back the comma to allow this to be called again */
5448 		if (buf)
5449 			*(buf - 1) = ',';
5450 	}
5451 }
5452 
5453 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5454 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5455 			size_t cnt, loff_t *ppos)
5456 {
5457 	struct seq_file *m = filp->private_data;
5458 	struct trace_array *tr = m->private;
5459 	char buf[64];
5460 	int ret;
5461 
5462 	if (cnt >= sizeof(buf))
5463 		return -EINVAL;
5464 
5465 	if (copy_from_user(buf, ubuf, cnt))
5466 		return -EFAULT;
5467 
5468 	buf[cnt] = 0;
5469 
5470 	ret = trace_set_options(tr, buf);
5471 	if (ret < 0)
5472 		return ret;
5473 
5474 	*ppos += cnt;
5475 
5476 	return cnt;
5477 }
5478 
tracing_trace_options_open(struct inode * inode,struct file * file)5479 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5480 {
5481 	struct trace_array *tr = inode->i_private;
5482 	int ret;
5483 
5484 	ret = tracing_check_open_get_tr(tr);
5485 	if (ret)
5486 		return ret;
5487 
5488 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5489 	if (ret < 0)
5490 		trace_array_put(tr);
5491 
5492 	return ret;
5493 }
5494 
5495 static const struct file_operations tracing_iter_fops = {
5496 	.open		= tracing_trace_options_open,
5497 	.read		= seq_read,
5498 	.llseek		= seq_lseek,
5499 	.release	= tracing_single_release_tr,
5500 	.write		= tracing_trace_options_write,
5501 };
5502 
5503 static const char readme_msg[] =
5504 	"tracing mini-HOWTO:\n\n"
5505 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5506 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5507 	" Important files:\n"
5508 	"  trace\t\t\t- The static contents of the buffer\n"
5509 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5510 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5511 	"  current_tracer\t- function and latency tracers\n"
5512 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5513 	"  error_log\t- error log for failed commands (that support it)\n"
5514 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5515 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5516 	"  trace_clock\t\t- change the clock used to order events\n"
5517 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5518 	"      global:   Synced across CPUs but slows tracing down.\n"
5519 	"     counter:   Not a clock, but just an increment\n"
5520 	"      uptime:   Jiffy counter from time of boot\n"
5521 	"        perf:   Same clock that perf events use\n"
5522 #ifdef CONFIG_X86_64
5523 	"     x86-tsc:   TSC cycle counter\n"
5524 #endif
5525 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5526 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5527 	"    absolute:   Absolute (standalone) timestamp\n"
5528 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5529 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5530 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5531 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5532 	"\t\t\t  Remove sub-buffer with rmdir\n"
5533 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5534 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5535 	"\t\t\t  option name\n"
5536 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5537 #ifdef CONFIG_DYNAMIC_FTRACE
5538 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5539 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5540 	"\t\t\t  functions\n"
5541 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5542 	"\t     modules: Can select a group via module\n"
5543 	"\t      Format: :mod:<module-name>\n"
5544 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5545 	"\t    triggers: a command to perform when function is hit\n"
5546 	"\t      Format: <function>:<trigger>[:count]\n"
5547 	"\t     trigger: traceon, traceoff\n"
5548 	"\t\t      enable_event:<system>:<event>\n"
5549 	"\t\t      disable_event:<system>:<event>\n"
5550 #ifdef CONFIG_STACKTRACE
5551 	"\t\t      stacktrace\n"
5552 #endif
5553 #ifdef CONFIG_TRACER_SNAPSHOT
5554 	"\t\t      snapshot\n"
5555 #endif
5556 	"\t\t      dump\n"
5557 	"\t\t      cpudump\n"
5558 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5559 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5560 	"\t     The first one will disable tracing every time do_fault is hit\n"
5561 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5562 	"\t       The first time do trap is hit and it disables tracing, the\n"
5563 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5564 	"\t       the counter will not decrement. It only decrements when the\n"
5565 	"\t       trigger did work\n"
5566 	"\t     To remove trigger without count:\n"
5567 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5568 	"\t     To remove trigger with a count:\n"
5569 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5570 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5571 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5572 	"\t    modules: Can select a group via module command :mod:\n"
5573 	"\t    Does not accept triggers\n"
5574 #endif /* CONFIG_DYNAMIC_FTRACE */
5575 #ifdef CONFIG_FUNCTION_TRACER
5576 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5577 	"\t\t    (function)\n"
5578 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5579 	"\t\t    (function)\n"
5580 #endif
5581 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5582 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5583 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5584 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5585 #endif
5586 #ifdef CONFIG_TRACER_SNAPSHOT
5587 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5588 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5589 	"\t\t\t  information\n"
5590 #endif
5591 #ifdef CONFIG_STACK_TRACER
5592 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5593 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5594 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5595 	"\t\t\t  new trace)\n"
5596 #ifdef CONFIG_DYNAMIC_FTRACE
5597 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5598 	"\t\t\t  traces\n"
5599 #endif
5600 #endif /* CONFIG_STACK_TRACER */
5601 #ifdef CONFIG_DYNAMIC_EVENTS
5602 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5603 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5604 #endif
5605 #ifdef CONFIG_KPROBE_EVENTS
5606 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5607 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5608 #endif
5609 #ifdef CONFIG_UPROBE_EVENTS
5610 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5611 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5612 #endif
5613 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5614     defined(CONFIG_FPROBE_EVENTS)
5615 	"\t  accepts: event-definitions (one definition per line)\n"
5616 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5617 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5618 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5619 #endif
5620 #ifdef CONFIG_FPROBE_EVENTS
5621 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5622 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5623 #endif
5624 #ifdef CONFIG_HIST_TRIGGERS
5625 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5626 #endif
5627 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5628 	"\t           -:[<group>/][<event>]\n"
5629 #ifdef CONFIG_KPROBE_EVENTS
5630 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5631   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5632 #endif
5633 #ifdef CONFIG_UPROBE_EVENTS
5634   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5635 #endif
5636 	"\t     args: <name>=fetcharg[:type]\n"
5637 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5638 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5639 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5640 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5641 	"\t           <argname>[->field[->field|.field...]],\n"
5642 #else
5643 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5644 #endif
5645 #else
5646 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5647 #endif
5648 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5649 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5650 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5651 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5652 	"\t           symstr, <type>\\[<array-size>\\]\n"
5653 #ifdef CONFIG_HIST_TRIGGERS
5654 	"\t    field: <stype> <name>;\n"
5655 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5656 	"\t           [unsigned] char/int/long\n"
5657 #endif
5658 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5659 	"\t            of the <attached-group>/<attached-event>.\n"
5660 #endif
5661 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5662 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5663 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5664 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5665 	"\t\t\t  events\n"
5666 	"      filter\t\t- If set, only events passing filter are traced\n"
5667 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5668 	"\t\t\t  <event>:\n"
5669 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5670 	"      filter\t\t- If set, only events passing filter are traced\n"
5671 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5672 	"\t    Format: <trigger>[:count][if <filter>]\n"
5673 	"\t   trigger: traceon, traceoff\n"
5674 	"\t            enable_event:<system>:<event>\n"
5675 	"\t            disable_event:<system>:<event>\n"
5676 #ifdef CONFIG_HIST_TRIGGERS
5677 	"\t            enable_hist:<system>:<event>\n"
5678 	"\t            disable_hist:<system>:<event>\n"
5679 #endif
5680 #ifdef CONFIG_STACKTRACE
5681 	"\t\t    stacktrace\n"
5682 #endif
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684 	"\t\t    snapshot\n"
5685 #endif
5686 #ifdef CONFIG_HIST_TRIGGERS
5687 	"\t\t    hist (see below)\n"
5688 #endif
5689 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5690 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5691 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5692 	"\t                  events/block/block_unplug/trigger\n"
5693 	"\t   The first disables tracing every time block_unplug is hit.\n"
5694 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5695 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5696 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5697 	"\t   Like function triggers, the counter is only decremented if it\n"
5698 	"\t    enabled or disabled tracing.\n"
5699 	"\t   To remove a trigger without a count:\n"
5700 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5701 	"\t   To remove a trigger with a count:\n"
5702 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5703 	"\t   Filters can be ignored when removing a trigger.\n"
5704 #ifdef CONFIG_HIST_TRIGGERS
5705 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5706 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5707 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5708 	"\t            [:values=<field1[,field2,...]>]\n"
5709 	"\t            [:sort=<field1[,field2,...]>]\n"
5710 	"\t            [:size=#entries]\n"
5711 	"\t            [:pause][:continue][:clear]\n"
5712 	"\t            [:name=histname1]\n"
5713 	"\t            [:nohitcount]\n"
5714 	"\t            [:<handler>.<action>]\n"
5715 	"\t            [if <filter>]\n\n"
5716 	"\t    Note, special fields can be used as well:\n"
5717 	"\t            common_timestamp - to record current timestamp\n"
5718 	"\t            common_cpu - to record the CPU the event happened on\n"
5719 	"\n"
5720 	"\t    A hist trigger variable can be:\n"
5721 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5722 	"\t        - a reference to another variable e.g. y=$x,\n"
5723 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5724 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5725 	"\n"
5726 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5727 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5728 	"\t    variable reference, field or numeric literal.\n"
5729 	"\n"
5730 	"\t    When a matching event is hit, an entry is added to a hash\n"
5731 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5732 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5733 	"\t    correspond to fields in the event's format description.  Keys\n"
5734 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5735 	"\t    Compound keys consisting of up to two fields can be specified\n"
5736 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5737 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5738 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5739 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5740 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5741 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5742 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5743 	"\t    its histogram data will be shared with other triggers of the\n"
5744 	"\t    same name, and trigger hits will update this common data.\n\n"
5745 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5746 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5747 	"\t    triggers attached to an event, there will be a table for each\n"
5748 	"\t    trigger in the output.  The table displayed for a named\n"
5749 	"\t    trigger will be the same as any other instance having the\n"
5750 	"\t    same name.  The default format used to display a given field\n"
5751 	"\t    can be modified by appending any of the following modifiers\n"
5752 	"\t    to the field name, as applicable:\n\n"
5753 	"\t            .hex        display a number as a hex value\n"
5754 	"\t            .sym        display an address as a symbol\n"
5755 	"\t            .sym-offset display an address as a symbol and offset\n"
5756 	"\t            .execname   display a common_pid as a program name\n"
5757 	"\t            .syscall    display a syscall id as a syscall name\n"
5758 	"\t            .log2       display log2 value rather than raw number\n"
5759 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5760 	"\t            .usecs      display a common_timestamp in microseconds\n"
5761 	"\t            .percent    display a number of percentage value\n"
5762 	"\t            .graph      display a bar-graph of a value\n\n"
5763 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5764 	"\t    trigger or to start a hist trigger but not log any events\n"
5765 	"\t    until told to do so.  'continue' can be used to start or\n"
5766 	"\t    restart a paused hist trigger.\n\n"
5767 	"\t    The 'clear' parameter will clear the contents of a running\n"
5768 	"\t    hist trigger and leave its current paused/active state\n"
5769 	"\t    unchanged.\n\n"
5770 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5771 	"\t    raw hitcount in the histogram.\n\n"
5772 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5773 	"\t    have one event conditionally start and stop another event's\n"
5774 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5775 	"\t    the enable_event and disable_event triggers.\n\n"
5776 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5777 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5778 	"\t        <handler>.<action>\n\n"
5779 	"\t    The available handlers are:\n\n"
5780 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5781 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5782 	"\t        onchange(var)            - invoke action if var changes\n\n"
5783 	"\t    The available actions are:\n\n"
5784 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5785 	"\t        save(field,...)                      - save current event fields\n"
5786 #ifdef CONFIG_TRACER_SNAPSHOT
5787 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5788 #endif
5789 #ifdef CONFIG_SYNTH_EVENTS
5790 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5791 	"\t  Write into this file to define/undefine new synthetic events.\n"
5792 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5793 #endif
5794 #endif
5795 ;
5796 
5797 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5798 tracing_readme_read(struct file *filp, char __user *ubuf,
5799 		       size_t cnt, loff_t *ppos)
5800 {
5801 	return simple_read_from_buffer(ubuf, cnt, ppos,
5802 					readme_msg, strlen(readme_msg));
5803 }
5804 
5805 static const struct file_operations tracing_readme_fops = {
5806 	.open		= tracing_open_generic,
5807 	.read		= tracing_readme_read,
5808 	.llseek		= generic_file_llseek,
5809 };
5810 
/*
 * seq_file ->next: advance the position by one and look up the tgid slot
 * for the pid equal to the new position.
 */
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid;

	(*pos)++;
	pid = *pos;

	return trace_find_tgid_ptr(pid);
}
5817 
/* seq_file ->start: the position itself is used as the pid to look up. */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	int pid;

	pid = *pos;
	return trace_find_tgid_ptr(pid);
}
5824 
/* seq_file ->stop: intentionally empty, there is nothing to undo here. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5828 
saved_tgids_show(struct seq_file * m,void * v)5829 static int saved_tgids_show(struct seq_file *m, void *v)
5830 {
5831 	int *entry = (int *)v;
5832 	int pid = entry - tgid_map;
5833 	int tgid = *entry;
5834 
5835 	if (tgid == 0)
5836 		return SEQ_SKIP;
5837 
5838 	seq_printf(m, "%d %d\n", pid, tgid);
5839 	return 0;
5840 }
5841 
5842 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5843 	.start		= saved_tgids_start,
5844 	.stop		= saved_tgids_stop,
5845 	.next		= saved_tgids_next,
5846 	.show		= saved_tgids_show,
5847 };
5848 
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5849 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5850 {
5851 	int ret;
5852 
5853 	ret = tracing_check_open_get_tr(NULL);
5854 	if (ret)
5855 		return ret;
5856 
5857 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5858 }
5859 
5860 
5861 static const struct file_operations tracing_saved_tgids_fops = {
5862 	.open		= tracing_saved_tgids_open,
5863 	.read		= seq_read,
5864 	.llseek		= seq_lseek,
5865 	.release	= seq_release,
5866 };
5867 
/*
 * seq_file ->next: return the next slot of savedcmd->map_cmdline_to_pid
 * that holds a usable pid, or NULL once the end of the map is reached.
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *cur = v;

	/* Stay on @v only when both *pos and m->count are still zero. */
	if (*pos || m->count)
		cur++;

	(*pos)++;

	while (cur < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]) {
		/* Slots holding -1 or NO_CMDLINE_MAP are not reported. */
		if (*cur != -1 && *cur != NO_CMDLINE_MAP)
			return cur;
		cur++;
	}

	return NULL;
}
5887 
/*
 * seq_file ->start: disable preemption, take trace_cmdline_lock and replay
 * saved_cmdlines_next() from the first map slot until the entry for *pos
 * has been reached.  Returns NULL if the map ends first.  Nothing is
 * unlocked here, not even on the NULL return.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	loff_t idx = 0;
	void *cur;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	cur = &savedcmd->map_cmdline_to_pid[0];
	while (idx <= *pos) {
		cur = saved_cmdlines_next(m, cur, &idx);
		if (!cur)
			break;
	}

	return cur;
}
5905 
saved_cmdlines_stop(struct seq_file * m,void * v)5906 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5907 {
5908 	arch_spin_unlock(&trace_cmdline_lock);
5909 	preempt_enable();
5910 }
5911 
saved_cmdlines_show(struct seq_file * m,void * v)5912 static int saved_cmdlines_show(struct seq_file *m, void *v)
5913 {
5914 	char buf[TASK_COMM_LEN];
5915 	unsigned int *pid = v;
5916 
5917 	__trace_find_cmdline(*pid, buf);
5918 	seq_printf(m, "%d %s\n", *pid, buf);
5919 	return 0;
5920 }
5921 
5922 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5923 	.start		= saved_cmdlines_start,
5924 	.next		= saved_cmdlines_next,
5925 	.stop		= saved_cmdlines_stop,
5926 	.show		= saved_cmdlines_show,
5927 };
5928 
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)5929 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5930 {
5931 	int ret;
5932 
5933 	ret = tracing_check_open_get_tr(NULL);
5934 	if (ret)
5935 		return ret;
5936 
5937 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5938 }
5939 
5940 static const struct file_operations tracing_saved_cmdlines_fops = {
5941 	.open		= tracing_saved_cmdlines_open,
5942 	.read		= seq_read,
5943 	.llseek		= seq_lseek,
5944 	.release	= seq_release,
5945 };
5946 
5947 static ssize_t
tracing_saved_cmdlines_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5948 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5949 				 size_t cnt, loff_t *ppos)
5950 {
5951 	char buf[64];
5952 	int r;
5953 
5954 	preempt_disable();
5955 	arch_spin_lock(&trace_cmdline_lock);
5956 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5957 	arch_spin_unlock(&trace_cmdline_lock);
5958 	preempt_enable();
5959 
5960 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5961 }
5962 
tracing_resize_saved_cmdlines(unsigned int val)5963 static int tracing_resize_saved_cmdlines(unsigned int val)
5964 {
5965 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5966 
5967 	s = allocate_cmdlines_buffer(val);
5968 	if (!s)
5969 		return -ENOMEM;
5970 
5971 	preempt_disable();
5972 	arch_spin_lock(&trace_cmdline_lock);
5973 	savedcmd_temp = savedcmd;
5974 	savedcmd = s;
5975 	arch_spin_unlock(&trace_cmdline_lock);
5976 	preempt_enable();
5977 	free_saved_cmdlines_buffer(savedcmd_temp);
5978 
5979 	return 0;
5980 }
5981 
5982 static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5983 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5984 				  size_t cnt, loff_t *ppos)
5985 {
5986 	unsigned long val;
5987 	int ret;
5988 
5989 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5990 	if (ret)
5991 		return ret;
5992 
5993 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5994 	if (!val || val > PID_MAX_DEFAULT)
5995 		return -EINVAL;
5996 
5997 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5998 	if (ret < 0)
5999 		return ret;
6000 
6001 	*ppos += cnt;
6002 
6003 	return cnt;
6004 }
6005 
6006 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6007 	.open		= tracing_open_generic,
6008 	.read		= tracing_saved_cmdlines_size_read,
6009 	.write		= tracing_saved_cmdlines_size_write,
6010 };
6011 
6012 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6013 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)6014 update_eval_map(union trace_eval_map_item *ptr)
6015 {
6016 	if (!ptr->map.eval_string) {
6017 		if (ptr->tail.next) {
6018 			ptr = ptr->tail.next;
6019 			/* Set ptr to the next real item (skip head) */
6020 			ptr++;
6021 		} else
6022 			return NULL;
6023 	}
6024 	return ptr;
6025 }
6026 
/*
 * seq_file ->next: bump the position and return the map entry following
 * @v, crossing into the next linked array when needed.  Returns NULL at
 * the end of the list.
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *item = v;

	(*pos)++;

	/*
	 * Paranoid! If item already points to the end, we must not step
	 * past it.  This really should never happen.
	 */
	item = update_eval_map(item);
	if (WARN_ON_ONCE(!item))
		return NULL;

	return update_eval_map(item + 1);
}
6045 
/*
 * seq_file ->start: take trace_eval_mutex and walk from the first real
 * entry (the item after the head of trace_eval_maps) forward to *pos.
 * Returns NULL when there are no maps or *pos is past the end.  The mutex
 * is left held on return.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t idx;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (item)
		item++;		/* skip the head item */

	for (idx = 0; item && idx < *pos; )
		item = eval_map_next(m, item, &idx);

	return item;
}
6063 
eval_map_stop(struct seq_file * m,void * v)6064 static void eval_map_stop(struct seq_file *m, void *v)
6065 {
6066 	mutex_unlock(&trace_eval_mutex);
6067 }
6068 
eval_map_show(struct seq_file * m,void * v)6069 static int eval_map_show(struct seq_file *m, void *v)
6070 {
6071 	union trace_eval_map_item *ptr = v;
6072 
6073 	seq_printf(m, "%s %ld (%s)\n",
6074 		   ptr->map.eval_string, ptr->map.eval_value,
6075 		   ptr->map.system);
6076 
6077 	return 0;
6078 }
6079 
6080 static const struct seq_operations tracing_eval_map_seq_ops = {
6081 	.start		= eval_map_start,
6082 	.next		= eval_map_next,
6083 	.stop		= eval_map_stop,
6084 	.show		= eval_map_show,
6085 };
6086 
tracing_eval_map_open(struct inode * inode,struct file * filp)6087 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6088 {
6089 	int ret;
6090 
6091 	ret = tracing_check_open_get_tr(NULL);
6092 	if (ret)
6093 		return ret;
6094 
6095 	return seq_open(filp, &tracing_eval_map_seq_ops);
6096 }
6097 
6098 static const struct file_operations tracing_eval_map_fops = {
6099 	.open		= tracing_eval_map_open,
6100 	.read		= seq_read,
6101 	.llseek		= seq_lseek,
6102 	.release	= seq_release,
6103 };
6104 
6105 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)6106 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6107 {
6108 	/* Return tail of array given the head */
6109 	return ptr + ptr->head.length + 1;
6110 }
6111 
6112 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)6113 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6114 			   int len)
6115 {
6116 	struct trace_eval_map **stop;
6117 	struct trace_eval_map **map;
6118 	union trace_eval_map_item *map_array;
6119 	union trace_eval_map_item *ptr;
6120 
6121 	stop = start + len;
6122 
6123 	/*
6124 	 * The trace_eval_maps contains the map plus a head and tail item,
6125 	 * where the head holds the module and length of array, and the
6126 	 * tail holds a pointer to the next list.
6127 	 */
6128 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6129 	if (!map_array) {
6130 		pr_warn("Unable to allocate trace eval mapping\n");
6131 		return;
6132 	}
6133 
6134 	mutex_lock(&trace_eval_mutex);
6135 
6136 	if (!trace_eval_maps)
6137 		trace_eval_maps = map_array;
6138 	else {
6139 		ptr = trace_eval_maps;
6140 		for (;;) {
6141 			ptr = trace_eval_jmp_to_tail(ptr);
6142 			if (!ptr->tail.next)
6143 				break;
6144 			ptr = ptr->tail.next;
6145 
6146 		}
6147 		ptr->tail.next = map_array;
6148 	}
6149 	map_array->head.mod = mod;
6150 	map_array->head.length = len;
6151 	map_array++;
6152 
6153 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6154 		map_array->map = **map;
6155 		map_array++;
6156 	}
6157 	memset(map_array, 0, sizeof(*map_array));
6158 
6159 	mutex_unlock(&trace_eval_mutex);
6160 }
6161 
trace_create_eval_file(struct dentry * d_tracer)6162 static void trace_create_eval_file(struct dentry *d_tracer)
6163 {
6164 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6165 			  NULL, &tracing_eval_map_fops);
6166 }
6167 
6168 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stub used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}
/* No-op stub used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
}
6172 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6173 
/*
 * Register @len eval maps starting at @start: pass them to
 * trace_event_eval_update() and then to trace_insert_eval_map_file().
 * Does nothing when @len is zero or negative.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);
	trace_insert_eval_map_file(mod, start, len);
}
6188 
6189 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6190 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6191 		       size_t cnt, loff_t *ppos)
6192 {
6193 	struct trace_array *tr = filp->private_data;
6194 	char buf[MAX_TRACER_SIZE+2];
6195 	int r;
6196 
6197 	mutex_lock(&trace_types_lock);
6198 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6199 	mutex_unlock(&trace_types_lock);
6200 
6201 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6202 }
6203 
tracer_init(struct tracer * t,struct trace_array * tr)6204 int tracer_init(struct tracer *t, struct trace_array *tr)
6205 {
6206 	tracing_reset_online_cpus(&tr->array_buffer);
6207 	return t->init(tr);
6208 }
6209 
set_buffer_entries(struct array_buffer * buf,unsigned long val)6210 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6211 {
6212 	int cpu;
6213 
6214 	for_each_tracing_cpu(cpu)
6215 		per_cpu_ptr(buf->data, cpu)->entries = val;
6216 }
6217 
update_buffer_entries(struct array_buffer * buf,int cpu)6218 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6219 {
6220 	if (cpu == RING_BUFFER_ALL_CPUS) {
6221 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6222 	} else {
6223 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6224 	}
6225 }
6226 
6227 #ifdef CONFIG_TRACER_MAX_TRACE
6228 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)6229 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6230 					struct array_buffer *size_buf, int cpu_id)
6231 {
6232 	int cpu, ret = 0;
6233 
6234 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6235 		for_each_tracing_cpu(cpu) {
6236 			ret = ring_buffer_resize(trace_buf->buffer,
6237 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6238 			if (ret < 0)
6239 				break;
6240 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6241 				per_cpu_ptr(size_buf->data, cpu)->entries;
6242 		}
6243 	} else {
6244 		ret = ring_buffer_resize(trace_buf->buffer,
6245 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6246 		if (ret == 0)
6247 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6248 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6249 	}
6250 
6251 	return ret;
6252 }
6253 #endif /* CONFIG_TRACER_MAX_TRACE */
6254 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6255 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6256 					unsigned long size, int cpu)
6257 {
6258 	int ret;
6259 
6260 	/*
6261 	 * If kernel or user changes the size of the ring buffer
6262 	 * we use the size that was given, and we can forget about
6263 	 * expanding it later.
6264 	 */
6265 	ring_buffer_expanded = true;
6266 
6267 	/* May be called before buffers are initialized */
6268 	if (!tr->array_buffer.buffer)
6269 		return 0;
6270 
6271 	/* Do not allow tracing while resizing ring buffer */
6272 	tracing_stop_tr(tr);
6273 
6274 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6275 	if (ret < 0)
6276 		goto out_start;
6277 
6278 #ifdef CONFIG_TRACER_MAX_TRACE
6279 	if (!tr->allocated_snapshot)
6280 		goto out;
6281 
6282 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6283 	if (ret < 0) {
6284 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6285 						     &tr->array_buffer, cpu);
6286 		if (r < 0) {
6287 			/*
6288 			 * AARGH! We are left with different
6289 			 * size max buffer!!!!
6290 			 * The max buffer is our "snapshot" buffer.
6291 			 * When a tracer needs a snapshot (one of the
6292 			 * latency tracers), it swaps the max buffer
6293 			 * with the saved snap shot. We succeeded to
6294 			 * update the size of the main buffer, but failed to
6295 			 * update the size of the max buffer. But when we tried
6296 			 * to reset the main buffer to the original size, we
6297 			 * failed there too. This is very unlikely to
6298 			 * happen, but if it does, warn and kill all
6299 			 * tracing.
6300 			 */
6301 			WARN_ON(1);
6302 			tracing_disabled = 1;
6303 		}
6304 		goto out_start;
6305 	}
6306 
6307 	update_buffer_entries(&tr->max_buffer, cpu);
6308 
6309  out:
6310 #endif /* CONFIG_TRACER_MAX_TRACE */
6311 
6312 	update_buffer_entries(&tr->array_buffer, cpu);
6313  out_start:
6314 	tracing_start_tr(tr);
6315 	return ret;
6316 }
6317 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6318 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6319 				  unsigned long size, int cpu_id)
6320 {
6321 	int ret;
6322 
6323 	mutex_lock(&trace_types_lock);
6324 
6325 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6326 		/* make sure, this cpu is enabled in the mask */
6327 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6328 			ret = -EINVAL;
6329 			goto out;
6330 		}
6331 	}
6332 
6333 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6334 	if (ret < 0)
6335 		ret = -ENOMEM;
6336 
6337 out:
6338 	mutex_unlock(&trace_types_lock);
6339 
6340 	return ret;
6341 }
6342 
6343 
6344 /**
6345  * tracing_update_buffers - used by tracing facility to expand ring buffers
6346  *
6347  * To save on memory when the tracing is never used on a system with it
6348  * configured in. The ring buffers are set to a minimum size. But once
6349  * a user starts to use the tracing facility, then they need to grow
6350  * to their default size.
6351  *
6352  * This function is to be called when a tracer is about to be used.
6353  */
tracing_update_buffers(void)6354 int tracing_update_buffers(void)
6355 {
6356 	int ret = 0;
6357 
6358 	mutex_lock(&trace_types_lock);
6359 	if (!ring_buffer_expanded)
6360 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6361 						RING_BUFFER_ALL_CPUS);
6362 	mutex_unlock(&trace_types_lock);
6363 
6364 	return ret;
6365 }
6366 
6367 struct trace_option_dentry;
6368 
6369 static void
6370 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6371 
6372 /*
6373  * Used to clear out the tracer before deletion of an instance.
6374  * Must have trace_types_lock held.
6375  */
tracing_set_nop(struct trace_array * tr)6376 static void tracing_set_nop(struct trace_array *tr)
6377 {
6378 	if (tr->current_trace == &nop_trace)
6379 		return;
6380 
6381 	tr->current_trace->enabled--;
6382 
6383 	if (tr->current_trace->reset)
6384 		tr->current_trace->reset(tr);
6385 
6386 	tr->current_trace = &nop_trace;
6387 }
6388 
6389 static bool tracer_options_updated;
6390 
add_tracer_options(struct trace_array * tr,struct tracer * t)6391 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6392 {
6393 	/* Only enable if the directory has been created already. */
6394 	if (!tr->dir)
6395 		return;
6396 
6397 	/* Only create trace option files after update_tracer_options finish */
6398 	if (!tracer_options_updated)
6399 		return;
6400 
6401 	create_trace_option_files(tr, t);
6402 }
6403 
tracing_set_tracer(struct trace_array * tr,const char * buf)6404 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6405 {
6406 	struct tracer *t;
6407 #ifdef CONFIG_TRACER_MAX_TRACE
6408 	bool had_max_tr;
6409 #endif
6410 	int ret = 0;
6411 
6412 	mutex_lock(&trace_types_lock);
6413 
6414 	if (!ring_buffer_expanded) {
6415 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6416 						RING_BUFFER_ALL_CPUS);
6417 		if (ret < 0)
6418 			goto out;
6419 		ret = 0;
6420 	}
6421 
6422 	for (t = trace_types; t; t = t->next) {
6423 		if (strcmp(t->name, buf) == 0)
6424 			break;
6425 	}
6426 	if (!t) {
6427 		ret = -EINVAL;
6428 		goto out;
6429 	}
6430 	if (t == tr->current_trace)
6431 		goto out;
6432 
6433 #ifdef CONFIG_TRACER_SNAPSHOT
6434 	if (t->use_max_tr) {
6435 		local_irq_disable();
6436 		arch_spin_lock(&tr->max_lock);
6437 		if (tr->cond_snapshot)
6438 			ret = -EBUSY;
6439 		arch_spin_unlock(&tr->max_lock);
6440 		local_irq_enable();
6441 		if (ret)
6442 			goto out;
6443 	}
6444 #endif
6445 	/* Some tracers won't work on kernel command line */
6446 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6447 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6448 			t->name);
6449 		goto out;
6450 	}
6451 
6452 	/* Some tracers are only allowed for the top level buffer */
6453 	if (!trace_ok_for_array(t, tr)) {
6454 		ret = -EINVAL;
6455 		goto out;
6456 	}
6457 
6458 	/* If trace pipe files are being read, we can't change the tracer */
6459 	if (tr->trace_ref) {
6460 		ret = -EBUSY;
6461 		goto out;
6462 	}
6463 
6464 	trace_branch_disable();
6465 
6466 	tr->current_trace->enabled--;
6467 
6468 	if (tr->current_trace->reset)
6469 		tr->current_trace->reset(tr);
6470 
6471 #ifdef CONFIG_TRACER_MAX_TRACE
6472 	had_max_tr = tr->current_trace->use_max_tr;
6473 
6474 	/* Current trace needs to be nop_trace before synchronize_rcu */
6475 	tr->current_trace = &nop_trace;
6476 
6477 	if (had_max_tr && !t->use_max_tr) {
6478 		/*
6479 		 * We need to make sure that the update_max_tr sees that
6480 		 * current_trace changed to nop_trace to keep it from
6481 		 * swapping the buffers after we resize it.
6482 		 * The update_max_tr is called from interrupts disabled
6483 		 * so a synchronized_sched() is sufficient.
6484 		 */
6485 		synchronize_rcu();
6486 		free_snapshot(tr);
6487 	}
6488 
6489 	if (t->use_max_tr && !tr->allocated_snapshot) {
6490 		ret = tracing_alloc_snapshot_instance(tr);
6491 		if (ret < 0)
6492 			goto out;
6493 	}
6494 #else
6495 	tr->current_trace = &nop_trace;
6496 #endif
6497 
6498 	if (t->init) {
6499 		ret = tracer_init(t, tr);
6500 		if (ret)
6501 			goto out;
6502 	}
6503 
6504 	tr->current_trace = t;
6505 	tr->current_trace->enabled++;
6506 	trace_branch_enable(tr);
6507  out:
6508 	mutex_unlock(&trace_types_lock);
6509 
6510 	return ret;
6511 }
6512 
6513 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6514 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6515 			size_t cnt, loff_t *ppos)
6516 {
6517 	struct trace_array *tr = filp->private_data;
6518 	char buf[MAX_TRACER_SIZE+1];
6519 	char *name;
6520 	size_t ret;
6521 	int err;
6522 
6523 	ret = cnt;
6524 
6525 	if (cnt > MAX_TRACER_SIZE)
6526 		cnt = MAX_TRACER_SIZE;
6527 
6528 	if (copy_from_user(buf, ubuf, cnt))
6529 		return -EFAULT;
6530 
6531 	buf[cnt] = 0;
6532 
6533 	name = strim(buf);
6534 
6535 	err = tracing_set_tracer(tr, name);
6536 	if (err)
6537 		return err;
6538 
6539 	*ppos += ret;
6540 
6541 	return ret;
6542 }
6543 
6544 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6545 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6546 		   size_t cnt, loff_t *ppos)
6547 {
6548 	char buf[64];
6549 	int r;
6550 
6551 	r = snprintf(buf, sizeof(buf), "%ld\n",
6552 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6553 	if (r > sizeof(buf))
6554 		r = sizeof(buf);
6555 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6556 }
6557 
6558 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6559 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6560 		    size_t cnt, loff_t *ppos)
6561 {
6562 	unsigned long val;
6563 	int ret;
6564 
6565 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6566 	if (ret)
6567 		return ret;
6568 
6569 	*ptr = val * 1000;
6570 
6571 	return cnt;
6572 }
6573 
6574 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6575 tracing_thresh_read(struct file *filp, char __user *ubuf,
6576 		    size_t cnt, loff_t *ppos)
6577 {
6578 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6579 }
6580 
6581 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6582 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6583 		     size_t cnt, loff_t *ppos)
6584 {
6585 	struct trace_array *tr = filp->private_data;
6586 	int ret;
6587 
6588 	mutex_lock(&trace_types_lock);
6589 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6590 	if (ret < 0)
6591 		goto out;
6592 
6593 	if (tr->current_trace->update_thresh) {
6594 		ret = tr->current_trace->update_thresh(tr);
6595 		if (ret < 0)
6596 			goto out;
6597 	}
6598 
6599 	ret = cnt;
6600 out:
6601 	mutex_unlock(&trace_types_lock);
6602 
6603 	return ret;
6604 }
6605 
6606 #ifdef CONFIG_TRACER_MAX_TRACE
6607 
6608 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6609 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6610 		     size_t cnt, loff_t *ppos)
6611 {
6612 	struct trace_array *tr = filp->private_data;
6613 
6614 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6615 }
6616 
6617 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6618 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6619 		      size_t cnt, loff_t *ppos)
6620 {
6621 	struct trace_array *tr = filp->private_data;
6622 
6623 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6624 }
6625 
6626 #endif
6627 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6628 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6629 {
6630 	if (cpu == RING_BUFFER_ALL_CPUS) {
6631 		if (cpumask_empty(tr->pipe_cpumask)) {
6632 			cpumask_setall(tr->pipe_cpumask);
6633 			return 0;
6634 		}
6635 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6636 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6637 		return 0;
6638 	}
6639 	return -EBUSY;
6640 }
6641 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6642 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6643 {
6644 	if (cpu == RING_BUFFER_ALL_CPUS) {
6645 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6646 		cpumask_clear(tr->pipe_cpumask);
6647 	} else {
6648 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6649 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6650 	}
6651 }
6652 
tracing_open_pipe(struct inode * inode,struct file * filp)6653 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6654 {
6655 	struct trace_array *tr = inode->i_private;
6656 	struct trace_iterator *iter;
6657 	int cpu;
6658 	int ret;
6659 
6660 	ret = tracing_check_open_get_tr(tr);
6661 	if (ret)
6662 		return ret;
6663 
6664 	mutex_lock(&trace_types_lock);
6665 	cpu = tracing_get_cpu(inode);
6666 	ret = open_pipe_on_cpu(tr, cpu);
6667 	if (ret)
6668 		goto fail_pipe_on_cpu;
6669 
6670 	/* create a buffer to store the information to pass to userspace */
6671 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6672 	if (!iter) {
6673 		ret = -ENOMEM;
6674 		goto fail_alloc_iter;
6675 	}
6676 
6677 	trace_seq_init(&iter->seq);
6678 	iter->trace = tr->current_trace;
6679 
6680 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6681 		ret = -ENOMEM;
6682 		goto fail;
6683 	}
6684 
6685 	/* trace pipe does not show start of buffer */
6686 	cpumask_setall(iter->started);
6687 
6688 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6689 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6690 
6691 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6692 	if (trace_clocks[tr->clock_id].in_ns)
6693 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6694 
6695 	iter->tr = tr;
6696 	iter->array_buffer = &tr->array_buffer;
6697 	iter->cpu_file = cpu;
6698 	mutex_init(&iter->mutex);
6699 	filp->private_data = iter;
6700 
6701 	if (iter->trace->pipe_open)
6702 		iter->trace->pipe_open(iter);
6703 
6704 	nonseekable_open(inode, filp);
6705 
6706 	tr->trace_ref++;
6707 
6708 	mutex_unlock(&trace_types_lock);
6709 	return ret;
6710 
6711 fail:
6712 	kfree(iter);
6713 fail_alloc_iter:
6714 	close_pipe_on_cpu(tr, cpu);
6715 fail_pipe_on_cpu:
6716 	__trace_array_put(tr);
6717 	mutex_unlock(&trace_types_lock);
6718 	return ret;
6719 }
6720 
tracing_release_pipe(struct inode * inode,struct file * file)6721 static int tracing_release_pipe(struct inode *inode, struct file *file)
6722 {
6723 	struct trace_iterator *iter = file->private_data;
6724 	struct trace_array *tr = inode->i_private;
6725 
6726 	mutex_lock(&trace_types_lock);
6727 
6728 	tr->trace_ref--;
6729 
6730 	if (iter->trace->pipe_close)
6731 		iter->trace->pipe_close(iter);
6732 	close_pipe_on_cpu(tr, iter->cpu_file);
6733 	mutex_unlock(&trace_types_lock);
6734 
6735 	free_trace_iter_content(iter);
6736 	kfree(iter);
6737 
6738 	trace_array_put(tr);
6739 
6740 	return 0;
6741 }
6742 
6743 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6744 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6745 {
6746 	struct trace_array *tr = iter->tr;
6747 
6748 	/* Iterators are static, they should be filled or empty */
6749 	if (trace_buffer_iter(iter, iter->cpu_file))
6750 		return EPOLLIN | EPOLLRDNORM;
6751 
6752 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6753 		/*
6754 		 * Always select as readable when in blocking mode
6755 		 */
6756 		return EPOLLIN | EPOLLRDNORM;
6757 	else
6758 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6759 					     filp, poll_table, iter->tr->buffer_percent);
6760 }
6761 
6762 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6763 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6764 {
6765 	struct trace_iterator *iter = filp->private_data;
6766 
6767 	return trace_poll(iter, filp, poll_table);
6768 }
6769 
6770 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6771 static int tracing_wait_pipe(struct file *filp)
6772 {
6773 	struct trace_iterator *iter = filp->private_data;
6774 	int ret;
6775 
6776 	while (trace_empty(iter)) {
6777 
6778 		if ((filp->f_flags & O_NONBLOCK)) {
6779 			return -EAGAIN;
6780 		}
6781 
6782 		/*
6783 		 * We block until we read something and tracing is disabled.
6784 		 * We still block if tracing is disabled, but we have never
6785 		 * read anything. This allows a user to cat this file, and
6786 		 * then enable tracing. But after we have read something,
6787 		 * we give an EOF when tracing is again disabled.
6788 		 *
6789 		 * iter->pos will be 0 if we haven't read anything.
6790 		 */
6791 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6792 			break;
6793 
6794 		mutex_unlock(&iter->mutex);
6795 
6796 		ret = wait_on_pipe(iter, 0);
6797 
6798 		mutex_lock(&iter->mutex);
6799 
6800 		if (ret)
6801 			return ret;
6802 	}
6803 
6804 	return 1;
6805 }
6806 
6807 /*
6808  * Consumer reader.
6809  */
6810 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6811 tracing_read_pipe(struct file *filp, char __user *ubuf,
6812 		  size_t cnt, loff_t *ppos)
6813 {
6814 	struct trace_iterator *iter = filp->private_data;
6815 	ssize_t sret;
6816 
6817 	/*
6818 	 * Avoid more than one consumer on a single file descriptor
6819 	 * This is just a matter of traces coherency, the ring buffer itself
6820 	 * is protected.
6821 	 */
6822 	mutex_lock(&iter->mutex);
6823 
6824 	/* return any leftover data */
6825 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6826 	if (sret != -EBUSY)
6827 		goto out;
6828 
6829 	trace_seq_init(&iter->seq);
6830 
6831 	if (iter->trace->read) {
6832 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6833 		if (sret)
6834 			goto out;
6835 	}
6836 
6837 waitagain:
6838 	sret = tracing_wait_pipe(filp);
6839 	if (sret <= 0)
6840 		goto out;
6841 
6842 	/* stop when tracing is finished */
6843 	if (trace_empty(iter)) {
6844 		sret = 0;
6845 		goto out;
6846 	}
6847 
6848 	if (cnt >= PAGE_SIZE)
6849 		cnt = PAGE_SIZE - 1;
6850 
6851 	/* reset all but tr, trace, and overruns */
6852 	trace_iterator_reset(iter);
6853 	cpumask_clear(iter->started);
6854 	trace_seq_init(&iter->seq);
6855 
6856 	trace_event_read_lock();
6857 	trace_access_lock(iter->cpu_file);
6858 	while (trace_find_next_entry_inc(iter) != NULL) {
6859 		enum print_line_t ret;
6860 		int save_len = iter->seq.seq.len;
6861 
6862 		ret = print_trace_line(iter);
6863 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6864 			/*
6865 			 * If one print_trace_line() fills entire trace_seq in one shot,
6866 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6867 			 * In this case, we need to consume it, otherwise, loop will peek
6868 			 * this event next time, resulting in an infinite loop.
6869 			 */
6870 			if (save_len == 0) {
6871 				iter->seq.full = 0;
6872 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6873 				trace_consume(iter);
6874 				break;
6875 			}
6876 
6877 			/* In other cases, don't print partial lines */
6878 			iter->seq.seq.len = save_len;
6879 			break;
6880 		}
6881 		if (ret != TRACE_TYPE_NO_CONSUME)
6882 			trace_consume(iter);
6883 
6884 		if (trace_seq_used(&iter->seq) >= cnt)
6885 			break;
6886 
6887 		/*
6888 		 * Setting the full flag means we reached the trace_seq buffer
6889 		 * size and we should leave by partial output condition above.
6890 		 * One of the trace_seq_* functions is not used properly.
6891 		 */
6892 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6893 			  iter->ent->type);
6894 	}
6895 	trace_access_unlock(iter->cpu_file);
6896 	trace_event_read_unlock();
6897 
6898 	/* Now copy what we have to the user */
6899 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6900 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6901 		trace_seq_init(&iter->seq);
6902 
6903 	/*
6904 	 * If there was nothing to send to user, in spite of consuming trace
6905 	 * entries, go back to wait for more entries.
6906 	 */
6907 	if (sret == -EBUSY)
6908 		goto waitagain;
6909 
6910 out:
6911 	mutex_unlock(&iter->mutex);
6912 
6913 	return sret;
6914 }
6915 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6916 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6917 				     unsigned int idx)
6918 {
6919 	__free_page(spd->pages[idx]);
6920 }
6921 
6922 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6923 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6924 {
6925 	size_t count;
6926 	int save_len;
6927 	int ret;
6928 
6929 	/* Seq buffer is page-sized, exactly what we need. */
6930 	for (;;) {
6931 		save_len = iter->seq.seq.len;
6932 		ret = print_trace_line(iter);
6933 
6934 		if (trace_seq_has_overflowed(&iter->seq)) {
6935 			iter->seq.seq.len = save_len;
6936 			break;
6937 		}
6938 
6939 		/*
6940 		 * This should not be hit, because it should only
6941 		 * be set if the iter->seq overflowed. But check it
6942 		 * anyway to be safe.
6943 		 */
6944 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6945 			iter->seq.seq.len = save_len;
6946 			break;
6947 		}
6948 
6949 		count = trace_seq_used(&iter->seq) - save_len;
6950 		if (rem < count) {
6951 			rem = 0;
6952 			iter->seq.seq.len = save_len;
6953 			break;
6954 		}
6955 
6956 		if (ret != TRACE_TYPE_NO_CONSUME)
6957 			trace_consume(iter);
6958 		rem -= count;
6959 		if (!trace_find_next_entry_inc(iter))	{
6960 			rem = 0;
6961 			iter->ent = NULL;
6962 			break;
6963 		}
6964 	}
6965 
6966 	return rem;
6967 }
6968 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6969 static ssize_t tracing_splice_read_pipe(struct file *filp,
6970 					loff_t *ppos,
6971 					struct pipe_inode_info *pipe,
6972 					size_t len,
6973 					unsigned int flags)
6974 {
6975 	struct page *pages_def[PIPE_DEF_BUFFERS];
6976 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6977 	struct trace_iterator *iter = filp->private_data;
6978 	struct splice_pipe_desc spd = {
6979 		.pages		= pages_def,
6980 		.partial	= partial_def,
6981 		.nr_pages	= 0, /* This gets updated below. */
6982 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6983 		.ops		= &default_pipe_buf_ops,
6984 		.spd_release	= tracing_spd_release_pipe,
6985 	};
6986 	ssize_t ret;
6987 	size_t rem;
6988 	unsigned int i;
6989 
6990 	if (splice_grow_spd(pipe, &spd))
6991 		return -ENOMEM;
6992 
6993 	mutex_lock(&iter->mutex);
6994 
6995 	if (iter->trace->splice_read) {
6996 		ret = iter->trace->splice_read(iter, filp,
6997 					       ppos, pipe, len, flags);
6998 		if (ret)
6999 			goto out_err;
7000 	}
7001 
7002 	ret = tracing_wait_pipe(filp);
7003 	if (ret <= 0)
7004 		goto out_err;
7005 
7006 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7007 		ret = -EFAULT;
7008 		goto out_err;
7009 	}
7010 
7011 	trace_event_read_lock();
7012 	trace_access_lock(iter->cpu_file);
7013 
7014 	/* Fill as many pages as possible. */
7015 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7016 		spd.pages[i] = alloc_page(GFP_KERNEL);
7017 		if (!spd.pages[i])
7018 			break;
7019 
7020 		rem = tracing_fill_pipe_page(rem, iter);
7021 
7022 		/* Copy the data into the page, so we can start over. */
7023 		ret = trace_seq_to_buffer(&iter->seq,
7024 					  page_address(spd.pages[i]),
7025 					  min((size_t)trace_seq_used(&iter->seq),
7026 						  (size_t)PAGE_SIZE));
7027 		if (ret < 0) {
7028 			__free_page(spd.pages[i]);
7029 			break;
7030 		}
7031 		spd.partial[i].offset = 0;
7032 		spd.partial[i].len = ret;
7033 
7034 		trace_seq_init(&iter->seq);
7035 	}
7036 
7037 	trace_access_unlock(iter->cpu_file);
7038 	trace_event_read_unlock();
7039 	mutex_unlock(&iter->mutex);
7040 
7041 	spd.nr_pages = i;
7042 
7043 	if (i)
7044 		ret = splice_to_pipe(pipe, &spd);
7045 	else
7046 		ret = 0;
7047 out:
7048 	splice_shrink_spd(&spd);
7049 	return ret;
7050 
7051 out_err:
7052 	mutex_unlock(&iter->mutex);
7053 	goto out;
7054 }
7055 
7056 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7057 tracing_entries_read(struct file *filp, char __user *ubuf,
7058 		     size_t cnt, loff_t *ppos)
7059 {
7060 	struct inode *inode = file_inode(filp);
7061 	struct trace_array *tr = inode->i_private;
7062 	int cpu = tracing_get_cpu(inode);
7063 	char buf[64];
7064 	int r = 0;
7065 	ssize_t ret;
7066 
7067 	mutex_lock(&trace_types_lock);
7068 
7069 	if (cpu == RING_BUFFER_ALL_CPUS) {
7070 		int cpu, buf_size_same;
7071 		unsigned long size;
7072 
7073 		size = 0;
7074 		buf_size_same = 1;
7075 		/* check if all cpu sizes are same */
7076 		for_each_tracing_cpu(cpu) {
7077 			/* fill in the size from first enabled cpu */
7078 			if (size == 0)
7079 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7080 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7081 				buf_size_same = 0;
7082 				break;
7083 			}
7084 		}
7085 
7086 		if (buf_size_same) {
7087 			if (!ring_buffer_expanded)
7088 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7089 					    size >> 10,
7090 					    trace_buf_size >> 10);
7091 			else
7092 				r = sprintf(buf, "%lu\n", size >> 10);
7093 		} else
7094 			r = sprintf(buf, "X\n");
7095 	} else
7096 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7097 
7098 	mutex_unlock(&trace_types_lock);
7099 
7100 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7101 	return ret;
7102 }
7103 
7104 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7105 tracing_entries_write(struct file *filp, const char __user *ubuf,
7106 		      size_t cnt, loff_t *ppos)
7107 {
7108 	struct inode *inode = file_inode(filp);
7109 	struct trace_array *tr = inode->i_private;
7110 	unsigned long val;
7111 	int ret;
7112 
7113 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7114 	if (ret)
7115 		return ret;
7116 
7117 	/* must have at least 1 entry */
7118 	if (!val)
7119 		return -EINVAL;
7120 
7121 	/* value is in KB */
7122 	val <<= 10;
7123 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7124 	if (ret < 0)
7125 		return ret;
7126 
7127 	*ppos += cnt;
7128 
7129 	return cnt;
7130 }
7131 
7132 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7133 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7134 				size_t cnt, loff_t *ppos)
7135 {
7136 	struct trace_array *tr = filp->private_data;
7137 	char buf[64];
7138 	int r, cpu;
7139 	unsigned long size = 0, expanded_size = 0;
7140 
7141 	mutex_lock(&trace_types_lock);
7142 	for_each_tracing_cpu(cpu) {
7143 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7144 		if (!ring_buffer_expanded)
7145 			expanded_size += trace_buf_size >> 10;
7146 	}
7147 	if (ring_buffer_expanded)
7148 		r = sprintf(buf, "%lu\n", size);
7149 	else
7150 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7151 	mutex_unlock(&trace_types_lock);
7152 
7153 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7154 }
7155 
7156 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7157 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7158 			  size_t cnt, loff_t *ppos)
7159 {
7160 	/*
7161 	 * There is no need to read what the user has written, this function
7162 	 * is just to make sure that there is no error when "echo" is used
7163 	 */
7164 
7165 	*ppos += cnt;
7166 
7167 	return cnt;
7168 }
7169 
7170 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7171 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7172 {
7173 	struct trace_array *tr = inode->i_private;
7174 
7175 	/* disable tracing ? */
7176 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7177 		tracer_tracing_off(tr);
7178 	/* resize the ring buffer to 0 */
7179 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7180 
7181 	trace_array_put(tr);
7182 
7183 	return 0;
7184 }
7185 
7186 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7187 tracing_mark_write(struct file *filp, const char __user *ubuf,
7188 					size_t cnt, loff_t *fpos)
7189 {
7190 	struct trace_array *tr = filp->private_data;
7191 	struct ring_buffer_event *event;
7192 	enum event_trigger_type tt = ETT_NONE;
7193 	struct trace_buffer *buffer;
7194 	struct print_entry *entry;
7195 	ssize_t written;
7196 	int size;
7197 	int len;
7198 
7199 /* Used in tracing_mark_raw_write() as well */
7200 #define FAULTED_STR "<faulted>"
7201 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7202 
7203 	if (tracing_disabled)
7204 		return -EINVAL;
7205 
7206 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7207 		return -EINVAL;
7208 
7209 	if (cnt > TRACE_BUF_SIZE)
7210 		cnt = TRACE_BUF_SIZE;
7211 
7212 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7213 
7214 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7215 
7216 	/* If less than "<faulted>", then make sure we can still add that */
7217 	if (cnt < FAULTED_SIZE)
7218 		size += FAULTED_SIZE - cnt;
7219 
7220 	buffer = tr->array_buffer.buffer;
7221 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7222 					    tracing_gen_ctx());
7223 	if (unlikely(!event))
7224 		/* Ring buffer disabled, return as if not open for write */
7225 		return -EBADF;
7226 
7227 	entry = ring_buffer_event_data(event);
7228 	entry->ip = _THIS_IP_;
7229 
7230 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7231 	if (len) {
7232 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7233 		cnt = FAULTED_SIZE;
7234 		written = -EFAULT;
7235 	} else
7236 		written = cnt;
7237 
7238 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7239 		/* do not add \n before testing triggers, but add \0 */
7240 		entry->buf[cnt] = '\0';
7241 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7242 	}
7243 
7244 	if (entry->buf[cnt - 1] != '\n') {
7245 		entry->buf[cnt] = '\n';
7246 		entry->buf[cnt + 1] = '\0';
7247 	} else
7248 		entry->buf[cnt] = '\0';
7249 
7250 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7251 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7252 	__buffer_unlock_commit(buffer, event);
7253 
7254 	if (tt)
7255 		event_triggers_post_call(tr->trace_marker_file, tt);
7256 
7257 	return written;
7258 }
7259 
7260 /* Limit it for now to 3K (including tag) */
7261 #define RAW_DATA_MAX_SIZE (1024*3)
7262 
7263 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7264 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7265 					size_t cnt, loff_t *fpos)
7266 {
7267 	struct trace_array *tr = filp->private_data;
7268 	struct ring_buffer_event *event;
7269 	struct trace_buffer *buffer;
7270 	struct raw_data_entry *entry;
7271 	ssize_t written;
7272 	int size;
7273 	int len;
7274 
7275 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7276 
7277 	if (tracing_disabled)
7278 		return -EINVAL;
7279 
7280 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7281 		return -EINVAL;
7282 
7283 	/* The marker must at least have a tag id */
7284 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7285 		return -EINVAL;
7286 
7287 	if (cnt > TRACE_BUF_SIZE)
7288 		cnt = TRACE_BUF_SIZE;
7289 
7290 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7291 
7292 	size = sizeof(*entry) + cnt;
7293 	if (cnt < FAULT_SIZE_ID)
7294 		size += FAULT_SIZE_ID - cnt;
7295 
7296 	buffer = tr->array_buffer.buffer;
7297 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7298 					    tracing_gen_ctx());
7299 	if (!event)
7300 		/* Ring buffer disabled, return as if not open for write */
7301 		return -EBADF;
7302 
7303 	entry = ring_buffer_event_data(event);
7304 
7305 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7306 	if (len) {
7307 		entry->id = -1;
7308 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7309 		written = -EFAULT;
7310 	} else
7311 		written = cnt;
7312 
7313 	__buffer_unlock_commit(buffer, event);
7314 
7315 	return written;
7316 }
7317 
tracing_clock_show(struct seq_file * m,void * v)7318 static int tracing_clock_show(struct seq_file *m, void *v)
7319 {
7320 	struct trace_array *tr = m->private;
7321 	int i;
7322 
7323 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7324 		seq_printf(m,
7325 			"%s%s%s%s", i ? " " : "",
7326 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7327 			i == tr->clock_id ? "]" : "");
7328 	seq_putc(m, '\n');
7329 
7330 	return 0;
7331 }
7332 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7333 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7334 {
7335 	int i;
7336 
7337 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7338 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7339 			break;
7340 	}
7341 	if (i == ARRAY_SIZE(trace_clocks))
7342 		return -EINVAL;
7343 
7344 	mutex_lock(&trace_types_lock);
7345 
7346 	tr->clock_id = i;
7347 
7348 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7349 
7350 	/*
7351 	 * New clock may not be consistent with the previous clock.
7352 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7353 	 */
7354 	tracing_reset_online_cpus(&tr->array_buffer);
7355 
7356 #ifdef CONFIG_TRACER_MAX_TRACE
7357 	if (tr->max_buffer.buffer)
7358 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7359 	tracing_reset_online_cpus(&tr->max_buffer);
7360 #endif
7361 
7362 	mutex_unlock(&trace_types_lock);
7363 
7364 	return 0;
7365 }
7366 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7367 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7368 				   size_t cnt, loff_t *fpos)
7369 {
7370 	struct seq_file *m = filp->private_data;
7371 	struct trace_array *tr = m->private;
7372 	char buf[64];
7373 	const char *clockstr;
7374 	int ret;
7375 
7376 	if (cnt >= sizeof(buf))
7377 		return -EINVAL;
7378 
7379 	if (copy_from_user(buf, ubuf, cnt))
7380 		return -EFAULT;
7381 
7382 	buf[cnt] = 0;
7383 
7384 	clockstr = strstrip(buf);
7385 
7386 	ret = tracing_set_clock(tr, clockstr);
7387 	if (ret)
7388 		return ret;
7389 
7390 	*fpos += cnt;
7391 
7392 	return cnt;
7393 }
7394 
tracing_clock_open(struct inode * inode,struct file * file)7395 static int tracing_clock_open(struct inode *inode, struct file *file)
7396 {
7397 	struct trace_array *tr = inode->i_private;
7398 	int ret;
7399 
7400 	ret = tracing_check_open_get_tr(tr);
7401 	if (ret)
7402 		return ret;
7403 
7404 	ret = single_open(file, tracing_clock_show, inode->i_private);
7405 	if (ret < 0)
7406 		trace_array_put(tr);
7407 
7408 	return ret;
7409 }
7410 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7411 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7412 {
7413 	struct trace_array *tr = m->private;
7414 
7415 	mutex_lock(&trace_types_lock);
7416 
7417 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7418 		seq_puts(m, "delta [absolute]\n");
7419 	else
7420 		seq_puts(m, "[delta] absolute\n");
7421 
7422 	mutex_unlock(&trace_types_lock);
7423 
7424 	return 0;
7425 }
7426 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7427 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7428 {
7429 	struct trace_array *tr = inode->i_private;
7430 	int ret;
7431 
7432 	ret = tracing_check_open_get_tr(tr);
7433 	if (ret)
7434 		return ret;
7435 
7436 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7437 	if (ret < 0)
7438 		trace_array_put(tr);
7439 
7440 	return ret;
7441 }
7442 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7443 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7444 {
7445 	if (rbe == this_cpu_read(trace_buffered_event))
7446 		return ring_buffer_time_stamp(buffer);
7447 
7448 	return ring_buffer_event_time_stamp(buffer, rbe);
7449 }
7450 
7451 /*
7452  * Set or disable using the per CPU trace_buffer_event when possible.
7453  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7454 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7455 {
7456 	int ret = 0;
7457 
7458 	mutex_lock(&trace_types_lock);
7459 
7460 	if (set && tr->no_filter_buffering_ref++)
7461 		goto out;
7462 
7463 	if (!set) {
7464 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7465 			ret = -EINVAL;
7466 			goto out;
7467 		}
7468 
7469 		--tr->no_filter_buffering_ref;
7470 	}
7471  out:
7472 	mutex_unlock(&trace_types_lock);
7473 
7474 	return ret;
7475 }
7476 
/*
 * Per-open state built around a trace_iterator.  snapshot_raw_open()
 * reads it from filp->private_data after tracing_buffers_open() and
 * adjusts the embedded iterator.
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;		/* embedded iterator (tr, trace, array_buffer, snapshot) */
	void			*spare;		/* NOTE(review): presumably a spare buffer page - confirm */
	unsigned int		spare_cpu;	/* NOTE(review): presumably the CPU @spare belongs to - confirm */
	unsigned int		read;		/* NOTE(review): presumably a read offset into @spare - confirm */
};
7483 
7484 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7485 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7486 {
7487 	struct trace_array *tr = inode->i_private;
7488 	struct trace_iterator *iter;
7489 	struct seq_file *m;
7490 	int ret;
7491 
7492 	ret = tracing_check_open_get_tr(tr);
7493 	if (ret)
7494 		return ret;
7495 
7496 	if (file->f_mode & FMODE_READ) {
7497 		iter = __tracing_open(inode, file, true);
7498 		if (IS_ERR(iter))
7499 			ret = PTR_ERR(iter);
7500 	} else {
7501 		/* Writes still need the seq_file to hold the private data */
7502 		ret = -ENOMEM;
7503 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7504 		if (!m)
7505 			goto out;
7506 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7507 		if (!iter) {
7508 			kfree(m);
7509 			goto out;
7510 		}
7511 		ret = 0;
7512 
7513 		iter->tr = tr;
7514 		iter->array_buffer = &tr->max_buffer;
7515 		iter->cpu_file = tracing_get_cpu(inode);
7516 		m->private = iter;
7517 		file->private_data = m;
7518 	}
7519 out:
7520 	if (ret < 0)
7521 		trace_array_put(tr);
7522 
7523 	return ret;
7524 }
7525 
tracing_swap_cpu_buffer(void * tr)7526 static void tracing_swap_cpu_buffer(void *tr)
7527 {
7528 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7529 }
7530 
7531 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7532 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7533 		       loff_t *ppos)
7534 {
7535 	struct seq_file *m = filp->private_data;
7536 	struct trace_iterator *iter = m->private;
7537 	struct trace_array *tr = iter->tr;
7538 	unsigned long val;
7539 	int ret;
7540 
7541 	ret = tracing_update_buffers();
7542 	if (ret < 0)
7543 		return ret;
7544 
7545 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7546 	if (ret)
7547 		return ret;
7548 
7549 	mutex_lock(&trace_types_lock);
7550 
7551 	if (tr->current_trace->use_max_tr) {
7552 		ret = -EBUSY;
7553 		goto out;
7554 	}
7555 
7556 	local_irq_disable();
7557 	arch_spin_lock(&tr->max_lock);
7558 	if (tr->cond_snapshot)
7559 		ret = -EBUSY;
7560 	arch_spin_unlock(&tr->max_lock);
7561 	local_irq_enable();
7562 	if (ret)
7563 		goto out;
7564 
7565 	switch (val) {
7566 	case 0:
7567 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7568 			ret = -EINVAL;
7569 			break;
7570 		}
7571 		if (tr->allocated_snapshot)
7572 			free_snapshot(tr);
7573 		break;
7574 	case 1:
7575 /* Only allow per-cpu swap if the ring buffer supports it */
7576 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7577 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7578 			ret = -EINVAL;
7579 			break;
7580 		}
7581 #endif
7582 		if (tr->allocated_snapshot)
7583 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7584 					&tr->array_buffer, iter->cpu_file);
7585 		else
7586 			ret = tracing_alloc_snapshot_instance(tr);
7587 		if (ret < 0)
7588 			break;
7589 		/* Now, we're going to swap */
7590 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7591 			local_irq_disable();
7592 			update_max_tr(tr, current, smp_processor_id(), NULL);
7593 			local_irq_enable();
7594 		} else {
7595 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7596 						 (void *)tr, 1);
7597 		}
7598 		break;
7599 	default:
7600 		if (tr->allocated_snapshot) {
7601 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7602 				tracing_reset_online_cpus(&tr->max_buffer);
7603 			else
7604 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7605 		}
7606 		break;
7607 	}
7608 
7609 	if (ret >= 0) {
7610 		*ppos += cnt;
7611 		ret = cnt;
7612 	}
7613 out:
7614 	mutex_unlock(&trace_types_lock);
7615 	return ret;
7616 }
7617 
tracing_snapshot_release(struct inode * inode,struct file * file)7618 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7619 {
7620 	struct seq_file *m = file->private_data;
7621 	int ret;
7622 
7623 	ret = tracing_release(inode, file);
7624 
7625 	if (file->f_mode & FMODE_READ)
7626 		return ret;
7627 
7628 	/* If write only, the seq_file is just a stub */
7629 	if (m)
7630 		kfree(m->private);
7631 	kfree(m);
7632 
7633 	return 0;
7634 }
7635 
/*
 * Forward declarations of the tracing_buffers_* handlers, so that
 * snapshot_raw_open() and snapshot_raw_fops can reference them.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7642 
snapshot_raw_open(struct inode * inode,struct file * filp)7643 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7644 {
7645 	struct ftrace_buffer_info *info;
7646 	int ret;
7647 
7648 	/* The following checks for tracefs lockdown */
7649 	ret = tracing_buffers_open(inode, filp);
7650 	if (ret < 0)
7651 		return ret;
7652 
7653 	info = filp->private_data;
7654 
7655 	if (info->iter.trace->use_max_tr) {
7656 		tracing_buffers_release(inode, filp);
7657 		return -EBUSY;
7658 	}
7659 
7660 	info->iter.snapshot = true;
7661 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7662 
7663 	return ret;
7664 }
7665 
7666 #endif /* CONFIG_TRACER_SNAPSHOT */
7667 
7668 
7669 static const struct file_operations tracing_thresh_fops = {
7670 	.open		= tracing_open_generic,
7671 	.read		= tracing_thresh_read,
7672 	.write		= tracing_thresh_write,
7673 	.llseek		= generic_file_llseek,
7674 };
7675 
7676 #ifdef CONFIG_TRACER_MAX_TRACE
7677 static const struct file_operations tracing_max_lat_fops = {
7678 	.open		= tracing_open_generic_tr,
7679 	.read		= tracing_max_lat_read,
7680 	.write		= tracing_max_lat_write,
7681 	.llseek		= generic_file_llseek,
7682 	.release	= tracing_release_generic_tr,
7683 };
7684 #endif
7685 
7686 static const struct file_operations set_tracer_fops = {
7687 	.open		= tracing_open_generic_tr,
7688 	.read		= tracing_set_trace_read,
7689 	.write		= tracing_set_trace_write,
7690 	.llseek		= generic_file_llseek,
7691 	.release	= tracing_release_generic_tr,
7692 };
7693 
7694 static const struct file_operations tracing_pipe_fops = {
7695 	.open		= tracing_open_pipe,
7696 	.poll		= tracing_poll_pipe,
7697 	.read		= tracing_read_pipe,
7698 	.splice_read	= tracing_splice_read_pipe,
7699 	.release	= tracing_release_pipe,
7700 	.llseek		= no_llseek,
7701 };
7702 
7703 static const struct file_operations tracing_entries_fops = {
7704 	.open		= tracing_open_generic_tr,
7705 	.read		= tracing_entries_read,
7706 	.write		= tracing_entries_write,
7707 	.llseek		= generic_file_llseek,
7708 	.release	= tracing_release_generic_tr,
7709 };
7710 
7711 static const struct file_operations tracing_total_entries_fops = {
7712 	.open		= tracing_open_generic_tr,
7713 	.read		= tracing_total_entries_read,
7714 	.llseek		= generic_file_llseek,
7715 	.release	= tracing_release_generic_tr,
7716 };
7717 
7718 static const struct file_operations tracing_free_buffer_fops = {
7719 	.open		= tracing_open_generic_tr,
7720 	.write		= tracing_free_buffer_write,
7721 	.release	= tracing_free_buffer_release,
7722 };
7723 
7724 static const struct file_operations tracing_mark_fops = {
7725 	.open		= tracing_mark_open,
7726 	.write		= tracing_mark_write,
7727 	.release	= tracing_release_generic_tr,
7728 };
7729 
7730 static const struct file_operations tracing_mark_raw_fops = {
7731 	.open		= tracing_mark_open,
7732 	.write		= tracing_mark_raw_write,
7733 	.release	= tracing_release_generic_tr,
7734 };
7735 
7736 static const struct file_operations trace_clock_fops = {
7737 	.open		= tracing_clock_open,
7738 	.read		= seq_read,
7739 	.llseek		= seq_lseek,
7740 	.release	= tracing_single_release_tr,
7741 	.write		= tracing_clock_write,
7742 };
7743 
7744 static const struct file_operations trace_time_stamp_mode_fops = {
7745 	.open		= tracing_time_stamp_mode_open,
7746 	.read		= seq_read,
7747 	.llseek		= seq_lseek,
7748 	.release	= tracing_single_release_tr,
7749 };
7750 
7751 #ifdef CONFIG_TRACER_SNAPSHOT
7752 static const struct file_operations snapshot_fops = {
7753 	.open		= tracing_snapshot_open,
7754 	.read		= seq_read,
7755 	.write		= tracing_snapshot_write,
7756 	.llseek		= tracing_lseek,
7757 	.release	= tracing_snapshot_release,
7758 };
7759 
7760 static const struct file_operations snapshot_raw_fops = {
7761 	.open		= snapshot_raw_open,
7762 	.read		= tracing_buffers_read,
7763 	.release	= tracing_buffers_release,
7764 	.splice_read	= tracing_buffers_splice_read,
7765 	.llseek		= no_llseek,
7766 };
7767 
7768 #endif /* CONFIG_TRACER_SNAPSHOT */
7769 
7770 /*
7771  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7772  * @filp: The active open file structure
7773  * @ubuf: The userspace provided buffer to read value into
7774  * @cnt: The maximum number of bytes to read
7775  * @ppos: The current "file" position
7776  *
7777  * This function implements the write interface for a struct trace_min_max_param.
7778  * The filp->private_data must point to a trace_min_max_param structure that
7779  * defines where to write the value, the min and the max acceptable values,
7780  * and a lock to protect the write.
7781  */
7782 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7783 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7784 {
7785 	struct trace_min_max_param *param = filp->private_data;
7786 	u64 val;
7787 	int err;
7788 
7789 	if (!param)
7790 		return -EFAULT;
7791 
7792 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7793 	if (err)
7794 		return err;
7795 
7796 	if (param->lock)
7797 		mutex_lock(param->lock);
7798 
7799 	if (param->min && val < *param->min)
7800 		err = -EINVAL;
7801 
7802 	if (param->max && val > *param->max)
7803 		err = -EINVAL;
7804 
7805 	if (!err)
7806 		*param->val = val;
7807 
7808 	if (param->lock)
7809 		mutex_unlock(param->lock);
7810 
7811 	if (err)
7812 		return err;
7813 
7814 	return cnt;
7815 }
7816 
7817 /*
7818  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7819  * @filp: The active open file structure
7820  * @ubuf: The userspace provided buffer to read value into
7821  * @cnt: The maximum number of bytes to read
7822  * @ppos: The current "file" position
7823  *
7824  * This function implements the read interface for a struct trace_min_max_param.
7825  * The filp->private_data must point to a trace_min_max_param struct with valid
7826  * data.
7827  */
7828 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7829 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7830 {
7831 	struct trace_min_max_param *param = filp->private_data;
7832 	char buf[U64_STR_SIZE];
7833 	int len;
7834 	u64 val;
7835 
7836 	if (!param)
7837 		return -EFAULT;
7838 
7839 	val = *param->val;
7840 
7841 	if (cnt > sizeof(buf))
7842 		cnt = sizeof(buf);
7843 
7844 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7845 
7846 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7847 }
7848 
7849 const struct file_operations trace_min_max_fops = {
7850 	.open		= tracing_open_generic,
7851 	.read		= trace_min_max_read,
7852 	.write		= trace_min_max_write,
7853 };
7854 
/* Number of entries after which get_tracing_log_err() recycles the oldest one */
#define TRACING_LOG_ERRS_MAX	8
/* Size in bytes of the tracing_log_err::loc buffer */
#define TRACING_LOG_LOC_MAX	128

/* Text placed in front of the logged command; its length also offsets the caret */
#define CMD_PREFIX "  Command: "

/* Per-error details filled in by tracing_log_err() */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value taken when the error was logged */
};

/* One entry on a trace_array's err_log list */
struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};

/* Held while error log entries are added, cleared or iterated */
static DEFINE_MUTEX(tracing_err_log_lock);
7875 
alloc_tracing_log_err(int len)7876 static struct tracing_log_err *alloc_tracing_log_err(int len)
7877 {
7878 	struct tracing_log_err *err;
7879 
7880 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7881 	if (!err)
7882 		return ERR_PTR(-ENOMEM);
7883 
7884 	err->cmd = kzalloc(len, GFP_KERNEL);
7885 	if (!err->cmd) {
7886 		kfree(err);
7887 		return ERR_PTR(-ENOMEM);
7888 	}
7889 
7890 	return err;
7891 }
7892 
free_tracing_log_err(struct tracing_log_err * err)7893 static void free_tracing_log_err(struct tracing_log_err *err)
7894 {
7895 	kfree(err->cmd);
7896 	kfree(err);
7897 }
7898 
get_tracing_log_err(struct trace_array * tr,int len)7899 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7900 						   int len)
7901 {
7902 	struct tracing_log_err *err;
7903 	char *cmd;
7904 
7905 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7906 		err = alloc_tracing_log_err(len);
7907 		if (PTR_ERR(err) != -ENOMEM)
7908 			tr->n_err_log_entries++;
7909 
7910 		return err;
7911 	}
7912 	cmd = kzalloc(len, GFP_KERNEL);
7913 	if (!cmd)
7914 		return ERR_PTR(-ENOMEM);
7915 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7916 	kfree(err->cmd);
7917 	err->cmd = cmd;
7918 	list_del(&err->list);
7919 
7920 	return err;
7921 }
7922 
/**
 * err_pos - locate a string inside a command for error caret placement
 * @cmd: The tracing command that caused the error
 * @str: The string within @cmd the caret should point at
 *
 * Looks up the first occurrence of @str in @cmd.  The result is meant to
 * be handed to tracing_log_err() as the caret position.  An empty @cmd
 * triggers a warning.
 *
 * Returns the offset of the first occurrence of @str within @cmd, or 0
 * if @str does not occur in @cmd or @cmd is empty.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);

	return match ? match - cmd : 0;
}
7948 
7949 /**
7950  * tracing_log_err - write an error to the tracing error log
7951  * @tr: The associated trace array for the error (NULL for top level array)
7952  * @loc: A string describing where the error occurred
7953  * @cmd: The tracing command that caused the error
7954  * @errs: The array of loc-specific static error strings
7955  * @type: The index into errs[], which produces the specific static err string
7956  * @pos: The position the caret should be placed in the cmd
7957  *
7958  * Writes an error into tracing/error_log of the form:
7959  *
7960  * <loc>: error: <text>
7961  *   Command: <cmd>
7962  *              ^
7963  *
7964  * tracing/error_log is a small log file containing the last
7965  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7966  * unless there has been a tracing error, and the error log can be
7967  * cleared and have its memory freed by writing the empty string in
7968  * truncation mode to it i.e. echo > tracing/error_log.
7969  *
7970  * NOTE: the @errs array along with the @type param are used to
7971  * produce a static error string - this string is not copied and saved
7972  * when the error is logged - only a pointer to it is saved.  See
7973  * existing callers for examples of how static strings are typically
7974  * defined for use with tracing_log_err().
7975  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)7976 void tracing_log_err(struct trace_array *tr,
7977 		     const char *loc, const char *cmd,
7978 		     const char **errs, u8 type, u16 pos)
7979 {
7980 	struct tracing_log_err *err;
7981 	int len = 0;
7982 
7983 	if (!tr)
7984 		tr = &global_trace;
7985 
7986 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7987 
7988 	mutex_lock(&tracing_err_log_lock);
7989 	err = get_tracing_log_err(tr, len);
7990 	if (PTR_ERR(err) == -ENOMEM) {
7991 		mutex_unlock(&tracing_err_log_lock);
7992 		return;
7993 	}
7994 
7995 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7996 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7997 
7998 	err->info.errs = errs;
7999 	err->info.type = type;
8000 	err->info.pos = pos;
8001 	err->info.ts = local_clock();
8002 
8003 	list_add_tail(&err->list, &tr->err_log);
8004 	mutex_unlock(&tracing_err_log_lock);
8005 }
8006 
clear_tracing_err_log(struct trace_array * tr)8007 static void clear_tracing_err_log(struct trace_array *tr)
8008 {
8009 	struct tracing_log_err *err, *next;
8010 
8011 	mutex_lock(&tracing_err_log_lock);
8012 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8013 		list_del(&err->list);
8014 		free_tracing_log_err(err);
8015 	}
8016 
8017 	tr->n_err_log_entries = 0;
8018 	mutex_unlock(&tracing_err_log_lock);
8019 }
8020 
/*
 * seq_file ->start: take tracing_err_log_lock and position the iteration
 * at index *pos of the error log of the trace array stored in m->private.
 * The lock is dropped again in tracing_err_log_seq_stop().
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(log, *pos);
}
8029 
/* seq_file ->next: step from @v to the following error log entry. */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	return seq_list_next(v, log, pos);
}
8036 
/* seq_file ->stop: drop the lock taken in tracing_err_log_seq_start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
8041 
/*
 * Emit the caret line of an error log entry: enough spaces to skip
 * CMD_PREFIX plus @pos characters of the command, then "^" and a newline.
 */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int pad = sizeof(CMD_PREFIX) - 1 + pos;

	while (pad--)
		seq_putc(m, ' ');

	seq_puts(m, "^\n");
}
8052 
tracing_err_log_seq_show(struct seq_file * m,void * v)8053 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8054 {
8055 	struct tracing_log_err *err = v;
8056 
8057 	if (err) {
8058 		const char *err_text = err->info.errs[err->info.type];
8059 		u64 sec = err->info.ts;
8060 		u32 nsec;
8061 
8062 		nsec = do_div(sec, NSEC_PER_SEC);
8063 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8064 			   err->loc, err_text);
8065 		seq_printf(m, "%s", err->cmd);
8066 		tracing_err_log_show_pos(m, err->info.pos);
8067 	}
8068 
8069 	return 0;
8070 }
8071 
8072 static const struct seq_operations tracing_err_log_seq_ops = {
8073 	.start  = tracing_err_log_seq_start,
8074 	.next   = tracing_err_log_seq_next,
8075 	.stop   = tracing_err_log_seq_stop,
8076 	.show   = tracing_err_log_seq_show
8077 };
8078 
tracing_err_log_open(struct inode * inode,struct file * file)8079 static int tracing_err_log_open(struct inode *inode, struct file *file)
8080 {
8081 	struct trace_array *tr = inode->i_private;
8082 	int ret = 0;
8083 
8084 	ret = tracing_check_open_get_tr(tr);
8085 	if (ret)
8086 		return ret;
8087 
8088 	/* If this file was opened for write, then erase contents */
8089 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8090 		clear_tracing_err_log(tr);
8091 
8092 	if (file->f_mode & FMODE_READ) {
8093 		ret = seq_open(file, &tracing_err_log_seq_ops);
8094 		if (!ret) {
8095 			struct seq_file *m = file->private_data;
8096 			m->private = tr;
8097 		} else {
8098 			trace_array_put(tr);
8099 		}
8100 	}
8101 	return ret;
8102 }
8103 
/*
 * Write handler for the error log file.  The data itself is ignored and
 * the whole write is reported as consumed; clearing the log is done by
 * tracing_err_log_open() when the file is opened with O_TRUNC.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8110 
/*
 * Release handler for the error log file: drops the trace array reference
 * taken in tracing_err_log_open() and, for files opened for reading,
 * tears down the seq_file.  Always returns 0.
 */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	/* A seq_file only exists when the file was opened for reading */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
8122 
8123 static const struct file_operations tracing_err_log_fops = {
8124 	.open           = tracing_err_log_open,
8125 	.write		= tracing_err_log_write,
8126 	.read           = seq_read,
8127 	.llseek         = tracing_lseek,
8128 	.release        = tracing_err_log_release,
8129 };
8130 
tracing_buffers_open(struct inode * inode,struct file * filp)8131 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8132 {
8133 	struct trace_array *tr = inode->i_private;
8134 	struct ftrace_buffer_info *info;
8135 	int ret;
8136 
8137 	ret = tracing_check_open_get_tr(tr);
8138 	if (ret)
8139 		return ret;
8140 
8141 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8142 	if (!info) {
8143 		trace_array_put(tr);
8144 		return -ENOMEM;
8145 	}
8146 
8147 	mutex_lock(&trace_types_lock);
8148 
8149 	info->iter.tr		= tr;
8150 	info->iter.cpu_file	= tracing_get_cpu(inode);
8151 	info->iter.trace	= tr->current_trace;
8152 	info->iter.array_buffer = &tr->array_buffer;
8153 	info->spare		= NULL;
8154 	/* Force reading ring buffer for first read */
8155 	info->read		= (unsigned int)-1;
8156 
8157 	filp->private_data = info;
8158 
8159 	tr->trace_ref++;
8160 
8161 	mutex_unlock(&trace_types_lock);
8162 
8163 	ret = nonseekable_open(inode, filp);
8164 	if (ret < 0)
8165 		trace_array_put(tr);
8166 
8167 	return ret;
8168 }
8169 
8170 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8171 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8172 {
8173 	struct ftrace_buffer_info *info = filp->private_data;
8174 	struct trace_iterator *iter = &info->iter;
8175 
8176 	return trace_poll(iter, filp, poll_table);
8177 }
8178 
/*
 * Read handler for the raw ring buffer files.
 *
 * Data is staged in a per-open "spare" page that is allocated on first
 * use.  If part of the spare page is still unread (info->read < PAGE_SIZE)
 * it is handed out first; otherwise the page is refilled with
 * ring_buffer_read_page().  When that fails and the trace is empty the
 * call either returns -EAGAIN (O_NONBLOCK) or waits for data and retries.
 *
 * Returns the number of bytes copied to @ubuf; 0 if @count is 0 or if the
 * page could not be filled although the trace is not empty; -EBUSY while
 * a snapshot file is read and the current tracer uses the max buffer;
 * -EFAULT if nothing could be copied to userspace; or the error from the
 * spare page allocation or from wait_on_pipe().
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Allocate the spare page on first use and remember its CPU */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
		}
	}
	/* Allocation failed: ret carries the error from above */
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		/* Page read failed but the trace is not empty: report no data */
		return 0;
	}

	/* The spare page was refilled: start handing it out from offset 0 */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* Partial copies are reported as short reads */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8252 
/*
 * Flush handler: advance the iterator's wait_index and wake everything
 * waiting on this file's ring buffer so that waiters can notice the
 * change.  Always returns 0.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *it = &info->iter;

	it->wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(it->array_buffer->buffer, it->cpu_file);

	return 0;
}
8266 
tracing_buffers_release(struct inode * inode,struct file * file)8267 static int tracing_buffers_release(struct inode *inode, struct file *file)
8268 {
8269 	struct ftrace_buffer_info *info = file->private_data;
8270 	struct trace_iterator *iter = &info->iter;
8271 
8272 	mutex_lock(&trace_types_lock);
8273 
8274 	iter->tr->trace_ref--;
8275 
8276 	__trace_array_put(iter->tr);
8277 
8278 	if (info->spare)
8279 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8280 					   info->spare_cpu, info->spare);
8281 	kvfree(info);
8282 
8283 	mutex_unlock(&trace_types_lock);
8284 
8285 	return 0;
8286 }
8287 
/*
 * Reference counted handle for one ring buffer read page that has been
 * handed to a pipe.  The page is returned to @buffer and the handle is
 * freed when the last reference is dropped (see buffer_ref_release()).
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* buffer the page was allocated from */
	void			*page;		/* the read page itself */
	int			cpu;		/* CPU the page was allocated for */
	refcount_t		refcount;
};
8294 
buffer_ref_release(struct buffer_ref * ref)8295 static void buffer_ref_release(struct buffer_ref *ref)
8296 {
8297 	if (!refcount_dec_and_test(&ref->refcount))
8298 		return;
8299 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8300 	kfree(ref);
8301 }
8302 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8303 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8304 				    struct pipe_buffer *buf)
8305 {
8306 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8307 
8308 	buffer_ref_release(ref);
8309 	buf->private = 0;
8310 }
8311 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8312 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8313 				struct pipe_buffer *buf)
8314 {
8315 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8316 
8317 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8318 		return false;
8319 
8320 	refcount_inc(&ref->refcount);
8321 	return true;
8322 }
8323 
8324 /* Pipe buffer operations for a buffer. */
8325 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8326 	.release		= buffer_pipe_buf_release,
8327 	.get			= buffer_pipe_buf_get,
8328 };
8329 
8330 /*
8331  * Callback from splice_to_pipe(), if we need to release some pages
8332  * at the end of the spd in case we error'ed out in filling the pipe.
8333  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8334 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8335 {
8336 	struct buffer_ref *ref =
8337 		(struct buffer_ref *)spd->partial[i].private;
8338 
8339 	buffer_ref_release(ref);
8340 	spd->partial[i].private = 0;
8341 }
8342 
/*
 * splice_read handler for the raw ring buffer files.
 *
 * Moves whole pages out of the ring buffer into @pipe.  Each page is
 * wrapped in a reference counted buffer_ref that the pipe buffer then
 * owns (see buffer_pipe_buf_ops).  *ppos must be page aligned and @len
 * must cover at least one page; @len is rounded down to a multiple of
 * PAGE_SIZE.
 *
 * If no page could be read the call returns an earlier error if there was
 * one, -EAGAIN for non-blocking callers, and otherwise waits for data and
 * retries unless tracing got switched off or the wait_index changed while
 * waiting.
 *
 * Returns the result of splice_to_pipe() when at least one page was read,
 * otherwise 0 or a negative error code as described above.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Only page aligned positions are supported */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	/* Need room for at least one page; trim @len to whole pages */
	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One iteration per page, while there is room, length and data left */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		/* This initial reference is handed over to the pipe buffer */
		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into this page: give it back and stop */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the pages that were filled successfully */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		long wait_index;

		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		/* Sample before waiting so a flush/ioctl bump can be detected */
		wait_index = READ_ONCE(iter->wait_index);

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Make sure we see the new wait_index */
		smp_rmb();
		if (wait_index != iter->wait_index)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8464 
8465 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8466 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8467 {
8468 	struct ftrace_buffer_info *info = file->private_data;
8469 	struct trace_iterator *iter = &info->iter;
8470 
8471 	if (cmd)
8472 		return -ENOIOCTLCMD;
8473 
8474 	mutex_lock(&trace_types_lock);
8475 
8476 	iter->wait_index++;
8477 	/* Make sure the waiters see the new wait_index */
8478 	smp_wmb();
8479 
8480 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8481 
8482 	mutex_unlock(&trace_types_lock);
8483 	return 0;
8484 }
8485 
8486 static const struct file_operations tracing_buffers_fops = {
8487 	.open		= tracing_buffers_open,
8488 	.read		= tracing_buffers_read,
8489 	.poll		= tracing_buffers_poll,
8490 	.release	= tracing_buffers_release,
8491 	.flush		= tracing_buffers_flush,
8492 	.splice_read	= tracing_buffers_splice_read,
8493 	.unlocked_ioctl = tracing_buffers_ioctl,
8494 	.llseek		= no_llseek,
8495 };
8496 
8497 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8498 tracing_stats_read(struct file *filp, char __user *ubuf,
8499 		   size_t count, loff_t *ppos)
8500 {
8501 	struct inode *inode = file_inode(filp);
8502 	struct trace_array *tr = inode->i_private;
8503 	struct array_buffer *trace_buf = &tr->array_buffer;
8504 	int cpu = tracing_get_cpu(inode);
8505 	struct trace_seq *s;
8506 	unsigned long cnt;
8507 	unsigned long long t;
8508 	unsigned long usec_rem;
8509 
8510 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8511 	if (!s)
8512 		return -ENOMEM;
8513 
8514 	trace_seq_init(s);
8515 
8516 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8517 	trace_seq_printf(s, "entries: %ld\n", cnt);
8518 
8519 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8520 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8521 
8522 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8523 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8524 
8525 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8526 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8527 
8528 	if (trace_clocks[tr->clock_id].in_ns) {
8529 		/* local or global for trace_clock */
8530 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8531 		usec_rem = do_div(t, USEC_PER_SEC);
8532 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8533 								t, usec_rem);
8534 
8535 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8536 		usec_rem = do_div(t, USEC_PER_SEC);
8537 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8538 	} else {
8539 		/* counter or tsc mode for trace_clock */
8540 		trace_seq_printf(s, "oldest event ts: %llu\n",
8541 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8542 
8543 		trace_seq_printf(s, "now ts: %llu\n",
8544 				ring_buffer_time_stamp(trace_buf->buffer));
8545 	}
8546 
8547 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8548 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8549 
8550 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8551 	trace_seq_printf(s, "read events: %ld\n", cnt);
8552 
8553 	count = simple_read_from_buffer(ubuf, count, ppos,
8554 					s->buffer, trace_seq_used(s));
8555 
8556 	kfree(s);
8557 
8558 	return count;
8559 }
8560 
8561 static const struct file_operations tracing_stats_fops = {
8562 	.open		= tracing_open_generic_tr,
8563 	.read		= tracing_stats_read,
8564 	.llseek		= generic_file_llseek,
8565 	.release	= tracing_release_generic_tr,
8566 };
8567 
8568 #ifdef CONFIG_DYNAMIC_FTRACE
8569 
8570 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8571 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8572 		  size_t cnt, loff_t *ppos)
8573 {
8574 	ssize_t ret;
8575 	char *buf;
8576 	int r;
8577 
8578 	/* 256 should be plenty to hold the amount needed */
8579 	buf = kmalloc(256, GFP_KERNEL);
8580 	if (!buf)
8581 		return -ENOMEM;
8582 
8583 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8584 		      ftrace_update_tot_cnt,
8585 		      ftrace_number_of_pages,
8586 		      ftrace_number_of_groups);
8587 
8588 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8589 	kfree(buf);
8590 	return ret;
8591 }
8592 
8593 static const struct file_operations tracing_dyn_info_fops = {
8594 	.open		= tracing_open_generic,
8595 	.read		= tracing_read_dyn_info,
8596 	.llseek		= generic_file_llseek,
8597 };
8598 #endif /* CONFIG_DYNAMIC_FTRACE */
8599 
8600 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Function probe handler for the "snapshot" command without a count:
 * takes a snapshot of @tr every time it is called.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8608 
8609 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8610 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8611 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8612 		      void *data)
8613 {
8614 	struct ftrace_func_mapper *mapper = data;
8615 	long *count = NULL;
8616 
8617 	if (mapper)
8618 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8619 
8620 	if (count) {
8621 
8622 		if (*count <= 0)
8623 			return;
8624 
8625 		(*count)--;
8626 	}
8627 
8628 	tracing_snapshot_instance(tr);
8629 }
8630 
8631 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8632 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8633 		      struct ftrace_probe_ops *ops, void *data)
8634 {
8635 	struct ftrace_func_mapper *mapper = data;
8636 	long *count = NULL;
8637 
8638 	seq_printf(m, "%ps:", (void *)ip);
8639 
8640 	seq_puts(m, "snapshot");
8641 
8642 	if (mapper)
8643 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8644 
8645 	if (count)
8646 		seq_printf(m, ":count=%ld\n", *count);
8647 	else
8648 		seq_puts(m, ":unlimited\n");
8649 
8650 	return 0;
8651 }
8652 
8653 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8654 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8655 		     unsigned long ip, void *init_data, void **data)
8656 {
8657 	struct ftrace_func_mapper *mapper = *data;
8658 
8659 	if (!mapper) {
8660 		mapper = allocate_ftrace_func_mapper();
8661 		if (!mapper)
8662 			return -ENOMEM;
8663 		*data = mapper;
8664 	}
8665 
8666 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8667 }
8668 
8669 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8670 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8671 		     unsigned long ip, void *data)
8672 {
8673 	struct ftrace_func_mapper *mapper = data;
8674 
8675 	if (!ip) {
8676 		if (!mapper)
8677 			return;
8678 		free_ftrace_func_mapper(mapper, NULL);
8679 		return;
8680 	}
8681 
8682 	ftrace_func_mapper_remove_ip(mapper, ip);
8683 }
8684 
8685 static struct ftrace_probe_ops snapshot_probe_ops = {
8686 	.func			= ftrace_snapshot,
8687 	.print			= ftrace_snapshot_print,
8688 };
8689 
8690 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8691 	.func			= ftrace_count_snapshot,
8692 	.print			= ftrace_snapshot_print,
8693 	.init			= ftrace_snapshot_init,
8694 	.free			= ftrace_snapshot_free,
8695 };
8696 
8697 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8698 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8699 			       char *glob, char *cmd, char *param, int enable)
8700 {
8701 	struct ftrace_probe_ops *ops;
8702 	void *count = (void *)-1;
8703 	char *number;
8704 	int ret;
8705 
8706 	if (!tr)
8707 		return -ENODEV;
8708 
8709 	/* hash funcs only work with set_ftrace_filter */
8710 	if (!enable)
8711 		return -EINVAL;
8712 
8713 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8714 
8715 	if (glob[0] == '!')
8716 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8717 
8718 	if (!param)
8719 		goto out_reg;
8720 
8721 	number = strsep(&param, ":");
8722 
8723 	if (!strlen(number))
8724 		goto out_reg;
8725 
8726 	/*
8727 	 * We use the callback data field (which is a pointer)
8728 	 * as our counter.
8729 	 */
8730 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8731 	if (ret)
8732 		return ret;
8733 
8734  out_reg:
8735 	ret = tracing_alloc_snapshot_instance(tr);
8736 	if (ret < 0)
8737 		goto out;
8738 
8739 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8740 
8741  out:
8742 	return ret < 0 ? ret : 0;
8743 }
8744 
8745 static struct ftrace_func_command ftrace_snapshot_cmd = {
8746 	.name			= "snapshot",
8747 	.func			= ftrace_trace_snapshot_callback,
8748 };
8749 
/* Register the "snapshot" function command; returns the registration result. */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
8754 #else
/* Without both snapshot and dynamic ftrace support there is nothing to register. */
static inline __init int register_snapshot_cmd(void) { return 0; }
8756 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8757 
tracing_get_dentry(struct trace_array * tr)8758 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8759 {
8760 	if (WARN_ON(!tr->dir))
8761 		return ERR_PTR(-ENODEV);
8762 
8763 	/* Top directory uses NULL as the parent */
8764 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8765 		return NULL;
8766 
8767 	/* All sub buffers have a descriptor */
8768 	return tr->dir;
8769 }
8770 
tracing_dentry_percpu(struct trace_array * tr,int cpu)8771 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8772 {
8773 	struct dentry *d_tracer;
8774 
8775 	if (tr->percpu_dir)
8776 		return tr->percpu_dir;
8777 
8778 	d_tracer = tracing_get_dentry(tr);
8779 	if (IS_ERR(d_tracer))
8780 		return NULL;
8781 
8782 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8783 
8784 	MEM_FAIL(!tr->percpu_dir,
8785 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8786 
8787 	return tr->percpu_dir;
8788 }
8789 
8790 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8791 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8792 		      void *data, long cpu, const struct file_operations *fops)
8793 {
8794 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8795 
8796 	if (ret) /* See tracing_get_cpu() */
8797 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8798 	return ret;
8799 }
8800 
8801 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8802 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8803 {
8804 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8805 	struct dentry *d_cpu;
8806 	char cpu_dir[30]; /* 30 characters should be more than enough */
8807 
8808 	if (!d_percpu)
8809 		return;
8810 
8811 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8812 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8813 	if (!d_cpu) {
8814 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8815 		return;
8816 	}
8817 
8818 	/* per cpu trace_pipe */
8819 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8820 				tr, cpu, &tracing_pipe_fops);
8821 
8822 	/* per cpu trace */
8823 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8824 				tr, cpu, &tracing_fops);
8825 
8826 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8827 				tr, cpu, &tracing_buffers_fops);
8828 
8829 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8830 				tr, cpu, &tracing_stats_fops);
8831 
8832 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8833 				tr, cpu, &tracing_entries_fops);
8834 
8835 #ifdef CONFIG_TRACER_SNAPSHOT
8836 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8837 				tr, cpu, &snapshot_fops);
8838 
8839 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8840 				tr, cpu, &snapshot_raw_fops);
8841 #endif
8842 }
8843 
8844 #ifdef CONFIG_FTRACE_SELFTEST
8845 /* Let selftest have access to static functions in this file */
8846 #include "trace_selftest.c"
8847 #endif
8848 
8849 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8850 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8851 			loff_t *ppos)
8852 {
8853 	struct trace_option_dentry *topt = filp->private_data;
8854 	char *buf;
8855 
8856 	if (topt->flags->val & topt->opt->bit)
8857 		buf = "1\n";
8858 	else
8859 		buf = "0\n";
8860 
8861 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8862 }
8863 
8864 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8865 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8866 			 loff_t *ppos)
8867 {
8868 	struct trace_option_dentry *topt = filp->private_data;
8869 	unsigned long val;
8870 	int ret;
8871 
8872 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8873 	if (ret)
8874 		return ret;
8875 
8876 	if (val != 0 && val != 1)
8877 		return -EINVAL;
8878 
8879 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8880 		mutex_lock(&trace_types_lock);
8881 		ret = __set_tracer_option(topt->tr, topt->flags,
8882 					  topt->opt, !val);
8883 		mutex_unlock(&trace_types_lock);
8884 		if (ret)
8885 			return ret;
8886 	}
8887 
8888 	*ppos += cnt;
8889 
8890 	return cnt;
8891 }
8892 
tracing_open_options(struct inode * inode,struct file * filp)8893 static int tracing_open_options(struct inode *inode, struct file *filp)
8894 {
8895 	struct trace_option_dentry *topt = inode->i_private;
8896 	int ret;
8897 
8898 	ret = tracing_check_open_get_tr(topt->tr);
8899 	if (ret)
8900 		return ret;
8901 
8902 	filp->private_data = inode->i_private;
8903 	return 0;
8904 }
8905 
tracing_release_options(struct inode * inode,struct file * file)8906 static int tracing_release_options(struct inode *inode, struct file *file)
8907 {
8908 	struct trace_option_dentry *topt = file->private_data;
8909 
8910 	trace_array_put(topt->tr);
8911 	return 0;
8912 }
8913 
8914 static const struct file_operations trace_options_fops = {
8915 	.open = tracing_open_options,
8916 	.read = trace_options_read,
8917 	.write = trace_options_write,
8918 	.llseek	= generic_file_llseek,
8919 	.release = tracing_release_options,
8920 };
8921 
8922 /*
8923  * In order to pass in both the trace_array descriptor as well as the index
8924  * to the flag that the trace option file represents, the trace_array
8925  * has a character array of trace_flags_index[], which holds the index
8926  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8927  * The address of this character array is passed to the flag option file
8928  * read/write callbacks.
8929  *
8930  * In order to extract both the index and the trace_array descriptor,
8931  * get_tr_index() uses the following algorithm.
8932  *
8933  *   idx = *ptr;
8934  *
8935  * As the pointer itself contains the address of the index (remember
8936  * index[1] == 1).
8937  *
8938  * Then to get the trace_array descriptor, by subtracting that index
8939  * from the ptr, we get to the start of the index itself.
8940  *
8941  *   ptr - idx == &index[0]
8942  *
8943  * Then a simple container_of() from that pointer gets us to the
8944  * trace_array descriptor.
8945  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8946 static void get_tr_index(void *data, struct trace_array **ptr,
8947 			 unsigned int *pindex)
8948 {
8949 	*pindex = *(unsigned char *)data;
8950 
8951 	*ptr = container_of(data - *pindex, struct trace_array,
8952 			    trace_flags_index);
8953 }
8954 
8955 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8956 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8957 			loff_t *ppos)
8958 {
8959 	void *tr_index = filp->private_data;
8960 	struct trace_array *tr;
8961 	unsigned int index;
8962 	char *buf;
8963 
8964 	get_tr_index(tr_index, &tr, &index);
8965 
8966 	if (tr->trace_flags & (1 << index))
8967 		buf = "1\n";
8968 	else
8969 		buf = "0\n";
8970 
8971 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8972 }
8973 
8974 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8975 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8976 			 loff_t *ppos)
8977 {
8978 	void *tr_index = filp->private_data;
8979 	struct trace_array *tr;
8980 	unsigned int index;
8981 	unsigned long val;
8982 	int ret;
8983 
8984 	get_tr_index(tr_index, &tr, &index);
8985 
8986 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8987 	if (ret)
8988 		return ret;
8989 
8990 	if (val != 0 && val != 1)
8991 		return -EINVAL;
8992 
8993 	mutex_lock(&event_mutex);
8994 	mutex_lock(&trace_types_lock);
8995 	ret = set_tracer_flag(tr, 1 << index, val);
8996 	mutex_unlock(&trace_types_lock);
8997 	mutex_unlock(&event_mutex);
8998 
8999 	if (ret < 0)
9000 		return ret;
9001 
9002 	*ppos += cnt;
9003 
9004 	return cnt;
9005 }
9006 
9007 static const struct file_operations trace_options_core_fops = {
9008 	.open = tracing_open_generic,
9009 	.read = trace_options_core_read,
9010 	.write = trace_options_core_write,
9011 	.llseek = generic_file_llseek,
9012 };
9013 
/*
 * Thin wrapper around tracefs_create_file() that logs a warning naming
 * the entry when creation fails. Returns the new dentry or NULL.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *entry;

	entry = tracefs_create_file(name, mode, parent, data, fops);
	if (entry)
		return entry;

	pr_warn("Could not create tracefs '%s' entry\n", name);
	return NULL;
}
9028 
9029 
trace_options_init_dentry(struct trace_array * tr)9030 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9031 {
9032 	struct dentry *d_tracer;
9033 
9034 	if (tr->options)
9035 		return tr->options;
9036 
9037 	d_tracer = tracing_get_dentry(tr);
9038 	if (IS_ERR(d_tracer))
9039 		return NULL;
9040 
9041 	tr->options = tracefs_create_dir("options", d_tracer);
9042 	if (!tr->options) {
9043 		pr_warn("Could not create tracefs directory 'options'\n");
9044 		return NULL;
9045 	}
9046 
9047 	return tr->options;
9048 }
9049 
9050 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9051 create_trace_option_file(struct trace_array *tr,
9052 			 struct trace_option_dentry *topt,
9053 			 struct tracer_flags *flags,
9054 			 struct tracer_opt *opt)
9055 {
9056 	struct dentry *t_options;
9057 
9058 	t_options = trace_options_init_dentry(tr);
9059 	if (!t_options)
9060 		return;
9061 
9062 	topt->flags = flags;
9063 	topt->opt = opt;
9064 	topt->tr = tr;
9065 
9066 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9067 					t_options, topt, &trace_options_fops);
9068 
9069 }
9070 
9071 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9072 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9073 {
9074 	struct trace_option_dentry *topts;
9075 	struct trace_options *tr_topts;
9076 	struct tracer_flags *flags;
9077 	struct tracer_opt *opts;
9078 	int cnt;
9079 	int i;
9080 
9081 	if (!tracer)
9082 		return;
9083 
9084 	flags = tracer->flags;
9085 
9086 	if (!flags || !flags->opts)
9087 		return;
9088 
9089 	/*
9090 	 * If this is an instance, only create flags for tracers
9091 	 * the instance may have.
9092 	 */
9093 	if (!trace_ok_for_array(tracer, tr))
9094 		return;
9095 
9096 	for (i = 0; i < tr->nr_topts; i++) {
9097 		/* Make sure there's no duplicate flags. */
9098 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9099 			return;
9100 	}
9101 
9102 	opts = flags->opts;
9103 
9104 	for (cnt = 0; opts[cnt].name; cnt++)
9105 		;
9106 
9107 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9108 	if (!topts)
9109 		return;
9110 
9111 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9112 			    GFP_KERNEL);
9113 	if (!tr_topts) {
9114 		kfree(topts);
9115 		return;
9116 	}
9117 
9118 	tr->topts = tr_topts;
9119 	tr->topts[tr->nr_topts].tracer = tracer;
9120 	tr->topts[tr->nr_topts].topts = topts;
9121 	tr->nr_topts++;
9122 
9123 	for (cnt = 0; opts[cnt].name; cnt++) {
9124 		create_trace_option_file(tr, &topts[cnt], flags,
9125 					 &opts[cnt]);
9126 		MEM_FAIL(topts[cnt].entry == NULL,
9127 			  "Failed to create trace option: %s",
9128 			  opts[cnt].name);
9129 	}
9130 }
9131 
9132 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9133 create_trace_option_core_file(struct trace_array *tr,
9134 			      const char *option, long index)
9135 {
9136 	struct dentry *t_options;
9137 
9138 	t_options = trace_options_init_dentry(tr);
9139 	if (!t_options)
9140 		return NULL;
9141 
9142 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9143 				 (void *)&tr->trace_flags_index[index],
9144 				 &trace_options_core_fops);
9145 }
9146 
create_trace_options_dir(struct trace_array * tr)9147 static void create_trace_options_dir(struct trace_array *tr)
9148 {
9149 	struct dentry *t_options;
9150 	bool top_level = tr == &global_trace;
9151 	int i;
9152 
9153 	t_options = trace_options_init_dentry(tr);
9154 	if (!t_options)
9155 		return;
9156 
9157 	for (i = 0; trace_options[i]; i++) {
9158 		if (top_level ||
9159 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9160 			create_trace_option_core_file(tr, trace_options[i], i);
9161 	}
9162 }
9163 
9164 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9165 rb_simple_read(struct file *filp, char __user *ubuf,
9166 	       size_t cnt, loff_t *ppos)
9167 {
9168 	struct trace_array *tr = filp->private_data;
9169 	char buf[64];
9170 	int r;
9171 
9172 	r = tracer_tracing_is_on(tr);
9173 	r = sprintf(buf, "%d\n", r);
9174 
9175 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9176 }
9177 
9178 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9179 rb_simple_write(struct file *filp, const char __user *ubuf,
9180 		size_t cnt, loff_t *ppos)
9181 {
9182 	struct trace_array *tr = filp->private_data;
9183 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9184 	unsigned long val;
9185 	int ret;
9186 
9187 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9188 	if (ret)
9189 		return ret;
9190 
9191 	if (buffer) {
9192 		mutex_lock(&trace_types_lock);
9193 		if (!!val == tracer_tracing_is_on(tr)) {
9194 			val = 0; /* do nothing */
9195 		} else if (val) {
9196 			tracer_tracing_on(tr);
9197 			if (tr->current_trace->start)
9198 				tr->current_trace->start(tr);
9199 		} else {
9200 			tracer_tracing_off(tr);
9201 			if (tr->current_trace->stop)
9202 				tr->current_trace->stop(tr);
9203 			/* Wake up any waiters */
9204 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9205 		}
9206 		mutex_unlock(&trace_types_lock);
9207 	}
9208 
9209 	(*ppos)++;
9210 
9211 	return cnt;
9212 }
9213 
9214 static const struct file_operations rb_simple_fops = {
9215 	.open		= tracing_open_generic_tr,
9216 	.read		= rb_simple_read,
9217 	.write		= rb_simple_write,
9218 	.release	= tracing_release_generic_tr,
9219 	.llseek		= default_llseek,
9220 };
9221 
9222 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9223 buffer_percent_read(struct file *filp, char __user *ubuf,
9224 		    size_t cnt, loff_t *ppos)
9225 {
9226 	struct trace_array *tr = filp->private_data;
9227 	char buf[64];
9228 	int r;
9229 
9230 	r = tr->buffer_percent;
9231 	r = sprintf(buf, "%d\n", r);
9232 
9233 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9234 }
9235 
9236 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9237 buffer_percent_write(struct file *filp, const char __user *ubuf,
9238 		     size_t cnt, loff_t *ppos)
9239 {
9240 	struct trace_array *tr = filp->private_data;
9241 	unsigned long val;
9242 	int ret;
9243 
9244 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9245 	if (ret)
9246 		return ret;
9247 
9248 	if (val > 100)
9249 		return -EINVAL;
9250 
9251 	tr->buffer_percent = val;
9252 
9253 	(*ppos)++;
9254 
9255 	return cnt;
9256 }
9257 
9258 static const struct file_operations buffer_percent_fops = {
9259 	.open		= tracing_open_generic_tr,
9260 	.read		= buffer_percent_read,
9261 	.write		= buffer_percent_write,
9262 	.release	= tracing_release_generic_tr,
9263 	.llseek		= default_llseek,
9264 };
9265 
/* Dentry of the "instances" directory; assigned in create_trace_instances() */
static struct dentry *trace_instance_dir;

/* Forward declaration: used by trace_array_create_dir() before its definition */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9270 
9271 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9272 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9273 {
9274 	enum ring_buffer_flags rb_flags;
9275 
9276 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9277 
9278 	buf->tr = tr;
9279 
9280 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9281 	if (!buf->buffer)
9282 		return -ENOMEM;
9283 
9284 	buf->data = alloc_percpu(struct trace_array_cpu);
9285 	if (!buf->data) {
9286 		ring_buffer_free(buf->buffer);
9287 		buf->buffer = NULL;
9288 		return -ENOMEM;
9289 	}
9290 
9291 	/* Allocate the first page for all buffers */
9292 	set_buffer_entries(&tr->array_buffer,
9293 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9294 
9295 	return 0;
9296 }
9297 
free_trace_buffer(struct array_buffer * buf)9298 static void free_trace_buffer(struct array_buffer *buf)
9299 {
9300 	if (buf->buffer) {
9301 		ring_buffer_free(buf->buffer);
9302 		buf->buffer = NULL;
9303 		free_percpu(buf->data);
9304 		buf->data = NULL;
9305 	}
9306 }
9307 
allocate_trace_buffers(struct trace_array * tr,int size)9308 static int allocate_trace_buffers(struct trace_array *tr, int size)
9309 {
9310 	int ret;
9311 
9312 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9313 	if (ret)
9314 		return ret;
9315 
9316 #ifdef CONFIG_TRACER_MAX_TRACE
9317 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9318 				    allocate_snapshot ? size : 1);
9319 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9320 		free_trace_buffer(&tr->array_buffer);
9321 		return -ENOMEM;
9322 	}
9323 	tr->allocated_snapshot = allocate_snapshot;
9324 
9325 	allocate_snapshot = false;
9326 #endif
9327 
9328 	return 0;
9329 }
9330 
free_trace_buffers(struct trace_array * tr)9331 static void free_trace_buffers(struct trace_array *tr)
9332 {
9333 	if (!tr)
9334 		return;
9335 
9336 	free_trace_buffer(&tr->array_buffer);
9337 
9338 #ifdef CONFIG_TRACER_MAX_TRACE
9339 	free_trace_buffer(&tr->max_buffer);
9340 #endif
9341 }
9342 
init_trace_flags_index(struct trace_array * tr)9343 static void init_trace_flags_index(struct trace_array *tr)
9344 {
9345 	int i;
9346 
9347 	/* Used by the trace options files */
9348 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9349 		tr->trace_flags_index[i] = i;
9350 }
9351 
__update_tracer_options(struct trace_array * tr)9352 static void __update_tracer_options(struct trace_array *tr)
9353 {
9354 	struct tracer *t;
9355 
9356 	for (t = trace_types; t; t = t->next)
9357 		add_tracer_options(tr, t);
9358 }
9359 
update_tracer_options(struct trace_array * tr)9360 static void update_tracer_options(struct trace_array *tr)
9361 {
9362 	mutex_lock(&trace_types_lock);
9363 	tracer_options_updated = true;
9364 	__update_tracer_options(tr);
9365 	mutex_unlock(&trace_types_lock);
9366 }
9367 
9368 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9369 struct trace_array *trace_array_find(const char *instance)
9370 {
9371 	struct trace_array *tr, *found = NULL;
9372 
9373 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9374 		if (tr->name && strcmp(tr->name, instance) == 0) {
9375 			found = tr;
9376 			break;
9377 		}
9378 	}
9379 
9380 	return found;
9381 }
9382 
trace_array_find_get(const char * instance)9383 struct trace_array *trace_array_find_get(const char *instance)
9384 {
9385 	struct trace_array *tr;
9386 
9387 	mutex_lock(&trace_types_lock);
9388 	tr = trace_array_find(instance);
9389 	if (tr)
9390 		tr->ref++;
9391 	mutex_unlock(&trace_types_lock);
9392 
9393 	return tr;
9394 }
9395 
trace_array_create_dir(struct trace_array * tr)9396 static int trace_array_create_dir(struct trace_array *tr)
9397 {
9398 	int ret;
9399 
9400 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9401 	if (!tr->dir)
9402 		return -EINVAL;
9403 
9404 	ret = event_trace_add_tracer(tr->dir, tr);
9405 	if (ret) {
9406 		tracefs_remove(tr->dir);
9407 		return ret;
9408 	}
9409 
9410 	init_tracer_tracefs(tr, tr->dir);
9411 	__update_tracer_options(tr);
9412 
9413 	return ret;
9414 }
9415 
9416 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems)9417 trace_array_create_systems(const char *name, const char *systems)
9418 {
9419 	struct trace_array *tr;
9420 	int ret;
9421 
9422 	ret = -ENOMEM;
9423 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9424 	if (!tr)
9425 		return ERR_PTR(ret);
9426 
9427 	tr->name = kstrdup(name, GFP_KERNEL);
9428 	if (!tr->name)
9429 		goto out_free_tr;
9430 
9431 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9432 		goto out_free_tr;
9433 
9434 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9435 		goto out_free_tr;
9436 
9437 	if (systems) {
9438 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9439 		if (!tr->system_names)
9440 			goto out_free_tr;
9441 	}
9442 
9443 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9444 
9445 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9446 
9447 	raw_spin_lock_init(&tr->start_lock);
9448 
9449 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9450 
9451 	tr->current_trace = &nop_trace;
9452 
9453 	INIT_LIST_HEAD(&tr->systems);
9454 	INIT_LIST_HEAD(&tr->events);
9455 	INIT_LIST_HEAD(&tr->hist_vars);
9456 	INIT_LIST_HEAD(&tr->err_log);
9457 
9458 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9459 		goto out_free_tr;
9460 
9461 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9462 		goto out_free_tr;
9463 
9464 	ftrace_init_trace_array(tr);
9465 
9466 	init_trace_flags_index(tr);
9467 
9468 	if (trace_instance_dir) {
9469 		ret = trace_array_create_dir(tr);
9470 		if (ret)
9471 			goto out_free_tr;
9472 	} else
9473 		__trace_early_add_events(tr);
9474 
9475 	list_add(&tr->list, &ftrace_trace_arrays);
9476 
9477 	tr->ref++;
9478 
9479 	return tr;
9480 
9481  out_free_tr:
9482 	ftrace_free_ftrace_ops(tr);
9483 	free_trace_buffers(tr);
9484 	free_cpumask_var(tr->pipe_cpumask);
9485 	free_cpumask_var(tr->tracing_cpumask);
9486 	kfree_const(tr->system_names);
9487 	kfree(tr->name);
9488 	kfree(tr);
9489 
9490 	return ERR_PTR(ret);
9491 }
9492 
trace_array_create(const char * name)9493 static struct trace_array *trace_array_create(const char *name)
9494 {
9495 	return trace_array_create_systems(name, NULL);
9496 }
9497 
instance_mkdir(const char * name)9498 static int instance_mkdir(const char *name)
9499 {
9500 	struct trace_array *tr;
9501 	int ret;
9502 
9503 	mutex_lock(&event_mutex);
9504 	mutex_lock(&trace_types_lock);
9505 
9506 	ret = -EEXIST;
9507 	if (trace_array_find(name))
9508 		goto out_unlock;
9509 
9510 	tr = trace_array_create(name);
9511 
9512 	ret = PTR_ERR_OR_ZERO(tr);
9513 
9514 out_unlock:
9515 	mutex_unlock(&trace_types_lock);
9516 	mutex_unlock(&event_mutex);
9517 	return ret;
9518 }
9519 
9520 /**
9521  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9522  * @name: The name of the trace array to be looked up/created.
9523  * @systems: A list of systems to create event directories for (NULL for all)
9524  *
9525  * Returns pointer to trace array with given name.
9526  * NULL, if it cannot be created.
9527  *
9528  * NOTE: This function increments the reference counter associated with the
9529  * trace array returned. This makes sure it cannot be freed while in use.
9530  * Use trace_array_put() once the trace array is no longer needed.
9531  * If the trace_array is to be freed, trace_array_destroy() needs to
9532  * be called after the trace_array_put(), or simply let user space delete
9533  * it from the tracefs instances directory. But until the
9534  * trace_array_put() is called, user space can not delete it.
9535  *
9536  */
trace_array_get_by_name(const char * name,const char * systems)9537 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9538 {
9539 	struct trace_array *tr;
9540 
9541 	mutex_lock(&event_mutex);
9542 	mutex_lock(&trace_types_lock);
9543 
9544 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9545 		if (tr->name && strcmp(tr->name, name) == 0)
9546 			goto out_unlock;
9547 	}
9548 
9549 	tr = trace_array_create_systems(name, systems);
9550 
9551 	if (IS_ERR(tr))
9552 		tr = NULL;
9553 out_unlock:
9554 	if (tr)
9555 		tr->ref++;
9556 
9557 	mutex_unlock(&trace_types_lock);
9558 	mutex_unlock(&event_mutex);
9559 	return tr;
9560 }
9561 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9562 
__remove_instance(struct trace_array * tr)9563 static int __remove_instance(struct trace_array *tr)
9564 {
9565 	int i;
9566 
9567 	/* Reference counter for a newly created trace array = 1. */
9568 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9569 		return -EBUSY;
9570 
9571 	list_del(&tr->list);
9572 
9573 	/* Disable all the flags that were enabled coming in */
9574 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9575 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9576 			set_tracer_flag(tr, 1 << i, 0);
9577 	}
9578 
9579 	tracing_set_nop(tr);
9580 	clear_ftrace_function_probes(tr);
9581 	event_trace_del_tracer(tr);
9582 	ftrace_clear_pids(tr);
9583 	ftrace_destroy_function_files(tr);
9584 	tracefs_remove(tr->dir);
9585 	free_percpu(tr->last_func_repeats);
9586 	free_trace_buffers(tr);
9587 	clear_tracing_err_log(tr);
9588 
9589 	for (i = 0; i < tr->nr_topts; i++) {
9590 		kfree(tr->topts[i].topts);
9591 	}
9592 	kfree(tr->topts);
9593 
9594 	free_cpumask_var(tr->pipe_cpumask);
9595 	free_cpumask_var(tr->tracing_cpumask);
9596 	kfree_const(tr->system_names);
9597 	kfree(tr->name);
9598 	kfree(tr);
9599 
9600 	return 0;
9601 }
9602 
trace_array_destroy(struct trace_array * this_tr)9603 int trace_array_destroy(struct trace_array *this_tr)
9604 {
9605 	struct trace_array *tr;
9606 	int ret;
9607 
9608 	if (!this_tr)
9609 		return -EINVAL;
9610 
9611 	mutex_lock(&event_mutex);
9612 	mutex_lock(&trace_types_lock);
9613 
9614 	ret = -ENODEV;
9615 
9616 	/* Making sure trace array exists before destroying it. */
9617 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9618 		if (tr == this_tr) {
9619 			ret = __remove_instance(tr);
9620 			break;
9621 		}
9622 	}
9623 
9624 	mutex_unlock(&trace_types_lock);
9625 	mutex_unlock(&event_mutex);
9626 
9627 	return ret;
9628 }
9629 EXPORT_SYMBOL_GPL(trace_array_destroy);
9630 
instance_rmdir(const char * name)9631 static int instance_rmdir(const char *name)
9632 {
9633 	struct trace_array *tr;
9634 	int ret;
9635 
9636 	mutex_lock(&event_mutex);
9637 	mutex_lock(&trace_types_lock);
9638 
9639 	ret = -ENODEV;
9640 	tr = trace_array_find(name);
9641 	if (tr)
9642 		ret = __remove_instance(tr);
9643 
9644 	mutex_unlock(&trace_types_lock);
9645 	mutex_unlock(&event_mutex);
9646 
9647 	return ret;
9648 }
9649 
create_trace_instances(struct dentry * d_tracer)9650 static __init void create_trace_instances(struct dentry *d_tracer)
9651 {
9652 	struct trace_array *tr;
9653 
9654 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9655 							 instance_mkdir,
9656 							 instance_rmdir);
9657 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9658 		return;
9659 
9660 	mutex_lock(&event_mutex);
9661 	mutex_lock(&trace_types_lock);
9662 
9663 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9664 		if (!tr->name)
9665 			continue;
9666 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9667 			     "Failed to create instance directory\n"))
9668 			break;
9669 	}
9670 
9671 	mutex_unlock(&trace_types_lock);
9672 	mutex_unlock(&event_mutex);
9673 }
9674 
9675 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9676 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9677 {
9678 	int cpu;
9679 
9680 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9681 			tr, &show_traces_fops);
9682 
9683 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9684 			tr, &set_tracer_fops);
9685 
9686 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9687 			  tr, &tracing_cpumask_fops);
9688 
9689 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9690 			  tr, &tracing_iter_fops);
9691 
9692 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9693 			  tr, &tracing_fops);
9694 
9695 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9696 			  tr, &tracing_pipe_fops);
9697 
9698 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9699 			  tr, &tracing_entries_fops);
9700 
9701 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9702 			  tr, &tracing_total_entries_fops);
9703 
9704 	trace_create_file("free_buffer", 0200, d_tracer,
9705 			  tr, &tracing_free_buffer_fops);
9706 
9707 	trace_create_file("trace_marker", 0220, d_tracer,
9708 			  tr, &tracing_mark_fops);
9709 
9710 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9711 
9712 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9713 			  tr, &tracing_mark_raw_fops);
9714 
9715 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9716 			  &trace_clock_fops);
9717 
9718 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9719 			  tr, &rb_simple_fops);
9720 
9721 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9722 			  &trace_time_stamp_mode_fops);
9723 
9724 	tr->buffer_percent = 50;
9725 
9726 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9727 			tr, &buffer_percent_fops);
9728 
9729 	create_trace_options_dir(tr);
9730 
9731 #ifdef CONFIG_TRACER_MAX_TRACE
9732 	trace_create_maxlat_file(tr, d_tracer);
9733 #endif
9734 
9735 	if (ftrace_create_function_files(tr, d_tracer))
9736 		MEM_FAIL(1, "Could not allocate function filter files");
9737 
9738 #ifdef CONFIG_TRACER_SNAPSHOT
9739 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9740 			  tr, &snapshot_fops);
9741 #endif
9742 
9743 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9744 			  tr, &tracing_err_log_fops);
9745 
9746 	for_each_tracing_cpu(cpu)
9747 		tracing_init_tracefs_percpu(tr, cpu);
9748 
9749 	ftrace_init_tracefs(tr, d_tracer);
9750 }
9751 
trace_automount(struct dentry * mntpt,void * ingore)9752 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9753 {
9754 	struct vfsmount *mnt;
9755 	struct file_system_type *type;
9756 
9757 	/*
9758 	 * To maintain backward compatibility for tools that mount
9759 	 * debugfs to get to the tracing facility, tracefs is automatically
9760 	 * mounted to the debugfs/tracing directory.
9761 	 */
9762 	type = get_fs_type("tracefs");
9763 	if (!type)
9764 		return NULL;
9765 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9766 	put_filesystem(type);
9767 	if (IS_ERR(mnt))
9768 		return NULL;
9769 	mntget(mnt);
9770 
9771 	return mnt;
9772 }
9773 
9774 /**
9775  * tracing_init_dentry - initialize top level trace array
9776  *
9777  * This is called when creating files or directories in the tracing
9778  * directory. It is called via fs_initcall() by any of the boot up code
9779  * and expects to return the dentry of the top level tracing directory.
9780  */
tracing_init_dentry(void)9781 int tracing_init_dentry(void)
9782 {
9783 	struct trace_array *tr = &global_trace;
9784 
9785 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9786 		pr_warn("Tracing disabled due to lockdown\n");
9787 		return -EPERM;
9788 	}
9789 
9790 	/* The top level trace array uses  NULL as parent */
9791 	if (tr->dir)
9792 		return 0;
9793 
9794 	if (WARN_ON(!tracefs_initialized()))
9795 		return -ENODEV;
9796 
9797 	/*
9798 	 * As there may still be users that expect the tracing
9799 	 * files to exist in debugfs/tracing, we must automount
9800 	 * the tracefs file system there, so older tools still
9801 	 * work with the newer kernel.
9802 	 */
9803 	tr->dir = debugfs_create_automount("tracing", NULL,
9804 					   trace_automount, NULL);
9805 
9806 	return 0;
9807 }
9808 
9809 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9810 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9811 
9812 static struct workqueue_struct *eval_map_wq __initdata;
9813 static struct work_struct eval_map_work __initdata;
9814 static struct work_struct tracerfs_init_work __initdata;
9815 
eval_map_work_func(struct work_struct * work)9816 static void __init eval_map_work_func(struct work_struct *work)
9817 {
9818 	int len;
9819 
9820 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9821 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9822 }
9823 
trace_eval_init(void)9824 static int __init trace_eval_init(void)
9825 {
9826 	INIT_WORK(&eval_map_work, eval_map_work_func);
9827 
9828 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9829 	if (!eval_map_wq) {
9830 		pr_err("Unable to allocate eval_map_wq\n");
9831 		/* Do work here */
9832 		eval_map_work_func(&eval_map_work);
9833 		return -ENOMEM;
9834 	}
9835 
9836 	queue_work(eval_map_wq, &eval_map_work);
9837 	return 0;
9838 }
9839 
9840 subsys_initcall(trace_eval_init);
9841 
trace_eval_sync(void)9842 static int __init trace_eval_sync(void)
9843 {
9844 	/* Make sure the eval map updates are finished */
9845 	if (eval_map_wq)
9846 		destroy_workqueue(eval_map_wq);
9847 	return 0;
9848 }
9849 
9850 late_initcall_sync(trace_eval_sync);
9851 
9852 
9853 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9854 static void trace_module_add_evals(struct module *mod)
9855 {
9856 	if (!mod->num_trace_evals)
9857 		return;
9858 
9859 	/*
9860 	 * Modules with bad taint do not have events created, do
9861 	 * not bother with enums either.
9862 	 */
9863 	if (trace_module_has_bad_taint(mod))
9864 		return;
9865 
9866 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9867 }
9868 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map block owned by @mod, if one is found on
 * the trace_eval_maps chain. The chain is walked and modified under
 * trace_eval_mutex.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *item;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	/*
	 * *link is always the pointer that leads to the block being
	 * examined: first the list head, then the tail.next of the
	 * previous block.
	 */
	for (item = trace_eval_maps; item; item = *link) {
		if (item->head.mod == mod)
			break;
		link = &trace_eval_jmp_to_tail(item)->tail.next;
	}

	if (item) {
		/* Route around the block, then release it */
		*link = trace_eval_jmp_to_tail(item)->tail.next;
		kfree(item);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
/* Without CONFIG_TRACE_EVAL_MAP_FILE there is nothing to remove */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9900 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9901 static int trace_module_notify(struct notifier_block *self,
9902 			       unsigned long val, void *data)
9903 {
9904 	struct module *mod = data;
9905 
9906 	switch (val) {
9907 	case MODULE_STATE_COMING:
9908 		trace_module_add_evals(mod);
9909 		break;
9910 	case MODULE_STATE_GOING:
9911 		trace_module_remove_evals(mod);
9912 		break;
9913 	}
9914 
9915 	return NOTIFY_OK;
9916 }
9917 
/*
 * Notifier block that routes module state changes to
 * trace_module_notify(); registered from tracer_init_tracefs_work_func().
 */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
9922 #endif /* CONFIG_MODULES */
9923 
/*
 * Populate the top level tracing directory for the global trace array.
 *
 * Runs either as a work item on eval_map_wq or directly from
 * tracer_init_tracefs() (which then passes NULL); @work is not used.
 * Besides creating the files below it registers the module notifier
 * (CONFIG_MODULES), creates the instances directory and updates the
 * tracer options of global_trace.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	/* Files that exist only once, at the top level */
	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	/* From here on module load/unload adds/removes module eval maps */
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);
}
9962 
tracer_init_tracefs(void)9963 static __init int tracer_init_tracefs(void)
9964 {
9965 	int ret;
9966 
9967 	trace_access_lock_init();
9968 
9969 	ret = tracing_init_dentry();
9970 	if (ret)
9971 		return 0;
9972 
9973 	if (eval_map_wq) {
9974 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9975 		queue_work(eval_map_wq, &tracerfs_init_work);
9976 	} else {
9977 		tracer_init_tracefs_work_func(NULL);
9978 	}
9979 
9980 	rv_init_interface();
9981 
9982 	return 0;
9983 }
9984 
9985 fs_initcall(tracer_init_tracefs);
9986 
/* Defined below; declared here so the notifier blocks can refer to it */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/*
 * Panic and die notifier blocks. Both use the same callback and the same
 * priority of INT_MAX - 1; the callback tells them apart by comparing
 * its @self argument with &trace_die_notifier.
 */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
9999 
10000 /*
10001  * The idea is to execute the following die/panic callback early, in order
10002  * to avoid showing irrelevant information in the trace (like other panic
10003  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10004  * warnings get disabled (to prevent potential log flooding).
10005  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10006 static int trace_die_panic_handler(struct notifier_block *self,
10007 				unsigned long ev, void *unused)
10008 {
10009 	if (!ftrace_dump_on_oops)
10010 		return NOTIFY_DONE;
10011 
10012 	/* The die notifier requires DIE_OOPS to trigger */
10013 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10014 		return NOTIFY_DONE;
10015 
10016 	ftrace_dump(ftrace_dump_on_oops);
10017 
10018 	return NOTIFY_DONE;
10019 }
10020 
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 *
 * trace_printk_seq() truncates a sequence to this many bytes before
 * printing it.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
10033 
/*
 * Print the content of the trace sequence @s with printk() at the
 * KERN_TRACE level, then reset the sequence with trace_seq_init().
 *
 * The content is truncated to at most TRACE_MAX_PRINT bytes and is
 * NUL-terminated in place before it is printed.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Silently truncate; probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
10056 
trace_init_global_iter(struct trace_iterator * iter)10057 void trace_init_global_iter(struct trace_iterator *iter)
10058 {
10059 	iter->tr = &global_trace;
10060 	iter->trace = iter->tr->current_trace;
10061 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10062 	iter->array_buffer = &global_trace.array_buffer;
10063 
10064 	if (iter->trace && iter->trace->open)
10065 		iter->trace->open(iter);
10066 
10067 	/* Annotate start of buffers if we had overruns */
10068 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10069 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10070 
10071 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10072 	if (trace_clocks[iter->tr->clock_id].in_ns)
10073 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10074 
10075 	/* Can not use kmalloc for iter.temp and iter.fmt */
10076 	iter->temp = static_temp_buf;
10077 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10078 	iter->fmt = static_fmt_buf;
10079 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10080 }
10081 
/*
 * Dump the content of the global trace buffer with printk().
 *
 * @oops_dump_mode selects what is dumped:
 *   DUMP_ALL  - the buffers of all CPUs
 *   DUMP_ORIG - only the buffer of the CPU running this function
 *   DUMP_NONE - nothing is printed
 *   any other value is reported and treated like DUMP_ALL.
 *
 * Only one dump can run at a time; a caller that finds a dump already in
 * progress returns immediately. Tracing is switched off with
 * tracing_off() before dumping and is not switched back on here. The
 * entries that are printed are consumed from the buffer. Interrupts are
 * disabled on the local CPU for the duration of the dump.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/* Bump the per-CPU disabled counters; undone at out_enable */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	/* Remember the flag so it can be restored at out_enable */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		/*
		 * NOTE(review): these two lines carry no KERN_TRACE level,
		 * unlike every other printk() in this function - confirm
		 * whether that is intended.
		 */
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	/* One pass per entry: format it, consume it, print it */
	while (!trace_empty(&iter)) {

		/* Opening separator, printed before the first entry only */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* The dump can take long; keep the NMI watchdog quiet */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	/* Restore the user-object flag and the per-CPU disabled counters */
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
10188 
/* Size of the kernel buffer trace_parse_run_command() copies user data into */
#define WRITE_BUFSIZE  4096

/*
 * Split a user space write into lines and run @createfn on each of them.
 *
 * @buffer:   user buffer holding @count bytes of commands
 * @createfn: called once per line with a NUL-terminated string from which
 *            the trailing newline and anything from the first '#' on
 *            have been removed
 *
 * @file and @ppos are not used by this function.
 *
 * The data is copied in chunks of at most WRITE_BUFSIZE - 1 bytes. A line
 * that does not fit into one chunk is rejected.
 *
 * Returns the number of bytes consumed on success, -ENOMEM if the kernel
 * buffer cannot be allocated, -EFAULT if copying from user space fails,
 * -EINVAL if a line is too long, or the first non-zero value returned by
 * @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	/* Outer loop: one chunk of user data per iteration */
	while (done < count) {
		size = count - done;

		/* Leave room for the terminating NUL */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		/* Inner loop: one line of the current chunk per iteration */
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				/* Line length including the newline */
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				/* No newline, yet more user data remains */
				if (done + size < count) {
					/*
					 * The line was cut by the end of the
					 * chunk: re-read it at the start of
					 * the next chunk.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10255 
10256 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10257 __init static bool tr_needs_alloc_snapshot(const char *name)
10258 {
10259 	char *test;
10260 	int len = strlen(name);
10261 	bool ret;
10262 
10263 	if (!boot_snapshot_index)
10264 		return false;
10265 
10266 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10267 	    boot_snapshot_info[len] == '\t')
10268 		return true;
10269 
10270 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10271 	if (!test)
10272 		return false;
10273 
10274 	sprintf(test, "\t%s\t", name);
10275 	ret = strstr(boot_snapshot_info, test) == NULL;
10276 	kfree(test);
10277 	return ret;
10278 }
10279 
do_allocate_snapshot(const char * name)10280 __init static void do_allocate_snapshot(const char *name)
10281 {
10282 	if (!tr_needs_alloc_snapshot(name))
10283 		return;
10284 
10285 	/*
10286 	 * When allocate_snapshot is set, the next call to
10287 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10288 	 * will allocate the snapshot buffer. That will alse clear
10289 	 * this flag.
10290 	 */
10291 	allocate_snapshot = true;
10292 }
10293 #else
/* Stub used when CONFIG_TRACER_MAX_TRACE is not set: nothing to do */
static inline void do_allocate_snapshot(const char *name) { }
10295 #endif
10296 
enable_instances(void)10297 __init static void enable_instances(void)
10298 {
10299 	struct trace_array *tr;
10300 	char *curr_str;
10301 	char *str;
10302 	char *tok;
10303 
10304 	/* A tab is always appended */
10305 	boot_instance_info[boot_instance_index - 1] = '\0';
10306 	str = boot_instance_info;
10307 
10308 	while ((curr_str = strsep(&str, "\t"))) {
10309 
10310 		tok = strsep(&curr_str, ",");
10311 
10312 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10313 			do_allocate_snapshot(tok);
10314 
10315 		tr = trace_array_get_by_name(tok, NULL);
10316 		if (!tr) {
10317 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10318 			continue;
10319 		}
10320 		/* Allow user space to delete it */
10321 		trace_array_put(tr);
10322 
10323 		while ((tok = strsep(&curr_str, ","))) {
10324 			early_enable_events(tr, tok, true);
10325 		}
10326 	}
10327 }
10328 
/*
 * Allocate and initialize everything the global trace array needs, then
 * enable tracing by clearing tracing_disabled.
 *
 * Returns 0 on success, -EPERM if tracefs is locked down, the error from
 * cpuhp_setup_state_multi() if that fails, and -ENOMEM for every other
 * failure. On failure everything allocated so far is released through
 * the chain of labels at the end of the function.
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	/* Every failure from here on is reported as -ENOMEM */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		/* A bad clock name is only warned about, not treated as fatal */
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	/* Hook trace_die_panic_handler() into the panic and die chains */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

	/* Error unwinding: release in reverse order of allocation */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
10461 
ftrace_boot_snapshot(void)10462 void __init ftrace_boot_snapshot(void)
10463 {
10464 #ifdef CONFIG_TRACER_MAX_TRACE
10465 	struct trace_array *tr;
10466 
10467 	if (!snapshot_at_boot)
10468 		return;
10469 
10470 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10471 		if (!tr->allocated_snapshot)
10472 			continue;
10473 
10474 		tracing_snapshot_instance(tr);
10475 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10476 	}
10477 #endif
10478 }
10479 
early_trace_init(void)10480 void __init early_trace_init(void)
10481 {
10482 	if (tracepoint_printk) {
10483 		tracepoint_print_iter =
10484 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10485 		if (MEM_FAIL(!tracepoint_print_iter,
10486 			     "Failed to allocate trace iterator\n"))
10487 			tracepoint_printk = 0;
10488 		else
10489 			static_key_enable(&tracepoint_printk_key.key);
10490 	}
10491 	tracer_alloc_buffers();
10492 
10493 	init_events();
10494 }
10495 
trace_init(void)10496 void __init trace_init(void)
10497 {
10498 	trace_event_init();
10499 
10500 	if (boot_instance_index)
10501 		enable_instances();
10502 }
10503 
clear_boot_tracer(void)10504 __init static void clear_boot_tracer(void)
10505 {
10506 	/*
10507 	 * The default tracer at boot buffer is an init section.
10508 	 * This function is called in lateinit. If we did not
10509 	 * find the boot tracer, then clear it out, to prevent
10510 	 * later registration from accessing the buffer that is
10511 	 * about to be freed.
10512 	 */
10513 	if (!default_bootup_tracer)
10514 		return;
10515 
10516 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10517 	       default_bootup_tracer);
10518 	default_bootup_tracer = NULL;
10519 }
10520 
10521 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10522 __init static void tracing_set_default_clock(void)
10523 {
10524 	/* sched_clock_stable() is determined in late_initcall */
10525 	if (!trace_boot_clock && !sched_clock_stable()) {
10526 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10527 			pr_warn("Can not set tracing clock due to lockdown\n");
10528 			return;
10529 		}
10530 
10531 		printk(KERN_WARNING
10532 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10533 		       "If you want to keep using the local clock, then add:\n"
10534 		       "  \"trace_clock=local\"\n"
10535 		       "on the kernel command line\n");
10536 		tracing_set_clock(&global_trace, "global");
10537 	}
10538 }
10539 #else
/* Stub used when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set */
static inline void tracing_set_default_clock(void) { }
10541 #endif
10542 
late_trace_init(void)10543 __init static int late_trace_init(void)
10544 {
10545 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10546 		static_key_disable(&tracepoint_printk_key.key);
10547 		tracepoint_printk = 0;
10548 	}
10549 
10550 	tracing_set_default_clock();
10551 	clear_boot_tracer();
10552 	return 0;
10553 }
10554 
10555 late_initcall_sync(late_trace_init);
10556